diff --git a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahAdaptiveHeuristics.cpp b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahAdaptiveHeuristics.cpp index ce74e8cf199..11ca6ff3e90 100644 --- a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahAdaptiveHeuristics.cpp +++ b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahAdaptiveHeuristics.cpp @@ -1,7 +1,7 @@ /* - * Copyright (c) 2018, 2019, Red Hat, Inc. All rights reserved. + * Copyright (c) 2018, 2026, Red Hat, Inc. All rights reserved. * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. - * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -24,31 +24,27 @@ * */ - -#include "gc/shared/gcCause.hpp" #include "gc/shenandoah/heuristics/shenandoahAdaptiveHeuristics.hpp" #include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp" #include "gc/shenandoah/heuristics/shenandoahSpaceInfo.hpp" +#include "gc/shenandoah/shenandoahAllocRate.inline.hpp" #include "gc/shenandoah/shenandoahCollectionSet.hpp" -#include "gc/shenandoah/shenandoahCollectorPolicy.hpp" #include "gc/shenandoah/shenandoahHeap.inline.hpp" -#include "gc/shenandoah/shenandoahHeapRegion.inline.hpp" #include "gc/shenandoah/shenandoahYoungGeneration.hpp" #include "logging/log.hpp" #include "logging/logTag.hpp" -#include "runtime/globals.hpp" +#include "utilities/globalDefinitions.hpp" #include "utilities/quickSort.hpp" -// These constants are used to adjust the margin of error for the moving -// average of the allocation rate and cycle time. The units are standard -// deviations. 
-const double ShenandoahAdaptiveHeuristics::FULL_PENALTY_SD = 0.2; -const double ShenandoahAdaptiveHeuristics::DEGENERATE_PENALTY_SD = 0.1; +#include + +#define PROPERFMT_F "%.1f %s" +#define PROPERFMT_F_ARGS(s) byte_size_in_proper_unit(s), proper_unit_for_byte_size(s) // These are used to decide if we want to make any adjustments at all // at the end of a successful concurrent cycle. -const double ShenandoahAdaptiveHeuristics::LOWEST_EXPECTED_AVAILABLE_AT_END = -0.5; -const double ShenandoahAdaptiveHeuristics::HIGHEST_EXPECTED_AVAILABLE_AT_END = 0.5; +constexpr double LOWEST_EXPECTED_AVAILABLE_AT_END = -0.5; +constexpr double HIGHEST_EXPECTED_AVAILABLE_AT_END = 0.5; // These values are the confidence interval expressed as standard deviations. // At the minimum confidence level, there is a 25% chance that the true value of @@ -57,66 +53,15 @@ const double ShenandoahAdaptiveHeuristics::HIGHEST_EXPECTED_AVAILABLE_AT_END = 0 // MAXIMUM_CONFIDENCE interval here means there is a one in a thousand chance // that the true value of our estimate is outside the interval. These are used // as bounds on the adjustments applied at the outcome of a GC cycle. -const double ShenandoahAdaptiveHeuristics::MINIMUM_CONFIDENCE = 0.319; // 25% -const double ShenandoahAdaptiveHeuristics::MAXIMUM_CONFIDENCE = 3.291; // 99.9% - - -// To enable detection of GC time trends, we keep separate track of the recent history of gc time. During initialization, -// for example, the amount of live memory may be increasing, which is likely to cause the GC times to increase. This history -// allows us to predict increasing GC times rather than always assuming average recent GC time is the best predictor. -const size_t ShenandoahAdaptiveHeuristics::GC_TIME_SAMPLE_SIZE = 3; - -// We also keep separate track of recently sampled allocation rates for two purposes: -// 1. The number of samples examined to determine acceleration of allocation is represented by -// ShenandoahRateAccelerationSampleSize -// 2. 
The number of most recent samples averaged to determine a momentary allocation spike is represented by -// ShenandoahMomentaryAllocationRateSpikeSampleSize - -// Allocation rates are sampled by the regulator thread, which typically runs every ms. There may be jitter in the scheduling -// of the regulator thread. To reduce signal noise and synchronization overhead, we do not sample allocation rate with every -// iteration of the regulator. We prefer sample time longer than 1 ms so that there can be a statistically significant number -// of allocations occuring within each sample period. The regulator thread samples allocation rate only if at least -// ShenandoahAccelerationSamplePeriod ms have passed since it previously sampled the allocation rate. -// -// This trigger responds much more quickly than the traditional trigger, which monitors 100 ms spans. When acceleration is -// detected, the impact of acceleration on anticipated consumption of available memory is also much more impactful -// than the assumed constant allocation rate consumption of available memory. 
+constexpr double MINIMUM_CONFIDENCE = 0.319; // 25% +constexpr double MAXIMUM_CONFIDENCE = 3.291; // 99.9% ShenandoahAdaptiveHeuristics::ShenandoahAdaptiveHeuristics(ShenandoahSpaceInfo* space_info) : ShenandoahHeuristics(space_info), _margin_of_error_sd(ShenandoahAdaptiveInitialConfidence), - _spike_threshold_sd(ShenandoahAdaptiveInitialSpikeThreshold), _last_trigger(OTHER), - _available(Moving_Average_Samples, ShenandoahAdaptiveDecayFactor), - _free_set(nullptr), - _previous_acceleration_sample_timestamp(0.0), - _gc_time_first_sample_index(0), - _gc_time_num_samples(0), - _gc_time_timestamps(NEW_C_HEAP_ARRAY(double, GC_TIME_SAMPLE_SIZE, mtGC)), - _gc_time_samples(NEW_C_HEAP_ARRAY(double, GC_TIME_SAMPLE_SIZE, mtGC)), - _gc_time_xy(NEW_C_HEAP_ARRAY(double, GC_TIME_SAMPLE_SIZE, mtGC)), - _gc_time_xx(NEW_C_HEAP_ARRAY(double, GC_TIME_SAMPLE_SIZE, mtGC)), - _gc_time_sum_of_timestamps(0), - _gc_time_sum_of_samples(0), - _gc_time_sum_of_xy(0), - _gc_time_sum_of_xx(0), - _gc_time_m(0.0), - _gc_time_b(0.0), - _gc_time_sd(0.0), - _spike_acceleration_buffer_size(MAX2(ShenandoahRateAccelerationSampleSize, 1+ShenandoahMomentaryAllocationRateSpikeSampleSize)), - _spike_acceleration_first_sample_index(0), - _spike_acceleration_num_samples(0), - _spike_acceleration_rate_samples(NEW_C_HEAP_ARRAY(double, _spike_acceleration_buffer_size, mtGC)), - _spike_acceleration_rate_timestamps(NEW_C_HEAP_ARRAY(double, _spike_acceleration_buffer_size, mtGC)) { - } - -ShenandoahAdaptiveHeuristics::~ShenandoahAdaptiveHeuristics() { - FREE_C_HEAP_ARRAY(_spike_acceleration_rate_samples); - FREE_C_HEAP_ARRAY(_spike_acceleration_rate_timestamps); - FREE_C_HEAP_ARRAY(_gc_time_timestamps); - FREE_C_HEAP_ARRAY(_gc_time_samples); - FREE_C_HEAP_ARRAY(_gc_time_xy); - FREE_C_HEAP_ARRAY(_gc_time_xx); + _available(Moving_Average_Samples), + _headroom_adjustment(0) { } void ShenandoahAdaptiveHeuristics::initialize() { @@ -125,7 +70,6 @@ void ShenandoahAdaptiveHeuristics::initialize() { void 
ShenandoahAdaptiveHeuristics::post_initialize() { ShenandoahHeuristics::post_initialize(); - _free_set = ShenandoahHeap::heap()->free_set(); assert(!ShenandoahHeap::heap()->mode()->is_generational(), "ShenandoahGenerationalHeuristics overrides this method"); compute_headroom_adjustment(); } @@ -136,9 +80,9 @@ void ShenandoahAdaptiveHeuristics::compute_headroom_adjustment() { // intend to finish GC before the amount of available memory is less than the allocation headroom. Headroom is the planned // safety buffer to allow a small amount of additional allocation to take place in case we were overly optimistic in delaying // our trigger. - size_t capacity = ShenandoahHeap::heap()->soft_max_capacity(); - size_t spike_headroom = capacity / 100 * ShenandoahAllocSpikeFactor; - size_t penalties = capacity / 100 * _gc_time_penalties; + const size_t capacity = ShenandoahHeap::heap()->soft_max_capacity(); + const size_t spike_headroom = capacity / 100 * ShenandoahAllocSpikeFactor; + const size_t penalties = capacity / 100 * _gc_time_penalties; _headroom_adjustment = spike_headroom + penalties; } @@ -172,17 +116,14 @@ void ShenandoahAdaptiveHeuristics::choose_collection_set_from_regiondata(Shenand // we hit max_cset. When max_cset is hit, we terminate the cset selection. Note that in this scheme, // ShenandoahGarbageThreshold is the soft threshold which would be ignored until min_garbage is hit. - size_t capacity = ShenandoahHeap::heap()->soft_max_capacity(); - size_t max_cset = (size_t)((1.0 * capacity / 100 * ShenandoahEvacReserve) / ShenandoahEvacWaste); - size_t free_target = (capacity / 100 * ShenandoahMinFreeThreshold) + max_cset; - size_t min_garbage = (free_target > actual_free ? 
(free_target - actual_free) : 0); + const size_t capacity = ShenandoahHeap::heap()->soft_max_capacity(); + const size_t max_cset = shenandoah_safe_size_cast(1.0 * capacity / 100 * ShenandoahEvacReserve / ShenandoahEvacWaste); + const size_t free_target = (capacity / 100 * ShenandoahMinFreeThreshold) + max_cset; + const size_t min_garbage = (free_target > actual_free ? (free_target - actual_free) : 0); - log_info(gc, ergo)("Adaptive CSet Selection. Target Free: %zu%s, Actual Free: " - "%zu%s, Max Evacuation: %zu%s, Min Garbage: %zu%s", - byte_size_in_proper_unit(free_target), proper_unit_for_byte_size(free_target), - byte_size_in_proper_unit(actual_free), proper_unit_for_byte_size(actual_free), - byte_size_in_proper_unit(max_cset), proper_unit_for_byte_size(max_cset), - byte_size_in_proper_unit(min_garbage), proper_unit_for_byte_size(min_garbage)); + log_info(gc, ergo)("Adaptive CSet Selection. Target Free: " PROPERFMT ", Actual Free: " PROPERFMT + ", Max Evacuation: " PROPERFMT ", Min Garbage: " PROPERFMT , + PROPERFMTARGS(free_target), PROPERFMTARGS(actual_free), PROPERFMTARGS(max_cset), PROPERFMTARGS(min_garbage)); // Better select garbage-first regions QuickSort::sort(data, size, compare_by_garbage); @@ -194,8 +135,8 @@ void ShenandoahAdaptiveHeuristics::choose_collection_set_from_regiondata(Shenand for (size_t idx = 0; idx < size; idx++) { ShenandoahHeapRegion* r = data[idx].get_region(); - size_t new_cset = cur_cset + r->get_live_data_bytes(); - size_t new_garbage = cur_garbage + r->garbage(); + const size_t new_cset = cur_cset + r->get_live_data_bytes(); + const size_t new_garbage = cur_garbage + r->garbage(); if (new_cset > max_cset) { break; @@ -210,122 +151,33 @@ void ShenandoahAdaptiveHeuristics::choose_collection_set_from_regiondata(Shenand } void ShenandoahAdaptiveHeuristics::add_degenerated_gc_time(double time_at_start, double gc_time) { - // Conservatively add sample into linear model If this time is above the predicted concurrent gc time - if 
(predict_gc_time(time_at_start) < gc_time) { - add_gc_time(time_at_start, gc_time); + // Conservatively add sample into linear model, if this time is above the predicted concurrent gc time + if (_cycles.predict_duration(time_at_start, _margin_of_error_sd) < gc_time) { + _cycles.record_duration(time_at_start, gc_time); } } -void ShenandoahAdaptiveHeuristics::add_gc_time(double time_at_start, double gc_time) { - // Update best-fit linear predictor of GC time - uint index = (_gc_time_first_sample_index + _gc_time_num_samples) % GC_TIME_SAMPLE_SIZE; - if (_gc_time_num_samples == GC_TIME_SAMPLE_SIZE) { - _gc_time_sum_of_timestamps -= _gc_time_timestamps[index]; - _gc_time_sum_of_samples -= _gc_time_samples[index]; - _gc_time_sum_of_xy -= _gc_time_xy[index]; - _gc_time_sum_of_xx -= _gc_time_xx[index]; - } - _gc_time_timestamps[index] = time_at_start; - _gc_time_samples[index] = gc_time; - _gc_time_xy[index] = time_at_start * gc_time; - _gc_time_xx[index] = time_at_start * time_at_start; - - _gc_time_sum_of_timestamps += _gc_time_timestamps[index]; - _gc_time_sum_of_samples += _gc_time_samples[index]; - _gc_time_sum_of_xy += _gc_time_xy[index]; - _gc_time_sum_of_xx += _gc_time_xx[index]; - - if (_gc_time_num_samples < GC_TIME_SAMPLE_SIZE) { - _gc_time_num_samples++; - } else { - _gc_time_first_sample_index = (_gc_time_first_sample_index + 1) % GC_TIME_SAMPLE_SIZE; - } - - if (_gc_time_num_samples == 1) { - // The predictor is constant (horizontal line) - _gc_time_m = 0; - _gc_time_b = gc_time; - _gc_time_sd = 0.0; - } else if (_gc_time_num_samples == 2) { - - assert(time_at_start > _gc_time_timestamps[_gc_time_first_sample_index], - "Two GC cycles cannot finish at same time: %.6f vs %.6f, with GC times %.6f and %.6f", time_at_start, - _gc_time_timestamps[_gc_time_first_sample_index], gc_time, _gc_time_samples[_gc_time_first_sample_index]); - - // Two points define a line - double delta_x = time_at_start - _gc_time_timestamps[_gc_time_first_sample_index]; - double delta_y 
= gc_time - _gc_time_samples[_gc_time_first_sample_index]; - _gc_time_m = delta_y / delta_x; - // y = mx + b - // so b = y0 - mx0 - _gc_time_b = gc_time - _gc_time_m * time_at_start; - _gc_time_sd = 0.0; - } else { - // Since timestamps are monotonically increasing, denominator does not equal zero. - double denominator = _gc_time_num_samples * _gc_time_sum_of_xx - _gc_time_sum_of_timestamps * _gc_time_sum_of_timestamps; - assert(denominator != 0.0, "Invariant: samples: %u, sum_of_xx: %.6f, sum_of_timestamps: %.6f", - _gc_time_num_samples, _gc_time_sum_of_xx, _gc_time_sum_of_timestamps); - _gc_time_m = ((_gc_time_num_samples * _gc_time_sum_of_xy - _gc_time_sum_of_timestamps * _gc_time_sum_of_samples) / - denominator); - _gc_time_b = (_gc_time_sum_of_samples - _gc_time_m * _gc_time_sum_of_timestamps) / _gc_time_num_samples; - double sum_of_squared_deviations = 0.0; - for (size_t i = 0; i < _gc_time_num_samples; i++) { - uint index = (_gc_time_first_sample_index + i) % GC_TIME_SAMPLE_SIZE; - double x = _gc_time_timestamps[index]; - double predicted_y = _gc_time_m * x + _gc_time_b; - double deviation = predicted_y - _gc_time_samples[index]; - sum_of_squared_deviations += deviation * deviation; - } - _gc_time_sd = sqrt(sum_of_squared_deviations / _gc_time_num_samples); - } -} - -double ShenandoahAdaptiveHeuristics::predict_gc_time(double timestamp_at_start) { - return _gc_time_m * timestamp_at_start + _gc_time_b + _gc_time_sd * _margin_of_error_sd; -} - -void ShenandoahAdaptiveHeuristics::add_rate_to_acceleration_history(double timestamp, double rate) { - uint new_sample_index = - (_spike_acceleration_first_sample_index + _spike_acceleration_num_samples) % _spike_acceleration_buffer_size; - _spike_acceleration_rate_timestamps[new_sample_index] = timestamp; - _spike_acceleration_rate_samples[new_sample_index] = rate; - if (_spike_acceleration_num_samples == _spike_acceleration_buffer_size) { - _spike_acceleration_first_sample_index++; - if 
(_spike_acceleration_first_sample_index == _spike_acceleration_buffer_size) { - _spike_acceleration_first_sample_index = 0; - } - } else { - _spike_acceleration_num_samples++; - } -} - -void ShenandoahAdaptiveHeuristics::record_cycle_start() { - ShenandoahHeuristics::record_cycle_start(); - _allocation_rate.allocation_counter_reset(); -} - void ShenandoahAdaptiveHeuristics::record_success_concurrent() { ShenandoahHeuristics::record_success_concurrent(); - double now = os::elapsedTime(); - // Should we not add GC time if this was an abbreviated cycle? - add_gc_time(_cycle_start, elapsed_cycle_time()); - - size_t available = _space_info->available(); + // We add this time even if it is a shortened cycle. There is a risk that this pulls + // the gc time trend down, but it is still a more accurate view than excluding times + // from shortened cycles. Suppose we did exclude shortened times, the risk would then + // be running the collector more often than necessary because it continues to believe + // the average cycle time is much higher than it otherwise would be. + _cycles.record_duration(_cycle_start, elapsed_cycle_time()); double z_score = 0.0; - double available_sd = _available.sd(); + const double available = static_cast(_space_info->available()); + const double available_sd = _available.sd(); if (available_sd > 0) { - double available_avg = _available.avg(); - z_score = (double(available) - available_avg) / available_sd; - log_debug(gc, ergo)("Available: %zu %sB, z-score=%.3f. Average available: %.1f %sB +/- %.1f %sB.", - byte_size_in_proper_unit(available), proper_unit_for_byte_size(available), - z_score, - byte_size_in_proper_unit(available_avg), proper_unit_for_byte_size(available_avg), - byte_size_in_proper_unit(available_sd), proper_unit_for_byte_size(available_sd)); + const double available_avg = _available.avg(); + z_score = (available - available_avg) / available_sd; + log_debug(gc, ergo)("Available: " PROPERFMT_F "B, z-score=%.3f. 
Average available: " PROPERFMT_F "B +/- " PROPERFMT_F "B.", + PROPERFMT_F_ARGS(available), z_score, PROPERFMT_F_ARGS(available_avg), PROPERFMT_F_ARGS(available_sd)); } - _available.add(double(available)); + _available.add(available); // In the case when a concurrent GC cycle completes successfully but with an // unusually small amount of available memory we will adjust our trigger @@ -352,90 +204,24 @@ void ShenandoahAdaptiveHeuristics::record_success_concurrent() { // property allows us to adjust the trigger parameters proportionally. // // The `100` here is used to attenuate the size of our adjustments. This - // number was chosen empirically. It also means the adjustments at the end of - // a concurrent cycle are an order of magnitude smaller than the adjustments - // made for a degenerated or full GC cycle (which themselves were also - // chosen empirically). - adjust_last_trigger_parameters(z_score / -100); + // number was chosen empirically. + if (_last_trigger == RATE) { + adjust_margin_of_error(z_score / -100); + } } } void ShenandoahAdaptiveHeuristics::record_degenerated() { ShenandoahHeuristics::record_degenerated(); add_degenerated_gc_time(_precursor_cycle_start, elapsed_degenerated_cycle_time()); - // Adjust both trigger's parameters in the case of a degenerated GC because - // either of them should have triggered earlier to avoid this case. - adjust_margin_of_error(DEGENERATE_PENALTY_SD); - adjust_spike_threshold(DEGENERATE_PENALTY_SD); } -void ShenandoahAdaptiveHeuristics::record_success_full() { - ShenandoahHeuristics::record_success_full(); - // Adjust both trigger's parameters in the case of a full GC because - // either of them should have triggered earlier to avoid this case. 
- adjust_margin_of_error(FULL_PENALTY_SD); - adjust_spike_threshold(FULL_PENALTY_SD); -} - -static double saturate(double value, double min, double max) { - return MAX2(MIN2(value, max), min); -} - -// Rationale: -// The idea is that there is an average allocation rate and there are occasional abnormal bursts (or spikes) of -// allocations that exceed the average allocation rate. What do these spikes look like? -// -// 1. At certain phase changes, we may discard large amounts of data and replace it with large numbers of newly -// allocated objects. This "spike" looks more like a phase change. We were in steady state at M bytes/sec -// allocation rate and now we're in a "reinitialization phase" that looks like N bytes/sec. We need the "spike" -// accommodation to give us enough runway to recalibrate our "average allocation rate". -// -// 2. The typical workload changes. "Suddenly", our typical workload of N TPS increases to N+delta TPS. This means -// our average allocation rate needs to be adjusted. Once again, we need the "spike" accomodation to give us -// enough runway to recalibrate our "average allocation rate". -// -// 3. Though there is an "average" allocation rate, a given workload's demand for allocation may be very bursty. We -// allocate a bunch of LABs during the 5 ms that follow completion of a GC, then we perform no more allocations for -// the next 150 ms. It seems we want the "spike" to represent the maximum divergence from average within the -// period of time between consecutive evaluation of the should_start_gc() service. Here's the thinking: -// -// a) Between now and the next time I ask whether should_start_gc(), we might experience a spike representing -// the anticipated burst of allocations. If that would put us over budget, then we should start GC immediately. -// b) Between now and the anticipated depletion of allocation pool, there may be two or more bursts of allocations. 
-// If there are more than one of these bursts, we can "approximate" that these will be separated by spans of -// time with very little or no allocations so the "average" allocation rate should be a suitable approximation -// of how this will behave. -// -// For cases 1 and 2, we need to "quickly" recalibrate the average allocation rate whenever we detect a change -// in operation mode. We want some way to decide that the average rate has changed, while keeping average -// allocation rate computation independent. bool ShenandoahAdaptiveHeuristics::should_start_gc() { - size_t capacity = ShenandoahHeap::heap()->soft_max_capacity(); - size_t available = _space_info->soft_mutator_available(); - size_t allocated = _space_info->bytes_allocated_since_gc_start(); + const size_t capacity = ShenandoahHeap::heap()->soft_max_capacity(); + const size_t available = _space_info->soft_mutator_available(); - double avg_cycle_time = 0; - double avg_alloc_rate = 0; - double now = get_most_recent_wake_time(); - size_t allocatable_words = this->allocatable(available); - double predicted_future_accelerated_gc_time = 0.0; - size_t allocated_bytes_since_last_sample = 0; - double instantaneous_rate_words_per_second = 0.0; - size_t consumption_accelerated = 0; - double acceleration = 0.0; - double current_rate_by_acceleration = 0.0; - size_t min_threshold = min_free_threshold(); - double predicted_future_gc_time = 0; - double future_planned_gc_time = 0; - bool future_planned_gc_time_is_average = false; - bool is_spiking = false; - - log_debug(gc, ergo)("should_start_gc calculation: available: " PROPERFMT ", soft_max_capacity: " PROPERFMT ", " - "allocated_since_gc_start: " PROPERFMT, - PROPERFMTARGS(available), PROPERFMTARGS(capacity), PROPERFMTARGS(allocated)); - - // Track allocation rate even if we decide to start a cycle for other reasons. 
- double rate = _allocation_rate.sample(allocated); + log_debug(gc, ergo)("should_start_gc calculation: available: " PROPERFMT ", soft_max_capacity: " PROPERFMT, + PROPERFMTARGS(available), PROPERFMTARGS(capacity)); if (_start_gc_is_pending) { log_trigger("GC start is already pending"); @@ -444,467 +230,199 @@ bool ShenandoahAdaptiveHeuristics::should_start_gc() { _last_trigger = OTHER; - if (available < min_threshold) { - log_trigger("Free (Soft) (" PROPERFMT ") is below minimum threshold (" PROPERFMT ")", - PROPERFMTARGS(available), PROPERFMTARGS(min_threshold)); - accept_trigger_with_type(OTHER); + if (trigger_min_free_threshold(available, capacity)) { return true; } - // Check if we need to learn a bit about the application - const size_t max_learn = ShenandoahLearningSteps; - if (_gc_times_learned < max_learn) { - size_t init_threshold = capacity / 100 * ShenandoahInitFreeThreshold; - if (available < init_threshold) { - log_trigger("Learning %zu of %zu. Free (%zu%s) is below initial threshold (%zu%s)", - _gc_times_learned + 1, max_learn, - byte_size_in_proper_unit(available), proper_unit_for_byte_size(available), - byte_size_in_proper_unit(init_threshold), proper_unit_for_byte_size(init_threshold)); - accept_trigger_with_type(OTHER); - return true; - } - } - - // The test (3 * allocated > available) below is intended to prevent triggers from firing so quickly that there - // has not been sufficient time to create garbage that can be reclaimed during the triggered GC cycle. If we trigger before - // garbage has been created, the concurrent GC will find no garbage. This has been observed to result in degens which - // experience OOM during evac or that experience "bad progress", both of which escalate to Full GC. Note that garbage that - // was allocated following the start of the current GC cycle cannot be reclaimed in this GC cycle. 
Here is the derivation - // of the expression: - // - // Let R (runway) represent the total amount of memory that can be allocated following the start of GC(N). The runway - // represents memory available at the start of the current GC plus garbage reclaimed by the current GC. In a balanced, - // fully utilized configuration, we will be starting each new GC cycle immediately following completion of the preceding - // GC cycle. In this configuration, we would expect half of R to be consumed during concurrent cycle GC(N) and half - // to be consumed during concurrent GC(N+1). - // - // Assume we want to delay GC trigger until: A/V > 0.33 - // This is equivalent to enforcing that: A > 0.33V - // which is: 3A > V - // Since A+V equals R, we have: A + 3A > A + V = R - // which is to say that: A > R/4 - // - // Postponing the trigger until at least 1/4 of the runway has been consumed helps to improve the efficiency of the - // triggered GC. Under heavy steady state workload, this delay condition generally has no effect: if the allocation - // runway is divided "equally" between the current GC and the next GC, then at any potential trigger point (which cannot - // happen any sooner than completion of the first GC), it is already the case that roughly A > R/2. - if (3 * allocated <= available) { - // Even though we will not issue an adaptive trigger unless a minimum threshold of memory has been allocated, - // we still allow more generic triggers, such as guaranteed GC intervals, to act. 
- return ShenandoahHeuristics::should_start_gc(); - } - - avg_cycle_time = _gc_cycle_time_history->davg() + (_margin_of_error_sd * _gc_cycle_time_history->dsd()); - avg_alloc_rate = _allocation_rate.upper_bound(_margin_of_error_sd); - if ((now - _previous_acceleration_sample_timestamp) >= (ShenandoahAccelerationSamplePeriod / 1000.0)) { - predicted_future_accelerated_gc_time = - predict_gc_time(now + MAX2(get_planned_sleep_interval(), ShenandoahAccelerationSamplePeriod / 1000.0)); - double future_accelerated_planned_gc_time; - bool future_accelerated_planned_gc_time_is_average; - if (predicted_future_accelerated_gc_time > avg_cycle_time) { - future_accelerated_planned_gc_time = predicted_future_accelerated_gc_time; - future_accelerated_planned_gc_time_is_average = false; - } else { - future_accelerated_planned_gc_time = avg_cycle_time; - future_accelerated_planned_gc_time_is_average = true; - } - allocated_bytes_since_last_sample = _free_set->get_bytes_allocated_since_previous_sample(); - instantaneous_rate_words_per_second = - (allocated_bytes_since_last_sample / HeapWordSize) / (now - _previous_acceleration_sample_timestamp); - - _previous_acceleration_sample_timestamp = now; - add_rate_to_acceleration_history(now, instantaneous_rate_words_per_second); - current_rate_by_acceleration = instantaneous_rate_words_per_second; - consumption_accelerated = - accelerated_consumption(acceleration, current_rate_by_acceleration, avg_alloc_rate / HeapWordSize, - (ShenandoahAccelerationSamplePeriod / 1000.0) + future_accelerated_planned_gc_time); - - // Note that even a single thread that wakes up and begins to allocate excessively can manifest as accelerating allocation - // rate. This thread will initially allocate a TLAB of minimum size. Then it will allocate a TLAB twice as big a bit later, - // and then twice as big again after another short delay. 
When a phase change causes many threads to increase their - // allocation behavior, this effect is multiplied, and compounded by jitter in the times that individual threads experience - // the phase change. - // - // The following trace represents an actual workload, with allocation rates sampled at 10 Hz, the default behavior before - // introduction of accelerated allocation rate detection. Though the allocation rate is seen to be increasing at times - // 101.907 and 102.007 and 102.108, the newly sampled allocation rate is not enough to trigger GC because the headroom is - // still quite large. In fact, GC is not triggered until time 102.409s, and this GC degenerates. - // - // Sample Time (s) Allocation Rate (MB/s) Headroom (GB) - // 101.807 0.0 26.93 - // <--- accelerated spike can trigger here, around time 101.9s - // 101.907 477.6 26.85 - // 102.007 3,206.0 26.35 - // 102.108 23,797.8 24.19 - // 102.208 24,164.5 21.83 - // 102.309 23,965.0 19.47 - // 102.409 24,624.35 17.05 <--- without accelerated rate detection, we trigger here - // - // Though the above measurements are from actual workload, the following details regarding sampled allocation rates at 3ms - // period were not measured directly for this run-time sample. These are hypothetical, though they represent a plausible - // result that correlates with the actual measurements. - // - // For most of the 100 ms time span that precedes the sample at 101.907, the allocation rate still remains at zero. The phase - // change that causes increasing allocations occurs near the end ot this time segment. When sampled with a 3 ms period, - // acceration of allocation can be triggered at approximately time 101.88s. - // - // In the default configuration, accelerated allocation rate is detected by examining a sequence of 8 allocation rate samples. - // - // Even a single allocation rate sample above the norm can be interpreted as acceleration of allocation rate. 
For example, the - // the best-fit line for the following samples has an acceleration rate of 3,553.3 MB/s/s. This is not enough to trigger GC, - // especially given the abundance of Headroom at this moment in time. - // - // TimeStamp (s) Alloc rate (MB/s) - // 101.857 0 - // 101.860 0 - // 101.863 0 - // 101.866 0 - // 101.869 53.3 - // - // At the next sample time, we will compute a slightly higher acceration, 9,150 MB/s/s. This is also insufficient to trigger - // GC. - // - // TimeStamp (s) Alloc rate (MB/s) - // 101.860 0 - // 101.863 0 - // 101.866 0 - // 101.869 53.3 - // 101.872 110.6 - // - // Eventually, we will observe a full history of accelerating rate samples, computing acceleration of 18,500 MB/s/s. This will - // trigger GC over 500 ms earlier than was previously possible. - // - // TimeStamp (s) Alloc rate (MB/s) - // 101.866 0 - // 101.869 53.3 - // 101.872 110.6 - // 101.875 165.9 - // 101.878 221.2 - // - // The accelerated rate heuristic is based on the following idea: - // - // Assume allocation rate is accelerating at a constant rate. If we postpone the spike trigger until the subsequent - // sample point, will there be enough memory to satisfy allocations that occur during the anticipated concurrent GC - // cycle? If not, we should trigger right now. - // - // Outline of this heuristic triggering technique: - // - // 1. We remember the N (e.g. N=3) most recent samples of spike allocation rate r0, r1, r2 samples at t0, t1, and t2 - // 2. if r1 < r0 or r2 < r1, approximate Acceleration = 0.0, Rate = Average(r0, r1, r2) - // 3. Otherwise, use least squares method to compute best-fit line of rate vs time - // 4. The slope of this line represents Acceleration. The y-intercept of this line represents "initial rate" - // 5. Use r2 to rrpresent CurrentRate - // 6. Use Consumption = CurrentRate * GCTime + 1/2 * Acceleration * GCTime * GCTime - // (See High School physics discussions on constant acceleration: D = v0 * t + 1/2 * a * t^2) - // 7. 
if Consumption exceeds headroom, trigger now - // - // Though larger sample size may improve quality of predictor, it also delays trigger response. Smaller sample sizes - // are more susceptible to false triggers based on random noise. The default configuration uses a sample size of 8 and - // a sample period of roughly 15 ms, spanning approximately 120 ms of execution. - if (consumption_accelerated > allocatable_words) { - size_t size_t_alloc_rate = (size_t) current_rate_by_acceleration * HeapWordSize; - if (acceleration > 0) { - size_t size_t_acceleration = (size_t) acceleration * HeapWordSize; - log_trigger("Accelerated consumption (" PROPERFMT ") exceeds free headroom (" PROPERFMT ") at " - "current rate (" PROPERFMT "/s) with acceleration (" PROPERFMT "/s/s) for planned %s GC time (%.2f ms)", - PROPERFMTARGS(consumption_accelerated * HeapWordSize), - PROPERFMTARGS(allocatable_words * HeapWordSize), - PROPERFMTARGS(size_t_alloc_rate), - PROPERFMTARGS(size_t_acceleration), - future_accelerated_planned_gc_time_is_average? "(from average)": "(by linear prediction)", - future_accelerated_planned_gc_time * 1000); - } else { - log_trigger("Momentary spike consumption (" PROPERFMT ") exceeds free headroom (" PROPERFMT ") at " - "current rate (" PROPERFMT "/s) for planned %s GC time (%.2f ms) (spike threshold = %.2f)", - PROPERFMTARGS(consumption_accelerated * HeapWordSize), - PROPERFMTARGS(allocatable_words * HeapWordSize), - PROPERFMTARGS(size_t_alloc_rate), - future_accelerated_planned_gc_time_is_average? "(from average)": "(by linear prediction)", - future_accelerated_planned_gc_time * 1000, _spike_threshold_sd); - - - } - _spike_acceleration_num_samples = 0; - _spike_acceleration_first_sample_index = 0; - - // Count this as a form of RATE trigger for purposes of adjusting heuristic triggering configuration because this - // trigger is influenced more by margin_of_error_sd than by spike_threshold_sd. 
- accept_trigger_with_type(RATE); - return true; - } - } - - // Suppose we don't trigger now, but decide to trigger in the next regulator cycle. What will be the GC time then? - predicted_future_gc_time = predict_gc_time(now + get_planned_sleep_interval()); - if (predicted_future_gc_time > avg_cycle_time) { - future_planned_gc_time = predicted_future_gc_time; - future_planned_gc_time_is_average = false; - } else { - future_planned_gc_time = avg_cycle_time; - future_planned_gc_time_is_average = true; - } - - log_debug(gc)("%s: average GC time: %.2f ms, predicted GC time: %.2f ms, allocation rate: %.0f %s/s", - _space_info->name(), avg_cycle_time * 1000, predicted_future_gc_time * 1000, - byte_size_in_proper_unit(avg_alloc_rate), proper_unit_for_byte_size(avg_alloc_rate)); - size_t allocatable_bytes = allocatable_words * HeapWordSize; - - if (future_planned_gc_time * avg_alloc_rate > allocatable_bytes) { - log_trigger("%s GC time (%.2f ms) is above the time for average allocation rate (%.0f %sB/s)" - " to deplete free headroom (%zu%s) (margin of error = %.2f)", - future_planned_gc_time_is_average? 
"Average": "Linear prediction of", future_planned_gc_time * 1000, - byte_size_in_proper_unit(avg_alloc_rate), proper_unit_for_byte_size(avg_alloc_rate), - byte_size_in_proper_unit(allocatable_bytes), proper_unit_for_byte_size(allocatable_bytes), - _margin_of_error_sd); - - size_t spike_headroom = capacity / 100 * ShenandoahAllocSpikeFactor; - size_t penalties = capacity / 100 * _gc_time_penalties; - size_t allocation_headroom = available; - allocation_headroom -= MIN2(allocation_headroom, spike_headroom); - allocation_headroom -= MIN2(allocation_headroom, penalties); - log_info(gc, ergo)("Free headroom: " PROPERFMT " (free) - " PROPERFMT "(spike) - " PROPERFMT " (penalties) = " PROPERFMT, - PROPERFMTARGS(available), - PROPERFMTARGS(spike_headroom), - PROPERFMTARGS(penalties), - PROPERFMTARGS(allocation_headroom)); - accept_trigger_with_type(RATE); + if (trigger_learning(available, capacity)) { return true; } - is_spiking = _allocation_rate.is_spiking(rate, _spike_threshold_sd); - if (is_spiking && (future_planned_gc_time * rate > allocatable_bytes)) { - log_trigger("%s GC time (%.2f ms) is above the time for instantaneous allocation rate (%.0f %sB/s)" - " to deplete free headroom (%zu%s) (spike threshold = %.2f)", - future_planned_gc_time_is_average? 
"Average": "Linear prediction of", future_planned_gc_time * 1000, - byte_size_in_proper_unit(rate), proper_unit_for_byte_size(rate), - byte_size_in_proper_unit(allocatable_bytes), proper_unit_for_byte_size(allocatable_bytes), - _spike_threshold_sd); - accept_trigger_with_type(SPIKE); + const double anticipated_gc_start_time = get_most_recent_wake_time() + get_planned_sleep_interval(); + const double anticipated_gc_duration = _cycles.predict_duration(anticipated_gc_start_time, _margin_of_error_sd); + ShenandoahAllocationRate& alloc_rate = ShenandoahHeap::heap()->alloc_rate(); + const ShenandoahAnticipatedConsumption consumption = alloc_rate.snapshot(anticipated_gc_duration, _margin_of_error_sd); + const size_t allocatable_bytes = allocatable(available); + maybe_log_rate_trigger_parameters(consumption, allocatable_bytes); + + if (trigger_accelerating_allocation_rate(consumption, allocatable_bytes)) { return true; } + + if (trigger_average_allocation_rate(consumption, allocatable_bytes)) { + return true; + } + return ShenandoahHeuristics::should_start_gc(); } -void ShenandoahAdaptiveHeuristics::adjust_last_trigger_parameters(double amount) { - switch (_last_trigger) { - case RATE: - adjust_margin_of_error(amount); - break; - case SPIKE: - adjust_spike_threshold(amount); - break; - case OTHER: - // nothing to adjust here. 
- break; - default: - ShouldNotReachHere(); +bool ShenandoahAdaptiveHeuristics::trigger_min_free_threshold(size_t available, size_t capacity) { + const size_t min_threshold = min_free_threshold(capacity); + if (available < min_threshold) { + log_trigger("Free (Soft) (" PROPERFMT ") is below minimum threshold (" PROPERFMT ")", + PROPERFMTARGS(available), PROPERFMTARGS(min_threshold)); + accept_trigger_with_type(OTHER); + return true; } + return false; } -void ShenandoahAdaptiveHeuristics::adjust_margin_of_error(double amount) { - _margin_of_error_sd = saturate(_margin_of_error_sd + amount, MINIMUM_CONFIDENCE, MAXIMUM_CONFIDENCE); - log_debug(gc, ergo)("Margin of error now %.2f", _margin_of_error_sd); -} - -void ShenandoahAdaptiveHeuristics::adjust_spike_threshold(double amount) { - _spike_threshold_sd = saturate(_spike_threshold_sd - amount, MINIMUM_CONFIDENCE, MAXIMUM_CONFIDENCE); - log_debug(gc, ergo)("Spike threshold now: %.2f", _spike_threshold_sd); -} - -size_t ShenandoahAdaptiveHeuristics::min_free_threshold() { - return ShenandoahHeap::heap()->soft_max_capacity() / 100 * ShenandoahMinFreeThreshold; -} - -// This is called each time a new rate sample has been gathered, as governed by ShenandoahAccelerationSamplePeriod. -// Unlike traditional calculation of average allocation rate, there is no adjustment for standard deviation of the -// accelerated rate prediction. 
-size_t ShenandoahAdaptiveHeuristics::accelerated_consumption(double& acceleration, double& current_rate, - double avg_alloc_rate_words_per_second, - double predicted_cycle_time) const -{ - double *x_array = (double *) alloca(ShenandoahRateAccelerationSampleSize * sizeof(double)); - double *y_array = (double *) alloca(ShenandoahRateAccelerationSampleSize * sizeof(double)); - double x_sum = 0.0; - double y_sum = 0.0; - - assert(_spike_acceleration_num_samples > 0, "At minimum, we should have sample from this period"); - - double weighted_average_alloc; - if (_spike_acceleration_num_samples >= ShenandoahRateAccelerationSampleSize) { - double weighted_y_sum = 0; - double total_weight = 0; - double previous_x = 0; - uint delta = _spike_acceleration_num_samples - ShenandoahRateAccelerationSampleSize; - for (uint i = 0; i < ShenandoahRateAccelerationSampleSize; i++) { - uint index = (_spike_acceleration_first_sample_index + delta + i) % _spike_acceleration_buffer_size; - x_array[i] = _spike_acceleration_rate_timestamps[index]; - x_sum += x_array[i]; - y_array[i] = _spike_acceleration_rate_samples[index]; - if (i > 0) { - // first sample not included in weighted average because it has no weight. - double sample_weight = x_array[i] - x_array[i-1]; - weighted_y_sum += y_array[i] * sample_weight; - total_weight += sample_weight; - } - y_sum += y_array[i]; - } - weighted_average_alloc = (total_weight > 0)? weighted_y_sum / total_weight: 0; - } else { - weighted_average_alloc = 0; - } - - double momentary_rate; - if (_spike_acceleration_num_samples > ShenandoahMomentaryAllocationRateSpikeSampleSize) { - // Num samples must be strictly greater than sample size, because we need one extra sample to compute rate and weights - // In this context, the weight of a y value (an allocation rate) is the duration for which this allocation rate was - // active (the time since previous y value was reported). An allocation rate measured over a span of 300 ms (e.g. 
during - // concurrent GC) has much more "weight" than an allocation rate measured over a span of 15 s. - double weighted_y_sum = 0; - double total_weight = 0; - double sum_for_average = 0.0; - uint delta = _spike_acceleration_num_samples - ShenandoahMomentaryAllocationRateSpikeSampleSize; - for (uint i = 0; i < ShenandoahMomentaryAllocationRateSpikeSampleSize; i++) { - uint sample_index = (_spike_acceleration_first_sample_index + delta + i) % _spike_acceleration_buffer_size; - uint preceding_index = (sample_index == 0)? _spike_acceleration_buffer_size - 1: sample_index - 1; - double sample_weight = (_spike_acceleration_rate_timestamps[sample_index] - - _spike_acceleration_rate_timestamps[preceding_index]); - weighted_y_sum += _spike_acceleration_rate_samples[sample_index] * sample_weight; - total_weight += sample_weight; - } - momentary_rate = weighted_y_sum / total_weight; - bool is_spiking = _allocation_rate.is_spiking(momentary_rate, _spike_threshold_sd); - if (!is_spiking) { - // Disable momentary spike trigger unless allocation rate delta from average exceeds sd - momentary_rate = 0.0; - } - } else { - momentary_rate = 0.0; - } - - // By default, use momentary_rate for current rate and zero acceleration. Overwrite iff best-fit line has positive slope. - current_rate = momentary_rate; - acceleration = 0.0; - if ((_spike_acceleration_num_samples >= ShenandoahRateAccelerationSampleSize) - && (weighted_average_alloc >= avg_alloc_rate_words_per_second)) { - // If the average rate across the acceleration samples is below the overall average, this sample is not eligible to - // represent acceleration of allocation rate. We may just be catching up with allocations after a lull. 
- - double *xy_array = (double *) alloca(ShenandoahRateAccelerationSampleSize * sizeof(double)); - double *x2_array = (double *) alloca(ShenandoahRateAccelerationSampleSize * sizeof(double)); - double xy_sum = 0.0; - double x2_sum = 0.0; - for (uint i = 0; i < ShenandoahRateAccelerationSampleSize; i++) { - xy_array[i] = x_array[i] * y_array[i]; - xy_sum += xy_array[i]; - x2_array[i] = x_array[i] * x_array[i]; - x2_sum += x2_array[i]; - } - // Find the best-fit least-squares linear representation of rate vs time - double m; /* slope */ - double b; /* y-intercept */ - - m = ((ShenandoahRateAccelerationSampleSize * xy_sum - x_sum * y_sum) - / (ShenandoahRateAccelerationSampleSize * x2_sum - x_sum * x_sum)); - b = (y_sum - m * x_sum) / ShenandoahRateAccelerationSampleSize; - - if (m > 0) { - double proposed_current_rate = m * x_array[ShenandoahRateAccelerationSampleSize - 1] + b; - acceleration = m; - current_rate = proposed_current_rate; - } - // else, leave current_rate = momentary_rate, acceleration = 0 - } - // and here also, leave current_rate = momentary_rate, acceleration = 0 - - double time_delta = get_planned_sleep_interval() + predicted_cycle_time; - size_t words_to_be_consumed = (size_t) (current_rate * time_delta + 0.5 * acceleration * time_delta * time_delta); - return words_to_be_consumed; -} - -ShenandoahAllocationRate::ShenandoahAllocationRate() : - _last_sample_time(os::elapsedTime()), - _last_sample_value(0), - _interval_sec(1.0 / ShenandoahAdaptiveSampleFrequencyHz), - _rate(int(ShenandoahAdaptiveSampleSizeSeconds * ShenandoahAdaptiveSampleFrequencyHz), ShenandoahAdaptiveDecayFactor), - _rate_avg(int(ShenandoahAdaptiveSampleSizeSeconds * ShenandoahAdaptiveSampleFrequencyHz), ShenandoahAdaptiveDecayFactor) { -} - -double ShenandoahAllocationRate::force_sample(size_t allocated, size_t &unaccounted_bytes_allocated) { - const double MinSampleTime = 0.002; // Do not sample if time since last update is less than 2 ms - double now = os::elapsedTime(); - 
double time_since_last_update = now - _last_sample_time; - double rate = 0.0; - if (time_since_last_update < MinSampleTime) { - // If we choose not to sample right now, the unaccounted_bytes_allocated will be added - // into the next sample taken. These unaccounted_bytes_allocated will be added to - // any additional bytes that are allocated during this GC cycle at the time the rate is - // next sampled. We do not overwrite _last_sample_time on this path, because the - // unaccounted_bytes_allocated were allocated following _last_sample_time. - unaccounted_bytes_allocated = allocated - _last_sample_value; - } else { - rate = instantaneous_rate(now, allocated); - _rate.add(rate); - _rate_avg.add(_rate.avg()); - _last_sample_time = now; - unaccounted_bytes_allocated = 0; - } - // force_sample() is called when resetting bytes allocated since gc start. All subsequent - // requests to sample allocated bytes during this GC cycle are measured as a delta from - // _last_sample_value. In the case that we choose not to sample now, we will count the - // unaccounted_bytes_allocated as if they were allocated following the start of this GC - // cycle (but the time span over which these bytes were allocated begins at - // _last_sample_time, which we do not overwrite). - _last_sample_value = 0; - return rate; -} - -double ShenandoahAllocationRate::sample(size_t allocated) { - double now = os::elapsedTime(); - double rate = 0.0; - if (now - _last_sample_time > _interval_sec) { - rate = instantaneous_rate(now, allocated); - _rate.add(rate); - _rate_avg.add(_rate.avg()); - _last_sample_time = now; - _last_sample_value = allocated; - } - return rate; -} - -double ShenandoahAllocationRate::upper_bound(double sds) const { - // Here we are using the standard deviation of the computed running - // average, rather than the standard deviation of the samples that went - // into the moving average. 
This is a much more stable value and is tied - // to the actual statistic in use (moving average over samples of averages). - return _rate.davg() + (sds * _rate_avg.dsd()); -} - -void ShenandoahAllocationRate::allocation_counter_reset() { - _last_sample_time = os::elapsedTime(); - _last_sample_value = 0; -} - -bool ShenandoahAllocationRate::is_spiking(double rate, double threshold) const { - if (rate <= 0.0) { - return false; - } - - double sd = _rate.sd(); - if (sd > 0) { - // There is a small chance that that rate has already been sampled, but it seems not to matter in practice. - // Note that z_score reports how close the rate is to the average. A value between -1 and 1 means we are within one - // standard deviation. A value between -3 and +3 means we are within 3 standard deviations. We only check for z_score - // greater than threshold because we are looking for an allocation spike which is greater than the mean. - double z_score = (rate - _rate.avg()) / sd; - if (z_score > threshold) { +bool ShenandoahAdaptiveHeuristics::trigger_learning(size_t available, size_t capacity) { + // Check if we need to learn a bit about the application + if (_gc_times_learned < ShenandoahLearningSteps) { + const size_t init_threshold = capacity / 100 * ShenandoahInitFreeThreshold; + if (available < init_threshold) { + log_trigger("Learning %zu of %zu. 
Free (" PROPERFMT ") is below initial threshold (" PROPERFMT ")", + _gc_times_learned + 1, ShenandoahLearningSteps, PROPERFMTARGS(available), PROPERFMTARGS(init_threshold)); + accept_trigger_with_type(OTHER); return true; } } return false; } -double ShenandoahAllocationRate::instantaneous_rate(double time, size_t allocated) const { - assert(allocated >= _last_sample_value, "Must be"); - assert(time > _last_sample_time, "Must be"); - return (allocated - _last_sample_value) / (time - _last_sample_time); +bool ShenandoahAdaptiveHeuristics::trigger_average_allocation_rate(const ShenandoahAnticipatedConsumption& rate, const size_t allocatable_bytes) { + if (rate.baseline_consumption() > allocatable_bytes) { + log_trigger("Anticipated GC duration (%.2f ms) is above the time for average allocation rate (" PROPERFMT_F "/s)" + " to deplete free headroom (" PROPERFMT ") (margin of error = %.2f)", + rate.duration_seconds() * 1000, + PROPERFMT_F_ARGS(rate.baseline_rate()), PROPERFMTARGS(allocatable_bytes), _margin_of_error_sd); + accept_trigger_with_type(RATE); + return true; + } + return false; } + +// Note that even a single thread that wakes up and begins to allocate excessively can manifest as accelerating allocation +// rate. This thread will initially allocate a TLAB of minimum size. Then it will allocate a TLAB twice as big a bit later, +// and then twice as big again after another short delay. When a phase change causes many threads to increase their +// allocation behavior, this effect is multiplied, and compounded by jitter in the times that individual threads experience +// the phase change. +// +// The following trace represents an actual workload, with allocation rates sampled at 10 Hz, the default behavior before +// introduction of accelerated allocation rate detection. 
Though the allocation rate is seen to be increasing at times +// 101.907 and 102.007 and 102.108, the newly sampled allocation rate is not enough to trigger GC because the headroom is +// still quite large. In fact, GC is not triggered until time 102.409s, and this GC degenerates. +// +// Sample Time (s) Allocation Rate (MB/s) Headroom (GB) +// 101.807 0.0 26.93 +// <--- accelerated spike can trigger here, around time 101.9s +// 101.907 477.6 26.85 +// 102.007 3,206.0 26.35 +// 102.108 23,797.8 24.19 +// 102.208 24,164.5 21.83 +// 102.309 23,965.0 19.47 +// 102.409 24,624.35 17.05 <--- without accelerated rate detection, we trigger here +// +// Though the above measurements are from actual workload, the following details regarding sampled allocation rates at 3ms +// period were not measured directly for this run-time sample. These are hypothetical, though they represent a plausible +// result that correlates with the actual measurements. +// +// For most of the 100 ms time span that precedes the sample at 101.907, the allocation rate still remains at zero. The phase +// change that causes increasing allocations occurs near the end of this time segment. When sampled with a 3 ms period, +// acceleration of allocation can be triggered at approximately time 101.88s. +// +// In the default configuration, accelerated allocation rate is detected by examining a sequence of 8 allocation rate samples. +// +// Even a single allocation rate sample above the norm can be interpreted as acceleration of allocation rate. For example, +// the best-fit line for the following samples has an acceleration rate of 3,553.3 MB/s/s. This is not enough to trigger GC, +// especially given the abundance of Headroom at this moment in time. +// +// TimeStamp (s) Alloc rate (MB/s) +// 101.857 0 +// 101.860 0 +// 101.863 0 +// 101.866 0 +// 101.869 53.3 +// +// At the next sample time, we will compute a slightly higher acceleration, 9,150 MB/s/s. This is also insufficient to trigger +// GC.
+// +// TimeStamp (s) Alloc rate (MB/s) +// 101.860 0 +// 101.863 0 +// 101.866 0 +// 101.869 53.3 +// 101.872 110.6 +// +// Eventually, we will observe a full history of accelerating rate samples, computing acceleration of 18,500 MB/s/s. This will +// trigger GC over 500 ms earlier than was previously possible. +// +// TimeStamp (s) Alloc rate (MB/s) +// 101.866 0 +// 101.869 53.3 +// 101.872 110.6 +// 101.875 165.9 +// 101.878 221.2 +// +// The accelerated rate heuristic is based on the following idea: +// +// Assume allocation rate is accelerating at a constant rate. If we postpone the spike trigger until the subsequent +// sample point, will there be enough memory to satisfy allocations that occur during the anticipated concurrent GC +// cycle? If not, we should trigger right now. +// +// Outline of this heuristic triggering technique: +// +// 1. We remember the N (e.g. N=3) most recent samples of spike allocation rate r0, r1, r2 samples at t0, t1, and t2 +// 2. if r1 < r0 or r2 < r1, approximate Acceleration = 0.0, Rate = Average(r0, r1, r2) +// 3. Otherwise, use least squares method to compute best-fit line of rate vs time +// 4. The slope of this line represents Acceleration. The y-intercept of this line represents "initial rate" +// 5. Use r2 to represent CurrentRate +// 6. Use Consumption = CurrentRate * GCTime + 1/2 * Acceleration * GCTime * GCTime +// (See High School physics discussions on constant acceleration: D = v0 * t + 1/2 * a * t^2) +// 7. if Consumption exceeds headroom, trigger now +// +// Though larger sample size may improve quality of predictor, it also delays trigger response. Smaller sample sizes +// are more susceptible to false triggers based on random noise. The default configuration uses a sample size of 8 and +// a sample period of roughly 15 ms, spanning approximately 120 ms of execution. 
+bool ShenandoahAdaptiveHeuristics::trigger_accelerating_allocation_rate(const ShenandoahAnticipatedConsumption& rate, const size_t allocatable_bytes) { + if (rate.momentary_consumption() > allocatable_bytes) { + assert(rate.accelerated_consumption() == 0, "Momentary trigger is meant to exclude acceleration trigger"); + log_trigger("Momentary spike consumption (" PROPERFMT ") exceeds free headroom (" PROPERFMT ") at " + "current rate (" PROPERFMT_F "/s) for anticipated GC duration (%.2f ms)", + PROPERFMTARGS(rate.momentary_consumption()), PROPERFMTARGS(allocatable_bytes), + PROPERFMT_F_ARGS(rate.momentary_rate()), rate.duration_seconds() * 1000); + accept_trigger_with_type(RATE); + return true; + } + + if (rate.accelerated_consumption() > allocatable_bytes) { + assert(rate.momentary_consumption() == 0, "Acceleration trigger is meant to exclude momentary trigger"); + log_trigger("Accelerated consumption (" PROPERFMT ") exceeds free headroom (" PROPERFMT ") at " + "current rate (" PROPERFMT_F "/s) with acceleration (" PROPERFMT_F "/s/s) for anticipated GC duration (%.2f ms)", + PROPERFMTARGS(rate.accelerated_consumption()), PROPERFMTARGS(allocatable_bytes), + PROPERFMT_F_ARGS(rate.predicted_rate()), PROPERFMT_F_ARGS(rate.acceleration()), rate.duration_seconds() * 1000); + accept_trigger_with_type(RATE); + return true; + } + + return false; +} + +void ShenandoahAdaptiveHeuristics::maybe_log_rate_trigger_parameters(const ShenandoahAnticipatedConsumption &consumption, + size_t allocatable_bytes) const { + if (log_is_enabled(Debug, gc, sampling)) { + log_debug(gc, sampling)( + "%s: Anticipated cycle duration: %.3fs, head room: " PROPERFMT ", margin of error: %.3f " + "Baseline consumption: " PROPERFMT ", Baseline rate: " PROPERFMT_F "/s, " + "Momentary consumption: " PROPERFMT ", Momentary rate: " PROPERFMT_F "/s, " + "Accelerated consumption: " PROPERFMT ", Predicted rate: " PROPERFMT_F "/s, Acceleration: %.3f", + _space_info->name(), consumption.duration_seconds(), 
PROPERFMTARGS(allocatable_bytes), _margin_of_error_sd, + PROPERFMTARGS(consumption.baseline_consumption()), PROPERFMT_F_ARGS(consumption.baseline_rate()), + PROPERFMTARGS(consumption.momentary_consumption()), PROPERFMT_F_ARGS(consumption.momentary_rate()), + PROPERFMTARGS(consumption.accelerated_consumption()), PROPERFMT_F_ARGS(consumption.predicted_rate()), consumption.acceleration() + ); + } +} + +void ShenandoahAdaptiveHeuristics::adjust_margin_of_error(double amount) { + _margin_of_error_sd = clamp(_margin_of_error_sd + amount, MINIMUM_CONFIDENCE, MAXIMUM_CONFIDENCE); + log_debug(gc, ergo)("Margin of error now %.2f", _margin_of_error_sd); +} + +size_t ShenandoahAdaptiveHeuristics::min_free_threshold(size_t capacity) const { + return capacity / 100 * ShenandoahMinFreeThreshold; +} + +#undef PROPERFMT_F +#undef PROPERFMT_F_ARGS diff --git a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahAdaptiveHeuristics.hpp b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahAdaptiveHeuristics.hpp index c761f2a82f3..75eb3a7facb 100644 --- a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahAdaptiveHeuristics.hpp +++ b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahAdaptiveHeuristics.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, 2019, Red Hat, Inc. All rights reserved. + * Copyright (c) 2018, 2026, Red Hat, Inc. All rights reserved. * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* @@ -27,71 +27,11 @@ #define SHARE_GC_SHENANDOAH_HEURISTICS_SHENANDOAHADAPTIVEHEURISTICS_HPP #include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp" -#include "gc/shenandoah/shenandoahFreeSet.hpp" +#include "gc/shenandoah/shenandoahAllocRate.hpp" +#include "gc/shenandoah/shenandoahCycleDuration.hpp" #include "gc/shenandoah/shenandoahPhaseTimings.hpp" -#include "gc/shenandoah/shenandoahRegulatorThread.hpp" -#include "gc/shenandoah/shenandoahSharedVariables.hpp" -#include "memory/allocation.hpp" #include "utilities/numberSeq.hpp" -/** - * ShenandoahAllocationRate maintains a truncated history of recently sampled allocation rates for the purpose of providing - * informed estimates of current and future allocation rates based on weighted averages and standard deviations of the - * truncated history. More recently sampled allocations are weighted more heavily than older samples when computing - * averages and standard deviations. - */ -class ShenandoahAllocationRate : public CHeapObj { - public: - explicit ShenandoahAllocationRate(); - - // Reset the _last_sample_value to zero, _last_sample_time to current time. - void allocation_counter_reset(); - - // Force an allocation rate sample to be taken, even if the time since last sample is not greater than - // 1s/ShenandoahAdaptiveSampleFrequencyHz, except when current_time - _last_sample_time < MinSampleTime (2 ms). - // The sampled allocation rate is computed from (allocated - _last_sample_value) / (current_time - _last_sample_time). - // Return the newly computed rate if the sample is taken, zero if it is not an appropriate time to add a sample. - // In the case that a new sample is not taken, overwrite unaccounted_bytes_allocated with bytes allocated since - // the previous sample was taken (allocated - _last_sample_value). Otherwise, overwrite unaccounted_bytes_allocated - // with 0. 
- double force_sample(size_t allocated, size_t &unaccounted_bytes_allocated); - - // Add an allocation rate sample if the time since last sample is greater than 1s/ShenandoahAdaptiveSampleFrequencyHz. - // The sampled allocation rate is computed from (allocated - _last_sample_value) / (current_time - _last_sample_time). - // Return the newly computed rate if the sample is taken, zero if it is not an appropriate time to add a sample. - double sample(size_t allocated); - - // Return an estimate of the upper bound on allocation rate, with the upper bound computed as the weighted average - // of recently sampled instantaneous allocation rates added to sds times the standard deviation computed for the - // sequence of recently sampled average allocation rates. - double upper_bound(double sds) const; - - // Test whether rate significantly diverges from the computed average allocation rate. If so, return true. - // Otherwise, return false. Significant divergence is recognized if (rate - _rate.avg()) / _rate.sd() > threshold. - bool is_spiking(double rate, double threshold) const; - - private: - - // Return the instantaneous rate calculated from (allocated - _last_sample_value) / (time - _last_sample_time). - // Return Sentinel value 0.0 if (time - _last_sample_time) == 0 or if (allocated <= _last_sample_value). - double instantaneous_rate(double time, size_t allocated) const; - - // Time at which previous allocation rate sample was collected. - double _last_sample_time; - - // Bytes allocated as of the time at which previous allocation rate sample was collected. - size_t _last_sample_value; - - // The desired interval of time between consecutive samples of the allocation rate. 
- double _interval_sec; - - // Holds a sequence of the most recently sampled instantaneous allocation rates - TruncatedSeq _rate; - - // Holds a sequence of the most recently computed weighted average of allocation rates, with each weighted average - // computed immediately after an instantaneous rate was sampled - TruncatedSeq _rate_avg; -}; /* * The adaptive heuristic tracks the allocation behavior and average cycle @@ -106,38 +46,18 @@ class ShenandoahAllocationRate : public CHeapObj { */ class ShenandoahAdaptiveHeuristics : public ShenandoahHeuristics { public: - ShenandoahAdaptiveHeuristics(ShenandoahSpaceInfo* space_info); + explicit ShenandoahAdaptiveHeuristics(ShenandoahSpaceInfo* space_info); - virtual ~ShenandoahAdaptiveHeuristics(); + void initialize() override; - virtual void initialize() override; - - virtual void post_initialize() override; - - virtual void adjust_penalty(intx step) override; + void post_initialize() override; // At the end of GC(N), we idle GC until necessary to start the next GC. Compute the threshold of memory that can be allocated // before we need to start the next GC. void start_idle_span() override; - // Having observed a new allocation rate sample, add this to the acceleration history so that we can determine if allocation - // rate is accelerating. - void add_rate_to_acceleration_history(double timestamp, double rate); - - // Compute and return the current allocation rate, the current rate of acceleration, and the amount of memory that we expect - // to consume if we start GC right now and gc takes predicted_cycle_time to complete. 
- size_t accelerated_consumption(double& acceleration, double& current_rate, - double avg_rate_words_per_sec, double predicted_cycle_time) const; - - - void choose_collection_set_from_regiondata(ShenandoahCollectionSet* cset, - RegionData* data, size_t size, - size_t actual_free) override; - - void record_cycle_start() override; void record_success_concurrent() override; void record_degenerated() override; - void record_success_full() override; bool should_start_gc() override; @@ -145,47 +65,33 @@ public: bool is_diagnostic() override { return false; } bool is_experimental() override { return false; } + // In preparation for a span during which GC will be idle, compute the headroom adjustment that will be used to + // detect when GC needs to trigger. + void compute_headroom_adjustment() override; + private: - // These are used to adjust the margin of error and the spike threshold - // in response to GC cycle outcomes. These values are shared, but the - // margin of error and spike threshold trend in opposite directions. - const static double FULL_PENALTY_SD; - const static double DEGENERATE_PENALTY_SD; - - const static double MINIMUM_CONFIDENCE; - const static double MAXIMUM_CONFIDENCE; - - const static double LOWEST_EXPECTED_AVAILABLE_AT_END; - const static double HIGHEST_EXPECTED_AVAILABLE_AT_END; - - const static size_t GC_TIME_SAMPLE_SIZE; - - friend class ShenandoahAllocationRate; - - // Used to record the last trigger that signaled to start a GC. - // This itself is used to decide whether or not to adjust the margin of - // error for the average cycle time and allocation rate or the allocation - // spike detection threshold. - enum Trigger { - SPIKE, RATE, OTHER - }; - - void adjust_last_trigger_parameters(double amount); void adjust_margin_of_error(double amount); - void adjust_spike_threshold(double amount); - // Returns number of words that can be allocated before we need to trigger next GC, given available in bytes. 
- inline size_t allocatable(size_t available) const { - return (available > _headroom_adjustment)? (available - _headroom_adjustment) / HeapWordSize: 0; + // Returns number of bytes that can be allocated before we need to trigger next GC, given available in bytes. + size_t allocatable(size_t available) const { + return available > _headroom_adjustment ? available - _headroom_adjustment : 0; } protected: - ShenandoahAllocationRate _allocation_rate; + void adjust_penalty(intx step) override; + void choose_collection_set_from_regiondata(ShenandoahCollectionSet* cset, + RegionData* data, size_t size, + size_t actual_free) override; - // Invocations of should_start_gc() happen approximately once per ms. Queries of allocation rate only happen if a - // a certain amount of time has passed since the previous query. - size_t _allocated_at_previous_query; - double _time_of_previous_allocation_query; + + ShenandoahCycleDuration _cycles; + + // Used to record the last trigger that signaled to start a GC. + // This itself is used to decide whether to adjust the margin of + // error for the average cycle time. + enum Trigger { + RATE, OTHER + }; // The margin of error expressed in standard deviations to add to our // average cycle time and allocation rate. As this value increases we @@ -194,18 +100,9 @@ protected: // concurrent GCs. double _margin_of_error_sd; - // The allocation spike threshold is expressed in standard deviations. - // If the standard deviation of the most recent sample of the allocation - // rate exceeds this threshold, a GC cycle is started. As this value - // decreases the sensitivity to allocation spikes increases. In other - // words, lowering the spike threshold will tend to increase the number - // of concurrent GCs. - double _spike_threshold_sd; - // Remember which trigger is responsible for the last GC cycle. When the // outcome of the cycle is evaluated we will adjust the parameters for the - // corresponding triggers. 
Note that successful outcomes will raise - // the spike threshold and lower the margin of error. + // corresponding triggers. Trigger _last_trigger; // Keep track of the available memory at the end of a GC cycle. This @@ -213,67 +110,29 @@ protected: // source of feedback to adjust trigger parameters. TruncatedSeq _available; - ShenandoahFreeSet* _free_set; - - // This represents the time at which the allocation rate was most recently sampled for the purpose of detecting acceleration. - double _previous_acceleration_sample_timestamp; - size_t _total_allocations_at_start_of_idle; - // bytes of headroom at which we should trigger GC size_t _headroom_adjustment; - // Keep track of GC_TIME_SAMPLE_SIZE most recent concurrent GC cycle times - uint _gc_time_first_sample_index; - uint _gc_time_num_samples; - double* const _gc_time_timestamps; - double* const _gc_time_samples; - double* const _gc_time_xy; // timestamp * sample - double* const _gc_time_xx; // timestamp squared - double _gc_time_sum_of_timestamps; - double _gc_time_sum_of_samples; - double _gc_time_sum_of_xy; - double _gc_time_sum_of_xx; - - double _gc_time_m; // slope - double _gc_time_b; // y-intercept - double _gc_time_sd; // sd on deviance from prediction - - // In preparation for a span during which GC will be idle, compute the headroom adjustment that will be used to - // detect when GC needs to trigger. - void compute_headroom_adjustment() override; - - void add_gc_time(double timestamp_at_start, double duration); void add_degenerated_gc_time(double timestamp_at_start, double duration); - double predict_gc_time(double timestamp_at_start); - - // Keep track of SPIKE_ACCELERATION_SAMPLE_SIZE most recent spike allocation rate measurements. Note that it is - // typical to experience a small spike following end of GC cycle, as mutator threads refresh their TLABs. But - // there is generally an abundance of memory at this time as well, so this will not generally trigger GC. 
- uint _spike_acceleration_buffer_size; - uint _spike_acceleration_first_sample_index; - uint _spike_acceleration_num_samples; - double* const _spike_acceleration_rate_samples; // holds rates in words/second - double* const _spike_acceleration_rate_timestamps; // A conservative minimum threshold of free space that we'll try to maintain when possible. // For example, we might trigger a concurrent gc if we are likely to drop below // this threshold, or we might consider this when dynamically resizing generations // in the generational case. Controlled by global flag ShenandoahMinFreeThreshold. - size_t min_free_threshold(); + size_t min_free_threshold(size_t capacity) const; void accept_trigger_with_type(Trigger trigger_type) { _last_trigger = trigger_type; - ShenandoahHeuristics::accept_trigger(); + accept_trigger(); } -public: - // Sample the allocation rate at GC trigger time if possible. Return the number of allocated bytes that were - // not accounted for in the sample. This must be called before resetting bytes allocated since gc start. 
- size_t force_alloc_rate_sample(size_t bytes_allocated) override { - size_t unaccounted_bytes; - _allocation_rate.force_sample(bytes_allocated, unaccounted_bytes); - return unaccounted_bytes; - } + bool trigger_min_free_threshold(size_t available, size_t capacity); + bool trigger_learning(size_t available, size_t capacity); + bool trigger_average_allocation_rate(const ShenandoahAnticipatedConsumption& rate, size_t allocatable_bytes); + bool trigger_accelerating_allocation_rate(const ShenandoahAnticipatedConsumption& rate, size_t allocatable_bytes); + +private: + void maybe_log_rate_trigger_parameters(const ShenandoahAnticipatedConsumption & consumption, size_t allocatable_bytes) const; }; #endif // SHARE_GC_SHENANDOAH_HEURISTICS_SHENANDOAHADAPTIVEHEURISTICS_HPP diff --git a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahCompactHeuristics.cpp b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahCompactHeuristics.cpp index 28673b28612..4f5a750a1bf 100644 --- a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahCompactHeuristics.cpp +++ b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahCompactHeuristics.cpp @@ -23,11 +23,11 @@ * */ - #include "gc/shenandoah/heuristics/shenandoahCompactHeuristics.hpp" #include "gc/shenandoah/shenandoahCollectionSet.hpp" #include "gc/shenandoah/shenandoahHeap.inline.hpp" #include "gc/shenandoah/shenandoahHeapRegion.inline.hpp" +#include "gc/shenandoah/shenandoahUtils.hpp" #include "logging/log.hpp" #include "logging/logTag.hpp" @@ -46,29 +46,27 @@ ShenandoahCompactHeuristics::ShenandoahCompactHeuristics(ShenandoahSpaceInfo* sp } bool ShenandoahCompactHeuristics::should_start_gc() { - size_t capacity = ShenandoahHeap::heap()->soft_max_capacity(); - size_t available = _space_info->soft_mutator_available(); - size_t bytes_allocated = _space_info->bytes_allocated_since_gc_start(); + const size_t capacity = ShenandoahHeap::heap()->soft_max_capacity(); + const size_t available = _space_info->soft_mutator_available(); + const 
size_t bytes_allocated = estimate_bytes_allocated_since_gc_start(); log_debug(gc, ergo)("should_start_gc calculation: available: " PROPERFMT ", soft_max_capacity: " PROPERFMT ", " "allocated_since_gc_start: " PROPERFMT, PROPERFMTARGS(available), PROPERFMTARGS(capacity), PROPERFMTARGS(bytes_allocated)); - size_t threshold_bytes_allocated = capacity / 100 * ShenandoahAllocationThreshold; - size_t min_threshold = capacity / 100 * ShenandoahMinFreeThreshold; + const size_t threshold_bytes_allocated = capacity / 100 * ShenandoahAllocationThreshold; + const size_t min_threshold = capacity / 100 * ShenandoahMinFreeThreshold; if (available < min_threshold) { - log_trigger("Free (Soft) (%zu%s) is below minimum threshold (%zu%s)", - byte_size_in_proper_unit(available), proper_unit_for_byte_size(available), - byte_size_in_proper_unit(min_threshold), proper_unit_for_byte_size(min_threshold)); + log_trigger("Free (Soft) (" PROPERFMT ") is below minimum threshold (" PROPERFMT ")", + PROPERFMTARGS(available), PROPERFMTARGS(min_threshold)); accept_trigger(); return true; } if (bytes_allocated > threshold_bytes_allocated) { - log_trigger("Allocated since last cycle (%zu%s) is larger than allocation threshold (%zu%s)", - byte_size_in_proper_unit(bytes_allocated), proper_unit_for_byte_size(bytes_allocated), - byte_size_in_proper_unit(threshold_bytes_allocated), proper_unit_for_byte_size(threshold_bytes_allocated)); + log_trigger("Allocated since last cycle started (" PROPERFMT ") is larger than allocation threshold (" PROPERFMT ")", + PROPERFMTARGS(bytes_allocated), PROPERFMTARGS(threshold_bytes_allocated)); accept_trigger(); return true; } @@ -80,21 +78,27 @@ void ShenandoahCompactHeuristics::choose_collection_set_from_regiondata(Shenando RegionData* data, size_t size, size_t actual_free) { // Do not select too large CSet that would overflow the available free space - size_t max_cset = actual_free * 3 / 4; + const size_t max_cset = actual_free * 3 / 4; - log_info(gc, ergo)("CSet 
Selection. Actual Free: %zu%s, Max CSet: %zu%s", - byte_size_in_proper_unit(actual_free), proper_unit_for_byte_size(actual_free), - byte_size_in_proper_unit(max_cset), proper_unit_for_byte_size(max_cset)); - - size_t threshold = ShenandoahHeapRegion::region_size_bytes() * ShenandoahGarbageThreshold / 100; + log_info(gc, ergo)("CSet Selection. Actual Free: " PROPERFMT ", Max CSet: " PROPERFMT, + PROPERFMTARGS(actual_free), PROPERFMTARGS(max_cset)); + const size_t threshold = ShenandoahHeapRegion::region_size_bytes() * ShenandoahGarbageThreshold / 100; size_t live_cset = 0; for (size_t idx = 0; idx < size; idx++) { ShenandoahHeapRegion* r = data[idx].get_region(); - size_t new_cset = live_cset + r->get_live_data_bytes(); + const size_t new_cset = live_cset + r->get_live_data_bytes(); if (new_cset < max_cset && r->garbage() > threshold) { live_cset = new_cset; cset->add_region(r); } } } + +size_t ShenandoahCompactHeuristics::estimate_bytes_allocated_since_gc_start() const { + ShenandoahHeap* heap = ShenandoahHeap::heap(); + const double average_allocation_rate = heap->alloc_rate().weighted_average(); + const double now = os::elapsedTime(); + const double elapsed_seconds = now - cycle_start_time_seconds(); + return shenandoah_safe_size_cast(average_allocation_rate * elapsed_seconds); +} diff --git a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahCompactHeuristics.hpp b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahCompactHeuristics.hpp index a32c9c88478..55d62d2f707 100644 --- a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahCompactHeuristics.hpp +++ b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahCompactHeuristics.hpp @@ -1,5 +1,6 @@ /* - * Copyright (c) 2018, 2019, Red Hat, Inc. All rights reserved. + * Copyright (c) 2018, 2026, Red Hat, Inc. All rights reserved. + * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -36,14 +37,17 @@ public: explicit ShenandoahCompactHeuristics(ShenandoahSpaceInfo* space_info); bool should_start_gc() override; + const char* name() override { return "Compact"; } + bool is_diagnostic() override { return false; } + bool is_experimental() override { return false; } +protected: void choose_collection_set_from_regiondata(ShenandoahCollectionSet* cset, RegionData* data, size_t size, size_t actual_free) override; - const char* name() override { return "Compact"; } - bool is_diagnostic() override { return false; } - bool is_experimental() override { return false; } +private: + size_t estimate_bytes_allocated_since_gc_start() const; }; #endif // SHARE_GC_SHENANDOAH_HEURISTICS_SHENANDOAHCOMPACTHEURISTICS_HPP diff --git a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahGenerationalHeuristics.cpp b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahGenerationalHeuristics.cpp index 840459288c3..c880af7fd49 100644 --- a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahGenerationalHeuristics.cpp +++ b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahGenerationalHeuristics.cpp @@ -1,6 +1,6 @@ /* * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. - * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -24,6 +24,7 @@ */ #include "gc/shenandoah/heuristics/shenandoahGenerationalHeuristics.hpp" +#include "gc/shenandoah/shenandoahAllocRate.inline.hpp" #include "gc/shenandoah/shenandoahCollectionSet.hpp" #include "gc/shenandoah/shenandoahCollectorPolicy.hpp" #include "gc/shenandoah/shenandoahGeneration.hpp" @@ -32,6 +33,7 @@ #include "gc/shenandoah/shenandoahInPlacePromoter.hpp" #include "gc/shenandoah/shenandoahOldGeneration.hpp" #include "gc/shenandoah/shenandoahTrace.hpp" +#include "gc/shenandoah/shenandoahUtils.hpp" #include "gc/shenandoah/shenandoahYoungGeneration.hpp" #include "logging/log.hpp" #include "utilities/quickSort.hpp" @@ -48,10 +50,16 @@ static int compare_by_aged_live(AgedRegionData a, AgedRegionData b) { void ShenandoahGenerationalHeuristics::post_initialize() { ShenandoahHeuristics::post_initialize(); - _free_set = ShenandoahHeap::heap()->free_set(); compute_headroom_adjustment(); } +void ShenandoahGenerationalHeuristics::record_cycle_end() { + ShenandoahAdaptiveHeuristics::record_cycle_end(); + + ShenandoahAllocationRate& alloc_rate = ShenandoahHeap::heap()->alloc_rate(); + alloc_rate.update_minimum_sample_size(_space_info->soft_mutator_available()); +} + inline void assert_no_in_place_promotions() { #ifdef ASSERT class ShenandoahNoInPlacePromotions : public ShenandoahHeapRegionClosure { @@ -373,7 +381,7 @@ void ShenandoahGenerationalHeuristics::adjust_evacuation_budgets(ShenandoahHeap* ShenandoahYoungGeneration* const young_generation = heap->young_generation(); const size_t old_evacuated = collection_set->get_live_bytes_in_old_regions(); - size_t old_evacuated_committed = (size_t) (ShenandoahOldEvacWaste * double(old_evacuated)); + size_t old_evacuated_committed = shenandoah_safe_size_cast(ShenandoahOldEvacWaste * static_cast(old_evacuated)); size_t old_evacuation_reserve = old_generation->get_evacuation_reserve(); if (old_evacuated_committed > old_evacuation_reserve) { 
@@ -391,11 +399,11 @@ void ShenandoahGenerationalHeuristics::adjust_evacuation_budgets(ShenandoahHeap* old_generation->set_evacuation_reserve(old_evacuation_reserve); } - size_t young_advance_promoted = collection_set->get_live_bytes_in_tenurable_regions(); - size_t young_advance_promoted_reserve_used = (size_t) (ShenandoahPromoEvacWaste * double(young_advance_promoted)); + const double young_advance_promoted = collection_set->get_live_bytes_in_tenurable_regions(); + size_t young_advance_promoted_reserve_used = shenandoah_safe_size_cast(ShenandoahPromoEvacWaste * young_advance_promoted); - size_t young_evacuated = collection_set->get_live_bytes_in_untenurable_regions(); - size_t young_evacuated_reserve_used = (size_t) (ShenandoahEvacWaste * double(young_evacuated)); + const double young_evacuated = collection_set->get_live_bytes_in_untenurable_regions(); + const size_t young_evacuated_reserve_used = shenandoah_safe_size_cast(ShenandoahEvacWaste * young_evacuated); // In top_off_collection_set(), we shrunk planned future reserve by _add_regions_to_old * region_size_bytes, but we // didn't shrink available. The current reserve is not affected by the planned future reserve. Current available is diff --git a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahGenerationalHeuristics.hpp b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahGenerationalHeuristics.hpp index 8ea5cdb36c8..a0e4ab78d5c 100644 --- a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahGenerationalHeuristics.hpp +++ b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahGenerationalHeuristics.hpp @@ -55,6 +55,9 @@ public: void post_initialize() override; + void record_cycle_end() override; + +protected: // Wraps budget computation, subclass region selection, budget adjustment, and tracing. 
void choose_collection_set_from_regiondata(ShenandoahCollectionSet* set, RegionData* data, size_t data_size, diff --git a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahGlobalHeuristics.cpp b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahGlobalHeuristics.cpp index 9452e8b28cb..e4ac576aa6f 100644 --- a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahGlobalHeuristics.cpp +++ b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahGlobalHeuristics.cpp @@ -25,10 +25,11 @@ #include "gc/shenandoah/heuristics/shenandoahGlobalHeuristics.hpp" #include "gc/shenandoah/shenandoahAsserts.hpp" -#include "gc/shenandoah/shenandoahCollectorPolicy.hpp" #include "gc/shenandoah/shenandoahGenerationalHeap.inline.hpp" #include "gc/shenandoah/shenandoahGlobalGeneration.hpp" #include "gc/shenandoah/shenandoahHeapRegion.inline.hpp" +#include "gc/shenandoah/shenandoahUtils.hpp" +#include "gc/shenandoah/shenandoahYoungGeneration.hpp" #include "utilities/quickSort.hpp" bool ShenandoahEvacuationBudget::try_reserve(size_t bytes) { @@ -248,17 +249,17 @@ void ShenandoahGlobalCSetBudget::assert_budget_constraints_hold(size_t original_ assert(young_evac.live_bytes() * young_evac.waste_factor() <= young_evac.reserve() + young_evac.region_count(), "Young evac consumption (%zu) exceeds reserve (%zu) + region count (%zu)", - (size_t)(young_evac.live_bytes() * young_evac.waste_factor()), + shenandoah_safe_size_cast(young_evac.live_bytes() * young_evac.waste_factor()), young_evac.reserve(), young_evac.region_count()); assert(old_evac.live_bytes() * old_evac.waste_factor() <= old_evac.reserve() + old_evac.region_count(), "Old evac consumption (%zu) exceeds reserve (%zu) + region count (%zu)", - (size_t)(old_evac.live_bytes() * old_evac.waste_factor()), + shenandoah_safe_size_cast(old_evac.live_bytes() * old_evac.waste_factor()), old_evac.reserve(), old_evac.region_count()); assert(promo.live_bytes() * promo.waste_factor() <= promo.reserve() + promo.region_count(), "Promo consumption (%zu) 
exceeds reserve (%zu) + region count (%zu)", - (size_t)(promo.live_bytes() * promo.waste_factor()), + shenandoah_safe_size_cast(promo.live_bytes() * promo.waste_factor()), promo.reserve(), promo.region_count()); size_t total_post_reserves = young_evac.reserve() + old_evac.reserve() + promo.reserve(); diff --git a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahHeuristics.cpp b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahHeuristics.cpp index d2010d921b1..2f247db2951 100644 --- a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahHeuristics.cpp +++ b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahHeuristics.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, 2020, Red Hat, Inc. All rights reserved. + * Copyright (c) 2018, 2026, Red Hat, Inc. All rights reserved. * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. @@ -26,10 +26,12 @@ #include "gc/shared/gcCause.hpp" #include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp" +#include "gc/shenandoah/shenandoahAllocRate.inline.hpp" #include "gc/shenandoah/shenandoahCollectorPolicy.hpp" #include "gc/shenandoah/shenandoahHeapRegion.inline.hpp" #include "gc/shenandoah/shenandoahMarkingContext.inline.hpp" #include "gc/shenandoah/shenandoahTrace.hpp" +#include "gc/shenandoah/shenandoahYoungGeneration.hpp" #include "logging/log.hpp" #include "logging/logTag.hpp" #include "runtime/globals_extension.hpp" @@ -60,7 +62,6 @@ ShenandoahHeuristics::ShenandoahHeuristics(ShenandoahSpaceInfo* space_info) : _last_cycle_end(0), _gc_times_learned(0), _gc_time_penalties(0), - _gc_cycle_time_history(new TruncatedSeq(Moving_Average_Samples, ShenandoahAdaptiveDecayFactor)), _metaspace_oom() { size_t num_regions = ShenandoahHeap::heap()->num_regions(); @@ -174,6 +175,12 @@ void ShenandoahHeuristics::record_cycle_start() { void ShenandoahHeuristics::record_cycle_end() { 
_last_cycle_end = os::elapsedTime(); + + ShenandoahHeap* heap = ShenandoahHeap::heap(); + if (!heap->mode()->is_generational()) { + const size_t available = _space_info->soft_mutator_available(); + heap->alloc_rate().update_minimum_sample_size(available); + } } bool ShenandoahHeuristics::should_start_gc() { @@ -247,7 +254,6 @@ void ShenandoahHeuristics::log_trigger(const char* fmt, ...) { } void ShenandoahHeuristics::record_success_concurrent() { - _gc_cycle_time_history->add(elapsed_cycle_time()); _gc_times_learned++; adjust_penalty(Concurrent_Adjust); diff --git a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahHeuristics.hpp b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahHeuristics.hpp index 9066cdfccac..3f3b3898f54 100644 --- a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahHeuristics.hpp +++ b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahHeuristics.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, 2019, Red Hat, Inc. All rights reserved. + * Copyright (c) 2018, 2026, Red Hat, Inc. All rights reserved. * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -83,7 +83,7 @@ private: double _most_recent_planned_sleep_interval; protected: - static const uint Moving_Average_Samples = 10; // Number of samples to store in moving averages + static constexpr uint Moving_Average_Samples = 10; // Number of samples to store in moving averages bool _start_gc_is_pending; // True denotes that GC has been triggered, so no need to trigger again. size_t _declined_trigger_count; // This counts how many times since previous GC finished that this @@ -181,7 +181,6 @@ protected: size_t _gc_times_learned; intx _gc_time_penalties; - TruncatedSeq* _gc_cycle_time_history; // There may be many threads that contend to set this flag ShenandoahSharedFlag _metaspace_oom; @@ -230,6 +229,10 @@ public: // Default implementation does nothing. 
} + double cycle_start_time_seconds() const { + return _cycle_start; + } + virtual void record_cycle_start(); void record_degenerated_cycle_start(bool out_of_cycle); @@ -278,11 +281,6 @@ public: double elapsed_cycle_time() const; double elapsed_degenerated_cycle_time() const; - virtual size_t force_alloc_rate_sample(size_t bytes_allocated) { - // do nothing - return 0; - } - // Format prefix and emit log message indicating a GC cycle hs been triggered void log_trigger(const char* fmt, ...) ATTRIBUTE_PRINTF(2, 3); diff --git a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahSpaceInfo.hpp b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahSpaceInfo.hpp index 765061a43ed..85c5d9fb2fb 100644 --- a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahSpaceInfo.hpp +++ b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahSpaceInfo.hpp @@ -44,11 +44,6 @@ public: virtual size_t available() const = 0; virtual size_t used() const = 0; - // Return an approximation of the bytes allocated since GC start. The value returned is monotonically non-decreasing - // in time within each GC cycle. For certain GC cycles, the value returned may include some bytes allocated before - // the start of the current GC cycle. - virtual size_t bytes_allocated_since_gc_start() const = 0; - // Return true if this region belongs to this space. 
virtual bool contains(ShenandoahHeapRegion* region) const = 0; }; diff --git a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahStaticHeuristics.cpp b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahStaticHeuristics.cpp index 5f384f3dc73..98d679f86d9 100644 --- a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahStaticHeuristics.cpp +++ b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahStaticHeuristics.cpp @@ -40,14 +40,11 @@ ShenandoahStaticHeuristics::ShenandoahStaticHeuristics(ShenandoahSpaceInfo* spac bool ShenandoahStaticHeuristics::should_start_gc() { size_t capacity = ShenandoahHeap::heap()->soft_max_capacity(); size_t available = _space_info->soft_mutator_available(); - size_t allocated = _space_info->bytes_allocated_since_gc_start(); - log_debug(gc, ergo)("should_start_gc calculation: available: " PROPERFMT ", soft_max_capacity: " PROPERFMT ", " - "allocated_since_gc_start: " PROPERFMT, - PROPERFMTARGS(available), PROPERFMTARGS(capacity), PROPERFMTARGS(allocated)); + log_debug(gc, ergo)("should_start_gc calculation: available: " PROPERFMT ", soft_max_capacity: " PROPERFMT, + PROPERFMTARGS(available), PROPERFMTARGS(capacity)); size_t threshold_available = capacity / 100 * ShenandoahMinFreeThreshold; - if (available < threshold_available) { log_trigger("Free (Soft) (" PROPERFMT ") is below minimum threshold (" PROPERFMT ")", PROPERFMTARGS(available), PROPERFMTARGS(threshold_available)); diff --git a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahYoungHeuristics.cpp b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahYoungHeuristics.cpp index 27aa9a47510..28007637759 100644 --- a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahYoungHeuristics.cpp +++ b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahYoungHeuristics.cpp @@ -1,6 +1,6 @@ /* * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. - * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved. 
+ * Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -23,13 +23,17 @@ * */ +#include "gc/shared/gc_globals.hpp" +#include "gc/shenandoah/heuristics/shenandoahAdaptiveHeuristics.hpp" #include "gc/shenandoah/heuristics/shenandoahOldHeuristics.hpp" #include "gc/shenandoah/heuristics/shenandoahYoungHeuristics.hpp" -#include "gc/shenandoah/shenandoahCollectorPolicy.hpp" +#include "gc/shenandoah/shenandoahAllocRate.inline.hpp" #include "gc/shenandoah/shenandoahGenerationalHeap.inline.hpp" +#include "gc/shenandoah/shenandoahHeap.hpp" #include "gc/shenandoah/shenandoahHeapRegion.inline.hpp" #include "gc/shenandoah/shenandoahOldGeneration.hpp" #include "gc/shenandoah/shenandoahYoungGeneration.hpp" +#include "utilities/globalDefinitions.hpp" #include "utilities/quickSort.hpp" ShenandoahYoungHeuristics::ShenandoahYoungHeuristics(ShenandoahYoungGeneration* generation) @@ -80,7 +84,7 @@ void ShenandoahYoungHeuristics::choose_young_collection_set(ShenandoahCollection // If this is mixed evacuation, the old-gen candidate regions have already been added. size_t cur_cset = 0; size_t cur_young_garbage = cset->garbage(); - const size_t max_cset = (size_t) (heap->young_generation()->get_evacuation_reserve() / ShenandoahEvacWaste); + const size_t max_cset = shenandoah_safe_size_cast(heap->young_generation()->get_evacuation_reserve() / ShenandoahEvacWaste); const size_t free_target = (capacity * ShenandoahMinFreeThreshold) / 100 + max_cset; const size_t min_garbage = (free_target > actual_free) ? 
(free_target - actual_free) : 0; @@ -113,38 +117,24 @@ void ShenandoahYoungHeuristics::choose_young_collection_set(ShenandoahCollection } } - -bool ShenandoahYoungHeuristics::should_start_gc() { - auto heap = ShenandoahGenerationalHeap::heap(); - ShenandoahOldGeneration* old_generation = heap->old_generation(); - ShenandoahOldHeuristics* old_heuristics = old_generation->heuristics(); - - // Checks that an old cycle has run for at least ShenandoahMinimumOldTimeMs before allowing a young cycle. +bool ShenandoahYoungHeuristics::old_collection_needs_more_time(ShenandoahOldGeneration* old_generation, ShenandoahOldHeuristics* old_heuristics) { if (ShenandoahMinimumOldTimeMs > 0) { if (old_generation->is_preparing_for_mark() || old_generation->is_concurrent_mark_in_progress()) { - size_t old_time_elapsed = size_t(old_heuristics->elapsed_cycle_time() * 1000); - if (old_time_elapsed < ShenandoahMinimumOldTimeMs) { - // Do not decline_trigger() when waiting for minimum quantum of Old-gen marking. It is not at our discretion - // to trigger at this time. - log_debug(gc)("Young heuristics declines to trigger because old_time_elapsed < ShenandoahMinimumOldTimeMs"); - return false; - } + const auto old_time_elapsed = shenandoah_safe_size_cast(old_heuristics->elapsed_cycle_time() * 1000); + return old_time_elapsed < ShenandoahMinimumOldTimeMs; } } + return false; +} - // inherited triggers have already decided to start a cycle, so no further evaluation is required - if (ShenandoahAdaptiveHeuristics::should_start_gc()) { - // ShenandoahAdaptiveHeuristics::should_start_gc() has already accepted trigger, or declined it. - return true; - } - +bool ShenandoahYoungHeuristics::trigger_expedite_promotions(ShenandoahGenerationalHeap* heap, ShenandoahOldGeneration* old_generation) { // Get through promotions and mixed evacuations as quickly as possible. These cycles sometimes require significantly // more time than traditional young-generation cycles so start them up as soon as possible. 
This is a "mitigation" // for the reality that old-gen and young-gen activities are not truly "concurrent". If there is old-gen work to // be done, we start up the young-gen GC threads so they can do some of this old-gen work. As implemented, promotion // gets priority over old-gen marking. - size_t promo_expedite_threshold = percent_of(heap->young_generation()->max_capacity(), ShenandoahExpeditePromotionsThreshold); - size_t promo_potential = old_generation->get_promotion_potential(); + const size_t promo_expedite_threshold = percent_of(heap->young_generation()->max_capacity(), ShenandoahExpeditePromotionsThreshold); + const size_t promo_potential = old_generation->get_promotion_potential(); if (promo_potential > promo_expedite_threshold) { // Detect unsigned arithmetic underflow assert(promo_potential < heap->capacity(), "Sanity"); @@ -152,8 +142,11 @@ bool ShenandoahYoungHeuristics::should_start_gc() { accept_trigger(); return true; } + return false; +} - size_t mixed_candidates = old_heuristics->unprocessed_old_collection_candidates(); +bool ShenandoahYoungHeuristics::trigger_expedite_mixed(ShenandoahGenerationalHeap* heap, ShenandoahOldHeuristics* old_heuristics) { + const size_t mixed_candidates = old_heuristics->unprocessed_old_collection_candidates(); if (mixed_candidates > ShenandoahExpediteMixedThreshold && !heap->is_concurrent_weak_root_in_progress()) { // We need to run young GC in order to open up some free heap regions so we can finish mixed evacuations. 
// If concurrent weak root processing is in progress, it means the old cycle has chosen mixed collection @@ -163,6 +156,33 @@ bool ShenandoahYoungHeuristics::should_start_gc() { accept_trigger(); return true; } + return false; +} + +bool ShenandoahYoungHeuristics::should_start_gc() { + auto heap = ShenandoahGenerationalHeap::heap(); + ShenandoahOldGeneration* old_generation = heap->old_generation(); + ShenandoahOldHeuristics* old_heuristics = old_generation->heuristics(); + + // Checks that an old cycle has run for at least ShenandoahMinimumOldTimeMs before allowing a young cycle. + if (old_collection_needs_more_time(old_generation, old_heuristics)) { + log_debug(gc)("Young heuristics declines to trigger because old_time_elapsed < ShenandoahMinimumOldTimeMs"); + return false; + } + + if (ShenandoahAdaptiveHeuristics::should_start_gc()) { + // Inherited triggers have already decided to start a cycle, so no further evaluation is required + // ShenandoahAdaptiveHeuristics::should_start_gc() has already accepted trigger, or declined it. + return true; + } + + if (trigger_expedite_promotions(heap, old_generation)) { + return true; + } + + if (trigger_expedite_mixed(heap, old_heuristics)) { + return true; + } // Don't decline_trigger() here That was done in ShenandoahAdaptiveHeuristics::should_start_gc() return false; @@ -173,20 +193,16 @@ bool ShenandoahYoungHeuristics::should_start_gc() { // generation at the end of the current cycle (as represented by young_regions_to_be_reclaimed) and on the anticipated // amount of time required to perform a GC. size_t ShenandoahYoungHeuristics::bytes_of_allocation_runway_before_gc_trigger(size_t young_regions_to_be_reclaimed) { - size_t capacity = _space_info->max_capacity(); - size_t usage = _space_info->used(); - size_t available = (capacity > usage)? 
capacity - usage: 0; - size_t allocated = _free_set->get_bytes_allocated_since_gc_start(); - size_t anticipated_available = available + young_regions_to_be_reclaimed * ShenandoahHeapRegion::region_size_bytes(); + const size_t capacity = _space_info->max_capacity(); + const size_t usage = _space_info->used(); + const size_t available = (capacity > usage) ? capacity - usage: 0; + const size_t anticipated_available = available + young_regions_to_be_reclaimed * ShenandoahHeapRegion::region_size_bytes(); - size_t spike_headroom = capacity * ShenandoahAllocSpikeFactor / 100; - size_t penalties = capacity * _gc_time_penalties / 100; + const size_t spike_headroom = capacity * ShenandoahAllocSpikeFactor / 100; + const size_t penalties = capacity * _gc_time_penalties / 100; - double rate = _allocation_rate.sample(allocated); - - // At what value of available, would avg and spike triggers occur? + // At what value of available, would avg rate trigger occur? // if allocation_headroom < avg_cycle_time * avg_alloc_rate, then we experience avg trigger - // if allocation_headroom < avg_cycle_time * rate, then we experience spike trigger if is_spiking // // allocation_headroom = // 0, if penalties > available or if penalties + spike_headroom > available @@ -199,34 +215,19 @@ size_t ShenandoahYoungHeuristics::bytes_of_allocation_runway_before_gc_trigger(s // since avg_cycle_time * avg_alloc_rate > 0, the first test is sufficient to test both conditions // // thus, evac_slack_avg is MIN2(0, available - avg_cycle_time * avg_alloc_rate + penalties + spike_headroom) - // - // similarly, evac_slack_spiking is MIN2(0, available - avg_cycle_time * rate + penalties + spike_headroom) - // but evac_slack_spiking is only relevant if is_spiking, as defined below. 
- double avg_cycle_time = _gc_cycle_time_history->davg() + (_margin_of_error_sd * _gc_cycle_time_history->dsd()); - double avg_alloc_rate = _allocation_rate.upper_bound(_margin_of_error_sd); + const double avg_cycle_time = _cycles.predict_duration(os::elapsedTime(), _margin_of_error_sd); + const double avg_alloc_rate = ShenandoahHeap::heap()->alloc_rate().upper_bound(_margin_of_error_sd); + const double remaining_before_gc = avg_cycle_time * avg_alloc_rate + penalties + spike_headroom; size_t evac_slack_avg; - if (anticipated_available > avg_cycle_time * avg_alloc_rate + penalties + spike_headroom) { - evac_slack_avg = anticipated_available - (avg_cycle_time * avg_alloc_rate + penalties + spike_headroom); + if (anticipated_available > remaining_before_gc) { + evac_slack_avg = shenandoah_safe_size_cast(anticipated_available - remaining_before_gc); } else { // we have no slack because it's already time to trigger evac_slack_avg = 0; } - bool is_spiking = _allocation_rate.is_spiking(rate, _spike_threshold_sd); - size_t evac_slack_spiking; - if (is_spiking) { - if (anticipated_available > avg_cycle_time * rate + penalties + spike_headroom) { - evac_slack_spiking = anticipated_available - (avg_cycle_time * rate + penalties + spike_headroom); - } else { - // we have no slack because it's already time to trigger - evac_slack_spiking = 0; - } - } else { - evac_slack_spiking = evac_slack_avg; - } - - size_t threshold = min_free_threshold(); - size_t evac_min_threshold = (anticipated_available > threshold)? anticipated_available - threshold: 0; - return MIN3(evac_slack_spiking, evac_slack_avg, evac_min_threshold); + const size_t threshold = min_free_threshold(capacity); + const size_t evac_min_threshold = anticipated_available > threshold ? 
anticipated_available - threshold : 0; + return MIN2(evac_slack_avg, evac_min_threshold); } diff --git a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahYoungHeuristics.hpp b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahYoungHeuristics.hpp index 8fabc40693c..723fb631e75 100644 --- a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahYoungHeuristics.hpp +++ b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahYoungHeuristics.hpp @@ -27,6 +27,8 @@ #include "gc/shenandoah/heuristics/shenandoahGenerationalHeuristics.hpp" class ShenandoahYoungGeneration; +class ShenandoahOldGeneration; +class ShenandoahOldHeuristics; /* * This is a specialization of the generational heuristic which chooses @@ -37,20 +39,26 @@ class ShenandoahYoungHeuristics : public ShenandoahGenerationalHeuristics { public: explicit ShenandoahYoungHeuristics(ShenandoahYoungGeneration* generation); - - void select_collection_set_regions(ShenandoahCollectionSet* cset, - RegionData* data, size_t size, - size_t actual_free) override; - bool should_start_gc() override; size_t bytes_of_allocation_runway_before_gc_trigger(size_t young_regions_to_be_reclaimed); +protected: + void select_collection_set_regions(ShenandoahCollectionSet* cset, + RegionData* data, size_t size, + size_t actual_free) override; + private: void choose_young_collection_set(ShenandoahCollectionSet* cset, const RegionData* data, size_t size, size_t actual_free) const; + bool old_collection_needs_more_time(ShenandoahOldGeneration* old_generation, + ShenandoahOldHeuristics* old_heuristics); + + bool trigger_expedite_promotions(ShenandoahGenerationalHeap* heap, ShenandoahOldGeneration* old_generation); + + bool trigger_expedite_mixed(ShenandoahGenerationalHeap* heap, ShenandoahOldHeuristics* old_heuristics); }; #endif // SHARE_GC_SHENANDOAH_HEURISTICS_SHENANDOAHYOUNGHEURISTICS_HPP diff --git a/src/hotspot/share/gc/shenandoah/shenandoahAllocRate.hpp b/src/hotspot/share/gc/shenandoah/shenandoahAllocRate.hpp new file mode 
100644 index 00000000000..3d0e9d63fb5 --- /dev/null +++ b/src/hotspot/share/gc/shenandoah/shenandoahAllocRate.hpp @@ -0,0 +1,175 @@ +/* + * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef SHARE_GC_SHENANDOAH_SHENANDOAHALLOCRATE_HPP +#define SHARE_GC_SHENANDOAH_SHENANDOAHALLOCRATE_HPP + +#include "gc/shenandoah/shenandoahWeightedSeq.hpp" +#include "runtime/atomic.hpp" +#include "runtime/mutex.hpp" +#include "runtime/mutexLocker.hpp" +#include "runtime/os.hpp" +#include "utilities/globalDefinitions.hpp" + +class ShenandoahAllocationClock { +public: + static jlong elapsed_counter() { + return os::elapsed_counter(); + } + + static jlong elapsed_frequency() { + return os::elapsed_frequency(); + } +}; + +// Snapshot values used by heuristic triggers to avoid lock contention +struct ShenandoahAnticipatedConsumption { + template friend class ShenandoahAllocRate; + explicit ShenandoahAnticipatedConsumption(double duration_seconds) + : _duration_seconds(duration_seconds) + , _baseline(0.0) + , _momentary(0.0) + , _acceleration(0.0) + , _predicted_rate(0.0) { + } + + // Anticipated duration in seconds of next gc cycle + double duration_seconds() const { + return _duration_seconds; + } + + // Consumption in bytes based on baseline allocation rate for the next gc cycle + size_t baseline_consumption() const; + double baseline_rate() const { + return _baseline; + } + + // Consumption in bytes based on momentary allocation rate for the next gc cycle + size_t momentary_consumption() const; + double momentary_rate() const { + return _momentary; + } + + // Consumption in bytes based on an accelerating allocation rate for the next gc cycle + size_t accelerated_consumption() const; + + // The acceleration of the allocation rate (based on slope of linear regression) + double acceleration() const { + return _acceleration; + } + + // Predicted allocation rate based on weighted linear regression + double predicted_rate() const { + return _predicted_rate; + } + +private: + double _duration_seconds; + double _baseline; + double _momentary; + double _acceleration; + double _predicted_rate; +}; + + +// This class tracks three moving averages of the 
allocation rate: +// 1. Momentary: this is the shortest and acts as a sort of 'spike' detector +// 2. Recent: larger than momentary, these samples are used to detect 'acceleration' of the rate +// 3. Baseline: the largest sample window, this is meant to establish the baseline allocation rate +// +// Samples are taken whenever the accumulating count of bytes allocated exceeds the +// minimum sample size. The minimum sample size is generally derived from the heap +// capacity. The thinking is that larger heaps require less frequent sampling. Note +// that as the allocation rate increases, the timeliness of the averages and other +// estimates increases. +template +class ShenandoahAllocRate { + static constexpr size_t ALLOC_SAMPLE_PORTION = 128; + static constexpr size_t ALLOC_SAMPLE_MIN = M; + static constexpr size_t ALLOC_SAMPLE_MAX = G; + + PaddedMonitor _sample_lock; + Atomic _allocated_bytes_since_last_sample; + Atomic _minimum_sample_size; // bytes, read by mutator, updated by gc + jlong _last_sample_time; + + ShenandoahWeightedSeq _baseline; + ShenandoahWeightedSeq _recent; + ShenandoahWeightedSeq _momentary; + +public: + explicit ShenandoahAllocRate(const uint minimum_sample_size = ALLOC_SAMPLE_MIN, + const uint baseline_window_size = ShenandoahAllocRateSampleWindow, + const uint recent_window_size = ShenandoahRecentAllocRateSampleWindow, + const uint momentary_window_size = ShenandoahMomentaryAllocRateSampleWindow) + : _sample_lock(Mutex::nosafepoint - 2, "ShenandoahAllocSample_lock", true) + , _allocated_bytes_since_last_sample(0) + , _minimum_sample_size(minimum_sample_size) + , _last_sample_time(Clock::elapsed_counter()) + , _baseline(baseline_window_size) + , _recent(recent_window_size) + , _momentary(momentary_window_size) + { + } + + // Update minimum sample size based on the given available bytes + void update_minimum_sample_size(size_t available); + + // Set minimum sample size in bytes + void set_minimum_sample_size(const size_t minimum_sample_size) 
{ + _minimum_sample_size.store_relaxed(minimum_sample_size); + } + + // Indicate that this many bytes have been allocated (by the mutator). + void allocated(size_t allocated_bytes); + + // Returns a snapshot of the parameters necessary to evaluate allocation rate triggers. + // Note that momentary consumption and accelerated consumption may both be zero, but may + // not both be non-zero. The `time_delta` parameter is the anticipated duration of the + // next gc cycle. The `standard_deviations` parameter is the margin of error applied to + // the baseline allocation rate expressed as a multiple of the standard deviation. + ShenandoahAnticipatedConsumption snapshot(double time_delta, double standard_deviations); + + // Returns the weighted average of the samples. + double weighted_average() { + MonitorLocker locker(&_sample_lock, Mutex::_no_safepoint_check_flag); + return _baseline.weighted_average(); + } + + // Returns the upper bound of the confidence interval about the mean in terms of the given deviation. + double upper_bound(const double standard_deviations) { + MonitorLocker locker(&_sample_lock, Mutex::_no_safepoint_check_flag); + return upper_bound_no_lock(standard_deviations); + } + +private: + double upper_bound_no_lock(const double standard_deviations) const { + assert(_sample_lock.is_locked(), "Caller must hold lock"); + return _baseline.weighted_average() + standard_deviations * _baseline.weighted_sd(); + } +}; + +typedef ShenandoahAllocRate<> ShenandoahAllocationRate; + +#endif // SHARE_GC_SHENANDOAH_SHENANDOAHALLOCRATE_HPP diff --git a/src/hotspot/share/gc/shenandoah/shenandoahAllocRate.inline.hpp b/src/hotspot/share/gc/shenandoah/shenandoahAllocRate.inline.hpp new file mode 100644 index 00000000000..42b639de165 --- /dev/null +++ b/src/hotspot/share/gc/shenandoah/shenandoahAllocRate.inline.hpp @@ -0,0 +1,120 @@ +/* + * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef SHARE_GC_SHENANDOAH_SHENANDOAHALLOCRATE_INLINE_HPP +#define SHARE_GC_SHENANDOAH_SHENANDOAHALLOCRATE_INLINE_HPP + +#include "gc/shenandoah/shenandoahAllocRate.hpp" + +#include "gc/shenandoah/shenandoahUtils.hpp" +#include "logging/log.hpp" + + +inline size_t ShenandoahAnticipatedConsumption::baseline_consumption() const { + return shenandoah_safe_size_cast(_baseline * _duration_seconds); +} + +inline size_t ShenandoahAnticipatedConsumption::momentary_consumption() const { + return shenandoah_safe_size_cast(_momentary * _duration_seconds); +} + +inline size_t ShenandoahAnticipatedConsumption::accelerated_consumption() const { + const double consumption = _predicted_rate * _duration_seconds + 0.5 * _acceleration * _duration_seconds * _duration_seconds; + return shenandoah_safe_size_cast(consumption); +} + +template +void ShenandoahAllocRate::update_minimum_sample_size(const size_t available) { + const size_t min_sample_size = clamp(available / ALLOC_SAMPLE_PORTION, ALLOC_SAMPLE_MIN, ALLOC_SAMPLE_MAX); + log_info(gc, ergo)("Adjust minimum 
allocation sample size to: " PROPERFMT, PROPERFMTARGS(min_sample_size)); + set_minimum_sample_size(min_sample_size); +} + +template +void ShenandoahAllocRate::allocated(const size_t allocated_bytes) { + size_t unsampled = _allocated_bytes_since_last_sample.add_then_fetch(allocated_bytes); + const size_t minimum_sample_size = _minimum_sample_size.load_relaxed(); + if (unsampled < minimum_sample_size) { + // Not enough to sample yet + return; + } + + if (!_sample_lock.try_lock()) { + // Another thread has the lock and will take the sample + return; + } + + unsampled = _allocated_bytes_since_last_sample.load_relaxed(); + if (unsampled < minimum_sample_size) { + // Another thread has sampled and reset the allocated bytes under the lock + _sample_lock.unlock(); + return; + } + + const jlong now = Clock::elapsed_counter(); + const jlong elapsed = now - _last_sample_time; + + if (elapsed <= 0) { + // Avoid sampling nonsense allocation rates + _sample_lock.unlock(); + return; + } + + _last_sample_time = now; + + // We are recording this sample, deduct it from the counter. It may be increased + // concurrently by other threads outside the lock, so we still use an atomic access. 
+ _allocated_bytes_since_last_sample.sub_then_fetch(unsampled); + + const double timestamp = static_cast(_last_sample_time) / Clock::elapsed_frequency(); + const double rate_seconds = static_cast(unsampled) * Clock::elapsed_frequency() / elapsed; + + _baseline.add(timestamp, rate_seconds); + _recent.add(timestamp, rate_seconds); + _momentary.add(timestamp, rate_seconds); + + _sample_lock.unlock(); + + log_trace(gc, sampling)("Recorded %.3f/s at %.3fs", rate_seconds, timestamp); +} + +template +ShenandoahAnticipatedConsumption ShenandoahAllocRate::snapshot(const double time_delta, const double standard_deviations) { + ShenandoahAnticipatedConsumption result(time_delta); + MonitorLocker locker(&_sample_lock, Mutex::_no_safepoint_check_flag); + + result._baseline = upper_bound_no_lock(standard_deviations); + + if (_recent.weighted_average() <= _baseline.weighted_average()) { + // We are not accelerating, just use the momentary average. + result._momentary = _momentary.weighted_average(); + } else { + result._acceleration = _recent.slope(); + result._predicted_rate = _recent.predict_y(_recent.last()); + } + + return result; +} + +#endif // SHARE_GC_SHENANDOAH_SHENANDOAHALLOCRATE_INLINE_HPP diff --git a/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp b/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp index 5ce131b3c80..0a2beea34ce 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp @@ -193,7 +193,7 @@ void ShenandoahArguments::initialize() { // TLAB sizing policy makes resizing decisions before each GC cycle. It averages // historical data, assigning more recent data the weight according to TLABAllocationWeight. // Current default is good for generational collectors that run frequent young GCs. 
- // With Shenandoah, GC cycles are much less frequent, so we need we need sizing policy + // With Shenandoah, GC cycles are much less frequent, so we need sizing policy // to converge faster over smaller number of resizing decisions. if (strcmp(ShenandoahGCMode, "generational") && FLAG_IS_DEFAULT(TLABAllocationWeight)) { FLAG_SET_DEFAULT(TLABAllocationWeight, 90); @@ -202,7 +202,7 @@ void ShenandoahArguments::initialize() { if (GCCardSizeInBytes < ShenandoahMinCardSizeInBytes) { vm_exit_during_initialization( - err_msg("GCCardSizeInBytes ( %u ) must be >= %u\n", GCCardSizeInBytes, (unsigned int) ShenandoahMinCardSizeInBytes)); + err_msg("GCCardSizeInBytes ( %u ) must be >= %u\n", GCCardSizeInBytes, ShenandoahMinCardSizeInBytes)); } // Gen shen does not support any ShenandoahGCHeuristics value except for the default "adaptive" @@ -213,6 +213,16 @@ void ShenandoahArguments::initialize() { FLAG_SET_ERGO(ShenandoahGCHeuristics, "adaptive"); } + if (ShenandoahMomentaryAllocRateSampleWindow > ShenandoahRecentAllocRateSampleWindow + || ShenandoahRecentAllocRateSampleWindow > ShenandoahAllocRateSampleWindow) { + vm_exit_during_initialization( + err_msg("Relation must hold: ShenandoahMomentaryAllocRateSampleWindow (%u) " + "<= ShenandoahRecentAllocRateSampleWindow (%u) " + "<= ShenandoahAllocRateSampleWindow (%u)", + ShenandoahMomentaryAllocRateSampleWindow, ShenandoahRecentAllocRateSampleWindow, + ShenandoahAllocRateSampleWindow)); + } + FullGCForwarding::initialize_flags(MaxHeapSize); } diff --git a/src/hotspot/share/gc/shenandoah/shenandoahControlThread.cpp b/src/hotspot/share/gc/shenandoah/shenandoahControlThread.cpp index 53e12711a13..f2447db8210 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahControlThread.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahControlThread.cpp @@ -138,11 +138,6 @@ void ShenandoahControlThread::run_service() { heuristics->cancel_trigger_request(); - if (mode != stw_degenerated) { - // If mode is stw_degenerated, count bytes 
allocated from the start of the conc GC that experienced alloc failure. - heap->reset_bytes_allocated_since_gc_start(); - } - MetaspaceCombinedStats meta_sizes = MetaspaceUtils::get_combined_statistics(); // If GC was requested, we are sampling the counters even without actual triggers diff --git a/src/hotspot/share/gc/shenandoah/shenandoahCycleDuration.cpp b/src/hotspot/share/gc/shenandoah/shenandoahCycleDuration.cpp new file mode 100644 index 00000000000..86d0a08bafe --- /dev/null +++ b/src/hotspot/share/gc/shenandoah/shenandoahCycleDuration.cpp @@ -0,0 +1,53 @@ +/* + * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "gc/shenandoah/shenandoahCycleDuration.hpp" +#include "logging/log.hpp" +#include "logging/logTag.hpp" +#include "runtime/mutexLocker.hpp" + +#include + + +ShenandoahCycleDuration::ShenandoahCycleDuration(uint size) + : _gc_times_lock(Mutex::nosafepoint - 2, "ShenandoahCycleTimes_lock", true) + , _gc_times(size) {} + +void ShenandoahCycleDuration::record_duration(double timestamp_at_start, double duration) { + log_debug(gc, sampling)("Cycle started at: %.3f, completed in %.3fs", timestamp_at_start, duration); + MonitorLocker locker(&_gc_times_lock, Mutex::_no_safepoint_check_flag); + _gc_times.add(timestamp_at_start, duration); +} + +double ShenandoahCycleDuration::predict_duration(double timestamp_at_start, double margin_of_error) { + MonitorLocker locker(&_gc_times_lock, Mutex::_no_safepoint_check_flag); + + const double prediction = _gc_times.predict_y(timestamp_at_start); + if (std::isfinite(prediction) && prediction > 0.0) { + return prediction + _gc_times.residual_sd() * margin_of_error; + } + + // return average time, rather than negative or zero time + return _gc_times.average() + _gc_times.sd() * margin_of_error; +} diff --git a/src/hotspot/share/gc/shenandoah/shenandoahCycleDuration.hpp b/src/hotspot/share/gc/shenandoah/shenandoahCycleDuration.hpp new file mode 100644 index 00000000000..8bfcc7c3748 --- /dev/null +++ b/src/hotspot/share/gc/shenandoah/shenandoahCycleDuration.hpp @@ -0,0 +1,47 @@ +/* + * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef SHARE_GC_SHENANDOAH_SHENANDOAHCYCLEDURATION_HPP +#define SHARE_GC_SHENANDOAH_SHENANDOAHCYCLEDURATION_HPP + +#include "gc/shenandoah/shenandoahWeightedSeq.hpp" +#include "runtime/mutex.hpp" + +class ShenandoahCycleDuration { + // To enable detection of GC time trends, we keep separate track of the recent history of gc time. During initialization, + // for example, the amount of live memory may be increasing, which is likely to cause the GC times to increase. This history + // allows us to predict increasing GC times rather than always assuming average recent GC time is the best predictor. 
+ static constexpr uint GC_TIME_SAMPLE_SIZE = 15; + + // Written by control thread, read by regulator thread + Monitor _gc_times_lock; + ShenandoahWeightedSeq _gc_times; + +public: + explicit ShenandoahCycleDuration(uint size = GC_TIME_SAMPLE_SIZE); + void record_duration(double timestamp_at_start, double duration); + double predict_duration(double timestamp_at_start, double margin_of_error); +}; + +#endif // SHARE_GC_SHENANDOAH_SHENANDOAHCYCLEDURATION_HPP diff --git a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp index a6ed56f6fea..9bbbada0be1 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.cpp @@ -289,32 +289,6 @@ void ShenandoahFreeSet::resize_old_collector_capacity(size_t regions) { // else, old generation is already appropriately sized } - -void ShenandoahFreeSet::reset_bytes_allocated_since_gc_start(size_t initial_bytes_allocated) { - shenandoah_assert_heaplocked(); - // Future inquiries of get_total_bytes_allocated() will return the sum of - // _total_bytes_previously_allocated and _mutator_bytes_allocated_since_gc_start. - // Since _mutator_bytes_allocated_since_gc_start does not start at zero, we subtract initial_bytes_allocated so as - // to not double count these allocated bytes. - size_t original_mutator_bytes_allocated_since_gc_start = _mutator_bytes_allocated_since_gc_start; - - // Setting _mutator_bytes_allocated_since_gc_start before _total_bytes_previously_allocated reduces the damage - // in the case that the control or regulator thread queries get_bytes_allocated_since_previous_sample() between - // the two assignments. - // - // These are not declared as volatile so the compiler or hardware may reorder the assignments. The implementation of - // get_bytes_allocated_since_previous_cycle() is robust to this possibility, as are triggering heuristics. 
The current - // implementation assumes we are better off to tolerate the very rare race rather than impose a synchronization penalty - // on every update and fetch. (Perhaps it would be better to make the opposite tradeoff for improved maintainability.) - _mutator_bytes_allocated_since_gc_start = initial_bytes_allocated; - _total_bytes_previously_allocated += original_mutator_bytes_allocated_since_gc_start - initial_bytes_allocated; -} - -void ShenandoahFreeSet::increase_bytes_allocated(size_t bytes) { - shenandoah_assert_heaplocked(); - _mutator_bytes_allocated_since_gc_start += bytes; -} - inline idx_t ShenandoahRegionPartitions::leftmost(ShenandoahFreeSetPartitionId which_partition) const { assert (which_partition < NumPartitions, "selected free partition must be valid"); idx_t idx = _leftmosts[int(which_partition)]; @@ -1229,8 +1203,6 @@ inline void ShenandoahRegionPartitions::assert_bounds_sanity() { ShenandoahFreeSet::ShenandoahFreeSet(ShenandoahHeap* heap, size_t max_regions) : _heap(heap), _partitions(max_regions, this), - _total_bytes_previously_allocated(0), - _mutator_bytes_at_last_sample(0), _total_humongous_waste(0), _alloc_bias_weight(0), _total_young_used(0), @@ -1242,8 +1214,7 @@ ShenandoahFreeSet::ShenandoahFreeSet(ShenandoahHeap* heap, size_t max_regions) : _young_unaffiliated_regions(0), _global_unaffiliated_regions(0), _total_young_regions(0), - _total_global_regions(0), - _mutator_bytes_allocated_since_gc_start(0) + _total_global_regions(0) { clear_internal(); } @@ -1660,7 +1631,6 @@ HeapWord* ShenandoahFreeSet::try_allocate_in(ShenandoahHeapRegion* r, Shenandoah if (req.is_mutator_alloc()) { assert(req.is_young(), "Mutator allocations always come from young generation."); _partitions.increase_used(ShenandoahFreeSetPartitionId::Mutator, req.actual_size() * HeapWordSize); - increase_bytes_allocated(req.actual_size() * HeapWordSize); } else { assert(req.is_gc_alloc(), "Should be gc_alloc since req wasn't mutator alloc"); @@ -1699,7 +1669,7 @@ 
HeapWord* ShenandoahFreeSet::try_allocate_in(ShenandoahHeapRegion* r, Shenandoah size_t waste_bytes = _partitions.retire_from_partition(orig_partition, idx, r->used()); DEBUG_ONLY(boundary_changed = true;) if (req.is_mutator_alloc() && (waste_bytes > 0)) { - increase_bytes_allocated(waste_bytes); + req.set_waste(waste_bytes / HeapWordSize); } } @@ -1871,15 +1841,9 @@ HeapWord* ShenandoahFreeSet::allocate_contiguous(ShenandoahAllocRequest& req, bo } } _partitions.decrease_empty_region_counts(ShenandoahFreeSetPartitionId::Mutator, num); - if (waste_bytes > 0) { - // For humongous allocations, waste_bytes are included in total_used. Since this is not humongous, - // we need to account separately for the waste_bytes. - increase_bytes_allocated(waste_bytes); - } } _partitions.increase_used(ShenandoahFreeSetPartitionId::Mutator, total_used); - increase_bytes_allocated(total_used); req.set_actual_size(words_size); // If !is_humongous, the "waste" is made availabe for new allocation if (waste_bytes > 0) { diff --git a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.hpp b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.hpp index 21668a5d1d1..083dd551aaa 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.hpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahFreeSet.hpp @@ -441,9 +441,6 @@ private: ShenandoahHeap* const _heap; ShenandoahRegionPartitions _partitions; - size_t _total_bytes_previously_allocated; - size_t _mutator_bytes_at_last_sample; - // Temporarily holds mutator_Free allocatable bytes between prepare_to_rebuild() and finish_rebuild() size_t _prepare_to_rebuild_mutator_free; @@ -519,8 +516,6 @@ private: size_t _total_young_regions; size_t _total_global_regions; - size_t _mutator_bytes_allocated_since_gc_start; - // If only affiliation changes are promote-in-place and generation sizes have not changed, // we have AffiliatedChangesAreGlobalNeutral // If only affiliation changes are non-empty regions moved from Mutator to Collector and young size 
has not changed, @@ -668,37 +663,6 @@ public: return _partitions.shrink_interval_if_range_modifies_either_boundary(partition, low_idx, high_idx, num_regions); } - void reset_bytes_allocated_since_gc_start(size_t initial_bytes_allocated); - - void increase_bytes_allocated(size_t bytes); - - // Return an approximation of the bytes allocated since GC start. The value returned is monotonically non-decreasing - // in time within each GC cycle. For certain GC cycles, the value returned may include some bytes allocated before - // the start of the current GC cycle. - inline size_t get_bytes_allocated_since_gc_start() const { - return _mutator_bytes_allocated_since_gc_start; - } - - inline size_t get_total_bytes_allocated() { - return _mutator_bytes_allocated_since_gc_start + _total_bytes_previously_allocated; - } - - inline size_t get_bytes_allocated_since_previous_sample() { - const size_t total_bytes_allocated = get_total_bytes_allocated(); - // total_bytes_allocated could overflow (wraps around) size_t in rare condition, we are relying on - // wrap-around arithmetic of size_t type to produce meaningful result when total_bytes_allocated overflows - // its 64-bit counter. The expression below is equivalent to code: - // if (total_bytes < _mutator_bytes_at_last_sample) { - // // overflow - // return total_bytes + (SIZE_T_MAX - _mutator_bytes_at_last_sample) + 1; - // } else { - // return total_bytes - _mutator_bytes_at_last_sample; - // } - const size_t result = total_bytes_allocated - _mutator_bytes_at_last_sample; - _mutator_bytes_at_last_sample = total_bytes_allocated; - return result; - } - // Public because ShenandoahRegionPartitions assertions require access. 
inline size_t alloc_capacity(ShenandoahHeapRegion *r) const; inline size_t alloc_capacity(size_t idx) const; diff --git a/src/hotspot/share/gc/shenandoah/shenandoahGenerationalControlThread.cpp b/src/hotspot/share/gc/shenandoah/shenandoahGenerationalControlThread.cpp index bc2028d077d..b3a48f85114 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahGenerationalControlThread.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahGenerationalControlThread.cpp @@ -256,11 +256,6 @@ void ShenandoahGenerationalControlThread::run_gc_cycle(const ShenandoahGCRequest GCIdMark gc_id_mark; - if ((gc_mode() != servicing_old) && (gc_mode() != stw_degenerated)) { - // If mode is stw_degenerated, count bytes allocated from the start of the conc GC that experienced alloc failure. - _heap->reset_bytes_allocated_since_gc_start(); - } - MetaspaceCombinedStats meta_sizes = MetaspaceUtils::get_combined_statistics(); // If GC was requested, we are sampling the counters even without actual triggers diff --git a/src/hotspot/share/gc/shenandoah/shenandoahGlobalGeneration.cpp b/src/hotspot/share/gc/shenandoah/shenandoahGlobalGeneration.cpp index a072fe2db06..a23812227ba 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahGlobalGeneration.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahGlobalGeneration.cpp @@ -28,8 +28,9 @@ #include "gc/shenandoah/shenandoahGlobalGeneration.hpp" #include "gc/shenandoah/shenandoahHeap.hpp" #include "gc/shenandoah/shenandoahHeapRegion.inline.hpp" +#include "gc/shenandoah/shenandoahOldGeneration.hpp" #include "gc/shenandoah/shenandoahUtils.hpp" -#include "gc/shenandoah/shenandoahVerifier.hpp" +#include "gc/shenandoah/shenandoahYoungGeneration.hpp" const char* ShenandoahGlobalGeneration::name() const { @@ -49,10 +50,6 @@ size_t ShenandoahGlobalGeneration::used() const { return _free_set->global_used(); } -size_t ShenandoahGlobalGeneration::bytes_allocated_since_gc_start() const { - return _free_set->get_bytes_allocated_since_gc_start(); -} - size_t 
ShenandoahGlobalGeneration::get_affiliated_region_count() const { return _free_set->global_affiliated_regions(); } @@ -61,7 +58,6 @@ size_t ShenandoahGlobalGeneration::get_humongous_waste() const { return _free_set->total_humongous_waste(); } - size_t ShenandoahGlobalGeneration::used_regions() const { return _free_set->global_affiliated_regions(); } diff --git a/src/hotspot/share/gc/shenandoah/shenandoahGlobalGeneration.hpp b/src/hotspot/share/gc/shenandoah/shenandoahGlobalGeneration.hpp index 9f9e4818a95..8b323d6ee47 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahGlobalGeneration.hpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahGlobalGeneration.hpp @@ -26,8 +26,6 @@ #define SHARE_VM_GC_SHENANDOAH_SHENANDOAHGLOBALGENERATION_HPP #include "gc/shenandoah/shenandoahGeneration.hpp" -#include "gc/shenandoah/shenandoahOldGeneration.hpp" -#include "gc/shenandoah/shenandoahYoungGeneration.hpp" // A "generation" that represents the whole heap. class ShenandoahGlobalGeneration : public ShenandoahGeneration { @@ -46,7 +44,6 @@ public: public: const char* name() const override; - size_t bytes_allocated_since_gc_start() const override; size_t used() const override; size_t used_regions() const override; size_t used_regions_size() const override; diff --git a/src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp b/src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp index 2bedc53e24b..bc48543d113 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp @@ -42,6 +42,7 @@ #include "gc/shenandoah/mode/shenandoahGenerationalMode.hpp" #include "gc/shenandoah/mode/shenandoahPassiveMode.hpp" #include "gc/shenandoah/mode/shenandoahSATBMode.hpp" +#include "gc/shenandoah/shenandoahAllocRate.inline.hpp" #include "gc/shenandoah/shenandoahAllocRequest.hpp" #include "gc/shenandoah/shenandoahBarrierSet.hpp" #include "gc/shenandoah/shenandoahClosures.inline.hpp" @@ -822,8 +823,7 @@ bool ShenandoahHeap::check_soft_max_changed() 
{ size_t new_soft_max = AtomicAccess::load(&SoftMaxHeapSize); size_t old_soft_max = soft_max_capacity(); if (new_soft_max != old_soft_max) { - new_soft_max = MAX2(min_capacity(), new_soft_max); - new_soft_max = MIN2(max_capacity(), new_soft_max); + new_soft_max = clamp(new_soft_max, min_capacity(), max_capacity()); if (new_soft_max != old_soft_max) { log_info(gc)("Soft Max Heap Size: %zu%s -> %zu%s", byte_size_in_proper_unit(old_soft_max), proper_unit_for_byte_size(old_soft_max), @@ -1033,35 +1033,41 @@ HeapWord* ShenandoahHeap::allocate_memory_under_lock(ShenandoahAllocRequest& req HeapWord* result = _free_set->allocate(req, in_new_region); // Record the plab configuration for this result and register the object. - if (result != nullptr && req.is_old()) { - if (req.is_lab_alloc()) { - old_generation()->configure_plab_for_current_thread(req); - } else { - // Register the newly allocated object while we're holding the global lock since there's no synchronization - // built in to the implementation of register_object(). There are potential races when multiple independent - // threads are allocating objects, some of which might span the same card region. For example, consider - // a card table's memory region within which three objects are being allocated by three different threads: - // - // objects being "concurrently" allocated: - // [-----a------][-----b-----][--------------c------------------] - // [---- card table memory range --------------] - // - // Before any objects are allocated, this card's memory range holds no objects. Note that allocation of object a - // wants to set the starts-object, first-start, and last-start attributes of the preceding card region. - // Allocation of object b wants to set the starts-object, first-start, and last-start attributes of this card region. - // Allocation of object c also wants to set the starts-object, first-start, and last-start attributes of this - // card region. 
- // - // The thread allocating b and the thread allocating c can "race" in various ways, resulting in confusion, such as - // last-start representing object b while first-start represents object c. This is why we need to require all - // register_object() invocations to be "mutually exclusive" with respect to each card's memory range. - old_generation()->card_scan()->register_object(result); + if (result != nullptr) { + if (req.is_mutator_alloc()) { + _alloc_rate.allocated((req.actual_size() + req.waste()) * HeapWordSize); + } - if (req.is_promotion()) { - // Shared promotion. - const size_t actual_size = req.actual_size() * HeapWordSize; - log_debug(gc, plab)("Expend shared promotion of %zu bytes", actual_size); - old_generation()->expend_promoted(actual_size); + if (req.is_old()) { + if (req.is_lab_alloc()) { + old_generation()->configure_plab_for_current_thread(req); + } else { + // Register the newly allocated object while we're holding the global lock since there's no synchronization + // built in to the implementation of register_object(). There are potential races when multiple independent + // threads are allocating objects, some of which might span the same card region. For example, consider + // a card table's memory region within which three objects are being allocated by three different threads: + // + // objects being "concurrently" allocated: + // [-----a------][-----b-----][--------------c------------------] + // [---- card table memory range --------------] + // + // Before any objects are allocated, this card's memory range holds no objects. Note that allocation of object a + // wants to set the starts-object, first-start, and last-start attributes of the preceding card region. + // Allocation of object b wants to set the starts-object, first-start, and last-start attributes of this card region. + // Allocation of object c also wants to set the starts-object, first-start, and last-start attributes of this + // card region. 
+ // + // The thread allocating b and the thread allocating c can "race" in various ways, resulting in confusion, such as + // last-start representing object b while first-start represents object c. This is why we need to require all + // register_object() invocations to be "mutually exclusive" with respect to each card's memory range. + old_generation()->card_scan()->register_object(result); + + if (req.is_promotion()) { + // Shared promotion. + const size_t actual_size = req.actual_size() * HeapWordSize; + log_debug(gc, plab)("Expend shared promotion of %zu bytes", actual_size); + old_generation()->expend_promoted(actual_size); + } } } } @@ -2312,7 +2318,7 @@ void ShenandoahHeap::stop() { // Step 2. Wait until GC worker exits normally (this will cancel any ongoing GC). control_thread()->stop(); - // Stop 4. Shutdown uncommit thread. + // Step 3. Shutdown uncommit thread. if (_uncommit_thread != nullptr) { _uncommit_thread->stop(); } @@ -2423,27 +2429,6 @@ address ShenandoahHeap::in_cset_fast_test_addr() { return (address) heap->collection_set()->biased_map_address(); } -void ShenandoahHeap::reset_bytes_allocated_since_gc_start() { - // It is important to force_alloc_rate_sample() before the associated generation's bytes_allocated has been reset. - // Note that we obtain heap lock to prevent additional allocations between sampling bytes_allocated_since_gc_start() - // and reset_bytes_allocated_since_gc_start() - { - ShenandoahHeapLocker locker(lock()); - // unaccounted_bytes is the bytes not accounted for by our forced sample. If the sample interval is too short, - // the "forced sample" will not happen, and any recently allocated bytes are "unaccounted for". We pretend these - // bytes are allocated after the start of subsequent gc. 
- size_t unaccounted_bytes; - size_t bytes_allocated = _free_set->get_bytes_allocated_since_gc_start(); - if (mode()->is_generational()) { - unaccounted_bytes = young_generation()->heuristics()->force_alloc_rate_sample(bytes_allocated); - } else { - // Single-gen Shenandoah uses global heuristics. - unaccounted_bytes = heuristics()->force_alloc_rate_sample(bytes_allocated); - } - _free_set->reset_bytes_allocated_since_gc_start(unaccounted_bytes); - } -} - void ShenandoahHeap::set_degenerated_gc_in_progress(bool in_progress) { _degenerated_gc_in_progress.set_cond(in_progress); } diff --git a/src/hotspot/share/gc/shenandoah/shenandoahHeap.hpp b/src/hotspot/share/gc/shenandoah/shenandoahHeap.hpp index fa435eaa1be..6e0070b35ab 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahHeap.hpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahHeap.hpp @@ -30,6 +30,7 @@ #include "gc/shared/collectedHeap.hpp" #include "gc/shared/markBitMap.hpp" #include "gc/shenandoah/mode/shenandoahMode.hpp" +#include "gc/shenandoah/shenandoahAllocRate.hpp" #include "gc/shenandoah/shenandoahAllocRequest.hpp" #include "gc/shenandoah/shenandoahAsserts.hpp" #include "gc/shenandoah/shenandoahController.hpp" @@ -227,12 +228,12 @@ private: Atomic _committed; shenandoah_padding(1); + ShenandoahAllocationRate _alloc_rate; + public: void increase_committed(size_t bytes); void decrease_committed(size_t bytes); - void reset_bytes_allocated_since_gc_start(); - size_t min_capacity() const; size_t max_capacity() const override; size_t soft_max_capacity() const; @@ -243,6 +244,10 @@ public: void set_soft_max_capacity(size_t v); + ShenandoahAllocationRate& alloc_rate() { + return _alloc_rate; + } + // ---------- Periodic Tasks // public: diff --git a/src/hotspot/share/gc/shenandoah/shenandoahOldGeneration.cpp b/src/hotspot/share/gc/shenandoah/shenandoahOldGeneration.cpp index 1dd38deb4d7..029504f9c23 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahOldGeneration.cpp +++ 
b/src/hotspot/share/gc/shenandoah/shenandoahOldGeneration.cpp @@ -796,11 +796,6 @@ size_t ShenandoahOldGeneration::used() const { return _free_set->old_used(); } -size_t ShenandoahOldGeneration::bytes_allocated_since_gc_start() const { - assert(ShenandoahHeap::heap()->mode()->is_generational(), "NON_GEN implies not generational"); - return 0; -} - size_t ShenandoahOldGeneration::get_affiliated_region_count() const { return _free_set->old_affiliated_regions(); } diff --git a/src/hotspot/share/gc/shenandoah/shenandoahOldGeneration.hpp b/src/hotspot/share/gc/shenandoah/shenandoahOldGeneration.hpp index 942f93c5c68..7e26d800e1d 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahOldGeneration.hpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahOldGeneration.hpp @@ -353,7 +353,6 @@ public: static const char* state_name(State state); - size_t bytes_allocated_since_gc_start() const override; size_t used() const override; size_t used_regions() const override; size_t used_regions_size() const override; diff --git a/src/hotspot/share/gc/shenandoah/shenandoahUtils.cpp b/src/hotspot/share/gc/shenandoah/shenandoahUtils.cpp index 5af2e274833..ac6c826a3f2 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahUtils.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahUtils.cpp @@ -26,15 +26,10 @@ #include "gc/shared/gcCause.hpp" #include "gc/shared/gcTrace.hpp" -#include "gc/shared/gcWhen.hpp" #include "gc/shared/referenceProcessorStats.hpp" -#include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp" -#include "gc/shenandoah/shenandoahCollectorPolicy.hpp" #include "gc/shenandoah/shenandoahHeap.inline.hpp" -#include "gc/shenandoah/shenandoahOldGeneration.hpp" #include "gc/shenandoah/shenandoahReferenceProcessor.hpp" #include "gc/shenandoah/shenandoahUtils.hpp" -#include "gc/shenandoah/shenandoahYoungGeneration.hpp" #include "jfr/jfrEvents.hpp" #include "utilities/debug.hpp" diff --git a/src/hotspot/share/gc/shenandoah/shenandoahUtils.hpp 
b/src/hotspot/share/gc/shenandoah/shenandoahUtils.hpp index b6e084c9091..4750a0cb2db 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahUtils.hpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahUtils.hpp @@ -41,6 +41,9 @@ #include "runtime/vmThread.hpp" #include "services/memoryService.hpp" +#include +#include + class GCTimer; class ShenandoahGeneration; @@ -235,5 +238,20 @@ public: } }; +// Casting a double that cannot be represented as a size_t may result in undefined behavior. +// This small function checks if the given double is representable in a size_t and returns +// that representation if it is. Otherwise, if the double cannot be safely cast to a size_t +// it returns zero. +inline size_t shenandoah_safe_size_cast(const double d) { + static constexpr double size_max_as_double = static_cast(std::numeric_limits::max()); + if (std::isnan(d) || d < 0 || d >= size_max_as_double) { + // NaN is unordered, all comparisons will be false. + // +Inf is always greater than, -Inf is always less than + return 0; + } + return static_cast(d); +} + + #endif // SHARE_GC_SHENANDOAH_SHENANDOAHUTILS_HPP diff --git a/src/hotspot/share/gc/shenandoah/shenandoahWeightedSeq.cpp b/src/hotspot/share/gc/shenandoah/shenandoahWeightedSeq.cpp new file mode 100644 index 00000000000..383b2a54a79 --- /dev/null +++ b/src/hotspot/share/gc/shenandoah/shenandoahWeightedSeq.cpp @@ -0,0 +1,190 @@ +/* + * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "gc/shenandoah/shenandoahWeightedSeq.hpp" +#include "memory/allocation.hpp" + +#include + +ShenandoahWeightedSeq::ShenandoahWeightedSeq(uint size) +: _size(size), + _first_sample_index(0), + _num_samples(0), + _x_values(NEW_C_HEAP_ARRAY(double, _size, mtGC)), + _y_values(NEW_C_HEAP_ARRAY(double, _size, mtGC)), + _weights(NEW_C_HEAP_ARRAY(double, _size, mtGC)), + _x_origin(0), + _y_origin(0), + _x_sum(0), + _y_sum(0), + _weighted_y_sum(0), + _weighted_sum(0), + _weighted_yy_sum(0), + _xy_sum(0), + _xx_sum(0), + _yy_sum(0), + _slope(0.0), + _y_intercept(0.0), + _residual_sd(0.0) { +} + +ShenandoahWeightedSeq::~ShenandoahWeightedSeq() { + FREE_C_HEAP_ARRAY(_x_values); + FREE_C_HEAP_ARRAY(_y_values); + FREE_C_HEAP_ARRAY(_weights); +} + +void ShenandoahWeightedSeq::add(double x, double y) { + if (_num_samples == 0) { + add(x, y, 0.0); + } else { + const uint index = (_first_sample_index + _num_samples - 1) % _size; + const double weight = x - _x_values[index]; + add(x, y, weight); + } +} + +void ShenandoahWeightedSeq::deduct_oldest_and_rebase(const double x_absolute, const double y_absolute, const double weight) { + // Suppose we want to shift _x_origin by delta. Our accumulators for x + // components are based on the relative value 'x - x_origin', call this 'a'. + // We want to update our accumulators to hold 'a - delta'. + // Our new value for + // updated sum(x) = sum(a - delta) + // = sum(a) - n * delta. 
+ // Similarly + // updated sum(x^2) = sum((a - delta)^2) + // = sum(a^2 - 2 * delta * a + delta^2) + // = sum(a^2) - 2 * delta * sum(a) + n * delta^2 + // Finally + // updated sum(xy) = sum(a - delta) * y + // = sum(xy) - delta * sum(y) + const double x_delta = x_absolute - _x_origin; + const double y_delta = y_absolute - _y_origin; + + // order matters here, we must use old _x_sum + _xx_sum = _xx_sum - 2.0 * x_delta * _x_sum + _num_samples * x_delta * x_delta; + _xy_sum = _xy_sum - x_delta * _y_sum; + _x_sum = _x_sum - _num_samples * x_delta; + _x_origin = x_absolute; + + // similarly, rebase y + _yy_sum = _yy_sum - 2.0 * y_delta * _y_sum + _num_samples * y_delta * y_delta; + _xy_sum = _xy_sum - y_delta * _x_sum; + _y_sum = _y_sum - _num_samples * y_delta; + _y_origin = y_absolute; + + // and our weighted sums + _weighted_yy_sum = _weighted_yy_sum - 2.0 * y_delta * _weighted_y_sum + _weighted_sum * y_delta * y_delta; + _weighted_y_sum = _weighted_y_sum - _weighted_sum * y_delta; + _weighted_sum -= weight; +} + +void ShenandoahWeightedSeq::add_latest(double x_absolute, double y_absolute, double weight) { + const double x_delta = x_absolute - _x_origin; + const double y_delta = y_absolute - _y_origin; + _x_sum += x_delta; + _y_sum += y_delta; + _xy_sum += x_delta * y_delta; + _xx_sum += x_delta * x_delta; + _yy_sum += y_delta * y_delta; + _weighted_sum += weight; + _weighted_y_sum += y_delta * weight; + _weighted_yy_sum += y_delta * y_delta * weight; +} + +void ShenandoahWeightedSeq::add(double x, double y, double weight) { + // Update best-fit linear regression + const uint index = (_first_sample_index + _num_samples) % _size; + if (_num_samples == _size) { + deduct_oldest_and_rebase(_x_values[index], _y_values[index], _weights[index]); + } else if (_num_samples == 0) { + _x_origin = x; + _y_origin = y; + } + + _x_values[index] = x; + _y_values[index] = y; + _weights[index] = weight; + + add_latest(x, y, weight); + + if (_num_samples < _size) { + _num_samples++; + 
} else { + _first_sample_index = (_first_sample_index + 1) % _size; + } + + const double x_spread = _num_samples * _xx_sum - _x_sum * _x_sum; + if (x_spread <= 0.0 || _num_samples < 2) { + // All samples are the same point, can't make a line + _slope = 0; + _y_intercept = y - _y_origin; + _residual_sd = 0.0; + return; + } + + _slope = (_num_samples * _xy_sum - _x_sum * _y_sum) / x_spread; + _y_intercept = (_y_sum - _slope * _x_sum) / _num_samples; + const double total_sum_of_squares = _yy_sum - _y_sum * _y_sum / _num_samples; + const double sum_of_cross_deviations = _xy_sum - _x_sum * _y_sum / _num_samples; + const double residual_sum_of_squares = total_sum_of_squares - _slope * sum_of_cross_deviations; + _residual_sd = std::sqrt(MAX2(residual_sum_of_squares, 0.0) / _num_samples); +} + +double ShenandoahWeightedSeq::predict(double x_absolute, double margin_of_error) const { + const double prediction = predict_y(x_absolute) + _residual_sd * margin_of_error; + if (prediction <= 0.0) { + // return average time, rather than negative or zero time + return average(); + } + return prediction; +} + +double ShenandoahWeightedSeq::weighted_average() const { + if (_weighted_sum <= 0.0) { + return 0.0; + } + + return _weighted_y_sum / _weighted_sum + _y_origin; +} + +double ShenandoahWeightedSeq::weighted_sd() const { + if (_weighted_sum <= 0.0) { + return 0.0; + } + + const double weighted_mean = _weighted_y_sum / _weighted_sum; + const double variance = _weighted_yy_sum / _weighted_sum - weighted_mean * weighted_mean; + return std::sqrt(MAX2(variance, 0.0)); +} + +double ShenandoahWeightedSeq::sd() const { + if (_num_samples < 2) { + return 0.0; + } + + const double mean = _y_sum / _num_samples; + const double variance = _yy_sum / _num_samples - mean * mean; + return std::sqrt(MAX2(variance, 0.0)); +} diff --git a/src/hotspot/share/gc/shenandoah/shenandoahWeightedSeq.hpp b/src/hotspot/share/gc/shenandoah/shenandoahWeightedSeq.hpp new file mode 100644 index
00000000000..00a1d121be3 --- /dev/null +++ b/src/hotspot/share/gc/shenandoah/shenandoahWeightedSeq.hpp @@ -0,0 +1,130 @@ +/* + * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef SHARE_GC_SHENANDOAH_SHENANDOAHWEIGHTEDSEQ_HPP +#define SHARE_GC_SHENANDOAH_SHENANDOAHWEIGHTEDSEQ_HPP + +#include "utilities/globalDefinitions.hpp" + +// Provides a weighted sequence of x, y pairs. Various statistical properties +// such as weighted mean, standard deviation, the line of best fit and the +// residual deviation (deviation about the line of best fit) are available. +// These attributes are maintained incrementally as we expect this structure +// to be read more often than it is written. 
+class ShenandoahWeightedSeq { + + uint _size; + uint _first_sample_index; + uint _num_samples; + + double* const _x_values; + double* const _y_values; + double* const _weights; + + // Values stored in the x,y accumulators will be reduced to avoid arithmetic + // errors caused by loss of precision when working with large doubles. This + // is particularly important for the common use case when x is a monotonically + // increasing timestamp + double _x_origin; + double _y_origin; + + double _x_sum; + double _y_sum; + double _weighted_y_sum; + double _weighted_sum; + double _weighted_yy_sum; + double _xy_sum; + double _xx_sum; + double _yy_sum; + + double _slope; // slope + double _y_intercept; // y-intercept + double _residual_sd; // sd on deviance from prediction + +public: + + explicit ShenandoahWeightedSeq(uint size); + ~ShenandoahWeightedSeq(); + + // Return last item x value added to the sequence (zero if sequence is empty). + double last() const { + if (_num_samples == 0) { + return 0.0; + } + + const uint index = (_first_sample_index + _num_samples - 1) % _size; + return _x_values[index]; + } + + // Add x, y to the sequence. Weight will be calculated as x - last(). + void add(double x, double y); + + + // Add x, y to the sequence using given weight. + void add(double x, double y, double weight); + + // Predict the next value in the sequence for a given x. Uses average + // if the prediction is <= 0. This is a legacy method visible only for + // testing. + double predict(double x, double margin_of_error) const; + + // The standard deviation of the samples about the line of best fit rather + // than deviation about the mean. + double residual_sd() const { return _residual_sd; } + + // An unweighted mean. + double average() const { return _y_sum / MAX2(_num_samples, 1u) + _y_origin; } + + // The weighted mean for the sequence. + double weighted_average() const; + + // Standard deviation for the weighted mean. 
+ double weighted_sd() const; + + // An unweighted standard deviation of the unweighted mean + double sd() const; + + // The slope for a line of best fit through the samples + double slope() const { return _slope; } + + // Predict the y-value for the given x value based on linear regression + double predict_y(double x_absolute) const { + return _slope * (x_absolute - _x_origin) + _y_intercept + _y_origin; + } + + // Provides the slope of the line of best fit through the sequence and its predicted y-value at x_absolute + void fit_line(const double x_absolute, double& slope, double& intercept) const { + slope = _slope; + intercept = predict_y(x_absolute); + } + +private: + // Removes about to be overwritten sample from the accumulators and rebases the x and y origins + void deduct_oldest_and_rebase(double x, double y, double weight); + + // Record the sample into the sequence, update x, y accumulators + void add_latest(double x, double y, double weight); +}; + +#endif // SHARE_GC_SHENANDOAH_SHENANDOAHWEIGHTEDSEQ_HPP diff --git a/src/hotspot/share/gc/shenandoah/shenandoahYoungGeneration.cpp b/src/hotspot/share/gc/shenandoah/shenandoahYoungGeneration.cpp index 7a76bc50078..2a1036b3728 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahYoungGeneration.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahYoungGeneration.cpp @@ -100,11 +100,6 @@ size_t ShenandoahYoungGeneration::used() const { return _free_set->young_used(); } -size_t ShenandoahYoungGeneration::bytes_allocated_since_gc_start() const { - assert(ShenandoahHeap::heap()->mode()->is_generational(), "Young implies generational"); - return _free_set->get_bytes_allocated_since_gc_start(); -} - size_t ShenandoahYoungGeneration::get_affiliated_region_count() const { return _free_set->young_affiliated_regions(); } diff --git a/src/hotspot/share/gc/shenandoah/shenandoahYoungGeneration.hpp b/src/hotspot/share/gc/shenandoah/shenandoahYoungGeneration.hpp index c3b6944ec80..6f514c8a322 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahYoungGeneration.hpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahYoungGeneration.hpp @@ -69,11 +69,10 @@ public: // Returns true if the young generation is configured to enqueue old // oops for the old generation mark queues. - bool is_bootstrap_cycle() { + bool is_bootstrap_cycle() const { return _old_gen_task_queues != nullptr; } - size_t bytes_allocated_since_gc_start() const override; size_t used() const override; size_t used_regions() const override; size_t used_regions_size() const override; diff --git a/src/hotspot/share/gc/shenandoah/shenandoah_globals.hpp b/src/hotspot/share/gc/shenandoah/shenandoah_globals.hpp index d26959edf89..68c287dd22b 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoah_globals.hpp +++ b/src/hotspot/share/gc/shenandoah/shenandoah_globals.hpp @@ -34,22 +34,18 @@ range, \ constraint) \ \ - product(uint, ShenandoahAccelerationSamplePeriod, 15, EXPERIMENTAL, \ - "When at least this much time (measured in ms) has passed " \ - "since the acceleration allocation rate was most recently " \ - "sampled, capture another allocation rate sample for the purpose "\ - "of detecting acceleration or momentary spikes in allocation " \ - "rate. A smaller value allows quicker response to changes in " \ - "allocation rates but is more vulnerable to noise and requires " \ - "more monitoring effort.") \ + product(uint, ShenandoahMomentaryAllocRateSampleWindow, 6, EXPERIMENTAL, \ + "The number of samples in the momentary allocation rate moving " \ + "average. This window serves to detect momentary spikes in the " \ + "allocation rate. A smaller value allows quicker response to " \ + "changes in the allocation rate but is more vulnerable to noise " \ + "and requires more monitoring effort. 
Must not be greater than " \ + "ShenandoahRecentAllocRateSampleWindow") \ range(1, 1000) \ \ - product(uint, ShenandoahRateAccelerationSampleSize, 8, EXPERIMENTAL, \ - "In selected ShenandoahControlIntervals " \ - "(if ShenandoahAccelerationSamplePeriod ms have passed " \ - "since previous allocation rate sample), " \ - "we compute the allocation rate since the previous rate was " \ - "sampled. This many samples are analyzed to determine whether " \ + product(uint, ShenandoahRecentAllocRateSampleWindow, 20, EXPERIMENTAL, \ + "The number of samples in the recent allocation rate moving " \ + "average. These samples are analyzed to determine whether " \ "allocation rates are accelerating. Acceleration may occur " \ "due to increasing client demand or due to phase changes in " \ "an application. A larger value reduces sensitivity to " \ @@ -62,30 +58,13 @@ "detected. If the last several of all samples are signficantly " \ "larger than the other samples, the best fit line through all " \ "sampled values will have an upward slope, manifesting as " \ - "acceleration.") \ - range(1,64) \ + "acceleration. Must not be greater than ShenandoahAllocRateSampleWindow") \ + range(1,5000) \ \ - product(uint, ShenandoahMomentaryAllocationRateSpikeSampleSize, \ - 2, EXPERIMENTAL, \ - "In selected ShenandoahControlIntervals " \ - "(if ShenandoahAccelerationSamplePeriod ms have passed " \ - "since previous allocation rate sample), we compute " \ - "the allocation rate since the previous rate was sampled. " \ - "The weighted average of this " \ - "many most recent momentary allocation rate samples is compared " \ - "against current allocation runway and anticipated GC time to " \ - "determine whether a spike in momentary allocation rate " \ - "justifies an early GC trigger. Momentary allocation spike " \ - "detection is in addition to previously implemented " \ - "ShenandoahAdaptiveInitialSpikeThreshold, the latter of which " \ - "is more effective at detecting slower spikes. 
The latter " \ - "spike detection samples at the rate specifieid by " \ - "ShenandoahAdaptiveSampleFrequencyHz. The value of this " \ - "parameter must be less than the value of " \ - "ShenandoahRateAccelerationSampleSize. A larger value makes " \ - "momentary spike detection less sensitive. A smaller value " \ - "may result in excessive GC triggers.") \ - range(1,64) \ + product(uint, ShenandoahAllocRateSampleWindow, 100, EXPERIMENTAL, \ + "The size of the moving window over which the average " \ + "baseline allocation rate is maintained.") \ + range(1,10000) \ \ product(uintx, ShenandoahGenerationalMinPIPUsage, 30, EXPERIMENTAL, \ "(Generational mode only) What percent of a heap region " \ @@ -267,7 +246,7 @@ range(0,100) \ \ product(uintx, ShenandoahAllocationThreshold, 0, EXPERIMENTAL, \ - "How many new allocations should happen since the last GC cycle " \ + "How many bytes may be allocated since the last GC cycle started "\ "before some heuristics trigger the collection. In percents of " \ "(soft) max heap size. Set to zero to effectively disable.") \ range(0,100) \ @@ -291,34 +270,12 @@ "to 100 effectively disables the shortcut.") \ range(0,100) \ \ - product(uintx, ShenandoahAdaptiveSampleFrequencyHz, 10, EXPERIMENTAL, \ - "The number of times per second to update the allocation rate " \ - "moving average.") \ - \ - product(uintx, ShenandoahAdaptiveSampleSizeSeconds, 10, EXPERIMENTAL, \ - "The size of the moving window over which the average " \ - "allocation rate is maintained. The total number of samples " \ - "is the product of this number and the sample frequency.") \ - \ product(double, ShenandoahAdaptiveInitialConfidence, 1.8, EXPERIMENTAL, \ "The number of standard deviations used to determine an initial " \ "margin of error for the average cycle time and average " \ "allocation rate. Increasing this value will cause the " \ "heuristic to initiate more concurrent cycles." 
) \ \ - product(double, ShenandoahAdaptiveInitialSpikeThreshold, 1.8, EXPERIMENTAL, \ - "If the most recently sampled allocation rate is more than " \ - "this many standard deviations away from the moving average, " \ - "then a cycle is initiated. This value controls how sensitive " \ - "the heuristic is to allocation spikes. Decreasing this number " \ - "increases the sensitivity. ") \ - \ - product(double, ShenandoahAdaptiveDecayFactor, 0.5, EXPERIMENTAL, \ - "The decay factor (alpha) used for values in the weighted " \ - "moving average of cycle time and allocation rate. " \ - "Larger values give more weight to recent values.") \ - range(0,1.0) \ - \ product(uintx, ShenandoahGuaranteedGCInterval, 5*60*1000, EXPERIMENTAL, \ "Many heuristics would guarantee a concurrent GC cycle at " \ "least with this interval. This is useful when large idle " \ diff --git a/src/hotspot/share/runtime/arguments.cpp b/src/hotspot/share/runtime/arguments.cpp index eaf0cb7559e..b3397067baa 100644 --- a/src/hotspot/share/runtime/arguments.cpp +++ b/src/hotspot/share/runtime/arguments.cpp @@ -558,6 +558,14 @@ static SpecialFlag const special_jvm_flags[] = { { "UseNewLongLShift", JDK_Version::undefined(), JDK_Version::jdk(27), JDK_Version::jdk(28) }, { "AggressiveHeap", JDK_Version::jdk(26), JDK_Version::jdk(27), JDK_Version::jdk(28) }, + {"ShenandoahAccelerationSamplePeriod", JDK_Version::undefined(), JDK_Version::jdk(27), JDK_Version::jdk(28) }, + {"ShenandoahRateAccelerationSampleSize", JDK_Version::undefined(), JDK_Version::jdk(27), JDK_Version::jdk(28) }, + {"ShenandoahMomentaryAllocationRateSpikeSampleSize", JDK_Version::undefined(), JDK_Version::jdk(27), JDK_Version::jdk(28) }, + {"ShenandoahAdaptiveSampleFrequencyHz", JDK_Version::undefined(), JDK_Version::jdk(27), JDK_Version::jdk(28) }, + {"ShenandoahAdaptiveSampleSizeSeconds", JDK_Version::undefined(), JDK_Version::jdk(27), JDK_Version::jdk(28) }, + {"ShenandoahAdaptiveInitialSpikeThreshold",JDK_Version::undefined(), 
JDK_Version::jdk(27), JDK_Version::jdk(28) }, + {"ShenandoahAdaptiveDecayFactor", JDK_Version::undefined(), JDK_Version::jdk(27), JDK_Version::jdk(28) }, + #ifdef ASSERT { "DummyObsoleteTestFlag", JDK_Version::undefined(), JDK_Version::jdk(18), JDK_Version::undefined() }, #endif diff --git a/test/hotspot/gtest/gc/shenandoah/test_shenandoahAllocationRate.cpp b/test/hotspot/gtest/gc/shenandoah/test_shenandoahAllocationRate.cpp new file mode 100644 index 00000000000..d0f7bcd6a8e --- /dev/null +++ b/test/hotspot/gtest/gc/shenandoah/test_shenandoahAllocationRate.cpp @@ -0,0 +1,161 @@ +/* + * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "unittest.hpp" +#include "gc/shared/gc_globals.hpp" + +#include "gc/shenandoah/shenandoahAllocRate.inline.hpp" + +class ShenandoahMockClock { +public: + static volatile jlong Counter; + static jlong elapsed_counter() { + const jlong result = Counter; + Counter += NANOSECS_PER_SEC; + return result; + } + + static jlong elapsed_frequency() { + return NANOSECS_PER_SEC; + } +}; + +volatile jlong ShenandoahMockClock::Counter = 0; + +class ShenandoahAllocationRateTest : public testing::Test { +protected: + ShenandoahAllocationRateTest() { + ShenandoahMockClock::Counter = 0; + } + + template + static void allocate(ShenandoahAllocRate& rate, size_t quantity) { + rate.allocated(quantity); + } +}; + +constexpr uint BASELINE_SAMPLES = 100; +constexpr uint RECENT_SAMPLES = 8; +constexpr uint MOMENTARY_SAMPLES = 2; +constexpr uint MINIMUM_SAMPLE_SIZE = 1024; + +TEST_VM_F(ShenandoahAllocationRateTest, ignore_too_small_sample) { + ShenandoahAllocRate rate(MINIMUM_SAMPLE_SIZE, BASELINE_SAMPLES, RECENT_SAMPLES, MOMENTARY_SAMPLES); + rate.allocated(512); + EXPECT_DOUBLE_EQ(rate.weighted_average(), 0); +} + +TEST_VM_F(ShenandoahAllocationRateTest, two_second_average) { + ShenandoahAllocRate rate(MINIMUM_SAMPLE_SIZE, BASELINE_SAMPLES, RECENT_SAMPLES, MOMENTARY_SAMPLES); + allocate(rate, 2048); // t = 1 + allocate(rate, 2048); // t = 2 + EXPECT_DOUBLE_EQ(rate.weighted_average(), 2048.0); +} + +TEST_VM_F(ShenandoahAllocationRateTest, accelerated_consumption_small_number_of_samples) { + ShenandoahAllocRate rate(MINIMUM_SAMPLE_SIZE, BASELINE_SAMPLES, RECENT_SAMPLES, MOMENTARY_SAMPLES); + allocate(rate, 1024); + ShenandoahAnticipatedConsumption consumption = rate.snapshot(100, 1); + EXPECT_DOUBLE_EQ(consumption.acceleration(), 0.0); + EXPECT_DOUBLE_EQ(consumption.predicted_rate(), 0.0); + EXPECT_EQ(consumption.accelerated_consumption(), 0UL); +} + +TEST_VM_F(ShenandoahAllocationRateTest, accelerated_consumption_uniform_rate) { + ShenandoahAllocRate 
rate(MINIMUM_SAMPLE_SIZE, BASELINE_SAMPLES, RECENT_SAMPLES, MOMENTARY_SAMPLES); + for (uint i = 0; i < BASELINE_SAMPLES; ++i) { + allocate(rate, 1024); + } + + ShenandoahAnticipatedConsumption consumption = rate.snapshot(100, 1); + EXPECT_DOUBLE_EQ(rate.weighted_average(), 1024); // Average rate, 1024 bytes per tick + EXPECT_DOUBLE_EQ(consumption.acceleration(), 0.0); // No acceleration, rate is constant + EXPECT_DOUBLE_EQ(consumption.momentary_rate(), 1024); // Momentary rate is the same as the average + EXPECT_EQ(consumption.momentary_consumption(), 102400UL); // 100 clock ticks at 1024 bytes per tick + EXPECT_EQ(consumption.accelerated_consumption(), 0UL); +} + +TEST_VM_F(ShenandoahAllocationRateTest, accelerated_consumption_momentary_spike) { + ShenandoahAllocRate rate(MINIMUM_SAMPLE_SIZE, BASELINE_SAMPLES, RECENT_SAMPLES, MOMENTARY_SAMPLES); + for (uint i = 0; i < BASELINE_SAMPLES; ++i) { + allocate(rate, 2048); + } + + for (uint i = 0; i < RECENT_SAMPLES; ++i) { + allocate(rate, 1024); + } + + for (uint i = 0; i < MOMENTARY_SAMPLES + 1; ++i) { + allocate(rate, 2048); + } + + // Here we simulate a situation where we are returning from a lull (avg 1024/s) back + // to the baseline average allocation rate (2048/s). The momentary rate will reflect + // the recent samples, but we will not consider this to be an acceleration. 
+ ShenandoahAnticipatedConsumption consumption = rate.snapshot(100, 1); + EXPECT_DOUBLE_EQ(consumption.acceleration(), 0.0); + EXPECT_DOUBLE_EQ(consumption.momentary_rate(), 2048); + EXPECT_EQ(consumption.momentary_consumption(), 204800UL); + EXPECT_EQ(consumption.accelerated_consumption(), 0UL); +} + +TEST_VM_F(ShenandoahAllocationRateTest, accelerated_consumption_accelerating) { + ShenandoahAllocRate rate(256, BASELINE_SAMPLES, RECENT_SAMPLES, MOMENTARY_SAMPLES); + for (uint i = 0; i < BASELINE_SAMPLES; ++i) { + allocate(rate, 512); + } + + for (uint i = 0; i < RECENT_SAMPLES; ++i) { + allocate(rate, 1024); + } + + for (uint i = 0; i < MOMENTARY_SAMPLES + 1; ++i) { + allocate(rate, 2048); + } + + // Setup as before, but pretend our baseline allocation rate is lower (512). This + // will evaluate the acceleration of the rate. + ShenandoahAnticipatedConsumption consumption = rate.snapshot(100, 1); + EXPECT_GE(consumption.acceleration(), 180.0); + EXPECT_GE(consumption.predicted_rate(), 2047.0); // should be 2048, but can be 2047.9999 from fp issues + EXPECT_GE(consumption.accelerated_consumption(), 102400UL); + EXPECT_EQ(consumption.momentary_consumption(), 0UL); +} + +TEST_VM_F(ShenandoahAllocationRateTest, accelerated_consumption_decelerating) { + ShenandoahAllocRate rate(MINIMUM_SAMPLE_SIZE, BASELINE_SAMPLES, RECENT_SAMPLES, MOMENTARY_SAMPLES); + for (uint i = 0; i < RECENT_SAMPLES; ++i) { + allocate(rate, 2048); + } + + for (uint i = 0; i < MOMENTARY_SAMPLES + 1; ++i) { + allocate(rate, 1024); + } + + // In this setup, the allocation rate is declining. 
+ ShenandoahAnticipatedConsumption consumption = rate.snapshot(100, 1); + EXPECT_DOUBLE_EQ(consumption.acceleration(), 0.0); + EXPECT_DOUBLE_EQ(consumption.momentary_rate(), 1024.0); + EXPECT_EQ(consumption.momentary_consumption(), 102400UL); +} diff --git a/test/hotspot/gtest/gc/shenandoah/test_shenandoahCycleDuration.cpp b/test/hotspot/gtest/gc/shenandoah/test_shenandoahCycleDuration.cpp new file mode 100644 index 00000000000..de87ba49f86 --- /dev/null +++ b/test/hotspot/gtest/gc/shenandoah/test_shenandoahCycleDuration.cpp @@ -0,0 +1,51 @@ +/* + * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ */ + +#include "unittest.hpp" + +#include "gc/shenandoah/shenandoahCycleDuration.hpp" + +TEST_VM(ShenandoahCycleDurationTest, empty_sanity) { + ShenandoahCycleDuration cycles(5); + EXPECT_DOUBLE_EQ(cycles.predict_duration(1.0, 1.0), 0.0); +} + +TEST_VM(ShenandoahCycleDurationTest, predict_duration) { + ShenandoahCycleDuration cycles(5); + cycles.record_duration(1.0, 1.0); + cycles.record_duration(2.0, 2.0); + cycles.record_duration(3.0, 3.0); + EXPECT_DOUBLE_EQ(cycles.predict_duration(4.0, 0.0), 4.0); +} + +TEST_VM(ShenandoahCycleDurationTest, fallback_to_average) { + ShenandoahCycleDuration cycles(5); + cycles.record_duration(1.0, 5.0); + cycles.record_duration(2.0, 4.0); + cycles.record_duration(3.0, 3.0); + // With this downward trend, predicted duration at 6 seconds would be zero, so + // we fall back to the average ((5 + 4 + 3) / 3 = 4) + EXPECT_DOUBLE_EQ(cycles.predict_duration(6.0, 0.0), 4.0); + // Average is 4.0, sd = sqrt((25+16+9)/3 - 16) = sqrt(50/3 - 16) = sqrt(2/3) ~ 0.816 + EXPECT_NEAR(cycles.predict_duration(6.0, 1.0), 4.0 + 0.816, 0.001); +} diff --git a/test/hotspot/gtest/gc/shenandoah/test_shenandoahWeightedSeq.cpp b/test/hotspot/gtest/gc/shenandoah/test_shenandoahWeightedSeq.cpp new file mode 100644 index 00000000000..440facd5776 --- /dev/null +++ b/test/hotspot/gtest/gc/shenandoah/test_shenandoahWeightedSeq.cpp @@ -0,0 +1,155 @@ +/* + * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ +#include "unittest.hpp" +#include "gc/shenandoah/shenandoahWeightedSeq.hpp" + +constexpr uint SAMPLE_SIZE = 3; + +TEST_VM(ShenandoahWeightedSeqTest, empty_sanity) { + ShenandoahWeightedSeq seq(SAMPLE_SIZE); + EXPECT_DOUBLE_EQ(seq.predict(0, 0), 0.0); + EXPECT_DOUBLE_EQ(seq.predict(1, 0), 0.0); +} + +TEST_VM(ShenandoahWeightedSeqTest, predict_flat_line) { + ShenandoahWeightedSeq seq(SAMPLE_SIZE); + seq.add(1, 2); + EXPECT_DOUBLE_EQ(seq.predict(1, 1), 2.0); + EXPECT_DOUBLE_EQ(seq.predict(2, 1), 2.0); +} + +TEST_VM(ShenandoahWeightedSeqTest, predict_y_equals_x) { + ShenandoahWeightedSeq seq(SAMPLE_SIZE); + seq.add(1, 1); + seq.add(2, 2); + seq.add(3, 3); + EXPECT_DOUBLE_EQ(seq.predict(4, 1), 4.0); + EXPECT_DOUBLE_EQ(seq.predict(5, 1), 5.0); +} + +TEST_VM(ShenandoahWeightedSeqTest, predict_y_equals_x_squared) { + ShenandoahWeightedSeq seq(SAMPLE_SIZE); + seq.add(1, 1); + seq.add(2, 4); + seq.add(3, 9); + EXPECT_NEAR(seq.predict(4, 0), 12.666, 0.001); + EXPECT_NEAR(seq.predict(5, 0), 16.666, 0.001); + // Give a margin of error that incorporates residuals standard deviation + EXPECT_NEAR(seq.predict(4, 1), 13.138, 0.001); + EXPECT_NEAR(seq.predict(5, 1), 17.138, 0.001); +} + +TEST_VM(ShenandoahWeightedSeqTest, predict_y_equals_x_squared_overflow) { + ShenandoahWeightedSeq seq(SAMPLE_SIZE); + for (uint i = 0; i < SAMPLE_SIZE; i++) { + seq.add(1, 1); + seq.add(2, 4); + seq.add(3, 9); + } + + EXPECT_NEAR(seq.predict(4, 0), 12.666, 0.001); + 
EXPECT_NEAR(seq.predict(5, 0), 16.666, 0.001); + // Give a margin of error that incorporates residuals standard deviation + EXPECT_NEAR(seq.predict(4, 1), 13.138, 0.001); + EXPECT_NEAR(seq.predict(5, 1), 17.138, 0.001); +} + +TEST_VM(ShenandoahWeightedSeqTest, predict_y_equals_x_long_uptime) { + // Simulates a JVM running ~66 days, using os::elapsedTime() as x. + // With tight sampling (50ms intervals) this hits catastrophic cancellation + // in x_spread = samples * _xx_sum − _x_sum_squared — both operands have magnitude + // whose double-precision LSB is ~0.065, but the true spread is ~0.015. + constexpr double OFFSET = 66.0 * 86400.0; // 66 days of uptime in seconds + constexpr double DT = 0.05; // 50ms between samples + + ShenandoahWeightedSeq seq(SAMPLE_SIZE); + uint i = 1; + for (uint n = SAMPLE_SIZE * 2; i <= n; i++) { + seq.add(OFFSET + i * DT, i); + } + + // Relationship: y = (x − OFFSET)/DT, so predict(OFFSET + i·DT) should give i + EXPECT_NEAR(seq.predict(OFFSET + (i + 1) * DT, 0), i + 1, 0.001); +} + +TEST_VM(ShenandoahWeightedSeqTest, identical_timestamps) { + ShenandoahWeightedSeq seq(SAMPLE_SIZE); + seq.add(100.0, 1.0); + seq.add(100.0, 2.0); + + // Should return a finite prediction (e.g. the average of y). 
+ EXPECT_TRUE(std::isfinite(seq.predict(101.0, 0))); +} + +TEST_VM(ShenandoahWeightedSeqTest, simple_average_no_samples) { + ShenandoahWeightedSeq seq(SAMPLE_SIZE); + EXPECT_DOUBLE_EQ(seq.average(), 0.0); +} + +TEST_VM(ShenandoahWeightedSeqTest, simple_average_one_sample) { + ShenandoahWeightedSeq seq(SAMPLE_SIZE); + seq.add(1, 1); + EXPECT_DOUBLE_EQ(seq.average(), 1.0); +} + +TEST_VM(ShenandoahWeightedSeqTest, simple_average_overflow) { + ShenandoahWeightedSeq seq(SAMPLE_SIZE); + for (uint i = 0; i < SAMPLE_SIZE + 1; i++) { + seq.add(i, 1); + } + EXPECT_DOUBLE_EQ(seq.average(), 1.0); +} + +TEST_VM(ShenandoahWeightedSeqTest, simple_average) { + ShenandoahWeightedSeq seq(SAMPLE_SIZE); + seq.add(1, 1); + seq.add(2, 1); + EXPECT_DOUBLE_EQ(seq.average(), 1.0); +} + +TEST_VM(ShenandoahWeightedSeqTest, simple_average_2) { + ShenandoahWeightedSeq seq(SAMPLE_SIZE); + seq.add(1, 1); + seq.add(2, 2); + EXPECT_DOUBLE_EQ(seq.average(), 1.5); +} + +TEST_VM(ShenandoahWeightedSeqTest, weighted_average_no_samples) { + ShenandoahWeightedSeq seq(SAMPLE_SIZE); + EXPECT_DOUBLE_EQ(seq.weighted_average(), 0.0); +} + +TEST_VM(ShenandoahWeightedSeqTest, weighted_average_one_sample) { + ShenandoahWeightedSeq seq(SAMPLE_SIZE); + seq.add(1, 2, 2); + EXPECT_DOUBLE_EQ(seq.weighted_average(), 2.0); +} + +TEST_VM(ShenandoahWeightedSeqTest, weighted_average_overflow) { + ShenandoahWeightedSeq seq(SAMPLE_SIZE); + for (uint i = 0; i < SAMPLE_SIZE + 1; i++) { + seq.add(i, 2, 2); + } + EXPECT_DOUBLE_EQ(seq.weighted_average(), 2.0); +}