From 85265d3afeed2caf2c69e27bf9aac1a8ee9076ca Mon Sep 17 00:00:00 2001 From: Kelvin Nilsen Date: Thu, 26 Mar 2026 19:59:48 +0000 Subject: [PATCH] Comment touchup in preparation for review --- .../shenandoahAdaptiveHeuristics.cpp | 62 ++++++---------- .../shenandoahAdaptiveHeuristics.hpp | 9 +-- .../shenandoahGenerationalHeuristics.cpp | 3 +- .../heuristics/shenandoahGlobalHeuristics.cpp | 3 +- .../heuristics/shenandoahHeuristics.hpp | 70 +++++-------------- .../heuristics/shenandoahYoungHeuristics.hpp | 3 +- .../shenandoah/shenandoahCollectorPolicy.hpp | 5 ++ 7 files changed, 56 insertions(+), 99 deletions(-) diff --git a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahAdaptiveHeuristics.cpp b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahAdaptiveHeuristics.cpp index 7871db3fb93..ffcc27d25c7 100644 --- a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahAdaptiveHeuristics.cpp +++ b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahAdaptiveHeuristics.cpp @@ -291,8 +291,8 @@ double ShenandoahAdaptiveHeuristics::predict_gc_time(size_t mark_words) { } } -// Marking effort is assumed to be a function of "time". During steady state, marking efforts should be constant. During -// initialization, marking may increase linearly as data is retained for promotion. +// Marking effort is assumed to be a function of how many words are marked. During steady state, marking efforts should be +// constant. During initialization, marking may increase linearly as data is retained for promotion. void ShenandoahAdaptiveHeuristics::record_mark_end(double now, size_t marked_words) { // mark will be followed by evac or final_roots, we're not sure which _phase_stats[ShenandoahMajorGCPhase::_evac].set_most_recent_start_time(now); @@ -303,7 +303,7 @@ void ShenandoahAdaptiveHeuristics::record_mark_end(double now, size_t marked_wor } // Evacuation effort is assumed to be a function of words evacuated or promoted in place. 
In non-generational mode, -// use promoted_in_place_words equal zero. +// promoted_in_place_words equals zero. void ShenandoahAdaptiveHeuristics::record_evac_end(double now, size_t evacuated_words, size_t promoted_in_place_words) { // evac will be followed by update _phase_stats[ShenandoahMajorGCPhase::_update].set_most_recent_start_time(now); @@ -311,7 +311,8 @@ void ShenandoahAdaptiveHeuristics::record_evac_end(double now, size_t evacuated_ double duration = now - start_phase_time; // Evacuation time is a linear function of both evacuated_words and promoted_in_place_words. Analysis of selected // (not exhaustive) experiments shows that the proportionality constant for evacuated_words is 5 times larger than - // the proportionality constant for promoted_in_place_words. This was determined by first analyzing multiple results + // the proportionality constant for promoted_in_place_words. In other words, it is approximately 5 times more + // costly to evacuate than to promote in place. This was determined by first analyzing multiple results // for which promoted_in_place_words equals zero to first determine the proportionality constant for evacuated_words, // and then feeding that result into the analysis of proportionality constant for promoted_in_place_words. Our current // thoughts are that analyzing two-dimensional linear equations in real time is not practical. Instead, we convert this @@ -320,16 +321,15 @@ void ShenandoahAdaptiveHeuristics::record_evac_end(double now, size_t evacuated_ } // Update effort is assumed to be a function of live words updated. For young collection, this is number of live words -// in young at start of evac that are not residing within the cset. This does not include the old-gen words that are -// updated from remset. That component is assumed to remain approximately constant and negligible, and will be accounted -// in the y-intercept. 
For mixed collections, this is the number of live words in young and old at start of evac (excluding cset). +// in young at start of evac that are not residing within the cset plus the remembered set words that reside in the old +// generation. For mixed collections, this is the number of live words in young and old at start of evac (excluding cset). void ShenandoahAdaptiveHeuristics::record_update_end(double now, size_t updated_words) { double start_phase_time = _phase_stats[ShenandoahMajorGCPhase::_update].get_most_recent_start_time(); double duration = now - start_phase_time; record_phase_duration(ShenandoahMajorGCPhase::_update, (double) updated_words, duration); } -// Final roots is assumed to be a function of pip_words. For non-generational mode, use zero. +// Final roots is assumed to be a function of pip_words. For non-generational mode, promoted_in_place_words is zero. void ShenandoahAdaptiveHeuristics::record_final_roots_end(double now, size_t promoted_in_place_words) { double start_phase_time = _phase_stats[ShenandoahMajorGCPhase::_final_roots].get_most_recent_start_time(); double duration = now - start_phase_time; @@ -352,10 +352,22 @@ double ShenandoahAdaptiveHeuristics::predict_final_roots_time(size_t pip_words) return _phase_stats[ShenandoahMajorGCPhase::_final_roots].predict_at((double) 0.0); } +// This is the common entry point for predicting gc time by prediction model rather than by historical average. +// There are two prediction models: +// 1. the linear prediction model applies to "typical" GC cycles and is based on a theory that gc times may be +// increasing linearly (due to an increase in live memory) +// 2. the phase-accounting model applies to "atypical" GC cycles (e.g. generational global, abbreviated, mixed) +// and relies upon having anticipated values for words to be marked, words to be evacuated, words to be promoted +// in place, and words to be updated. 
+// If anticipated values are not available, the phase-accounting model returns 0.0 and this function relies upon +// its linear prediction model. When we recognize that an upcoming GC cycle is anticipated to be "typical", we set +// anticipated mark words to zero in order to disable the phase-accounting model in order to force the linear prediction +// model. double ShenandoahAdaptiveHeuristics::predict_gc_time(double timestamp_at_start) { size_t mark_words = get_anticipated_mark_words(); double result; if ((mark_words == 0) || ((result = predict_gc_time(mark_words)) == 0.0)) { + // Return the linear prediction result. result =_gc_time_m * timestamp_at_start + _gc_time_b + _gc_time_sd * _margin_of_error_sd; } return result; @@ -1009,38 +1021,8 @@ ShenandoahPhaseTimeEstimator::ShenandoahPhaseTimeEstimator(const char* name) : // The x_value represents an input parameter for the size of the phase's work. For example, the evacuation phase is // parameterized by the amount of memory that we expect to evacuate. The y-value is the time required to execute the phase. // -// The samples are calibrated under the assumption that workers are not surged. In theory, we should be able to add -// phase-time samples for phases that have experienced worker surge, adjusting the duration by the magnitude of the -// surge. For example, if we surged with 2x the number of normal workers, then we could record that the normal time -// (without the worker surge) to execute this phase would have been 2x the time it took with the 2x worker surge. We -// have found this does not work. It gets us into a death spiral. In particular, this causes the triggering heuristic -// to "believe" it will take too long to execute the phase, so it triggers early, but usually not early enough to safely -// handle the anticipated long duration of the phase (because there is simply not enough allocation runway to handle that -// very long anticipated duration even when we trigger back to back). 
Then the surge heuristics observes the situation and -// decides we have to surge with even more workers in order to handle the situation we are in. Then at the end of the -// phase, we record the result of executing the phase with the 2.25x as taking 2.25x as long without the surge. It gets -// worse and worse until we are stuck in maximum surge of 3x. Meanwhile, the service is deprived of CPU attention -// because almost all the cores (75%) are fully consumed by out-of-control GC worker surge. So whenever they get CPU -// time, the service threads are very hungry to allocate memory in order to catch up with pending work. -// -// We also experimented with scaling measured surge execution times to lower values. For example, if surge was 2x, we -// tried scaling the measured execution time to 1.5x. This also resulted in the death spiral behavior, albeit at a slightly -// slower pace. Several considerations have motivated us to abandon the pursuit of the "perfect" scale factor: -// -// 1. If we accidentally undershoot the right scale value, we will end up with an overly optimistic scheduling heuristic. -// We will trigger too late for normal operation, and the surge trigger will not kick in because it will not recognize -// that we scheduled too late. -// -// 2. We expect that the "perfect" scale factor will differ for each surge percentage. Typical experience is diminishing -// returns for each new concurrent processor thrown at a shared job due to increased contention for shared resources and -// locking mechanisms. -// -// 3. We expect that the scalability of different phases will be different. Marking, for example, is especially difficulit -// to scale, because typical workloads have mostly small objects, and the current implementation requires synchronization -// between workers for each object that we mark through, and for each object added to the shared scan queue. On the other -// hand, evacuation and updating is much more easily performed by many cores. 
-// -// Our current approach to this problem is to only add samples that result from measurement of "unsurged execution phases". +// The samples are calibrated under the assumption that workers are not surged. Our current approach is to +// only add samples that result from measurement of "unsurged execution phases". void ShenandoahPhaseTimeEstimator::add_sample(double x_value, double y_value) { if (_num_samples >= MaxSamples) { diff --git a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahAdaptiveHeuristics.hpp b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahAdaptiveHeuristics.hpp index dccfe102f26..d3f07156b2e 100644 --- a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahAdaptiveHeuristics.hpp +++ b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahAdaptiveHeuristics.hpp @@ -294,12 +294,13 @@ protected: // How many total words were evacuated in the most recently completed GC? size_t _words_most_recently_evacuated; - // How many words do we expect to mark in the next GC? - // (aka how many words did we evacuate from most recently completed GC?) + // How many words do we expect to mark in the next GC? Setting this value to zero effectively disables phase-account + // model prediction of GC time for the next GC cycle. size_t _anticipated_mark_words; - // How many words do we expect to evacuate in the next GC? - // (aka how many words did we evacuate from most recently completed GC?) + // How many words do we expect to evacuate in the next GC? For an anticipated young GC, this is the same as what was + // evacuated in the previous GC cycle. For an anticipated mixed-evac GC, this includes the anticipated mixed-evac + // workload. size_t _anticipated_evac_words; // How many words do we expect to update in the next GC? 
diff --git a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahGenerationalHeuristics.cpp b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahGenerationalHeuristics.cpp index 8d73a6319f0..47b87a621f3 100644 --- a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahGenerationalHeuristics.cpp +++ b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahGenerationalHeuristics.cpp @@ -316,7 +316,8 @@ void ShenandoahGenerationalHeuristics::filter_regions(ShenandoahCollectionSet* c } } - if (10 * collection_set->get_live_bytes_in_tenurable_regions() > collection_set->get_live_bytes_in_untenurable_regions()) { + if (ShenandoahHeuristics::is_promotion_significant(collection_set->get_live_bytes_in_tenurable_regions(), + collection_set->get_live_bytes_in_untenurable_regions())) { gc_cycle_has_significant_promotion(); } if (collection_set->has_old_regions()) { diff --git a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahGlobalHeuristics.cpp b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahGlobalHeuristics.cpp index 812645686c3..f27d01cf38c 100644 --- a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahGlobalHeuristics.cpp +++ b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahGlobalHeuristics.cpp @@ -236,7 +236,8 @@ void ShenandoahGlobalHeuristics::choose_global_collection_set(ShenandoahCollecti gc_cycle_has_old(); } - if (10 * cset->get_live_bytes_in_tenurable_regions() > cset->get_live_bytes_in_untenurable_regions()) { + if (ShenandoahHeuristics::is_promotion_significant(cset->get_live_bytes_in_tenurable_regions(), + cset->get_live_bytes_in_untenurable_regions())) { gc_cycle_has_significant_promotion(); } diff --git a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahHeuristics.hpp b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahHeuristics.hpp index 9ecd370ea7d..96d39121418 100644 --- a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahHeuristics.hpp +++ b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahHeuristics.hpp @@ -240,8 +240,9 
@@ protected: _gc_cycle_is_atypical = 0; } - // A typical gc cycle is defined as one that has no promotions and no mixed evacuations and is not abbreviated. - // The time required for an atypical gc cycle is computed from phase-accounting model rather than from average cycle time. + // A typical gc cycle is defined as one that has few promotions, no mixed evacuations, is not generational global, + // and is not abbreviated. The time required for an atypical gc cycle is computed from phase-accounting model rather + // than from average cycle time or from linear prediction model. inline bool is_gc_cycle_typical() { return !_gc_cycle_is_atypical; } @@ -266,6 +267,12 @@ protected: return ShenandoahAdaptiveInitialConfidence; } + // Promotion is considered significant if it represents an increase of more than 10% over the normal young + // evacuation workload. + inline static bool is_promotion_significant(size_t anticipated_promotion, size_t anticipated_young_evac_non_promotion) { + return (10 * anticipated_promotion > anticipated_young_evac_non_promotion); + } + // If we have reserved for anticipated promotion more than 10% of planned young evacuation load, treat this as an // atypical GC cycle due to the promotion workload. inline void gc_cycle_has_significant_promotion() { @@ -356,63 +363,22 @@ public: return _words_most_recently_evacuated; } - virtual void record_mark_end(double now, size_t marked_words) { - // Do nothing. - // Subclass ShenandoahAdaptiveHeuristics overrides for satb mode. - // Subclass ShenandoahYoungHeuristics overrides for generational mode. - } - - virtual void record_evac_end(double now, size_t evacuated_words, size_t pip_words) { - // Do nothing. - // Subclass ShenandoahAdaptiveHeuristics overrides for satb mode. - // Subclass ShenandoahYoungHeuristics overrides for generational mode. - } - - virtual void record_update_end(double now, size_t updated_words) { - // Do nothing. - // Subclass ShenandoahAdaptiveHeuristics overrides for satb mode. 
- // Subclass ShenandoahYoungHeuristics overrides for generational mode. - } - - virtual void record_final_roots_end(double now, size_t pip_words) { - // Do nothing. - // Subclass ShenandoahAdaptiveHeuristics overrides for satb mode. - // Subclass ShenandoahYoungHeuristics overrides for generational mode. - } - - virtual double predict_mark_time(size_t anticipated_marked_words) { - // Subclass ShenandoahAdaptiveHeuristics overrides for satb mode. - // Subclass ShenandoahYoungHeuristics overrides for generational mode. - return 0.0; - } + virtual void record_mark_end(double now, size_t marked_words) = 0; + virtual void record_evac_end(double now, size_t evacuated_words, size_t pip_words) = 0; + virtual void record_update_end(double now, size_t updated_words) = 0; + virtual void record_final_roots_end(double now, size_t pip_words) = 0; + virtual double predict_mark_time(size_t anticipated_marked_words) = 0; // For satb mode, anticipate_pip_words is zero - virtual double predict_evac_time(size_t anticipated_evac_words, size_t anticipated_pip_words) { - // Subclass ShenandoahAdaptiveHeuristics overrides for satb mode. - // Subclass ShenandoahYoungHeuristics overrides for generational mode. - return 0.0; - } - - virtual double predict_update_time(size_t anticipated_update_words) { - // Subclass ShenandoahAdaptiveHeuristics overrides for satb mode. - // Subclass ShenandoahYoungHeuristics overrides for generational mode. - return 0.0; - } + virtual double predict_evac_time(size_t anticipated_evac_words, size_t anticipated_pip_words) = 0; + virtual double predict_update_time(size_t anticipated_update_words) = 0; // In non-generational mode, supply pip_words as zero - virtual double predict_final_roots_time(size_t pip_words) { - // Subclass ShenandoahAdaptiveHeuristics overrides for satb mode. - // Subclass ShenandoahYoungHeuristics overrides for generational mode. 
- return 0.0; - } + virtual double predict_final_roots_time(size_t pip_words) = 0; // Predict gc time using conservative approximations of anticipated mark, evac, and update words. Returns 0.0 if there // is not enough history to make a prediction. - virtual double predict_gc_time(size_t mark_words) { - // Subclass ShenandoahAdaptiveHeuristics overrides for satb mode. - // Subclass ShenandoahYoungHeuristics overrides for generational mode. - return 0.0; - } + virtual double predict_gc_time(size_t mark_words) = 0; virtual const char* name() = 0; virtual bool is_diagnostic() = 0; diff --git a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahYoungHeuristics.hpp b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahYoungHeuristics.hpp index 46544abb236..beb64da8dd8 100644 --- a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahYoungHeuristics.hpp +++ b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahYoungHeuristics.hpp @@ -151,7 +151,8 @@ public: double predict_gc_time(size_t anticipated_mark_words) override; - // Setting this value to zero denotes current GC cycle to be "traditional young", so average GC cycle tine is best predictor. + // Setting this value to zero denotes current GC cycle to be "traditional young", so average GC cycle time or linear + // prediction are preferred over phase-account prediction. inline void set_anticipated_mark_words(size_t words) { _anticipated_mark_words = words; } diff --git a/src/hotspot/share/gc/shenandoah/shenandoahCollectorPolicy.hpp b/src/hotspot/share/gc/shenandoah/shenandoahCollectorPolicy.hpp index 2e13ac864ab..1166333ae3a 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahCollectorPolicy.hpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahCollectorPolicy.hpp @@ -74,6 +74,11 @@ public: void record_success_old(); void record_interrupted_old(); + // A collection cycle may be "abbreviated" if Shenandoah finds a sufficient percentage + // of regions that contain no live objects (ShenandoahImmediateThreshold). 
These cycles + // end after final mark, skipping the evacuation and reference-updating phases. Such + // cycles are very efficient and are worth tracking. Note that both degenerated and + // concurrent cycles can be abbreviated. void record_success_concurrent(bool is_young, bool is_abbreviated); // Record that a degenerated cycle has been completed. Note that such a cycle may or