Comment touchup in preparation for review

This commit is contained in:
Kelvin Nilsen 2026-03-26 19:59:48 +00:00
parent fd22b643c0
commit 85265d3afe
7 changed files with 56 additions and 99 deletions

View File

@ -291,8 +291,8 @@ double ShenandoahAdaptiveHeuristics::predict_gc_time(size_t mark_words) {
}
}
// Marking effort is assumed to be a function of "time". During steady state, marking efforts should be constant. During
// initialization, marking may increase linearly as data is retained for promotion.
// Marking effort is assumed to be a function of how many words are marked. During steady state, marking efforts should be
// constant. During initialization, marking may increase linearly as data is retained for promotion.
void ShenandoahAdaptiveHeuristics::record_mark_end(double now, size_t marked_words) {
// mark will be followed by evac or final_roots, we're not sure which
_phase_stats[ShenandoahMajorGCPhase::_evac].set_most_recent_start_time(now);
@ -303,7 +303,7 @@ void ShenandoahAdaptiveHeuristics::record_mark_end(double now, size_t marked_wor
}
// Evacuation effort is assumed to be a function of words evacuated or promoted in place. In non-generational mode,
// use promoted_in_place_words equal zero.
// promoted_in_place_words equals zero.
void ShenandoahAdaptiveHeuristics::record_evac_end(double now, size_t evacuated_words, size_t promoted_in_place_words) {
// evac will be followed by update
_phase_stats[ShenandoahMajorGCPhase::_update].set_most_recent_start_time(now);
@ -311,7 +311,8 @@ void ShenandoahAdaptiveHeuristics::record_evac_end(double now, size_t evacuated_
double duration = now - start_phase_time;
// Evacuation time is a linear function of both evacuated_words and promoted_in_place_words. Analysis of selected
// (not exhaustive) experiments shows that the proportionality constant for evacuated_words is 5 times larger than
// the proportionality constant for promoted_in_place_words. This was determined by first analyzing multiple results
// the proportionality constant for promoted_in_place_words. In other words, it is approximately 5 times more
// costly to evacuate than to promote in place. This was determined by first analyzing multiple results
// for which promoted_in_place_words equals zero to first determine the proportionality constant for evacuated_words,
// and then feeding that result into the analysis of proportionality constant for promoted_in_place_words. Our current
// thoughts are that analyzing two-dimensional linear equations in real time is not practical. Instead, we convert this
@ -320,16 +321,15 @@ void ShenandoahAdaptiveHeuristics::record_evac_end(double now, size_t evacuated_
}
// Update effort is assumed to be a function of live words updated. For young collection, this is number of live words
// in young at start of evac that are not residing within the cset. This does not include the old-gen words that are
// updated from remset. That component is assumed to remain approximately constant and negligible, and will be accounted
// in the y-intercept. For mixed collections, this is the number of live words in young and old at start of evac (excluding cset).
// in young at start of evac that are not residing within the cset plus the remembered set words that reside in the old
// generation. For mixed collections, this is the number of live words in young and old at start of evac (excluding cset).
void ShenandoahAdaptiveHeuristics::record_update_end(double now, size_t updated_words) {
  // The start time of the update phase was stamped when the evacuation phase ended.
  const double update_started_at = _phase_stats[ShenandoahMajorGCPhase::_update].get_most_recent_start_time();
  // Record one (updated words, elapsed time) sample for the update phase.
  record_phase_duration(ShenandoahMajorGCPhase::_update, static_cast<double>(updated_words), now - update_started_at);
}
// Final roots is assumed to be a function of pip_words. For non-generational mode, use zero.
// Final roots is assumed to be a function of pip_words. For non-generational mode, promoted_in_place_words is zero.
void ShenandoahAdaptiveHeuristics::record_final_roots_end(double now, size_t promoted_in_place_words) {
double start_phase_time = _phase_stats[ShenandoahMajorGCPhase::_final_roots].get_most_recent_start_time();
double duration = now - start_phase_time;
@ -352,10 +352,22 @@ double ShenandoahAdaptiveHeuristics::predict_final_roots_time(size_t pip_words)
return _phase_stats[ShenandoahMajorGCPhase::_final_roots].predict_at((double) 0.0);
}
// This is the common entry point for predicting gc time using a prediction model rather than a historical average.
// There are two prediction models:
// 1. the linear prediction model applies to "typical" GC cycles and is based on a theory that gc times may be
// increasing linearly (due to an increase in live memory)
// 2. the phase-accounting model applies to "atypical" GC cycles (e.g. generational global, abbreviated, mixed)
// and relies upon having anticipated values for words to be marked, words to be evacuated, words to be promoted
// in place, and words to be updated.
// If anticipated values are not available, the phase-accounting model returns 0.0 and this function relies upon
// its linear prediction model. When we recognize that an upcoming GC cycle is anticipated to be "typical", we set
// anticipated mark words to zero, which disables the phase-accounting model and forces use of the linear prediction
// model.
double ShenandoahAdaptiveHeuristics::predict_gc_time(double timestamp_at_start) {
size_t mark_words = get_anticipated_mark_words();
double result;
if ((mark_words == 0) || ((result = predict_gc_time(mark_words)) == 0.0)) {
// Return the linear prediction result.
result =_gc_time_m * timestamp_at_start + _gc_time_b + _gc_time_sd * _margin_of_error_sd;
}
return result;
@ -1009,38 +1021,8 @@ ShenandoahPhaseTimeEstimator::ShenandoahPhaseTimeEstimator(const char* name) :
// The x_value represents an input parameter for the size of the phase's work. For example, the evacuation phase is
// parameterized by the amount of memory that we expect to evacuate. The y-value is the time required to execute the phase.
//
// The samples are calibrated under the assumption that workers are not surged. In theory, we should be able to add
// phase-time samples for phases that have experienced worker surge, adjusting the duration by the magnitude of the
// surge. For example, if we surged with 2x the number of normal workers, then we could record that the normal time
// (without the worker surge) to execute this phase would have been 2x the time it took with the 2x worker surge. We
// have found this does not work. It gets us into a death spiral. In particular, this causes the triggering heuristic
// to "believe" it will take too long to execute the phase, so it triggers early, but usually not early enough to safely
// handle the anticipated long duration of the phase (because there is simply not enough allocation runway to handle that
// very long anticipated duration even when we trigger back to back). Then the surge heuristics observes the situation and
// decides we have to surge with even more workers in order to handle the situation we are in. Then at the end of the
// phase, we record the result of executing the phase with the 2.25x as taking 2.25x as long without the surge. It gets
// worse and worse until we are stuck in maximum surge of 3x. Meanwhile, the service is deprived of CPU attention
// because almost all the cores (75%) are fully consumed by out-of-control GC worker surge. So whenever they get CPU
// time, the service threads are very hungry to allocate memory in order to catch up with pending work.
//
// We also experimented with scaling measured surge execution times to lower values. For example, if surge was 2x, we
// tried scaling the measured execution time to 1.5x. This also resulted in the death spiral behavior, albeit at a slightly
// slower pace. Several considerations have motivated us to abandon the pursuit of the "perfect" scale factor:
//
// 1. If we accidentally undershoot the right scale value, we will end up with an overly optimistic scheduling heuristic.
// We will trigger too late for normal operation, and the surge trigger will not kick in because it will not recognize
// that we scheduled too late.
//
// 2. We expect that the "perfect" scale factor will differ for each surge percentage. Typical experience is diminishing
// returns for each new concurrent processor thrown at a shared job due to increased contention for shared resources and
// locking mechanisms.
//
// 3. We expect that the scalability of different phases will be different. Marking, for example, is especially difficulit
// to scale, because typical workloads have mostly small objects, and the current implementation requires synchronization
// between workers for each object that we mark through, and for each object added to the shared scan queue. On the other
// hand, evacuation and updating is much more easily performed by many cores.
//
// Our current approach to this problem is to only add samples that result from measurement of "unsurged execution phases".
// The samples are calibrated under the assumption that workers are not surged. Our current approach is to
// only add samples that result from measurement of "unsurged execution phases".
void ShenandoahPhaseTimeEstimator::add_sample(double x_value, double y_value) {
if (_num_samples >= MaxSamples) {

View File

@ -294,12 +294,13 @@ protected:
// How many total words were evacuated in the most recently completed GC?
size_t _words_most_recently_evacuated;
// How many words do we expect to mark in the next GC?
// (aka how many words did we evacuate from most recently completed GC?)
// How many words do we expect to mark in the next GC? Setting this value to zero effectively disables phase-accounting
// model prediction of GC time for the next GC cycle.
size_t _anticipated_mark_words;
// How many words do we expect to evacuate in the next GC?
// (aka how many words did we evacuate from most recently completed GC?)
// How many words do we expect to evacuate in the next GC? For an anticipated young GC, this is the same as what was
// evacuated in the previous GC cycle. For an anticipated mixed-evac GC, this includes the anticipated mixed-evac
// workload.
size_t _anticipated_evac_words;
// How many words do we expect to update in the next GC?

View File

@ -316,7 +316,8 @@ void ShenandoahGenerationalHeuristics::filter_regions(ShenandoahCollectionSet* c
}
}
if (10 * collection_set->get_live_bytes_in_tenurable_regions() > collection_set->get_live_bytes_in_untenurable_regions()) {
if (ShenandoahHeuristics::is_promotion_significant(collection_set->get_live_bytes_in_tenurable_regions(),
                                                   collection_set->get_live_bytes_in_untenurable_regions())) {
  // Significant promotion workload: treat this GC cycle as atypical.
  gc_cycle_has_significant_promotion();
}
if (collection_set->has_old_regions()) {

View File

@ -236,7 +236,8 @@ void ShenandoahGlobalHeuristics::choose_global_collection_set(ShenandoahCollecti
gc_cycle_has_old();
}
if (10 * cset->get_live_bytes_in_tenurable_regions() > cset->get_live_bytes_in_untenurable_regions()) {
if (ShenandoahHeuristics::is_promotion_significant(cset->get_live_bytes_in_tenurable_regions(),
cset->get_live_bytes_in_untenurable_regions())) {
gc_cycle_has_significant_promotion();
}

View File

@ -240,8 +240,9 @@ protected:
_gc_cycle_is_atypical = 0;
}
// A typical gc cycle is defined as one that has no promotions and no mixed evacuations and is not abbreviated.
// The time required for an atypical gc cycle is computed from phase-accounting model rather than from average cycle time.
// A typical gc cycle is defined as one that has few promotions, no mixed evacuations, is not generational global,
// and is not abbreviated. The time required for an atypical gc cycle is computed from phase-accounting model rather
// than from average cycle time or from linear prediction model.
inline bool is_gc_cycle_typical() {
  // The cycle counts as typical only while no atypical condition has been recorded.
  return _gc_cycle_is_atypical == 0;
}
@ -266,6 +267,12 @@ protected:
return ShenandoahAdaptiveInitialConfidence;
}
// Promotion is considered significant if it represents an increase of more than 10% over the normal young
// evacuation workload.
//
// anticipated_promotion is the amount expected to be promoted (callers pass live bytes in tenurable regions).
// anticipated_young_evac_non_promotion is the remaining evacuation workload (callers pass live bytes in
// untenurable regions). Both arguments must use the same unit.
//
// Returns true iff 10 * anticipated_promotion > anticipated_young_evac_non_promotion.
inline static bool is_promotion_significant(size_t anticipated_promotion, size_t anticipated_young_evac_non_promotion) {
  return 10 * anticipated_promotion > anticipated_young_evac_non_promotion;
}
// If we have reserved for anticipated promotion more than 10% of planned young evacuation load, treat this as an
// atypical GC cycle due to the promotion workload.
inline void gc_cycle_has_significant_promotion() {
@ -356,63 +363,22 @@ public:
return _words_most_recently_evacuated;
}
virtual void record_mark_end(double now, size_t marked_words) {
// Do nothing.
// Subclass ShenandoahAdaptiveHeuristics overrides for satb mode.
// Subclass ShenandoahYoungHeuristics overrides for generational mode.
}
virtual void record_evac_end(double now, size_t evacuated_words, size_t pip_words) {
// Do nothing.
// Subclass ShenandoahAdaptiveHeuristics overrides for satb mode.
// Subclass ShenandoahYoungHeuristics overrides for generational mode.
}
virtual void record_update_end(double now, size_t updated_words) {
// Do nothing.
// Subclass ShenandoahAdaptiveHeuristics overrides for satb mode.
// Subclass ShenandoahYoungHeuristics overrides for generational mode.
}
virtual void record_final_roots_end(double now, size_t pip_words) {
// Do nothing.
// Subclass ShenandoahAdaptiveHeuristics overrides for satb mode.
// Subclass ShenandoahYoungHeuristics overrides for generational mode.
}
virtual double predict_mark_time(size_t anticipated_marked_words) {
// Subclass ShenandoahAdaptiveHeuristics overrides for satb mode.
// Subclass ShenandoahYoungHeuristics overrides for generational mode.
return 0.0;
}
virtual void record_mark_end(double now, size_t marked_words) = 0;
virtual void record_evac_end(double now, size_t evacuated_words, size_t pip_words) = 0;
virtual void record_update_end(double now, size_t updated_words) = 0;
virtual void record_final_roots_end(double now, size_t pip_words) = 0;
virtual double predict_mark_time(size_t anticipated_marked_words) = 0;
// For satb mode, anticipated_pip_words is zero
virtual double predict_evac_time(size_t anticipated_evac_words, size_t anticipated_pip_words) {
// Subclass ShenandoahAdaptiveHeuristics overrides for satb mode.
// Subclass ShenandoahYoungHeuristics overrides for generational mode.
return 0.0;
}
virtual double predict_update_time(size_t anticipated_update_words) {
// Subclass ShenandoahAdaptiveHeuristics overrides for satb mode.
// Subclass ShenandoahYoungHeuristics overrides for generational mode.
return 0.0;
}
virtual double predict_evac_time(size_t anticipated_evac_words, size_t anticipated_pip_words) = 0;
virtual double predict_update_time(size_t anticipated_update_words) = 0;
// In non-generational mode, supply pip_words as zero
virtual double predict_final_roots_time(size_t pip_words) {
// Subclass ShenandoahAdaptiveHeuristics overrides for satb mode.
// Subclass ShenandoahYoungHeuristics overrides for generational mode.
return 0.0;
}
virtual double predict_final_roots_time(size_t pip_words) = 0;
// Predict gc time using conservative approximations of anticipated mark, evac, and update words. Returns 0.0 if there
// is not enough history to make a prediction.
virtual double predict_gc_time(size_t mark_words) {
// Subclass ShenandoahAdaptiveHeuristics overrides for satb mode.
// Subclass ShenandoahYoungHeuristics overrides for generational mode.
return 0.0;
}
virtual double predict_gc_time(size_t mark_words) = 0;
virtual const char* name() = 0;
virtual bool is_diagnostic() = 0;

View File

@ -151,7 +151,8 @@ public:
double predict_gc_time(size_t anticipated_mark_words) override;
// Setting this value to zero denotes current GC cycle to be "traditional young", so average GC cycle tine is best predictor.
// Setting this value to zero denotes current GC cycle to be "traditional young", so average GC cycle time or linear
// prediction are preferred over phase-accounting prediction.
inline void set_anticipated_mark_words(size_t words) {
_anticipated_mark_words = words;
}

View File

@ -74,6 +74,11 @@ public:
void record_success_old();
void record_interrupted_old();
// A collection cycle may be "abbreviated" if Shenandoah finds a sufficient percentage
// of regions that contain no live objects (ShenandoahImmediateThreshold). These cycles
// end after final mark, skipping the evacuation and reference-updating phases. Such
// cycles are very efficient and are worth tracking. Note that both degenerated and
// concurrent cycles can be abbreviated.
void record_success_concurrent(bool is_young, bool is_abbreviated);
// Record that a degenerated cycle has been completed. Note that such a cycle may or