From e615d4903245e554a957d56c2c6573418ee50170 Mon Sep 17 00:00:00 2001 From: William Kemper Date: Thu, 28 May 2026 17:54:21 +0000 Subject: [PATCH] 8385375: Shenandoah: Sampled allocation rate is too slow to react to lower allocation rates Reviewed-by: kdnilsen, ruili --- .../gc/shenandoah/shenandoahAllocRate.hpp | 20 +++++++++- .../shenandoah/shenandoahAllocRate.inline.hpp | 40 +++++++++++++++++-- .../share/gc/shenandoah/shenandoahHeap.cpp | 13 +++++- .../share/gc/shenandoah/shenandoahHeap.hpp | 1 + .../test_shenandoahAllocationRate.cpp | 16 ++++++++ 5 files changed, 84 insertions(+), 6 deletions(-) diff --git a/src/hotspot/share/gc/shenandoah/shenandoahAllocRate.hpp b/src/hotspot/share/gc/shenandoah/shenandoahAllocRate.hpp index 3d0e9d63fb5..ff0a425e260 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahAllocRate.hpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahAllocRate.hpp @@ -30,6 +30,7 @@ #include "runtime/mutex.hpp" #include "runtime/mutexLocker.hpp" #include "runtime/os.hpp" +#include "runtime/task.hpp" #include "utilities/globalDefinitions.hpp" class ShenandoahAllocationClock { @@ -92,7 +93,6 @@ private: double _predicted_rate; }; - // This class tracks three moving averages of the allocation rate: // 1. Momentary: this is the shortest and acts as a sort of 'spike' detector // 2. Recent: larger than momentary, these samples are used to detect 'acceleration' of the rate @@ -144,6 +144,12 @@ public: // Indicate that this many bytes have been allocated (by the mutator). void allocated(size_t allocated_bytes); + // Shenandoah currently evaluates triggers on a dedicated thread to lighten the workload + // for allocators. However, this means that when there isn't enough allocations to update + // the rate, the heuristics will continue to see a high allocation rate. This method is + // for heuristics to periodically force the rate to update and decay the allocation rate. + void force_update(); + // Returns a snapshot of the parameters necessary to evaluate allocation rate triggers. // Note that momentary consumption and accelerated consumption may both be zero, but may // not both be non-zero. The `time_delta` parameter is the anticipated duration of the @@ -164,6 +170,9 @@ public: } private: + // Record the sample under the sample lock + void take_sample(jlong now, jlong elapsed, size_t unsampled); + double upper_bound_no_lock(const double standard_deviations) const { assert(_sample_lock.is_locked(), "Caller must hold lock"); return _baseline.weighted_average() + standard_deviations * _baseline.weighted_sd(); @@ -172,4 +181,13 @@ private: typedef ShenandoahAllocRate<> ShenandoahAllocationRate; +// See description of `force_update` +class ShenandoahDecayAllocRate : public PeriodicTask { + static constexpr size_t DECAY_INTERVAL_MS = 100; + ShenandoahAllocationRate* _rate; +public: + ShenandoahDecayAllocRate(ShenandoahAllocationRate* rate) : PeriodicTask(DECAY_INTERVAL_MS), _rate(rate) {} + void task() override; +}; + #endif // SHARE_GC_SHENANDOAH_SHENANDOAHALLOCRATE_HPP diff --git a/src/hotspot/share/gc/shenandoah/shenandoahAllocRate.inline.hpp b/src/hotspot/share/gc/shenandoah/shenandoahAllocRate.inline.hpp index 42b639de165..eedda19b86e 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahAllocRate.inline.hpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahAllocRate.inline.hpp @@ -44,6 +44,10 @@ inline size_t ShenandoahAnticipatedConsumption::accelerated_consumption() const return shenandoah_safe_size_cast(consumption); } +inline void ShenandoahDecayAllocRate::task() { + _rate->force_update(); +} + template void ShenandoahAllocRate::update_minimum_sample_size(const size_t available) { const size_t min_sample_size = clamp(available / ALLOC_SAMPLE_PORTION, ALLOC_SAMPLE_MIN, ALLOC_SAMPLE_MAX); @@ -81,6 +85,37 @@ void ShenandoahAllocRate::allocated(const size_t allocated_bytes) { return; } + take_sample(now, elapsed, unsampled); + + _sample_lock.unlock(); +} + +template +void ShenandoahAllocRate::force_update() { + if (!_sample_lock.try_lock()) { + // Another thread has the lock and will take the sample + return; + } + + const size_t unsampled = _allocated_bytes_since_last_sample.load_relaxed(); + const jlong now = Clock::elapsed_counter(); + const jlong elapsed = now - _last_sample_time; + + if (elapsed <= 0) { + // Avoid sampling nonsense allocation rates + _sample_lock.unlock(); + return; + } + + take_sample(now, elapsed, unsampled); + + _sample_lock.unlock(); +} + +template +void ShenandoahAllocRate::take_sample(jlong now, jlong elapsed, size_t unsampled) { + assert(_sample_lock.is_locked(), "Caller must hold lock"); + _last_sample_time = now; // We are recording this sample, deduct it from the counter. It may be increased @@ -94,9 +129,8 @@ void ShenandoahAllocRate::allocated(const size_t allocated_bytes) { _recent.add(timestamp, rate_seconds); _momentary.add(timestamp, rate_seconds); - _sample_lock.unlock(); - - log_trace(gc, sampling)("Recorded %.3f/s at %.3fs", rate_seconds, timestamp); + // Careful, still under a lock here + log_develop_trace(gc, sampling)("Recorded %.3f/s at %.3fs", rate_seconds, timestamp); } template diff --git a/src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp b/src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp index 3e295a6cda4..6704dc774f0 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp @@ -555,6 +555,7 @@ ShenandoahHeap::ShenandoahHeap(ShenandoahCollectorPolicy* policy) : _active_generation(nullptr), _initial_size(0), _committed(0), + _alloc_rate_decay(&_alloc_rate), _max_workers(MAX3(ConcGCThreads, ParallelGCThreads, 1U)), _workers(nullptr), _safepoint_workers(nullptr), @@ -699,6 +700,11 @@ void ShenandoahHeap::post_initialize() { // Schedule periodic task to report on gc thread CPU utilization _mmu_tracker.initialize(); + // Periodically decay allocation rate to compensate for not being updated when allocation rate + // is low. Heuristics are evaluated unconditionally from a dedicated thread so it will continue + // to see the last (possibly stale) allocation rate if the allocation rate is low. + _alloc_rate_decay.enroll(); + MutexLocker ml(Threads_lock); ShenandoahInitWorkerGCLABClosure init_gclabs; @@ -2312,10 +2318,13 @@ void ShenandoahHeap::stop() { // Step 1. Stop reporting on gc thread cpu utilization mmu_tracker()->stop(); - // Step 2. Wait until GC worker exits normally (this will cancel any ongoing GC). + // Step 2. Stop decaying allocation rate. + _alloc_rate_decay.disenroll(); + + // Step 3. Wait until GC worker exits normally (this will cancel any ongoing GC). control_thread()->stop(); - // Step 3. Shutdown uncommit thread. + // Step 4. Shutdown uncommit thread. if (_uncommit_thread != nullptr) { _uncommit_thread->stop(); } diff --git a/src/hotspot/share/gc/shenandoah/shenandoahHeap.hpp b/src/hotspot/share/gc/shenandoah/shenandoahHeap.hpp index 6e0070b35ab..ad8cd220968 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahHeap.hpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahHeap.hpp @@ -229,6 +229,7 @@ private: shenandoah_padding(1); ShenandoahAllocationRate _alloc_rate; + ShenandoahDecayAllocRate _alloc_rate_decay; public: void increase_committed(size_t bytes); diff --git a/test/hotspot/gtest/gc/shenandoah/test_shenandoahAllocationRate.cpp b/test/hotspot/gtest/gc/shenandoah/test_shenandoahAllocationRate.cpp index d0f7bcd6a8e..af066657377 100644 --- a/test/hotspot/gtest/gc/shenandoah/test_shenandoahAllocationRate.cpp +++ b/test/hotspot/gtest/gc/shenandoah/test_shenandoahAllocationRate.cpp @@ -159,3 +159,19 @@ TEST_VM_F(ShenandoahAllocationRateTest, accelerated_consumption_decelerating) { EXPECT_DOUBLE_EQ(consumption.momentary_rate(), 1024.0); EXPECT_EQ(consumption.momentary_consumption(), 102400UL); } + +TEST_VM_F(ShenandoahAllocationRateTest, force_updates) { + ShenandoahAllocRate rate(MINIMUM_SAMPLE_SIZE, BASELINE_SAMPLES, RECENT_SAMPLES, MOMENTARY_SAMPLES); + for (uint i = 0; i < BASELINE_SAMPLES; ++i) { + allocate(rate, 2048); + } + EXPECT_DOUBLE_EQ(rate.weighted_average(), 2048.0); + + // Now simulate an equal number of seconds passing without any allocations. This + // should decay our baseline average back to zero. + for (uint i = 0; i < BASELINE_SAMPLES; ++i) { + rate.force_update(); + } + EXPECT_DOUBLE_EQ(rate.weighted_average(), 0.0); +} +