8385375: Shenandoah: Sampled allocation rate is too slow to react to lower allocation rates

Reviewed-by: kdnilsen, ruili
This commit is contained in:
William Kemper 2026-05-28 17:54:21 +00:00
parent a453a8bd0c
commit e615d49032
5 changed files with 84 additions and 6 deletions

View File

@ -30,6 +30,7 @@
#include "runtime/mutex.hpp"
#include "runtime/mutexLocker.hpp"
#include "runtime/os.hpp"
#include "runtime/task.hpp"
#include "utilities/globalDefinitions.hpp"
class ShenandoahAllocationClock {
@ -92,7 +93,6 @@ private:
double _predicted_rate;
};
// This class tracks three moving averages of the allocation rate:
// 1. Momentary: this is the shortest and acts as a sort of 'spike' detector
// 2. Recent: larger than momentary, these samples are used to detect 'acceleration' of the rate
@ -144,6 +144,12 @@ public:
// Indicate that this many bytes have been allocated (by the mutator).
void allocated(size_t allocated_bytes);
// Shenandoah currently evaluates triggers on a dedicated thread to lighten the workload
// for allocators. However, this means that when there isn't enough allocations to update
// the rate, the heuristics will continue to see a high allocation rate. This method is
// for heuristics to periodically force the rate to update and decay the allocation rate.
void force_update();
// Returns a snapshot of the parameters necessary to evaluate allocation rate triggers.
// Note that momentary consumption and accelerated consumption may both be zero, but may
// not both be non-zero. The `time_delta` parameter is the anticipated duration of the
@ -164,6 +170,9 @@ public:
}
private:
// Record the sample under the sample lock
void take_sample(jlong now, jlong elapsed, size_t unsampled);
double upper_bound_no_lock(const double standard_deviations) const {
assert(_sample_lock.is_locked(), "Caller must hold lock");
return _baseline.weighted_average() + standard_deviations * _baseline.weighted_sd();
@ -172,4 +181,13 @@ private:
typedef ShenandoahAllocRate<> ShenandoahAllocationRate;
// See description of `force_update`
class ShenandoahDecayAllocRate : public PeriodicTask {
static constexpr size_t DECAY_INTERVAL_MS = 100;
ShenandoahAllocationRate* _rate;
public:
ShenandoahDecayAllocRate(ShenandoahAllocationRate* rate) : PeriodicTask(DECAY_INTERVAL_MS), _rate(rate) {}
void task() override;
};
#endif // SHARE_GC_SHENANDOAH_SHENANDOAHALLOCRATE_HPP

View File

@ -44,6 +44,10 @@ inline size_t ShenandoahAnticipatedConsumption::accelerated_consumption() const
return shenandoah_safe_size_cast(consumption);
}
inline void ShenandoahDecayAllocRate::task() {
_rate->force_update();
}
template<typename Clock>
void ShenandoahAllocRate<Clock>::update_minimum_sample_size(const size_t available) {
const size_t min_sample_size = clamp(available / ALLOC_SAMPLE_PORTION, ALLOC_SAMPLE_MIN, ALLOC_SAMPLE_MAX);
@ -81,6 +85,37 @@ void ShenandoahAllocRate<Clock>::allocated(const size_t allocated_bytes) {
return;
}
take_sample(now, elapsed, unsampled);
_sample_lock.unlock();
}
template<typename Clock>
void ShenandoahAllocRate<Clock>::force_update() {
if (!_sample_lock.try_lock()) {
// Another thread has the lock and will take the sample
return;
}
const size_t unsampled = _allocated_bytes_since_last_sample.load_relaxed();
const jlong now = Clock::elapsed_counter();
const jlong elapsed = now - _last_sample_time;
if (elapsed <= 0) {
// Avoid sampling nonsense allocation rates
_sample_lock.unlock();
return;
}
take_sample(now, elapsed, unsampled);
_sample_lock.unlock();
}
template<typename Clock>
void ShenandoahAllocRate<Clock>::take_sample(jlong now, jlong elapsed, size_t unsampled) {
assert(_sample_lock.is_locked(), "Caller must hold lock");
_last_sample_time = now;
// We are recording this sample, deduct it from the counter. It may be increased
@ -94,9 +129,8 @@ void ShenandoahAllocRate<Clock>::allocated(const size_t allocated_bytes) {
_recent.add(timestamp, rate_seconds);
_momentary.add(timestamp, rate_seconds);
_sample_lock.unlock();
log_trace(gc, sampling)("Recorded %.3f/s at %.3fs", rate_seconds, timestamp);
// Careful, still under a lock here
log_develop_trace(gc, sampling)("Recorded %.3f/s at %.3fs", rate_seconds, timestamp);
}
template<typename Clock>

View File

@ -555,6 +555,7 @@ ShenandoahHeap::ShenandoahHeap(ShenandoahCollectorPolicy* policy) :
_active_generation(nullptr),
_initial_size(0),
_committed(0),
_alloc_rate_decay(&_alloc_rate),
_max_workers(MAX3(ConcGCThreads, ParallelGCThreads, 1U)),
_workers(nullptr),
_safepoint_workers(nullptr),
@ -699,6 +700,11 @@ void ShenandoahHeap::post_initialize() {
// Schedule periodic task to report on gc thread CPU utilization
_mmu_tracker.initialize();
// Periodically decay allocation rate to compensate for not being updated when allocation rate
// is low. Heuristics are evaluated unconditionally from a dedicated thread so it will continue
// to see the last (possibly stale) allocation rate if the allocation rate is low.
_alloc_rate_decay.enroll();
MutexLocker ml(Threads_lock);
ShenandoahInitWorkerGCLABClosure init_gclabs;
@ -2312,10 +2318,13 @@ void ShenandoahHeap::stop() {
// Step 1. Stop reporting on gc thread cpu utilization
mmu_tracker()->stop();
// Step 2. Wait until GC worker exits normally (this will cancel any ongoing GC).
// Step 2. Stop decaying allocation rate.
_alloc_rate_decay.disenroll();
// Step 3. Wait until GC worker exits normally (this will cancel any ongoing GC).
control_thread()->stop();
// Step 3. Shutdown uncommit thread.
// Step 4. Shutdown uncommit thread.
if (_uncommit_thread != nullptr) {
_uncommit_thread->stop();
}

View File

@ -229,6 +229,7 @@ private:
shenandoah_padding(1);
ShenandoahAllocationRate _alloc_rate;
ShenandoahDecayAllocRate _alloc_rate_decay;
public:
void increase_committed(size_t bytes);

View File

@ -159,3 +159,19 @@ TEST_VM_F(ShenandoahAllocationRateTest, accelerated_consumption_decelerating) {
EXPECT_DOUBLE_EQ(consumption.momentary_rate(), 1024.0);
EXPECT_EQ(consumption.momentary_consumption(), 102400UL);
}
TEST_VM_F(ShenandoahAllocationRateTest, force_updates) {
ShenandoahAllocRate<ShenandoahMockClock> rate(MINIMUM_SAMPLE_SIZE, BASELINE_SAMPLES, RECENT_SAMPLES, MOMENTARY_SAMPLES);
for (uint i = 0; i < BASELINE_SAMPLES; ++i) {
allocate(rate, 2048);
}
EXPECT_DOUBLE_EQ(rate.weighted_average(), 2048.0);
// Now simulate an equal number of seconds passing without any allocations. This
// should decay our baseline average back to zero.
for (uint i = 0; i < BASELINE_SAMPLES; ++i) {
rate.force_update();
}
EXPECT_DOUBLE_EQ(rate.weighted_average(), 0.0);
}