8385375: Shenandoah: Sampled allocation rate is too slow to react to lower allocation rates

Reviewed-by: kdnilsen, ruili
2026-07-28 03:43:21 +00:00 · 2026-05-28 17:54:21 +00:00 · 2026-05-28 17:54:21 +00:00 · e615d49032
commit e615d49032
parent a453a8bd0c
5 changed files with 84 additions and 6 deletions
--- a/src/hotspot/share/gc/shenandoah/shenandoahAllocRate.hpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahAllocRate.hpp
@ -30,6 +30,7 @@
 #include "runtime/mutex.hpp"
 #include "runtime/mutexLocker.hpp"
 #include "runtime/os.hpp"
+#include "runtime/task.hpp"
 #include "utilities/globalDefinitions.hpp"

 class ShenandoahAllocationClock {
@ -92,7 +93,6 @@ private:
  double _predicted_rate;
 };

-
 // This class tracks three moving averages of the allocation rate:
 //  1. Momentary: this is the shortest and acts as a sort of 'spike' detector
 //  2. Recent: larger than momentary, these samples are used to detect 'acceleration' of the rate
@ -144,6 +144,12 @@ public:
  // Indicate that this many bytes have been allocated (by the mutator).
  void allocated(size_t allocated_bytes);

+  // Shenandoah currently evaluates triggers on a dedicated thread to lighten the workload
+  // for allocators. However, this means that when there isn't enough allocations to update
+  // the rate, the heuristics will continue to see a high allocation rate. This method is
+  // for heuristics to periodically force the rate to update and decay the allocation rate.
+  void force_update();
+
  // Returns a snapshot of the parameters necessary to evaluate allocation rate triggers.
  // Note that momentary consumption and accelerated consumption may both be zero, but may
  // not both be non-zero. The `time_delta` parameter is the anticipated duration of the
@ -164,6 +170,9 @@ public:
  }

 private:
+  // Record the sample under the sample lock
+  void take_sample(jlong now, jlong elapsed, size_t unsampled);
+
  double upper_bound_no_lock(const double standard_deviations) const {
    assert(_sample_lock.is_locked(), "Caller must hold lock");
    return _baseline.weighted_average() + standard_deviations * _baseline.weighted_sd();
@ -172,4 +181,13 @@ private:

 typedef ShenandoahAllocRate<> ShenandoahAllocationRate;

+// See description of `force_update`
+class ShenandoahDecayAllocRate : public PeriodicTask {
+  static constexpr size_t DECAY_INTERVAL_MS = 100;
+  ShenandoahAllocationRate* _rate;
+public:
+  ShenandoahDecayAllocRate(ShenandoahAllocationRate* rate) : PeriodicTask(DECAY_INTERVAL_MS), _rate(rate) {}
+  void task() override;
+};
+
 #endif // SHARE_GC_SHENANDOAH_SHENANDOAHALLOCRATE_HPP
--- a/src/hotspot/share/gc/shenandoah/shenandoahAllocRate.inline.hpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahAllocRate.inline.hpp
@ -44,6 +44,10 @@ inline size_t ShenandoahAnticipatedConsumption::accelerated_consumption() const
  return shenandoah_safe_size_cast(consumption);
 }

+inline void ShenandoahDecayAllocRate::task() {
+  _rate->force_update();
+}
+
 template<typename Clock>
 void ShenandoahAllocRate<Clock>::update_minimum_sample_size(const size_t available) {
  const size_t min_sample_size = clamp(available / ALLOC_SAMPLE_PORTION, ALLOC_SAMPLE_MIN, ALLOC_SAMPLE_MAX);
@ -81,6 +85,37 @@ void ShenandoahAllocRate<Clock>::allocated(const size_t allocated_bytes) {
    return;
  }

+  take_sample(now, elapsed, unsampled);
+
+  _sample_lock.unlock();
+}
+
+template<typename Clock>
+void ShenandoahAllocRate<Clock>::force_update() {
+  if (!_sample_lock.try_lock()) {
+    // Another thread has the lock and will take the sample
+    return;
+  }
+
+  const size_t unsampled = _allocated_bytes_since_last_sample.load_relaxed();
+  const jlong now = Clock::elapsed_counter();
+  const jlong elapsed = now - _last_sample_time;
+
+  if (elapsed <= 0) {
+    // Avoid sampling nonsense allocation rates
+    _sample_lock.unlock();
+    return;
+  }
+
+  take_sample(now, elapsed, unsampled);
+
+  _sample_lock.unlock();
+}
+
+template<typename Clock>
+void ShenandoahAllocRate<Clock>::take_sample(jlong now, jlong elapsed, size_t unsampled) {
+  assert(_sample_lock.is_locked(), "Caller must hold lock");
+
  _last_sample_time = now;

  // We are recording this sample, deduct it from the counter. It may be increased
@ -94,9 +129,8 @@ void ShenandoahAllocRate<Clock>::allocated(const size_t allocated_bytes) {
  _recent.add(timestamp, rate_seconds);
  _momentary.add(timestamp, rate_seconds);

-  _sample_lock.unlock();
-
-  log_trace(gc, sampling)("Recorded %.3f/s at %.3fs", rate_seconds, timestamp);
+  // Careful, still under a lock here
+  log_develop_trace(gc, sampling)("Recorded %.3f/s at %.3fs", rate_seconds, timestamp);
 }

 template<typename Clock>
--- a/src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp
@ -555,6 +555,7 @@ ShenandoahHeap::ShenandoahHeap(ShenandoahCollectorPolicy* policy) :
  _active_generation(nullptr),
  _initial_size(0),
  _committed(0),
+  _alloc_rate_decay(&_alloc_rate),
  _max_workers(MAX3(ConcGCThreads, ParallelGCThreads, 1U)),
  _workers(nullptr),
  _safepoint_workers(nullptr),
@ -699,6 +700,11 @@ void ShenandoahHeap::post_initialize() {
  // Schedule periodic task to report on gc thread CPU utilization
  _mmu_tracker.initialize();

+  // Periodically decay allocation rate to compensate for not being updated when allocation rate
+  // is low. Heuristics are evaluated unconditionally from a dedicated thread so it will continue
+  // to see the last (possibly stale) allocation rate if the allocation rate is low.
+  _alloc_rate_decay.enroll();
+
  MutexLocker ml(Threads_lock);

  ShenandoahInitWorkerGCLABClosure init_gclabs;
@ -2312,10 +2318,13 @@ void ShenandoahHeap::stop() {
  // Step 1. Stop reporting on gc thread cpu utilization
  mmu_tracker()->stop();

-  // Step 2. Wait until GC worker exits normally (this will cancel any ongoing GC).
+  // Step 2. Stop decaying allocation rate.
+  _alloc_rate_decay.disenroll();
+
+  // Step 3. Wait until GC worker exits normally (this will cancel any ongoing GC).
  control_thread()->stop();

-  // Step 3. Shutdown uncommit thread.
+  // Step 4. Shutdown uncommit thread.
  if (_uncommit_thread != nullptr) {
    _uncommit_thread->stop();
  }
--- a/src/hotspot/share/gc/shenandoah/shenandoahHeap.hpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahHeap.hpp
@ -229,6 +229,7 @@ private:
  shenandoah_padding(1);

  ShenandoahAllocationRate _alloc_rate;
+  ShenandoahDecayAllocRate _alloc_rate_decay;

 public:
  void increase_committed(size_t bytes);
--- a/test/hotspot/gtest/gc/shenandoah/test_shenandoahAllocationRate.cpp
+++ b/test/hotspot/gtest/gc/shenandoah/test_shenandoahAllocationRate.cpp
@ -159,3 +159,19 @@ TEST_VM_F(ShenandoahAllocationRateTest, accelerated_consumption_decelerating) {
  EXPECT_DOUBLE_EQ(consumption.momentary_rate(), 1024.0);
  EXPECT_EQ(consumption.momentary_consumption(), 102400UL);
 }
+
+TEST_VM_F(ShenandoahAllocationRateTest, force_updates) {
+  ShenandoahAllocRate<ShenandoahMockClock> rate(MINIMUM_SAMPLE_SIZE, BASELINE_SAMPLES, RECENT_SAMPLES, MOMENTARY_SAMPLES);
+  for (uint i = 0; i < BASELINE_SAMPLES; ++i) {
+    allocate(rate, 2048);
+  }
+  EXPECT_DOUBLE_EQ(rate.weighted_average(), 2048.0);
+
+  // Now simulate an equal number of seconds passing without any allocations. This
+  // should decay our baseline average back to zero.
+  for (uint i = 0; i < BASELINE_SAMPLES; ++i) {
+    rate.force_update();
+  }
+  EXPECT_DOUBLE_EQ(rate.weighted_average(), 0.0);
+}
+