8370681: [BACKOUT] Improve memory ordering in new CPU Time Profiler

Reviewed-by: mdoerr
This commit is contained in:
Johannes Bechberger 2025-10-27 12:29:22 +00:00
parent 7bb490c4bf
commit 5ed6c201ba
2 changed files with 29 additions and 29 deletions

View File

@ -82,7 +82,14 @@ JfrCPUTimeTraceQueue::~JfrCPUTimeTraceQueue() {
// Stores `request` into the backing array at the head position and advances _head.
// Both asserts require that the current JavaThread holds its CPU time JFR
// enqueue lock and that this queue is that thread's own queue.
// Returns false when the head index has reached _capacity, true otherwise.
// NOTE(review): this listing is a diff rendering without +/- markers; the plain
// `_data[_head++] = request;` store and the load_acquire/cmpxchg loop look like
// the pre- and post-change variants of the same step - confirm against the real file.
bool JfrCPUTimeTraceQueue::enqueue(JfrCPUTimeSampleRequest& request) {
assert(JavaThread::current()->jfr_thread_local()->is_cpu_time_jfr_enqueue_locked(), "invariant");
assert(&JavaThread::current()->jfr_thread_local()->cpu_time_jfr_queue() == this, "invariant");
_data[_head++] = request;
u4 elementIndex;
do {
// Re-read the head index on every attempt.
elementIndex = AtomicAccess::load_acquire(&_head);
if (elementIndex >= _capacity) {
// No free slot at this index: report failure to the caller.
return false;
}
// Try to claim the slot by moving _head from elementIndex to elementIndex + 1;
// the loop retries while cmpxchg reports a value other than the one read above.
} while (AtomicAccess::cmpxchg(&_head, elementIndex, elementIndex + 1) != elementIndex);
// The claimed slot is written only after _head has been advanced.
_data[elementIndex] = request;
return true;
}
@ -94,19 +101,19 @@ JfrCPUTimeSampleRequest& JfrCPUTimeTraceQueue::at(u4 index) {
static volatile u4 _lost_samples_sum = 0;
// Returns the current value of _head.
// NOTE(review): the two return statements appear to be the old (plain read) and
// new (load_acquire) sides of a diff shown without +/- markers - confirm.
u4 JfrCPUTimeTraceQueue::size() const {
return _head;
return AtomicAccess::load_acquire(&_head);
}
// Overwrites _head with `size`; no check against _capacity is made here.
// NOTE(review): the plain assignment and the release_store appear to be the old
// and new sides of a diff shown without +/- markers - confirm.
void JfrCPUTimeTraceQueue::set_size(u4 size) {
_head = size;
AtomicAccess::release_store(&_head, size);
}
// Returns the current value of _capacity.
// NOTE(review): the two return statements appear to be the old (plain read) and
// new (load_acquire) sides of a diff shown without +/- markers - confirm.
u4 JfrCPUTimeTraceQueue::capacity() const {
return _capacity;
return AtomicAccess::load_acquire(&_capacity);
}
void JfrCPUTimeTraceQueue::set_capacity(u4 capacity) {
if (capacity == _capacity) {
if (capacity == AtomicAccess::load(&_capacity)) {
return;
}
_head = 0;
@ -119,15 +126,15 @@ void JfrCPUTimeTraceQueue::set_capacity(u4 capacity) {
} else {
_data = nullptr;
}
_capacity = capacity;
AtomicAccess::release_store(&_capacity, capacity);
}
// Returns true when _head is 0.
// NOTE(review): the two return statements appear to be the old (plain read) and
// new (load_acquire) sides of a diff shown without +/- markers - confirm.
bool JfrCPUTimeTraceQueue::is_empty() const {
return _head == 0;
return AtomicAccess::load_acquire(&_head) == 0;
}
// Returns the current value of _lost_samples.
// NOTE(review): the two return statements appear to be the old (plain read) and
// new (AtomicAccess::load) sides of a diff shown without +/- markers - confirm.
u4 JfrCPUTimeTraceQueue::lost_samples() const {
return _lost_samples;
return AtomicAccess::load(&_lost_samples);
}
void JfrCPUTimeTraceQueue::increment_lost_samples() {
@ -136,7 +143,7 @@ void JfrCPUTimeTraceQueue::increment_lost_samples() {
}
// Adds one to the _lost_samples_due_to_queue_full counter.
// NOTE(review): the plain `++` and AtomicAccess::inc appear to be the old and new
// sides of a diff shown without +/- markers, not two separate increments - confirm.
void JfrCPUTimeTraceQueue::increment_lost_samples_due_to_queue_full() {
_lost_samples_due_to_queue_full++;
AtomicAccess::inc(&_lost_samples_due_to_queue_full);
}
u4 JfrCPUTimeTraceQueue::get_and_reset_lost_samples() {
@ -144,9 +151,7 @@ u4 JfrCPUTimeTraceQueue::get_and_reset_lost_samples() {
}
// Returns the value of _lost_samples_due_to_queue_full and sets the counter to 0.
// NOTE(review): the read/zero/return sequence and the single xchg appear to be the
// old and new sides of a diff shown without +/- markers - confirm.
u4 JfrCPUTimeTraceQueue::get_and_reset_lost_samples_due_to_queue_full() {
u4 lost = _lost_samples_due_to_queue_full;
_lost_samples_due_to_queue_full = 0;
return lost;
// Single-operation form: swap in 0 and return what xchg hands back
// (presumably the previous counter value - confirm against AtomicAccess).
return AtomicAccess::xchg(&_lost_samples_due_to_queue_full, (u4)0);
}
void JfrCPUTimeTraceQueue::init() {
@ -154,7 +159,7 @@ void JfrCPUTimeTraceQueue::init() {
}
// Resets _head to 0; nothing in this body touches _data or _capacity.
// NOTE(review): the plain assignment and the release_store appear to be the old
// and new sides of a diff shown without +/- markers - confirm.
void JfrCPUTimeTraceQueue::clear() {
_head = 0;
AtomicAccess::release_store(&_head, (u4)0);
}
void JfrCPUTimeTraceQueue::resize_if_needed() {
@ -162,8 +167,9 @@ void JfrCPUTimeTraceQueue::resize_if_needed() {
if (lost_samples_due_to_queue_full == 0) {
return;
}
if (_capacity < CPU_TIME_QUEUE_MAX_CAPACITY) {
float ratio = (float)lost_samples_due_to_queue_full / (float)_capacity;
u4 capacity = AtomicAccess::load(&_capacity);
if (capacity < CPU_TIME_QUEUE_MAX_CAPACITY) {
float ratio = (float)lost_samples_due_to_queue_full / (float)capacity;
int factor = 1;
if (ratio > 8) { // idea is to quickly scale the queue in the worst case
factor = ratio;
@ -175,7 +181,7 @@ void JfrCPUTimeTraceQueue::resize_if_needed() {
factor = 2;
}
if (factor > 1) {
u4 new_capacity = MIN2(CPU_TIME_QUEUE_MAX_CAPACITY, _capacity * factor);
u4 new_capacity = MIN2(CPU_TIME_QUEUE_MAX_CAPACITY, capacity * factor);
set_capacity(new_capacity);
}
}

View File

@ -43,24 +43,19 @@ struct JfrCPUTimeSampleRequest {
// Fixed size async-signal-safe SPSC linear queue backed by an array.
// Designed to be only used under lock and read linearly
// The lock in question is the tri-state CPU time JFR lock in JfrThreadLocal
// This allows us to skip most of the atomic accesses and memory barriers,
// holding a lock acts as a memory barrier
// Only the _lost_samples property is atomic, as it can be accessed even after
// acquiring the lock failed.
// Important to note is that the queue is also only accessed under lock in signal
// handlers.
class JfrCPUTimeTraceQueue {
// the default queue capacity, scaled if the sampling period is smaller than 10ms
// when the thread is started
static const u4 CPU_TIME_QUEUE_CAPACITY = 500;
JfrCPUTimeSampleRequest* _data;
u4 _capacity;
volatile u4 _capacity;
// next unfilled index
u4 _head;
volatile u4 _head;
// the only property accessible without a lock
volatile u4 _lost_samples;
u4 _lost_samples_due_to_queue_full;
volatile u4 _lost_samples_due_to_queue_full;
static const u4 CPU_TIME_QUEUE_INITIAL_CAPACITY = 20;
static const u4 CPU_TIME_QUEUE_MAX_CAPACITY = 2000;
@ -87,7 +82,6 @@ public:
u4 lost_samples() const;
// the only method callable without holding a lock
void increment_lost_samples();
void increment_lost_samples_due_to_queue_full();