mirror of
https://github.com/openjdk/jdk.git
synced 2026-05-21 02:48:02 +00:00
8370681: [BACKOUT] Improve memory ordering in new CPU Time Profiler
Reviewed-by: mdoerr
This commit is contained in:
parent
7bb490c4bf
commit
5ed6c201ba
@ -82,7 +82,14 @@ JfrCPUTimeTraceQueue::~JfrCPUTimeTraceQueue() {
|
||||
bool JfrCPUTimeTraceQueue::enqueue(JfrCPUTimeSampleRequest& request) {
|
||||
assert(JavaThread::current()->jfr_thread_local()->is_cpu_time_jfr_enqueue_locked(), "invariant");
|
||||
assert(&JavaThread::current()->jfr_thread_local()->cpu_time_jfr_queue() == this, "invariant");
|
||||
_data[_head++] = request;
|
||||
u4 elementIndex;
|
||||
do {
|
||||
elementIndex = AtomicAccess::load_acquire(&_head);
|
||||
if (elementIndex >= _capacity) {
|
||||
return false;
|
||||
}
|
||||
} while (AtomicAccess::cmpxchg(&_head, elementIndex, elementIndex + 1) != elementIndex);
|
||||
_data[elementIndex] = request;
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -94,19 +101,19 @@ JfrCPUTimeSampleRequest& JfrCPUTimeTraceQueue::at(u4 index) {
|
||||
static volatile u4 _lost_samples_sum = 0;
|
||||
|
||||
u4 JfrCPUTimeTraceQueue::size() const {
|
||||
return _head;
|
||||
return AtomicAccess::load_acquire(&_head);
|
||||
}
|
||||
|
||||
void JfrCPUTimeTraceQueue::set_size(u4 size) {
|
||||
_head = size;
|
||||
AtomicAccess::release_store(&_head, size);
|
||||
}
|
||||
|
||||
u4 JfrCPUTimeTraceQueue::capacity() const {
|
||||
return _capacity;
|
||||
return AtomicAccess::load_acquire(&_capacity);
|
||||
}
|
||||
|
||||
void JfrCPUTimeTraceQueue::set_capacity(u4 capacity) {
|
||||
if (capacity == _capacity) {
|
||||
if (capacity == AtomicAccess::load(&_capacity)) {
|
||||
return;
|
||||
}
|
||||
_head = 0;
|
||||
@ -119,15 +126,15 @@ void JfrCPUTimeTraceQueue::set_capacity(u4 capacity) {
|
||||
} else {
|
||||
_data = nullptr;
|
||||
}
|
||||
_capacity = capacity;
|
||||
AtomicAccess::release_store(&_capacity, capacity);
|
||||
}
|
||||
|
||||
bool JfrCPUTimeTraceQueue::is_empty() const {
|
||||
return _head == 0;
|
||||
return AtomicAccess::load_acquire(&_head) == 0;
|
||||
}
|
||||
|
||||
u4 JfrCPUTimeTraceQueue::lost_samples() const {
|
||||
return _lost_samples;
|
||||
return AtomicAccess::load(&_lost_samples);
|
||||
}
|
||||
|
||||
void JfrCPUTimeTraceQueue::increment_lost_samples() {
|
||||
@ -136,7 +143,7 @@ void JfrCPUTimeTraceQueue::increment_lost_samples() {
|
||||
}
|
||||
|
||||
void JfrCPUTimeTraceQueue::increment_lost_samples_due_to_queue_full() {
|
||||
_lost_samples_due_to_queue_full++;
|
||||
AtomicAccess::inc(&_lost_samples_due_to_queue_full);
|
||||
}
|
||||
|
||||
u4 JfrCPUTimeTraceQueue::get_and_reset_lost_samples() {
|
||||
@ -144,9 +151,7 @@ u4 JfrCPUTimeTraceQueue::get_and_reset_lost_samples() {
|
||||
}
|
||||
|
||||
u4 JfrCPUTimeTraceQueue::get_and_reset_lost_samples_due_to_queue_full() {
|
||||
u4 lost = _lost_samples_due_to_queue_full;
|
||||
_lost_samples_due_to_queue_full = 0;
|
||||
return lost;
|
||||
return AtomicAccess::xchg(&_lost_samples_due_to_queue_full, (u4)0);
|
||||
}
|
||||
|
||||
void JfrCPUTimeTraceQueue::init() {
|
||||
@ -154,7 +159,7 @@ void JfrCPUTimeTraceQueue::init() {
|
||||
}
|
||||
|
||||
void JfrCPUTimeTraceQueue::clear() {
|
||||
_head = 0;
|
||||
AtomicAccess::release_store(&_head, (u4)0);
|
||||
}
|
||||
|
||||
void JfrCPUTimeTraceQueue::resize_if_needed() {
|
||||
@ -162,8 +167,9 @@ void JfrCPUTimeTraceQueue::resize_if_needed() {
|
||||
if (lost_samples_due_to_queue_full == 0) {
|
||||
return;
|
||||
}
|
||||
if (_capacity < CPU_TIME_QUEUE_MAX_CAPACITY) {
|
||||
float ratio = (float)lost_samples_due_to_queue_full / (float)_capacity;
|
||||
u4 capacity = AtomicAccess::load(&_capacity);
|
||||
if (capacity < CPU_TIME_QUEUE_MAX_CAPACITY) {
|
||||
float ratio = (float)lost_samples_due_to_queue_full / (float)capacity;
|
||||
int factor = 1;
|
||||
if (ratio > 8) { // idea is to quickly scale the queue in the worst case
|
||||
factor = ratio;
|
||||
@ -175,7 +181,7 @@ void JfrCPUTimeTraceQueue::resize_if_needed() {
|
||||
factor = 2;
|
||||
}
|
||||
if (factor > 1) {
|
||||
u4 new_capacity = MIN2(CPU_TIME_QUEUE_MAX_CAPACITY, _capacity * factor);
|
||||
u4 new_capacity = MIN2(CPU_TIME_QUEUE_MAX_CAPACITY, capacity * factor);
|
||||
set_capacity(new_capacity);
|
||||
}
|
||||
}
|
||||
|
||||
@ -43,24 +43,19 @@ struct JfrCPUTimeSampleRequest {
|
||||
|
||||
// Fixed size async-signal-safe SPSC linear queue backed by an array.
|
||||
// Designed to be only used under lock and read linearly
|
||||
// The lock in question is the tri-state CPU time JFR lock in JfrThreadLocal
|
||||
// This allows us to skip most of the atomic accesses and memory barriers,
|
||||
// holding a lock acts as a memory barrier
|
||||
// Only the _lost_samples property is atomic, as it can be accessed even after
|
||||
// acquiring the lock failed.
|
||||
// Important to note is that the queue is also only accessed under lock in signal
|
||||
// handlers.
|
||||
class JfrCPUTimeTraceQueue {
|
||||
|
||||
// the default queue capacity, scaled if the sampling period is smaller than 10ms
|
||||
// when the thread is started
|
||||
static const u4 CPU_TIME_QUEUE_CAPACITY = 500;
|
||||
|
||||
JfrCPUTimeSampleRequest* _data;
|
||||
u4 _capacity;
|
||||
volatile u4 _capacity;
|
||||
// next unfilled index
|
||||
u4 _head;
|
||||
volatile u4 _head;
|
||||
|
||||
// the only property accessible without a lock
|
||||
volatile u4 _lost_samples;
|
||||
|
||||
u4 _lost_samples_due_to_queue_full;
|
||||
volatile u4 _lost_samples_due_to_queue_full;
|
||||
|
||||
static const u4 CPU_TIME_QUEUE_INITIAL_CAPACITY = 20;
|
||||
static const u4 CPU_TIME_QUEUE_MAX_CAPACITY = 2000;
|
||||
@ -87,7 +82,6 @@ public:
|
||||
|
||||
u4 lost_samples() const;
|
||||
|
||||
// the only method callable without holding a lock
|
||||
void increment_lost_samples();
|
||||
|
||||
void increment_lost_samples_due_to_queue_full();
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user