8377179: Improve and document racy use of start/end in ThreadLocalAllocBuffer

Reviewed-by: iwalulya, ayang
This commit is contained in:
Thomas Schatzl 2026-02-06 10:36:03 +00:00
parent 2d6a1aa36e
commit 5f83e9ad0e
3 changed files with 36 additions and 48 deletions

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 1999, 2025, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1999, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -458,10 +458,19 @@ size_t ThreadLocalAllocBuffer::end_reserve() {
return MAX2(reserve_size, (size_t)_reserve_for_allocation_prefetch);
}
const HeapWord* ThreadLocalAllocBuffer::start_relaxed() const {
return AtomicAccess::load(&_start);
}
const HeapWord* ThreadLocalAllocBuffer::top_relaxed() const {
return AtomicAccess::load(&_top);
size_t ThreadLocalAllocBuffer::estimated_used_bytes() const {
HeapWord* start = AtomicAccess::load(&_start);
HeapWord* top = AtomicAccess::load(&_top);
// There has been a race when retrieving _top and _start. Return 0.
if (_top < _start) {
return 0;
}
size_t used_bytes = pointer_delta(_top, _start, 1);
// Comparing diff with the maximum allowed size will ensure that we don't add
// the used bytes from a semi-initialized TLAB ending up with implausible values.
// In this case also just return 0.
if (used_bytes > ThreadLocalAllocBuffer::max_size_in_bytes()) {
return 0;
}
return used_bytes;
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 1999, 2025, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1999, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -33,16 +33,13 @@
class ThreadLocalAllocStats;
// ThreadLocalAllocBuffer: a descriptor for thread-local storage used by
// the threads for allocation.
// It is thread-private at any time, but maybe multiplexed over
// time across multiple threads. The park()/unpark() pair is
// used to make it available for such multiplexing.
// the threads for allocation. It is thread-private at any time.
//
// Heap sampling is performed via the end and allocation_end
// fields.
// allocation_end contains the real end of the tlab allocation,
// whereas end can be set to an arbitrary spot in the tlab to
// trip the return and sample the allocation.
// Heap sampling is performed via the end and allocation_end
// fields.
// allocation_end contains the real end of the tlab allocation,
// whereas end can be set to an arbitrary spot in the tlab to
// trip the return and sample the allocation.
class ThreadLocalAllocBuffer: public CHeapObj<mtThread> {
friend class VMStructs;
friend class JVMCIVMStructs;
@ -116,17 +113,18 @@ public:
HeapWord* end() const { return _end; }
HeapWord* top() const { return _top; }
HeapWord* hard_end();
HeapWord* pf_top() const { return _pf_top; }
size_t desired_size() const { return _desired_size; }
size_t used() const { return pointer_delta(top(), start()); }
size_t used_bytes() const { return pointer_delta(top(), start(), 1); }
size_t free() const { return pointer_delta(end(), top()); }
// Don't discard tlab if remaining space is larger than this.
size_t refill_waste_limit() const { return _refill_waste_limit; }
// For external inspection.
const HeapWord* start_relaxed() const;
const HeapWord* top_relaxed() const;
// Returns an estimate of the number of bytes currently used in the TLAB.
// Due to races with concurrent allocations and/or resetting the TLAB the return
// value may be inconsistent with any other metrics (e.g. total allocated
// bytes), and may just incorrectly return 0.
// Intented fo external inspection only where accuracy is not 100% required.
size_t estimated_used_bytes() const;
// Allocate size HeapWords. The memory is NOT initialized to zero.
inline HeapWord* allocate(size_t size);
@ -171,14 +169,6 @@ public:
static size_t refill_waste_limit_increment();
template <typename T> void addresses_do(T f) {
f(&_start);
f(&_top);
f(&_pf_top);
f(&_end);
f(&_allocation_end);
}
// Code generation support
static ByteSize start_offset() { return byte_offset_of(ThreadLocalAllocBuffer, _start); }
static ByteSize end_offset() { return byte_offset_of(ThreadLocalAllocBuffer, _end); }

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2012, 2025, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2021, Azul Systems, Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@ -37,25 +37,14 @@
inline jlong Thread::cooked_allocated_bytes() {
jlong allocated_bytes = AtomicAccess::load_acquire(&_allocated_bytes);
size_t used_bytes = 0;
if (UseTLAB) {
// These reads are unsynchronized and unordered with the thread updating its tlab pointers.
// Use only if top > start && used_bytes <= max_tlab_size_bytes.
const HeapWord* const top = tlab().top_relaxed();
const HeapWord* const start = tlab().start_relaxed();
if (top <= start) {
return allocated_bytes;
}
const size_t used_bytes = pointer_delta(top, start, 1);
if (used_bytes <= ThreadLocalAllocBuffer::max_size_in_bytes()) {
// Comparing used_bytes with the maximum allowed size will ensure
// that we don't add the used bytes from a semi-initialized TLAB
// ending up with incorrect values. There is still a race between
// incrementing _allocated_bytes and clearing the TLAB, that might
// cause double counting in rare cases.
return allocated_bytes + used_bytes;
}
// cooked_used_bytes() does its best to not return implausible values, but
// there is still a potential race between incrementing _allocated_bytes and
// clearing the TLAB, that might cause double-counting.
used_bytes = tlab().estimated_used_bytes();
}
return allocated_bytes;
return allocated_bytes + used_bytes;
}
inline ThreadsList* Thread::cmpxchg_threads_hazard_ptr(ThreadsList* exchange_value, ThreadsList* compare_value) {