mirror of
https://github.com/openjdk/jdk.git
synced 2026-03-17 11:23:19 +00:00
Refactor PtrQueueSet, use lock-free stack for SATB completed buffers Reviewed-by: tschatzl, shade
274 lines
8.6 KiB
C++
274 lines
8.6 KiB
C++
/*
|
|
* Copyright (c) 2001, 2019, Oracle and/or its affiliates. All rights reserved.
|
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
*
|
|
* This code is free software; you can redistribute it and/or modify it
|
|
* under the terms of the GNU General Public License version 2 only, as
|
|
* published by the Free Software Foundation.
|
|
*
|
|
* This code is distributed in the hope that it will be useful, but WITHOUT
|
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
* version 2 for more details (a copy is included in the LICENSE file that
|
|
* accompanied this code).
|
|
*
|
|
* You should have received a copy of the GNU General Public License version
|
|
* 2 along with this work; if not, write to the Free Software Foundation,
|
|
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
*
|
|
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
* or visit www.oracle.com if you need additional information or have any
|
|
* questions.
|
|
*
|
|
*/
|
|
|
|
#include "precompiled.hpp"
|
|
#include "gc/shared/ptrQueue.hpp"
|
|
#include "logging/log.hpp"
|
|
#include "memory/allocation.hpp"
|
|
#include "memory/allocation.inline.hpp"
|
|
#include "runtime/atomic.hpp"
|
|
#include "runtime/mutex.hpp"
|
|
#include "runtime/mutexLocker.hpp"
|
|
#include "runtime/orderAccess.hpp"
|
|
#include "runtime/thread.inline.hpp"
|
|
#include "utilities/globalCounter.inline.hpp"
|
|
|
|
#include <new>
|
|
|
|
// Construct a queue attached to qset with the given initial activity flag.
// The queue starts with no buffer, index zero and a capacity of zero bytes;
// handle_zero_index() fills in the capacity and buffer on first use.
PtrQueue::PtrQueue(PtrQueueSet* qset, bool active) :
  _qset(qset), _active(active),
  _index(0), _capacity_in_bytes(0),
  _buf(NULL)
{}
|
|
|
|
// Destroying a queue that still has a buffer attached is a usage error;
// flush_impl() is one path that detaches the buffer (it clears _buf).
PtrQueue::~PtrQueue() {
  assert(_buf == NULL, "queue must be flushed before delete");
}
|
|
|
|
void PtrQueue::flush_impl() {
|
|
if (_buf != NULL) {
|
|
BufferNode* node = BufferNode::make_node_from_buffer(_buf, index());
|
|
if (is_empty()) {
|
|
// No work to do.
|
|
qset()->deallocate_buffer(node);
|
|
} else {
|
|
qset()->enqueue_completed_buffer(node);
|
|
}
|
|
_buf = NULL;
|
|
set_index(0);
|
|
}
|
|
}
|
|
|
|
void PtrQueue::enqueue_known_active(void* ptr) {
|
|
while (_index == 0) {
|
|
handle_zero_index();
|
|
}
|
|
|
|
assert(_buf != NULL, "postcondition");
|
|
assert(index() > 0, "postcondition");
|
|
assert(index() <= capacity(), "invariant");
|
|
_index -= _element_size;
|
|
_buf[index()] = ptr;
|
|
}
|
|
|
|
void PtrQueue::handle_zero_index() {
|
|
assert(index() == 0, "precondition");
|
|
|
|
if (_buf != NULL) {
|
|
handle_completed_buffer();
|
|
} else {
|
|
// Bootstrapping kludge; lazily initialize capacity. The initial
|
|
// thread's queues are constructed before the second phase of the
|
|
// two-phase initialization of the associated qsets. As a result,
|
|
// we can't initialize _capacity_in_bytes in the queue constructor.
|
|
if (_capacity_in_bytes == 0) {
|
|
_capacity_in_bytes = index_to_byte_index(qset()->buffer_size());
|
|
}
|
|
allocate_buffer();
|
|
}
|
|
}
|
|
|
|
// Attach a buffer obtained from the qset and then call reset().
// NOTE(review): reset() is defined elsewhere; presumably it positions the
// index for an empty buffer -- confirm against the header.
void PtrQueue::allocate_buffer() {
  _buf = qset()->allocate_buffer();
  reset();
}
|
|
|
|
void PtrQueue::enqueue_completed_buffer() {
|
|
assert(_buf != NULL, "precondition");
|
|
BufferNode* node = BufferNode::make_node_from_buffer(_buf, index());
|
|
qset()->enqueue_completed_buffer(node);
|
|
allocate_buffer();
|
|
}
|
|
|
|
// Create a node with room for `size` pointer-sized entries.  One char
// array tagged mtGC is allocated, sized buffer_offset() plus the entry
// storage, and the BufferNode is constructed in place at its start.
BufferNode* BufferNode::allocate(size_t size) {
  size_t total_bytes = buffer_offset() + size * sizeof(void*);
  void* storage = NEW_C_HEAP_ARRAY(char, total_bytes, mtGC);
  return new (storage) BufferNode;
}
|
|
|
|
// Inverse of BufferNode::allocate(): run the destructor explicitly (the
// node was built with placement new) and free the underlying char array.
void BufferNode::deallocate(BufferNode* node) {
  node->~BufferNode();
  FREE_C_HEAP_ARRAY(char, node);
}
|
|
|
|
// Create an allocator whose nodes are created with buffer_size entries.
// Both lists start empty, both counts start at zero and the transfer lock
// starts unclaimed.  The name is copied into the fixed-size _name array
// and truncated if it does not fit.
BufferNode::Allocator::Allocator(const char* name, size_t buffer_size) :
  _buffer_size(buffer_size),
  _pending_list(),
  _free_list(),
  _pending_count(0),
  _free_count(0),
  _transfer_lock(false)
{
  const size_t last_index = sizeof(_name) - 1;
  strncpy(_name, name, last_index);
  // strncpy does not terminate on truncation, so terminate explicitly.
  _name[last_index] = '\0';
}
|
|
|
|
// Free every node still held by the allocator: first everything on the
// free list, then everything on the pending list.
BufferNode::Allocator::~Allocator() {
  BufferNode* free_nodes = _free_list.pop_all();
  delete_list(free_nodes);

  BufferNode* pending_nodes = _pending_list.pop_all();
  delete_list(pending_nodes);
}
|
|
|
|
// Deallocate every node of the NULL-terminated chain starting at list.
void BufferNode::Allocator::delete_list(BufferNode* list) {
  for (BufferNode* current = list; current != NULL; ) {
    // Read the successor before the node's storage is released.
    BufferNode* successor = current->next();
    DEBUG_ONLY(current->set_next(NULL);)
    BufferNode::deallocate(current);
    current = successor;
  }
}
|
|
|
|
size_t BufferNode::Allocator::free_count() const {
|
|
return Atomic::load(&_free_count);
|
|
}
|
|
|
|
// Obtain a node: reuse one from the free list when available, otherwise
// create a fresh one sized for _buffer_size entries.
BufferNode* BufferNode::Allocator::allocate() {
  BufferNode* node;
  {
    // The pop runs inside a critical section to protect it against ABA;
    // release() and try_transfer_pending() are the other half of that
    // protocol.
    GlobalCounter::CriticalSection cs(Thread::current());
    node = _free_list.pop();
  }

  if (node == NULL) {
    // Free list was empty; fall back to a new allocation.
    return BufferNode::allocate(_buffer_size);
  }

  // The count is decremented only after the node has been taken.  Since
  // the count is incremented before nodes are added to the free list,
  // it can never underflow.
  size_t remaining = Atomic::sub(1u, &_free_count);
  assert((remaining + 1) != 0, "_free_count underflow");
  return node;
}
|
|
|
|
// To solve the ABA problem for lock-free stack pop, allocate does the
|
|
// pop inside a critical section, and release synchronizes on the
|
|
// critical sections before adding to the _free_list. But we don't
|
|
// want to make every release have to do a synchronize. Instead, we
|
|
// initially place released nodes on the _pending_list, and transfer
|
|
// them to the _free_list in batches. Only one transfer at a time is
|
|
// permitted, with a lock bit to control access to that phase. A
|
|
// transfer takes all the nodes from the _pending_list, synchronizes on
|
|
// the _free_list pops, and then adds the former pending nodes to the
|
|
// _free_list. While that's happening, other threads might be adding
|
|
// other nodes to the _pending_list, to be dealt with by some later
|
|
// transfer.
|
|
void BufferNode::Allocator::release(BufferNode* node) {
|
|
assert(node != NULL, "precondition");
|
|
assert(node->next() == NULL, "precondition");
|
|
|
|
// Desired minimum transfer batch size. There is relatively little
|
|
// importance to the specific number. It shouldn't be too big, else
|
|
// we're wasting space when the release rate is low. If the release
|
|
// rate is high, we might accumulate more than this before being
|
|
// able to start a new transfer, but that's okay. Also note that
|
|
// the allocation rate and the release rate are going to be fairly
|
|
// similar, due to how the buffers are used.
|
|
const size_t trigger_transfer = 10;
|
|
|
|
// Add to pending list. Update count first so no underflow in transfer.
|
|
size_t pending_count = Atomic::add(1u, &_pending_count);
|
|
_pending_list.push(*node);
|
|
if (pending_count > trigger_transfer) {
|
|
try_transfer_pending();
|
|
}
|
|
}
|
|
|
|
// Try to transfer nodes from _pending_list to _free_list, with a
|
|
// synchronization delay for any in-progress pops from the _free_list,
|
|
// to solve ABA there. Return true if performed a (possibly empty)
|
|
// transfer, false if blocked from doing so by some other thread's
|
|
// in-progress transfer.
|
|
bool BufferNode::Allocator::try_transfer_pending() {
|
|
// Attempt to claim the lock.
|
|
if (Atomic::load(&_transfer_lock) || // Skip CAS if likely to fail.
|
|
Atomic::cmpxchg(true, &_transfer_lock, false)) {
|
|
return false;
|
|
}
|
|
// Have the lock; perform the transfer.
|
|
|
|
// Claim all the pending nodes.
|
|
BufferNode* first = _pending_list.pop_all();
|
|
if (first != NULL) {
|
|
// Prepare to add the claimed nodes, and update _pending_count.
|
|
BufferNode* last = first;
|
|
size_t count = 1;
|
|
for (BufferNode* next = first->next(); next != NULL; next = next->next()) {
|
|
last = next;
|
|
++count;
|
|
}
|
|
Atomic::sub(count, &_pending_count);
|
|
|
|
// Wait for any in-progress pops, to avoid ABA for them.
|
|
GlobalCounter::write_synchronize();
|
|
|
|
// Add synchronized nodes to _free_list.
|
|
// Update count first so no underflow in allocate().
|
|
Atomic::add(count, &_free_count);
|
|
_free_list.prepend(*first, *last);
|
|
log_trace(gc, ptrqueue, freelist)
|
|
("Transferred %s pending to free: " SIZE_FORMAT, name(), count);
|
|
}
|
|
OrderAccess::release_store(&_transfer_lock, false);
|
|
return true;
|
|
}
|
|
|
|
size_t BufferNode::Allocator::reduce_free_list(size_t remove_goal) {
|
|
try_transfer_pending();
|
|
size_t removed = 0;
|
|
for ( ; removed < remove_goal; ++removed) {
|
|
BufferNode* node = _free_list.pop();
|
|
if (node == NULL) break;
|
|
BufferNode::deallocate(node);
|
|
}
|
|
size_t new_count = Atomic::sub(removed, &_free_count);
|
|
log_debug(gc, ptrqueue, freelist)
|
|
("Reduced %s free list by " SIZE_FORMAT " to " SIZE_FORMAT,
|
|
name(), removed, new_count);
|
|
return removed;
|
|
}
|
|
|
|
// Construct a queue set with no allocator and the all-active flag
// cleared.  The allocator is supplied later through initialize().
PtrQueueSet::PtrQueueSet() :
  _allocator(NULL), _all_active(false)
{}
|
|
|
|
// Nothing to release here; in particular the allocator passed to
// initialize() is not deleted by this destructor.
PtrQueueSet::~PtrQueueSet() {}
|
|
|
|
// Second-phase initialization: record the (non-NULL) allocator that
// allocate_buffer() and deallocate_buffer() will use.
void PtrQueueSet::initialize(BufferNode::Allocator* allocator) {
  assert(allocator != NULL, "Init order issue?");
  _allocator = allocator;
}
|
|
|
|
void** PtrQueueSet::allocate_buffer() {
|
|
BufferNode* node = _allocator->allocate();
|
|
return BufferNode::make_buffer_from_node(node);
|
|
}
|
|
|
|
// Give node back to the allocator.  The node is released for reuse, not
// freed immediately (see BufferNode::Allocator::release()).
void PtrQueueSet::deallocate_buffer(BufferNode* node) {
  _allocator->release(node);
}
|
|
|