8339668: Parallel: Adopt PartialArrayState to consolidate marking stack in Full GC

Co-authored-by: Thomas Schatzl <tschatzl@openjdk.org>
Reviewed-by: tschatzl, ayang
Zhengyu Gu 2025-01-28 13:29:37 +00:00
parent 2bef5b4a87
commit 5fec999474
6 changed files with 137 additions and 119 deletions

View File: src/hotspot/share/gc/parallel/psCompactionManager.cpp

@ -28,6 +28,8 @@
#include "gc/parallel/psCompactionManager.inline.hpp"
#include "gc/parallel/psOldGen.hpp"
#include "gc/parallel/psParallelCompact.inline.hpp"
#include "gc/shared/partialArraySplitter.inline.hpp"
#include "gc/shared/partialArrayState.hpp"
#include "gc/shared/preservedMarks.inline.hpp"
#include "gc/shared/taskqueue.inline.hpp"
#include "logging/log.hpp"
@ -42,9 +44,9 @@
PSOldGen* ParCompactionManager::_old_gen = nullptr;
ParCompactionManager** ParCompactionManager::_manager_array = nullptr;
ParCompactionManager::OopTaskQueueSet* ParCompactionManager::_oop_task_queues = nullptr;
ParCompactionManager::ObjArrayTaskQueueSet* ParCompactionManager::_objarray_task_queues = nullptr;
ParCompactionManager::PSMarkTasksQueueSet* ParCompactionManager::_marking_stacks = nullptr;
ParCompactionManager::RegionTaskQueueSet* ParCompactionManager::_region_task_queues = nullptr;
PartialArrayStateManager* ParCompactionManager::_partial_array_state_manager = nullptr;
ObjectStartArray* ParCompactionManager::_start_array = nullptr;
ParMarkBitMap* ParCompactionManager::_mark_bitmap = nullptr;
@ -54,8 +56,10 @@ Monitor* ParCompactionManager::_shadow_region_monitor = nullptr;
PreservedMarksSet* ParCompactionManager::_preserved_marks_set = nullptr;
ParCompactionManager::ParCompactionManager(PreservedMarks* preserved_marks,
ReferenceProcessor* ref_processor)
: _mark_and_push_closure(this, ref_processor) {
ReferenceProcessor* ref_processor,
uint parallel_gc_threads)
:_partial_array_splitter(_partial_array_state_manager, parallel_gc_threads),
_mark_and_push_closure(this, ref_processor) {
ParallelScavengeHeap* heap = ParallelScavengeHeap::heap();
@ -78,8 +82,10 @@ void ParCompactionManager::initialize(ParMarkBitMap* mbm) {
assert(_manager_array == nullptr, "Attempt to initialize twice");
_manager_array = NEW_C_HEAP_ARRAY(ParCompactionManager*, parallel_gc_threads, mtGC);
_oop_task_queues = new OopTaskQueueSet(parallel_gc_threads);
_objarray_task_queues = new ObjArrayTaskQueueSet(parallel_gc_threads);
assert(_partial_array_state_manager == nullptr, "Attempt to initialize twice");
_partial_array_state_manager
= new PartialArrayStateManager(parallel_gc_threads);
_marking_stacks = new PSMarkTasksQueueSet(parallel_gc_threads);
_region_task_queues = new RegionTaskQueueSet(parallel_gc_threads);
_preserved_marks_set = new PreservedMarksSet(true);
@ -88,16 +94,15 @@ void ParCompactionManager::initialize(ParMarkBitMap* mbm) {
// Create and register the ParCompactionManager(s) for the worker threads.
for(uint i=0; i<parallel_gc_threads; i++) {
_manager_array[i] = new ParCompactionManager(_preserved_marks_set->get(i),
PSParallelCompact::ref_processor());
oop_task_queues()->register_queue(i, _manager_array[i]->oop_stack());
_objarray_task_queues->register_queue(i, &_manager_array[i]->_objarray_stack);
PSParallelCompact::ref_processor(),
parallel_gc_threads);
marking_stacks()->register_queue(i, _manager_array[i]->marking_stack());
region_task_queues()->register_queue(i, _manager_array[i]->region_stack());
}
_shadow_region_array = new (mtGC) GrowableArray<size_t >(10, mtGC);
_shadow_region_monitor = new Monitor(Mutex::nosafepoint, "CompactionManager_lock");
}
void ParCompactionManager::flush_all_string_dedup_requests() {
@ -114,42 +119,41 @@ ParCompactionManager::gc_thread_compaction_manager(uint index) {
return _manager_array[index];
}
inline void ParCompactionManager::publish_and_drain_oop_tasks() {
oop obj;
while (oop_stack()->pop_overflow(obj)) {
if (!oop_stack()->try_push_to_taskqueue(obj)) {
follow_contents(obj);
}
}
while (oop_stack()->pop_local(obj)) {
follow_contents(obj);
}
void ParCompactionManager::push_objArray(oop obj) {
assert(obj->is_objArray(), "precondition");
_mark_and_push_closure.do_klass(obj->klass());
objArrayOop obj_array = objArrayOop(obj);
size_t array_length = obj_array->length();
size_t initial_chunk_size =
_partial_array_splitter.start(&_marking_stack, obj_array, nullptr, array_length);
follow_array(obj_array, 0, initial_chunk_size);
}
bool ParCompactionManager::publish_or_pop_objarray_tasks(ObjArrayTask& task) {
while (_objarray_stack.pop_overflow(task)) {
if (!_objarray_stack.try_push_to_taskqueue(task)) {
return true;
}
}
return false;
void ParCompactionManager::process_array_chunk(PartialArrayState* state, bool stolen) {
// Access before release by claim().
oop obj = state->source();
PartialArraySplitter::Claim claim =
_partial_array_splitter.claim(state, &_marking_stack, stolen);
follow_array(objArrayOop(obj), claim._start, claim._end);
}
void ParCompactionManager::follow_marking_stacks() {
ScannerTask task;
do {
// First, try to move tasks from the overflow stack into the shared buffer, so
// that other threads can steal. Otherwise process the overflow stack first.
publish_and_drain_oop_tasks();
// Process ObjArrays one at a time to avoid marking stack bloat.
ObjArrayTask task;
if (publish_or_pop_objarray_tasks(task) ||
_objarray_stack.pop_local(task)) {
follow_array((objArrayOop)task.obj(), task.index());
while (marking_stack()->pop_overflow(task)) {
if (!marking_stack()->try_push_to_taskqueue(task)) {
follow_contents(task, false);
}
}
} while (!marking_stacks_empty());
while (marking_stack()->pop_local(task)) {
follow_contents(task, false);
}
} while (!marking_stack_empty());
assert(marking_stacks_empty(), "Sanity");
assert(marking_stack_empty(), "Sanity");
}
void ParCompactionManager::drain_region_stacks() {
@ -196,11 +200,32 @@ void ParCompactionManager::remove_all_shadow_regions() {
_shadow_region_array->clear();
}
#if TASKQUEUE_STATS
void ParCompactionManager::print_and_reset_taskqueue_stats() {
marking_stacks()->print_and_reset_taskqueue_stats("Marking Stacks");
auto get_pa_stats = [&](uint i) {
return _manager_array[i]->partial_array_task_stats();
};
PartialArrayTaskStats::log_set(ParallelGCThreads, get_pa_stats,
"Partial Array Task Stats");
uint parallel_gc_threads = ParallelScavengeHeap::heap()->workers().max_workers();
for (uint i = 0; i < parallel_gc_threads; ++i) {
get_pa_stats(i)->reset();
}
}
PartialArrayTaskStats* ParCompactionManager::partial_array_task_stats() {
return _partial_array_splitter.stats();
}
#endif // TASKQUEUE_STATS
#ifdef ASSERT
void ParCompactionManager::verify_all_marking_stack_empty() {
uint parallel_gc_threads = ParallelGCThreads;
for (uint i = 0; i < parallel_gc_threads; i++) {
assert(_manager_array[i]->marking_stacks_empty(), "Marking stack should be empty");
assert(_manager_array[i]->marking_stack_empty(), "Marking stack should be empty");
}
}
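For readers new to this code: push_objArray() and process_array_chunk() above split the scanning of a large objArray into chunks that are handed out through a shared PartialArrayState, so the chunk work can sit on the same ScannerTask marking stack as ordinary objects. Below is a minimal standalone sketch of just the chunk hand-out idea, assuming a simple atomic cursor; the type and function names are invented for this sketch, and PartialArraySplitter's real start()/claim() API additionally decides how many continuation tasks to push onto the marking stack for other workers to steal.

#include <algorithm>
#include <atomic>
#include <cstddef>
#include <cstdio>

// Illustrative stand-in for a partial-array state: one record shared by all
// workers scanning the same large array. Not the real PartialArrayState layout.
struct DemoArrayState {
  size_t length;                  // total number of array elements
  std::atomic<size_t> next{0};    // first element index not yet handed out
};

// Claim the next chunk [start, end) of at most chunk_size elements.
// Returns false once the whole array has been handed out.
bool claim_chunk(DemoArrayState& st, size_t chunk_size, size_t& start, size_t& end) {
  size_t cur = st.next.fetch_add(chunk_size, std::memory_order_relaxed);
  if (cur >= st.length) {
    return false;
  }
  start = cur;
  end = std::min(cur + chunk_size, st.length);
  return true;
}

int main() {
  DemoArrayState st{10000};
  size_t start, end;
  while (claim_chunk(st, 4096, start, end)) {
    std::printf("scan [%zu, %zu)\n", start, end);  // the real code calls follow_array(obj, start, end)
  }
  return 0;
}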

View File: src/hotspot/share/gc/parallel/psCompactionManager.hpp

@ -27,6 +27,9 @@
#include "classfile/classLoaderData.hpp"
#include "gc/parallel/psParallelCompact.hpp"
#include "gc/shared/partialArraySplitter.hpp"
#include "gc/shared/partialArrayTaskStats.hpp"
#include "gc/shared/partialArrayState.hpp"
#include "gc/shared/preservedMarks.hpp"
#include "gc/shared/stringdedup/stringDedup.hpp"
#include "gc/shared/taskqueue.hpp"
@ -64,26 +67,22 @@ class ParCompactionManager : public CHeapObj<mtGC> {
friend class PCAddThreadRootsMarkingTaskClosure;
private:
typedef OverflowTaskQueue<oop, mtGC> OopTaskQueue;
typedef GenericTaskQueueSet<OopTaskQueue, mtGC> OopTaskQueueSet;
// 32-bit: 4K * 8 = 32KiB; 64-bit: 8K * 16 = 128KiB
#define QUEUE_SIZE (1 << NOT_LP64(12) LP64_ONLY(13))
typedef OverflowTaskQueue<ObjArrayTask, mtGC, QUEUE_SIZE> ObjArrayTaskQueue;
typedef GenericTaskQueueSet<ObjArrayTaskQueue, mtGC> ObjArrayTaskQueueSet;
#undef QUEUE_SIZE
typedef OverflowTaskQueue<size_t, mtGC> RegionTaskQueue;
typedef GenericTaskQueueSet<RegionTaskQueue, mtGC> RegionTaskQueueSet;
typedef OverflowTaskQueue<ScannerTask, mtGC> PSMarkTaskQueue;
typedef GenericTaskQueueSet<PSMarkTaskQueue, mtGC> PSMarkTasksQueueSet;
typedef OverflowTaskQueue<size_t, mtGC> RegionTaskQueue;
typedef GenericTaskQueueSet<RegionTaskQueue, mtGC> RegionTaskQueueSet;
static ParCompactionManager** _manager_array;
static OopTaskQueueSet* _oop_task_queues;
static ObjArrayTaskQueueSet* _objarray_task_queues;
static PSMarkTasksQueueSet* _marking_stacks;
static ObjectStartArray* _start_array;
static RegionTaskQueueSet* _region_task_queues;
static PSOldGen* _old_gen;
OopTaskQueue _oop_stack;
ObjArrayTaskQueue _objarray_stack;
static PartialArrayStateManager* _partial_array_state_manager;
PartialArraySplitter _partial_array_splitter;
PSMarkTaskQueue _marking_stack;
size_t _next_shadow_region;
PCMarkAndPushClosure _mark_and_push_closure;
@ -109,23 +108,20 @@ class ParCompactionManager : public CHeapObj<mtGC> {
static PSOldGen* old_gen() { return _old_gen; }
static ObjectStartArray* start_array() { return _start_array; }
static OopTaskQueueSet* oop_task_queues() { return _oop_task_queues; }
static PSMarkTasksQueueSet* marking_stacks() { return _marking_stacks; }
static void initialize(ParMarkBitMap* mbm);
void publish_and_drain_oop_tasks();
// Try to publish all contents from the objArray task queue overflow stack to
// the shared objArray stack.
// Returns true and a valid task if there has not been enough space in the shared
// objArray stack, otherwise returns false and the task is invalid.
bool publish_or_pop_objarray_tasks(ObjArrayTask& task);
ParCompactionManager(PreservedMarks* preserved_marks,
ReferenceProcessor* ref_processor);
ReferenceProcessor* ref_processor,
uint parallel_gc_threads);
// Array of task queues. Needed by the task terminator.
static RegionTaskQueueSet* region_task_queues() { return _region_task_queues; }
OopTaskQueue* oop_stack() { return &_oop_stack; }
inline PSMarkTaskQueue* marking_stack() { return &_marking_stack; }
inline void push(PartialArrayState* stat);
void push_objArray(oop obj);
// To collect per-region live-words in a worker local cache in order to
// reduce threads contention.
@ -155,6 +151,11 @@ class ParCompactionManager : public CHeapObj<mtGC> {
MarkingStatsCache* _marking_stats_cache;
#if TASKQUEUE_STATS
static void print_and_reset_taskqueue_stats();
PartialArrayTaskStats* partial_array_task_stats();
#endif // TASKQUEUE_STATS
public:
static const size_t InvalidShadow = ~0;
static size_t pop_shadow_region_mt_safe(PSParallelCompact::RegionData* region_ptr);
@ -189,7 +190,6 @@ public:
// Save for later processing. Must not fail.
inline void push(oop obj);
inline void push_objarray(oop objarray, size_t index);
inline void push_region(size_t index);
// Check mark and maybe push on marking stack.
@ -198,19 +198,19 @@ public:
// Access function for compaction managers
static ParCompactionManager* gc_thread_compaction_manager(uint index);
static bool steal(int queue_num, oop& t);
static bool steal_objarray(int queue_num, ObjArrayTask& t);
static bool steal(int queue_num, ScannerTask& t);
static bool steal(int queue_num, size_t& region);
// Process tasks remaining on any marking stack
// Process tasks remaining on marking stack
void follow_marking_stacks();
inline bool marking_stacks_empty() const;
inline bool marking_stack_empty() const;
// Process tasks remaining on any stack
void drain_region_stacks();
void follow_contents(oop obj);
void follow_array(objArrayOop array, int index);
inline void follow_contents(const ScannerTask& task, bool stolen);
inline void follow_array(objArrayOop array, size_t start, size_t end);
void process_array_chunk(PartialArrayState* state, bool stolen);
class FollowStackClosure: public VoidClosure {
private:
@ -234,8 +234,8 @@ public:
static void verify_all_region_stack_empty() NOT_DEBUG_RETURN;
};
bool ParCompactionManager::marking_stacks_empty() const {
return _oop_stack.is_empty() && _objarray_stack.is_empty();
bool ParCompactionManager::marking_stack_empty() const {
return _marking_stack.is_empty();
}
#endif // SHARE_GC_PARALLEL_PSCOMPACTIONMANAGER_HPP
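The PSMarkTaskQueue typedef above is an OverflowTaskQueue<ScannerTask, mtGC>: a fixed-capacity task queue that other workers can steal from, backed by a private, unbounded overflow stack for pushes that do not fit. The sketch below is a single-threaded model of only the operation semantics that follow_marking_stacks() relies on (push, pop_overflow, try_push_to_taskqueue, pop_local); the real implementation is lock-free and supports concurrent stealing.

#include <cstddef>
#include <deque>
#include <vector>

// Single-threaded model of an overflow task queue: a bounded, stealable buffer
// plus a private unbounded overflow stack. Only the semantics are modeled here.
template <typename T, size_t Capacity>
class DemoOverflowQueue {
  std::vector<T> _buffer;     // bounded part, visible to stealing workers
  std::deque<T>  _overflow;   // private overflow stack

public:
  bool try_push_to_taskqueue(const T& t) {
    if (_buffer.size() >= Capacity) {
      return false;                   // bounded part is full
    }
    _buffer.push_back(t);
    return true;
  }
  void push(const T& t) {
    if (!try_push_to_taskqueue(t)) {
      _overflow.push_back(t);         // spill to the overflow stack
    }
  }
  bool pop_overflow(T& t) {
    if (_overflow.empty()) return false;
    t = _overflow.back(); _overflow.pop_back();
    return true;
  }
  bool pop_local(T& t) {
    if (_buffer.empty()) return false;
    t = _buffer.back(); _buffer.pop_back();
    return true;
  }
  bool is_empty() const { return _buffer.empty() && _overflow.empty(); }
};

Read against this model, the loop in follow_marking_stacks() first tries to move spilled tasks back into the stealable buffer (pop_overflow plus try_push_to_taskqueue), processes a task directly only when the buffer is full, and then drains the buffer with pop_local.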

View File: src/hotspot/share/gc/parallel/psCompactionManager.inline.hpp

@ -32,6 +32,8 @@
#include "gc/parallel/parMarkBitMap.hpp"
#include "gc/parallel/psParallelCompact.inline.hpp"
#include "gc/parallel/psStringDedup.hpp"
#include "gc/shared/partialArrayState.hpp"
#include "gc/shared/partialArrayTaskStepper.inline.hpp"
#include "gc/shared/taskqueue.inline.hpp"
#include "oops/access.inline.hpp"
#include "oops/arrayOop.hpp"
@ -46,12 +48,8 @@ inline void PCMarkAndPushClosure::do_oop_work(T* p) {
_compaction_manager->mark_and_push(p);
}
inline bool ParCompactionManager::steal(int queue_num, oop& t) {
return oop_task_queues()->steal(queue_num, t);
}
inline bool ParCompactionManager::steal_objarray(int queue_num, ObjArrayTask& t) {
return _objarray_task_queues->steal(queue_num, t);
inline bool ParCompactionManager::steal(int queue_num, ScannerTask& t) {
return marking_stacks()->steal(queue_num, t);
}
inline bool ParCompactionManager::steal(int queue_num, size_t& region) {
@ -59,14 +57,11 @@ inline bool ParCompactionManager::steal(int queue_num, size_t& region) {
}
inline void ParCompactionManager::push(oop obj) {
_oop_stack.push(obj);
marking_stack()->push(ScannerTask(obj));
}
void ParCompactionManager::push_objarray(oop obj, size_t index)
{
ObjArrayTask task(obj, index);
assert(task.is_valid(), "bad ObjArrayTask");
_objarray_stack.push(task);
inline void ParCompactionManager::push(PartialArrayState* stat) {
marking_stack()->push(ScannerTask(stat));
}
void ParCompactionManager::push_region(size_t index)
@ -111,43 +106,38 @@ inline void ParCompactionManager::FollowStackClosure::do_void() {
}
template <typename T>
inline void follow_array_specialized(objArrayOop obj, int index, ParCompactionManager* cm) {
const size_t len = size_t(obj->length());
const size_t beg_index = size_t(index);
assert(beg_index < len || len == 0, "index too large");
const size_t stride = MIN2(len - beg_index, (size_t)ObjArrayMarkingStride);
const size_t end_index = beg_index + stride;
inline void follow_array_specialized(objArrayOop obj, size_t start, size_t end, ParCompactionManager* cm) {
assert(start <= end, "invariant");
T* const base = (T*)obj->base();
T* const beg = base + beg_index;
T* const end = base + end_index;
if (end_index < len) {
cm->push_objarray(obj, end_index); // Push the continuation.
}
T* const beg = base + start;
T* const chunk_end = base + end;
// Push the non-null elements of the next stride on the marking stack.
for (T* e = beg; e < end; e++) {
for (T* e = beg; e < chunk_end; e++) {
cm->mark_and_push<T>(e);
}
}
inline void ParCompactionManager::follow_array(objArrayOop obj, int index) {
inline void ParCompactionManager::follow_array(objArrayOop obj, size_t start, size_t end) {
if (UseCompressedOops) {
follow_array_specialized<narrowOop>(obj, index, this);
follow_array_specialized<narrowOop>(obj, start, end, this);
} else {
follow_array_specialized<oop>(obj, index, this);
follow_array_specialized<oop>(obj, start, end, this);
}
}
inline void ParCompactionManager::follow_contents(oop obj) {
assert(PSParallelCompact::mark_bitmap()->is_marked(obj), "should be marked");
if (obj->is_objArray()) {
_mark_and_push_closure.do_klass(obj->klass());
follow_array(objArrayOop(obj), 0);
inline void ParCompactionManager::follow_contents(const ScannerTask& task, bool stolen) {
if (task.is_partial_array_state()) {
assert(PSParallelCompact::mark_bitmap()->is_marked(task.to_partial_array_state()->source()), "should be marked");
process_array_chunk(task.to_partial_array_state(), stolen);
} else {
obj->oop_iterate(&_mark_and_push_closure);
oop obj = task.to_oop();
assert(PSParallelCompact::mark_bitmap()->is_marked(obj), "should be marked");
if (obj->is_objArray()) {
push_objArray(obj);
} else {
obj->oop_iterate(&_mark_and_push_closure);
}
}
}
@ -219,5 +209,4 @@ inline void ParCompactionManager::flush_and_destroy_marking_stats_cache() {
delete _marking_stats_cache;
_marking_stats_cache = nullptr;
}
#endif // SHARE_GC_PARALLEL_PSCOMPACTIONMANAGER_INLINE_HPP
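follow_array_specialized() now simply visits the claimed slice [start, end) of the array's element slots; the element width (oop vs. narrowOop) is picked by follow_array() based on UseCompressedOops. A compilable toy version of the same pattern, using int and short as stand-ins for the two element widths (all names here are invented for illustration):

#include <cstddef>
#include <cstdio>

// Visit the claimed slice [start, end) of an array's element slots; other
// slices of the same array are scanned by whichever worker claims them.
template <typename Element>
void scan_slice(const Element* base, size_t start, size_t end) {
  for (const Element* e = base + start; e < base + end; ++e) {
    if (*e != 0) {                     // mark_and_push() likewise skips null slots
      std::printf("visit %d\n", (int)*e);
    }
  }
}

// Runtime selection of the element width, mirroring the UseCompressedOops check.
void scan_array(const void* base, size_t start, size_t end, bool use_narrow) {
  if (use_narrow) {
    scan_slice(static_cast<const short*>(base), start, end);
  } else {
    scan_slice(static_cast<const int*>(base), start, end);
  }
}

int main() {
  int slots[] = {7, 0, 3, 9, 0, 5};
  scan_array(slots, 2, 5, /*use_narrow=*/false);   // visits 3 and 9, skips the null slot
  return 0;
}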

View File: src/hotspot/share/gc/parallel/psParallelCompact.cpp

@ -1214,12 +1214,9 @@ void steal_marking_work(TaskTerminator& terminator, uint worker_id) {
ParCompactionManager::gc_thread_compaction_manager(worker_id);
do {
oop obj = nullptr;
ObjArrayTask task;
if (ParCompactionManager::steal_objarray(worker_id, task)) {
cm->follow_array((objArrayOop)task.obj(), task.index());
} else if (ParCompactionManager::steal(worker_id, obj)) {
cm->follow_contents(obj);
ScannerTask task;
if (ParCompactionManager::steal(worker_id, task)) {
cm->follow_contents(task, true);
}
cm->follow_marking_stacks();
} while (!terminator.offer_termination());
@ -1235,7 +1232,7 @@ public:
MarkFromRootsTask(uint active_workers) :
WorkerTask("MarkFromRootsTask"),
_strong_roots_scope(active_workers),
_terminator(active_workers, ParCompactionManager::oop_task_queues()),
_terminator(active_workers, ParCompactionManager::marking_stacks()),
_active_workers(active_workers) {}
virtual void work(uint worker_id) {
@ -1273,7 +1270,7 @@ class ParallelCompactRefProcProxyTask : public RefProcProxyTask {
public:
ParallelCompactRefProcProxyTask(uint max_workers)
: RefProcProxyTask("ParallelCompactRefProcProxyTask", max_workers),
_terminator(_max_workers, ParCompactionManager::oop_task_queues()) {}
_terminator(_max_workers, ParCompactionManager::marking_stacks()) {}
void work(uint worker_id) override {
assert(worker_id < _max_workers, "sanity");
@ -1383,8 +1380,7 @@ void PSParallelCompact::marking_phase(ParallelOldTracer *gc_tracer) {
_gc_tracer.report_object_count_after_gc(is_alive_closure(), &ParallelScavengeHeap::heap()->workers());
}
#if TASKQUEUE_STATS
ParCompactionManager::oop_task_queues()->print_and_reset_taskqueue_stats("Oop Queue");
ParCompactionManager::_objarray_task_queues->print_and_reset_taskqueue_stats("ObjArrayOop Queue");
ParCompactionManager::print_and_reset_taskqueue_stats();
#endif
}

View File: src/hotspot/share/gc/parallel/psScavenge.cpp

@ -204,7 +204,7 @@ class ParallelScavengeRefProcProxyTask : public RefProcProxyTask {
public:
ParallelScavengeRefProcProxyTask(uint max_workers)
: RefProcProxyTask("ParallelScavengeRefProcProxyTask", max_workers),
_terminator(max_workers, ParCompactionManager::oop_task_queues()) {}
_terminator(max_workers, ParCompactionManager::marking_stacks()) {}
void work(uint worker_id) override {
assert(worker_id < _max_workers, "sanity");

View File: src/hotspot/share/gc/shared/taskqueue.hpp

@ -561,8 +561,10 @@ private:
class PartialArrayState;
// Discriminated union over oop*, narrowOop*, and PartialArrayState.
// Discriminated union over oop/oop*, narrowOop*, and PartialArrayState.
// Uses a low tag in the associated pointer to identify the category.
// Oop/oop* are overloaded using the same tag because they can not appear at the
// same time.
// Used as a task queue element type.
class ScannerTask {
void* _p;
@ -595,6 +597,8 @@ class ScannerTask {
public:
ScannerTask() : _p(nullptr) {}
explicit ScannerTask(oop p) : _p(encode(p, OopTag)) {}
explicit ScannerTask(oop* p) : _p(encode(p, OopTag)) {}
explicit ScannerTask(narrowOop* p) : _p(encode(p, NarrowOopTag)) {}
@ -622,6 +626,10 @@ public:
return static_cast<oop*>(decode(OopTag));
}
oop to_oop() const {
return cast_to_oop(decode(OopTag));
}
narrowOop* to_narrow_oop_ptr() const {
return static_cast<narrowOop*>(decode(NarrowOopTag));
}
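ScannerTask can reuse one tag for both oop and oop* because Full GC marking only ever enqueues whole objects while scavenging enqueues field addresses, so the two never share a queue. The sketch below shows the underlying low-tag pointer trick in isolation; the tag values, member names, and stand-in types (int* and short*) are invented here and do not reflect ScannerTask's actual layout.

#include <cassert>
#include <cstdint>
#include <cstdio>

// Minimal tagged-pointer discriminated union: an aligned pointer's low bits are
// always zero, so a small category tag can be stored there and masked off on
// decode. Purely illustrative; not ScannerTask's real tags or interface.
class TaggedPtr {
  static const uintptr_t TagMask   = 0x3;  // low two bits hold the category
  static const uintptr_t WideTag   = 0x0;  // e.g. oop / oop* (never used together)
  static const uintptr_t NarrowTag = 0x1;  // e.g. narrowOop*
  static const uintptr_t StateTag  = 0x2;  // e.g. PartialArrayState*

  uintptr_t _p;

  static uintptr_t encode(const void* p, uintptr_t tag) {
    uintptr_t v = reinterpret_cast<uintptr_t>(p);
    assert((v & TagMask) == 0 && "pointer must be at least 4-byte aligned");
    return v | tag;
  }
  void* decode(uintptr_t tag) const {
    assert((_p & TagMask) == tag && "decoding with the wrong category");
    return reinterpret_cast<void*>(_p & ~TagMask);
  }

public:
  explicit TaggedPtr(int* wide)     : _p(encode(wide,   WideTag))   {}
  explicit TaggedPtr(short* narrow) : _p(encode(narrow, NarrowTag)) {}

  bool is_narrow() const { return (_p & TagMask) == NarrowTag; }
  int*   as_wide()   const { return static_cast<int*>(decode(WideTag)); }
  short* as_narrow() const { return static_cast<short*>(decode(NarrowTag)); }
};

int main() {
  int x = 42;
  TaggedPtr t(&x);
  std::printf("%d\n", t.is_narrow() ? 0 : *t.as_wide());   // prints 42
  return 0;
}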