diff --git a/src/hotspot/share/gc/parallel/psCompactionManager.cpp b/src/hotspot/share/gc/parallel/psCompactionManager.cpp
index 0601c5047eb..f00e18c3297 100644
--- a/src/hotspot/share/gc/parallel/psCompactionManager.cpp
+++ b/src/hotspot/share/gc/parallel/psCompactionManager.cpp
@@ -28,6 +28,8 @@
 #include "gc/parallel/psCompactionManager.inline.hpp"
 #include "gc/parallel/psOldGen.hpp"
 #include "gc/parallel/psParallelCompact.inline.hpp"
+#include "gc/shared/partialArraySplitter.inline.hpp"
+#include "gc/shared/partialArrayState.hpp"
 #include "gc/shared/preservedMarks.inline.hpp"
 #include "gc/shared/taskqueue.inline.hpp"
 #include "logging/log.hpp"
@@ -42,9 +44,9 @@
 PSOldGen* ParCompactionManager::_old_gen = nullptr;
 ParCompactionManager** ParCompactionManager::_manager_array = nullptr;
 
-ParCompactionManager::OopTaskQueueSet* ParCompactionManager::_oop_task_queues = nullptr;
-ParCompactionManager::ObjArrayTaskQueueSet* ParCompactionManager::_objarray_task_queues = nullptr;
+ParCompactionManager::PSMarkTasksQueueSet* ParCompactionManager::_marking_stacks = nullptr;
 ParCompactionManager::RegionTaskQueueSet* ParCompactionManager::_region_task_queues = nullptr;
+PartialArrayStateManager* ParCompactionManager::_partial_array_state_manager = nullptr;
 
 ObjectStartArray* ParCompactionManager::_start_array = nullptr;
 ParMarkBitMap* ParCompactionManager::_mark_bitmap = nullptr;
@@ -54,8 +56,10 @@ Monitor* ParCompactionManager::_shadow_region_monitor = nullptr;
 PreservedMarksSet* ParCompactionManager::_preserved_marks_set = nullptr;
 
 ParCompactionManager::ParCompactionManager(PreservedMarks* preserved_marks,
-                                           ReferenceProcessor* ref_processor)
-  : _mark_and_push_closure(this, ref_processor) {
+                                           ReferenceProcessor* ref_processor,
+                                           uint parallel_gc_threads)
+  :_partial_array_splitter(_partial_array_state_manager, parallel_gc_threads),
+   _mark_and_push_closure(this, ref_processor) {
 
   ParallelScavengeHeap* heap = ParallelScavengeHeap::heap();
 
@@ -78,8 +82,10 @@ void ParCompactionManager::initialize(ParMarkBitMap* mbm) {
   assert(_manager_array == nullptr, "Attempt to initialize twice");
   _manager_array = NEW_C_HEAP_ARRAY(ParCompactionManager*, parallel_gc_threads, mtGC);
 
-  _oop_task_queues = new OopTaskQueueSet(parallel_gc_threads);
-  _objarray_task_queues = new ObjArrayTaskQueueSet(parallel_gc_threads);
+  assert(_partial_array_state_manager == nullptr, "Attempt to initialize twice");
+  _partial_array_state_manager
+    = new PartialArrayStateManager(parallel_gc_threads);
+  _marking_stacks = new PSMarkTasksQueueSet(parallel_gc_threads);
   _region_task_queues = new RegionTaskQueueSet(parallel_gc_threads);
 
   _preserved_marks_set = new PreservedMarksSet(true);
@@ -88,16 +94,15 @@ void ParCompactionManager::initialize(ParMarkBitMap* mbm) {
   // Create and register the ParCompactionManager(s) for the worker threads.
  for(uint i=0; i<parallel_gc_threads; i++) {
     _manager_array[i] = new ParCompactionManager(_preserved_marks_set->get(i),
-                                                 PSParallelCompact::ref_processor());
-    oop_task_queues()->register_queue(i, _manager_array[i]->oop_stack());
-    _objarray_task_queues->register_queue(i, &_manager_array[i]->_objarray_stack);
+                                                 PSParallelCompact::ref_processor(),
+                                                 parallel_gc_threads);
+    marking_stacks()->register_queue(i, _manager_array[i]->marking_stack());
     region_task_queues()->register_queue(i, _manager_array[i]->region_stack());
   }
 
   _shadow_region_array = new (mtGC) GrowableArray<size_t>(10, mtGC);
 
   _shadow_region_monitor = new Monitor(Mutex::nosafepoint, "CompactionManager_lock");
-
 }
 
 void ParCompactionManager::flush_all_string_dedup_requests() {
@@ -114,42 +119,41 @@ ParCompactionManager::gc_thread_compaction_manager(uint index) {
   return _manager_array[index];
 }
 
-inline void ParCompactionManager::publish_and_drain_oop_tasks() {
-  oop obj;
-  while (oop_stack()->pop_overflow(obj)) {
-    if (!oop_stack()->try_push_to_taskqueue(obj)) {
-      follow_contents(obj);
-    }
-  }
-  while (oop_stack()->pop_local(obj)) {
-    follow_contents(obj);
-  }
+void ParCompactionManager::push_objArray(oop obj) {
+  assert(obj->is_objArray(), "precondition");
+  _mark_and_push_closure.do_klass(obj->klass());
+
+  objArrayOop obj_array = objArrayOop(obj);
+  size_t array_length = obj_array->length();
+  size_t initial_chunk_size =
+    _partial_array_splitter.start(&_marking_stack, obj_array, nullptr, array_length);
+  follow_array(obj_array, 0, initial_chunk_size);
 }
 
-bool ParCompactionManager::publish_or_pop_objarray_tasks(ObjArrayTask& task) {
-  while (_objarray_stack.pop_overflow(task)) {
-    if (!_objarray_stack.try_push_to_taskqueue(task)) {
-      return true;
-    }
-  }
-  return false;
+void ParCompactionManager::process_array_chunk(PartialArrayState* state, bool stolen) {
+  // Access before release by claim().
+  oop obj = state->source();
+  PartialArraySplitter::Claim claim =
+    _partial_array_splitter.claim(state, &_marking_stack, stolen);
+  follow_array(objArrayOop(obj), claim._start, claim._end);
 }
 
 void ParCompactionManager::follow_marking_stacks() {
+  ScannerTask task;
   do {
     // First, try to move tasks from the overflow stack into the shared buffer, so
    // that other threads can steal. Otherwise process the overflow stack first.
-    publish_and_drain_oop_tasks();
-
-    // Process ObjArrays one at a time to avoid marking stack bloat.
-    ObjArrayTask task;
-    if (publish_or_pop_objarray_tasks(task) ||
-        _objarray_stack.pop_local(task)) {
-      follow_array((objArrayOop)task.obj(), task.index());
+    while (marking_stack()->pop_overflow(task)) {
+      if (!marking_stack()->try_push_to_taskqueue(task)) {
+        follow_contents(task, false);
+      }
     }
-  } while (!marking_stacks_empty());
+    while (marking_stack()->pop_local(task)) {
+      follow_contents(task, false);
+    }
+  } while (!marking_stack_empty());
 
-  assert(marking_stacks_empty(), "Sanity");
+  assert(marking_stack_empty(), "Sanity");
 }
 
 void ParCompactionManager::drain_region_stacks() {
@@ -196,11 +200,32 @@ void ParCompactionManager::remove_all_shadow_regions() {
   _shadow_region_array->clear();
 }
 
+
+#if TASKQUEUE_STATS
+void ParCompactionManager::print_and_reset_taskqueue_stats() {
+  marking_stacks()->print_and_reset_taskqueue_stats("Marking Stacks");
+
+  auto get_pa_stats = [&](uint i) {
+    return _manager_array[i]->partial_array_task_stats();
+  };
+  PartialArrayTaskStats::log_set(ParallelGCThreads, get_pa_stats,
+                                 "Partial Array Task Stats");
+  uint parallel_gc_threads = ParallelScavengeHeap::heap()->workers().max_workers();
+  for (uint i = 0; i < parallel_gc_threads; ++i) {
+    get_pa_stats(i)->reset();
+  }
+}
+
+PartialArrayTaskStats* ParCompactionManager::partial_array_task_stats() {
+  return _partial_array_splitter.stats();
+}
+#endif // TASKQUEUE_STATS
+
 #ifdef ASSERT
 void ParCompactionManager::verify_all_marking_stack_empty() {
   uint parallel_gc_threads = ParallelGCThreads;
   for (uint i = 0; i < parallel_gc_threads; i++) {
-    assert(_manager_array[i]->marking_stacks_empty(), "Marking stack should be empty");
+    assert(_manager_array[i]->marking_stack_empty(), "Marking stack should be empty");
   }
 }
diff --git a/src/hotspot/share/gc/parallel/psCompactionManager.hpp b/src/hotspot/share/gc/parallel/psCompactionManager.hpp
index cd4eefe775b..739d2cb1cc7 100644
--- a/src/hotspot/share/gc/parallel/psCompactionManager.hpp
+++ b/src/hotspot/share/gc/parallel/psCompactionManager.hpp
@@ -27,6 +27,9 @@
 #include "classfile/classLoaderData.hpp"
 #include "gc/parallel/psParallelCompact.hpp"
+#include "gc/shared/partialArraySplitter.hpp"
+#include "gc/shared/partialArrayTaskStats.hpp"
+#include "gc/shared/partialArrayState.hpp"
 #include "gc/shared/preservedMarks.hpp"
 #include "gc/shared/stringdedup/stringDedup.hpp"
 #include "gc/shared/taskqueue.hpp"
@@ -64,26 +67,22 @@ class ParCompactionManager : public CHeapObj<mtGC> {
   friend class PCAddThreadRootsMarkingTaskClosure;
 
  private:
-  typedef OverflowTaskQueue<oop, mtGC>            OopTaskQueue;
-  typedef GenericTaskQueueSet<OopTaskQueue, mtGC> OopTaskQueueSet;
-
-  // 32-bit: 4K * 8 = 32KiB; 64-bit: 8K * 16 = 128KiB
-  #define QUEUE_SIZE (1 << NOT_LP64(12) LP64_ONLY(13))
-  typedef OverflowTaskQueue<ObjArrayTask, mtGC, QUEUE_SIZE> ObjArrayTaskQueue;
-  typedef GenericTaskQueueSet<ObjArrayTaskQueue, mtGC>      ObjArrayTaskQueueSet;
-  #undef QUEUE_SIZE
-  typedef OverflowTaskQueue<size_t, mtGC>            RegionTaskQueue;
-  typedef GenericTaskQueueSet<RegionTaskQueue, mtGC> RegionTaskQueueSet;
+  typedef OverflowTaskQueue<ScannerTask, mtGC>       PSMarkTaskQueue;
+  typedef GenericTaskQueueSet<PSMarkTaskQueue, mtGC> PSMarkTasksQueueSet;
+  typedef OverflowTaskQueue<size_t, mtGC>            RegionTaskQueue;
+  typedef GenericTaskQueueSet<RegionTaskQueue, mtGC> RegionTaskQueueSet;
 
   static ParCompactionManager** _manager_array;
-  static OopTaskQueueSet* _oop_task_queues;
-  static ObjArrayTaskQueueSet* _objarray_task_queues;
+  static PSMarkTasksQueueSet* _marking_stacks;
   static ObjectStartArray* _start_array;
   static RegionTaskQueueSet* _region_task_queues;
   static PSOldGen* _old_gen;
 
-  OopTaskQueue _oop_stack;
-  ObjArrayTaskQueue _objarray_stack;
+  static PartialArrayStateManager* _partial_array_state_manager;
+  PartialArraySplitter _partial_array_splitter;
+
+  PSMarkTaskQueue _marking_stack;
+
   size_t _next_shadow_region;
 
   PCMarkAndPushClosure _mark_and_push_closure;
@@ -109,23 +108,20 @@ class ParCompactionManager : public CHeapObj<mtGC> {
   static PSOldGen* old_gen() { return _old_gen; }
 
   static ObjectStartArray* start_array() { return _start_array; }
-  static OopTaskQueueSet* oop_task_queues() { return _oop_task_queues; }
+  static PSMarkTasksQueueSet* marking_stacks() { return _marking_stacks; }
 
   static void initialize(ParMarkBitMap* mbm);
 
-  void publish_and_drain_oop_tasks();
-  // Try to publish all contents from the objArray task queue overflow stack to
-  // the shared objArray stack.
-  // Returns true and a valid task if there has not been enough space in the shared
-  // objArray stack, otherwise returns false and the task is invalid.
-  bool publish_or_pop_objarray_tasks(ObjArrayTask& task);
-
   ParCompactionManager(PreservedMarks* preserved_marks,
-                       ReferenceProcessor* ref_processor);
+                       ReferenceProcessor* ref_processor,
+                       uint parallel_gc_threads);
 
   // Array of task queues. Needed by the task terminator.
   static RegionTaskQueueSet* region_task_queues() { return _region_task_queues; }
-  OopTaskQueue* oop_stack() { return &_oop_stack; }
+
+  inline PSMarkTaskQueue* marking_stack() { return &_marking_stack; }
+  inline void push(PartialArrayState* stat);
+  void push_objArray(oop obj);
 
   // To collect per-region live-words in a worker local cache in order to
   // reduce threads contention.
@@ -155,6 +151,11 @@ class ParCompactionManager : public CHeapObj<mtGC> {
 
   MarkingStatsCache* _marking_stats_cache;
 
+#if TASKQUEUE_STATS
+  static void print_and_reset_taskqueue_stats();
+  PartialArrayTaskStats* partial_array_task_stats();
+#endif // TASKQUEUE_STATS
+
 public:
   static const size_t InvalidShadow = ~0;
   static size_t pop_shadow_region_mt_safe(PSParallelCompact::RegionData* region_ptr);
@@ -189,7 +190,6 @@ public:
   // Save for later processing. Must not fail.
   inline void push(oop obj);
-  inline void push_objarray(oop objarray, size_t index);
   inline void push_region(size_t index);
 
   // Check mark and maybe push on marking stack.
@@ -198,19 +198,19 @@ public:
   // Access function for compaction managers
   static ParCompactionManager* gc_thread_compaction_manager(uint index);
 
-  static bool steal(int queue_num, oop& t);
-  static bool steal_objarray(int queue_num, ObjArrayTask& t);
+  static bool steal(int queue_num, ScannerTask& t);
   static bool steal(int queue_num, size_t& region);
 
-  // Process tasks remaining on any marking stack
+  // Process tasks remaining on marking stack
   void follow_marking_stacks();
-  inline bool marking_stacks_empty() const;
+  inline bool marking_stack_empty() const;
 
   // Process tasks remaining on any stack
   void drain_region_stacks();
 
-  void follow_contents(oop obj);
-  void follow_array(objArrayOop array, int index);
+  inline void follow_contents(const ScannerTask& task, bool stolen);
+  inline void follow_array(objArrayOop array, size_t start, size_t end);
+  void process_array_chunk(PartialArrayState* state, bool stolen);
 
   class FollowStackClosure: public VoidClosure {
    private:
@@ -234,8 +234,8 @@ public:
   static void verify_all_region_stack_empty() NOT_DEBUG_RETURN;
 };
 
-bool ParCompactionManager::marking_stacks_empty() const {
-  return _oop_stack.is_empty() && _objarray_stack.is_empty();
+bool ParCompactionManager::marking_stack_empty() const {
+  return _marking_stack.is_empty();
 }
 
 #endif // SHARE_GC_PARALLEL_PSCOMPACTIONMANAGER_HPP
diff --git a/src/hotspot/share/gc/parallel/psCompactionManager.inline.hpp b/src/hotspot/share/gc/parallel/psCompactionManager.inline.hpp
index 94529d27423..2c0b8480726 100644
--- a/src/hotspot/share/gc/parallel/psCompactionManager.inline.hpp
+++ b/src/hotspot/share/gc/parallel/psCompactionManager.inline.hpp
@@ -32,6 +32,8 @@
 #include "gc/parallel/parMarkBitMap.hpp"
 #include "gc/parallel/psParallelCompact.inline.hpp"
 #include "gc/parallel/psStringDedup.hpp"
+#include "gc/shared/partialArrayState.hpp"
+#include "gc/shared/partialArrayTaskStepper.inline.hpp"
 #include "gc/shared/taskqueue.inline.hpp"
 #include "oops/access.inline.hpp"
 #include "oops/arrayOop.hpp"
@@ -46,12 +48,8 @@ inline void PCMarkAndPushClosure::do_oop_work(T* p) {
   _compaction_manager->mark_and_push(p);
 }
 
-inline bool ParCompactionManager::steal(int queue_num, oop& t) {
-  return oop_task_queues()->steal(queue_num, t);
-}
-
-inline bool ParCompactionManager::steal_objarray(int queue_num, ObjArrayTask& t) {
-  return _objarray_task_queues->steal(queue_num, t);
+inline bool ParCompactionManager::steal(int queue_num, ScannerTask& t) {
+  return marking_stacks()->steal(queue_num, t);
 }
 
 inline bool ParCompactionManager::steal(int queue_num, size_t& region) {
@@ -59,14 +57,11 @@ inline bool ParCompactionManager::steal(int queue_num, size_t& region) {
 }
 
 inline void ParCompactionManager::push(oop obj) {
-  _oop_stack.push(obj);
+  marking_stack()->push(ScannerTask(obj));
 }
 
-void ParCompactionManager::push_objarray(oop obj, size_t index)
-{
-  ObjArrayTask task(obj, index);
-  assert(task.is_valid(), "bad ObjArrayTask");
-  _objarray_stack.push(task);
+inline void ParCompactionManager::push(PartialArrayState* stat) {
+  marking_stack()->push(ScannerTask(stat));
 }
 
 void ParCompactionManager::push_region(size_t index)
@@ -111,43 +106,38 @@ inline void ParCompactionManager::FollowStackClosure::do_void() {
 }
 
 template <typename T>
-inline void follow_array_specialized(objArrayOop obj, int index, ParCompactionManager* cm) {
-  const size_t len = size_t(obj->length());
-  const size_t beg_index = size_t(index);
-  assert(beg_index < len || len == 0, "index too large");
-
-  const size_t stride = MIN2(len - beg_index, (size_t)ObjArrayMarkingStride);
-  const size_t end_index = beg_index + stride;
+inline void follow_array_specialized(objArrayOop obj, size_t start, size_t end, ParCompactionManager* cm) {
+  assert(start <= end, "invariant");
   T* const base = (T*)obj->base();
-  T* const beg = base + beg_index;
-  T* const end = base + end_index;
-
-  if (end_index < len) {
-    cm->push_objarray(obj, end_index); // Push the continuation.
-  }
+  T* const beg = base + start;
+  T* const chunk_end = base + end;
 
   // Push the non-null elements of the next stride on the marking stack.
-  for (T* e = beg; e < end; e++) {
+  for (T* e = beg; e < chunk_end; e++) {
     cm->mark_and_push(e);
   }
 }
 
-inline void ParCompactionManager::follow_array(objArrayOop obj, int index) {
+inline void ParCompactionManager::follow_array(objArrayOop obj, size_t start, size_t end) {
   if (UseCompressedOops) {
-    follow_array_specialized<narrowOop>(obj, index, this);
+    follow_array_specialized<narrowOop>(obj, start, end, this);
   } else {
-    follow_array_specialized<oop>(obj, index, this);
+    follow_array_specialized<oop>(obj, start, end, this);
   }
 }
 
-inline void ParCompactionManager::follow_contents(oop obj) {
-  assert(PSParallelCompact::mark_bitmap()->is_marked(obj), "should be marked");
-
-  if (obj->is_objArray()) {
-    _mark_and_push_closure.do_klass(obj->klass());
-    follow_array(objArrayOop(obj), 0);
+inline void ParCompactionManager::follow_contents(const ScannerTask& task, bool stolen) {
+  if (task.is_partial_array_state()) {
+    assert(PSParallelCompact::mark_bitmap()->is_marked(task.to_partial_array_state()->source()), "should be marked");
+    process_array_chunk(task.to_partial_array_state(), stolen);
   } else {
-    obj->oop_iterate(&_mark_and_push_closure);
+    oop obj = task.to_oop();
+    assert(PSParallelCompact::mark_bitmap()->is_marked(obj), "should be marked");
+    if (obj->is_objArray()) {
+      push_objArray(obj);
+    } else {
+      obj->oop_iterate(&_mark_and_push_closure);
+    }
   }
 }
 
@@ -219,5 +209,4 @@ inline void ParCompactionManager::flush_and_destroy_marking_stats_cache() {
   delete _marking_stats_cache;
   _marking_stats_cache = nullptr;
 }
-
 #endif // SHARE_GC_PARALLEL_PSCOMPACTIONMANAGER_INLINE_HPP
diff --git a/src/hotspot/share/gc/parallel/psParallelCompact.cpp b/src/hotspot/share/gc/parallel/psParallelCompact.cpp
index d2342dfd897..90f4d6367bd 100644
--- a/src/hotspot/share/gc/parallel/psParallelCompact.cpp
+++ b/src/hotspot/share/gc/parallel/psParallelCompact.cpp
@@ -1214,12 +1214,9 @@ void steal_marking_work(TaskTerminator& terminator, uint worker_id) {
     ParCompactionManager::gc_thread_compaction_manager(worker_id);
 
   do {
-    oop obj = nullptr;
-    ObjArrayTask task;
-    if (ParCompactionManager::steal_objarray(worker_id, task)) {
-      cm->follow_array((objArrayOop)task.obj(), task.index());
-    } else if (ParCompactionManager::steal(worker_id, obj)) {
-      cm->follow_contents(obj);
+    ScannerTask task;
+    if (ParCompactionManager::steal(worker_id, task)) {
+      cm->follow_contents(task, true);
     }
     cm->follow_marking_stacks();
   } while (!terminator.offer_termination());
@@ -1235,7 +1232,7 @@ public:
   MarkFromRootsTask(uint active_workers) :
       WorkerTask("MarkFromRootsTask"),
       _strong_roots_scope(active_workers),
-      _terminator(active_workers, ParCompactionManager::oop_task_queues()),
+      _terminator(active_workers, ParCompactionManager::marking_stacks()),
       _active_workers(active_workers) {}
 
   virtual void work(uint worker_id) {
@@ -1273,7 +1270,7 @@ class ParallelCompactRefProcProxyTask : public RefProcProxyTask {
 public:
   ParallelCompactRefProcProxyTask(uint max_workers)
     : RefProcProxyTask("ParallelCompactRefProcProxyTask", max_workers),
-      _terminator(_max_workers, ParCompactionManager::oop_task_queues()) {}
+      _terminator(_max_workers, ParCompactionManager::marking_stacks()) {}
 
   void work(uint worker_id) override {
     assert(worker_id < _max_workers, "sanity");
@@ -1383,8 +1380,7 @@ void PSParallelCompact::marking_phase(ParallelOldTracer *gc_tracer) {
     _gc_tracer.report_object_count_after_gc(is_alive_closure(), &ParallelScavengeHeap::heap()->workers());
   }
 #if TASKQUEUE_STATS
-  ParCompactionManager::oop_task_queues()->print_and_reset_taskqueue_stats("Oop Queue");
-  ParCompactionManager::_objarray_task_queues->print_and_reset_taskqueue_stats("ObjArrayOop Queue");
+  ParCompactionManager::print_and_reset_taskqueue_stats();
 #endif
 }
 
diff --git a/src/hotspot/share/gc/parallel/psScavenge.cpp b/src/hotspot/share/gc/parallel/psScavenge.cpp
index 7434097da21..be31da5b05d 100644
--- a/src/hotspot/share/gc/parallel/psScavenge.cpp
+++ b/src/hotspot/share/gc/parallel/psScavenge.cpp
@@ -204,7 +204,7 @@ class ParallelScavengeRefProcProxyTask : public RefProcProxyTask {
 public:
   ParallelScavengeRefProcProxyTask(uint max_workers)
     : RefProcProxyTask("ParallelScavengeRefProcProxyTask", max_workers),
-      _terminator(max_workers, ParCompactionManager::oop_task_queues()) {}
+      _terminator(max_workers, ParCompactionManager::marking_stacks()) {}
 
   void work(uint worker_id) override {
     assert(worker_id < _max_workers, "sanity");
diff --git a/src/hotspot/share/gc/shared/taskqueue.hpp b/src/hotspot/share/gc/shared/taskqueue.hpp
index a6ab5741048..42d32f3dc96 100644
--- a/src/hotspot/share/gc/shared/taskqueue.hpp
+++ b/src/hotspot/share/gc/shared/taskqueue.hpp
@@ -561,8 +561,10 @@ private:
 
 class PartialArrayState;
 
-// Discriminated union over oop*, narrowOop*, and PartialArrayState.
+// Discriminated union over oop/oop*, narrowOop*, and PartialArrayState.
 // Uses a low tag in the associated pointer to identify the category.
+// Oop/oop* are overloaded using the same tag because they can not appear at the
+// same time.
 // Used as a task queue element type.
 class ScannerTask {
   void* _p;
@@ -595,6 +597,8 @@ class ScannerTask {
 public:
   ScannerTask() : _p(nullptr) {}
 
+  explicit ScannerTask(oop p) : _p(encode(p, OopTag)) {}
+
   explicit ScannerTask(oop* p) : _p(encode(p, OopTag)) {}
 
   explicit ScannerTask(narrowOop* p) : _p(encode(p, NarrowOopTag)) {}
@@ -622,6 +626,10 @@ public:
     return static_cast<oop*>(decode(OopTag));
   }
 
+  oop to_oop() const {
+    return cast_to_oop(decode(OopTag));
+  }
+
   narrowOop* to_narrow_oop_ptr() const {
     return static_cast<narrowOop*>(decode(NarrowOopTag));
   }