From 2d3c9c5e673cf72fd4873fb5139e7d7a3d65a097 Mon Sep 17 00:00:00 2001
From: Albert Mingkun Yang
Date: Mon, 26 Feb 2024 15:34:10 +0000
Subject: [PATCH] 8325553: Parallel: Use per-marker cache for marking stats during Full GC

Reviewed-by: sjohanss, tschatzl
---
Notes (ignored when the patch is applied; not part of the commit message):

 * MarkingStatsCache holds 1024 {region_id, live_words} entries, indexed by
   (region_id & 1023). A push to a slot that holds a different region with a
   non-zero count first flushes that count via add_live_obj(), then reuses
   the slot.
 * For an object spanning several regions only the first-region portion goes
   through the cache; middle and last regions are written directly with
   set_partial_obj_size() / set_partial_obj_addr().
 * Each worker creates its cache at the start of work(); marking_phase()
   flushes and deletes the caches of all active workers in the
   "Flush Marking Stats" scope. mark_obj() no longer calls
   _summary_data.add_obj().
 * This copy was rebuilt from a whitespace-collapsed rendering. Alignment
   whitespace inside context lines could not be recovered, so apply with
   whitespace-insensitive context matching if needed
   (e.g. `git apply --ignore-whitespace`).

 .../share/gc/parallel/psCompactionManager.cpp |  1 +
 .../share/gc/parallel/psCompactionManager.hpp | 34 ++++++++-
 .../parallel/psCompactionManager.inline.hpp   | 71 +++++++++++++++++++
 .../share/gc/parallel/psParallelCompact.cpp   | 14 ++++
 .../gc/parallel/psParallelCompact.inline.hpp  |  1 -
 5 files changed, 119 insertions(+), 2 deletions(-)

diff --git a/src/hotspot/share/gc/parallel/psCompactionManager.cpp b/src/hotspot/share/gc/parallel/psCompactionManager.cpp
index e3b35db4bff..fe9bcbff703 100644
--- a/src/hotspot/share/gc/parallel/psCompactionManager.cpp
+++ b/src/hotspot/share/gc/parallel/psCompactionManager.cpp
@@ -61,6 +61,7 @@ ParCompactionManager::ParCompactionManager() {
   reset_bitmap_query_cache();
 
   _deferred_obj_array = new (mtGC) GrowableArray<HeapWord*>(10, mtGC);
+  _marking_stats_cache = nullptr;
 }
 
 void ParCompactionManager::initialize(ParMarkBitMap* mbm) {
diff --git a/src/hotspot/share/gc/parallel/psCompactionManager.hpp b/src/hotspot/share/gc/parallel/psCompactionManager.hpp
index 458d33af74a..b33ad06ee3e 100644
--- a/src/hotspot/share/gc/parallel/psCompactionManager.hpp
+++ b/src/hotspot/share/gc/parallel/psCompactionManager.hpp
@@ -111,7 +111,35 @@ class ParCompactionManager : public CHeapObj<mtGC> {
   static RegionTaskQueueSet* region_task_queues() { return _region_task_queues; }
   OopTaskQueue* oop_stack() { return &_oop_stack; }
 
- public:
+  // To collect per-region live-words in a worker local cache in order to
+  // reduce threads contention.
+  class MarkingStatsCache : public CHeapObj<mtGC> {
+    constexpr static size_t num_entries = 1024;
+    static_assert(is_power_of_2(num_entries), "inv");
+    static_assert(num_entries > 0, "inv");
+
+    constexpr static size_t entry_mask = num_entries - 1;
+
+    struct CacheEntry {
+      size_t region_id;
+      size_t live_words;
+    };
+
+    CacheEntry entries[num_entries] = {};
+
+    inline void push(size_t region_id, size_t live_words);
+
+  public:
+    inline void push(oop obj, size_t live_words);
+
+    inline void evict(size_t index);
+
+    inline void evict_all();
+  };
+
+  MarkingStatsCache* _marking_stats_cache;
+
+public:
   static const size_t InvalidShadow = ~0;
   static size_t pop_shadow_region_mt_safe(PSParallelCompact::RegionData* region_ptr);
   static void push_shadow_region_mt_safe(size_t shadow_region);
@@ -198,6 +226,10 @@ class ParCompactionManager : public CHeapObj<mtGC> {
     virtual void do_void();
   };
 
+  inline void create_marking_stats_cache();
+
+  inline void flush_and_destroy_marking_stats_cache();
+
   // Called after marking.
   static void verify_all_marking_stack_empty() NOT_DEBUG_RETURN;
 
diff --git a/src/hotspot/share/gc/parallel/psCompactionManager.inline.hpp b/src/hotspot/share/gc/parallel/psCompactionManager.inline.hpp
index c0eb5460228..ae2e449e0b6 100644
--- a/src/hotspot/share/gc/parallel/psCompactionManager.inline.hpp
+++ b/src/hotspot/share/gc/parallel/psCompactionManager.inline.hpp
@@ -107,6 +107,8 @@ inline void ParCompactionManager::mark_and_push(T* p) {
     assert(ParallelScavengeHeap::heap()->is_in(obj), "should be in heap");
 
     if (mark_bitmap()->is_unmarked(obj) && PSParallelCompact::mark_obj(obj)) {
+      assert(_marking_stats_cache != nullptr, "inv");
+      _marking_stats_cache->push(obj, obj->size());
       push(obj);
 
       if (StringDedup::is_enabled() &&
@@ -174,4 +176,73 @@ inline void ParCompactionManager::follow_contents(oop obj) {
   }
 }
 
+inline void ParCompactionManager::MarkingStatsCache::push(size_t region_id, size_t live_words) {
+  size_t index = (region_id & entry_mask);
+  if (entries[index].region_id == region_id) {
+    // Hit
+    entries[index].live_words += live_words;
+    return;
+  }
+  // Miss
+  if (entries[index].live_words != 0) {
+    evict(index);
+  }
+  entries[index].region_id = region_id;
+  entries[index].live_words = live_words;
+}
+
+inline void ParCompactionManager::MarkingStatsCache::push(oop obj, size_t live_words) {
+  ParallelCompactData& data = PSParallelCompact::summary_data();
+  const size_t region_size = ParallelCompactData::RegionSize;
+
+  HeapWord* addr = cast_from_oop<HeapWord*>(obj);
+  const size_t start_region_id = data.addr_to_region_idx(addr);
+  const size_t end_region_id = data.addr_to_region_idx(addr + live_words - 1);
+  if (start_region_id == end_region_id) {
+    // Completely inside this region
+    push(start_region_id, live_words);
+    return;
+  }
+
+  // First region
+  push(start_region_id, region_size - data.region_offset(addr));
+
+  // Middle regions; bypass cache
+  for (size_t i = start_region_id + 1; i < end_region_id; ++i) {
+    data.region(i)->set_partial_obj_size(region_size);
+    data.region(i)->set_partial_obj_addr(addr);
+  }
+
+  // Last region; bypass cache
+  const size_t end_offset = data.region_offset(addr + live_words - 1);
+  data.region(end_region_id)->set_partial_obj_size(end_offset + 1);
+  data.region(end_region_id)->set_partial_obj_addr(addr);
+}
+
+inline void ParCompactionManager::MarkingStatsCache::evict(size_t index) {
+  ParallelCompactData& data = PSParallelCompact::summary_data();
+  // flush to global data
+  data.region(entries[index].region_id)->add_live_obj(entries[index].live_words);
+}
+
+inline void ParCompactionManager::MarkingStatsCache::evict_all() {
+  for (size_t i = 0; i < num_entries; ++i) {
+    if (entries[i].live_words != 0) {
+      evict(i);
+      entries[i].live_words = 0;
+    }
+  }
+}
+
+inline void ParCompactionManager::create_marking_stats_cache() {
+  assert(_marking_stats_cache == nullptr, "precondition");
+  _marking_stats_cache = new MarkingStatsCache();
+}
+
+inline void ParCompactionManager::flush_and_destroy_marking_stats_cache() {
+  _marking_stats_cache->evict_all();
+  delete _marking_stats_cache;
+  _marking_stats_cache = nullptr;
+}
+
 #endif // SHARE_GC_PARALLEL_PSCOMPACTIONMANAGER_INLINE_HPP
diff --git a/src/hotspot/share/gc/parallel/psParallelCompact.cpp b/src/hotspot/share/gc/parallel/psParallelCompact.cpp
index d7bfe5242a9..9df0d7fb4d7 100644
--- a/src/hotspot/share/gc/parallel/psParallelCompact.cpp
+++ b/src/hotspot/share/gc/parallel/psParallelCompact.cpp
@@ -1969,6 +1969,7 @@ public:
 
   virtual void work(uint worker_id) {
     ParCompactionManager* cm = ParCompactionManager::gc_thread_compaction_manager(worker_id);
+    cm->create_marking_stats_cache();
     PCMarkAndPushClosure mark_and_push_closure(cm);
 
     {
@@ -2017,6 +2018,13 @@ public:
   }
 };
 
+static void flush_marking_stats_cache(const uint num_workers) {
+  for (uint i = 0; i < num_workers; ++i) {
+    ParCompactionManager* cm = ParCompactionManager::gc_thread_compaction_manager(i);
+    cm->flush_and_destroy_marking_stats_cache();
+  }
+}
+
 void PSParallelCompact::marking_phase(ParallelOldTracer *gc_tracer) {
   // Recursively traverse all live objects and mark them
   GCTraceTime(Info, gc, phases) tm("Marking Phase", &_gc_timer);
@@ -2046,6 +2054,12 @@ void PSParallelCompact::marking_phase(ParallelOldTracer *gc_tracer) {
     pt.print_all_references();
   }
 
+  {
+    GCTraceTime(Debug, gc, phases) tm("Flush Marking Stats", &_gc_timer);
+
+    flush_marking_stats_cache(active_gc_threads);
+  }
+
   // This is the point where the entire marking should have completed.
   ParCompactionManager::verify_all_marking_stack_empty();
 
diff --git a/src/hotspot/share/gc/parallel/psParallelCompact.inline.hpp b/src/hotspot/share/gc/parallel/psParallelCompact.inline.hpp
index ff6052d5070..bec95c0e13c 100644
--- a/src/hotspot/share/gc/parallel/psParallelCompact.inline.hpp
+++ b/src/hotspot/share/gc/parallel/psParallelCompact.inline.hpp
@@ -100,7 +100,6 @@ inline void PSParallelCompact::check_new_location(HeapWord* old_addr, HeapWord*
 inline bool PSParallelCompact::mark_obj(oop obj) {
   const size_t obj_size = obj->size();
   if (mark_bitmap()->mark_obj(obj, obj_size)) {
-    _summary_data.add_obj(obj, obj_size);
     ContinuationGCSupport::transform_stack_chunk(obj);
     return true;
   } else {