diff --git a/src/hotspot/share/gc/parallel/parMarkBitMap.cpp b/src/hotspot/share/gc/parallel/parMarkBitMap.cpp index deffcc019c9..6452f03b5cb 100644 --- a/src/hotspot/share/gc/parallel/parMarkBitMap.cpp +++ b/src/hotspot/share/gc/parallel/parMarkBitMap.cpp @@ -37,9 +37,6 @@ bool ParMarkBitMap::initialize(MemRegion covered_region) { const idx_t bits = bits_required(covered_region); - // The bits will be divided evenly between two bitmaps; each of them should be - // an integral number of words. - assert(is_aligned(bits, (BitsPerWord * 2)), "region size unaligned"); const size_t words = bits / BitsPerWord; const size_t raw_bytes = words * sizeof(idx_t); @@ -61,8 +58,7 @@ ParMarkBitMap::initialize(MemRegion covered_region) _region_start = covered_region.start(); _region_size = covered_region.word_size(); BitMap::bm_word_t* map = (BitMap::bm_word_t*)_virtual_space->reserved_low_addr(); - _beg_bits = BitMapView(map, bits / 2); - _end_bits = BitMapView(map + words / 2, bits / 2); + _beg_bits = BitMapView(map, bits); return true; } @@ -77,176 +73,6 @@ ParMarkBitMap::initialize(MemRegion covered_region) return false; } -bool -ParMarkBitMap::mark_obj(HeapWord* addr, size_t size) -{ - const idx_t beg_bit = addr_to_bit(addr); - if (_beg_bits.par_set_bit(beg_bit)) { - const idx_t end_bit = addr_to_bit(addr + size - 1); - bool end_bit_ok = _end_bits.par_set_bit(end_bit); - assert(end_bit_ok, "concurrency problem"); - return true; - } - return false; -} - -inline bool -ParMarkBitMap::is_live_words_in_range_in_cache(ParCompactionManager* cm, HeapWord* beg_addr) const { - return cm->last_query_begin() == beg_addr; -} - -inline void -ParMarkBitMap::update_live_words_in_range_cache(ParCompactionManager* cm, HeapWord* beg_addr, oop end_obj, size_t result) const { - cm->set_last_query_begin(beg_addr); - cm->set_last_query_object(end_obj); - cm->set_last_query_return(result); -} - -size_t -ParMarkBitMap::live_words_in_range_helper(HeapWord* beg_addr, oop end_obj) const -{ - 
assert(beg_addr <= cast_from_oop(end_obj), "bad range"); - assert(is_marked(end_obj), "end_obj must be live"); - - idx_t live_bits = 0; - - // The bitmap routines require the right boundary to be word-aligned. - const idx_t end_bit = addr_to_bit(cast_from_oop(end_obj)); - const idx_t range_end = align_range_end(end_bit); - - idx_t beg_bit = find_obj_beg(addr_to_bit(beg_addr), range_end); - while (beg_bit < end_bit) { - idx_t tmp_end = find_obj_end(beg_bit, range_end); - assert(tmp_end < end_bit, "missing end bit"); - live_bits += tmp_end - beg_bit + 1; - beg_bit = find_obj_beg(tmp_end + 1, range_end); - } - return bits_to_words(live_bits); -} - -size_t -ParMarkBitMap::live_words_in_range_use_cache(ParCompactionManager* cm, HeapWord* beg_addr, oop end_oop) const -{ - HeapWord* last_beg = cm->last_query_begin(); - HeapWord* last_obj = cast_from_oop(cm->last_query_object()); - HeapWord* end_obj = cast_from_oop(end_oop); - - size_t last_ret = cm->last_query_return(); - if (end_obj > last_obj) { - last_ret = last_ret + live_words_in_range_helper(last_obj, end_oop); - last_obj = end_obj; - } else if (end_obj < last_obj) { - // The cached value is for an object that is to the left (lower address) of the current - // end_obj. Calculate back from that cached value. - if (pointer_delta(end_obj, beg_addr) > pointer_delta(last_obj, end_obj)) { - last_ret = last_ret - live_words_in_range_helper(end_obj, cast_to_oop(last_obj)); - } else { - last_ret = live_words_in_range_helper(beg_addr, end_oop); - } - last_obj = end_obj; - } - - update_live_words_in_range_cache(cm, last_beg, cast_to_oop(last_obj), last_ret); - return last_ret; -} - -size_t -ParMarkBitMap::live_words_in_range(ParCompactionManager* cm, HeapWord* beg_addr, oop end_obj) const -{ - // Try to reuse result from ParCompactionManager cache first. 
- if (is_live_words_in_range_in_cache(cm, beg_addr)) { - return live_words_in_range_use_cache(cm, beg_addr, end_obj); - } - size_t ret = live_words_in_range_helper(beg_addr, end_obj); - update_live_words_in_range_cache(cm, beg_addr, end_obj, ret); - return ret; -} - -ParMarkBitMap::IterationStatus -ParMarkBitMap::iterate(ParMarkBitMapClosure* live_closure, - idx_t range_beg, idx_t range_end) const -{ - DEBUG_ONLY(verify_bit(range_beg);) - DEBUG_ONLY(verify_bit(range_end);) - assert(range_beg <= range_end, "live range invalid"); - - // The bitmap routines require the right boundary to be word-aligned. - const idx_t search_end = align_range_end(range_end); - - idx_t cur_beg = range_beg; - while (true) { - cur_beg = find_obj_beg(cur_beg, search_end); - if (cur_beg >= range_end) { - break; - } - - const size_t size = obj_size(cur_beg); - IterationStatus status = live_closure->do_addr(bit_to_addr(cur_beg), size); - if (status != incomplete) { - assert(status == would_overflow || status == full, "sanity"); - return status; - } - - cur_beg += words_to_bits(size); - if (cur_beg >= range_end) { - break; - } - } - - return complete; -} - -ParMarkBitMap::IterationStatus -ParMarkBitMap::iterate(ParMarkBitMapClosure* live_closure, - ParMarkBitMapClosure* dead_closure, - idx_t range_beg, idx_t range_end, - idx_t dead_range_end) const -{ - DEBUG_ONLY(verify_bit(range_beg);) - DEBUG_ONLY(verify_bit(range_end);) - DEBUG_ONLY(verify_bit(dead_range_end);) - assert(range_beg <= range_end, "live range invalid"); - assert(range_end <= dead_range_end, "dead range invalid"); - - // The bitmap routines require the right boundary to be word-aligned. - const idx_t dead_search_end = align_range_end(dead_range_end); - - idx_t cur_beg = range_beg; - if (range_beg < range_end && is_unmarked(range_beg)) { - // The range starts with dead space. Look for the next object, then fill. - // This must be the beginning of old/eden/from/to-space, so it's must be - // large enough for a filler. 
- cur_beg = find_obj_beg(range_beg + 1, dead_search_end); - const idx_t dead_space_end = cur_beg - 1; - const size_t size = obj_size(range_beg, dead_space_end); - dead_closure->do_addr(bit_to_addr(range_beg), size); - } - - while (cur_beg < range_end) { - const size_t size = obj_size(cur_beg); - IterationStatus status = live_closure->do_addr(bit_to_addr(cur_beg), size); - if (status != incomplete) { - assert(status == would_overflow || status == full, "sanity"); - return status; - } - - const idx_t dead_space_beg = cur_beg + words_to_bits(size); - if (dead_space_beg >= dead_search_end) { - break; - } - // Look for the start of the next object. - cur_beg = find_obj_beg(dead_space_beg, dead_search_end); - if (cur_beg > dead_space_beg) { - // Found dead space; compute the size and invoke the dead closure. - const idx_t dead_space_end = cur_beg - 1; - dead_closure->do_addr(bit_to_addr(dead_space_beg), - obj_size(dead_space_beg, dead_space_end)); - } - } - - return complete; -} - #ifdef ASSERT void ParMarkBitMap::verify_clear() const { diff --git a/src/hotspot/share/gc/parallel/parMarkBitMap.hpp b/src/hotspot/share/gc/parallel/parMarkBitMap.hpp index ad0e51fdcdf..1975f1e3221 100644 --- a/src/hotspot/share/gc/parallel/parMarkBitMap.hpp +++ b/src/hotspot/share/gc/parallel/parMarkBitMap.hpp @@ -39,18 +39,14 @@ public: typedef BitMap::idx_t idx_t; // Values returned by the iterate() methods. - enum IterationStatus { incomplete, complete, full, would_overflow }; + enum IterationStatus { incomplete, complete, full }; inline ParMarkBitMap(); bool initialize(MemRegion covered_region); // Atomically mark an object as live. - bool mark_obj(HeapWord* addr, size_t size); - inline bool mark_obj(oop obj, size_t size); - - // Return whether the specified begin or end bit is set. 
- inline bool is_obj_beg(idx_t bit) const; - inline bool is_obj_end(idx_t bit) const; + inline bool mark_obj(HeapWord* addr); + inline bool mark_obj(oop obj); // Traditional interface for testing whether an object is marked or not (these // test only the begin bits). @@ -68,61 +64,6 @@ public: inline static size_t bits_to_words(idx_t bits); inline static idx_t words_to_bits(size_t words); - // Return the size in words of an object given a begin bit and an end bit, or - // the equivalent beg_addr and end_addr. - inline size_t obj_size(idx_t beg_bit, idx_t end_bit) const; - inline size_t obj_size(HeapWord* beg_addr, HeapWord* end_addr) const; - - // Return the size in words of the object (a search is done for the end bit). - inline size_t obj_size(idx_t beg_bit) const; - inline size_t obj_size(HeapWord* addr) const; - - // Apply live_closure to each live object that lies completely within the - // range [live_range_beg, live_range_end). This is used to iterate over the - // compacted region of the heap. Return values: - // - // complete The iteration is complete. All objects in the range - // were processed and the closure is not full; - // closure->source() is set one past the end of the range. - // - // full The closure is full; closure->source() is set to one - // past the end of the last object processed. - // - // would_overflow The next object in the range would overflow the closure; - // closure->source() is set to the start of that object. - IterationStatus iterate(ParMarkBitMapClosure* live_closure, - idx_t range_beg, idx_t range_end) const; - inline IterationStatus iterate(ParMarkBitMapClosure* live_closure, - HeapWord* range_beg, - HeapWord* range_end) const; - - // Apply live closure as above and additionally apply dead_closure to all dead - // space in the range [range_beg, dead_range_end). Note that dead_range_end - // must be >= range_end. This is used to iterate over the dense prefix. 
- // - // This method assumes that if the first bit in the range (range_beg) is not - // marked, then dead space begins at that point and the dead_closure is - // applied. Thus callers must ensure that range_beg is not in the middle of a - // live object. - IterationStatus iterate(ParMarkBitMapClosure* live_closure, - ParMarkBitMapClosure* dead_closure, - idx_t range_beg, idx_t range_end, - idx_t dead_range_end) const; - inline IterationStatus iterate(ParMarkBitMapClosure* live_closure, - ParMarkBitMapClosure* dead_closure, - HeapWord* range_beg, - HeapWord* range_end, - HeapWord* dead_range_end) const; - - // Return the number of live words in the range [beg_addr, end_obj) due to - // objects that start in the range. If a live object extends onto the range, - // the caller must detect and account for any live words due to that object. - // If a live object extends beyond the end of the range, only the words within - // the range are included in the result. The end of the range must be a live object, - // which is the case when updating pointers. This allows a branch to be removed - // from inside the loop. - size_t live_words_in_range(ParCompactionManager* cm, HeapWord* beg_addr, oop end_obj) const; - inline HeapWord* region_start() const; inline HeapWord* region_end() const; inline size_t region_size() const; @@ -141,11 +82,12 @@ public: // respectively) in the range [beg, end). If no object is found, return end. // end must be word-aligned. inline idx_t find_obj_beg(idx_t beg, idx_t end) const; - inline idx_t find_obj_end(idx_t beg, idx_t end) const; inline HeapWord* find_obj_beg(HeapWord* beg, HeapWord* end) const; - inline HeapWord* find_obj_end(HeapWord* beg, HeapWord* end) const; + // Return the address of the last obj-start in the range [beg, end). If no + // object is found, return end. + inline HeapWord* find_obj_beg_reverse(HeapWord* beg, HeapWord* end) const; // Clear a range of bits or the entire bitmap (both begin and end bits are // cleared). 
inline void clear_range(idx_t beg, idx_t end); @@ -158,7 +100,6 @@ public: void print_on_error(outputStream* st) const { st->print_cr("Marking Bits: (ParMarkBitMap*) " PTR_FORMAT, p2i(this)); _beg_bits.print_on_error(st, " Begin Bits: "); - _end_bits.print_on_error(st, " End Bits: "); } #ifdef ASSERT @@ -168,11 +109,6 @@ public: #endif // #ifdef ASSERT private: - size_t live_words_in_range_helper(HeapWord* beg_addr, oop end_obj) const; - - bool is_live_words_in_range_in_cache(ParCompactionManager* cm, HeapWord* beg_addr) const; - size_t live_words_in_range_use_cache(ParCompactionManager* cm, HeapWord* beg_addr, oop end_obj) const; - void update_live_words_in_range_cache(ParCompactionManager* cm, HeapWord* beg_addr, oop end_obj, size_t result) const; // Each bit in the bitmap represents one unit of 'object granularity.' Objects // are double-word aligned in 32-bit VMs, but not in 64-bit VMs, so the 32-bit @@ -183,7 +119,6 @@ private: HeapWord* _region_start; size_t _region_size; BitMapView _beg_bits; - BitMapView _end_bits; PSVirtualSpace* _virtual_space; size_t _reserved_byte_size; }; diff --git a/src/hotspot/share/gc/parallel/parMarkBitMap.inline.hpp b/src/hotspot/share/gc/parallel/parMarkBitMap.inline.hpp index f1dabaf3580..98c2fcc1981 100644 --- a/src/hotspot/share/gc/parallel/parMarkBitMap.inline.hpp +++ b/src/hotspot/share/gc/parallel/parMarkBitMap.inline.hpp @@ -31,18 +31,15 @@ #include "utilities/bitMap.inline.hpp" inline ParMarkBitMap::ParMarkBitMap(): - _region_start(nullptr), _region_size(0), _beg_bits(), _end_bits(), _virtual_space(nullptr), _reserved_byte_size(0) + _region_start(nullptr), _region_size(0), _beg_bits(), _virtual_space(nullptr), _reserved_byte_size(0) { } inline void ParMarkBitMap::clear_range(idx_t beg, idx_t end) { _beg_bits.clear_range(beg, end); - _end_bits.clear_range(beg, end); } inline ParMarkBitMap::idx_t ParMarkBitMap::bits_required(size_t words) { - // Need two bits (one begin bit, one end bit) for each unit of 'object - // 
granularity' in the heap. - return words_to_bits(words * 2); + return words_to_bits(words); } inline ParMarkBitMap::idx_t ParMarkBitMap::bits_required(MemRegion covered_region) { @@ -65,16 +62,8 @@ inline size_t ParMarkBitMap::size() const { return _beg_bits.size(); } -inline bool ParMarkBitMap::is_obj_beg(idx_t bit) const { - return _beg_bits.at(bit); -} - -inline bool ParMarkBitMap::is_obj_end(idx_t bit) const { - return _end_bits.at(bit); -} - inline bool ParMarkBitMap::is_marked(idx_t bit) const { - return is_obj_beg(bit); + return _beg_bits.at(bit); } inline bool ParMarkBitMap::is_marked(HeapWord* addr) const { @@ -105,47 +94,12 @@ inline ParMarkBitMap::idx_t ParMarkBitMap::words_to_bits(size_t words) { return words >> obj_granularity_shift(); } -inline size_t ParMarkBitMap::obj_size(idx_t beg_bit, idx_t end_bit) const { - DEBUG_ONLY(verify_bit(beg_bit);) - DEBUG_ONLY(verify_bit(end_bit);) - return bits_to_words(end_bit - beg_bit + 1); +inline bool ParMarkBitMap::mark_obj(HeapWord* addr) { + return _beg_bits.par_set_bit(addr_to_bit(addr)); } -inline size_t ParMarkBitMap::obj_size(HeapWord* beg_addr, HeapWord* end_addr) const { - DEBUG_ONLY(verify_addr(beg_addr);) - DEBUG_ONLY(verify_addr(end_addr);) - return pointer_delta(end_addr, beg_addr) + obj_granularity(); -} - -inline size_t ParMarkBitMap::obj_size(idx_t beg_bit) const { - const idx_t end_bit = _end_bits.find_first_set_bit(beg_bit, size()); - assert(is_marked(beg_bit), "obj not marked"); - assert(end_bit < size(), "end bit missing"); - return obj_size(beg_bit, end_bit); -} - -inline size_t ParMarkBitMap::obj_size(HeapWord* addr) const { - return obj_size(addr_to_bit(addr)); -} - -inline ParMarkBitMap::IterationStatus ParMarkBitMap::iterate(ParMarkBitMapClosure* live_closure, - HeapWord* range_beg, - HeapWord* range_end) const { - return iterate(live_closure, addr_to_bit(range_beg), addr_to_bit(range_end)); -} - -inline ParMarkBitMap::IterationStatus ParMarkBitMap::iterate(ParMarkBitMapClosure* 
live_closure, - ParMarkBitMapClosure* dead_closure, - HeapWord* range_beg, - HeapWord* range_end, - HeapWord* dead_range_end) const { - return iterate(live_closure, dead_closure, - addr_to_bit(range_beg), addr_to_bit(range_end), - addr_to_bit(dead_range_end)); -} - -inline bool ParMarkBitMap::mark_obj(oop obj, size_t size) { - return mark_obj(cast_from_oop(obj), size); +inline bool ParMarkBitMap::mark_obj(oop obj) { + return mark_obj(cast_from_oop(obj)); } inline ParMarkBitMap::idx_t ParMarkBitMap::addr_to_bit(HeapWord* addr) const { @@ -168,10 +122,6 @@ inline ParMarkBitMap::idx_t ParMarkBitMap::find_obj_beg(idx_t beg, idx_t end) co return _beg_bits.find_first_set_bit_aligned_right(beg, end); } -inline ParMarkBitMap::idx_t ParMarkBitMap::find_obj_end(idx_t beg, idx_t end) const { - return _end_bits.find_first_set_bit_aligned_right(beg, end); -} - inline HeapWord* ParMarkBitMap::find_obj_beg(HeapWord* beg, HeapWord* end) const { const idx_t beg_bit = addr_to_bit(beg); const idx_t end_bit = addr_to_bit(end); @@ -180,11 +130,10 @@ inline HeapWord* ParMarkBitMap::find_obj_beg(HeapWord* beg, HeapWord* end) const return bit_to_addr(res_bit); } -inline HeapWord* ParMarkBitMap::find_obj_end(HeapWord* beg, HeapWord* end) const { +inline HeapWord* ParMarkBitMap::find_obj_beg_reverse(HeapWord* beg, HeapWord* end) const { const idx_t beg_bit = addr_to_bit(beg); const idx_t end_bit = addr_to_bit(end); - const idx_t search_end = align_range_end(end_bit); - const idx_t res_bit = MIN2(find_obj_end(beg_bit, search_end), end_bit); + const idx_t res_bit = _beg_bits.find_last_set_bit_aligned_left(beg_bit, end_bit); return bit_to_addr(res_bit); } diff --git a/src/hotspot/share/gc/parallel/psCompactionManager.cpp b/src/hotspot/share/gc/parallel/psCompactionManager.cpp index fe9bcbff703..b95c7c619af 100644 --- a/src/hotspot/share/gc/parallel/psCompactionManager.cpp +++ b/src/hotspot/share/gc/parallel/psCompactionManager.cpp @@ -29,6 +29,7 @@ #include 
"gc/parallel/psCompactionManager.inline.hpp" #include "gc/parallel/psOldGen.hpp" #include "gc/parallel/psParallelCompact.inline.hpp" +#include "gc/shared/preservedMarks.inline.hpp" #include "gc/shared/taskqueue.inline.hpp" #include "logging/log.hpp" #include "memory/iterator.inline.hpp" @@ -51,16 +52,16 @@ ParMarkBitMap* ParCompactionManager::_mark_bitmap = nullptr; GrowableArray* ParCompactionManager::_shadow_region_array = nullptr; Monitor* ParCompactionManager::_shadow_region_monitor = nullptr; -ParCompactionManager::ParCompactionManager() { +PreservedMarksSet* ParCompactionManager::_preserved_marks_set = nullptr; + +ParCompactionManager::ParCompactionManager(PreservedMarks* preserved_marks) { ParallelScavengeHeap* heap = ParallelScavengeHeap::heap(); _old_gen = heap->old_gen(); _start_array = old_gen()->start_array(); - reset_bitmap_query_cache(); - - _deferred_obj_array = new (mtGC) GrowableArray(10, mtGC); + _preserved_marks = preserved_marks; _marking_stats_cache = nullptr; } @@ -79,9 +80,12 @@ void ParCompactionManager::initialize(ParMarkBitMap* mbm) { _objarray_task_queues = new ObjArrayTaskQueueSet(parallel_gc_threads); _region_task_queues = new RegionTaskQueueSet(parallel_gc_threads); + _preserved_marks_set = new PreservedMarksSet(true); + _preserved_marks_set->init(parallel_gc_threads); + // Create and register the ParCompactionManager(s) for the worker threads. 
for(uint i=0; iget(i)); oop_task_queues()->register_queue(i, _manager_array[i]->oop_stack()); _objarray_task_queues->register_queue(i, &_manager_array[i]->_objarray_stack); region_task_queues()->register_queue(i, _manager_array[i]->region_stack()); @@ -93,13 +97,7 @@ void ParCompactionManager::initialize(ParMarkBitMap* mbm) { _shadow_region_array = new (mtGC) GrowableArray(10, mtGC); _shadow_region_monitor = new Monitor(Mutex::nosafepoint, "CompactionManager_lock"); -} -void ParCompactionManager::reset_all_bitmap_query_caches() { - uint parallel_gc_threads = ParallelScavengeHeap::heap()->workers().max_workers(); - for (uint i=0; ireset_bitmap_query_cache(); - } } void ParCompactionManager::flush_all_string_dedup_requests() { @@ -168,15 +166,6 @@ void ParCompactionManager::drain_region_stacks() { } while (!region_stack()->is_empty()); } -void ParCompactionManager::drain_deferred_objects() { - while (!_deferred_obj_array->is_empty()) { - HeapWord* addr = _deferred_obj_array->pop(); - assert(addr != nullptr, "expected a deferred object"); - PSParallelCompact::update_deferred_object(this, addr); - } - _deferred_obj_array->clear_and_deallocate(); -} - size_t ParCompactionManager::pop_shadow_region_mt_safe(PSParallelCompact::RegionData* region_ptr) { MonitorLocker ml(_shadow_region_monitor, Mutex::_no_safepoint_check_flag); while (true) { @@ -207,10 +196,6 @@ void ParCompactionManager::remove_all_shadow_regions() { _shadow_region_array->clear(); } -void ParCompactionManager::push_deferred_object(HeapWord* addr) { - _deferred_obj_array->push(addr); -} - #ifdef ASSERT void ParCompactionManager::verify_all_marking_stack_empty() { uint parallel_gc_threads = ParallelGCThreads; diff --git a/src/hotspot/share/gc/parallel/psCompactionManager.hpp b/src/hotspot/share/gc/parallel/psCompactionManager.hpp index b33ad06ee3e..0dd68d2e2f7 100644 --- a/src/hotspot/share/gc/parallel/psCompactionManager.hpp +++ b/src/hotspot/share/gc/parallel/psCompactionManager.hpp @@ -26,6 +26,7 @@ 
#define SHARE_GC_PARALLEL_PSCOMPACTIONMANAGER_HPP #include "gc/parallel/psParallelCompact.hpp" +#include "gc/shared/preservedMarks.hpp" #include "gc/shared/stringdedup/stringDedup.hpp" #include "gc/shared/taskqueue.hpp" #include "gc/shared/taskTerminator.hpp" @@ -45,7 +46,7 @@ class ParCompactionManager : public CHeapObj { friend class ParallelScavengeRefProcProxyTask; friend class ParMarkBitMap; friend class PSParallelCompact; - friend class UpdateDensePrefixAndCompactionTask; + friend class FillDensePrefixAndCompactionTask; private: typedef OverflowTaskQueue OopTaskQueue; @@ -75,7 +76,8 @@ class ParCompactionManager : public CHeapObj { // type of TaskQueue. RegionTaskQueue _region_stack; - GrowableArray* _deferred_obj_array; + static PreservedMarksSet* _preserved_marks_set; + PreservedMarks* _preserved_marks; static ParMarkBitMap* _mark_bitmap; @@ -87,10 +89,6 @@ class ParCompactionManager : public CHeapObj { // See pop/push_shadow_region_mt_safe() below static Monitor* _shadow_region_monitor; - HeapWord* _last_query_beg; - oop _last_query_obj; - size_t _last_query_ret; - StringDedup::Requests _string_dedup_requests; static PSOldGen* old_gen() { return _old_gen; } @@ -106,7 +104,7 @@ class ParCompactionManager : public CHeapObj { // objArray stack, otherwise returns false and the task is invalid. bool publish_or_pop_objarray_tasks(ObjArrayTask& task); - ParCompactionManager(); + ParCompactionManager(PreservedMarks* preserved_marks); // Array of task queues. Needed by the task terminator. 
static RegionTaskQueueSet* region_task_queues() { return _region_task_queues; } OopTaskQueue* oop_stack() { return &_oop_stack; } @@ -153,29 +151,10 @@ public: return next_shadow_region(); } - void push_deferred_object(HeapWord* addr); - - void reset_bitmap_query_cache() { - _last_query_beg = nullptr; - _last_query_obj = nullptr; - _last_query_ret = 0; - } - void flush_string_dedup_requests() { _string_dedup_requests.flush(); } - // Bitmap query support, cache last query and result - HeapWord* last_query_begin() { return _last_query_beg; } - oop last_query_object() { return _last_query_obj; } - size_t last_query_return() { return _last_query_ret; } - - void set_last_query_begin(HeapWord *new_beg) { _last_query_beg = new_beg; } - void set_last_query_object(oop new_obj) { _last_query_obj = new_obj; } - void set_last_query_return(size_t new_ret) { _last_query_ret = new_ret; } - - static void reset_all_bitmap_query_caches(); - static void flush_all_string_dedup_requests(); RegionTaskQueue* region_stack() { return &_region_stack; } @@ -184,6 +163,9 @@ public: // Simply use the first compaction manager here. 
static ParCompactionManager* get_vmthread_cm() { return _manager_array[0]; } + PreservedMarks* preserved_marks() const { + return _preserved_marks; + } ParMarkBitMap* mark_bitmap() { return _mark_bitmap; } @@ -208,13 +190,10 @@ public: // Process tasks remaining on any stack void drain_region_stacks(); - void drain_deferred_objects(); void follow_contents(oop obj); void follow_array(objArrayOop array, int index); - void update_contents(oop obj); - class FollowStackClosure: public VoidClosure { private: ParCompactionManager* _compaction_manager; diff --git a/src/hotspot/share/gc/parallel/psCompactionManager.inline.hpp b/src/hotspot/share/gc/parallel/psCompactionManager.inline.hpp index ae2e449e0b6..64b0da58fda 100644 --- a/src/hotspot/share/gc/parallel/psCompactionManager.inline.hpp +++ b/src/hotspot/share/gc/parallel/psCompactionManager.inline.hpp @@ -157,13 +157,6 @@ inline void ParCompactionManager::follow_array(objArrayOop obj, int index) { } } -inline void ParCompactionManager::update_contents(oop obj) { - if (!obj->klass()->is_typeArray_klass()) { - PCAdjustPointerClosure apc(this); - obj->oop_iterate(&apc); - } -} - inline void ParCompactionManager::follow_contents(oop obj) { assert(PSParallelCompact::mark_bitmap()->is_marked(obj), "should be marked"); PCIterateMarkAndPushClosure cl(this, PSParallelCompact::ref_processor()); diff --git a/src/hotspot/share/gc/parallel/psParallelCompact.cpp b/src/hotspot/share/gc/parallel/psParallelCompact.cpp index 994bdc847cf..e72fa42e6f6 100644 --- a/src/hotspot/share/gc/parallel/psParallelCompact.cpp +++ b/src/hotspot/share/gc/parallel/psParallelCompact.cpp @@ -30,6 +30,7 @@ #include "classfile/systemDictionary.hpp" #include "code/codeCache.hpp" #include "compiler/oopMap.hpp" +#include "gc/parallel/objectStartArray.inline.hpp" #include "gc/parallel/parallelArguments.hpp" #include "gc/parallel/parallelScavengeHeap.inline.hpp" #include "gc/parallel/parMarkBitMap.inline.hpp" @@ -54,6 +55,7 @@ #include 
"gc/shared/oopStorage.inline.hpp" #include "gc/shared/oopStorageSet.inline.hpp" #include "gc/shared/oopStorageSetParState.inline.hpp" +#include "gc/shared/preservedMarks.inline.hpp" #include "gc/shared/referencePolicy.hpp" #include "gc/shared/referenceProcessor.hpp" #include "gc/shared/referenceProcessorPhaseTimes.hpp" @@ -98,24 +100,13 @@ // All sizes are in HeapWords. const size_t ParallelCompactData::Log2RegionSize = 16; // 64K words const size_t ParallelCompactData::RegionSize = (size_t)1 << Log2RegionSize; +static_assert(ParallelCompactData::RegionSize >= BitsPerWord, "region-start bit word-aligned"); const size_t ParallelCompactData::RegionSizeBytes = RegionSize << LogHeapWordSize; const size_t ParallelCompactData::RegionSizeOffsetMask = RegionSize - 1; const size_t ParallelCompactData::RegionAddrOffsetMask = RegionSizeBytes - 1; const size_t ParallelCompactData::RegionAddrMask = ~RegionAddrOffsetMask; -const size_t ParallelCompactData::Log2BlockSize = 7; // 128 words -const size_t ParallelCompactData::BlockSize = (size_t)1 << Log2BlockSize; -const size_t ParallelCompactData::BlockSizeBytes = - BlockSize << LogHeapWordSize; -const size_t ParallelCompactData::BlockSizeOffsetMask = BlockSize - 1; -const size_t ParallelCompactData::BlockAddrOffsetMask = BlockSizeBytes - 1; -const size_t ParallelCompactData::BlockAddrMask = ~BlockAddrOffsetMask; - -const size_t ParallelCompactData::BlocksPerRegion = RegionSize / BlockSize; -const size_t ParallelCompactData::Log2BlocksPerRegion = - Log2RegionSize - Log2BlockSize; - const ParallelCompactData::RegionData::region_sz_t ParallelCompactData::RegionData::dc_shift = 27; @@ -412,10 +403,7 @@ ParallelCompactData::ParallelCompactData() : _region_vspace(nullptr), _reserved_byte_size(0), _region_data(nullptr), - _region_count(0), - _block_vspace(nullptr), - _block_data(nullptr), - _block_count(0) {} + _region_count(0) {} bool ParallelCompactData::initialize(MemRegion reserved_heap) { @@ -426,8 +414,7 @@ bool 
ParallelCompactData::initialize(MemRegion reserved_heap) assert(region_align_down(_heap_start) == _heap_start, "region start not aligned"); - bool result = initialize_region_data(heap_size) && initialize_block_data(); - return result; + return initialize_region_data(heap_size); } PSVirtualSpace* @@ -473,44 +460,12 @@ bool ParallelCompactData::initialize_region_data(size_t heap_size) return false; } -bool ParallelCompactData::initialize_block_data() -{ - assert(_region_count != 0, "region data must be initialized first"); - const size_t count = _region_count << Log2BlocksPerRegion; - _block_vspace = create_vspace(count, sizeof(BlockData)); - if (_block_vspace != 0) { - _block_data = (BlockData*)_block_vspace->reserved_low_addr(); - _block_count = count; - return true; - } - return false; -} - void ParallelCompactData::clear_range(size_t beg_region, size_t end_region) { assert(beg_region <= _region_count, "beg_region out of range"); assert(end_region <= _region_count, "end_region out of range"); - assert(RegionSize % BlockSize == 0, "RegionSize not a multiple of BlockSize"); const size_t region_cnt = end_region - beg_region; memset(_region_data + beg_region, 0, region_cnt * sizeof(RegionData)); - - const size_t beg_block = beg_region * BlocksPerRegion; - const size_t block_cnt = region_cnt * BlocksPerRegion; - memset(_block_data + beg_block, 0, block_cnt * sizeof(BlockData)); -} - -HeapWord* ParallelCompactData::partial_obj_end(size_t region_idx) const -{ - const RegionData* cur_cp = region(region_idx); - const RegionData* const end_cp = region(region_count() - 1); - - HeapWord* result = region_to_addr(region_idx); - if (cur_cp < end_cp) { - do { - result += cur_cp->partial_obj_size(); - } while (cur_cp->partial_obj_size() == RegionSize && ++cur_cp < end_cp); - } - return result; } void @@ -761,49 +716,6 @@ bool ParallelCompactData::summarize(SplitInfo& split_info, return true; } -HeapWord* ParallelCompactData::calc_new_pointer(HeapWord* addr, ParCompactionManager* 
cm) const { - assert(addr != nullptr, "Should detect null oop earlier"); - assert(ParallelScavengeHeap::heap()->is_in(addr), "not in heap"); - assert(PSParallelCompact::mark_bitmap()->is_marked(addr), "not marked"); - - // Region covering the object. - RegionData* const region_ptr = addr_to_region_ptr(addr); - HeapWord* result = region_ptr->destination(); - - // If the entire Region is live, the new location is region->destination + the - // offset of the object within in the Region. - - // Run some performance tests to determine if this special case pays off. It - // is worth it for pointers into the dense prefix. If the optimization to - // avoid pointer updates in regions that only point to the dense prefix is - // ever implemented, this should be revisited. - if (region_ptr->data_size() == RegionSize) { - result += region_offset(addr); - return result; - } - - // Otherwise, the new location is region->destination + block offset + the - // number of live words in the Block that are (a) to the left of addr and (b) - // due to objects that start in the Block. - - // Fill in the block table if necessary. This is unsynchronized, so multiple - // threads may fill the block table for a region (harmless, since it is - // idempotent). 
- if (!region_ptr->blocks_filled()) { - PSParallelCompact::fill_blocks(addr_to_region_idx(addr)); - region_ptr->set_blocks_filled(); - } - - HeapWord* const search_start = block_align_down(addr); - const size_t block_offset = addr_to_block_ptr(addr)->offset(); - - const ParMarkBitMap* bitmap = PSParallelCompact::mark_bitmap(); - const size_t live = bitmap->live_words_in_range(cm, search_start, cast_to_oop(addr)); - result += block_offset + live; - DEBUG_ONLY(PSParallelCompact::check_new_location(addr, result)); - return result; -} - #ifdef ASSERT void ParallelCompactData::verify_clear(const PSVirtualSpace* vspace) { @@ -817,7 +729,6 @@ void ParallelCompactData::verify_clear(const PSVirtualSpace* vspace) void ParallelCompactData::verify_clear() { verify_clear(_region_vspace); - verify_clear(_block_vspace); } #endif // #ifdef ASSERT @@ -831,6 +742,19 @@ ParallelCompactData PSParallelCompact::_summary_data; PSParallelCompact::IsAliveClosure PSParallelCompact::_is_alive_closure; +class PCAdjustPointerClosure: public BasicOopIterateClosure { + template + void do_oop_work(T* p) { PSParallelCompact::adjust_pointer(p); } + +public: + virtual void do_oop(oop* p) { do_oop_work(p); } + virtual void do_oop(narrowOop* p) { do_oop_work(p); } + + virtual ReferenceIterationMode reference_iteration_mode() { return DO_FIELDS; } +}; + +static PCAdjustPointerClosure pc_adjust_pointer_closure; + bool PSParallelCompact::IsAliveClosure::do_object_b(oop p) { return mark_bitmap()->is_marked(p); } void PSParallelCompact::post_initialize() { @@ -947,8 +871,6 @@ void PSParallelCompact::pre_compact() DEBUG_ONLY(mark_bitmap()->verify_clear();) DEBUG_ONLY(summary_data().verify_clear();) - - ParCompactionManager::reset_all_bitmap_query_caches(); } void PSParallelCompact::post_compact() @@ -1082,18 +1004,20 @@ void PSParallelCompact::fill_dense_prefix_end(SpaceId id) { idx_t const dense_prefix_bit = _mark_bitmap.addr_to_bit(dense_prefix_end); if (region_after_dense_prefix->partial_obj_size() != 0 
|| - _mark_bitmap.is_obj_beg(dense_prefix_bit)) { + _mark_bitmap.is_marked(dense_prefix_bit)) { // The region after the dense prefix starts with live bytes. return; } - if (_mark_bitmap.is_obj_end(dense_prefix_bit - 2)) { + HeapWord* block_start = start_array(id)->block_start_reaching_into_card(dense_prefix_end); + if (block_start == dense_prefix_end - 1) { + assert(!_mark_bitmap.is_marked(block_start), "inv"); // There is exactly one heap word gap right before the dense prefix end, so we need a filler object. - // The filler object will extend into the region after the last dense prefix region. + // The filler object will extend into region_after_dense_prefix. const size_t obj_len = 2; // min-fill-size HeapWord* const obj_beg = dense_prefix_end - 1; CollectedHeap::fill_with_object(obj_beg, obj_len); - _mark_bitmap.mark_obj(obj_beg, obj_len); + _mark_bitmap.mark_obj(obj_beg); _summary_data.addr_to_region_ptr(obj_beg)->add_live_obj(1); region_after_dense_prefix->set_partial_obj_size(1); region_after_dense_prefix->set_partial_obj_addr(obj_beg); @@ -1359,12 +1283,14 @@ bool PSParallelCompact::invoke_no_policy(bool maximum_heap_compaction) { DerivedPointerTable::set_active(false); #endif - // adjust_roots() updates Universe::_intArrayKlass which is - // needed by the compaction for filling holes in the dense prefix. - adjust_roots(); + forward_to_new_addr(); + + adjust_pointers(); compact(); + ParCompactionManager::_preserved_marks_set->restore(&ParallelScavengeHeap::heap()->workers()); + ParCompactionManager::verify_all_region_stack_empty(); // Reset the mark bitmap, summary data, and do other bookkeeping. 
Must be @@ -1685,11 +1611,90 @@ void PSParallelCompact::marking_phase(ParallelOldTracer *gc_tracer) { #endif } +template +void PSParallelCompact::adjust_in_space_helper(SpaceId id, volatile uint* claim_counter, Func&& on_stripe) { + MutableSpace* sp = PSParallelCompact::space(id); + HeapWord* const bottom = sp->bottom(); + HeapWord* const top = sp->top(); + if (bottom == top) { + return; + } + + const uint num_regions_per_stripe = 2; + const size_t region_size = ParallelCompactData::RegionSize; + const size_t stripe_size = num_regions_per_stripe * region_size; + + while (true) { + uint counter = Atomic::fetch_then_add(claim_counter, num_regions_per_stripe); + HeapWord* cur_stripe = bottom + counter * region_size; + if (cur_stripe >= top) { + break; + } + HeapWord* stripe_end = MIN2(cur_stripe + stripe_size, top); + on_stripe(cur_stripe, stripe_end); + } +} + +void PSParallelCompact::adjust_in_old_space(volatile uint* claim_counter) { + // Regions in old-space shouldn't be split. + assert(!_space_info[old_space_id].split_info().is_valid(), "inv"); + + auto scan_obj_with_limit = [&] (HeapWord* obj_start, HeapWord* left, HeapWord* right) { + assert(mark_bitmap()->is_marked(obj_start), "inv"); + oop obj = cast_to_oop(obj_start); + return obj->oop_iterate_size(&pc_adjust_pointer_closure, MemRegion(left, right)); + }; + + adjust_in_space_helper(old_space_id, claim_counter, [&] (HeapWord* stripe_start, HeapWord* stripe_end) { + assert(_summary_data.is_region_aligned(stripe_start), "inv"); + RegionData* cur_region = _summary_data.addr_to_region_ptr(stripe_start); + HeapWord* obj_start; + if (cur_region->partial_obj_size() != 0) { + obj_start = cur_region->partial_obj_addr(); + obj_start += scan_obj_with_limit(obj_start, stripe_start, stripe_end); + } else { + obj_start = stripe_start; + } + + while (obj_start < stripe_end) { + obj_start = mark_bitmap()->find_obj_beg(obj_start, stripe_end); + if (obj_start >= stripe_end) { + break; + } + obj_start += 
scan_obj_with_limit(obj_start, stripe_start, stripe_end); + } + }); +} + +void PSParallelCompact::adjust_in_young_space(SpaceId id, volatile uint* claim_counter) { + adjust_in_space_helper(id, claim_counter, [](HeapWord* stripe_start, HeapWord* stripe_end) { + HeapWord* obj_start = stripe_start; + while (obj_start < stripe_end) { + obj_start = mark_bitmap()->find_obj_beg(obj_start, stripe_end); + if (obj_start >= stripe_end) { + break; + } + oop obj = cast_to_oop(obj_start); + obj_start += obj->oop_iterate_size(&pc_adjust_pointer_closure); + } + }); +} + +void PSParallelCompact::adjust_pointers_in_spaces(uint worker_id, volatile uint* claim_counters) { + auto start_time = Ticks::now(); + adjust_in_old_space(&claim_counters[0]); + for (uint id = eden_space_id; id < last_space_id; ++id) { + adjust_in_young_space(SpaceId(id), &claim_counters[id]); + } + log_trace(gc, phases)("adjust_pointers_in_spaces worker %u: %.3f ms", worker_id, (Ticks::now() - start_time).seconds() * 1000); +} + class PSAdjustTask final : public WorkerTask { SubTasksDone _sub_tasks; WeakProcessor::Task _weak_proc_task; OopStorageSetStrongParState _oop_storage_iter; uint _nworkers; + volatile uint _claim_counters[PSParallelCompact::last_space_id] = {}; enum PSAdjustSubTask { PSAdjustSubTask_code_cache, @@ -1716,36 +1721,165 @@ public: void work(uint worker_id) { ParCompactionManager* cm = ParCompactionManager::gc_thread_compaction_manager(worker_id); - PCAdjustPointerClosure adjust(cm); + cm->preserved_marks()->adjust_during_full_gc(); + { + // adjust pointers in all spaces + PSParallelCompact::adjust_pointers_in_spaces(worker_id, _claim_counters); + } { ResourceMark rm; - Threads::possibly_parallel_oops_do(_nworkers > 1, &adjust, nullptr); + Threads::possibly_parallel_oops_do(_nworkers > 1, &pc_adjust_pointer_closure, nullptr); } - _oop_storage_iter.oops_do(&adjust); + _oop_storage_iter.oops_do(&pc_adjust_pointer_closure); { - CLDToOopClosure cld_closure(&adjust, 
ClassLoaderData::_claim_stw_fullgc_adjust); + CLDToOopClosure cld_closure(&pc_adjust_pointer_closure, ClassLoaderData::_claim_stw_fullgc_adjust); ClassLoaderDataGraph::cld_do(&cld_closure); } { AlwaysTrueClosure always_alive; - _weak_proc_task.work(worker_id, &always_alive, &adjust); + _weak_proc_task.work(worker_id, &always_alive, &pc_adjust_pointer_closure); } if (_sub_tasks.try_claim_task(PSAdjustSubTask_code_cache)) { - NMethodToOopClosure adjust_code(&adjust, NMethodToOopClosure::FixRelocations); + NMethodToOopClosure adjust_code(&pc_adjust_pointer_closure, NMethodToOopClosure::FixRelocations); CodeCache::nmethods_do(&adjust_code); } _sub_tasks.all_tasks_claimed(); } }; -void PSParallelCompact::adjust_roots() { +void PSParallelCompact::adjust_pointers() { // Adjust the pointers to reflect the new locations - GCTraceTime(Info, gc, phases) tm("Adjust Roots", &_gc_timer); + GCTraceTime(Info, gc, phases) tm("Adjust Pointers", &_gc_timer); uint nworkers = ParallelScavengeHeap::heap()->workers().active_workers(); PSAdjustTask task(nworkers); ParallelScavengeHeap::heap()->workers().run_task(&task); } +// Split [start, end) evenly for a number of workers and return the +// range for worker_id. +static void split_regions_for_worker(size_t start, size_t end, + uint worker_id, uint num_workers, + size_t* worker_start, size_t* worker_end) { + assert(start < end, "precondition"); + assert(num_workers > 0, "precondition"); + assert(worker_id < num_workers, "precondition"); + + size_t num_regions = end - start; + size_t num_regions_per_worker = num_regions / num_workers; + size_t remainder = num_regions % num_workers; + // The first few workers will get one extra. + *worker_start = start + worker_id * num_regions_per_worker + + MIN2(checked_cast(worker_id), remainder); + *worker_end = *worker_start + num_regions_per_worker + + (worker_id < remainder ? 
1 : 0); +} + +void PSParallelCompact::forward_to_new_addr() { + GCTraceTime(Info, gc, phases) tm("Forward", &_gc_timer); + uint nworkers = ParallelScavengeHeap::heap()->workers().active_workers(); + + struct ForwardTask final : public WorkerTask { + uint _num_workers; + + explicit ForwardTask(uint num_workers) : + WorkerTask("PSForward task"), + _num_workers(num_workers) {} + + void work(uint worker_id) override { + ParCompactionManager* cm = ParCompactionManager::gc_thread_compaction_manager(worker_id); + for (uint id = old_space_id; id < last_space_id; ++id) { + MutableSpace* sp = PSParallelCompact::space(SpaceId(id)); + HeapWord* dense_prefix_addr = dense_prefix(SpaceId(id)); + HeapWord* top = sp->top(); + + if (dense_prefix_addr == top) { + continue; + } + + size_t dense_prefix_region = _summary_data.addr_to_region_idx(dense_prefix_addr); + size_t top_region = _summary_data.addr_to_region_idx(_summary_data.region_align_up(top)); + size_t start_region; + size_t end_region; + split_regions_for_worker(dense_prefix_region, top_region, + worker_id, _num_workers, + &start_region, &end_region); + for (size_t cur_region = start_region; cur_region < end_region; ++cur_region) { + RegionData* region_ptr = _summary_data.region(cur_region); + size_t live_words = region_ptr->partial_obj_size(); + + if (live_words == ParallelCompactData::RegionSize) { + // No obj-start + continue; + } + + HeapWord* region_start = _summary_data.region_to_addr(cur_region); + HeapWord* region_end = region_start + ParallelCompactData::RegionSize; + + HeapWord* cur_addr = region_start + live_words; + + HeapWord* destination = region_ptr->destination(); + while (cur_addr < region_end) { + cur_addr = mark_bitmap()->find_obj_beg(cur_addr, region_end); + if (cur_addr >= region_end) { + break; + } + assert(mark_bitmap()->is_marked(cur_addr), "inv"); + HeapWord* new_addr = destination + live_words; + oop obj = cast_to_oop(cur_addr); + if (new_addr != cur_addr) { + 
cm->preserved_marks()->push_if_necessary(obj, obj->mark()); + obj->forward_to(cast_to_oop(new_addr)); + } + size_t obj_size = obj->size(); + live_words += obj_size; + cur_addr += obj_size; + } + } + } + } + } task(nworkers); + + ParallelScavengeHeap::heap()->workers().run_task(&task); + debug_only(verify_forward();) +} + +#ifdef ASSERT +void PSParallelCompact::verify_forward() { + HeapWord* old_dense_prefix_addr = dense_prefix(SpaceId(old_space_id)); + RegionData* old_region = _summary_data.region(_summary_data.addr_to_region_idx(old_dense_prefix_addr)); + HeapWord* bump_ptr = old_region->partial_obj_size() != 0 + ? old_dense_prefix_addr + old_region->partial_obj_size() + : old_dense_prefix_addr; + SpaceId bump_ptr_space = old_space_id; + + for (uint id = old_space_id; id < last_space_id; ++id) { + MutableSpace* sp = PSParallelCompact::space(SpaceId(id)); + HeapWord* dense_prefix_addr = dense_prefix(SpaceId(id)); + HeapWord* top = sp->top(); + HeapWord* cur_addr = dense_prefix_addr; + + while (cur_addr < top) { + cur_addr = mark_bitmap()->find_obj_beg(cur_addr, top); + if (cur_addr >= top) { + break; + } + assert(mark_bitmap()->is_marked(cur_addr), "inv"); + // Move to the space containing cur_addr + if (bump_ptr == _space_info[bump_ptr_space].new_top()) { + bump_ptr = space(space_id(cur_addr))->bottom(); + bump_ptr_space = space_id(bump_ptr); + } + oop obj = cast_to_oop(cur_addr); + if (cur_addr != bump_ptr) { + assert(obj->forwardee() == cast_to_oop(bump_ptr), "inv"); + } + bump_ptr += obj->size(); + cur_addr += obj->size(); + } + } +} +#endif + // Helper class to print 8 region numbers per line and then print the total at the end. 
class FillableRegionLogger : public StackObj { private: @@ -1826,160 +1960,6 @@ void PSParallelCompact::prepare_region_draining_tasks(uint parallel_gc_threads) } } -class TaskQueue : StackObj { - volatile uint _counter; - uint _size; - uint _insert_index; - PSParallelCompact::UpdateDensePrefixTask* _backing_array; -public: - explicit TaskQueue(uint size) : _counter(0), _size(size), _insert_index(0), _backing_array(nullptr) { - _backing_array = NEW_C_HEAP_ARRAY(PSParallelCompact::UpdateDensePrefixTask, _size, mtGC); - } - ~TaskQueue() { - assert(_counter >= _insert_index, "not all queue elements were claimed"); - FREE_C_HEAP_ARRAY(T, _backing_array); - } - - void push(const PSParallelCompact::UpdateDensePrefixTask& value) { - assert(_insert_index < _size, "too small backing array"); - _backing_array[_insert_index++] = value; - } - - bool try_claim(PSParallelCompact::UpdateDensePrefixTask& reference) { - uint claimed = Atomic::fetch_then_add(&_counter, 1u); - if (claimed < _insert_index) { - reference = _backing_array[claimed]; - return true; - } else { - return false; - } - } -}; - -#define PAR_OLD_DENSE_PREFIX_OVER_PARTITIONING 4 - -void PSParallelCompact::enqueue_dense_prefix_tasks(TaskQueue& task_queue, - uint parallel_gc_threads) { - GCTraceTime(Trace, gc, phases) tm("Dense Prefix Task Setup", &_gc_timer); - - ParallelCompactData& sd = PSParallelCompact::summary_data(); - - // Iterate over all the spaces adding tasks for updating - // regions in the dense prefix. Assume that 1 gc thread - // will work on opening the gaps and the remaining gc threads - // will work on the dense prefix. - unsigned int space_id; - for (space_id = old_space_id; space_id < last_space_id; ++ space_id) { - HeapWord* const dense_prefix_end = _space_info[space_id].dense_prefix(); - const MutableSpace* const space = _space_info[space_id].space(); - - if (dense_prefix_end == space->bottom()) { - // There is no dense prefix for this space. 
- continue; - } - - // The dense prefix is before this region. - size_t region_index_end_dense_prefix = - sd.addr_to_region_idx(dense_prefix_end); - RegionData* const dense_prefix_cp = - sd.region(region_index_end_dense_prefix); - assert(dense_prefix_end == space->end() || - dense_prefix_cp->available() || - dense_prefix_cp->claimed(), - "The region after the dense prefix should always be ready to fill"); - - size_t region_index_start = sd.addr_to_region_idx(space->bottom()); - - // Is there dense prefix work? - size_t total_dense_prefix_regions = - region_index_end_dense_prefix - region_index_start; - // How many regions of the dense prefix should be given to - // each thread? - if (total_dense_prefix_regions > 0) { - uint tasks_for_dense_prefix = 1; - if (total_dense_prefix_regions <= - (parallel_gc_threads * PAR_OLD_DENSE_PREFIX_OVER_PARTITIONING)) { - // Don't over partition. This assumes that - // PAR_OLD_DENSE_PREFIX_OVER_PARTITIONING is a small integer value - // so there are not many regions to process. - tasks_for_dense_prefix = parallel_gc_threads; - } else { - // Over partition - tasks_for_dense_prefix = parallel_gc_threads * - PAR_OLD_DENSE_PREFIX_OVER_PARTITIONING; - } - size_t regions_per_thread = total_dense_prefix_regions / - tasks_for_dense_prefix; - // Give each thread at least 1 region. - if (regions_per_thread == 0) { - regions_per_thread = 1; - } - - for (uint k = 0; k < tasks_for_dense_prefix; k++) { - if (region_index_start >= region_index_end_dense_prefix) { - break; - } - // region_index_end is not processed - size_t region_index_end = MIN2(region_index_start + regions_per_thread, - region_index_end_dense_prefix); - task_queue.push(UpdateDensePrefixTask(SpaceId(space_id), - region_index_start, - region_index_end)); - region_index_start = region_index_end; - } - } - // This gets any part of the dense prefix that did not - // fit evenly. 
- if (region_index_start < region_index_end_dense_prefix) { - task_queue.push(UpdateDensePrefixTask(SpaceId(space_id), - region_index_start, - region_index_end_dense_prefix)); - } - } -} - -#ifdef ASSERT -// Write a histogram of the number of times the block table was filled for a -// region. -void PSParallelCompact::write_block_fill_histogram() -{ - if (!log_develop_is_enabled(Trace, gc, compaction)) { - return; - } - - Log(gc, compaction) log; - ResourceMark rm; - LogStream ls(log.trace()); - outputStream* out = &ls; - - typedef ParallelCompactData::RegionData rd_t; - ParallelCompactData& sd = summary_data(); - - for (unsigned int id = old_space_id; id < last_space_id; ++id) { - MutableSpace* const spc = _space_info[id].space(); - if (spc->bottom() != spc->top()) { - const rd_t* const beg = sd.addr_to_region_ptr(spc->bottom()); - HeapWord* const top_aligned_up = sd.region_align_up(spc->top()); - const rd_t* const end = sd.addr_to_region_ptr(top_aligned_up); - - size_t histo[5] = { 0, 0, 0, 0, 0 }; - const size_t histo_len = sizeof(histo) / sizeof(size_t); - const size_t region_cnt = pointer_delta(end, beg, sizeof(rd_t)); - - for (const rd_t* cur = beg; cur < end; ++cur) { - ++histo[MIN2(cur->blocks_filled_count(), histo_len - 1)]; - } - out->print("Block fill histogram: %u %-4s" SIZE_FORMAT_W(5), id, space_names[id], region_cnt); - for (size_t i = 0; i < histo_len; ++i) { - out->print(" " SIZE_FORMAT_W(5) " %5.1f%%", - histo[i], 100.0 * histo[i] / region_cnt); - } - out->cr(); - } - } -} -#endif // #ifdef ASSERT - static void compaction_with_stealing_work(TaskTerminator* terminator, uint worker_id) { assert(ParallelScavengeHeap::heap()->is_stw_gc_active(), "called outside gc"); @@ -2012,72 +1992,146 @@ static void compaction_with_stealing_work(TaskTerminator* terminator, uint worke } } -class UpdateDensePrefixAndCompactionTask: public WorkerTask { - TaskQueue& _tq; +class FillDensePrefixAndCompactionTask: public WorkerTask { + uint _num_workers; TaskTerminator 
_terminator; public: - UpdateDensePrefixAndCompactionTask(TaskQueue& tq, uint active_workers) : - WorkerTask("UpdateDensePrefixAndCompactionTask"), - _tq(tq), + FillDensePrefixAndCompactionTask(uint active_workers) : + WorkerTask("FillDensePrefixAndCompactionTask"), + _num_workers(active_workers), _terminator(active_workers, ParCompactionManager::region_task_queues()) { } + virtual void work(uint worker_id) { - ParCompactionManager* cm = ParCompactionManager::gc_thread_compaction_manager(worker_id); - - for (PSParallelCompact::UpdateDensePrefixTask task; _tq.try_claim(task); /* empty */) { - PSParallelCompact::update_and_deadwood_in_dense_prefix(cm, - task._space_id, - task._region_index_start, - task._region_index_end); + { + auto start = Ticks::now(); + PSParallelCompact::fill_dead_objs_in_dense_prefix(worker_id, _num_workers); + log_trace(gc, phases)("Fill dense prefix by worker %u: %.3f ms", worker_id, (Ticks::now() - start).seconds() * 1000); } - - // Once a thread has drained it's stack, it should try to steal regions from - // other threads. compaction_with_stealing_work(&_terminator, worker_id); - - // At this point all regions have been compacted, so it's now safe - // to update the deferred objects that cross region boundaries. 
- cm->drain_deferred_objects(); } }; +void PSParallelCompact::fill_range_in_dense_prefix(HeapWord* start, HeapWord* end) { +#ifdef ASSERT + { + assert(start < end, "precondition"); + assert(mark_bitmap()->find_obj_beg(start, end) == end, "precondition"); + HeapWord* bottom = _space_info[old_space_id].space()->bottom(); + if (start != bottom) { + HeapWord* obj_start = mark_bitmap()->find_obj_beg_reverse(bottom, start); + HeapWord* after_obj = obj_start + cast_to_oop(obj_start)->size(); + assert(after_obj == start, "precondition"); + } + } +#endif + + CollectedHeap::fill_with_objects(start, pointer_delta(end, start)); + HeapWord* addr = start; + do { + size_t size = cast_to_oop(addr)->size(); + start_array(old_space_id)->update_for_block(addr, addr + size); + addr += size; + } while (addr < end); +} + +void PSParallelCompact::fill_dead_objs_in_dense_prefix(uint worker_id, uint num_workers) { + ParMarkBitMap* bitmap = mark_bitmap(); + + HeapWord* const bottom = _space_info[old_space_id].space()->bottom(); + HeapWord* const prefix_end = dense_prefix(old_space_id); + + if (bottom == prefix_end) { + return; + } + + size_t bottom_region = _summary_data.addr_to_region_idx(bottom); + size_t prefix_end_region = _summary_data.addr_to_region_idx(prefix_end); + + size_t start_region; + size_t end_region; + split_regions_for_worker(bottom_region, prefix_end_region, + worker_id, num_workers, + &start_region, &end_region); + + if (start_region == end_region) { + return; + } + + HeapWord* const start_addr = _summary_data.region_to_addr(start_region); + HeapWord* const end_addr = _summary_data.region_to_addr(end_region); + + // Skip live partial obj (if any) from previous region. 
+ HeapWord* cur_addr; + RegionData* start_region_ptr = _summary_data.region(start_region); + if (start_region_ptr->partial_obj_size() != 0) { + HeapWord* partial_obj_start = start_region_ptr->partial_obj_addr(); + assert(bitmap->is_marked(partial_obj_start), "inv"); + cur_addr = partial_obj_start + cast_to_oop(partial_obj_start)->size(); + } else { + cur_addr = start_addr; + } + + // end_addr is inclusive to handle regions starting with dead space. + while (cur_addr <= end_addr) { + // Use prefix_end to handle trailing obj in each worker region-chunk. + HeapWord* live_start = bitmap->find_obj_beg(cur_addr, prefix_end); + if (cur_addr != live_start) { + // Only worker 0 handles proceeding dead space. + if (cur_addr != start_addr || worker_id == 0) { + fill_range_in_dense_prefix(cur_addr, live_start); + } + } + if (live_start >= end_addr) { + break; + } + assert(bitmap->is_marked(live_start), "inv"); + cur_addr = live_start + cast_to_oop(live_start)->size(); + } +} + void PSParallelCompact::compact() { GCTraceTime(Info, gc, phases) tm("Compaction Phase", &_gc_timer); - ParallelScavengeHeap* heap = ParallelScavengeHeap::heap(); - PSOldGen* old_gen = heap->old_gen(); uint active_gc_threads = ParallelScavengeHeap::heap()->workers().active_workers(); - // for [0..last_space_id) - // for [0..active_gc_threads * PAR_OLD_DENSE_PREFIX_OVER_PARTITIONING) - // push - // push - // - // max push count is thus: last_space_id * (active_gc_threads * PAR_OLD_DENSE_PREFIX_OVER_PARTITIONING + 1) - TaskQueue task_queue(last_space_id * (active_gc_threads * PAR_OLD_DENSE_PREFIX_OVER_PARTITIONING + 1)); initialize_shadow_regions(active_gc_threads); prepare_region_draining_tasks(active_gc_threads); - enqueue_dense_prefix_tasks(task_queue, active_gc_threads); { GCTraceTime(Trace, gc, phases) tm("Par Compact", &_gc_timer); - UpdateDensePrefixAndCompactionTask task(task_queue, active_gc_threads); + FillDensePrefixAndCompactionTask task(active_gc_threads); 
ParallelScavengeHeap::heap()->workers().run_task(&task); #ifdef ASSERT + verify_filler_in_dense_prefix(); + // Verify that all regions have been processed. for (unsigned int id = old_space_id; id < last_space_id; ++id) { verify_complete(SpaceId(id)); } #endif } - - DEBUG_ONLY(write_block_fill_histogram()); } #ifdef ASSERT +void PSParallelCompact::verify_filler_in_dense_prefix() { + HeapWord* bottom = _space_info[old_space_id].space()->bottom(); + HeapWord* dense_prefix_end = dense_prefix(old_space_id); + HeapWord* cur_addr = bottom; + while (cur_addr < dense_prefix_end) { + oop obj = cast_to_oop(cur_addr); + oopDesc::verify(obj); + if (!mark_bitmap()->is_marked(cur_addr)) { + Klass* k = cast_to_oop(cur_addr)->klass_without_asserts(); + assert(k == Universe::fillerArrayKlass() || k == vmClasses::FillerObject_klass(), "inv"); + } + cur_addr += obj->size(); + } +} + void PSParallelCompact::verify_complete(SpaceId space_id) { // All Regions between space bottom() to new_top() should be marked as filled // and all Regions between new_top() and top() should be available (i.e., @@ -2117,72 +2171,6 @@ void PSParallelCompact::verify_complete(SpaceId space_id) { } #endif // #ifdef ASSERT -inline void UpdateOnlyClosure::do_addr(HeapWord* addr) { - compaction_manager()->update_contents(cast_to_oop(addr)); -} - -// Update interior oops in the ranges of regions [beg_region, end_region). -void -PSParallelCompact::update_and_deadwood_in_dense_prefix(ParCompactionManager* cm, - SpaceId space_id, - size_t beg_region, - size_t end_region) { - ParallelCompactData& sd = summary_data(); - ParMarkBitMap* const mbm = mark_bitmap(); - - HeapWord* beg_addr = sd.region_to_addr(beg_region); - HeapWord* const end_addr = sd.region_to_addr(end_region); - assert(beg_region <= end_region, "bad region range"); - assert(end_addr <= dense_prefix(space_id), "not in the dense prefix"); - -#ifdef ASSERT - // Claim the regions to avoid triggering an assert when they are marked as - // filled. 
- for (size_t claim_region = beg_region; claim_region < end_region; ++claim_region) { - assert(sd.region(claim_region)->claim_unsafe(), "claim() failed"); - } -#endif // #ifdef ASSERT - HeapWord* const space_bottom = space(space_id)->bottom(); - - // Check if it's the first region in this space. - if (beg_addr != space_bottom) { - // Find the first live object or block of dead space that *starts* in this - // range of regions. If a partial object crosses onto the region, skip it; - // it will be marked for 'deferred update' when the object head is - // processed. If dead space crosses onto the region, it is also skipped; it - // will be filled when the prior region is processed. If neither of those - // apply, the first word in the region is the start of a live object or dead - // space. - assert(beg_addr > space(space_id)->bottom(), "sanity"); - const RegionData* const cp = sd.region(beg_region); - if (cp->partial_obj_size() != 0) { - beg_addr = sd.partial_obj_end(beg_region); - } else { - idx_t beg_bit = mbm->addr_to_bit(beg_addr); - if (!mbm->is_obj_beg(beg_bit) && !mbm->is_obj_end(beg_bit - 1)) { - beg_addr = mbm->find_obj_beg(beg_addr, end_addr); - } - } - } - - if (beg_addr < end_addr) { - // A live object or block of dead space starts in this range of Regions. - HeapWord* const dense_prefix_end = dense_prefix(space_id); - - // Create closures and iterate. - UpdateOnlyClosure update_closure(mbm, cm, space_id); - FillClosure fill_closure(cm, space_id); - mbm->iterate(&update_closure, &fill_closure, beg_addr, end_addr, dense_prefix_end); - } - - // Mark the regions as filled. - RegionData* const beg_cp = sd.region(beg_region); - RegionData* const end_cp = sd.region(end_region); - for (RegionData* cp = beg_cp; cp < end_cp; ++cp) { - cp->set_completed(); - } -} - // Return the SpaceId for the space containing addr. If addr is not in the // heap, last_space_id is returned. In debug mode it expects the address to be // in the heap and asserts such. 
@@ -2199,24 +2187,6 @@ PSParallelCompact::SpaceId PSParallelCompact::space_id(HeapWord* addr) { return last_space_id; } -void PSParallelCompact::update_deferred_object(ParCompactionManager* cm, HeapWord *addr) { -#ifdef ASSERT - ParallelCompactData& sd = summary_data(); - size_t region_idx = sd.addr_to_region_idx(addr); - assert(sd.region(region_idx)->completed(), "first region must be completed before deferred updates"); - assert(sd.region(region_idx + 1)->completed(), "second region must be completed before deferred updates"); -#endif - - const SpaceInfo* const space_info = _space_info + space_id(addr); - ObjectStartArray* const start_array = space_info->start_array(); - if (start_array != nullptr) { - start_array->update_for_block(addr, addr + cast_to_oop(addr)->size()); - } - - cm->update_contents(cast_to_oop(addr)); - assert(oopDesc::is_oop(cast_to_oop(addr)), "Expected an oop at " PTR_FORMAT, p2i(cast_to_oop(addr))); -} - // Skip over count live words starting from beg, and return the address of the // next live word. Unless marked, the word corresponding to beg is assumed to // be dead. 
Callers must either ensure beg does not correspond to the middle of @@ -2228,26 +2198,18 @@ PSParallelCompact::skip_live_words(HeapWord* beg, HeapWord* end, size_t count) assert(count > 0, "sanity"); ParMarkBitMap* m = mark_bitmap(); - idx_t bits_to_skip = m->words_to_bits(count); - idx_t cur_beg = m->addr_to_bit(beg); - const idx_t search_end = m->align_range_end(m->addr_to_bit(end)); - - do { - cur_beg = m->find_obj_beg(cur_beg, search_end); - idx_t cur_end = m->find_obj_end(cur_beg, search_end); - const size_t obj_bits = cur_end - cur_beg + 1; - if (obj_bits > bits_to_skip) { - return m->bit_to_addr(cur_beg + bits_to_skip); + HeapWord* cur_addr = beg; + while (true) { + cur_addr = m->find_obj_beg(cur_addr, end); + assert(cur_addr < end, "inv"); + size_t obj_size = cast_to_oop(cur_addr)->size(); + // Strictly greater-than + if (obj_size > count) { + return cur_addr + count; } - bits_to_skip -= obj_bits; - cur_beg = cur_end + 1; - } while (bits_to_skip > 0); - - // Skipping the desired number of words landed just past the end of an object. - // Find the start of the next object. - cur_beg = m->find_obj_beg(cur_beg, search_end); - assert(cur_beg < m->addr_to_bit(end), "not enough live words to skip"); - return m->bit_to_addr(cur_beg); + count -= obj_size; + cur_addr += obj_size; + } } HeapWord* PSParallelCompact::first_src_addr(HeapWord* const dest_addr, @@ -2431,9 +2393,31 @@ size_t PSParallelCompact::next_src_region(MoveAndUpdateClosure& closure, return 0; } +HeapWord* PSParallelCompact::partial_obj_end(HeapWord* region_start_addr) { + ParallelCompactData& sd = summary_data(); + assert(sd.is_region_aligned(region_start_addr), "precondition"); + + // Use per-region partial_obj_size to locate the end of the obj, that extends to region_start_addr. 
+ SplitInfo& split_info = _space_info[space_id(region_start_addr)].split_info(); + size_t start_region_idx = sd.addr_to_region_idx(region_start_addr); + size_t end_region_idx = sd.region_count(); + size_t accumulated_size = 0; + for (size_t region_idx = start_region_idx; region_idx < end_region_idx; ++region_idx) { + if (split_info.is_split(region_idx)) { + accumulated_size += split_info.partial_obj_size(); + break; + } + size_t cur_partial_obj_size = sd.region(region_idx)->partial_obj_size(); + accumulated_size += cur_partial_obj_size; + if (cur_partial_obj_size != ParallelCompactData::RegionSize) { + break; + } + } + return region_start_addr + accumulated_size; +} + void PSParallelCompact::fill_region(ParCompactionManager* cm, MoveAndUpdateClosure& closure, size_t region_idx) { - typedef ParMarkBitMap::IterationStatus IterationStatus; ParMarkBitMap* const bitmap = mark_bitmap(); ParallelCompactData& sd = summary_data(); RegionData* const region_ptr = sd.region(region_idx); @@ -2457,7 +2441,30 @@ void PSParallelCompact::fill_region(ParCompactionManager* cm, MoveAndUpdateClosu // of the object or as much as will fit. The fact that pointer updates were // deferred will be noted when the object header is processed. HeapWord* const old_src_addr = closure.source(); - closure.copy_partial_obj(); + { + HeapWord* region_start = sd.region_align_down(closure.source()); + HeapWord* obj_start = bitmap->find_obj_beg_reverse(region_start, closure.source()); + HeapWord* obj_end; + if (bitmap->is_marked(obj_start)) { + HeapWord* next_region_start = region_start + ParallelCompactData::RegionSize; + HeapWord* partial_obj_start = (next_region_start >= src_space_top) + ? nullptr + : sd.addr_to_region_ptr(next_region_start)->partial_obj_addr(); + if (partial_obj_start == obj_start) { + // This obj extends to next region. + obj_end = partial_obj_end(next_region_start); + } else { + // Completely contained in this region; safe to use size(). 
+ obj_end = obj_start + cast_to_oop(obj_start)->size(); + } + } else { + // This obj extends to current region. + obj_end = partial_obj_end(region_start); + } + size_t partial_obj_size = pointer_delta(obj_end, closure.source()); + closure.copy_partial_obj(partial_obj_size); + } + if (closure.is_full()) { decrement_destination_counts(cm, src_space_id, src_region_idx, closure.source()); @@ -2478,24 +2485,30 @@ void PSParallelCompact::fill_region(ParCompactionManager* cm, MoveAndUpdateClosu } do { - HeapWord* const cur_addr = closure.source(); + HeapWord* cur_addr = closure.source(); HeapWord* const end_addr = MIN2(sd.region_align_up(cur_addr + 1), src_space_top); - IterationStatus status = bitmap->iterate(&closure, cur_addr, end_addr); + HeapWord* partial_obj_start = (end_addr == src_space_top) + ? nullptr + : sd.addr_to_region_ptr(end_addr)->partial_obj_addr(); + // apply closure on objs inside [cur_addr, end_addr) + do { + cur_addr = bitmap->find_obj_beg(cur_addr, end_addr); + if (cur_addr == end_addr) { + break; + } + size_t obj_size; + if (partial_obj_start == cur_addr) { + obj_size = pointer_delta(partial_obj_end(end_addr), cur_addr); + } else { + // This obj doesn't extend into next region; size() is safe to use. + obj_size = cast_to_oop(cur_addr)->size(); + } + closure.do_addr(cur_addr, obj_size); + cur_addr += obj_size; + } while (cur_addr < end_addr && !closure.is_full()); - if (status == ParMarkBitMap::would_overflow) { - // The last object did not fit. Note that interior oop updates were - // deferred, then copy enough of the object to fill the region. 
- cm->push_deferred_object(closure.destination()); - status = closure.copy_until_full(); // copies from closure.source() - - decrement_destination_counts(cm, src_space_id, src_region_idx, - closure.source()); - closure.complete_region(cm, dest_addr, region_ptr); - return; - } - - if (status == ParMarkBitMap::full) { + if (closure.is_full()) { decrement_destination_counts(cm, src_space_id, src_region_idx, closure.source()); closure.complete_region(cm, dest_addr, region_ptr); @@ -2595,77 +2608,9 @@ void PSParallelCompact::initialize_shadow_regions(uint parallel_gc_threads) } } -void PSParallelCompact::fill_blocks(size_t region_idx) +void MoveAndUpdateClosure::copy_partial_obj(size_t partial_obj_size) { - // Fill in the block table elements for the specified region. Each block - // table element holds the number of live words in the region that are to the - // left of the first object that starts in the block. Thus only blocks in - // which an object starts need to be filled. - // - // The algorithm scans the section of the bitmap that corresponds to the - // region, keeping a running total of the live words. When an object start is - // found, if it's the first to start in the block that contains it, the - // current total is written to the block table element. - const size_t Log2BlockSize = ParallelCompactData::Log2BlockSize; - const size_t Log2RegionSize = ParallelCompactData::Log2RegionSize; - const size_t RegionSize = ParallelCompactData::RegionSize; - - ParallelCompactData& sd = summary_data(); - const size_t partial_obj_size = sd.region(region_idx)->partial_obj_size(); - if (partial_obj_size >= RegionSize) { - return; // No objects start in this region. - } - - // Ensure the first loop iteration decides that the block has changed. 
- size_t cur_block = sd.block_count(); - - const ParMarkBitMap* const bitmap = mark_bitmap(); - - const size_t Log2BitsPerBlock = Log2BlockSize - LogMinObjAlignment; - assert((size_t)1 << Log2BitsPerBlock == - bitmap->words_to_bits(ParallelCompactData::BlockSize), "sanity"); - - size_t beg_bit = bitmap->words_to_bits(region_idx << Log2RegionSize); - const size_t range_end = beg_bit + bitmap->words_to_bits(RegionSize); - size_t live_bits = bitmap->words_to_bits(partial_obj_size); - beg_bit = bitmap->find_obj_beg(beg_bit + live_bits, range_end); - while (beg_bit < range_end) { - const size_t new_block = beg_bit >> Log2BitsPerBlock; - if (new_block != cur_block) { - cur_block = new_block; - sd.block(cur_block)->set_offset(bitmap->bits_to_words(live_bits)); - } - - const size_t end_bit = bitmap->find_obj_end(beg_bit, range_end); - if (end_bit < range_end - 1) { - live_bits += end_bit - beg_bit + 1; - beg_bit = bitmap->find_obj_beg(end_bit + 1, range_end); - } else { - return; - } - } -} - -ParMarkBitMap::IterationStatus MoveAndUpdateClosure::copy_until_full() -{ - if (source() != copy_destination()) { - DEBUG_ONLY(PSParallelCompact::check_new_location(source(), destination());) - Copy::aligned_conjoint_words(source(), copy_destination(), words_remaining()); - } - update_state(words_remaining()); - assert(is_full(), "sanity"); - return ParMarkBitMap::full; -} - -void MoveAndUpdateClosure::copy_partial_obj() -{ - size_t words = words_remaining(); - - HeapWord* const range_end = MIN2(source() + words, bitmap()->region_end()); - HeapWord* const end_addr = bitmap()->find_obj_end(source(), range_end); - if (end_addr < range_end) { - words = bitmap()->obj_size(source(), end_addr); - } + size_t words = MIN2(partial_obj_size, words_remaining()); // This test is necessary; if omitted, the pointer updates to a partial object // that crosses the dense prefix boundary could be overwritten. 
@@ -2685,32 +2630,27 @@ void MoveAndUpdateClosure::complete_region(ParCompactionManager *cm, HeapWord *d ParMarkBitMapClosure::IterationStatus MoveAndUpdateClosure::do_addr(HeapWord* addr, size_t words) { assert(destination() != nullptr, "sanity"); - assert(bitmap()->obj_size(addr) == words, "bad size"); - _source = addr; - assert(PSParallelCompact::summary_data().calc_new_pointer(source(), compaction_manager()) == - destination(), "wrong destination"); - - if (words > words_remaining()) { - return ParMarkBitMap::would_overflow; - } // The start_array must be updated even if the object is not moving. if (_start_array != nullptr) { _start_array->update_for_block(destination(), destination() + words); } + // Avoid overflow + words = MIN2(words, words_remaining()); + assert(words > 0, "inv"); + if (copy_destination() != source()) { DEBUG_ONLY(PSParallelCompact::check_new_location(source(), destination());) + assert(source() != destination(), "inv"); + assert(cast_to_oop(source())->is_forwarded(), "inv"); + assert(cast_to_oop(source())->forwardee() == cast_to_oop(destination()), "inv"); Copy::aligned_conjoint_words(source(), copy_destination(), words); + cast_to_oop(copy_destination())->init_mark(); } - oop moved_oop = cast_to_oop(copy_destination()); - compaction_manager()->update_contents(moved_oop); - assert(oopDesc::is_oop_or_null(moved_oop), "Expected an oop or null at " PTR_FORMAT, p2i(moved_oop)); - update_state(words); - assert(copy_destination() == cast_from_oop(moved_oop) + moved_oop->size(), "sanity"); return is_full() ? ParMarkBitMap::full : ParMarkBitMap::incomplete; } @@ -2733,37 +2673,3 @@ void MoveAndUpdateShadowClosure::complete_region(ParCompactionManager *cm, HeapW } } -UpdateOnlyClosure::UpdateOnlyClosure(ParMarkBitMap* mbm, - ParCompactionManager* cm, - PSParallelCompact::SpaceId space_id) : - ParMarkBitMapClosure(mbm, cm), - _start_array(PSParallelCompact::start_array(space_id)) -{ -} - -// Updates the references in the object to their new values. 
-ParMarkBitMapClosure::IterationStatus -UpdateOnlyClosure::do_addr(HeapWord* addr, size_t words) { - do_addr(addr); - return ParMarkBitMap::incomplete; -} - -FillClosure::FillClosure(ParCompactionManager* cm, PSParallelCompact::SpaceId space_id) : - ParMarkBitMapClosure(PSParallelCompact::mark_bitmap(), cm), - _start_array(PSParallelCompact::start_array(space_id)) -{ - assert(space_id == PSParallelCompact::old_space_id, - "cannot use FillClosure in the young gen"); -} - -ParMarkBitMapClosure::IterationStatus -FillClosure::do_addr(HeapWord* addr, size_t size) { - CollectedHeap::fill_with_objects(addr, size); - HeapWord* const end = addr + size; - do { - size_t size = cast_to_oop(addr)->size(); - _start_array->update_for_block(addr, addr + size); - addr += size; - } while (addr < end); - return ParMarkBitMap::incomplete; -} diff --git a/src/hotspot/share/gc/parallel/psParallelCompact.hpp b/src/hotspot/share/gc/parallel/psParallelCompact.hpp index a36acc72a9b..67346d7eee5 100644 --- a/src/hotspot/share/gc/parallel/psParallelCompact.hpp +++ b/src/hotspot/share/gc/parallel/psParallelCompact.hpp @@ -215,17 +215,6 @@ public: // Mask for the bits in a pointer to get the address of the start of a region. static const size_t RegionAddrMask; - static const size_t Log2BlockSize; - static const size_t BlockSize; - static const size_t BlockSizeBytes; - - static const size_t BlockSizeOffsetMask; - static const size_t BlockAddrOffsetMask; - static const size_t BlockAddrMask; - - static const size_t BlocksPerRegion; - static const size_t Log2BlocksPerRegion; - class RegionData { public: @@ -274,12 +263,6 @@ public: inline uint destination_count() const; inline uint destination_count_raw() const; - // Whether the block table for this region has been filled. - inline bool blocks_filled() const; - - // Number of times the block table was filled. 
- DEBUG_ONLY(inline size_t blocks_filled_count() const;) - // Whether this region is available to be claimed, has been claimed, or has // been completed. // @@ -298,7 +281,6 @@ public: void set_partial_obj_size(size_t words) { _partial_obj_size = (region_sz_t) words; } - inline void set_blocks_filled(); inline void set_destination_count(uint count); inline void set_live_obj_size(size_t words); @@ -356,13 +338,8 @@ public: HeapWord* _partial_obj_addr; region_sz_t _partial_obj_size; region_sz_t volatile _dc_and_los; - bool volatile _blocks_filled; int volatile _shadow_state; -#ifdef ASSERT - size_t _blocks_filled_count; // Number of block table fills. -#endif // #ifdef ASSERT - #ifdef ASSERT public: uint _pushed; // 0 until region is pushed onto a stack @@ -370,21 +347,6 @@ public: #endif }; - // "Blocks" allow shorter sections of the bitmap to be searched. Each Block - // holds an offset, which is the amount of live data in the Region to the left - // of the first live object that starts in the Block. - class BlockData - { - public: - typedef unsigned short int blk_ofs_t; - - blk_ofs_t offset() const { return _offset; } - void set_offset(size_t val) { _offset = (blk_ofs_t)val; } - - private: - blk_ofs_t _offset; - }; - public: ParallelCompactData(); bool initialize(MemRegion reserved_heap); @@ -396,9 +358,6 @@ public: inline RegionData* region(size_t region_idx) const; inline size_t region(const RegionData* const region_ptr) const; - size_t block_count() const { return _block_count; } - inline BlockData* block(size_t block_idx) const; - // Fill in the regions covering [beg, end) so that no data moves; i.e., the // destination of region n is simply the start of region n. Both arguments // beg and end must be region-aligned. 
@@ -436,28 +395,12 @@ public: inline HeapWord* region_align_up(HeapWord* addr) const; inline bool is_region_aligned(HeapWord* addr) const; - size_t addr_to_block_idx(const HeapWord* addr) const; - inline BlockData* addr_to_block_ptr(const HeapWord* addr) const; - - inline HeapWord* block_align_down(HeapWord* addr) const; - - // Return the address one past the end of the partial object. - HeapWord* partial_obj_end(size_t region_idx) const; - - // Return the location of the object after compaction. - HeapWord* calc_new_pointer(HeapWord* addr, ParCompactionManager* cm) const; - - HeapWord* calc_new_pointer(oop p, ParCompactionManager* cm) const { - return calc_new_pointer(cast_from_oop(p), cm); - } - #ifdef ASSERT void verify_clear(const PSVirtualSpace* vspace); void verify_clear(); #endif // #ifdef ASSERT private: - bool initialize_block_data(); bool initialize_region_data(size_t heap_size); PSVirtualSpace* create_vspace(size_t count, size_t element_size); @@ -470,10 +413,6 @@ private: size_t _reserved_byte_size; RegionData* _region_data; size_t _region_count; - - PSVirtualSpace* _block_vspace; - BlockData* _block_data; - size_t _block_count; }; inline uint @@ -488,31 +427,6 @@ ParallelCompactData::RegionData::destination_count() const return destination_count_raw() >> dc_shift; } -inline bool -ParallelCompactData::RegionData::blocks_filled() const -{ - bool result = _blocks_filled; - OrderAccess::acquire(); - return result; -} - -#ifdef ASSERT -inline size_t -ParallelCompactData::RegionData::blocks_filled_count() const -{ - return _blocks_filled_count; -} -#endif // #ifdef ASSERT - -inline void -ParallelCompactData::RegionData::set_blocks_filled() -{ - OrderAccess::release(); - _blocks_filled = true; - // Debug builds count the number of times the table was filled. 
- DEBUG_ONLY(Atomic::inc(&_blocks_filled_count)); -} - inline void ParallelCompactData::RegionData::set_destination_count(uint count) { @@ -602,12 +516,6 @@ ParallelCompactData::region(const RegionData* const region_ptr) const return pointer_delta(region_ptr, _region_data, sizeof(RegionData)); } -inline ParallelCompactData::BlockData* -ParallelCompactData::block(size_t n) const { - assert(n < block_count(), "bad arg"); - return _block_data + n; -} - inline size_t ParallelCompactData::region_offset(const HeapWord* addr) const { @@ -667,28 +575,6 @@ ParallelCompactData::is_region_aligned(HeapWord* addr) const return (size_t(addr) & RegionAddrOffsetMask) == 0; } -inline size_t -ParallelCompactData::addr_to_block_idx(const HeapWord* addr) const -{ - assert(addr >= _heap_start, "bad addr"); - assert(addr <= _heap_end, "bad addr"); - return pointer_delta(addr, _heap_start) >> Log2BlockSize; -} - -inline ParallelCompactData::BlockData* -ParallelCompactData::addr_to_block_ptr(const HeapWord* addr) const -{ - return block(addr_to_block_idx(addr)); -} - -inline HeapWord* -ParallelCompactData::block_align_down(HeapWord* addr) const -{ - assert(addr >= _heap_start, "bad addr"); - assert(addr < _heap_end + RegionSize, "bad addr"); - return (HeapWord*)(size_t(addr) & BlockAddrMask); -} - // Abstract closure for use with ParMarkBitMap::iterate(), which will invoke the // do_addr() method. // @@ -774,25 +660,24 @@ inline void ParMarkBitMapClosure::decrement_words_remaining(size_t words) { // does parts of the collection using parallel threads. The collection includes // the tenured generation and the young generation. // -// There are four phases of the collection. +// A collection consists of the following phases. 
//
// - marking phase
-// - summary phase
+// - summary phase (single-threaded)
+// - forward (to new address) phase
+// - adjust pointers phase
// - compacting phase
// - clean up phase
//
// Roughly speaking these phases correspond, respectively, to
+//
// - mark all the live objects
+// - calculating destination-region for each region for better parallelism in the following phases
// - calculate the destination of each object at the end of the collection
+// - adjust pointers to reflect new destination of objects
// - move the objects to their destination
// - update some references and reinitialize some variables
//
-// These three phases are invoked in PSParallelCompact::invoke_no_policy(). The
-// marking phase is implemented in PSParallelCompact::marking_phase() and does a
-// complete marking of the heap. The summary phase is implemented in
-// PSParallelCompact::summary_phase(). The move and update phase is implemented
-// in PSParallelCompact::compact().
-//
// A space that is being collected is divided into regions and with each region
// is associated an object of type ParallelCompactData. Each region is of a
// fixed size and typically will contain more than 1 object and may have parts
@@ -828,17 +713,12 @@ inline void ParMarkBitMapClosure::decrement_words_remaining(size_t words) {
// dense prefix do need to have their object references updated. See method
// summarize_dense_prefix().
//
-// The summary phase is done using 1 GC thread.
+// The forward (to new address) phase calculates the new address of each
+// object and records old-addr-to-new-addr association.
//
-// The compaction phase moves objects to their new location and updates all
-// references in the object.
+// The adjust pointers phase remaps all pointers to reflect the new address of each object.
//
-// A current exception is that objects that cross a region boundary are moved
-// but do not have their references updated. 
References are not updated because -// it cannot easily be determined if the klass pointer KKK for the object AAA -// has been updated. KKK likely resides in a region to the left of the region -// containing AAA. These AAA's have their references updated at the end in a -// clean up phase. See the method PSParallelCompact::update_deferred_object(). +// The compaction phase moves objects to their new location. // // Compaction is done on a region basis. A region that is ready to be filled is // put on a ready list and GC threads take region off the list and fill them. A @@ -869,39 +749,18 @@ inline void ParMarkBitMapClosure::decrement_words_remaining(size_t words) { // Environments (VEE 2019). ACM, New York, NY, USA, 108-121. DOI: // https://doi.org/10.1145/3313808.3313820 -class TaskQueue; - class PSParallelCompact : AllStatic { public: // Convenient access to type names. typedef ParMarkBitMap::idx_t idx_t; typedef ParallelCompactData::RegionData RegionData; - typedef ParallelCompactData::BlockData BlockData; typedef enum { old_space_id, eden_space_id, from_space_id, to_space_id, last_space_id } SpaceId; - struct UpdateDensePrefixTask : public CHeapObj { - SpaceId _space_id; - size_t _region_index_start; - size_t _region_index_end; - - UpdateDensePrefixTask() : - _space_id(SpaceId(0)), - _region_index_start(0), - _region_index_end(0) {} - - UpdateDensePrefixTask(SpaceId space_id, - size_t region_index_start, - size_t region_index_end) : - _space_id(space_id), - _region_index_start(region_index_start), - _region_index_end(region_index_end) {} - }; - - public: +public: // Inline closure decls // class IsAliveClosure: public BoolObjectClosure { @@ -909,7 +768,6 @@ class PSParallelCompact : AllStatic { virtual bool do_object_b(oop p); }; - friend class RefProcTaskProxy; friend class PSParallelCompactTest; private: @@ -958,10 +816,11 @@ class PSParallelCompact : AllStatic { static void summary_phase(bool maximum_compaction); - // Adjust addresses in roots. 
Does not adjust addresses in heap. - static void adjust_roots(); + static void adjust_pointers(); + static void forward_to_new_addr(); - DEBUG_ONLY(static void write_block_fill_histogram();) + static void verify_forward() NOT_DEBUG_RETURN; + static void verify_filler_in_dense_prefix() NOT_DEBUG_RETURN; // Move objects to new locations. static void compact(); @@ -969,10 +828,6 @@ class PSParallelCompact : AllStatic { // Add available regions to the stack and draining tasks to the task queue. static void prepare_region_draining_tasks(uint parallel_gc_threads); - // Add dense prefix update tasks to the task queue. - static void enqueue_dense_prefix_tasks(TaskQueue& task_queue, - uint parallel_gc_threads); - #ifndef PRODUCT // Print generic summary data static void print_generic_summary_data(ParallelCompactData& summary_data, @@ -980,10 +835,23 @@ class PSParallelCompact : AllStatic { HeapWord* const end_addr); #endif // #ifndef PRODUCT + static void fill_range_in_dense_prefix(HeapWord* start, HeapWord* end); + public: + static void fill_dead_objs_in_dense_prefix(uint worker_id, uint num_workers); + static bool invoke(bool maximum_heap_compaction); static bool invoke_no_policy(bool maximum_heap_compaction); + template + static void adjust_in_space_helper(SpaceId id, volatile uint* claim_counter, Func&& on_stripe); + + static void adjust_in_old_space(volatile uint* claim_counter); + + static void adjust_in_young_space(SpaceId id, volatile uint* claim_counter); + + static void adjust_pointers_in_spaces(uint worker_id, volatile uint* claim_counter); + static void post_initialize(); // Perform initialization for PSParallelCompact that requires // allocations. 
This should be called during the VM initialization @@ -1003,7 +871,7 @@ class PSParallelCompact : AllStatic { static inline bool mark_obj(oop obj); static inline bool is_marked(oop obj); - template static inline void adjust_pointer(T* p, ParCompactionManager* cm); + template static inline void adjust_pointer(T* p); // Compaction support. // Return true if p is in the range [beg_addr, end_addr). @@ -1016,19 +884,6 @@ class PSParallelCompact : AllStatic { static inline HeapWord* dense_prefix(SpaceId space_id); static inline ObjectStartArray* start_array(SpaceId space_id); - // Update a region in the dense prefix. For each live object - // in the region, update it's interior references. For each - // dead object, fill it with deadwood. Dead space at the end - // of a region range will be filled to the start of the next - // live object regardless of the region_index_end. None of the - // objects in the dense prefix move and dead space is dead - // (holds only dead objects that don't need any processing), so - // dead space can be filled in any order. - static void update_and_deadwood_in_dense_prefix(ParCompactionManager* cm, - SpaceId space_id, - size_t region_index_start, - size_t region_index_end); - // Return the address of the count + 1st live word in the range [beg, end). static HeapWord* skip_live_words(HeapWord* beg, HeapWord* end, size_t count); @@ -1056,6 +911,8 @@ class PSParallelCompact : AllStatic { size_t beg_region, HeapWord* end_addr); + static HeapWord* partial_obj_end(HeapWord* region_start_addr); + static void fill_region(ParCompactionManager* cm, MoveAndUpdateClosure& closure, size_t region); static void fill_and_update_region(ParCompactionManager* cm, size_t region); @@ -1067,12 +924,6 @@ class PSParallelCompact : AllStatic { // _next_shadow_region filed for each compact manager static void initialize_shadow_regions(uint parallel_gc_threads); - // Fill in the block table for the specified region. 
- static void fill_blocks(size_t region_idx); - - // Update a single deferred object. - static void update_deferred_object(ParCompactionManager* cm, HeapWord* addr); - static ParMarkBitMap* mark_bitmap() { return &_mark_bitmap; } static ParallelCompactData& summary_data() { return _summary_data; } @@ -1120,14 +971,10 @@ class MoveAndUpdateClosure: public ParMarkBitMapClosure { // return would_overflow. IterationStatus do_addr(HeapWord* addr, size_t size); - // Copy enough words to fill this closure, starting at source(). Interior - // oops and the start array are not updated. Return full. - IterationStatus copy_until_full(); - // Copy enough words to fill this closure or to the end of an object, - // whichever is smaller, starting at source(). Interior oops and the start - // array are not updated. - void copy_partial_obj(); + // whichever is smaller, starting at source(). The start array is not + // updated. + void copy_partial_obj(size_t partial_obj_size); virtual void complete_region(ParCompactionManager* cm, HeapWord* dest_addr, PSParallelCompact::RegionData* region_ptr); @@ -1198,31 +1045,6 @@ MoveAndUpdateShadowClosure::MoveAndUpdateShadowClosure(ParMarkBitMap *bitmap, _offset = calculate_shadow_offset(region, shadow); } -class UpdateOnlyClosure: public ParMarkBitMapClosure { - private: - ObjectStartArray* const _start_array; - - public: - UpdateOnlyClosure(ParMarkBitMap* mbm, - ParCompactionManager* cm, - PSParallelCompact::SpaceId space_id); - - // Update the object. 
- virtual IterationStatus do_addr(HeapWord* addr, size_t words); - - inline void do_addr(HeapWord* addr); -}; - -class FillClosure: public ParMarkBitMapClosure { - public: - FillClosure(ParCompactionManager* cm, PSParallelCompact::SpaceId space_id); - - virtual IterationStatus do_addr(HeapWord* addr, size_t size); - - private: - ObjectStartArray* const _start_array; -}; - void steal_marking_work(TaskTerminator& terminator, uint worker_id); #endif // SHARE_GC_PARALLEL_PSPARALLELCOMPACT_HPP diff --git a/src/hotspot/share/gc/parallel/psParallelCompact.inline.hpp b/src/hotspot/share/gc/parallel/psParallelCompact.inline.hpp index a4a5060ffd5..75a092e2dd1 100644 --- a/src/hotspot/share/gc/parallel/psParallelCompact.inline.hpp +++ b/src/hotspot/share/gc/parallel/psParallelCompact.inline.hpp @@ -78,8 +78,7 @@ inline void PSParallelCompact::check_new_location(HeapWord* old_addr, HeapWord* #endif // ASSERT inline bool PSParallelCompact::mark_obj(oop obj) { - const size_t obj_size = obj->size(); - if (mark_bitmap()->mark_obj(obj, obj_size)) { + if (mark_bitmap()->mark_obj(obj)) { ContinuationGCSupport::transform_stack_chunk(obj); return true; } else { @@ -88,34 +87,22 @@ inline bool PSParallelCompact::mark_obj(oop obj) { } template -inline void PSParallelCompact::adjust_pointer(T* p, ParCompactionManager* cm) { +inline void PSParallelCompact::adjust_pointer(T* p) { T heap_oop = RawAccess<>::oop_load(p); if (!CompressedOops::is_null(heap_oop)) { oop obj = CompressedOops::decode_not_null(heap_oop); assert(ParallelScavengeHeap::heap()->is_in(obj), "should be in heap"); - oop new_obj = cast_to_oop(summary_data().calc_new_pointer(obj, cm)); - assert(new_obj != nullptr, "non-null address for live objects"); - // Is it actually relocated at all? 
- if (new_obj != obj) { - assert(ParallelScavengeHeap::heap()->is_in_reserved(new_obj), - "should be in object space"); - RawAccess::oop_store(p, new_obj); + if (!obj->is_forwarded()) { + return; } + oop new_obj = obj->forwardee(); + assert(new_obj != nullptr, "non-null address for live objects"); + assert(new_obj != obj, "inv"); + assert(ParallelScavengeHeap::heap()->is_in_reserved(new_obj), + "should be in object space"); + RawAccess::oop_store(p, new_obj); } } -class PCAdjustPointerClosure: public BasicOopIterateClosure { -public: - PCAdjustPointerClosure(ParCompactionManager* cm) : _cm(cm) { - } - template void do_oop_work(T* p) { PSParallelCompact::adjust_pointer(p, _cm); } - virtual void do_oop(oop* p) { do_oop_work(p); } - virtual void do_oop(narrowOop* p) { do_oop_work(p); } - - virtual ReferenceIterationMode reference_iteration_mode() { return DO_FIELDS; } -private: - ParCompactionManager* _cm; -}; - #endif // SHARE_GC_PARALLEL_PSPARALLELCOMPACT_INLINE_HPP