8177707: Specialize G1RemSet::refine_card for concurrent/during safepoint refinement

Reviewed-by: ehelin, kbarrett
This commit is contained in:
Thomas Schatzl 2017-06-02 13:47:54 +02:00
parent 3e624c317a
commit c982403296
6 changed files with 262 additions and 125 deletions

View File

@ -101,11 +101,7 @@ public:
RefineCardTableEntryClosure() : _concurrent(true) { }
bool do_card_ptr(jbyte* card_ptr, uint worker_i) {
bool oops_into_cset = G1CollectedHeap::heap()->g1_rem_set()->refine_card(card_ptr, worker_i, NULL);
// This path is executed by the concurrent refine or mutator threads,
// concurrently, and so we do not care if card_ptr contains references
// that point into the collection set.
assert(!oops_into_cset, "should be");
G1CollectedHeap::heap()->g1_rem_set()->refine_card_concurrently(card_ptr, worker_i);
if (_concurrent && SuspendibleThreadSet::should_yield()) {
// Caller will actually yield.

View File

@ -171,6 +171,24 @@ public:
virtual void do_oop(narrowOop* p) { do_oop_nv(p); }
};
class G1ConcurrentRefineOopClosure: public ExtendedOopClosure {
G1CollectedHeap* _g1;
uint _worker_i;
public:
G1ConcurrentRefineOopClosure(G1CollectedHeap* g1h, uint worker_i) :
_g1(g1h),
_worker_i(worker_i) {
}
// This closure needs special handling for InstanceRefKlass.
virtual ReferenceIterationMode reference_iteration_mode() { return DO_DISCOVERED_AND_DISCOVERY; }
template <class T> void do_oop_nv(T* p);
virtual void do_oop(narrowOop* p) { do_oop_nv(p); }
virtual void do_oop(oop* p) { do_oop_nv(p); }
};
class G1UpdateRSOrPushRefOopClosure: public ExtendedOopClosure {
G1CollectedHeap* _g1;
HeapRegion* _from;

View File

@ -110,6 +110,54 @@ inline void G1RootRegionScanClosure::do_oop_nv(T* p) {
}
}
template <class T>
inline static void check_obj_during_refinement(T* p, oop const obj) {
#ifdef ASSERT
G1CollectedHeap* g1 = G1CollectedHeap::heap();
// can't do because of races
// assert(obj == NULL || obj->is_oop(), "expected an oop");
assert(check_obj_alignment(obj), "not oop aligned");
assert(g1->is_in_reserved(obj), "must be in heap");
HeapRegion* from = g1->heap_region_containing(p);
assert(from != NULL, "from region must be non-NULL");
assert(from->is_in_reserved(p) ||
(from->is_humongous() &&
g1->heap_region_containing(p)->is_humongous() &&
from->humongous_start_region() == g1->heap_region_containing(p)->humongous_start_region()),
"p " PTR_FORMAT " is not in the same region %u or part of the correct humongous object starting at region %u.",
p2i(p), from->hrm_index(), from->humongous_start_region()->hrm_index());
#endif // ASSERT
}
template <class T>
inline void G1ConcurrentRefineOopClosure::do_oop_nv(T* p) {
T o = oopDesc::load_heap_oop(p);
if (oopDesc::is_null(o)) {
return;
}
oop obj = oopDesc::decode_heap_oop_not_null(o);
check_obj_during_refinement(p, obj);
if (HeapRegion::is_in_same_region(p, obj)) {
// Normally this closure should only be called with cross-region references.
// But since Java threads are manipulating the references concurrently and we
// reload the values things may have changed.
// This check lets slip through references from a humongous continues region
// to its humongous start region, as they are in different regions, and adds a
// remembered set entry. This is benign (apart from memory usage), as this
// closure is never called during evacuation.
return;
}
HeapRegion* to = _g1->heap_region_containing(obj);
assert(to->rem_set() != NULL, "Need per-region 'into' remsets.");
to->rem_set()->add_reference(p, _worker_i);
}
template <class T>
inline void G1UpdateRSOrPushRefOopClosure::do_oop_nv(T* p) {
oop obj = oopDesc::load_decode_heap_oop(p);
@ -161,9 +209,9 @@ inline void G1UpdateRSOrPushRefOopClosure::do_oop_nv(T* p) {
// we have already visited/tried to copy this object
// there is no need to retry.
if (!self_forwarded(obj)) {
assert(_push_ref_cl != NULL, "should not be null");
// Push the reference in the refs queue of the G1ParScanThreadState
// instance for this worker thread.
assert(_push_ref_cl != NULL, "should not be null");
// Push the reference in the refs queue of the G1ParScanThreadState
// instance for this worker thread.
_push_ref_cl->do_oop(p);
}
_has_refs_into_cset = true;

View File

@ -413,7 +413,7 @@ public:
assert(SafepointSynchronize::is_at_safepoint(), "not during an evacuation pause");
assert(worker_i < ParallelGCThreads, "should be a GC worker");
if (_g1rs->refine_card(card_ptr, worker_i, _cl)) {
if (_g1rs->refine_card_during_gc(card_ptr, worker_i, _cl)) {
// 'card_ptr' contains references that point into the collection
// set. We need to record the card in the DCQS
// (_into_cset_dirty_card_queue_set)
@ -523,6 +523,18 @@ void G1RemSet::scrub(uint worker_num, HeapRegionClaimer *hrclaimer) {
_g1->heap_region_par_iterate(&scrub_cl, worker_num, hrclaimer);
}
inline void check_card_ptr(jbyte* card_ptr, CardTableModRefBS* ct_bs) {
#ifdef ASSERT
G1CollectedHeap* g1 = G1CollectedHeap::heap();
assert(g1->is_in_exact(ct_bs->addr_for(card_ptr)),
"Card at " PTR_FORMAT " index " SIZE_FORMAT " representing heap at " PTR_FORMAT " (%u) must be in committed heap",
p2i(card_ptr),
ct_bs->index_for(ct_bs->addr_for(card_ptr)),
p2i(ct_bs->addr_for(card_ptr)),
g1->addr_to_region(ct_bs->addr_for(card_ptr)));
#endif
}
G1UpdateRSOrPushRefOopClosure::G1UpdateRSOrPushRefOopClosure(G1CollectedHeap* g1h,
G1ParPushHeapRSClosure* push_ref_cl,
bool record_refs_into_cset,
@ -534,23 +546,164 @@ G1UpdateRSOrPushRefOopClosure::G1UpdateRSOrPushRefOopClosure(G1CollectedHeap* g1
_push_ref_cl(push_ref_cl),
_worker_i(worker_i) { }
// Returns true if the given card contains references that point
// into the collection set, if we're checking for such references;
// false otherwise.
void G1RemSet::refine_card_concurrently(jbyte* card_ptr,
uint worker_i) {
assert(!_g1->is_gc_active(), "Only call concurrently");
bool G1RemSet::refine_card(jbyte* card_ptr,
uint worker_i,
G1ParPushHeapRSClosure* oops_in_heap_closure) {
assert(_g1->is_in_exact(_ct_bs->addr_for(card_ptr)),
"Card at " PTR_FORMAT " index " SIZE_FORMAT " representing heap at " PTR_FORMAT " (%u) must be in committed heap",
p2i(card_ptr),
_ct_bs->index_for(_ct_bs->addr_for(card_ptr)),
p2i(_ct_bs->addr_for(card_ptr)),
_g1->addr_to_region(_ct_bs->addr_for(card_ptr)));
bool check_for_refs_into_cset = oops_in_heap_closure != NULL;
check_card_ptr(card_ptr, _ct_bs);
// If the card is no longer dirty, nothing to do.
if (*card_ptr != CardTableModRefBS::dirty_card_val()) {
return;
}
// Construct the region representing the card.
HeapWord* start = _ct_bs->addr_for(card_ptr);
// And find the region containing it.
HeapRegion* r = _g1->heap_region_containing(start);
// This check is needed for some uncommon cases where we should
// ignore the card.
//
// The region could be young. Cards for young regions are
// distinctly marked (set to g1_young_gen), so the post-barrier will
// filter them out. However, that marking is performed
// concurrently. A write to a young object could occur before the
// card has been marked young, slipping past the filter.
//
// The card could be stale, because the region has been freed since
// the card was recorded. In this case the region type could be
// anything. If (still) free or (reallocated) young, just ignore
// it. If (reallocated) old or humongous, the later card trimming
// and additional checks in iteration may detect staleness. At
// worst, we end up processing a stale card unnecessarily.
//
// In the normal (non-stale) case, the synchronization between the
// enqueueing of the card and processing it here will have ensured
// we see the up-to-date region type here.
if (!r->is_old_or_humongous()) {
return;
}
// While we are processing RSet buffers during the collection, we
// actually don't want to scan any cards on the collection set,
// since we don't want to update remembered sets with entries that
// point into the collection set, given that live objects from the
// collection set are about to move and such entries will be stale
// very soon. This change also deals with a reliability issue which
// involves scanning a card in the collection set and coming across
// an array that was being chunked and looking malformed. Note,
// however, that if evacuation fails, we have to scan any objects
// that were not moved and create any missing entries.
if (r->in_collection_set()) {
return;
}
// The result from the hot card cache insert call is either:
// * pointer to the current card
// (implying that the current card is not 'hot'),
// * null
// (meaning we had inserted the card ptr into the "hot" card cache,
// which had some headroom),
// * a pointer to a "hot" card that was evicted from the "hot" cache.
//
if (_hot_card_cache->use_cache()) {
assert(!SafepointSynchronize::is_at_safepoint(), "sanity");
const jbyte* orig_card_ptr = card_ptr;
card_ptr = _hot_card_cache->insert(card_ptr);
if (card_ptr == NULL) {
// There was no eviction. Nothing to do.
return;
} else if (card_ptr != orig_card_ptr) {
// Original card was inserted and an old card was evicted.
start = _ct_bs->addr_for(card_ptr);
r = _g1->heap_region_containing(start);
// Check whether the region formerly in the cache should be
// ignored, as discussed earlier for the original card. The
// region could have been freed while in the cache. The cset is
// not relevant here, since we're in concurrent phase.
if (!r->is_old_or_humongous()) {
return;
}
} // Else we still have the original card.
}
// Trim the region designated by the card to what's been allocated
// in the region. The card could be stale, or the card could cover
// (part of) an object at the end of the allocated space and extend
// beyond the end of allocation.
// Non-humongous objects are only allocated in the old-gen during
// GC, so if region is old then top is stable. Humongous object
// allocation sets top last; if top has not yet been set, this is
// a stale card and we'll end up with an empty intersection. If
// this is not a stale card, the synchronization between the
// enqueuing of the card and processing it here will have ensured
// we see the up-to-date top here.
HeapWord* scan_limit = r->top();
if (scan_limit <= start) {
// If the trimmed region is empty, the card must be stale.
return;
}
// Okay to clean and process the card now. There are still some
// stale card cases that may be detected by iteration and dealt with
// as iteration failure.
*const_cast<volatile jbyte*>(card_ptr) = CardTableModRefBS::clean_card_val();
// This fence serves two purposes. First, the card must be cleaned
// before processing the contents. Second, we can't proceed with
// processing until after the read of top, for synchronization with
// possibly concurrent humongous object allocation. It's okay that
// reading top and reading type were racy wrto each other. We need
// both set, in any order, to proceed.
OrderAccess::fence();
// Don't use addr_for(card_ptr + 1) which can ask for
// a card beyond the heap.
HeapWord* end = start + CardTableModRefBS::card_size_in_words;
MemRegion dirty_region(start, MIN2(scan_limit, end));
assert(!dirty_region.is_empty(), "sanity");
G1ConcurrentRefineOopClosure conc_refine_cl(_g1, worker_i);
bool card_processed =
r->oops_on_card_seq_iterate_careful<false>(dirty_region, &conc_refine_cl);
// If unable to process the card then we encountered an unparsable
// part of the heap (e.g. a partially allocated object) while
// processing a stale card. Despite the card being stale, redirty
// and re-enqueue, because we've already cleaned the card. Without
// this we could incorrectly discard a non-stale card.
if (!card_processed) {
// The card might have gotten re-dirtied and re-enqueued while we
// worked. (In fact, it's pretty likely.)
if (*card_ptr != CardTableModRefBS::dirty_card_val()) {
*card_ptr = CardTableModRefBS::dirty_card_val();
MutexLockerEx x(Shared_DirtyCardQ_lock,
Mutex::_no_safepoint_check_flag);
DirtyCardQueue* sdcq =
JavaThread::dirty_card_queue_set().shared_dirty_card_queue();
sdcq->enqueue(card_ptr);
}
} else {
_conc_refine_cards++;
}
}
bool G1RemSet::refine_card_during_gc(jbyte* card_ptr,
uint worker_i,
G1ParPushHeapRSClosure* oops_in_heap_closure) {
assert(_g1->is_gc_active(), "Only call during GC");
check_card_ptr(card_ptr, _ct_bs);
// If the card is no longer dirty, nothing to do. This covers cards that were already
// scanned as parts of the remembered sets.
if (*card_ptr != CardTableModRefBS::dirty_card_val()) {
// No need to return that this card contains refs that point
// into the collection set.
@ -599,59 +752,16 @@ bool G1RemSet::refine_card(jbyte* card_ptr,
return false;
}
// The result from the hot card cache insert call is either:
// * pointer to the current card
// (implying that the current card is not 'hot'),
// * null
// (meaning we had inserted the card ptr into the "hot" card cache,
// which had some headroom),
// * a pointer to a "hot" card that was evicted from the "hot" cache.
//
if (_hot_card_cache->use_cache()) {
assert(!check_for_refs_into_cset, "sanity");
assert(!SafepointSynchronize::is_at_safepoint(), "sanity");
const jbyte* orig_card_ptr = card_ptr;
card_ptr = _hot_card_cache->insert(card_ptr);
if (card_ptr == NULL) {
// There was no eviction. Nothing to do.
return false;
} else if (card_ptr != orig_card_ptr) {
// Original card was inserted and an old card was evicted.
start = _ct_bs->addr_for(card_ptr);
r = _g1->heap_region_containing(start);
// Check whether the region formerly in the cache should be
// ignored, as discussed earlier for the original card. The
// region could have been freed while in the cache. The cset is
// not relevant here, since we're in concurrent phase.
if (!r->is_old_or_humongous()) {
return false;
}
} // Else we still have the original card.
}
// Trim the region designated by the card to what's been allocated
// in the region. The card could be stale, or the card could cover
// (part of) an object at the end of the allocated space and extend
// beyond the end of allocation.
HeapWord* scan_limit;
if (_g1->is_gc_active()) {
// If we're in a STW GC, then a card might be in a GC alloc region
// and extend onto a GC LAB, which may not be parsable. Stop such
// at the "scan_top" of the region.
scan_limit = r->scan_top();
} else {
// Non-humongous objects are only allocated in the old-gen during
// GC, so if region is old then top is stable. Humongous object
// allocation sets top last; if top has not yet been set, this is
// a stale card and we'll end up with an empty intersection. If
// this is not a stale card, the synchronization between the
// enqueuing of the card and processing it here will have ensured
// we see the up-to-date top here.
scan_limit = r->top();
}
// If we're in a STW GC, then a card might be in a GC alloc region
// and extend onto a GC LAB, which may not be parsable. Stop such
// at the "scan_top" of the region.
HeapWord* scan_limit = r->scan_top();
if (scan_limit <= start) {
// If the trimmed region is empty, the card must be stale.
return false;
@ -662,14 +772,6 @@ bool G1RemSet::refine_card(jbyte* card_ptr,
// as iteration failure.
*const_cast<volatile jbyte*>(card_ptr) = CardTableModRefBS::clean_card_val();
// This fence serves two purposes. First, the card must be cleaned
// before processing the contents. Second, we can't proceed with
// processing until after the read of top, for synchronization with
// possibly concurrent humongous object allocation. It's okay that
// reading top and reading type were racy wrto each other. We need
// both set, in any order, to proceed.
OrderAccess::fence();
// Don't use addr_for(card_ptr + 1) which can ask for
// a card beyond the heap.
HeapWord* end = start + CardTableModRefBS::card_size_in_words;
@ -678,49 +780,17 @@ bool G1RemSet::refine_card(jbyte* card_ptr,
G1UpdateRSOrPushRefOopClosure update_rs_oop_cl(_g1,
oops_in_heap_closure,
check_for_refs_into_cset,
true,
worker_i);
update_rs_oop_cl.set_from(r);
bool card_processed;
if (_g1->is_gc_active()) {
card_processed = r->oops_on_card_seq_iterate_careful<true>(dirty_region, &update_rs_oop_cl);
} else {
card_processed = r->oops_on_card_seq_iterate_careful<false>(dirty_region, &update_rs_oop_cl);
}
bool card_processed =
r->oops_on_card_seq_iterate_careful<true>(dirty_region,
&update_rs_oop_cl);
assert(card_processed, "must be");
_conc_refine_cards++;
// If unable to process the card then we encountered an unparsable
// part of the heap (e.g. a partially allocated object) while
// processing a stale card. Despite the card being stale, redirty
// and re-enqueue, because we've already cleaned the card. Without
// this we could incorrectly discard a non-stale card.
if (!card_processed) {
assert(!_g1->is_gc_active(), "Unparsable heap during GC");
// The card might have gotten re-dirtied and re-enqueued while we
// worked. (In fact, it's pretty likely.)
if (*card_ptr != CardTableModRefBS::dirty_card_val()) {
*card_ptr = CardTableModRefBS::dirty_card_val();
MutexLockerEx x(Shared_DirtyCardQ_lock,
Mutex::_no_safepoint_check_flag);
DirtyCardQueue* sdcq =
JavaThread::dirty_card_queue_set().shared_dirty_card_queue();
sdcq->enqueue(card_ptr);
}
} else {
_conc_refine_cards++;
}
// This gets set to true if the card being refined has references that point
// into the collection set.
bool has_refs_into_cset = update_rs_oop_cl.has_refs_into_cset();
// We should only be detecting that the card contains references
// that point into the collection set if the current thread is
// a GC worker thread.
assert(!has_refs_into_cset || SafepointSynchronize::is_at_safepoint(),
"invalid result at non safepoint");
return has_refs_into_cset;
return update_rs_oop_cl.has_refs_into_cset();
}
void G1RemSet::print_periodic_summary_info(const char* header, uint period_count) {

View File

@ -146,12 +146,14 @@ public:
void scrub(uint worker_num, HeapRegionClaimer* hrclaimer);
// Refine the card corresponding to "card_ptr".
// If oops_in_heap_closure is not NULL, a true result is returned
// if the given card contains oops that have references into the
// current collection set.
bool refine_card(jbyte* card_ptr,
uint worker_i,
G1ParPushHeapRSClosure* oops_in_heap_closure);
void refine_card_concurrently(jbyte* card_ptr,
uint worker_i);
// Refine the card corresponding to "card_ptr". Returns "true" if the given card contains
// oops that have references into the current collection set.
bool refine_card_during_gc(jbyte* card_ptr,
uint worker_i,
G1ParPushHeapRSClosure* oops_in_heap_closure);
// Print accumulated summary info from the start of the VM.
void print_summary_info();

View File

@ -36,6 +36,8 @@ class G1ParScanClosure;
class G1ParPushHeapRSClosure;
class G1UpdateRSOrPushRefOopClosure;
class G1ConcurrentRefineOopClosure;
class G1CMOopClosure;
class G1RootRegionScanClosure;
@ -43,6 +45,7 @@ class G1RootRegionScanClosure;
f(G1ParScanClosure,_nv) \
f(G1ParPushHeapRSClosure,_nv) \
f(G1UpdateRSOrPushRefOopClosure,_nv) \
f(G1ConcurrentRefineOopClosure,_nv) \
f(G1CMOopClosure,_nv) \
f(G1RootRegionScanClosure,_nv)