Mirror of https://github.com/openjdk/jdk.git (synced 2026-02-16 21:35:25 +00:00)

Commit f495cb2581 ("Merge")
@@ -664,19 +664,14 @@ CMSCollector::CMSCollector(ConcurrentMarkSweepGeneration* cmsGen,
      return;
    }

    // XXX use a global constant instead of 64!
    typedef struct OopTaskQueuePadded {
      OopTaskQueue work_queue;
      char pad[64 - sizeof(OopTaskQueue)]; // prevent false sharing
    } OopTaskQueuePadded;

    typedef Padded<OopTaskQueue> PaddedOopTaskQueue;
    for (i = 0; i < num_queues; i++) {
      OopTaskQueuePadded *q_padded = new OopTaskQueuePadded();
      if (q_padded == NULL) {
      PaddedOopTaskQueue *q = new PaddedOopTaskQueue();
      if (q == NULL) {
        warning("work_queue allocation failure.");
        return;
      }
      _task_queues->register_queue(i, &q_padded->work_queue);
      _task_queues->register_queue(i, q);
    }
    for (i = 0; i < num_queues; i++) {
      _task_queues->queue(i)->initialize();
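Both the deleted OopTaskQueuePadded struct and its replacement, Padded<OopTaskQueue> (the template appears in the globalDefinitions.hpp hunk further down), exist for the same reason: each worker's queue should occupy its own cache line so one thread's writes do not keep invalidating a line another thread is using. A minimal standalone sketch of the idiom, assuming a 64-byte line as the deleted "XXX" comment does; the names here are illustrative, not HotSpot's:

    #include <cstddef>

    struct Counter { volatile long value; };   // stand-in for a work queue

    // Same idea as OopTaskQueuePadded / Padded<T>: derive from T and append
    // enough bytes that consecutive array elements never share a line.
    template <class T, size_t line = 64>
    struct PaddedSketch : public T {
      char _pad[line - sizeof(T) % line];
    };

    int main() {
      // One slot per worker; the padding keeps slots on distinct lines.
      static PaddedSketch<Counter> slots[8];
      for (int i = 0; i < 8; ++i) slots[i].value = i;
      return (int) slots[7].value;
    }

The real PADDING_SIZE macro below is more careful than this sketch: it adds a full extra line so the guarantee holds even when the object's start address is misaligned.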
@@ -1,5 +1,5 @@
/*
 * Copyright (c) 2001, 2009, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it

@@ -271,21 +271,16 @@ jbyte* ConcurrentG1Refine::add_card_count(jbyte* card_ptr, int* count, bool* defer)
  if (cas_res == prev_epoch_entry) {
    // We successfully updated the card num value in the epoch entry
    count_ptr->_count = 0; // initialize counter for new card num
    jbyte* old_card_ptr = card_num_2_ptr(old_card_num);

    // Even though the region containing the card at old_card_num was not
    // in the young list when old_card_num was recorded in the epoch
    // cache it could have been added to the free list and subsequently
    // added to the young list in the intervening time. If the evicted
    // card is in a young region just return the card_ptr and the evicted
    // card will not be cleaned. See CR 6817995.

    jbyte* old_card_ptr = card_num_2_ptr(old_card_num);
    if (is_young_card(old_card_ptr)) {
      *count = 0;
      // We can defer the processing of card_ptr
      *defer = true;
      return card_ptr;
    }
    // added to the young list in the intervening time. See CR 6817995.
    // We do not deal with this case here - it will be handled in
    // HeapRegion::oops_on_card_seq_iterate_careful after it has been
    // determined that the region containing the card has been allocated
    // to, and it's safe to check the young type of the region.

    // We do not want to defer processing of card_ptr in this case
    // (we need to refine old_card_ptr and card_ptr)
@@ -301,22 +296,22 @@ jbyte* ConcurrentG1Refine::cache_insert(jbyte* card_ptr, bool* defer) {
  jbyte* cached_ptr = add_card_count(card_ptr, &count, defer);
  assert(cached_ptr != NULL, "bad cached card ptr");

  if (is_young_card(cached_ptr)) {
    // The region containing cached_ptr has been freed during a clean up
    // pause, reallocated, and tagged as young.
    assert(cached_ptr != card_ptr, "shouldn't be");
    // We've just inserted a card pointer into the card count cache
    // and got back the card that we just inserted or (evicted) the
    // previous contents of that count slot.

    // We've just inserted a new old-gen card pointer into the card count
    // cache and evicted the previous contents of that count slot.
    // The evicted card pointer has been determined to be in a young region
    // and so cannot be the newly inserted card pointer (that will be
    // in an old region).
    // The count for newly inserted card will be set to zero during the
    // insertion, so we don't want to defer the cleaning of the newly
    // inserted card pointer.
    assert(*defer == false, "deferring non-hot card");
    return NULL;
  }
  // The card we got back could be in a young region. When the
  // returned card (if evicted) was originally inserted, we had
  // determined that its containing region was not young. However
  // it is possible for the region to be freed during a cleanup
  // pause, then reallocated and tagged as young which will result
  // in the returned card residing in a young region.
  //
  // We do not deal with this case here - the change from non-young
  // to young could be observed at any time - it will be handled in
  // HeapRegion::oops_on_card_seq_iterate_careful after it has been
  // determined that the region containing the card has been allocated
  // to.

  // The card pointer we obtained from card count cache is not hot
  // so do not store it in the cache; return it for immediate

@@ -325,7 +320,7 @@ jbyte* ConcurrentG1Refine::cache_insert(jbyte* card_ptr, bool* defer) {
    return cached_ptr;
  }

  // Otherwise, the pointer we got from the _card_counts is hot.
  // Otherwise, the pointer we got from the _card_counts cache is hot.
  jbyte* res = NULL;
  MutexLockerEx x(HotCardCache_lock, Mutex::_no_safepoint_check_flag);
  if (_n_hot == _hot_cache_size) {

@@ -338,17 +333,8 @@ jbyte* ConcurrentG1Refine::cache_insert(jbyte* card_ptr, bool* defer) {
  if (_hot_cache_idx == _hot_cache_size) _hot_cache_idx = 0;
  _n_hot++;

  if (res != NULL) {
    // Even though the region containing res was not in the young list
    // when it was recorded in the hot cache it could have been added
    // to the free list and subsequently added to the young list in
    // the intervening time. If res is in a young region, return NULL
    // so that res is not cleaned. See CR 6817995.

    if (is_young_card(res)) {
      res = NULL;
    }
  }
  // The card obtained from the hot card cache could be in a young
  // region. See above on how this can happen.

  return res;
}
@@ -638,6 +638,11 @@ G1CollectedHeap::attempt_allocation_slow(size_t word_size,

      // Now retry the allocation.
      if (_cur_alloc_region != NULL) {
        if (allocated_young_region != NULL) {
          // We need to ensure that the store to top does not
          // float above the setting of the young type.
          OrderAccess::storestore();
        }
        res = _cur_alloc_region->allocate(word_size);
      }
    }
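The storestore() added here orders two plain stores: the young tag on the region must become visible before the newly published top, because the refinement path shown below infers "an allocation happened" from top and only then trusts the tag. A rough standalone rendering of the same discipline with C++11 atomics; this is illustrative only (HotSpot's OrderAccess is its own abstraction, and the two functions are shown sequentially just so the sketch runs):

    #include <atomic>

    std::atomic<bool> is_young(false);  // stands in for the region's young tag
    std::atomic<int>  top(0);           // stands in for the region's top pointer

    void allocator_thread() {
      is_young.store(true, std::memory_order_relaxed);
      // The storestore(): all stores above become visible no later than
      // the store below.
      std::atomic_thread_fence(std::memory_order_release);
      top.store(100, std::memory_order_relaxed);
    }

    void refiner_thread() {
      // A reader that observes the published top is therefore guaranteed
      // to also observe the young tag.
      if (top.load(std::memory_order_acquire) != 0) {
        bool young = is_young.load(std::memory_order_relaxed);
        (void)young;  // reliably true once top is seen as published
      }
    }

    int main() {
      allocator_thread();
      refiner_thread();
      return 0;
    }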
@@ -1,5 +1,5 @@
/*
 * Copyright (c) 2001, 2009, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it

@@ -676,9 +676,27 @@ void HRInto_G1RemSet::concurrentRefineOneCard_impl(jbyte* card_ptr, int worker_i
  // We must complete this write before we do any of the reads below.
  OrderAccess::storeload();
  // And process it, being careful of unallocated portions of TLAB's.

  // The region for the current card may be a young region. The
  // current card may have been a card that was evicted from the
  // card cache. When the card was inserted into the cache, we had
  // determined that its region was non-young. While in the cache,
  // the region may have been freed during a cleanup pause, reallocated
  // and tagged as young.
  //
  // We wish to filter out cards for such a region but the current
  // thread, if we're running concurrently, may "see" the young type
  // change at any time (so an earlier "is_young" check may pass or
  // fail arbitrarily). We tell the iteration code to perform this
  // filtering when it has been determined that there has been an actual
  // allocation in this region and making it safe to check the young type.
  bool filter_young = true;

  HeapWord* stop_point =
    r->oops_on_card_seq_iterate_careful(dirtyRegion,
                                        &filter_then_update_rs_oop_cl);
                                        &filter_then_update_rs_oop_cl,
                                        filter_young);

  // If stop_point is non-null, then we encountered an unallocated region
  // (perhaps the unfilled portion of a TLAB.) For now, we'll dirty the
  // card and re-enqueue: if we put off the card until a GC pause, then the

@@ -789,8 +807,14 @@ void HRInto_G1RemSet::concurrentRefineOneCard(jbyte* card_ptr, int worker_i) {
  if (r == NULL) {
    assert(_g1->is_in_permanent(start), "Or else where?");
  } else {
    guarantee(!r->is_young(), "It was evicted in the current minor cycle.");
    // Process card pointer we get back from the hot card cache
    // Checking whether the region we got back from the cache
    // is young here is inappropriate. The region could have been
    // freed, reallocated and tagged as young while in the cache.
    // Hence we could see its young type change at any time.
    //
    // Process card pointer we get back from the hot card cache. This
    // will check whether the region containing the card is young
    // _after_ checking that the region has been allocated from.
    concurrentRefineOneCard_impl(res, worker_i);
  }
}

@@ -658,7 +658,8 @@ HeapRegion::object_iterate_mem_careful(MemRegion mr,
HeapWord*
HeapRegion::
oops_on_card_seq_iterate_careful(MemRegion mr,
                                 FilterOutOfRegionClosure* cl) {
                                 FilterOutOfRegionClosure* cl,
                                 bool filter_young) {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If we're within a stop-world GC, then we might look at a card in a
@@ -672,6 +673,16 @@ oops_on_card_seq_iterate_careful(MemRegion mr,
  if (mr.is_empty()) return NULL;
  // Otherwise, find the obj that extends onto mr.start().

  // The intersection of the incoming mr (for the card) and the
  // allocated part of the region is non-empty. This implies that
  // we have actually allocated into this region. The code in
  // G1CollectedHeap.cpp that allocates a new region sets the
  // is_young tag on the region before allocating. Thus we
  // safely know if this region is young.
  if (is_young() && filter_young) {
    return NULL;
  }

  // We used to use "block_start_careful" here. But we're actually happy
  // to update the BOT while we do this...
  HeapWord* cur = block_start(mr.start());
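This check is the payoff of the storestore() barrier above: a non-empty intersection proves an allocation was published, and the young tag was written before that publication, so reading the tag here is safe. A toy condensation of the decision, with a hypothetical helper that is not part of the HeapRegion interface:

    #include <cstdio>

    // Hypothetical condensation of the filtering above: the young tag is
    // only meaningful once we know the region has been allocated into.
    const char* classify_card(bool mr_empty, bool region_is_young,
                              bool filter_young) {
      if (mr_empty)
        return "skip: no allocation yet, region tag cannot be trusted";
      if (region_is_young && filter_young)
        return "skip: young region, card needs no refinement";
      return "iterate: old region, scan oops on the card";
    }

    int main() {
      printf("%s\n", classify_card(true,  false, true));
      printf("%s\n", classify_card(false, true,  true));
      printf("%s\n", classify_card(false, false, true));
      return 0;
    }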
@@ -252,7 +252,7 @@ class HeapRegion: public G1OffsetTableContigSpace {
    // survivor
  };

  YoungType _young_type;
  volatile YoungType _young_type;
  int _young_index_in_cset;
  SurvRateGroup* _surv_rate_group;
  int _age_index;

@@ -726,9 +726,12 @@ class HeapRegion: public G1OffsetTableContigSpace {
  HeapWord*
  object_iterate_mem_careful(MemRegion mr, ObjectClosure* cl);

  // In this version - if filter_young is true and the region
  // is a young region then we skip the iteration.
  HeapWord*
  oops_on_card_seq_iterate_careful(MemRegion mr,
                                   FilterOutOfRegionClosure* cl);
                                   FilterOutOfRegionClosure* cl,
                                   bool filter_young);

  // The region "mr" is entirely in "this", and starts and ends at block
  // boundaries. The caller declares that all the contained blocks are

@@ -539,10 +539,9 @@ ParNewGeneration(ReservedSpace rs, size_t initial_byte_size, int level)
  guarantee(_task_queues != NULL, "task_queues allocation failure.");

  for (uint i1 = 0; i1 < ParallelGCThreads; i1++) {
    ObjToScanQueuePadded *q_padded = new ObjToScanQueuePadded();
    guarantee(q_padded != NULL, "work_queue Allocation failure.");

    _task_queues->register_queue(i1, &q_padded->work_queue);
    ObjToScanQueue *q = new ObjToScanQueue();
    guarantee(q != NULL, "work_queue Allocation failure.");
    _task_queues->register_queue(i1, q);
  }

  for (uint i2 = 0; i2 < ParallelGCThreads; i2++)

@@ -1,5 +1,5 @@
/*
 * Copyright (c) 2001, 2009, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it

@@ -33,8 +33,8 @@ class ParEvacuateFollowersClosure;
// but they must be here to allow ParScanClosure::do_oop_work to be defined
// in genOopClosures.inline.hpp.

typedef OopTaskQueue ObjToScanQueue;
typedef OopTaskQueueSet ObjToScanQueueSet;
typedef Padded<OopTaskQueue> ObjToScanQueue;
typedef GenericTaskQueueSet<ObjToScanQueue> ObjToScanQueueSet;

// Enable this to get push/pop/steal stats.
const int PAR_STATS_ENABLED = 0;

@@ -304,12 +304,6 @@ class ParNewGeneration: public DefNewGeneration {
  friend class ParEvacuateFollowersClosure;

 private:
  // XXX use a global constant instead of 64!
  struct ObjToScanQueuePadded {
    ObjToScanQueue work_queue;
    char pad[64 - sizeof(ObjToScanQueue)]; // prevent false sharing
  };

  // The per-worker-thread work queues
  ObjToScanQueueSet* _task_queues;

@@ -1,5 +1,5 @@
/*
 * Copyright (c) 2007, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it

@@ -26,7 +26,8 @@

class ParScanThreadState;
class ParNewGeneration;
typedef OopTaskQueueSet ObjToScanQueueSet;
typedef Padded<OopTaskQueue> ObjToScanQueue;
typedef GenericTaskQueueSet<ObjToScanQueue> ObjToScanQueueSet;
class ParallelTaskTerminator;

class ParScanClosure: public OopsInGenClosure {

@@ -90,10 +90,7 @@ void PSPromotionManager::pre_scavenge() {
}

void PSPromotionManager::post_scavenge() {
#if PS_PM_STATS
  print_stats();
#endif // PS_PM_STATS

  TASKQUEUE_STATS_ONLY(if (PrintGCDetails && ParallelGCVerbose) print_stats());
  for (uint i = 0; i < ParallelGCThreads + 1; i++) {
    PSPromotionManager* manager = manager_array(i);
    if (UseDepthFirstScavengeOrder) {

@@ -105,37 +102,58 @@ void PSPromotionManager::post_scavenge() {
    }
  }

#if PS_PM_STATS
#if TASKQUEUE_STATS
void
PSPromotionManager::print_taskqueue_stats(uint i) const {
  const TaskQueueStats& stats = depth_first() ?
    _claimed_stack_depth.stats : _claimed_stack_breadth.stats;
  tty->print("%3u ", i);
  stats.print();
  tty->cr();
}

void
PSPromotionManager::print_stats(uint i) {
  tty->print_cr("---- GC Worker %2d Stats", i);
  tty->print_cr(" total pushes %8d", _total_pushes);
  tty->print_cr(" masked pushes %8d", _masked_pushes);
  tty->print_cr(" overflow pushes %8d", _overflow_pushes);
  tty->print_cr(" max overflow length %8d", _max_overflow_length);
  tty->print_cr("");
  tty->print_cr(" arrays chunked %8d", _arrays_chunked);
  tty->print_cr(" array chunks processed %8d", _array_chunks_processed);
  tty->print_cr("");
  tty->print_cr(" total steals %8d", _total_steals);
  tty->print_cr(" masked steals %8d", _masked_steals);
  tty->print_cr("");
PSPromotionManager::print_local_stats(uint i) const {
#define FMT " " SIZE_FORMAT_W(10)
  tty->print_cr("%3u" FMT FMT FMT FMT, i, _masked_pushes, _masked_steals,
                _arrays_chunked, _array_chunks_processed);
#undef FMT
}

static const char* const pm_stats_hdr[] = {
  " --------masked------- arrays array",
  "thr push steal chunked chunks",
  "--- ---------- ---------- ---------- ----------"
};

void
PSPromotionManager::print_stats() {
  tty->print_cr("== GC Tasks Stats (%s), GC %3d",
                (UseDepthFirstScavengeOrder) ? "Depth-First" : "Breadth-First",
  const bool df = UseDepthFirstScavengeOrder;
  tty->print_cr("== GC Task Stats (%s-First), GC %3d", df ? "Depth" : "Breadth",
                Universe::heap()->total_collections());

  for (uint i = 0; i < ParallelGCThreads+1; ++i) {
    PSPromotionManager* manager = manager_array(i);
    manager->print_stats(i);
  tty->print("thr "); TaskQueueStats::print_header(1); tty->cr();
  tty->print("--- "); TaskQueueStats::print_header(2); tty->cr();
  for (uint i = 0; i < ParallelGCThreads + 1; ++i) {
    manager_array(i)->print_taskqueue_stats(i);
  }

  const uint hlines = sizeof(pm_stats_hdr) / sizeof(pm_stats_hdr[0]);
  for (uint i = 0; i < hlines; ++i) tty->print_cr(pm_stats_hdr[i]);
  for (uint i = 0; i < ParallelGCThreads + 1; ++i) {
    manager_array(i)->print_local_stats(i);
  }
}

#endif // PS_PM_STATS
void
PSPromotionManager::reset_stats() {
  TaskQueueStats& stats = depth_first() ?
    claimed_stack_depth()->stats : claimed_stack_breadth()->stats;
  stats.reset();
  _masked_pushes = _masked_steals = 0;
  _arrays_chunked = _array_chunks_processed = 0;
}
#endif // TASKQUEUE_STATS

PSPromotionManager::PSPromotionManager() {
  ParallelScavengeHeap* heap = (ParallelScavengeHeap*)Universe::heap();
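The rewritten print_stats() assembles its table from reusable pieces: TaskQueueStats::print_header emits the per-column labels and dashes, each manager prints one width-aligned row, and a fixed pm_stats_hdr block covers the remaining manager-local counters. The layout trick is SIZE_FORMAT_W(*), which passes the column width as a printf argument. A plain-C++ sketch of the same layout, with made-up column names and data:

    #include <cstdio>

    // Miniature of the table shape print_stats() produces: header line,
    // dashed line, then one width-aligned row per GC worker thread.
    int main() {
      const char* names[] = { "qpush", "qpop", "qsteal" };
      const unsigned long stats[2][3] = { { 120, 118, 7 }, { 95, 96, 4 } };

      printf("thr %10s %10s %10s\n", names[0], names[1], names[2]);
      printf("--- ---------- ---------- ----------\n");
      for (int t = 0; t < 2; ++t) {
        // %*lu mirrors SIZE_FORMAT_W(*): the width arrives as an argument.
        printf("%3d %*lu %*lu %*lu\n", t, 10, stats[t][0], 10, stats[t][1],
               10, stats[t][2]);
      }
      return 0;
    }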
@@ -189,16 +207,7 @@ void PSPromotionManager::reset() {

  _prefetch_queue.clear();

#if PS_PM_STATS
  _total_pushes = 0;
  _masked_pushes = 0;
  _overflow_pushes = 0;
  _max_overflow_length = 0;
  _arrays_chunked = 0;
  _array_chunks_processed = 0;
  _total_steals = 0;
  _masked_steals = 0;
#endif // PS_PM_STATS
  TASKQUEUE_STATS_ONLY(reset_stats());
}

@@ -423,14 +432,9 @@ oop PSPromotionManager::copy_to_survivor_space(oop o, bool depth_first) {
          new_obj->is_objArray() &&
          PSChunkLargeArrays) {
        // we'll chunk it
#if PS_PM_STATS
        ++_arrays_chunked;
#endif // PS_PM_STATS
        oop* const masked_o = mask_chunked_array_oop(o);
        push_depth(masked_o);
#if PS_PM_STATS
        ++_masked_pushes;
#endif // PS_PM_STATS
        TASKQUEUE_STATS_ONLY(++_arrays_chunked; ++_masked_pushes);
      } else {
        // we'll just push its contents
        new_obj->push_contents(this);
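The chunking being counted here splits large object arrays so the scanning work can be shared: the manager pushes a masked pointer for the unscanned remainder back onto its depth-first queue (where another worker can steal it) and scans one chunk now. A toy sequential model of the same splitting, with made-up sizes and names:

    #include <cstdio>

    // Toy version of PSChunkLargeArrays-style splitting: process a large
    // array in fixed-size chunks, re-queueing the remainder each time.
    static const int chunk_size = 4;

    void process_chunk(const int* a, int start, int end) {
      for (int i = start; i < end; ++i) printf("scan %d\n", a[i]);
    }

    int main() {
      int array[10] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
      int length = 10;  // plays the role of the masked remainder task
      while (length > 0) {
        int start = (length > chunk_size) ? length - chunk_size : 0;
        // In HotSpot the remainder would be pushed back on the task queue
        // (and could be stolen); here we simply loop on it.
        process_chunk(array, start, length);
        length = start;
      }
      return 0;
    }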
@@ -494,9 +498,7 @@ void PSPromotionManager::process_array_chunk(oop old) {
  assert(old->is_objArray(), "invariant");
  assert(old->is_forwarded(), "invariant");

#if PS_PM_STATS
  ++_array_chunks_processed;
#endif // PS_PM_STATS
  TASKQUEUE_STATS_ONLY(++_array_chunks_processed);

  oop const obj = old->forwardee();

@@ -508,9 +510,7 @@ void PSPromotionManager::process_array_chunk(oop old) {
    assert(start > 0, "invariant");
    arrayOop(old)->set_length(start);
    push_depth(mask_chunked_array_oop(old));
#if PS_PM_STATS
    ++_masked_pushes;
#endif // PS_PM_STATS
    TASKQUEUE_STATS_ONLY(++_masked_pushes);
  } else {
    // this is the final chunk for this array
    start = 0;

@@ -42,8 +42,6 @@ class MutableSpace;
class PSOldGen;
class ParCompactionManager;

#define PS_PM_STATS 0

class PSPromotionManager : public CHeapObj {
  friend class PSScavenge;
  friend class PSRefProcTaskExecutor;

@@ -54,22 +52,18 @@ class PSPromotionManager : public CHeapObj {
  static PSOldGen* _old_gen;
  static MutableSpace* _young_space;

#if PS_PM_STATS
  uint _total_pushes;
  uint _masked_pushes;
#if TASKQUEUE_STATS
  size_t _masked_pushes;
  size_t _masked_steals;
  size_t _arrays_chunked;
  size_t _array_chunks_processed;

  uint _overflow_pushes;
  uint _max_overflow_length;

  uint _arrays_chunked;
  uint _array_chunks_processed;

  uint _total_steals;
  uint _masked_steals;

  void print_stats(uint i);
  void print_taskqueue_stats(uint i) const;
  void print_local_stats(uint i) const;
  static void print_stats();
#endif // PS_PM_STATS

  void reset_stats();
#endif // TASKQUEUE_STATS

  PSYoungPromotionLAB _young_lab;
  PSOldPromotionLAB _old_lab;

@@ -143,42 +137,12 @@ class PSPromotionManager : public CHeapObj {

  template <class T> void push_depth(T* p) {
    assert(depth_first(), "pre-condition");

#if PS_PM_STATS
    ++_total_pushes;
    int stack_length = claimed_stack_depth()->overflow_stack()->length();
#endif // PS_PM_STATS

    claimed_stack_depth()->push(p);

#if PS_PM_STATS
    if (claimed_stack_depth()->overflow_stack()->length() != stack_length) {
      ++_overflow_pushes;
      if ((uint)stack_length + 1 > _max_overflow_length) {
        _max_overflow_length = (uint)stack_length + 1;
      }
    }
#endif // PS_PM_STATS
  }

  void push_breadth(oop o) {
    assert(!depth_first(), "pre-condition");

#if PS_PM_STATS
    ++_total_pushes;
    int stack_length = claimed_stack_breadth()->overflow_stack()->length();
#endif // PS_PM_STATS

    claimed_stack_breadth()->push(o);

#if PS_PM_STATS
    if (claimed_stack_breadth()->overflow_stack()->length() != stack_length) {
      ++_overflow_pushes;
      if ((uint)stack_length + 1 > _max_overflow_length) {
        _max_overflow_length = (uint)stack_length + 1;
      }
    }
#endif // PS_PM_STATS
  }

 protected:

@@ -256,12 +220,5 @@ class PSPromotionManager : public CHeapObj {
  template <class T> inline void claim_or_forward_depth(T* p);
  template <class T> inline void claim_or_forward_breadth(T* p);

#if PS_PM_STATS
  void increment_steals(oop* p = NULL) {
    _total_steals += 1;
    if (p != NULL && is_oop_masked(p)) {
      _masked_steals += 1;
    }
  }
#endif // PS_PM_STATS
  TASKQUEUE_STATS_ONLY(inline void record_steal(StarTask& p);)
};

@@ -1,5 +1,5 @@
/*
 * Copyright (c) 2002, 2008, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it

@@ -124,3 +124,11 @@ inline void PSPromotionManager::process_popped_location_depth(StarTask p) {
    }
  }
}

#if TASKQUEUE_STATS
void PSPromotionManager::record_steal(StarTask& p) {
  if (is_oop_masked(p)) {
    ++_masked_steals;
  }
}
#endif // TASKQUEUE_STATS

@@ -1,5 +1,5 @@
/*
 * Copyright (c) 2002, 2008, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it

@@ -148,9 +148,7 @@ void StealTask::do_it(GCTaskManager* manager, uint which) {
    while(true) {
      StarTask p;
      if (PSPromotionManager::steal_depth(which, &random_seed, p)) {
#if PS_PM_STATS
        pm->increment_steals(p);
#endif // PS_PM_STATS
        TASKQUEUE_STATS_ONLY(pm->record_steal(p));
        pm->process_popped_location_depth(p);
        pm->drain_stacks_depth(true);
      } else {

@@ -163,9 +161,6 @@ void StealTask::do_it(GCTaskManager* manager, uint which) {
    while(true) {
      oop obj;
      if (PSPromotionManager::steal_breadth(which, &random_seed, obj)) {
#if PS_PM_STATS
        pm->increment_steals();
#endif // PS_PM_STATS
        obj->copy_contents(pm);
        pm->drain_stacks_breadth(true);
      } else {

@@ -1,5 +1,5 @@
/*
 * Copyright (c) 1997, 2009, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it

@@ -345,6 +345,35 @@ inline intptr_t align_object_offset(intptr_t offset) {
  return align_size_up(offset, HeapWordsPerLong);
}

// The expected size in bytes of a cache line, used to pad data structures.
#define DEFAULT_CACHE_LINE_SIZE 64

// Bytes needed to pad type to avoid cache-line sharing; alignment should be the
// expected cache line size (a power of two). The first addend avoids sharing
// when the start address is not a multiple of alignment; the second maintains
// alignment of starting addresses that happen to be a multiple.
#define PADDING_SIZE(type, alignment) \
  ((alignment) + align_size_up_(sizeof(type), alignment))

// Templates to create a subclass padded to avoid cache line sharing. These are
// effective only when applied to derived-most (leaf) classes.

// When no args are passed to the base ctor.
template <class T, size_t alignment = DEFAULT_CACHE_LINE_SIZE>
class Padded: public T {
 private:
  char _pad_buf_[PADDING_SIZE(T, alignment)];
};

// When either 0 or 1 args may be passed to the base ctor.
template <class T, typename Arg1T, size_t alignment = DEFAULT_CACHE_LINE_SIZE>
class Padded01: public T {
 public:
  Padded01(): T() { }
  Padded01(Arg1T arg1): T(arg1) { }
 private:
  char _pad_buf_[PADDING_SIZE(T, alignment)];
};

//----------------------------------------------------------------------------------------------------
// Utility macros for compilers
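Both addends in PADDING_SIZE are needed: align_size_up_ alone would let a misaligned start address put the tail of one element and the head of the next on a shared line, and the extra full `alignment` bytes cover exactly that case. A self-contained rendering with a compile-time check; the macro and types are renamed to make clear this is a sketch, not the HotSpot header:

    #include <cstddef>

    // align must be a power of two, as the comment above requires.
    #define ALIGN_UP_(size, align) (((size) + ((align) - 1)) & ~((align) - 1))
    #define PADDING_SIZE_(type, align) ((align) + ALIGN_UP_(sizeof(type), align))

    struct Queue { void* slots[5]; int bottom; int top; };  // illustrative

    template <class T, size_t alignment = 64>
    class PaddedSketch : public T {
     private:
      char _pad_buf_[PADDING_SIZE_(T, alignment)];
    };

    // Consecutive array elements are now at least one full line apart, so
    // two payloads can never share a cache line (C++98-style static check).
    typedef char check_padding[
        (sizeof(PaddedSketch<Queue>) >= sizeof(Queue) + 64) ? 1 : -1];

    int main() { return 0; }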
@@ -31,6 +31,48 @@ uint ParallelTaskTerminator::_total_spins = 0;
uint ParallelTaskTerminator::_total_peeks = 0;
#endif

#if TASKQUEUE_STATS
const char * const TaskQueueStats::_names[last_stat_id] = {
  "qpush", "qpop", "qpop-s", "qattempt", "qsteal", "opush", "omax"
};

void TaskQueueStats::print_header(unsigned int line, outputStream* const stream,
                                  unsigned int width)
{
  // Use a width w: 1 <= w <= max_width
  const unsigned int max_width = 40;
  const unsigned int w = MAX2(MIN2(width, max_width), 1U);

  if (line == 0) { // spaces equal in width to the header
    const unsigned int hdr_width = w * last_stat_id + last_stat_id - 1;
    stream->print("%*s", hdr_width, " ");
  } else if (line == 1) { // labels
    stream->print("%*s", w, _names[0]);
    for (unsigned int i = 1; i < last_stat_id; ++i) {
      stream->print(" %*s", w, _names[i]);
    }
  } else if (line == 2) { // dashed lines
    char dashes[max_width + 1];
    memset(dashes, '-', w);
    dashes[w] = '\0';
    stream->print("%s", dashes);
    for (unsigned int i = 1; i < last_stat_id; ++i) {
      stream->print(" %s", dashes);
    }
  }
}

void TaskQueueStats::print(outputStream* stream, unsigned int width) const
{
#define FMT SIZE_FORMAT_W(*)
  stream->print(FMT, width, _stats[0]);
  for (unsigned int i = 1; i < last_stat_id; ++i) {
    stream->print(" " FMT, width, _stats[i]);
  }
#undef FMT
}
#endif // TASKQUEUE_STATS

int TaskQueueSetSuper::randomParkAndMiller(int *seed0) {
  const int a = 16807;
  const int m = 2147483647;
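The constants a = 16807 and m = 2147483647 = 2^31 - 1 identify this as the Park-Miller "minimal standard" Lehmer generator, which picks the pseudo-random victim queues for work stealing. A standalone version using Schrage's factorization so the product a*seed never overflows 32-bit arithmetic (the function name here is mine, not HotSpot's):

    #include <cstdio>

    // seed' = (16807 * seed) mod (2^31 - 1), without 64-bit math.
    int park_miller_next(int seed) {
      const int a = 16807, m = 2147483647;
      const int q = m / a;        // 127773
      const int r = m % a;        // 2836
      int hi = seed / q;
      int lo = seed % q;
      int test = a * lo - r * hi; // (a*seed) mod m, possibly short by m
      return (test > 0) ? test : test + m;
    }

    int main() {
      int seed = 1;
      for (int i = 0; i < 3; ++i) {
        seed = park_miller_next(seed);
        printf("%d\n", seed);     // 16807, 282475249, 1622650073
      }
      return 0;
    }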
@@ -22,6 +22,72 @@
 *
 */

// Simple TaskQueue stats that are collected by default in debug builds.

#if !defined(TASKQUEUE_STATS) && defined(ASSERT)
#define TASKQUEUE_STATS 1
#elif !defined(TASKQUEUE_STATS)
#define TASKQUEUE_STATS 0
#endif

#if TASKQUEUE_STATS
#define TASKQUEUE_STATS_ONLY(code) code
#else
#define TASKQUEUE_STATS_ONLY(code)
#endif // TASKQUEUE_STATS

#if TASKQUEUE_STATS
class TaskQueueStats {
 public:
  enum StatId {
    push,             // number of taskqueue pushes
    pop,              // number of taskqueue pops
    pop_slow,         // subset of taskqueue pops that were done slow-path
    steal_attempt,    // number of taskqueue steal attempts
    steal,            // number of taskqueue steals
    overflow,         // number of overflow pushes
    overflow_max_len, // max length of overflow stack
    last_stat_id
  };

 public:
  inline TaskQueueStats() { reset(); }

  inline void record_push() { ++_stats[push]; }
  inline void record_pop() { ++_stats[pop]; }
  inline void record_pop_slow() { record_pop(); ++_stats[pop_slow]; }
  inline void record_steal(bool success);
  inline void record_overflow(size_t new_length);

  inline size_t get(StatId id) const { return _stats[id]; }
  inline const size_t* get() const { return _stats; }

  inline void reset();

  static void print_header(unsigned int line, outputStream* const stream = tty,
                           unsigned int width = 10);
  void print(outputStream* const stream = tty, unsigned int width = 10) const;

 private:
  size_t _stats[last_stat_id];
  static const char * const _names[last_stat_id];
};

void TaskQueueStats::record_steal(bool success) {
  ++_stats[steal_attempt];
  if (success) ++_stats[steal];
}

void TaskQueueStats::record_overflow(size_t new_len) {
  ++_stats[overflow];
  if (new_len > _stats[overflow_max_len]) _stats[overflow_max_len] = new_len;
}

void TaskQueueStats::reset() {
  memset(_stats, 0, sizeof(_stats));
}
#endif // TASKQUEUE_STATS

template <unsigned int N>
class TaskQueueSuper: public CHeapObj {
 protected:
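The TASKQUEUE_STATS_ONLY(code) macro is the load-bearing piece: in release builds the argument disappears entirely, so the hot push/pop/steal paths carry no stats cost, while debug builds (ASSERT defined) get the counters for free. The same shape in a self-contained sketch; the QSTATS* names are made up to avoid implying they exist in HotSpot:

    #include <cstdio>

    #ifndef QSTATS
    #  ifdef NDEBUG
    #    define QSTATS 0    // release: stats code vanishes
    #  else
    #    define QSTATS 1    // debug: stats collected by default
    #  endif
    #endif

    #if QSTATS
    #  define QSTATS_ONLY(code) code
    #else
    #  define QSTATS_ONLY(code)
    #endif

    struct Queue {
      int pushes;
      void push(int) { QSTATS_ONLY(++pushes;) }  // zero cost when off
    };

    int main() {
      Queue q = { 0 };
      q.push(42);
      QSTATS_ONLY(printf("pushes: %d\n", q.pushes);)
      return 0;
    }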
@@ -135,6 +201,8 @@ public:

  // Total size of queue.
  static const uint total_size() { return N; }

  TASKQUEUE_STATS_ONLY(TaskQueueStats stats;)
};

template<class E, unsigned int N = TASKQUEUE_SIZE>

@@ -152,6 +220,7 @@ protected:
public:
  using TaskQueueSuper<N>::max_elems;
  using TaskQueueSuper<N>::size;
  TASKQUEUE_STATS_ONLY(using TaskQueueSuper<N>::stats;)

private:
  // Slow paths for push, pop_local. (pop_global has no fast path.)

@@ -224,14 +293,14 @@ bool GenericTaskQueue<E, N>::push_slow(E t, uint dirty_n_elems) {
    // g++ complains if the volatile result of the assignment is unused.
    const_cast<E&>(_elems[localBot] = t);
    OrderAccess::release_store(&_bottom, increment_index(localBot));
    TASKQUEUE_STATS_ONLY(stats.record_push());
    return true;
  }
  return false;
}

template<class E, unsigned int N>
bool GenericTaskQueue<E, N>::
pop_local_slow(uint localBot, Age oldAge) {
bool GenericTaskQueue<E, N>::pop_local_slow(uint localBot, Age oldAge) {
  // This queue was observed to contain exactly one element; either this
  // thread will claim it, or a competing "pop_global". In either case,
  // the queue will be logically empty afterwards. Create a new Age value

@@ -251,6 +320,7 @@ pop_local_slow(uint localBot, Age oldAge) {
    if (tempAge == oldAge) {
      // We win.
      assert(dirty_size(localBot, _age.top()) != N - 1, "sanity");
      TASKQUEUE_STATS_ONLY(stats.record_pop_slow());
      return true;
    }
  }
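The race pop_local_slow resolves is worth spelling out: the queue holds exactly one element, the owner and a thief both want it, and a single compare-and-swap on the shared age word decides the winner while the loser just sees an empty queue. A toy model of that shape using std::atomic (not the real queue, which packs a top index and a tag into the Age word):

    #include <atomic>
    #include <cstdio>

    std::atomic<int> age(0);   // stands in for the (top, tag) Age word

    // Exactly one of the competing threads moves age forward and
    // thereby claims the last element.
    bool claim_last_element(int observed_age) {
      int expected = observed_age;
      return age.compare_exchange_strong(expected, observed_age + 1);
    }

    int main() {
      int snapshot = age.load();
      bool mine   = claim_last_element(snapshot);  // wins: age unchanged
      bool theirs = claim_last_element(snapshot);  // loses: age moved on
      printf("first claim %s, second claim %s\n",
             mine ? "won" : "lost", theirs ? "won" : "lost");
      return 0;
    }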
@@ -306,6 +376,8 @@ public:
  typedef GrowableArray<E> overflow_t;
  typedef GenericTaskQueue<E, N> taskqueue_t;

  TASKQUEUE_STATS_ONLY(using taskqueue_t::stats;)

  OverflowTaskQueue();
  ~OverflowTaskQueue();
  void initialize();

@@ -356,6 +428,7 @@ bool OverflowTaskQueue<E, N>::push(E t)
{
  if (!taskqueue_t::push(t)) {
    overflow_stack()->push(t);
    TASKQUEUE_STATS_ONLY(stats.record_overflow(overflow_stack()->length()));
  }
  return true;
}
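The OverflowTaskQueue::push contract above in miniature: a failed push on the bounded lock-free queue spills to an unbounded overflow stack, so push itself never reports failure and record_overflow tracks how often that happens. A toy standalone version (types and capacity are illustrative):

    #include <cstdio>
    #include <vector>

    struct BoundedQueue {
      int buf[4];
      int n;
      bool push(int t) { return n < 4 ? (buf[n++] = t, true) : false; }
    };

    struct OverflowQueue {
      BoundedQueue q;
      std::vector<int> overflow;
      bool push(int t) {
        if (!q.push(t)) {
          overflow.push_back(t);   // record_overflow() would go here
        }
        return true;               // pushes always "succeed"
      }
    };

    int main() {
      OverflowQueue oq = {};
      for (int i = 0; i < 6; ++i) oq.push(i);
      printf("in queue: %d, spilled: %zu\n", oq.q.n, oq.overflow.size());
      return 0;
    }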
@@ -424,9 +497,13 @@ GenericTaskQueueSet<T>::queue(uint i) {

template<class T> bool
GenericTaskQueueSet<T>::steal(uint queue_num, int* seed, E& t) {
  for (uint i = 0; i < 2 * _n; i++)
    if (steal_best_of_2(queue_num, seed, t))
  for (uint i = 0; i < 2 * _n; i++) {
    if (steal_best_of_2(queue_num, seed, t)) {
      TASKQUEUE_STATS_ONLY(queue(queue_num)->stats.record_steal(true));
      return true;
    }
  }
  TASKQUEUE_STATS_ONLY(queue(queue_num)->stats.record_steal(false));
  return false;
}
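steal() gives up after 2 * _n failed attempts; each attempt delegates to steal_best_of_2, whose policy is to sample two random victim queues and rob the one that currently looks fuller, which balances load better than a single uniform pick. A toy model of that selection (fixed sizes and rand() stand in for the real queues and the Park-Miller seed):

    #include <cstdio>
    #include <cstdlib>

    static int sizes[8] = { 0, 3, 1, 7, 2, 0, 5, 1 };  // made-up queue lengths

    int pick_victim(int self) {
      int k1 = rand() % 8;
      int k2 = rand() % 8;
      if (k1 == self || k2 == self) return -1;   // skip self this round
      return (sizes[k1] > sizes[k2]) ? k1 : k2;  // prefer the fuller queue
    }

    int main() {
      srand(42);
      for (int attempt = 0; attempt < 2 * 8; ++attempt) {  // 2*_n tries
        int v = pick_victim(0);
        if (v >= 0 && sizes[v] > 0) {
          --sizes[v];
          printf("stole from queue %d\n", v);
          return 0;                              // record_steal(true)
        }
      }
      printf("no work found\n");                 // record_steal(false)
      return 1;
    }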
@@ -574,6 +651,7 @@ GenericTaskQueue<E, N>::push(E t) {
    // g++ complains if the volatile result of the assignment is unused.
    const_cast<E&>(_elems[localBot] = t);
    OrderAccess::release_store(&_bottom, increment_index(localBot));
    TASKQUEUE_STATS_ONLY(stats.record_push());
    return true;
  } else {
    return push_slow(t, dirty_n_elems);

@@ -603,6 +681,7 @@ GenericTaskQueue<E, N>::pop_local(E& t) {
  idx_t tp = _age.top(); // XXX
  if (size(localBot, tp) > 0) {
    assert(dirty_size(localBot, tp) != N - 1, "sanity");
    TASKQUEUE_STATS_ONLY(stats.record_pop());
    return true;
  } else {
    // Otherwise, the queue contained exactly one element; we take the slow