mirror of https://github.com/openjdk/jdk.git
8231153: Improve concurrent refinement statistics
8043505: G1RemSet::_conc_refine_cards is incremented unsynchronized

Added refinement rates and counts, removed _conc_refine_cards

Reviewed-by: tschatzl, sjohanss
This commit is contained in:
parent 8edf64d236
commit d7fcd0ccac
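The heart of this change is a move from one shared, unsynchronized counter (G1RemSet::_conc_refine_cards, the subject of JDK-8043505) to per-thread counters that only their owning thread writes, summed on demand when the policy or a summary needs a total. Below is a minimal standalone sketch of that pattern, with invented names standing in for the Tickspan/size_t fields kept by each G1ConcurrentRefineThread; it is illustrative only, not the HotSpot code:

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // Each refinement worker owns its counters; only the owner writes them,
    // so the hot path needs no atomics or locks.
    struct Worker {
      uint64_t refinement_time_ns = 0;  // stands in for Tickspan _total_refinement_time
      size_t refined_cards = 0;         // stands in for size_t _total_refined_cards
    };

    struct RefinementStats {
      uint64_t time_ns;
      size_t cards;
    };

    // Analogous to G1ConcurrentRefine::total_refinement_stats(): visit every
    // worker and sum. A reader may observe a slightly stale value for a worker
    // that is running concurrently, which is acceptable for statistics.
    RefinementStats total_refinement_stats(const std::vector<Worker>& workers) {
      RefinementStats total = {0, 0};
      for (const Worker& w : workers) {
        total.time_ns += w.refinement_time_ns;
        total.cards += w.refined_cards;
      }
      return total;
    }

In HotSpot the iteration is done with a ThreadClosure (see total_refinement_stats() in the diff below), but the shape is the same.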
@@ -78,6 +78,8 @@ G1Analytics::G1Analytics(const G1Predictions* predictor) :
   _alloc_rate_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
   _prev_collection_pause_end_ms(0.0),
   _rs_length_diff_seq(new TruncatedSeq(TruncatedSeqLength)),
+  _concurrent_refine_rate_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
+  _logged_cards_rate_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
   _cost_per_logged_card_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
   _cost_scan_hcc_seq(new TruncatedSeq(TruncatedSeqLength)),
   _young_cards_per_entry_ratio_seq(new TruncatedSeq(TruncatedSeqLength)),
@@ -102,6 +104,10 @@ G1Analytics::G1Analytics(const G1Predictions* predictor) :
   int index = MIN2(ParallelGCThreads - 1, 7u);

   _rs_length_diff_seq->add(rs_length_diff_defaults[index]);
+  // Start with inverse of maximum STW cost.
+  _concurrent_refine_rate_ms_seq->add(1/cost_per_logged_card_ms_defaults[0]);
+  // Some applications have very low rates for logging cards.
+  _logged_cards_rate_ms_seq->add(0.0);
   _cost_per_logged_card_ms_seq->add(cost_per_logged_card_ms_defaults[index]);
   _cost_scan_hcc_seq->add(0.0);
   _young_cards_per_entry_ratio_seq->add(young_cards_per_entry_ratio_defaults[index]);
@@ -159,6 +165,14 @@ void G1Analytics::compute_pause_time_ratio(double interval_ms, double pause_time
     (pause_time_ms * _recent_prev_end_times_for_all_gcs_sec->num()) / interval_ms;
 }

+void G1Analytics::report_concurrent_refine_rate_ms(double cards_per_ms) {
+  _concurrent_refine_rate_ms_seq->add(cards_per_ms);
+}
+
+void G1Analytics::report_logged_cards_rate_ms(double cards_per_ms) {
+  _logged_cards_rate_ms_seq->add(cards_per_ms);
+}
+
 void G1Analytics::report_cost_per_logged_card_ms(double cost_per_logged_card_ms) {
   _cost_per_logged_card_ms_seq->add(cost_per_logged_card_ms);
 }
@@ -223,6 +237,14 @@ double G1Analytics::predict_alloc_rate_ms() const {
   return get_new_prediction(_alloc_rate_ms_seq);
 }

+double G1Analytics::predict_concurrent_refine_rate_ms() const {
+  return get_new_prediction(_concurrent_refine_rate_ms_seq);
+}
+
+double G1Analytics::predict_logged_cards_rate_ms() const {
+  return get_new_prediction(_logged_cards_rate_ms_seq);
+}
+
 double G1Analytics::predict_cost_per_logged_card_ms() const {
   return get_new_prediction(_cost_per_logged_card_ms_seq);
 }
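A note on the seeding above: cost_per_logged_card_ms is milliseconds per card, so its inverse is cards per millisecond, i.e. a refinement rate (for example, a hypothetical default cost of 0.01 ms/card would seed a rate of 100 cards/ms). TruncatedSeq itself is a bounded window of recent samples; a toy sketch of the idea follows (invented class name; the real class also tracks variance so G1Predictions can pad the prediction by a confidence factor):

    #include <cstddef>
    #include <deque>
    #include <numeric>

    // Toy stand-in for TruncatedSeq: keep the most recent N samples and
    // predict with their mean.
    class TruncatedSeqToy {
      std::deque<double> _samples;
      static const size_t _max_length = 10;  // mirrors G1's TruncatedSeqLength
    public:
      void add(double v) {
        if (_samples.size() == _max_length) {
          _samples.pop_front();  // drop the oldest sample
        }
        _samples.push_back(v);
      }
      double predict() const {
        if (_samples.empty()) return 0.0;
        return std::accumulate(_samples.begin(), _samples.end(), 0.0) / _samples.size();
      }
    };

Seeding one default sample, as the constructor hunk above does, keeps the very first prediction sane before any real samples arrive.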
@@ -46,6 +46,8 @@ class G1Analytics: public CHeapObj<mtGC> {
   double _prev_collection_pause_end_ms;

   TruncatedSeq* _rs_length_diff_seq;
+  TruncatedSeq* _concurrent_refine_rate_ms_seq;
+  TruncatedSeq* _logged_cards_rate_ms_seq;
   TruncatedSeq* _cost_per_logged_card_ms_seq;
   TruncatedSeq* _cost_scan_hcc_seq;
   TruncatedSeq* _young_cards_per_entry_ratio_seq;
@@ -99,6 +101,8 @@ public:
   void report_concurrent_mark_remark_times_ms(double ms);
   void report_concurrent_mark_cleanup_times_ms(double ms);
   void report_alloc_rate_ms(double alloc_rate);
+  void report_concurrent_refine_rate_ms(double cards_per_ms);
+  void report_logged_cards_rate_ms(double cards_per_ms);
   void report_cost_per_logged_card_ms(double cost_per_logged_card_ms);
   void report_cost_scan_hcc(double cost_scan_hcc);
   void report_cost_per_remset_card_ms(double cost_per_remset_card_ms, bool for_young_gc);
@@ -116,6 +120,8 @@ public:
   double predict_alloc_rate_ms() const;
   int num_alloc_rate_ms() const;

+  double predict_concurrent_refine_rate_ms() const;
+  double predict_logged_cards_rate_ms() const;
   double predict_cost_per_logged_card_ms() const;

   double predict_scan_hcc_ms() const;
@@ -409,7 +409,7 @@ double G1CollectionSet::finalize_young_part(double target_pause_time_ms, G1Survi
   guarantee(target_pause_time_ms > 0.0,
             "target_pause_time_ms = %1.6lf should be positive", target_pause_time_ms);

-  size_t pending_cards = _policy->pending_cards();
+  size_t pending_cards = _policy->pending_cards_at_gc_start();
   double base_time_ms = _policy->predict_base_elapsed_time_ms(pending_cards);
   double time_remaining_ms = MAX2(target_pause_time_ms - base_time_ms, 0.0);

@@ -412,6 +412,22 @@ void G1ConcurrentRefine::adjust(double logged_cards_scan_time,
   dcqs.notify_if_necessary();
 }

+G1ConcurrentRefine::RefinementStats G1ConcurrentRefine::total_refinement_stats() const {
+  struct CollectData : public ThreadClosure {
+    Tickspan _total_time;
+    size_t _total_cards;
+    CollectData() : _total_time(), _total_cards(0) {}
+    virtual void do_thread(Thread* t) {
+      G1ConcurrentRefineThread* crt = static_cast<G1ConcurrentRefineThread*>(t);
+      _total_time += crt->total_refinement_time();
+      _total_cards += crt->total_refined_cards();
+    }
+  } collector;
+  // Cast away const so we can call non-modifying closure on threads.
+  const_cast<G1ConcurrentRefine*>(this)->threads_do(&collector);
+  return RefinementStats(collector._total_time, collector._total_cards);
+}
+
 size_t G1ConcurrentRefine::activation_threshold(uint worker_id) const {
   Thresholds thresholds = calc_thresholds(_green_zone, _yellow_zone, worker_id);
   return activation_level(thresholds);
@@ -432,7 +448,8 @@ void G1ConcurrentRefine::maybe_activate_more_threads(uint worker_id, size_t num_
   }
 }

-bool G1ConcurrentRefine::do_refinement_step(uint worker_id) {
+bool G1ConcurrentRefine::do_refinement_step(uint worker_id,
+                                            size_t* total_refined_cards) {
   G1DirtyCardQueueSet& dcqs = G1BarrierSet::dirty_card_queue_set();

   size_t curr_cards = dcqs.num_cards();
@@ -448,5 +465,6 @@ bool G1ConcurrentRefine::do_refinement_step(uint worker_id) {

   // Process the next buffer, if there are enough left.
   return dcqs.refine_completed_buffer_concurrently(worker_id + worker_id_offset(),
-                                                   deactivation_threshold(worker_id));
+                                                   deactivation_threshold(worker_id),
+                                                   total_refined_cards);
 }
@@ -27,6 +27,7 @@

 #include "memory/allocation.hpp"
 #include "utilities/globalDefinitions.hpp"
+#include "utilities/ticks.hpp"

 // Forward decl
 class G1ConcurrentRefine;
@@ -118,11 +119,22 @@ public:
   // Adjust refinement thresholds based on work done during the pause and the goal time.
   void adjust(double logged_cards_scan_time, size_t processed_logged_cards, double goal_ms);

+  struct RefinementStats {
+    Tickspan _time;
+    size_t _cards;
+    RefinementStats(Tickspan time, size_t cards) : _time(time), _cards(cards) {}
+  };
+
+  RefinementStats total_refinement_stats() const;
+
   // Cards in the dirty card queue set.
   size_t activation_threshold(uint worker_id) const;
   size_t deactivation_threshold(uint worker_id) const;
-  // Perform a single refinement step. Called by the refinement threads when woken up.
-  bool do_refinement_step(uint worker_id);
+
+  // Perform a single refinement step; called by the refinement
+  // threads. Returns true if there was refinement work available.
+  // Increments *total_refined_cards.
+  bool do_refinement_step(uint worker_id, size_t* total_refined_cards);

   // Iterate over all concurrent refinement threads applying the given closure.
   void threads_do(ThreadClosure *tc);
@@ -37,6 +37,8 @@ G1ConcurrentRefineThread::G1ConcurrentRefineThread(G1ConcurrentRefine* cr, uint
   ConcurrentGCThread(),
   _vtime_start(0.0),
   _vtime_accum(0.0),
+  _total_refinement_time(),
+  _total_refined_cards(0),
   _worker_id(worker_id),
   _active(false),
   _monitor(NULL),
@@ -101,11 +103,12 @@ void G1ConcurrentRefineThread::run_service() {
       break;
     }

-    size_t buffers_processed = 0;
     log_debug(gc, refine)("Activated worker %d, on threshold: " SIZE_FORMAT ", current: " SIZE_FORMAT,
                           _worker_id, _cr->activation_threshold(_worker_id),
                           G1BarrierSet::dirty_card_queue_set().num_cards());

+    size_t start_total_refined_cards = _total_refined_cards; // For logging.
+
     {
       SuspendibleThreadSetJoiner sts_join;

@@ -115,20 +118,22 @@ void G1ConcurrentRefineThread::run_service() {
          continue; // Re-check for termination after yield delay.
        }

-        if (!_cr->do_refinement_step(_worker_id)) {
-          break;
+        Ticks start_time = Ticks::now();
+        if (!_cr->do_refinement_step(_worker_id, &_total_refined_cards)) {
+          break; // No cards to process.
        }
-        ++buffers_processed;
+        _total_refinement_time += (Ticks::now() - start_time);
      }
    }

    deactivate();
    log_debug(gc, refine)("Deactivated worker %d, off threshold: " SIZE_FORMAT
-                          ", current: " SIZE_FORMAT ", buffers processed: "
-                          SIZE_FORMAT,
+                          ", current: " SIZE_FORMAT ", refined cards: "
+                          SIZE_FORMAT ", total refined cards: " SIZE_FORMAT,
                          _worker_id, _cr->deactivation_threshold(_worker_id),
                          G1BarrierSet::dirty_card_queue_set().num_cards(),
-                          buffers_processed);
+                          _total_refined_cards - start_total_refined_cards,
+                          _total_refined_cards);

    if (os::supports_vtime()) {
      _vtime_accum = (os::elapsedVTime() - _vtime_start);
@@ -26,6 +26,7 @@
 #define SHARE_GC_G1_G1CONCURRENTREFINETHREAD_HPP

 #include "gc/shared/concurrentGCThread.hpp"
+#include "utilities/ticks.hpp"

 // Forward Decl.
 class G1ConcurrentRefine;
@@ -38,6 +39,10 @@ class G1ConcurrentRefineThread: public ConcurrentGCThread {

   double _vtime_start;  // Initial virtual time.
   double _vtime_accum;  // Accumulated virtual time.
+
+  Tickspan _total_refinement_time;
+  size_t _total_refined_cards;
+
   uint _worker_id;

   bool _active;
@@ -61,6 +66,9 @@ public:
   // Activate this thread.
   void activate();

+  Tickspan total_refinement_time() const { return _total_refinement_time; }
+  size_t total_refined_cards() const { return _total_refined_cards; }
+
   // Total virtual time so far.
   double vtime_accum() { return _vtime_accum; }
 };
@@ -37,6 +37,7 @@
 #include "runtime/atomic.hpp"
 #include "runtime/flags/flagSetting.hpp"
 #include "runtime/mutexLocker.hpp"
+#include "runtime/os.hpp"
 #include "runtime/safepoint.hpp"
 #include "runtime/thread.inline.hpp"
 #include "runtime/threadSMR.hpp"
@@ -62,6 +63,9 @@ void G1DirtyCardQueue::handle_completed_buffer() {
   }
 }

+// Assumed to be zero by concurrent threads.
+static uint par_ids_start() { return 0; }
+
 G1DirtyCardQueueSet::G1DirtyCardQueueSet(Monitor* cbl_mon,
                                          BufferNode::Allocator* allocator) :
   PtrQueueSet(allocator),
@@ -73,15 +77,16 @@ G1DirtyCardQueueSet::G1DirtyCardQueueSet(Monitor* cbl_mon,
   _process_completed_buffers(false),
   _max_cards(MaxCardsUnlimited),
   _max_cards_padding(0),
-  _free_ids(0, num_par_ids()),
-  _processed_buffers_mut(0),
-  _processed_buffers_rs_thread(0)
+  _free_ids(par_ids_start(), num_par_ids()),
+  _mutator_refined_cards_counters(NEW_C_HEAP_ARRAY(size_t, num_par_ids(), mtGC))
 {
+  ::memset(_mutator_refined_cards_counters, 0, num_par_ids() * sizeof(size_t));
   _all_active = true;
 }

 G1DirtyCardQueueSet::~G1DirtyCardQueueSet() {
   abandon_completed_buffers();
+  FREE_C_HEAP_ARRAY(size_t, _mutator_refined_cards_counters);
 }

 // Determines how many mutator threads can process the buffers in parallel.
@@ -89,6 +94,14 @@ uint G1DirtyCardQueueSet::num_par_ids() {
   return (uint)os::initial_active_processor_count();
 }

+size_t G1DirtyCardQueueSet::total_mutator_refined_cards() const {
+  size_t sum = 0;
+  for (uint i = 0; i < num_par_ids(); ++i) {
+    sum += _mutator_refined_cards_counters[i];
+  }
+  return sum;
+}
+
 void G1DirtyCardQueueSet::handle_zero_index_for_thread(Thread* t) {
   G1ThreadLocalData::dirty_card_queue(t).handle_zero_index();
 }
@@ -213,7 +226,9 @@ G1BufferNodeList G1DirtyCardQueueSet::take_all_completed_buffers() {
   return result;
 }

-bool G1DirtyCardQueueSet::refine_buffer(BufferNode* node, uint worker_id) {
+bool G1DirtyCardQueueSet::refine_buffer(BufferNode* node,
+                                        uint worker_id,
+                                        size_t* total_refined_cards) {
   G1RemSet* rem_set = G1CollectedHeap::heap()->rem_set();
   size_t size = buffer_size();
   void** buffer = BufferNode::make_buffer_from_node(node);
@@ -223,6 +238,7 @@ bool G1DirtyCardQueueSet::refine_buffer(BufferNode* node, uint worker_id) {
     CardTable::CardValue* cp = static_cast<CardTable::CardValue*>(buffer[i]);
     rem_set->refine_card_concurrently(cp, worker_id);
   }
+  *total_refined_cards += (i - node->index());
   node->set_index(i);
   return i == size;
 }
@@ -260,25 +276,27 @@ bool G1DirtyCardQueueSet::process_or_enqueue_completed_buffer(BufferNode* node)

 bool G1DirtyCardQueueSet::mut_process_buffer(BufferNode* node) {
   uint worker_id = _free_ids.claim_par_id(); // temporarily claim an id
-  bool result = refine_buffer(node, worker_id);
+  uint counter_index = worker_id - par_ids_start();
+  size_t* counter = &_mutator_refined_cards_counters[counter_index];
+  bool result = refine_buffer(node, worker_id, counter);
   _free_ids.release_par_id(worker_id); // release the id

   if (result) {
     assert_fully_consumed(node, buffer_size());
-    Atomic::inc(&_processed_buffers_mut);
   }
   return result;
 }

-bool G1DirtyCardQueueSet::refine_completed_buffer_concurrently(uint worker_id, size_t stop_at) {
+bool G1DirtyCardQueueSet::refine_completed_buffer_concurrently(uint worker_id,
+                                                               size_t stop_at,
+                                                               size_t* total_refined_cards) {
   BufferNode* node = get_completed_buffer(stop_at);
   if (node == NULL) {
     return false;
-  } else if (refine_buffer(node, worker_id)) {
+  } else if (refine_buffer(node, worker_id, total_refined_cards)) {
     assert_fully_consumed(node, buffer_size());
     // Done with fully processed buffer.
     deallocate_buffer(node);
-    Atomic::inc(&_processed_buffers_rs_thread);
     return true;
   } else {
     // Return partially processed buffer to the queue.
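Why the new mutator counters need no atomic updates: a mutator thread holds a par id exclusively between claim_par_id() and release_par_id(), so each slot of _mutator_refined_cards_counters has at most one writer at any moment, while total_mutator_refined_cards() merely sums the slots and may read a slightly stale total. A toy sketch of the claim/release discipline follows (invented names, not the G1FreeIdSet API; the real set blocks when ids run out, this toy assumes callers never exceed the pool):

    #include <cstddef>
    #include <mutex>
    #include <vector>

    // Toy id pool: an id is owned exclusively from claim() until release(),
    // so the counter slot it indexes has a single writer while held.
    class FreeIdSetToy {
      std::vector<unsigned> _free;
      std::mutex _lock;
    public:
      explicit FreeIdSetToy(unsigned n) {
        for (unsigned i = 0; i < n; ++i) _free.push_back(i);
      }
      unsigned claim() {
        std::lock_guard<std::mutex> g(_lock);
        unsigned id = _free.back();
        _free.pop_back();
        return id;
      }
      void release(unsigned id) {
        std::lock_guard<std::mutex> g(_lock);
        _free.push_back(id);
      }
    };

    std::vector<size_t> counters(8, 0);
    FreeIdSetToy ids(8);

    void count_refined_cards(size_t n) {
      unsigned id = ids.claim();
      counters[id] += n;  // race-free: this slot has one writer while claimed
      ids.release(id);
    }

This is exactly the property that the old single _conc_refine_cards counter lacked, which is what JDK-8043505 complained about.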
@@ -78,14 +78,15 @@ class G1DirtyCardQueueSet: public PtrQueueSet {

   void abandon_completed_buffers();

-  // Refine the cards in "node" from it's index to buffer_size.
+  // Refine the cards in "node" from its index to buffer_size.
   // Stops processing if SuspendibleThreadSet::should_yield() is true.
   // Returns true if the entire buffer was processed, false if there
   // is a pending yield request. The node's index is updated to exclude
   // the processed elements, e.g. up to the element before processing
   // stopped, or one past the last element if the entire buffer was
-  // processed.
-  bool refine_buffer(BufferNode* node, uint worker_id);
+  // processed. Increments *total_refined_cards by the number of cards
+  // processed and removed from the buffer.
+  bool refine_buffer(BufferNode* node, uint worker_id, size_t* total_refined_cards);

   bool mut_process_buffer(BufferNode* node);

@@ -97,10 +98,9 @@ class G1DirtyCardQueueSet: public PtrQueueSet {

   G1FreeIdSet _free_ids;

-  // The number of completed buffers processed by mutator and rs thread,
-  // respectively.
-  jint _processed_buffers_mut;
-  jint _processed_buffers_rs_thread;
+  // Array of cumulative dirty cards refined by mutator threads.
+  // Array has an entry per id in _free_ids.
+  size_t* _mutator_refined_cards_counters;

 public:
   G1DirtyCardQueueSet(Monitor* cbl_mon, BufferNode::Allocator* allocator);
@@ -158,7 +158,12 @@ public:
   // Stops processing a buffer if SuspendibleThreadSet::should_yield(),
   // returning the incompletely processed buffer to the completed buffer
   // list, for later processing of the remainder.
-  bool refine_completed_buffer_concurrently(uint worker_id, size_t stop_at);
+  //
+  // Increments *total_refined_cards by the number of cards processed and
+  // removed from the buffer.
+  bool refine_completed_buffer_concurrently(uint worker_id,
+                                            size_t stop_at,
+                                            size_t* total_refined_cards);

   // If a full collection is happening, reset partial logs, and release
   // completed ones: the full collection will make them all irrelevant.
@@ -181,13 +186,8 @@ public:
     return _max_cards_padding;
   }

-  jint processed_buffers_mut() {
-    return _processed_buffers_mut;
-  }
-  jint processed_buffers_rs_thread() {
-    return _processed_buffers_rs_thread;
-  }
-
+  // Total dirty cards refined by mutator threads.
+  size_t total_mutator_refined_cards() const;
 };

 inline G1DirtyCardQueueSet* G1DirtyCardQueue::dirty_card_qset() const {
@@ -70,7 +70,11 @@ G1Policy::G1Policy(STWGCTimer* gc_timer) :
   _free_regions_at_end_of_collection(0),
   _max_rs_length(0),
   _rs_length_prediction(0),
-  _pending_cards(0),
+  _pending_cards_at_gc_start(0),
+  _pending_cards_at_prev_gc_end(0),
+  _total_mutator_refined_cards(0),
+  _total_concurrent_refined_cards(0),
+  _total_concurrent_refinement_time(),
   _bytes_allocated_in_old_since_last_gc(0),
   _initial_mark_to_mixed(),
   _collection_set(NULL),
@@ -442,6 +446,7 @@ void G1Policy::record_full_collection_start() {
   collector_state()->set_in_young_only_phase(false);
   collector_state()->set_in_full_gc(true);
   _collection_set->clear_candidates();
+  record_concurrent_refinement_data(true /* is_full_collection */);
 }

 void G1Policy::record_full_collection_end() {
@@ -472,12 +477,67 @@ void G1Policy::record_full_collection_end() {
   _survivor_surv_rate_group->reset();
   update_young_list_max_and_target_length();
   update_rs_length_prediction();
+  _pending_cards_at_prev_gc_end = _g1h->pending_card_num();

   _bytes_allocated_in_old_since_last_gc = 0;

   record_pause(FullGC, _full_collection_start_sec, end_sec);
 }

+void G1Policy::record_concurrent_refinement_data(bool is_full_collection) {
+  _pending_cards_at_gc_start = _g1h->pending_card_num();
+
+  // Record info about concurrent refinement thread processing.
+  G1ConcurrentRefine* cr = _g1h->concurrent_refine();
+  G1ConcurrentRefine::RefinementStats cr_stats = cr->total_refinement_stats();
+
+  Tickspan cr_time = cr_stats._time - _total_concurrent_refinement_time;
+  _total_concurrent_refinement_time = cr_stats._time;
+
+  size_t cr_cards = cr_stats._cards - _total_concurrent_refined_cards;
+  _total_concurrent_refined_cards = cr_stats._cards;
+
+  // Don't update rate if full collection. We could be in an implicit full
+  // collection after a non-full collection failure, in which case there
+  // wasn't any mutator/cr-thread activity since last recording. And if
+  // we're in an explicit full collection, the time since the last GC can
+  // be arbitrarily short, so not a very good sample. Similarly, don't
+  // update the rate if the current sample is empty or time is zero.
+  if (!is_full_collection && (cr_cards > 0) && (cr_time > Tickspan())) {
+    double rate = cr_cards / (cr_time.seconds() * MILLIUNITS);
+    _analytics->report_concurrent_refine_rate_ms(rate);
+  }
+
+  // Record info about mutator thread processing.
+  G1DirtyCardQueueSet& dcqs = G1BarrierSet::dirty_card_queue_set();
+  size_t mut_total_cards = dcqs.total_mutator_refined_cards();
+  size_t mut_cards = mut_total_cards - _total_mutator_refined_cards;
+  _total_mutator_refined_cards = mut_total_cards;
+
+  // Record mutator's card logging rate.
+  // Don't update if full collection; see above.
+  if (!is_full_collection) {
+    size_t total_cards = _pending_cards_at_gc_start + cr_cards + mut_cards;
+    assert(_pending_cards_at_prev_gc_end <= total_cards,
+           "untracked cards: last pending: " SIZE_FORMAT
+           ", pending: " SIZE_FORMAT ", conc refine: " SIZE_FORMAT
+           ", mut refine:" SIZE_FORMAT,
+           _pending_cards_at_prev_gc_end, _pending_cards_at_gc_start,
+           cr_cards, mut_cards);
+    size_t logged_cards = total_cards - _pending_cards_at_prev_gc_end;
+    double logging_start_time = _analytics->prev_collection_pause_end_ms();
+    double logging_end_time = Ticks::now().seconds() * MILLIUNITS;
+    double logging_time = logging_end_time - logging_start_time;
+    // Unlike above for conc-refine rate, here we should not require a
+    // non-empty sample, since an application could go some time with only
+    // young-gen or filtered out writes. But we'll ignore unusually short
+    // sample periods, as they may just pollute the predictions.
+    if (logging_time > 1.0) {   // Require > 1ms sample time.
+      _analytics->report_logged_cards_rate_ms(logged_cards / logging_time);
+    }
+  }
+}
+
 void G1Policy::record_collection_pause_start(double start_time_sec) {
   // We only need to do this here as the policy will only be applied
   // to the GC we're about to start. so, no point is calculating this
@@ -490,7 +550,8 @@ void G1Policy::record_collection_pause_start(double start_time_sec) {
   assert_used_and_recalculate_used_equal(_g1h);

   phase_times()->record_cur_collection_start_sec(start_time_sec);
-  _pending_cards = _g1h->pending_card_num();
+
+  record_concurrent_refinement_data(false /* is_full_collection */);

   _collection_set->reset_bytes_used_before();
   _bytes_copied_during_gc = 0;
@@ -744,7 +805,7 @@ void G1Policy::record_collection_pause_end(double pause_time_ms, size_t heap_use
     // after the mixed gc phase.
     // During mixed gc we do not use them for young gen sizing.
     if (this_pause_was_young_only) {
-      _analytics->report_pending_cards((double) _pending_cards);
+      _analytics->report_pending_cards((double) _pending_cards_at_gc_start);
       _analytics->report_rs_length((double) _max_rs_length);
     }
   }
@@ -798,6 +859,7 @@ void G1Policy::record_collection_pause_end(double pause_time_ms, size_t heap_use
     scan_logged_cards_time_goal_ms -= scan_hcc_time_ms;
   }

+  _pending_cards_at_prev_gc_end = _g1h->pending_card_num();
   double const logged_cards_time = logged_cards_processing_time();

   log_debug(gc, ergo, refine)("Concurrent refinement times: Logged Cards Scan time goal: %1.2fms Logged Cards Scan time: %1.2fms HCC time: %1.2fms",
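record_concurrent_refinement_data() above boils down to differencing cumulative counters between GCs and converting to per-millisecond rates. A hedged sketch of just that arithmetic, with invented function and parameter names (cr_cards/mut_cards are the cards refined since the previous sample, matching the deltas in the diff):

    #include <cstddef>

    // Refinement rate sample: cards refined per millisecond over the interval.
    double concurrent_refine_rate_ms(size_t cards_delta, double time_delta_ms) {
      return cards_delta / time_delta_ms;  // caller ensures time_delta_ms > 0
    }

    // Cards logged by mutators since the last GC: everything pending now plus
    // everything refined away since then, minus what was already pending at
    // the end of the previous GC.
    size_t logged_cards_since_last_gc(size_t pending_at_gc_start,
                                      size_t cr_cards,   // refined concurrently since last GC
                                      size_t mut_cards,  // refined by mutators since last GC
                                      size_t pending_at_prev_gc_end) {
      return pending_at_gc_start + cr_cards + mut_cards - pending_at_prev_gc_end;
    }

Dividing the logged-cards figure by the wall-clock milliseconds since the previous pause end gives the sample fed to _logged_cards_rate_ms_seq, guarded against sub-millisecond sample windows as in the code above.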
@@ -100,7 +100,11 @@ class G1Policy: public CHeapObj<mtGC> {

   size_t _rs_length_prediction;

-  size_t _pending_cards;
+  size_t _pending_cards_at_gc_start;
+  size_t _pending_cards_at_prev_gc_end;
+  size_t _total_mutator_refined_cards;
+  size_t _total_concurrent_refined_cards;
+  Tickspan _total_concurrent_refinement_time;

   // The amount of allocated bytes in old gen during the last mutator and the following
   // young GC phase.
@@ -244,7 +248,15 @@ private:
                                     uint base_free_regions, double target_pause_time_ms) const;

 public:
-  size_t pending_cards() const { return _pending_cards; }
+  size_t pending_cards_at_gc_start() const { return _pending_cards_at_gc_start; }
+
+  size_t total_concurrent_refined_cards() const {
+    return _total_concurrent_refined_cards;
+  }
+
+  size_t total_mutator_refined_cards() const {
+    return _total_mutator_refined_cards;
+  }

   // Calculate the minimum number of old regions we'll add to the CSet
   // during a mixed GC.
@@ -283,6 +295,9 @@ private:
   void record_pause(PauseKind kind, double start, double end);
   // Indicate that we aborted marking before doing any mixed GCs.
   void abort_time_to_mixed_tracking();
+
+  void record_concurrent_refinement_data(bool is_full_collection);
+
 public:

   G1Policy(STWGCTimer* gc_timer);
@@ -489,7 +489,6 @@ G1RemSet::G1RemSet(G1CollectedHeap* g1h,
   _scan_state(new G1RemSetScanState()),
   _prev_period_summary(),
   _g1h(g1h),
-  _num_conc_refined_cards(0),
   _ct(ct),
   _g1p(_g1h->policy()),
   _hot_card_cache(hot_card_cache) {
@@ -1377,7 +1376,6 @@ void G1RemSet::refine_card_concurrently(CardValue* card_ptr,

   G1ConcurrentRefineOopClosure conc_refine_cl(_g1h, worker_id);
   if (r->oops_on_memregion_seq_iterate_careful<false>(dirty_region, &conc_refine_cl) != NULL) {
-    _num_conc_refined_cards++; // Unsynchronized update, only used for logging.
     return;
   }

@@ -61,7 +61,6 @@ private:
   G1RemSetSummary _prev_period_summary;

   G1CollectedHeap* _g1h;
-  size_t _num_conc_refined_cards; // Number of cards refined concurrently to the mutator.

   G1CardTable* _ct;
   G1Policy* _g1p;
@@ -125,8 +124,6 @@ public:
   // Print accumulated summary info from the last time called.
   void print_periodic_summary_info(const char* header, uint period_count);

-  size_t num_conc_refined_cards() const { return _num_conc_refined_cards; }
-
   // Rebuilds the remembered set by scanning from bottom to TARS for all regions
   // using the given work gang.
   void rebuild_rem_set(G1ConcurrentMark* cm, WorkGang* workers, uint worker_id_offset);
@@ -27,6 +27,7 @@
 #include "gc/g1/g1ConcurrentRefine.hpp"
 #include "gc/g1/g1ConcurrentRefineThread.hpp"
 #include "gc/g1/g1DirtyCardQueue.hpp"
+#include "gc/g1/g1Policy.hpp"
 #include "gc/g1/g1RemSet.hpp"
 #include "gc/g1/g1RemSetSummary.hpp"
 #include "gc/g1/g1YoungRemSetSamplingThread.hpp"
@@ -53,18 +54,17 @@ public:
 };

 void G1RemSetSummary::update() {
-  _num_conc_refined_cards = _rem_set->num_conc_refined_cards();
-  G1DirtyCardQueueSet& dcqs = G1BarrierSet::dirty_card_queue_set();
-  _num_processed_buf_mutator = dcqs.processed_buffers_mut();
-  _num_processed_buf_rs_threads = dcqs.processed_buffers_rs_thread();
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();
+
+  const G1Policy* policy = g1h->policy();
+  _total_mutator_refined_cards = policy->total_mutator_refined_cards();
+  _total_concurrent_refined_cards = policy->total_concurrent_refined_cards();

   _num_coarsenings = HeapRegionRemSet::n_coarsenings();

-  G1CollectedHeap* g1h = G1CollectedHeap::heap();
-  G1ConcurrentRefine* cg1r = g1h->concurrent_refine();
   if (_rs_threads_vtimes != NULL) {
     GetRSThreadVTimeClosure p(this);
-    cg1r->threads_do(&p);
+    g1h->concurrent_refine()->threads_do(&p);
   }
   set_sampling_thread_vtime(g1h->sampling_thread()->vtime_accum());
 }
@@ -83,9 +83,8 @@ double G1RemSetSummary::rs_thread_vtime(uint thread) const {

 G1RemSetSummary::G1RemSetSummary() :
   _rem_set(NULL),
-  _num_conc_refined_cards(0),
-  _num_processed_buf_mutator(0),
-  _num_processed_buf_rs_threads(0),
+  _total_mutator_refined_cards(0),
+  _total_concurrent_refined_cards(0),
   _num_coarsenings(0),
   _num_vtimes(G1ConcurrentRefine::max_num_threads()),
   _rs_threads_vtimes(NEW_C_HEAP_ARRAY(double, _num_vtimes, mtGC)),
@@ -96,9 +95,8 @@ G1RemSetSummary::G1RemSetSummary() :

 G1RemSetSummary::G1RemSetSummary(G1RemSet* rem_set) :
   _rem_set(rem_set),
-  _num_conc_refined_cards(0),
-  _num_processed_buf_mutator(0),
-  _num_processed_buf_rs_threads(0),
+  _total_mutator_refined_cards(0),
+  _total_concurrent_refined_cards(0),
   _num_coarsenings(0),
   _num_vtimes(G1ConcurrentRefine::max_num_threads()),
   _rs_threads_vtimes(NEW_C_HEAP_ARRAY(double, _num_vtimes, mtGC)),
@@ -114,12 +112,10 @@ void G1RemSetSummary::set(G1RemSetSummary* other) {
   assert(other != NULL, "just checking");
   assert(_num_vtimes == other->_num_vtimes, "just checking");

-  _num_conc_refined_cards = other->num_conc_refined_cards();
+  _total_mutator_refined_cards = other->total_mutator_refined_cards();
+  _total_concurrent_refined_cards = other->total_concurrent_refined_cards();

-  _num_processed_buf_mutator = other->num_processed_buf_mutator();
-  _num_processed_buf_rs_threads = other->num_processed_buf_rs_threads();
-
-  _num_coarsenings = other->_num_coarsenings;
+  _num_coarsenings = other->num_coarsenings();

   memcpy(_rs_threads_vtimes, other->_rs_threads_vtimes, sizeof(double) * _num_vtimes);

@@ -130,10 +126,8 @@ void G1RemSetSummary::subtract_from(G1RemSetSummary* other) {
   assert(other != NULL, "just checking");
   assert(_num_vtimes == other->_num_vtimes, "just checking");

-  _num_conc_refined_cards = other->num_conc_refined_cards() - _num_conc_refined_cards;
-
-  _num_processed_buf_mutator = other->num_processed_buf_mutator() - _num_processed_buf_mutator;
-  _num_processed_buf_rs_threads = other->num_processed_buf_rs_threads() - _num_processed_buf_rs_threads;
+  _total_mutator_refined_cards = other->total_mutator_refined_cards() - _total_mutator_refined_cards;
+  _total_concurrent_refined_cards = other->total_concurrent_refined_cards() - _total_concurrent_refined_cards;

   _num_coarsenings = other->num_coarsenings() - _num_coarsenings;

@@ -356,16 +350,15 @@ public:

 void G1RemSetSummary::print_on(outputStream* out) {
   out->print_cr("  Recent concurrent refinement statistics");
-  out->print_cr("  Processed " SIZE_FORMAT " cards concurrently", num_conc_refined_cards());
-  out->print_cr("  Of " SIZE_FORMAT " completed buffers:", num_processed_buf_total());
-  out->print_cr("     " SIZE_FORMAT_W(8) " (%5.1f%%) by concurrent RS threads.",
-                num_processed_buf_total(),
-                percent_of(num_processed_buf_rs_threads(), num_processed_buf_total()));
+  out->print_cr("  Of " SIZE_FORMAT " refined cards:", total_refined_cards());
+  out->print_cr("     " SIZE_FORMAT_W(8) " (%5.1f%%) by concurrent refinement threads.",
+                total_concurrent_refined_cards(),
+                percent_of(total_concurrent_refined_cards(), total_refined_cards()));
   out->print_cr("     " SIZE_FORMAT_W(8) " (%5.1f%%) by mutator threads.",
-                num_processed_buf_mutator(),
-                percent_of(num_processed_buf_mutator(), num_processed_buf_total()));
+                total_mutator_refined_cards(),
+                percent_of(total_mutator_refined_cards(), total_refined_cards()));
   out->print_cr("  Did " SIZE_FORMAT " coarsenings.", num_coarsenings());
-  out->print_cr("  Concurrent RS threads times (s)");
+  out->print_cr("  Concurrent refinement threads times (s)");
   out->print("     ");
   for (uint i = 0; i < _num_vtimes; i++) {
     out->print("    %5.2f", rs_thread_vtime(i));
@@ -38,9 +38,8 @@ private:

   G1RemSet* _rem_set;

-  size_t _num_conc_refined_cards;
-  size_t _num_processed_buf_mutator;
-  size_t _num_processed_buf_rs_threads;
+  size_t _total_mutator_refined_cards;
+  size_t _total_concurrent_refined_cards;

   size_t _num_coarsenings;

@@ -76,20 +75,16 @@ public:
     return _sampling_thread_vtime;
   }

-  size_t num_conc_refined_cards() const {
-    return _num_conc_refined_cards;
+  size_t total_mutator_refined_cards() const {
+    return _total_mutator_refined_cards;
   }

-  size_t num_processed_buf_mutator() const {
-    return _num_processed_buf_mutator;
+  size_t total_concurrent_refined_cards() const {
+    return _total_concurrent_refined_cards;
   }

-  size_t num_processed_buf_rs_threads() const {
-    return _num_processed_buf_rs_threads;
-  }
-
-  size_t num_processed_buf_total() const {
-    return num_processed_buf_mutator() + num_processed_buf_rs_threads();
+  size_t total_refined_cards() const {
+    return total_mutator_refined_cards() + total_concurrent_refined_cards();
   }

   size_t num_coarsenings() const {
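The summary side is unchanged in shape: G1RemSetSummary keeps cumulative totals, and the periodic log output is produced by subtracting the previous snapshot (subtract_from() above). A compact sketch of that snapshot-and-diff pattern, with invented names and only the two card counters:

    #include <cstddef>

    struct SummaryToy {
      size_t mutator_cards = 0;
      size_t concurrent_cards = 0;

      // Periodic view: cumulative totals minus the previous snapshot,
      // mirroring G1RemSetSummary::subtract_from().
      SummaryToy since(const SummaryToy& prev) const {
        SummaryToy d;
        d.mutator_cards = mutator_cards - prev.mutator_cards;
        d.concurrent_cards = concurrent_cards - prev.concurrent_cards;
        return d;
      }

      size_t total() const { return mutator_cards + concurrent_cards; }
    };

Because the counters are monotonically increasing, the subtraction always yields the work done in the period between two snapshots.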
@@ -54,7 +54,7 @@ public class TestRemsetLoggingThreads {

        OutputAnalyzer output = new OutputAnalyzer(pb.start());

-        String pattern = "Concurrent RS threads times \\(s\\)$";
+        String pattern = "Concurrent refinement threads times \\(s\\)$";
        Matcher m = Pattern.compile(pattern, Pattern.MULTILINE).matcher(output.getStdout());

        if (!m.find()) {
@@ -110,7 +110,7 @@ public class TestRemsetLoggingTools {
    }

    public static void expectRSetSummaries(String result, int expectedCumulative, int expectedPeriodic) throws Exception {
-        int actualTotal = result.split("concurrent refinement").length - 1;
+        int actualTotal = result.split("concurrent refinement statistics").length - 1;
        int actualCumulative = result.split("Cumulative RS summary").length - 1;

        if (expectedCumulative != actualCumulative) {