8342382: Implement JEP 522: G1 GC: Improve Throughput by Reducing Synchronization
Co-authored-by: Amit Kumar <amitkumar@openjdk.org>
Co-authored-by: Martin Doerr <mdoerr@openjdk.org>
Co-authored-by: Carlo Refice <carlo.refice@oracle.com>
Co-authored-by: Fei Yang <fyang@openjdk.org>
Reviewed-by: iwalulya, rcastanedalo, aph, ayang
parent ca182912a3, commit 8d5c005642
@ -86,15 +86,48 @@ void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm
}
}

void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
                                                             Register start, Register count, Register scratch, RegSet saved_regs) {
  __ push(saved_regs, sp);
  assert_different_registers(start, count, scratch);
  assert_different_registers(c_rarg0, count);
  __ mov(c_rarg0, start);
  __ mov(c_rarg1, count);
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_post_entry), 2);
  __ pop(saved_regs, sp);
void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm,
                                                             DecoratorSet decorators,
                                                             Register start,
                                                             Register count,
                                                             Register scratch,
                                                             RegSet saved_regs) {

  Label done;
  Label loop;
  Label next;

  __ cbz(count, done);

  // Calculate the number of card marks to set. Since the object might start and
  // end within a card, we need to calculate this via the card table indexes of
  // the actual start and last addresses covered by the object.
  // Temporarily use the count register for the last element address.
  __ lea(count, Address(start, count, Address::lsl(LogBytesPerHeapOop))); // end = start + count << LogBytesPerHeapOop
  __ sub(count, count, BytesPerHeapOop); // Use last element address for end.

  __ lsr(start, start, CardTable::card_shift());
  __ lsr(count, count, CardTable::card_shift());
  __ sub(count, count, start); // Number of bytes to mark - 1.

  // Add card table base offset to start.
  __ ldr(scratch, Address(rthread, in_bytes(G1ThreadLocalData::card_table_base_offset())));
  __ add(start, start, scratch);

  __ bind(loop);
  if (UseCondCardMark) {
    __ ldrb(scratch, Address(start, count));
    // Instead of loading clean_card_val and comparing, we exploit the fact that
    // the LSB of non-clean cards is always 0, and the LSB of clean cards 1.
    __ tbz(scratch, 0, next);
  }
  static_assert(G1CardTable::dirty_card_val() == 0, "must be to use zr");
  __ strb(zr, Address(start, count));
  __ bind(next);
  __ subs(count, count, 1);
  __ br(Assembler::GE, loop);

  __ bind(done);
}

static void generate_queue_test_and_insertion(MacroAssembler* masm, ByteSize index_offset, ByteSize buffer_offset, Label& runtime,
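Note: the card-range arithmetic in the new array post-barrier above can be read as the following plain C++ sketch. It is not the generated code; the function name, the raw card_table_base pointer and the parameters are illustrative assumptions, standing in for the per-thread base loaded via G1ThreadLocalData::card_table_base_offset() and the G1CardTable constants that the static_assert above pins down.

    #include <cstddef>
    #include <cstdint>

    // Conceptual model of the inline array post-barrier (sketch only, assumed helpers).
    void mark_cards_for_array(uint8_t* card_table_base,  // assumed: per-thread card table base
                              uintptr_t start,           // address of the first array element
                              size_t count,              // number of heap oops stored
                              size_t bytes_per_heap_oop, // BytesPerHeapOop
                              unsigned card_shift,       // CardTable::card_shift()
                              bool use_cond_card_mark) {
      if (count == 0) return;
      uintptr_t last = start + count * bytes_per_heap_oop - bytes_per_heap_oop; // last element
      size_t first_card = start >> card_shift;
      size_t last_card  = last  >> card_shift;
      // The assembly keeps (last_card - first_card), i.e. "number of cards - 1",
      // in the count register and loops while that index stays >= 0.
      for (size_t i = first_card; i <= last_card; i++) {
        if (use_cond_card_mark && (card_table_base[i] & 1) == 0) {
          continue; // LSB 0 means the card is already non-clean, skip the store
        }
        card_table_base[i] = 0; // G1CardTable::dirty_card_val() is asserted to be 0 above
      }
    }

Using the address of the last element (rather than the exclusive end) keeps an array that ends exactly on a card boundary from dirtying one card too many.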
@ -202,10 +235,14 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
static void generate_post_barrier_fast_path(MacroAssembler* masm,
                                            const Register store_addr,
                                            const Register new_val,
                                            const Register thread,
                                            const Register tmp1,
                                            const Register tmp2,
                                            Label& done,
                                            bool new_val_may_be_null) {
  assert(thread == rthread, "must be");
  assert_different_registers(store_addr, new_val, thread, tmp1, tmp2, noreg, rscratch1);

  // Does store cross heap regions?
  __ eor(tmp1, store_addr, new_val); // tmp1 := store address ^ new value
  __ lsr(tmp1, tmp1, G1HeapRegion::LogOfHRGrainBytes); // tmp1 := ((store address ^ new value) >> LogOfHRGrainBytes)
@ -214,33 +251,19 @@ static void generate_post_barrier_fast_path(MacroAssembler* masm,
  if (new_val_may_be_null) {
    __ cbz(new_val, done);
  }
  // Storing region crossing non-null, is card young?
  // Storing region crossing non-null.
  __ lsr(tmp1, store_addr, CardTable::card_shift()); // tmp1 := card address relative to card table base
  __ load_byte_map_base(tmp2); // tmp2 := card table base address
  __ add(tmp1, tmp1, tmp2); // tmp1 := card address
  __ ldrb(tmp2, Address(tmp1)); // tmp2 := card
  __ cmpw(tmp2, (int)G1CardTable::g1_young_card_val()); // tmp2 := card == young_card_val?
}

static void generate_post_barrier_slow_path(MacroAssembler* masm,
                                            const Register thread,
                                            const Register tmp1,
                                            const Register tmp2,
                                            Label& done,
                                            Label& runtime) {
  __ membar(Assembler::StoreLoad); // StoreLoad membar
  __ ldrb(tmp2, Address(tmp1)); // tmp2 := card
  __ cbzw(tmp2, done);
  // Storing a region crossing, non-null oop, card is clean.
  // Dirty card and log.
  STATIC_ASSERT(CardTable::dirty_card_val() == 0);
  __ strb(zr, Address(tmp1)); // *(card address) := dirty_card_val
  generate_queue_test_and_insertion(masm,
                                    G1ThreadLocalData::dirty_card_queue_index_offset(),
                                    G1ThreadLocalData::dirty_card_queue_buffer_offset(),
                                    runtime,
                                    thread, tmp1, tmp2, rscratch1);
  __ b(done);
  Address card_table_addr(thread, in_bytes(G1ThreadLocalData::card_table_base_offset()));
  __ ldr(tmp2, card_table_addr); // tmp2 := card table base address
  if (UseCondCardMark) {
    __ ldrb(rscratch1, Address(tmp1, tmp2)); // rscratch1 := card
    // Instead of loading clean_card_val and comparing, we exploit the fact that
    // the LSB of non-clean cards is always 0, and the LSB of clean cards 1.
    __ tbz(rscratch1, 0, done);
  }
  static_assert(G1CardTable::dirty_card_val() == 0, "must be to use zr");
  __ strb(zr, Address(tmp1, tmp2)); // *(card address) := dirty_card_val
}

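Note: taken together, the rewritten fast path performs, per reference store, roughly the logic sketched below in plain C++ (the names and parameters are illustrative, not HotSpot API; the card table base is the per-thread value loaded from G1ThreadLocalData::card_table_base_offset()). The young-card comparison, the StoreLoad fence and the dirty card queue insertion of the old path are gone; the barrier now only filters same-region and null stores and then dirties the card directly.

    #include <cstdint>

    // Illustrative model of generate_post_barrier_fast_path after this change
    // (sketch under assumed names; dirty_card_val == 0 and clean cards have their
    // least significant bit set, as the asserts and comments above rely on).
    void post_barrier_fast_path(uintptr_t store_addr,
                                uintptr_t new_val,
                                uint8_t* thread_card_table, // per-thread card table base
                                unsigned region_grain_log,  // G1HeapRegion::LogOfHRGrainBytes
                                unsigned card_shift,        // CardTable::card_shift()
                                bool new_val_may_be_null,
                                bool use_cond_card_mark) {
      // Same-region stores never need a remembered set entry.
      if (((store_addr ^ new_val) >> region_grain_log) == 0) return;
      // Storing null never creates a cross-region reference.
      if (new_val_may_be_null && new_val == 0) return;
      uint8_t* card = thread_card_table + (store_addr >> card_shift);
      if (use_cond_card_mark && (*card & 1) == 0) {
        return; // LSB 0: card already non-clean
      }
      *card = 0; // dirty_card_val: plain store, no fence, no queueing
    }

With UseCondCardMark the extra load-and-test trades one more memory access for avoiding repeated stores to cards that are already marked.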
void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm,
@ -249,27 +272,8 @@ void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm,
                                                  Register thread,
                                                  Register tmp1,
                                                  Register tmp2) {
  assert(thread == rthread, "must be");
  assert_different_registers(store_addr, new_val, thread, tmp1, tmp2,
                             rscratch1);
  assert(store_addr != noreg && new_val != noreg && tmp1 != noreg
         && tmp2 != noreg, "expecting a register");

  Label done;
  Label runtime;

  generate_post_barrier_fast_path(masm, store_addr, new_val, tmp1, tmp2, done, true /* new_val_may_be_null */);
  // If card is young, jump to done
  __ br(Assembler::EQ, done);
  generate_post_barrier_slow_path(masm, thread, tmp1, tmp2, done, runtime);

  __ bind(runtime);
  // save the live input values
  RegSet saved = RegSet::of(store_addr);
  __ push(saved, sp);
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), tmp1, thread);
  __ pop(saved, sp);

  generate_post_barrier_fast_path(masm, store_addr, new_val, thread, tmp1, tmp2, done, false /* new_val_may_be_null */);
  __ bind(done);
}

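Note: for contrast, the deleted generate_post_barrier_slow_path plus its runtime fallback amounted to roughly the following per cross-region store. This is a simplified C++ sketch of the removed logic only; DirtyCardQueue and runtime_enqueue are stand-ins for the G1ThreadLocalData dirty card queue fields and G1BarrierSetRuntime::write_ref_field_post_entry.

    #include <atomic>
    #include <cstddef>
    #include <cstdint>

    struct DirtyCardQueue {  // simplified stand-in for the thread-local queue fields
      size_t    index;       // byte index into the buffer, counts down towards 0
      uint8_t** buffer;      // buffer of logged card addresses
    };

    void old_post_barrier_slow_path(uint8_t* card, DirtyCardQueue* q,
                                    void (*runtime_enqueue)(uint8_t* card)) {
      // StoreLoad fence so the card re-read is ordered after the reference store,
      // as the (now removed) concurrent refinement protocol required.
      std::atomic_thread_fence(std::memory_order_seq_cst);
      if (*card == 0) {
        return;              // card already dirty (dirty_card_val == 0)
      }
      *card = 0;             // dirty the card ...
      if (q->index == 0) {   // ... and log its address for concurrent refinement
        runtime_enqueue(card); // queue full: fall back to write_ref_field_post_entry
        return;
      }
      q->index -= sizeof(uint8_t*);
      q->buffer[q->index / sizeof(uint8_t*)] = card;
    }

This fence and queueing work on the store path is the synchronization the JEP title refers to reducing.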
@ -329,38 +333,10 @@ void G1BarrierSetAssembler::g1_write_barrier_post_c2(MacroAssembler* masm,
                                                     Register thread,
                                                     Register tmp1,
                                                     Register tmp2,
                                                     G1PostBarrierStubC2* stub) {
  assert(thread == rthread, "must be");
  assert_different_registers(store_addr, new_val, thread, tmp1, tmp2,
                             rscratch1);
  assert(store_addr != noreg && new_val != noreg && tmp1 != noreg
         && tmp2 != noreg, "expecting a register");

  stub->initialize_registers(thread, tmp1, tmp2);

  bool new_val_may_be_null = (stub->barrier_data() & G1C2BarrierPostNotNull) == 0;
  generate_post_barrier_fast_path(masm, store_addr, new_val, tmp1, tmp2, *stub->continuation(), new_val_may_be_null);
  // If card is not young, jump to stub (slow path)
  __ br(Assembler::NE, *stub->entry());

  __ bind(*stub->continuation());
}

void G1BarrierSetAssembler::generate_c2_post_barrier_stub(MacroAssembler* masm,
                                                          G1PostBarrierStubC2* stub) const {
  Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
  Label runtime;
  Register thread = stub->thread();
  Register tmp1 = stub->tmp1(); // tmp1 holds the card address.
  Register tmp2 = stub->tmp2();
  assert(stub->tmp3() == noreg, "not needed in this platform");

  __ bind(*stub->entry());
  generate_post_barrier_slow_path(masm, thread, tmp1, tmp2, *stub->continuation(), runtime);

  __ bind(runtime);
  generate_c2_barrier_runtime_call(masm, stub, tmp1, CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry));
  __ b(*stub->continuation());
                                                     bool new_val_may_be_null) {
  Label done;
  generate_post_barrier_fast_path(masm, store_addr, new_val, thread, tmp1, tmp2, done, new_val_may_be_null);
  __ bind(done);
}

#endif // COMPILER2
@ -456,20 +432,19 @@ void G1BarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrier
|
||||
__ b(*stub->continuation());
|
||||
}
|
||||
|
||||
void G1BarrierSetAssembler::gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub) {
|
||||
G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
|
||||
__ bind(*stub->entry());
|
||||
assert(stub->addr()->is_register(), "Precondition.");
|
||||
assert(stub->new_val()->is_register(), "Precondition.");
|
||||
Register new_val_reg = stub->new_val()->as_register();
|
||||
__ cbz(new_val_reg, *stub->continuation());
|
||||
ce->store_parameter(stub->addr()->as_pointer_register(), 0);
|
||||
__ far_call(RuntimeAddress(bs->post_barrier_c1_runtime_code_blob()->code_begin()));
|
||||
__ b(*stub->continuation());
|
||||
}
|
||||
|
||||
#undef __
|
||||
|
||||
void G1BarrierSetAssembler::g1_write_barrier_post_c1(MacroAssembler* masm,
|
||||
Register store_addr,
|
||||
Register new_val,
|
||||
Register thread,
|
||||
Register tmp1,
|
||||
Register tmp2) {
|
||||
Label done;
|
||||
generate_post_barrier_fast_path(masm, store_addr, new_val, thread, tmp1, tmp2, done, true /* new_val_may_be_null */);
|
||||
masm->bind(done);
|
||||
}
|
||||
|
||||
#define __ sasm->
|
||||
|
||||
void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) {
|
||||
@ -521,74 +496,6 @@ void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler*
|
||||
__ epilogue();
|
||||
}
|
||||
|
||||
void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler* sasm) {
|
||||
__ prologue("g1_post_barrier", false);
|
||||
|
||||
// arg0: store_address
|
||||
Address store_addr(rfp, 2*BytesPerWord);
|
||||
|
||||
BarrierSet* bs = BarrierSet::barrier_set();
|
||||
CardTableBarrierSet* ctbs = barrier_set_cast<CardTableBarrierSet>(bs);
|
||||
CardTable* ct = ctbs->card_table();
|
||||
|
||||
Label done;
|
||||
Label runtime;
|
||||
|
||||
// At this point we know new_value is non-null and the new_value crosses regions.
|
||||
// Must check to see if card is already dirty
|
||||
|
||||
const Register thread = rthread;
|
||||
|
||||
Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset()));
|
||||
Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset()));
|
||||
|
||||
const Register card_offset = rscratch2;
|
||||
// LR is free here, so we can use it to hold the byte_map_base.
|
||||
const Register byte_map_base = lr;
|
||||
|
||||
assert_different_registers(card_offset, byte_map_base, rscratch1);
|
||||
|
||||
__ load_parameter(0, card_offset);
|
||||
__ lsr(card_offset, card_offset, CardTable::card_shift());
|
||||
__ load_byte_map_base(byte_map_base);
|
||||
__ ldrb(rscratch1, Address(byte_map_base, card_offset));
|
||||
__ cmpw(rscratch1, (int)G1CardTable::g1_young_card_val());
|
||||
__ br(Assembler::EQ, done);
|
||||
|
||||
assert((int)CardTable::dirty_card_val() == 0, "must be 0");
|
||||
|
||||
__ membar(Assembler::StoreLoad);
|
||||
__ ldrb(rscratch1, Address(byte_map_base, card_offset));
|
||||
__ cbzw(rscratch1, done);
|
||||
|
||||
// storing region crossing non-null, card is clean.
|
||||
// dirty card and log.
|
||||
__ strb(zr, Address(byte_map_base, card_offset));
|
||||
|
||||
// Convert card offset into an address in card_addr
|
||||
Register card_addr = card_offset;
|
||||
__ add(card_addr, byte_map_base, card_addr);
|
||||
|
||||
__ ldr(rscratch1, queue_index);
|
||||
__ cbz(rscratch1, runtime);
|
||||
__ sub(rscratch1, rscratch1, wordSize);
|
||||
__ str(rscratch1, queue_index);
|
||||
|
||||
// Reuse LR to hold buffer_addr
|
||||
const Register buffer_addr = lr;
|
||||
|
||||
__ ldr(buffer_addr, buffer);
|
||||
__ str(card_addr, Address(buffer_addr, rscratch1));
|
||||
__ b(done);
|
||||
|
||||
__ bind(runtime);
|
||||
__ push_call_clobbered_registers();
|
||||
__ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, thread);
|
||||
__ pop_call_clobbered_registers();
|
||||
__ bind(done);
|
||||
__ epilogue();
|
||||
}
|
||||
|
||||
#undef __
|
||||
|
||||
#endif // COMPILER1
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2018, 2024, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -32,9 +32,7 @@
|
||||
class LIR_Assembler;
|
||||
class StubAssembler;
|
||||
class G1PreBarrierStub;
|
||||
class G1PostBarrierStub;
|
||||
class G1PreBarrierStubC2;
|
||||
class G1PostBarrierStubC2;
|
||||
|
||||
class G1BarrierSetAssembler: public ModRefBarrierSetAssembler {
|
||||
protected:
|
||||
@ -65,10 +63,15 @@ protected:
|
||||
public:
|
||||
#ifdef COMPILER1
|
||||
void gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub);
|
||||
void gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub);
|
||||
|
||||
void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm);
|
||||
void generate_c1_post_barrier_runtime_stub(StubAssembler* sasm);
|
||||
|
||||
void g1_write_barrier_post_c1(MacroAssembler* masm,
|
||||
Register store_addr,
|
||||
Register new_val,
|
||||
Register thread,
|
||||
Register tmp1,
|
||||
Register tmp2);
|
||||
#endif
|
||||
|
||||
#ifdef COMPILER2
|
||||
@ -87,9 +90,7 @@ public:
|
||||
Register thread,
|
||||
Register tmp1,
|
||||
Register tmp2,
|
||||
G1PostBarrierStubC2* c2_stub);
|
||||
void generate_c2_post_barrier_stub(MacroAssembler* masm,
|
||||
G1PostBarrierStubC2* stub) const;
|
||||
bool new_val_may_be_null);
|
||||
#endif
|
||||
|
||||
void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
//
|
||||
// Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
|
||||
// Copyright (c) 2024, 2025, Oracle and/or its affiliates. All rights reserved.
|
||||
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
//
|
||||
// This code is free software; you can redistribute it and/or modify it
|
||||
@ -62,13 +62,13 @@ static void write_barrier_post(MacroAssembler* masm,
|
||||
Register new_val,
|
||||
Register tmp1,
|
||||
Register tmp2) {
|
||||
if (!G1PostBarrierStubC2::needs_barrier(node)) {
|
||||
if (!G1BarrierStubC2::needs_post_barrier(node)) {
|
||||
return;
|
||||
}
|
||||
Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
|
||||
G1BarrierSetAssembler* g1_asm = static_cast<G1BarrierSetAssembler*>(BarrierSet::barrier_set()->barrier_set_assembler());
|
||||
G1PostBarrierStubC2* const stub = G1PostBarrierStubC2::create(node);
|
||||
g1_asm->g1_write_barrier_post_c2(masm, store_addr, new_val, rthread, tmp1, tmp2, stub);
|
||||
bool new_val_may_be_null = G1BarrierStubC2::post_new_val_may_be_null(node);
|
||||
g1_asm->g1_write_barrier_post_c2(masm, store_addr, new_val, rthread, tmp1, tmp2, new_val_may_be_null);
|
||||
}
|
||||
|
||||
%}
|
||||
|
||||
@ -201,12 +201,15 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
|
||||
static void generate_post_barrier_fast_path(MacroAssembler* masm,
|
||||
const Register store_addr,
|
||||
const Register new_val,
|
||||
const Register thread,
|
||||
const Register tmp1,
|
||||
const Register tmp2,
|
||||
Label& done,
|
||||
bool new_val_may_be_null) {
|
||||
// Does store cross heap regions?
|
||||
assert(thread == Rthread, "must be");
|
||||
assert_different_registers(store_addr, new_val, thread, tmp1, tmp2, noreg);
|
||||
|
||||
// Does store cross heap regions?
|
||||
__ eor(tmp1, store_addr, new_val);
|
||||
__ movs(tmp1, AsmOperand(tmp1, lsr, G1HeapRegion::LogOfHRGrainBytes));
|
||||
__ b(done, eq);
|
||||
@ -215,76 +218,34 @@ static void generate_post_barrier_fast_path(MacroAssembler* masm,
|
||||
if (new_val_may_be_null) {
|
||||
__ cbz(new_val, done);
|
||||
}
|
||||
// storing region crossing non-null, is card already dirty?
|
||||
const Register card_addr = tmp1;
|
||||
|
||||
CardTableBarrierSet* ct = barrier_set_cast<CardTableBarrierSet>(BarrierSet::barrier_set());
|
||||
__ mov_address(tmp2, (address)ct->card_table()->byte_map_base());
|
||||
__ add(card_addr, tmp2, AsmOperand(store_addr, lsr, CardTable::card_shift()));
|
||||
// storing region crossing non-null, is card already non-clean?
|
||||
Address card_table_addr(thread, in_bytes(G1ThreadLocalData::card_table_base_offset()));
|
||||
__ ldr(tmp2, card_table_addr);
|
||||
__ add(tmp1, tmp2, AsmOperand(store_addr, lsr, CardTable::card_shift()));
|
||||
|
||||
__ ldrb(tmp2, Address(card_addr));
|
||||
__ cmp(tmp2, (int)G1CardTable::g1_young_card_val());
|
||||
if (UseCondCardMark) {
|
||||
__ ldrb(tmp2, Address(tmp1));
|
||||
// Instead of loading clean_card_val and comparing, we exploit the fact that
|
||||
// the LSB of non-clean cards is always 0, and the LSB of clean cards 1.
|
||||
__ tbz(tmp2, 0, done);
|
||||
}
|
||||
|
||||
static_assert(G1CardTable::dirty_card_val() == 0, "must be to use zero_register()");
|
||||
__ zero_register(tmp2);
|
||||
__ strb(tmp2, Address(tmp1)); // *(card address) := dirty_card_val
|
||||
}
|
||||
|
||||
static void generate_post_barrier_slow_path(MacroAssembler* masm,
|
||||
const Register thread,
|
||||
const Register tmp1,
|
||||
const Register tmp2,
|
||||
const Register tmp3,
|
||||
Label& done,
|
||||
Label& runtime) {
|
||||
__ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreLoad), tmp2);
|
||||
assert(CardTable::dirty_card_val() == 0, "adjust this code");
|
||||
// card_addr is loaded by generate_post_barrier_fast_path
|
||||
const Register card_addr = tmp1;
|
||||
__ ldrb(tmp2, Address(card_addr));
|
||||
__ cbz(tmp2, done);
|
||||
|
||||
// storing a region crossing, non-null oop, card is clean.
|
||||
// dirty card and log.
|
||||
|
||||
__ strb(__ zero_register(tmp2), Address(card_addr));
|
||||
generate_queue_test_and_insertion(masm,
|
||||
G1ThreadLocalData::dirty_card_queue_index_offset(),
|
||||
G1ThreadLocalData::dirty_card_queue_buffer_offset(),
|
||||
runtime,
|
||||
thread, card_addr, tmp2, tmp3);
|
||||
__ b(done);
|
||||
}
|
||||
|
||||
|
||||
// G1 post-barrier.
|
||||
// Blows all volatile registers (R0-R3, LR).
|
||||
void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm,
|
||||
Register store_addr,
|
||||
Register new_val,
|
||||
Register tmp1,
|
||||
Register tmp2,
|
||||
Register tmp3) {
|
||||
Register store_addr,
|
||||
Register new_val,
|
||||
Register tmp1,
|
||||
Register tmp2,
|
||||
Register tmp3) {
|
||||
Label done;
|
||||
Label runtime;
|
||||
|
||||
generate_post_barrier_fast_path(masm, store_addr, new_val, tmp1, tmp2, done, true /* new_val_may_be_null */);
|
||||
// If card is young, jump to done
|
||||
// card_addr and card are loaded by generate_post_barrier_fast_path
|
||||
const Register card = tmp2;
|
||||
const Register card_addr = tmp1;
|
||||
__ b(done, eq);
|
||||
generate_post_barrier_slow_path(masm, Rthread, card_addr, tmp2, tmp3, done, runtime);
|
||||
|
||||
__ bind(runtime);
|
||||
|
||||
RegisterSet set = RegisterSet(store_addr) | RegisterSet(R0, R3) | RegisterSet(R12);
|
||||
__ push(set);
|
||||
|
||||
if (card_addr != R0) {
|
||||
__ mov(R0, card_addr);
|
||||
}
|
||||
__ mov(R1, Rthread);
|
||||
__ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), R0, R1);
|
||||
|
||||
__ pop(set);
|
||||
|
||||
generate_post_barrier_fast_path(masm, store_addr, new_val, Rthread, tmp1, tmp2, done, true /* new_val_may_be_null */);
|
||||
__ bind(done);
|
||||
}
|
||||
|
||||
@ -344,35 +305,10 @@ void G1BarrierSetAssembler::g1_write_barrier_post_c2(MacroAssembler* masm,
|
||||
Register tmp1,
|
||||
Register tmp2,
|
||||
Register tmp3,
|
||||
G1PostBarrierStubC2* stub) {
|
||||
assert(thread == Rthread, "must be");
|
||||
assert_different_registers(store_addr, new_val, thread, tmp1, tmp2, noreg);
|
||||
|
||||
stub->initialize_registers(thread, tmp1, tmp2, tmp3);
|
||||
|
||||
bool new_val_may_be_null = (stub->barrier_data() & G1C2BarrierPostNotNull) == 0;
|
||||
generate_post_barrier_fast_path(masm, store_addr, new_val, tmp1, tmp2, *stub->continuation(), new_val_may_be_null);
|
||||
// If card is not young, jump to stub (slow path)
|
||||
__ b(*stub->entry(), ne);
|
||||
|
||||
__ bind(*stub->continuation());
|
||||
}
|
||||
|
||||
void G1BarrierSetAssembler::generate_c2_post_barrier_stub(MacroAssembler* masm,
|
||||
G1PostBarrierStubC2* stub) const {
|
||||
Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
|
||||
Label runtime;
|
||||
Register thread = stub->thread();
|
||||
Register tmp1 = stub->tmp1(); // tmp1 holds the card address.
|
||||
Register tmp2 = stub->tmp2();
|
||||
Register tmp3 = stub->tmp3();
|
||||
|
||||
__ bind(*stub->entry());
|
||||
generate_post_barrier_slow_path(masm, thread, tmp1, tmp2, tmp3, *stub->continuation(), runtime);
|
||||
|
||||
__ bind(runtime);
|
||||
generate_c2_barrier_runtime_call(masm, stub, tmp1, CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), tmp2);
|
||||
__ b(*stub->continuation());
|
||||
bool new_val_may_be_null) {
|
||||
Label done;
|
||||
generate_post_barrier_fast_path(masm, store_addr, new_val, thread, tmp1, tmp2, done, new_val_may_be_null);
|
||||
__ bind(done);
|
||||
}
|
||||
|
||||
#endif // COMPILER2
|
||||
@ -463,20 +399,19 @@ void G1BarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrier
|
||||
__ b(*stub->continuation());
|
||||
}
|
||||
|
||||
void G1BarrierSetAssembler::gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub) {
|
||||
G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
|
||||
__ bind(*stub->entry());
|
||||
assert(stub->addr()->is_register(), "Precondition.");
|
||||
assert(stub->new_val()->is_register(), "Precondition.");
|
||||
Register new_val_reg = stub->new_val()->as_register();
|
||||
__ cbz(new_val_reg, *stub->continuation());
|
||||
ce->verify_reserved_argument_area_size(1);
|
||||
__ str(stub->addr()->as_pointer_register(), Address(SP));
|
||||
__ call(bs->post_barrier_c1_runtime_code_blob()->code_begin(), relocInfo::runtime_call_type);
|
||||
__ b(*stub->continuation());
|
||||
#undef __
|
||||
|
||||
void G1BarrierSetAssembler::g1_write_barrier_post_c1(MacroAssembler* masm,
|
||||
Register store_addr,
|
||||
Register new_val,
|
||||
Register thread,
|
||||
Register tmp1,
|
||||
Register tmp2) {
|
||||
Label done;
|
||||
generate_post_barrier_fast_path(masm, store_addr, new_val, thread, tmp1, tmp2, done, true /* new_val_may_be_null */);
|
||||
masm->bind(done);
|
||||
}
|
||||
|
||||
#undef __
|
||||
#define __ sasm->
|
||||
|
||||
void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) {
|
||||
@ -536,102 +471,6 @@ void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler*
|
||||
__ b(done);
|
||||
}
|
||||
|
||||
void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler* sasm) {
|
||||
// Input:
|
||||
// - store_addr, pushed on the stack
|
||||
|
||||
__ set_info("g1_post_barrier_slow_id", false);
|
||||
|
||||
Label done;
|
||||
Label recheck;
|
||||
Label runtime;
|
||||
|
||||
Address queue_index(Rthread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset()));
|
||||
Address buffer(Rthread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset()));
|
||||
|
||||
AddressLiteral cardtable(ci_card_table_address_as<address>(), relocInfo::none);
|
||||
|
||||
// save at least the registers that need saving if the runtime is called
|
||||
const RegisterSet saved_regs = RegisterSet(R0,R3) | RegisterSet(R12) | RegisterSet(LR);
|
||||
const int nb_saved_regs = 6;
|
||||
assert(nb_saved_regs == saved_regs.size(), "fix nb_saved_regs");
|
||||
__ push(saved_regs);
|
||||
|
||||
const Register r_card_addr_0 = R0; // must be R0 for the slow case
|
||||
const Register r_obj_0 = R0;
|
||||
const Register r_card_base_1 = R1;
|
||||
const Register r_tmp2 = R2;
|
||||
const Register r_index_2 = R2;
|
||||
const Register r_buffer_3 = R3;
|
||||
const Register tmp1 = Rtemp;
|
||||
|
||||
__ ldr(r_obj_0, Address(SP, nb_saved_regs*wordSize));
|
||||
// Note: there is a comment in x86 code about not using
|
||||
// ExternalAddress / lea, due to relocation not working
|
||||
// properly for that address. Should be OK for arm, where we
|
||||
// explicitly specify that 'cardtable' has a relocInfo::none
|
||||
// type.
|
||||
__ lea(r_card_base_1, cardtable);
|
||||
__ add(r_card_addr_0, r_card_base_1, AsmOperand(r_obj_0, lsr, CardTable::card_shift()));
|
||||
|
||||
// first quick check without barrier
|
||||
__ ldrb(r_tmp2, Address(r_card_addr_0));
|
||||
|
||||
__ cmp(r_tmp2, (int)G1CardTable::g1_young_card_val());
|
||||
__ b(recheck, ne);
|
||||
|
||||
__ bind(done);
|
||||
|
||||
__ pop(saved_regs);
|
||||
|
||||
__ ret();
|
||||
|
||||
__ bind(recheck);
|
||||
|
||||
__ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreLoad), tmp1);
|
||||
|
||||
// reload card state after the barrier that ensures the stored oop was visible
|
||||
__ ldrb(r_tmp2, Address(r_card_addr_0));
|
||||
|
||||
assert(CardTable::dirty_card_val() == 0, "adjust this code");
|
||||
__ cbz(r_tmp2, done);
|
||||
|
||||
// storing region crossing non-null, card is clean.
|
||||
// dirty card and log.
|
||||
|
||||
assert(0 == (int)CardTable::dirty_card_val(), "adjust this code");
|
||||
if ((ci_card_table_address_as<intptr_t>() & 0xff) == 0) {
|
||||
// Card table is aligned so the lowest byte of the table address base is zero.
|
||||
__ strb(r_card_base_1, Address(r_card_addr_0));
|
||||
} else {
|
||||
__ strb(__ zero_register(r_tmp2), Address(r_card_addr_0));
|
||||
}
|
||||
|
||||
__ ldr(r_index_2, queue_index);
|
||||
__ ldr(r_buffer_3, buffer);
|
||||
|
||||
__ subs(r_index_2, r_index_2, wordSize);
|
||||
__ b(runtime, lt); // go to runtime if now negative
|
||||
|
||||
__ str(r_index_2, queue_index);
|
||||
|
||||
__ str(r_card_addr_0, Address(r_buffer_3, r_index_2));
|
||||
|
||||
__ b(done);
|
||||
|
||||
__ bind(runtime);
|
||||
|
||||
__ save_live_registers();
|
||||
|
||||
assert(r_card_addr_0 == c_rarg0, "card_addr should be in R0");
|
||||
__ mov(c_rarg1, Rthread);
|
||||
__ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), c_rarg0, c_rarg1);
|
||||
|
||||
__ restore_live_registers_without_return();
|
||||
|
||||
__ b(done);
|
||||
}
|
||||
|
||||
#undef __
|
||||
|
||||
#endif // COMPILER1
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2018, 2024, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -32,9 +32,7 @@
|
||||
class LIR_Assembler;
|
||||
class StubAssembler;
|
||||
class G1PreBarrierStub;
|
||||
class G1PostBarrierStub;
|
||||
class G1PreBarrierStubC2;
|
||||
class G1PostBarrierStubC2;
|
||||
|
||||
class G1BarrierSetAssembler: public ModRefBarrierSetAssembler {
|
||||
protected:
|
||||
@ -66,10 +64,15 @@ public:
|
||||
#ifdef COMPILER1
|
||||
public:
|
||||
void gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub);
|
||||
void gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub);
|
||||
|
||||
void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm);
|
||||
void generate_c1_post_barrier_runtime_stub(StubAssembler* sasm);
|
||||
|
||||
void g1_write_barrier_post_c1(MacroAssembler* masm,
|
||||
Register store_addr,
|
||||
Register new_val,
|
||||
Register thread,
|
||||
Register tmp1,
|
||||
Register tmp2);
|
||||
#endif
|
||||
|
||||
#ifdef COMPILER2
|
||||
@ -89,9 +92,7 @@ public:
|
||||
Register tmp1,
|
||||
Register tmp2,
|
||||
Register tmp3,
|
||||
G1PostBarrierStubC2* c2_stub);
|
||||
void generate_c2_post_barrier_stub(MacroAssembler* masm,
|
||||
G1PostBarrierStubC2* stub) const;
|
||||
bool new_val_may_be_null);
|
||||
#endif
|
||||
|
||||
};
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
//
|
||||
// Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
|
||||
// Copyright (c) 2024, 2025, Oracle and/or its affiliates. All rights reserved.
|
||||
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
//
|
||||
// This code is free software; you can redistribute it and/or modify it
|
||||
@ -63,13 +63,13 @@ static void write_barrier_post(MacroAssembler* masm,
|
||||
Register tmp1,
|
||||
Register tmp2,
|
||||
Register tmp3) {
|
||||
if (!G1PostBarrierStubC2::needs_barrier(node)) {
|
||||
if (!G1BarrierStubC2::needs_post_barrier(node)) {
|
||||
return;
|
||||
}
|
||||
Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
|
||||
G1BarrierSetAssembler* g1_asm = static_cast<G1BarrierSetAssembler*>(BarrierSet::barrier_set()->barrier_set_assembler());
|
||||
G1PostBarrierStubC2* const stub = G1PostBarrierStubC2::create(node);
|
||||
g1_asm->g1_write_barrier_post_c2(masm, store_addr, new_val, Rthread, tmp1, tmp2, tmp3, stub);
|
||||
bool new_val_may_be_null = G1BarrierStubC2::post_new_val_may_be_null(node);
|
||||
g1_asm->g1_write_barrier_post_c2(masm, store_addr, new_val, Rthread, tmp1, tmp2, tmp3, new_val_may_be_null);
|
||||
}
|
||||
|
||||
%}
|
||||
|
||||
@ -28,7 +28,6 @@
|
||||
#include "gc/g1/g1BarrierSetAssembler.hpp"
|
||||
#include "gc/g1/g1BarrierSetRuntime.hpp"
|
||||
#include "gc/g1/g1CardTable.hpp"
|
||||
#include "gc/g1/g1DirtyCardQueue.hpp"
|
||||
#include "gc/g1/g1HeapRegion.hpp"
|
||||
#include "gc/g1/g1SATBMarkQueueSet.hpp"
|
||||
#include "gc/g1/g1ThreadLocalData.hpp"
|
||||
@ -230,78 +229,52 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, Decorator
|
||||
__ bind(filtered);
|
||||
}
|
||||
|
||||
static void generate_region_crossing_test(MacroAssembler* masm, const Register store_addr, const Register new_val) {
|
||||
__ xorr(R0, store_addr, new_val); // tmp1 := store address ^ new value
|
||||
__ srdi_(R0, R0, G1HeapRegion::LogOfHRGrainBytes); // tmp1 := ((store address ^ new value) >> LogOfHRGrainBytes)
|
||||
}
|
||||
static void generate_post_barrier_fast_path(MacroAssembler* masm,
|
||||
const Register store_addr,
|
||||
const Register new_val,
|
||||
const Register thread,
|
||||
const Register tmp1,
|
||||
const Register tmp2,
|
||||
Label& done,
|
||||
bool new_val_may_be_null) {
|
||||
assert_different_registers(store_addr, new_val, tmp1, R0);
|
||||
assert_different_registers(store_addr, tmp1, tmp2, R0);
|
||||
|
||||
static Address generate_card_young_test(MacroAssembler* masm, const Register store_addr, const Register tmp1, const Register tmp2) {
|
||||
CardTableBarrierSet* ct = barrier_set_cast<CardTableBarrierSet>(BarrierSet::barrier_set());
|
||||
__ load_const_optimized(tmp1, (address)(ct->card_table()->byte_map_base()), tmp2);
|
||||
__ srdi(tmp2, store_addr, CardTable::card_shift()); // tmp1 := card address relative to card table base
|
||||
__ lbzx(R0, tmp1, tmp2); // tmp1 := card address
|
||||
__ cmpwi(CR0, R0, (int)G1CardTable::g1_young_card_val());
|
||||
return Address(tmp1, tmp2); // return card address
|
||||
}
|
||||
__ xorr(R0, store_addr, new_val); // R0 := store address ^ new value
|
||||
__ srdi_(R0, R0, G1HeapRegion::LogOfHRGrainBytes); // R0 := ((store address ^ new value) >> LogOfHRGrainBytes)
|
||||
__ beq(CR0, done);
|
||||
|
||||
static void generate_card_dirty_test(MacroAssembler* masm, Address card_addr) {
|
||||
__ membar(Assembler::StoreLoad); // Must reload after StoreLoad membar due to concurrent refinement
|
||||
__ lbzx(R0, card_addr.base(), card_addr.index()); // tmp2 := card
|
||||
__ cmpwi(CR0, R0, (int)G1CardTable::dirty_card_val()); // tmp2 := card == dirty_card_val?
|
||||
// Crosses regions, storing null?
|
||||
if (!new_val_may_be_null) {
|
||||
#ifdef ASSERT
|
||||
__ cmpdi(CR0, new_val, 0);
|
||||
__ asm_assert_ne("null oop not allowed (G1 post)"); // Checked by caller.
|
||||
#endif
|
||||
} else {
|
||||
__ cmpdi(CR0, new_val, 0);
|
||||
__ beq(CR0, done);
|
||||
}
|
||||
|
||||
__ ld(tmp1, G1ThreadLocalData::card_table_base_offset(), thread);
|
||||
__ srdi(tmp2, store_addr, CardTable::card_shift()); // tmp2 := card address relative to card table base
|
||||
if (UseCondCardMark) {
|
||||
__ lbzx(R0, tmp1, tmp2);
|
||||
__ cmpwi(CR0, R0, (int)G1CardTable::clean_card_val());
|
||||
__ bne(CR0, done);
|
||||
}
|
||||
|
||||
__ li(R0, G1CardTable::dirty_card_val());
|
||||
__ stbx(R0, tmp1, tmp2);
|
||||
}
|
||||
|
||||
void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, DecoratorSet decorators,
|
||||
Register store_addr, Register new_val,
|
||||
Register tmp1, Register tmp2, Register tmp3,
|
||||
MacroAssembler::PreservationLevel preservation_level) {
|
||||
Register tmp1, Register tmp2) {
|
||||
bool not_null = (decorators & IS_NOT_NULL) != 0;
|
||||
|
||||
Label runtime, filtered;
|
||||
assert_different_registers(store_addr, new_val, tmp1, tmp2);
|
||||
|
||||
CardTableBarrierSet* ct = barrier_set_cast<CardTableBarrierSet>(BarrierSet::barrier_set());
|
||||
|
||||
generate_region_crossing_test(masm, store_addr, new_val);
|
||||
__ beq(CR0, filtered);
|
||||
|
||||
// Crosses regions, storing null?
|
||||
if (not_null) {
|
||||
#ifdef ASSERT
|
||||
__ cmpdi(CR0, new_val, 0);
|
||||
__ asm_assert_ne("null oop not allowed (G1 post)"); // Checked by caller.
|
||||
#endif
|
||||
} else {
|
||||
__ cmpdi(CR0, new_val, 0);
|
||||
__ beq(CR0, filtered);
|
||||
}
|
||||
|
||||
Address card_addr = generate_card_young_test(masm, store_addr, tmp1, tmp2);
|
||||
__ beq(CR0, filtered);
|
||||
|
||||
generate_card_dirty_test(masm, card_addr);
|
||||
__ beq(CR0, filtered);
|
||||
|
||||
__ li(R0, (int)G1CardTable::dirty_card_val());
|
||||
__ stbx(R0, card_addr.base(), card_addr.index()); // *(card address) := dirty_card_val
|
||||
|
||||
Register Rcard_addr = tmp3;
|
||||
__ add(Rcard_addr, card_addr.base(), card_addr.index()); // This is the address which needs to get enqueued.
|
||||
|
||||
generate_queue_insertion(masm,
|
||||
G1ThreadLocalData::dirty_card_queue_index_offset(),
|
||||
G1ThreadLocalData::dirty_card_queue_buffer_offset(),
|
||||
runtime, Rcard_addr, tmp1);
|
||||
__ b(filtered);
|
||||
|
||||
__ bind(runtime);
|
||||
|
||||
assert(preservation_level == MacroAssembler::PRESERVATION_NONE,
|
||||
"g1_write_barrier_post doesn't support preservation levels higher than PRESERVATION_NONE");
|
||||
|
||||
// Save the live input values.
|
||||
__ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), Rcard_addr, R16_thread);
|
||||
|
||||
__ bind(filtered);
|
||||
Label done;
|
||||
generate_post_barrier_fast_path(masm, store_addr, new_val, R16_thread, tmp1, tmp2, done, !not_null);
|
||||
__ bind(done);
|
||||
}
|
||||
|
||||
void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
|
||||
@ -333,8 +306,7 @@ void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet deco
|
||||
}
|
||||
g1_write_barrier_post(masm, decorators,
|
||||
base, val,
|
||||
tmp1, tmp2, tmp3,
|
||||
preservation_level);
|
||||
tmp1, tmp2);
|
||||
}
|
||||
}
|
||||
|
||||
@ -457,70 +429,29 @@ void G1BarrierSetAssembler::g1_write_barrier_post_c2(MacroAssembler* masm,
|
||||
Register new_val,
|
||||
Register tmp1,
|
||||
Register tmp2,
|
||||
G1PostBarrierStubC2* stub,
|
||||
bool new_val_may_be_null,
|
||||
bool decode_new_val) {
|
||||
assert_different_registers(store_addr, new_val, tmp1, R0);
|
||||
assert_different_registers(store_addr, tmp1, tmp2, R0);
|
||||
|
||||
stub->initialize_registers(R16_thread, tmp1, tmp2);
|
||||
Label done;
|
||||
|
||||
bool null_check_required = (stub->barrier_data() & G1C2BarrierPostNotNull) == 0;
|
||||
Register new_val_decoded = new_val;
|
||||
|
||||
if (decode_new_val) {
|
||||
assert(UseCompressedOops, "or should not be here");
|
||||
if (null_check_required && CompressedOops::base() != nullptr) {
|
||||
if (new_val_may_be_null && CompressedOops::base() != nullptr) {
|
||||
// We prefer doing the null check after the region crossing check.
|
||||
// Only compressed oop modes with base != null require a null check here.
|
||||
__ cmpwi(CR0, new_val, 0);
|
||||
__ beq(CR0, *stub->continuation());
|
||||
null_check_required = false;
|
||||
__ beq(CR0, done);
|
||||
new_val_may_be_null = false;
|
||||
}
|
||||
new_val_decoded = __ decode_heap_oop_not_null(tmp2, new_val);
|
||||
}
|
||||
|
||||
generate_region_crossing_test(masm, store_addr, new_val_decoded);
|
||||
__ beq(CR0, *stub->continuation());
|
||||
|
||||
// crosses regions, storing null?
|
||||
if (null_check_required) {
|
||||
__ cmpdi(CR0, new_val_decoded, 0);
|
||||
__ beq(CR0, *stub->continuation());
|
||||
}
|
||||
|
||||
Address card_addr = generate_card_young_test(masm, store_addr, tmp1, tmp2);
|
||||
assert(card_addr.base() == tmp1 && card_addr.index() == tmp2, "needed by post barrier stub");
|
||||
__ bc_far_optimized(Assembler::bcondCRbiIs0, __ bi0(CR0, Assembler::equal), *stub->entry());
|
||||
|
||||
__ bind(*stub->continuation());
|
||||
}
|
||||
|
||||
void G1BarrierSetAssembler::generate_c2_post_barrier_stub(MacroAssembler* masm,
|
||||
G1PostBarrierStubC2* stub) const {
|
||||
Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
|
||||
Label runtime;
|
||||
Address card_addr(stub->tmp1(), stub->tmp2()); // See above.
|
||||
|
||||
__ bind(*stub->entry());
|
||||
|
||||
generate_card_dirty_test(masm, card_addr);
|
||||
__ bc_far_optimized(Assembler::bcondCRbiIs1, __ bi0(CR0, Assembler::equal), *stub->continuation());
|
||||
|
||||
__ li(R0, (int)G1CardTable::dirty_card_val());
|
||||
__ stbx(R0, card_addr.base(), card_addr.index()); // *(card address) := dirty_card_val
|
||||
|
||||
Register Rcard_addr = stub->tmp1();
|
||||
__ add(Rcard_addr, card_addr.base(), card_addr.index()); // This is the address which needs to get enqueued.
|
||||
|
||||
generate_queue_insertion(masm,
|
||||
G1ThreadLocalData::dirty_card_queue_index_offset(),
|
||||
G1ThreadLocalData::dirty_card_queue_buffer_offset(),
|
||||
runtime, Rcard_addr, stub->tmp2());
|
||||
__ b(*stub->continuation());
|
||||
|
||||
__ bind(runtime);
|
||||
generate_c2_barrier_runtime_call(masm, stub, Rcard_addr, CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry));
|
||||
__ b(*stub->continuation());
|
||||
generate_post_barrier_fast_path(masm, store_addr, new_val_decoded, R16_thread, tmp1, tmp2, done, new_val_may_be_null);
|
||||
__ bind(done);
|
||||
}
|
||||
|
||||
#endif // COMPILER2
|
||||
@ -558,28 +489,19 @@ void G1BarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrier
|
||||
__ b(*stub->continuation());
|
||||
}
|
||||
|
||||
void G1BarrierSetAssembler::gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub) {
|
||||
G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
|
||||
__ bind(*stub->entry());
|
||||
#undef __
|
||||
|
||||
assert(stub->addr()->is_register(), "Precondition.");
|
||||
assert(stub->new_val()->is_register(), "Precondition.");
|
||||
Register addr_reg = stub->addr()->as_pointer_register();
|
||||
Register new_val_reg = stub->new_val()->as_register();
|
||||
|
||||
__ cmpdi(CR0, new_val_reg, 0);
|
||||
__ bc_far_optimized(Assembler::bcondCRbiIs1, __ bi0(CR0, Assembler::equal), *stub->continuation());
|
||||
|
||||
address c_code = bs->post_barrier_c1_runtime_code_blob()->code_begin();
|
||||
//__ load_const_optimized(R0, c_code);
|
||||
__ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(c_code));
|
||||
__ mtctr(R0);
|
||||
__ mr(R0, addr_reg); // Pass addr in R0.
|
||||
__ bctrl();
|
||||
__ b(*stub->continuation());
|
||||
void G1BarrierSetAssembler::g1_write_barrier_post_c1(MacroAssembler* masm,
|
||||
Register store_addr,
|
||||
Register new_val,
|
||||
Register thread,
|
||||
Register tmp1,
|
||||
Register tmp2) {
|
||||
Label done;
|
||||
generate_post_barrier_fast_path(masm, store_addr, new_val, thread, tmp1, tmp2, done, true /* new_val_may_be_null */);
|
||||
masm->bind(done);
|
||||
}
|
||||
|
||||
#undef __
|
||||
#define __ sasm->
|
||||
|
||||
void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) {
|
||||
@ -642,86 +564,6 @@ void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler*
|
||||
__ b(restart);
|
||||
}
|
||||
|
||||
void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler* sasm) {
|
||||
G1BarrierSet* bs = barrier_set_cast<G1BarrierSet>(BarrierSet::barrier_set());
|
||||
|
||||
__ set_info("g1_post_barrier_slow_id", false);
|
||||
|
||||
// Using stack slots: spill addr, spill tmp2
|
||||
const int stack_slots = 2;
|
||||
Register tmp = R0;
|
||||
Register addr = R14;
|
||||
Register tmp2 = R15;
|
||||
CardTable::CardValue* byte_map_base = bs->card_table()->byte_map_base();
|
||||
|
||||
Label restart, refill, ret;
|
||||
|
||||
// Spill
|
||||
__ std(addr, -8, R1_SP);
|
||||
__ std(tmp2, -16, R1_SP);
|
||||
|
||||
__ srdi(addr, R0, CardTable::card_shift()); // Addr is passed in R0.
|
||||
__ load_const_optimized(/*cardtable*/ tmp2, byte_map_base, tmp);
|
||||
__ add(addr, tmp2, addr);
|
||||
__ lbz(tmp, 0, addr); // tmp := [addr + cardtable]
|
||||
|
||||
// Return if young card.
|
||||
__ cmpwi(CR0, tmp, G1CardTable::g1_young_card_val());
|
||||
__ beq(CR0, ret);
|
||||
|
||||
// Return if the sequentially consistent value is already dirty.
|
||||
__ membar(Assembler::StoreLoad);
|
||||
__ lbz(tmp, 0, addr); // tmp := [addr + cardtable]
|
||||
|
||||
__ cmpwi(CR0, tmp, G1CardTable::dirty_card_val());
|
||||
__ beq(CR0, ret);
|
||||
|
||||
// Not dirty.
|
||||
|
||||
// First, dirty it.
|
||||
__ li(tmp, G1CardTable::dirty_card_val());
|
||||
__ stb(tmp, 0, addr);
|
||||
|
||||
int dirty_card_q_index_byte_offset = in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset());
|
||||
int dirty_card_q_buf_byte_offset = in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset());
|
||||
|
||||
__ bind(restart);
|
||||
|
||||
// Get the index into the update buffer. G1DirtyCardQueue::_index is
|
||||
// a size_t so ld_ptr is appropriate here.
|
||||
__ ld(tmp2, dirty_card_q_index_byte_offset, R16_thread);
|
||||
|
||||
// index == 0?
|
||||
__ cmpdi(CR0, tmp2, 0);
|
||||
__ beq(CR0, refill);
|
||||
|
||||
__ ld(tmp, dirty_card_q_buf_byte_offset, R16_thread);
|
||||
__ addi(tmp2, tmp2, -oopSize);
|
||||
|
||||
__ std(tmp2, dirty_card_q_index_byte_offset, R16_thread);
|
||||
__ add(tmp2, tmp, tmp2);
|
||||
__ std(addr, 0, tmp2); // [_buf + index] := <address_of_card>
|
||||
|
||||
// Restore temp registers and return-from-leaf.
|
||||
__ bind(ret);
|
||||
__ ld(tmp2, -16, R1_SP);
|
||||
__ ld(addr, -8, R1_SP);
|
||||
__ blr();
|
||||
|
||||
__ bind(refill);
|
||||
const int nbytes_save = (MacroAssembler::num_volatile_regs + stack_slots) * BytesPerWord;
|
||||
__ save_volatile_gprs(R1_SP, -nbytes_save); // except R0
|
||||
__ mflr(R0);
|
||||
__ std(R0, _abi0(lr), R1_SP);
|
||||
__ push_frame_reg_args(nbytes_save, R0); // dummy frame for C call
|
||||
__ call_VM_leaf(CAST_FROM_FN_PTR(address, G1DirtyCardQueueSet::handle_zero_index_for_thread), R16_thread);
|
||||
__ pop_frame();
|
||||
__ ld(R0, _abi0(lr), R1_SP);
|
||||
__ mtlr(R0);
|
||||
__ restore_volatile_gprs(R1_SP, -nbytes_save); // except R0
|
||||
__ b(restart);
|
||||
}
|
||||
|
||||
#undef __
|
||||
|
||||
#endif // COMPILER1
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2018, 2024, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2018, 2021 SAP SE. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
@ -37,9 +37,7 @@
|
||||
class LIR_Assembler;
|
||||
class StubAssembler;
|
||||
class G1PreBarrierStub;
|
||||
class G1PostBarrierStub;
|
||||
class G1PreBarrierStubC2;
|
||||
class G1PostBarrierStubC2;
|
||||
|
||||
class G1BarrierSetAssembler: public ModRefBarrierSetAssembler {
|
||||
protected:
|
||||
@ -56,8 +54,7 @@ protected:
|
||||
MacroAssembler::PreservationLevel preservation_level);
|
||||
void g1_write_barrier_post(MacroAssembler* masm, DecoratorSet decorators,
|
||||
Register store_addr, Register new_val,
|
||||
Register tmp1, Register tmp2, Register tmp3,
|
||||
MacroAssembler::PreservationLevel preservation_level);
|
||||
Register tmp1, Register tmp2);
|
||||
|
||||
virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
|
||||
Register base, RegisterOrConstant ind_or_offs, Register val,
|
||||
@ -79,17 +76,21 @@ public:
|
||||
Register new_val,
|
||||
Register tmp1,
|
||||
Register tmp2,
|
||||
G1PostBarrierStubC2* c2_stub,
|
||||
bool new_val_may_be_null,
|
||||
bool decode_new_val);
|
||||
void generate_c2_post_barrier_stub(MacroAssembler* masm,
|
||||
G1PostBarrierStubC2* stub) const;
|
||||
#endif
|
||||
#ifdef COMPILER1
|
||||
void gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub);
|
||||
void gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub);
|
||||
|
||||
void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm);
|
||||
void generate_c1_post_barrier_runtime_stub(StubAssembler* sasm);
|
||||
|
||||
void g1_write_barrier_post_c1(MacroAssembler* masm,
|
||||
Register store_addr,
|
||||
Register new_val,
|
||||
Register thread,
|
||||
Register tmp1,
|
||||
Register tmp2);
|
||||
|
||||
#endif
|
||||
|
||||
virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
//
|
||||
// Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
|
||||
// Copyright (c) 2024, 2025, Oracle and/or its affiliates. All rights reserved.
|
||||
// Copyright (c) 2025 SAP SE. All rights reserved.
|
||||
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
//
|
||||
@ -64,13 +64,13 @@ static void post_write_barrier(MacroAssembler* masm,
|
||||
Register tmp1,
|
||||
Register tmp2,
|
||||
bool decode_new_val = false) {
|
||||
if (!G1PostBarrierStubC2::needs_barrier(node)) {
|
||||
if (!G1BarrierStubC2::needs_post_barrier(node)) {
|
||||
return;
|
||||
}
|
||||
Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
|
||||
G1BarrierSetAssembler* g1_asm = static_cast<G1BarrierSetAssembler*>(BarrierSet::barrier_set()->barrier_set_assembler());
|
||||
G1PostBarrierStubC2* const stub = G1PostBarrierStubC2::create(node);
|
||||
g1_asm->g1_write_barrier_post_c2(masm, store_addr, new_val, tmp1, tmp2, stub, decode_new_val);
|
||||
bool new_val_may_be_null = G1BarrierStubC2::post_new_val_may_be_null(node);
|
||||
g1_asm->g1_write_barrier_post_c2(masm, store_addr, new_val, tmp1, tmp2, new_val_may_be_null, decode_new_val);
|
||||
}
|
||||
|
||||
%}
|
||||
|
||||
@ -87,15 +87,54 @@ void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm
|
||||
}
|
||||
}
|
||||
|
||||
void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
|
||||
Register start, Register count, Register tmp, RegSet saved_regs) {
|
||||
__ push_reg(saved_regs, sp);
|
||||
void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm,
|
||||
DecoratorSet decorators,
|
||||
Register start,
|
||||
Register count,
|
||||
Register tmp,
|
||||
RegSet saved_regs) {
|
||||
assert_different_registers(start, count, tmp);
|
||||
assert_different_registers(c_rarg0, count);
|
||||
__ mv(c_rarg0, start);
|
||||
__ mv(c_rarg1, count);
|
||||
__ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_post_entry), 2);
|
||||
__ pop_reg(saved_regs, sp);
|
||||
|
||||
Label loop, next, done;
|
||||
|
||||
// Zero count? Nothing to do.
|
||||
__ beqz(count, done);
|
||||
|
||||
// Calculate the number of card marks to set. Since the object might start and
|
||||
// end within a card, we need to calculate this via the card table indexes of
|
||||
// the actual start and last addresses covered by the object.
|
||||
// Temporarily use the count register for the last element address.
|
||||
__ shadd(count, count, start, tmp, LogBytesPerHeapOop); // end = start + count << LogBytesPerHeapOop
|
||||
__ subi(count, count, BytesPerHeapOop); // Use last element address for end.
|
||||
|
||||
__ srli(start, start, CardTable::card_shift());
|
||||
__ srli(count, count, CardTable::card_shift());
|
||||
__ sub(count, count, start); // Number of bytes to mark - 1.
|
||||
|
||||
// Add card table base offset to start.
|
||||
Address card_table_address(xthread, G1ThreadLocalData::card_table_base_offset());
|
||||
__ ld(tmp, card_table_address);
|
||||
__ add(start, start, tmp);
|
||||
|
||||
__ bind(loop);
|
||||
if (UseCondCardMark) {
|
||||
__ add(tmp, start, count);
|
||||
__ lbu(tmp, Address(tmp, 0));
|
||||
static_assert((uint)G1CardTable::clean_card_val() == 0xff, "must be");
|
||||
__ subi(tmp, tmp, G1CardTable::clean_card_val()); // Convert the comparison against clean_card_val()
                                                  // into a comparison against zero to avoid use of an extra temp.
__ bnez(tmp, next);
|
||||
}
|
||||
|
||||
__ add(tmp, start, count);
|
||||
static_assert(G1CardTable::dirty_card_val() == 0, "must be to use zr");
|
||||
__ sb(zr, Address(tmp, 0));
|
||||
|
||||
__ bind(next);
|
||||
__ subi(count, count, 1);
|
||||
__ bgez(count, loop);
|
||||
|
||||
__ bind(done);
|
||||
}
|
||||
|
||||
static void generate_queue_test_and_insertion(MacroAssembler* masm, ByteSize index_offset, ByteSize buffer_offset, Label& runtime,
|
||||
@ -192,44 +231,37 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
|
||||
static void generate_post_barrier_fast_path(MacroAssembler* masm,
|
||||
const Register store_addr,
|
||||
const Register new_val,
|
||||
const Register tmp1,
|
||||
const Register tmp2,
|
||||
Label& done,
|
||||
bool new_val_may_be_null) {
|
||||
// Does store cross heap regions?
|
||||
__ xorr(tmp1, store_addr, new_val); // tmp1 := store address ^ new value
|
||||
__ srli(tmp1, tmp1, G1HeapRegion::LogOfHRGrainBytes); // tmp1 := ((store address ^ new value) >> LogOfHRGrainBytes)
|
||||
__ beqz(tmp1, done);
|
||||
// Crosses regions, storing null?
|
||||
if (new_val_may_be_null) {
|
||||
__ beqz(new_val, done);
|
||||
}
|
||||
// Storing region crossing non-null, is card young?
|
||||
__ srli(tmp1, store_addr, CardTable::card_shift()); // tmp1 := card address relative to card table base
|
||||
__ load_byte_map_base(tmp2); // tmp2 := card table base address
|
||||
__ add(tmp1, tmp1, tmp2); // tmp1 := card address
|
||||
__ lbu(tmp2, Address(tmp1)); // tmp2 := card
|
||||
}
|
||||
|
||||
static void generate_post_barrier_slow_path(MacroAssembler* masm,
|
||||
const Register thread,
|
||||
const Register tmp1,
|
||||
const Register tmp2,
|
||||
Label& done,
|
||||
Label& runtime) {
|
||||
__ membar(MacroAssembler::StoreLoad); // StoreLoad membar
|
||||
__ lbu(tmp2, Address(tmp1)); // tmp2 := card
|
||||
__ beqz(tmp2, done, true);
|
||||
// Storing a region crossing, non-null oop, card is clean.
|
||||
// Dirty card and log.
|
||||
STATIC_ASSERT(CardTable::dirty_card_val() == 0);
|
||||
__ sb(zr, Address(tmp1)); // *(card address) := dirty_card_val
|
||||
generate_queue_test_and_insertion(masm,
|
||||
G1ThreadLocalData::dirty_card_queue_index_offset(),
|
||||
G1ThreadLocalData::dirty_card_queue_buffer_offset(),
|
||||
runtime,
|
||||
thread, tmp1, tmp2, t0);
|
||||
__ j(done);
|
||||
bool new_val_may_be_null) {
|
||||
assert(thread == xthread, "must be");
|
||||
assert_different_registers(store_addr, new_val, thread, tmp1, tmp2, noreg);
|
||||
// Does store cross heap regions?
|
||||
__ xorr(tmp1, store_addr, new_val); // tmp1 := store address ^ new value
|
||||
__ srli(tmp1, tmp1, G1HeapRegion::LogOfHRGrainBytes); // tmp1 := ((store address ^ new value) >> LogOfHRGrainBytes)
|
||||
__ beqz(tmp1, done);
|
||||
|
||||
// Crosses regions, storing null?
|
||||
if (new_val_may_be_null) {
|
||||
__ beqz(new_val, done);
|
||||
}
|
||||
// Storing region crossing non-null, is card clean?
|
||||
__ srli(tmp1, store_addr, CardTable::card_shift()); // tmp1 := card address relative to card table base
|
||||
|
||||
Address card_table_address(xthread, G1ThreadLocalData::card_table_base_offset());
|
||||
__ ld(tmp2, card_table_address); // tmp2 := card table base address
|
||||
__ add(tmp1, tmp1, tmp2); // tmp1 := card address
|
||||
if (UseCondCardMark) {
|
||||
static_assert((uint)G1CardTable::clean_card_val() == 0xff, "must be");
|
||||
__ lbu(tmp2, Address(tmp1, 0)); // tmp2 := card
|
||||
__ subi(tmp2, tmp2, G1CardTable::clean_card_val()); // Convert the comparison against clean_card_val()
                                                    // into a comparison against zero to avoid use of an extra temp.
__ bnez(tmp2, done);
|
||||
}
|
||||
static_assert((uint)G1CardTable::dirty_card_val() == 0, "must be to use zr");
|
||||
__ sb(zr, Address(tmp1, 0));
|
||||
}
|
||||
|
||||
void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm,
|
||||
@ -238,27 +270,8 @@ void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm,
|
||||
Register thread,
|
||||
Register tmp1,
|
||||
Register tmp2) {
|
||||
assert(thread == xthread, "must be");
|
||||
assert_different_registers(store_addr, new_val, thread, tmp1, tmp2, t0);
|
||||
assert(store_addr != noreg && new_val != noreg && tmp1 != noreg && tmp2 != noreg,
|
||||
"expecting a register");
|
||||
|
||||
Label done;
|
||||
Label runtime;
|
||||
|
||||
generate_post_barrier_fast_path(masm, store_addr, new_val, tmp1, tmp2, done, true /* new_val_may_be_null */);
|
||||
// If card is young, jump to done (tmp2 holds the card value)
|
||||
__ mv(t0, (int)G1CardTable::g1_young_card_val());
|
||||
__ beq(tmp2, t0, done); // card == young_card_val?
|
||||
generate_post_barrier_slow_path(masm, thread, tmp1, tmp2, done, runtime);
|
||||
|
||||
__ bind(runtime);
|
||||
// save the live input values
|
||||
RegSet saved = RegSet::of(store_addr);
|
||||
__ push_reg(saved, sp);
|
||||
__ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), tmp1, thread);
|
||||
__ pop_reg(saved, sp);
|
||||
|
||||
generate_post_barrier_fast_path(masm, store_addr, new_val, thread, tmp1, tmp2, done, true /* new_val_may_be_null */);
|
||||
__ bind(done);
|
||||
}
|
||||
|
||||
@ -318,37 +331,10 @@ void G1BarrierSetAssembler::g1_write_barrier_post_c2(MacroAssembler* masm,
                                                      Register thread,
                                                      Register tmp1,
                                                      Register tmp2,
                                                      G1PostBarrierStubC2* stub) {
  assert(thread == xthread, "must be");
  assert_different_registers(store_addr, new_val, thread, tmp1, tmp2, t0);
  assert(store_addr != noreg && new_val != noreg && tmp1 != noreg && tmp2 != noreg,
         "expecting a register");

  stub->initialize_registers(thread, tmp1, tmp2);

  bool new_val_may_be_null = (stub->barrier_data() & G1C2BarrierPostNotNull) == 0;
  generate_post_barrier_fast_path(masm, store_addr, new_val, tmp1, tmp2, *stub->continuation(), new_val_may_be_null);
  // If card is not young, jump to stub (slow path) (tmp2 holds the card value)
  __ mv(t0, (int)G1CardTable::g1_young_card_val());
  __ bne(tmp2, t0, *stub->entry(), true);

  __ bind(*stub->continuation());
}

void G1BarrierSetAssembler::generate_c2_post_barrier_stub(MacroAssembler* masm,
                                                          G1PostBarrierStubC2* stub) const {
  Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
  Label runtime;
  Register thread = stub->thread();
  Register tmp1 = stub->tmp1(); // tmp1 holds the card address.
  Register tmp2 = stub->tmp2();

  __ bind(*stub->entry());
  generate_post_barrier_slow_path(masm, thread, tmp1, tmp2, *stub->continuation(), runtime);

  __ bind(runtime);
  generate_c2_barrier_runtime_call(masm, stub, tmp1, CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry));
  __ j(*stub->continuation());
                                                      bool new_val_may_be_null) {
  Label done;
  generate_post_barrier_fast_path(masm, store_addr, new_val, thread, tmp1, tmp2, done, new_val_may_be_null);
  __ bind(done);
}

#endif // COMPILER2
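
A quick worked example of the card indexing all of these fast paths rely on (assuming the usual 512-byte cards, i.e. CardTable::card_shift() == 9):

    // With 512-byte cards (card_shift() == 9):
    //   store to 0x10000 -> card index 0x10000 >> 9 = 128
    //   store to 0x10040 -> card index 0x10040 >> 9 = 128   (same card, one dirty byte covers both)
    //   store to 0x10200 -> card index 0x10200 >> 9 = 129   (next card)
    // Adding the per-thread card table base to the index yields the byte that is set to
    // dirty_card_val() (0), or left alone if UseCondCardMark finds it already non-clean.
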
@ -443,20 +429,19 @@ void G1BarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrier
  __ j(*stub->continuation());
}

void G1BarrierSetAssembler::gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub) {
  G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
  __ bind(*stub->entry());
  assert(stub->addr()->is_register(), "Precondition");
  assert(stub->new_val()->is_register(), "Precondition");
  Register new_val_reg = stub->new_val()->as_register();
  __ beqz(new_val_reg, *stub->continuation(), /* is_far */ true);
  ce->store_parameter(stub->addr()->as_pointer_register(), 0);
  __ far_call(RuntimeAddress(bs->post_barrier_c1_runtime_code_blob()->code_begin()));
  __ j(*stub->continuation());
}

#undef __

void G1BarrierSetAssembler::g1_write_barrier_post_c1(MacroAssembler* masm,
                                                     Register store_addr,
                                                     Register new_val,
                                                     Register thread,
                                                     Register tmp1,
                                                     Register tmp2) {
  Label done;
  generate_post_barrier_fast_path(masm, store_addr, new_val, thread, tmp1, tmp2, done, true /* new_val_may_be_null */);
  masm->bind(done);
}

#define __ sasm->

void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) {
|
||||
@ -507,74 +492,6 @@ void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler*
|
||||
__ epilogue();
|
||||
}
|
||||
|
||||
void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler* sasm) {
|
||||
__ prologue("g1_post_barrier", false);
|
||||
|
||||
// arg0 : store_address
|
||||
Address store_addr(fp, 2 * BytesPerWord); // 2 BytesPerWord from fp
|
||||
|
||||
BarrierSet* bs = BarrierSet::barrier_set();
|
||||
CardTableBarrierSet* ctbs = barrier_set_cast<CardTableBarrierSet>(bs);
|
||||
|
||||
Label done;
|
||||
Label runtime;
|
||||
|
||||
// At this point we know new_value is non-null and the new_value crosses regions.
|
||||
// Must check to see if card is already dirty
|
||||
const Register thread = xthread;
|
||||
|
||||
Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset()));
|
||||
Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset()));
|
||||
|
||||
const Register card_offset = t1;
|
||||
// RA is free here, so we can use it to hold the byte_map_base.
|
||||
const Register byte_map_base = ra;
|
||||
|
||||
assert_different_registers(card_offset, byte_map_base, t0);
|
||||
|
||||
__ load_parameter(0, card_offset);
|
||||
__ srli(card_offset, card_offset, CardTable::card_shift());
|
||||
__ load_byte_map_base(byte_map_base);
|
||||
|
||||
// Convert card offset into an address in card_addr
|
||||
Register card_addr = card_offset;
|
||||
__ add(card_addr, byte_map_base, card_addr);
|
||||
|
||||
__ lbu(t0, Address(card_addr, 0));
|
||||
__ sub(t0, t0, (int)G1CardTable::g1_young_card_val());
|
||||
__ beqz(t0, done);
|
||||
|
||||
assert((int)CardTable::dirty_card_val() == 0, "must be 0");
|
||||
|
||||
__ membar(MacroAssembler::StoreLoad);
|
||||
__ lbu(t0, Address(card_addr, 0));
|
||||
__ beqz(t0, done);
|
||||
|
||||
// storing region crossing non-null, card is clean.
|
||||
// dirty card and log.
|
||||
__ sb(zr, Address(card_addr, 0));
|
||||
|
||||
__ ld(t0, queue_index);
|
||||
__ beqz(t0, runtime);
|
||||
__ subi(t0, t0, wordSize);
|
||||
__ sd(t0, queue_index);
|
||||
|
||||
// Reuse RA to hold buffer_addr
|
||||
const Register buffer_addr = ra;
|
||||
|
||||
__ ld(buffer_addr, buffer);
|
||||
__ add(t0, buffer_addr, t0);
|
||||
__ sd(card_addr, Address(t0, 0));
|
||||
__ j(done);
|
||||
|
||||
__ bind(runtime);
|
||||
__ push_call_clobbered_registers();
|
||||
__ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, thread);
|
||||
__ pop_call_clobbered_registers();
|
||||
__ bind(done);
|
||||
__ epilogue();
|
||||
}
|
||||
|
||||
#undef __
|
||||
|
||||
#endif // COMPILER1
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2018, 2024, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2020, 2024, Huawei Technologies Co., Ltd. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
@ -35,9 +35,7 @@ class LIR_Assembler;
|
||||
#endif
|
||||
class StubAssembler;
|
||||
class G1PreBarrierStub;
|
||||
class G1PostBarrierStub;
|
||||
class G1PreBarrierStubC2;
|
||||
class G1PostBarrierStubC2;
|
||||
|
||||
class G1BarrierSetAssembler: public ModRefBarrierSetAssembler {
|
||||
protected:
|
||||
@ -68,10 +66,16 @@ protected:
|
||||
public:
|
||||
#ifdef COMPILER1
|
||||
void gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub);
|
||||
void gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub);
|
||||
|
||||
void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm);
|
||||
void generate_c1_post_barrier_runtime_stub(StubAssembler* sasm);
|
||||
|
||||
void g1_write_barrier_post_c1(MacroAssembler* masm,
|
||||
Register store_addr,
|
||||
Register new_val,
|
||||
Register thread,
|
||||
Register tmp1,
|
||||
Register tmp2);
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef COMPILER2
|
||||
@ -90,9 +94,7 @@ public:
|
||||
Register thread,
|
||||
Register tmp1,
|
||||
Register tmp2,
|
||||
G1PostBarrierStubC2* c2_stub);
|
||||
void generate_c2_post_barrier_stub(MacroAssembler* masm,
|
||||
G1PostBarrierStubC2* stub) const;
|
||||
bool new_val_may_be_null);
|
||||
#endif
|
||||
|
||||
void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
//
|
||||
// Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
|
||||
// Copyright (c) 2024, 2025, Oracle and/or its affiliates. All rights reserved.
|
||||
// Copyright (c) 2024, Huawei Technologies Co., Ltd. All rights reserved.
|
||||
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
//
|
||||
@ -63,13 +63,13 @@ static void write_barrier_post(MacroAssembler* masm,
|
||||
Register new_val,
|
||||
Register tmp1,
|
||||
Register tmp2) {
|
||||
if (!G1PostBarrierStubC2::needs_barrier(node)) {
|
||||
if (!G1BarrierStubC2::needs_post_barrier(node)) {
|
||||
return;
|
||||
}
|
||||
Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
|
||||
G1BarrierSetAssembler* g1_asm = static_cast<G1BarrierSetAssembler*>(BarrierSet::barrier_set()->barrier_set_assembler());
|
||||
G1PostBarrierStubC2* const stub = G1PostBarrierStubC2::create(node);
|
||||
g1_asm->g1_write_barrier_post_c2(masm, store_addr, new_val, xthread, tmp1, tmp2, stub);
|
||||
bool new_val_may_be_null = G1BarrierStubC2::post_new_val_may_be_null(node);
|
||||
g1_asm->g1_write_barrier_post_c2(masm, store_addr, new_val, xthread, tmp1, tmp2, new_val_may_be_null);
|
||||
}
|
||||
|
||||
%}
|
||||
|
||||
@ -28,7 +28,6 @@
|
||||
#include "gc/g1/g1BarrierSetAssembler.hpp"
|
||||
#include "gc/g1/g1BarrierSetRuntime.hpp"
|
||||
#include "gc/g1/g1CardTable.hpp"
|
||||
#include "gc/g1/g1DirtyCardQueue.hpp"
|
||||
#include "gc/g1/g1HeapRegion.hpp"
|
||||
#include "gc/g1/g1SATBMarkQueueSet.hpp"
|
||||
#include "gc/g1/g1ThreadLocalData.hpp"
|
||||
@ -205,104 +204,71 @@ void G1BarrierSetAssembler::generate_c2_pre_barrier_stub(MacroAssembler* masm,
|
||||
BLOCK_COMMENT("} generate_c2_pre_barrier_stub");
|
||||
}
|
||||
|
||||
static void generate_post_barrier_fast_path(MacroAssembler* masm,
|
||||
const Register store_addr,
|
||||
const Register new_val,
|
||||
const Register thread,
|
||||
const Register tmp1,
|
||||
const Register tmp2,
|
||||
Label& done,
|
||||
bool new_val_may_be_null) {
|
||||
|
||||
__ block_comment("generate_post_barrier_fast_path {");
|
||||
|
||||
assert(thread == Z_thread, "must be");
|
||||
assert_different_registers(store_addr, new_val, thread, tmp1, tmp2, noreg);
|
||||
|
||||
// Does store cross heap regions?
|
||||
if (VM_Version::has_DistinctOpnds()) {
|
||||
__ z_xgrk(tmp1, store_addr, new_val); // tmp1 := store address ^ new value
|
||||
} else {
|
||||
__ z_lgr(tmp1, store_addr);
|
||||
__ z_xgr(tmp1, new_val);
|
||||
}
|
||||
__ z_srag(tmp1, tmp1, G1HeapRegion::LogOfHRGrainBytes); // tmp1 := ((store address ^ new value) >> LogOfHRGrainBytes)
|
||||
__ branch_optimized(Assembler::bcondEqual, done);
|
||||
|
||||
// Crosses regions, storing null?
|
||||
if (new_val_may_be_null) {
|
||||
__ z_ltgr(new_val, new_val);
|
||||
__ z_bre(done);
|
||||
} else {
|
||||
#ifdef ASSERT
|
||||
__ z_ltgr(new_val, new_val);
|
||||
__ asm_assert(Assembler::bcondNotZero, "null oop not allowed (G1 post)", 0x322); // Checked by caller.
|
||||
#endif
|
||||
}
|
||||
|
||||
__ z_srag(tmp1, store_addr, CardTable::card_shift());
|
||||
|
||||
Address card_table_addr(thread, in_bytes(G1ThreadLocalData::card_table_base_offset()));
|
||||
__ z_alg(tmp1, card_table_addr); // tmp1 := card address
|
||||
|
||||
if(UseCondCardMark) {
|
||||
__ z_cli(0, tmp1, G1CardTable::clean_card_val());
|
||||
__ branch_optimized(Assembler::bcondNotEqual, done);
|
||||
}
|
||||
|
||||
static_assert(G1CardTable::dirty_card_val() == 0, "must be to use z_mvi");
|
||||
__ z_mvi(0, tmp1, G1CardTable::dirty_card_val()); // *(card address) := dirty_card_val
|
||||
|
||||
__ block_comment("} generate_post_barrier_fast_path");
|
||||
}
|
||||
|
||||
void G1BarrierSetAssembler::g1_write_barrier_post_c2(MacroAssembler* masm,
|
||||
Register store_addr,
|
||||
Register new_val,
|
||||
Register thread,
|
||||
Register tmp1,
|
||||
Register tmp2,
|
||||
G1PostBarrierStubC2* stub) {
|
||||
bool new_val_may_be_null) {
|
||||
BLOCK_COMMENT("g1_write_barrier_post_c2 {");
|
||||
|
||||
assert(thread == Z_thread, "must be");
|
||||
assert_different_registers(store_addr, new_val, thread, tmp1, tmp2, Z_R1_scratch);
|
||||
|
||||
assert(store_addr != noreg && new_val != noreg && tmp1 != noreg && tmp2 != noreg, "expecting a register");
|
||||
|
||||
stub->initialize_registers(thread, tmp1, tmp2);
|
||||
|
||||
BLOCK_COMMENT("generate_region_crossing_test {");
|
||||
if (VM_Version::has_DistinctOpnds()) {
|
||||
__ z_xgrk(tmp1, store_addr, new_val);
|
||||
} else {
|
||||
__ z_lgr(tmp1, store_addr);
|
||||
__ z_xgr(tmp1, new_val);
|
||||
}
|
||||
__ z_srag(tmp1, tmp1, G1HeapRegion::LogOfHRGrainBytes);
|
||||
__ branch_optimized(Assembler::bcondEqual, *stub->continuation());
|
||||
BLOCK_COMMENT("} generate_region_crossing_test");
|
||||
|
||||
// crosses regions, storing null?
|
||||
if ((stub->barrier_data() & G1C2BarrierPostNotNull) == 0) {
|
||||
__ z_ltgr(new_val, new_val);
|
||||
__ branch_optimized(Assembler::bcondEqual, *stub->continuation());
|
||||
}
|
||||
|
||||
BLOCK_COMMENT("generate_card_young_test {");
|
||||
CardTableBarrierSet* ct = barrier_set_cast<CardTableBarrierSet>(BarrierSet::barrier_set());
|
||||
// calculate address of card
|
||||
__ load_const_optimized(tmp2, (address)ct->card_table()->byte_map_base()); // Card table base.
|
||||
__ z_srlg(tmp1, store_addr, CardTable::card_shift()); // Index into card table.
|
||||
__ z_algr(tmp1, tmp2); // Explicit calculation needed for cli.
|
||||
|
||||
// Filter young.
|
||||
__ z_cli(0, tmp1, G1CardTable::g1_young_card_val());
|
||||
|
||||
BLOCK_COMMENT("} generate_card_young_test");
|
||||
|
||||
// From here on, tmp1 holds the card address.
|
||||
__ branch_optimized(Assembler::bcondNotEqual, *stub->entry());
|
||||
|
||||
__ bind(*stub->continuation());
|
||||
|
||||
Label done;
|
||||
generate_post_barrier_fast_path(masm, store_addr, new_val, thread, tmp1, tmp2, done, new_val_may_be_null);
|
||||
__ bind(done);
|
||||
BLOCK_COMMENT("} g1_write_barrier_post_c2");
|
||||
}
|
||||
|
||||
void G1BarrierSetAssembler::generate_c2_post_barrier_stub(MacroAssembler* masm,
|
||||
G1PostBarrierStubC2* stub) const {
|
||||
|
||||
BLOCK_COMMENT("generate_c2_post_barrier_stub {");
|
||||
|
||||
Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
|
||||
Label runtime;
|
||||
|
||||
Register thread = stub->thread();
|
||||
Register tmp1 = stub->tmp1(); // tmp1 holds the card address.
|
||||
Register tmp2 = stub->tmp2();
|
||||
Register Rcard_addr = tmp1;
|
||||
|
||||
__ bind(*stub->entry());
|
||||
|
||||
BLOCK_COMMENT("generate_card_clean_test {");
|
||||
__ z_sync(); // Required to support concurrent cleaning.
|
||||
__ z_cli(0, Rcard_addr, 0); // Reload after membar.
|
||||
__ branch_optimized(Assembler::bcondEqual, *stub->continuation());
|
||||
BLOCK_COMMENT("} generate_card_clean_test");
|
||||
|
||||
BLOCK_COMMENT("generate_dirty_card {");
|
||||
// Storing a region crossing, non-null oop, card is clean.
|
||||
// Dirty card and log.
|
||||
STATIC_ASSERT(CardTable::dirty_card_val() == 0);
|
||||
__ z_mvi(0, Rcard_addr, CardTable::dirty_card_val());
|
||||
BLOCK_COMMENT("} generate_dirty_card");
|
||||
|
||||
generate_queue_test_and_insertion(masm,
|
||||
G1ThreadLocalData::dirty_card_queue_index_offset(),
|
||||
G1ThreadLocalData::dirty_card_queue_buffer_offset(),
|
||||
runtime,
|
||||
Z_thread, tmp1, tmp2);
|
||||
|
||||
__ branch_optimized(Assembler::bcondAlways, *stub->continuation());
|
||||
|
||||
__ bind(runtime);
|
||||
|
||||
generate_c2_barrier_runtime_call(masm, stub, tmp1, CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry));
|
||||
|
||||
__ branch_optimized(Assembler::bcondAlways, *stub->continuation());
|
||||
|
||||
BLOCK_COMMENT("} generate_c2_post_barrier_stub");
|
||||
}
|
||||
|
||||
#endif //COMPILER2
|
||||
|
||||
void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
|
||||
@ -451,99 +417,9 @@ void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, Decorato
|
||||
Register Rtmp1, Register Rtmp2, Register Rtmp3) {
|
||||
bool not_null = (decorators & IS_NOT_NULL) != 0;
|
||||
|
||||
assert_different_registers(Rstore_addr, Rnew_val, Rtmp1, Rtmp2); // Most probably, Rnew_val == Rtmp3.
|
||||
|
||||
Label callRuntime, filtered;
|
||||
|
||||
CardTableBarrierSet* ct = barrier_set_cast<CardTableBarrierSet>(BarrierSet::barrier_set());
|
||||
|
||||
BLOCK_COMMENT("g1_write_barrier_post {");
|
||||
|
||||
// Does store cross heap regions?
|
||||
// It does if the two addresses specify different grain addresses.
|
||||
if (VM_Version::has_DistinctOpnds()) {
|
||||
__ z_xgrk(Rtmp1, Rstore_addr, Rnew_val);
|
||||
} else {
|
||||
__ z_lgr(Rtmp1, Rstore_addr);
|
||||
__ z_xgr(Rtmp1, Rnew_val);
|
||||
}
|
||||
__ z_srag(Rtmp1, Rtmp1, G1HeapRegion::LogOfHRGrainBytes);
|
||||
__ z_bre(filtered);
|
||||
|
||||
// Crosses regions, storing null?
|
||||
if (not_null) {
|
||||
#ifdef ASSERT
|
||||
__ z_ltgr(Rnew_val, Rnew_val);
|
||||
__ asm_assert(Assembler::bcondNotZero, "null oop not allowed (G1 post)", 0x322); // Checked by caller.
|
||||
#endif
|
||||
} else {
|
||||
__ z_ltgr(Rnew_val, Rnew_val);
|
||||
__ z_bre(filtered);
|
||||
}
|
||||
|
||||
Rnew_val = noreg; // end of lifetime
|
||||
|
||||
// Storing region crossing non-null, is card already dirty?
|
||||
assert_different_registers(Rtmp1, Rtmp2, Rtmp3);
|
||||
// Make sure not to use Z_R0 for any of these registers.
|
||||
Register Rcard_addr = (Rtmp1 != Z_R0_scratch) ? Rtmp1 : Rtmp3;
|
||||
Register Rbase = (Rtmp2 != Z_R0_scratch) ? Rtmp2 : Rtmp3;
|
||||
|
||||
// calculate address of card
|
||||
__ load_const_optimized(Rbase, (address)ct->card_table()->byte_map_base()); // Card table base.
|
||||
__ z_srlg(Rcard_addr, Rstore_addr, CardTable::card_shift()); // Index into card table.
|
||||
__ z_algr(Rcard_addr, Rbase); // Explicit calculation needed for cli.
|
||||
Rbase = noreg; // end of lifetime
|
||||
|
||||
// Filter young.
|
||||
__ z_cli(0, Rcard_addr, G1CardTable::g1_young_card_val());
|
||||
__ z_bre(filtered);
|
||||
|
||||
// Check the card value. If dirty, we're done.
|
||||
// This also avoids false sharing of the (already dirty) card.
|
||||
__ z_sync(); // Required to support concurrent cleaning.
|
||||
__ z_cli(0, Rcard_addr, G1CardTable::dirty_card_val()); // Reload after membar.
|
||||
__ z_bre(filtered);
|
||||
|
||||
// Storing a region crossing, non-null oop, card is clean.
|
||||
// Dirty card and log.
|
||||
__ z_mvi(0, Rcard_addr, G1CardTable::dirty_card_val());
|
||||
|
||||
Register Rcard_addr_x = Rcard_addr;
|
||||
Register Rqueue_index = (Rtmp2 != Z_R0_scratch) ? Rtmp2 : Rtmp1;
|
||||
if (Rcard_addr == Rqueue_index) {
|
||||
Rcard_addr_x = Z_R0_scratch; // Register shortage. We have to use Z_R0.
|
||||
}
|
||||
__ lgr_if_needed(Rcard_addr_x, Rcard_addr);
|
||||
|
||||
generate_queue_test_and_insertion(masm,
|
||||
G1ThreadLocalData::dirty_card_queue_index_offset(),
|
||||
G1ThreadLocalData::dirty_card_queue_buffer_offset(),
|
||||
callRuntime,
|
||||
Z_thread, Rcard_addr_x, Rqueue_index);
|
||||
__ z_bru(filtered);
|
||||
|
||||
__ bind(callRuntime);
|
||||
|
||||
// TODO: do we need a frame? Introduced to be on the safe side.
|
||||
bool needs_frame = true;
|
||||
__ lgr_if_needed(Rcard_addr, Rcard_addr_x); // copy back asap. push_frame will destroy Z_R0_scratch!
|
||||
|
||||
// VM call need frame to access(write) O register.
|
||||
if (needs_frame) {
|
||||
__ save_return_pc();
|
||||
__ push_frame_abi160(0); // Will use Z_R0 as tmp on old CPUs.
|
||||
}
|
||||
|
||||
// Save the live input values.
|
||||
__ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), Rcard_addr, Z_thread);
|
||||
|
||||
if (needs_frame) {
|
||||
__ pop_frame();
|
||||
__ restore_return_pc();
|
||||
}
|
||||
|
||||
__ bind(filtered);
|
||||
Label done;
|
||||
generate_post_barrier_fast_path(masm, Rstore_addr, Rnew_val, Z_thread, Rtmp1, Rtmp2, done, !not_null);
|
||||
__ bind(done);
|
||||
|
||||
BLOCK_COMMENT("} g1_write_barrier_post");
|
||||
}
|
||||
@ -615,22 +491,19 @@ void G1BarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrier
|
||||
__ branch_optimized(Assembler::bcondAlways, *stub->continuation());
|
||||
}
|
||||
|
||||
void G1BarrierSetAssembler::gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub) {
|
||||
G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
|
||||
__ bind(*stub->entry());
|
||||
ce->check_reserved_argument_area(16); // RT stub needs 2 spill slots.
|
||||
assert(stub->addr()->is_register(), "Precondition.");
|
||||
assert(stub->new_val()->is_register(), "Precondition.");
|
||||
Register new_val_reg = stub->new_val()->as_register();
|
||||
__ z_ltgr(new_val_reg, new_val_reg);
|
||||
__ branch_optimized(Assembler::bcondZero, *stub->continuation());
|
||||
__ z_lgr(Z_R1_scratch, stub->addr()->as_pointer_register());
|
||||
ce->emit_call_c(bs->post_barrier_c1_runtime_code_blob()->code_begin());
|
||||
__ branch_optimized(Assembler::bcondAlways, *stub->continuation());
|
||||
}
|
||||
|
||||
#undef __
|
||||
|
||||
void G1BarrierSetAssembler::g1_write_barrier_post_c1(MacroAssembler* masm,
|
||||
Register store_addr,
|
||||
Register new_val,
|
||||
Register thread,
|
||||
Register tmp1,
|
||||
Register tmp2) {
|
||||
Label done;
|
||||
generate_post_barrier_fast_path(masm, store_addr, new_val, thread, tmp1, tmp2, done, true /* new_val_may_be_null */);
|
||||
masm->bind(done);
|
||||
}
|
||||
|
||||
#define __ sasm->
|
||||
|
||||
static OopMap* save_volatile_registers(StubAssembler* sasm, Register return_pc = Z_R14) {
|
||||
@ -705,92 +578,6 @@ void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler*
|
||||
__ z_bru(restart);
|
||||
}
|
||||
|
||||
void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler* sasm) {
|
||||
// Z_R1_scratch: oop address, address of updated memory slot
|
||||
|
||||
BarrierSet* bs = BarrierSet::barrier_set();
|
||||
__ set_info("g1_post_barrier_slow_id", false);
|
||||
|
||||
Register addr_oop = Z_R1_scratch;
|
||||
Register addr_card = Z_R1_scratch;
|
||||
Register r1 = Z_R6; // Must be saved/restored.
|
||||
Register r2 = Z_R7; // Must be saved/restored.
|
||||
Register cardtable = r1; // Must be non-volatile, because it is used to save addr_card.
|
||||
CardTableBarrierSet* ctbs = barrier_set_cast<CardTableBarrierSet>(bs);
|
||||
CardTable* ct = ctbs->card_table();
|
||||
CardTable::CardValue* byte_map_base = ct->byte_map_base();
|
||||
|
||||
// Save registers used below (see assertion in G1PreBarrierStub::emit_code()).
|
||||
__ z_stg(r1, 0*BytesPerWord + FrameMap::first_available_sp_in_frame, Z_SP);
|
||||
|
||||
Label not_already_dirty, restart, refill, young_card;
|
||||
|
||||
// Calculate address of card corresponding to the updated oop slot.
|
||||
AddressLiteral rs(byte_map_base);
|
||||
__ z_srlg(addr_card, addr_oop, CardTable::card_shift());
|
||||
addr_oop = noreg; // dead now
|
||||
__ load_const_optimized(cardtable, rs); // cardtable := <card table base>
|
||||
__ z_agr(addr_card, cardtable); // addr_card := addr_oop>>card_shift + cardtable
|
||||
|
||||
__ z_cli(0, addr_card, (int)G1CardTable::g1_young_card_val());
|
||||
__ z_bre(young_card);
|
||||
|
||||
__ z_sync(); // Required to support concurrent cleaning.
|
||||
|
||||
__ z_cli(0, addr_card, (int)CardTable::dirty_card_val());
|
||||
__ z_brne(not_already_dirty);
|
||||
|
||||
__ bind(young_card);
|
||||
// We didn't take the branch, so we're already dirty: restore
|
||||
// used registers and return.
|
||||
__ z_lg(r1, 0*BytesPerWord + FrameMap::first_available_sp_in_frame, Z_SP);
|
||||
__ z_br(Z_R14);
|
||||
|
||||
// Not dirty.
|
||||
__ bind(not_already_dirty);
|
||||
|
||||
// First, dirty it: [addr_card] := 0
|
||||
__ z_mvi(0, addr_card, CardTable::dirty_card_val());
|
||||
|
||||
Register idx = cardtable; // Must be non-volatile, because it is used to save addr_card.
|
||||
Register buf = r2;
|
||||
cardtable = noreg; // now dead
|
||||
|
||||
// Save registers used below (see assertion in G1PreBarrierStub::emit_code()).
|
||||
__ z_stg(r2, 1*BytesPerWord + FrameMap::first_available_sp_in_frame, Z_SP);
|
||||
|
||||
ByteSize dirty_card_q_index_byte_offset = G1ThreadLocalData::dirty_card_queue_index_offset();
|
||||
ByteSize dirty_card_q_buf_byte_offset = G1ThreadLocalData::dirty_card_queue_buffer_offset();
|
||||
|
||||
__ bind(restart);
|
||||
|
||||
// Get the index into the update buffer. G1DirtyCardQueue::_index is
|
||||
// a size_t so z_ltg is appropriate here.
|
||||
__ z_ltg(idx, Address(Z_thread, dirty_card_q_index_byte_offset));
|
||||
|
||||
// index == 0?
|
||||
__ z_brz(refill);
|
||||
|
||||
__ z_lg(buf, Address(Z_thread, dirty_card_q_buf_byte_offset));
|
||||
__ add2reg(idx, -oopSize);
|
||||
|
||||
__ z_stg(addr_card, 0, idx, buf); // [_buf + index] := <address_of_card>
|
||||
__ z_stg(idx, Address(Z_thread, dirty_card_q_index_byte_offset));
|
||||
// Restore killed registers and return.
|
||||
__ z_lg(r1, 0*BytesPerWord + FrameMap::first_available_sp_in_frame, Z_SP);
|
||||
__ z_lg(r2, 1*BytesPerWord + FrameMap::first_available_sp_in_frame, Z_SP);
|
||||
__ z_br(Z_R14);
|
||||
|
||||
__ bind(refill);
|
||||
save_volatile_registers(sasm);
|
||||
__ z_lgr(idx, addr_card); // Save addr_card, tmp3 must be non-volatile.
|
||||
__ call_VM_leaf(CAST_FROM_FN_PTR(address, G1DirtyCardQueueSet::handle_zero_index_for_thread),
|
||||
Z_thread);
|
||||
__ z_lgr(addr_card, idx);
|
||||
restore_volatile_registers(sasm); // Restore addr_card.
|
||||
__ z_bru(restart);
|
||||
}
|
||||
|
||||
#undef __
|
||||
|
||||
#endif // COMPILER1
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2018, 2024, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2018, 2024 SAP SE. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
@ -33,9 +33,7 @@
|
||||
class LIR_Assembler;
|
||||
class StubAssembler;
|
||||
class G1PreBarrierStub;
|
||||
class G1PostBarrierStub;
|
||||
class G1PreBarrierStubC2;
|
||||
class G1PostBarrierStubC2;
|
||||
|
||||
class G1BarrierSetAssembler: public ModRefBarrierSetAssembler {
|
||||
protected:
|
||||
@ -60,10 +58,16 @@ class G1BarrierSetAssembler: public ModRefBarrierSetAssembler {
|
||||
public:
|
||||
#ifdef COMPILER1
|
||||
void gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub);
|
||||
void gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub);
|
||||
|
||||
void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm);
|
||||
void generate_c1_post_barrier_runtime_stub(StubAssembler* sasm);
|
||||
|
||||
void g1_write_barrier_post_c1(MacroAssembler* masm,
|
||||
Register store_addr,
|
||||
Register new_val,
|
||||
Register thread,
|
||||
Register tmp1,
|
||||
Register tmp2);
|
||||
|
||||
#endif // COMPILER1
|
||||
|
||||
#ifdef COMPILER2
|
||||
@ -81,9 +85,7 @@ class G1BarrierSetAssembler: public ModRefBarrierSetAssembler {
|
||||
Register thread,
|
||||
Register tmp1,
|
||||
Register tmp2,
|
||||
G1PostBarrierStubC2* c2_stub);
|
||||
void generate_c2_post_barrier_stub(MacroAssembler* masm,
|
||||
G1PostBarrierStubC2* stub) const;
|
||||
bool new_val_may_be_null);
|
||||
#endif // COMPILER2
|
||||
|
||||
virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
//
|
||||
// Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
|
||||
// Copyright (c) 2024, 2025, Oracle and/or its affiliates. All rights reserved.
|
||||
// Copyright 2024 IBM Corporation. All rights reserved.
|
||||
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
//
|
||||
@ -62,13 +62,13 @@ static void write_barrier_post(MacroAssembler* masm,
|
||||
Register new_val,
|
||||
Register tmp1,
|
||||
Register tmp2) {
|
||||
if (!G1PostBarrierStubC2::needs_barrier(node)) {
|
||||
if (!G1BarrierStubC2::needs_post_barrier(node)) {
|
||||
return;
|
||||
}
|
||||
Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
|
||||
G1BarrierSetAssembler* g1_asm = static_cast<G1BarrierSetAssembler*>(BarrierSet::barrier_set()->barrier_set_assembler());
|
||||
G1PostBarrierStubC2* const stub = G1PostBarrierStubC2::create(node);
|
||||
g1_asm->g1_write_barrier_post_c2(masm, store_addr, new_val, Z_thread, tmp1, tmp2, stub);
|
||||
bool new_val_may_be_null = G1BarrierStubC2::post_new_val_may_be_null(node);
|
||||
g1_asm->g1_write_barrier_post_c2(masm, store_addr, new_val, Z_thread, tmp1, tmp2, new_val_may_be_null);
|
||||
}
|
||||
|
||||
%} // source
|
||||
|
||||
@ -89,19 +89,53 @@ void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm

void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
                                                             Register addr, Register count, Register tmp) {
  __ push_call_clobbered_registers(false /* save_fpu */);
  if (c_rarg0 == count) { // On win64 c_rarg0 == rcx
    assert_different_registers(c_rarg1, addr);
    __ mov(c_rarg1, count);
    __ mov(c_rarg0, addr);
  } else {
    assert_different_registers(c_rarg0, count);
    __ mov(c_rarg0, addr);
    __ mov(c_rarg1, count);
  }
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_post_entry), 2);
  __ pop_call_clobbered_registers(false /* save_fpu */);
  Label done;

  __ testptr(count, count);
  __ jcc(Assembler::zero, done);

  // Calculate end address in "count".
  Address::ScaleFactor scale = UseCompressedOops ? Address::times_4 : Address::times_8;
  __ leaq(count, Address(addr, count, scale));

  // Calculate start card address in "addr".
  __ shrptr(addr, CardTable::card_shift());

  Register thread = r15_thread;

  __ movptr(tmp, Address(thread, in_bytes(G1ThreadLocalData::card_table_base_offset())));
  __ addptr(addr, tmp);

  // Calculate address of card of last word in the array.
  __ subptr(count, 1);
  __ shrptr(count, CardTable::card_shift());
  __ addptr(count, tmp);

  Label loop;
  // Iterate from start card to end card (inclusive).
  __ bind(loop);

  Label is_clean_card;
  if (UseCondCardMark) {
    __ cmpb(Address(addr, 0), G1CardTable::clean_card_val());
    __ jcc(Assembler::equal, is_clean_card);
  } else {
    __ movb(Address(addr, 0), G1CardTable::dirty_card_val());
  }

  Label next_card;
  __ bind(next_card);
  __ addptr(addr, sizeof(CardTable::CardValue));
  __ cmpptr(addr, count);
  __ jcc(Assembler::belowEqual, loop);
  __ jmp(done);

  __ bind(is_clean_card);
  // Card was clean. Dirty the card and go to the next one.
  __ movb(Address(addr, 0), G1CardTable::dirty_card_val());
  __ jmp(next_card);

  __ bind(done);
}

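The rewritten array variant above walks the card range covered by the stored elements instead of calling into the runtime. A rough C++ equivalent, with the oop size and card geometry passed in as parameters rather than taken from the VM flags:

    #include <cstddef>
    #include <cstdint>

    static const int     CARD_SHIFT = 9;    // illustrative; CardTable::card_shift()
    static const uint8_t CLEAN_CARD = 0xff; // G1CardTable::clean_card_val()
    static const uint8_t DIRTY_CARD = 0;    // G1CardTable::dirty_card_val()

    // Dirty every card spanned by count heap oops starting at addr.
    // card_base is the per-thread card table base, as in the generated code.
    void array_post_barrier(uintptr_t addr, size_t count, size_t oop_size,
                            uint8_t* card_base, bool use_cond_card_mark) {
      if (count == 0) return;
      uintptr_t last_byte  = addr + count * oop_size - 1;           // last byte written
      uint8_t*  card       = card_base + (addr >> CARD_SHIFT);      // first card
      uint8_t*  last_card  = card_base + (last_byte >> CARD_SHIFT); // last card, inclusive
      for (; card <= last_card; card++) {
        if (!use_cond_card_mark || *card == CLEAN_CARD) {
          *card = DIRTY_CARD;
        }
      }
    }
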
void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
|
||||
@ -182,7 +216,6 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
|
||||
// If expand_call is true then we expand the call_VM_leaf macro
|
||||
// directly to skip generating the check by
|
||||
// InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp.
|
||||
|
||||
const Register thread = r15_thread;
|
||||
|
||||
Label done;
|
||||
@ -238,73 +271,46 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
|
||||
static void generate_post_barrier_fast_path(MacroAssembler* masm,
|
||||
const Register store_addr,
|
||||
const Register new_val,
|
||||
const Register tmp,
|
||||
const Register tmp2,
|
||||
const Register tmp1,
|
||||
Label& done,
|
||||
bool new_val_may_be_null) {
|
||||
CardTableBarrierSet* ct = barrier_set_cast<CardTableBarrierSet>(BarrierSet::barrier_set());
|
||||
|
||||
assert_different_registers(store_addr, new_val, tmp1, noreg);
|
||||
|
||||
Register thread = r15_thread;
|
||||
|
||||
// Does store cross heap regions?
|
||||
__ movptr(tmp, store_addr); // tmp := store address
|
||||
__ xorptr(tmp, new_val); // tmp := store address ^ new value
|
||||
__ shrptr(tmp, G1HeapRegion::LogOfHRGrainBytes); // ((store address ^ new value) >> LogOfHRGrainBytes) == 0?
|
||||
__ movptr(tmp1, store_addr); // tmp1 := store address
|
||||
__ xorptr(tmp1, new_val); // tmp1 := store address ^ new value
|
||||
__ shrptr(tmp1, G1HeapRegion::LogOfHRGrainBytes); // ((store address ^ new value) >> LogOfHRGrainBytes) == 0?
|
||||
__ jcc(Assembler::equal, done);
|
||||
|
||||
// Crosses regions, storing null?
|
||||
if (new_val_may_be_null) {
|
||||
__ cmpptr(new_val, NULL_WORD); // new value == null?
|
||||
__ cmpptr(new_val, NULL_WORD); // new value == null?
|
||||
__ jcc(Assembler::equal, done);
|
||||
}
|
||||
// Storing region crossing non-null, is card young?
|
||||
__ movptr(tmp, store_addr); // tmp := store address
|
||||
__ shrptr(tmp, CardTable::card_shift()); // tmp := card address relative to card table base
|
||||
// Do not use ExternalAddress to load 'byte_map_base', since 'byte_map_base' is NOT
|
||||
// a valid address and therefore is not properly handled by the relocation code.
|
||||
__ movptr(tmp2, (intptr_t)ct->card_table()->byte_map_base()); // tmp2 := card table base address
|
||||
__ addptr(tmp, tmp2); // tmp := card address
|
||||
__ cmpb(Address(tmp, 0), G1CardTable::g1_young_card_val()); // *(card address) == young_card_val?
|
||||
}
|
||||
|
||||
static void generate_post_barrier_slow_path(MacroAssembler* masm,
|
||||
const Register thread,
|
||||
const Register tmp,
|
||||
const Register tmp2,
|
||||
Label& done,
|
||||
Label& runtime) {
|
||||
__ membar(Assembler::Membar_mask_bits(Assembler::StoreLoad)); // StoreLoad membar
|
||||
__ cmpb(Address(tmp, 0), G1CardTable::dirty_card_val()); // *(card address) == dirty_card_val?
|
||||
__ jcc(Assembler::equal, done);
|
||||
__ movptr(tmp1, store_addr); // tmp1 := store address
|
||||
__ shrptr(tmp1, CardTable::card_shift()); // tmp1 := card address relative to card table base
|
||||
|
||||
Address card_table_addr(thread, in_bytes(G1ThreadLocalData::card_table_base_offset()));
|
||||
__ addptr(tmp1, card_table_addr); // tmp1 := card address
|
||||
if (UseCondCardMark) {
|
||||
__ cmpb(Address(tmp1, 0), G1CardTable::clean_card_val()); // *(card address) == clean_card_val?
|
||||
__ jcc(Assembler::notEqual, done);
|
||||
}
|
||||
// Storing a region crossing, non-null oop, card is clean.
|
||||
// Dirty card and log.
|
||||
__ movb(Address(tmp, 0), G1CardTable::dirty_card_val()); // *(card address) := dirty_card_val
|
||||
generate_queue_insertion(masm,
|
||||
G1ThreadLocalData::dirty_card_queue_index_offset(),
|
||||
G1ThreadLocalData::dirty_card_queue_buffer_offset(),
|
||||
runtime,
|
||||
thread, tmp, tmp2);
|
||||
__ jmp(done);
|
||||
// Dirty card.
|
||||
__ movb(Address(tmp1, 0), G1CardTable::dirty_card_val()); // *(card address) := dirty_card_val
|
||||
}
|
||||
|
||||
void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm,
|
||||
Register store_addr,
|
||||
Register new_val,
|
||||
Register tmp,
|
||||
Register tmp2) {
|
||||
const Register thread = r15_thread;
|
||||
|
||||
Register tmp) {
|
||||
Label done;
|
||||
Label runtime;
|
||||
|
||||
generate_post_barrier_fast_path(masm, store_addr, new_val, tmp, tmp2, done, true /* new_val_may_be_null */);
|
||||
// If card is young, jump to done
|
||||
__ jcc(Assembler::equal, done);
|
||||
generate_post_barrier_slow_path(masm, thread, tmp, tmp2, done, runtime);
|
||||
|
||||
__ bind(runtime);
|
||||
// save the live input values
|
||||
RegSet saved = RegSet::of(store_addr);
|
||||
__ push_set(saved);
|
||||
__ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), tmp, thread);
|
||||
__ pop_set(saved);
|
||||
|
||||
generate_post_barrier_fast_path(masm, store_addr, new_val, tmp, done, true /* new_val_may_be_null */);
|
||||
__ bind(done);
|
||||
}
|
||||
|
||||
@ -367,34 +373,10 @@ void G1BarrierSetAssembler::g1_write_barrier_post_c2(MacroAssembler* masm,
|
||||
Register store_addr,
|
||||
Register new_val,
|
||||
Register tmp,
|
||||
Register tmp2,
|
||||
G1PostBarrierStubC2* stub) {
|
||||
const Register thread = r15_thread;
|
||||
stub->initialize_registers(thread, tmp, tmp2);
|
||||
|
||||
bool new_val_may_be_null = (stub->barrier_data() & G1C2BarrierPostNotNull) == 0;
|
||||
generate_post_barrier_fast_path(masm, store_addr, new_val, tmp, tmp2, *stub->continuation(), new_val_may_be_null);
|
||||
// If card is not young, jump to stub (slow path)
|
||||
__ jcc(Assembler::notEqual, *stub->entry());
|
||||
|
||||
__ bind(*stub->continuation());
|
||||
}
|
||||
|
||||
void G1BarrierSetAssembler::generate_c2_post_barrier_stub(MacroAssembler* masm,
|
||||
G1PostBarrierStubC2* stub) const {
|
||||
Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
|
||||
Label runtime;
|
||||
Register thread = stub->thread();
|
||||
Register tmp = stub->tmp1(); // tmp holds the card address.
|
||||
Register tmp2 = stub->tmp2();
|
||||
assert(stub->tmp3() == noreg, "not needed in this platform");
|
||||
|
||||
__ bind(*stub->entry());
|
||||
generate_post_barrier_slow_path(masm, thread, tmp, tmp2, *stub->continuation(), runtime);
|
||||
|
||||
__ bind(runtime);
|
||||
generate_c2_barrier_runtime_call(masm, stub, tmp, CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry));
|
||||
__ jmp(*stub->continuation());
|
||||
bool new_val_may_be_null) {
|
||||
Label done;
|
||||
generate_post_barrier_fast_path(masm, store_addr, new_val, tmp, done, new_val_may_be_null);
|
||||
__ bind(done);
|
||||
}
|
||||
|
||||
#endif // COMPILER2
|
||||
@ -441,8 +423,7 @@ void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet deco
|
||||
g1_write_barrier_post(masm /*masm*/,
|
||||
tmp1 /* store_adr */,
|
||||
new_val /* new_val */,
|
||||
tmp3 /* tmp */,
|
||||
tmp2 /* tmp2 */);
|
||||
tmp3 /* tmp */);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -476,21 +457,19 @@ void G1BarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrier
|
||||
|
||||
}
|
||||
|
||||
void G1BarrierSetAssembler::gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub) {
|
||||
G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
|
||||
__ bind(*stub->entry());
|
||||
assert(stub->addr()->is_register(), "Precondition.");
|
||||
assert(stub->new_val()->is_register(), "Precondition.");
|
||||
Register new_val_reg = stub->new_val()->as_register();
|
||||
__ cmpptr(new_val_reg, NULL_WORD);
|
||||
__ jcc(Assembler::equal, *stub->continuation());
|
||||
ce->store_parameter(stub->addr()->as_pointer_register(), 0);
|
||||
__ call(RuntimeAddress(bs->post_barrier_c1_runtime_code_blob()->code_begin()));
|
||||
__ jmp(*stub->continuation());
|
||||
}
|
||||
|
||||
#undef __
|
||||
|
||||
void G1BarrierSetAssembler::g1_write_barrier_post_c1(MacroAssembler* masm,
|
||||
Register store_addr,
|
||||
Register new_val,
|
||||
Register thread,
|
||||
Register tmp1,
|
||||
Register tmp2 /* unused on x86 */) {
|
||||
Label done;
|
||||
generate_post_barrier_fast_path(masm, store_addr, new_val, tmp1, done, true /* new_val_may_be_null */);
|
||||
masm->bind(done);
|
||||
}
|
||||
|
||||
#define __ sasm->
|
||||
|
||||
void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) {
|
||||
@ -555,78 +534,6 @@ void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler*
|
||||
__ epilogue();
|
||||
}
|
||||
|
||||
void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler* sasm) {
|
||||
__ prologue("g1_post_barrier", false);
|
||||
|
||||
CardTableBarrierSet* ct =
|
||||
barrier_set_cast<CardTableBarrierSet>(BarrierSet::barrier_set());
|
||||
|
||||
Label done;
|
||||
Label enqueued;
|
||||
Label runtime;
|
||||
|
||||
// At this point we know new_value is non-null and the new_value crosses regions.
|
||||
// Must check to see if card is already dirty
|
||||
|
||||
const Register thread = r15_thread;
|
||||
|
||||
Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset()));
|
||||
Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset()));
|
||||
|
||||
__ push_ppx(rax);
|
||||
__ push_ppx(rcx);
|
||||
|
||||
const Register cardtable = rax;
|
||||
const Register card_addr = rcx;
|
||||
|
||||
__ load_parameter(0, card_addr);
|
||||
__ shrptr(card_addr, CardTable::card_shift());
|
||||
// Do not use ExternalAddress to load 'byte_map_base', since 'byte_map_base' is NOT
|
||||
// a valid address and therefore is not properly handled by the relocation code.
|
||||
__ movptr(cardtable, (intptr_t)ct->card_table()->byte_map_base());
|
||||
__ addptr(card_addr, cardtable);
|
||||
|
||||
__ cmpb(Address(card_addr, 0), G1CardTable::g1_young_card_val());
|
||||
__ jcc(Assembler::equal, done);
|
||||
|
||||
__ membar(Assembler::Membar_mask_bits(Assembler::StoreLoad));
|
||||
__ cmpb(Address(card_addr, 0), CardTable::dirty_card_val());
|
||||
__ jcc(Assembler::equal, done);
|
||||
|
||||
// storing region crossing non-null, card is clean.
|
||||
// dirty card and log.
|
||||
|
||||
__ movb(Address(card_addr, 0), CardTable::dirty_card_val());
|
||||
|
||||
const Register tmp = rdx;
|
||||
__ push_ppx(rdx);
|
||||
|
||||
__ movptr(tmp, queue_index);
|
||||
__ testptr(tmp, tmp);
|
||||
__ jcc(Assembler::zero, runtime);
|
||||
__ subptr(tmp, wordSize);
|
||||
__ movptr(queue_index, tmp);
|
||||
__ addptr(tmp, buffer);
|
||||
__ movptr(Address(tmp, 0), card_addr);
|
||||
__ jmp(enqueued);
|
||||
|
||||
__ bind(runtime);
|
||||
__ push_call_clobbered_registers();
|
||||
|
||||
__ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, thread);
|
||||
|
||||
__ pop_call_clobbered_registers();
|
||||
|
||||
__ bind(enqueued);
|
||||
__ pop_ppx(rdx);
|
||||
|
||||
__ bind(done);
|
||||
__ pop_ppx(rcx);
|
||||
__ pop_ppx(rax);
|
||||
|
||||
__ epilogue();
|
||||
}
|
||||
|
||||
#undef __
|
||||
|
||||
#endif // COMPILER1
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2018, 2024, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -31,10 +31,8 @@
|
||||
class LIR_Assembler;
|
||||
class StubAssembler;
|
||||
class G1PreBarrierStub;
|
||||
class G1PostBarrierStub;
|
||||
class G1BarrierStubC2;
|
||||
class G1PreBarrierStubC2;
|
||||
class G1PostBarrierStubC2;
|
||||
|
||||
class G1BarrierSetAssembler: public ModRefBarrierSetAssembler {
|
||||
protected:
|
||||
@ -51,22 +49,28 @@ class G1BarrierSetAssembler: public ModRefBarrierSetAssembler {
|
||||
void g1_write_barrier_post(MacroAssembler* masm,
|
||||
Register store_addr,
|
||||
Register new_val,
|
||||
Register tmp,
|
||||
Register tmp2);
|
||||
Register tmp);
|
||||
|
||||
virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
|
||||
Address dst, Register val, Register tmp1, Register tmp2, Register tmp3);
|
||||
|
||||
public:
|
||||
void gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub);
|
||||
void gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub);
|
||||
|
||||
void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm);
|
||||
void generate_c1_post_barrier_runtime_stub(StubAssembler* sasm);
|
||||
|
||||
virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
|
||||
Register dst, Address src, Register tmp1);
|
||||
|
||||
#ifdef COMPILER1
|
||||
void gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub);
|
||||
|
||||
void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm);
|
||||
|
||||
void g1_write_barrier_post_c1(MacroAssembler* masm,
|
||||
Register store_addr,
|
||||
Register new_val,
|
||||
Register thread,
|
||||
Register tmp1,
|
||||
Register tmp2);
|
||||
#endif
|
||||
|
||||
#ifdef COMPILER2
|
||||
void g1_write_barrier_pre_c2(MacroAssembler* masm,
|
||||
Register obj,
|
||||
@ -79,10 +83,7 @@ class G1BarrierSetAssembler: public ModRefBarrierSetAssembler {
|
||||
Register store_addr,
|
||||
Register new_val,
|
||||
Register tmp,
|
||||
Register tmp2,
|
||||
G1PostBarrierStubC2* c2_stub);
|
||||
void generate_c2_post_barrier_stub(MacroAssembler* masm,
|
||||
G1PostBarrierStubC2* stub) const;
|
||||
bool new_val_may_be_null);
|
||||
#endif // COMPILER2
|
||||
};
|
||||
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
//
|
||||
// Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
|
||||
// Copyright (c) 2024, 2025, Oracle and/or its affiliates. All rights reserved.
|
||||
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
//
|
||||
// This code is free software; you can redistribute it and/or modify it
|
||||
@ -59,15 +59,14 @@ static void write_barrier_post(MacroAssembler* masm,
|
||||
const MachNode* node,
|
||||
Register store_addr,
|
||||
Register new_val,
|
||||
Register tmp1,
|
||||
Register tmp2) {
|
||||
if (!G1PostBarrierStubC2::needs_barrier(node)) {
|
||||
Register tmp1) {
|
||||
if (!G1BarrierStubC2::needs_post_barrier(node)) {
|
||||
return;
|
||||
}
|
||||
Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
|
||||
G1BarrierSetAssembler* g1_asm = static_cast<G1BarrierSetAssembler*>(BarrierSet::barrier_set()->barrier_set_assembler());
|
||||
G1PostBarrierStubC2* const stub = G1PostBarrierStubC2::create(node);
|
||||
g1_asm->g1_write_barrier_post_c2(masm, store_addr, new_val, tmp1, tmp2, stub);
|
||||
bool new_val_may_be_null = G1BarrierStubC2::post_new_val_may_be_null(node);
|
||||
g1_asm->g1_write_barrier_post_c2(masm, store_addr, new_val, tmp1, new_val_may_be_null);
|
||||
}
|
||||
|
||||
%}
|
||||
@ -95,8 +94,7 @@ instruct g1StoreP(memory mem, any_RegP src, rRegP tmp1, rRegP tmp2, rRegP tmp3,
|
||||
write_barrier_post(masm, this,
|
||||
$tmp1$$Register /* store_addr */,
|
||||
$src$$Register /* new_val */,
|
||||
$tmp3$$Register /* tmp1 */,
|
||||
$tmp2$$Register /* tmp2 */);
|
||||
$tmp3$$Register /* tmp1 */);
|
||||
%}
|
||||
ins_pipe(ialu_mem_reg);
|
||||
%}
|
||||
@ -127,8 +125,7 @@ instruct g1StoreN(memory mem, rRegN src, rRegP tmp1, rRegP tmp2, rRegP tmp3, rFl
|
||||
write_barrier_post(masm, this,
|
||||
$tmp1$$Register /* store_addr */,
|
||||
$tmp2$$Register /* new_val */,
|
||||
$tmp3$$Register /* tmp1 */,
|
||||
$tmp2$$Register /* tmp2 */);
|
||||
$tmp3$$Register /* tmp1 */);
|
||||
%}
|
||||
ins_pipe(ialu_mem_reg);
|
||||
%}
|
||||
@ -158,8 +155,7 @@ instruct g1EncodePAndStoreN(memory mem, any_RegP src, rRegP tmp1, rRegP tmp2, rR
|
||||
write_barrier_post(masm, this,
|
||||
$tmp1$$Register /* store_addr */,
|
||||
$src$$Register /* new_val */,
|
||||
$tmp3$$Register /* tmp1 */,
|
||||
$tmp2$$Register /* tmp2 */);
|
||||
$tmp3$$Register /* tmp1 */);
|
||||
%}
|
||||
ins_pipe(ialu_mem_reg);
|
||||
%}
|
||||
@ -187,8 +183,7 @@ instruct g1CompareAndExchangeP(indirect mem, rRegP newval, rRegP tmp1, rRegP tmp
|
||||
write_barrier_post(masm, this,
|
||||
$mem$$Register /* store_addr */,
|
||||
$tmp1$$Register /* new_val */,
|
||||
$tmp2$$Register /* tmp1 */,
|
||||
$tmp3$$Register /* tmp2 */);
|
||||
$tmp2$$Register /* tmp1 */);
|
||||
%}
|
||||
ins_pipe(pipe_cmpxchg);
|
||||
%}
|
||||
@ -214,8 +209,7 @@ instruct g1CompareAndExchangeN(indirect mem, rRegN newval, rRegP tmp1, rRegP tmp
|
||||
write_barrier_post(masm, this,
|
||||
$mem$$Register /* store_addr */,
|
||||
$tmp1$$Register /* new_val */,
|
||||
$tmp2$$Register /* tmp1 */,
|
||||
$tmp3$$Register /* tmp2 */);
|
||||
$tmp2$$Register /* tmp1 */);
|
||||
%}
|
||||
ins_pipe(pipe_cmpxchg);
|
||||
%}
|
||||
@ -246,8 +240,7 @@ instruct g1CompareAndSwapP(rRegI res, indirect mem, rRegP newval, rRegP tmp1, rR
|
||||
write_barrier_post(masm, this,
|
||||
$mem$$Register /* store_addr */,
|
||||
$tmp1$$Register /* new_val */,
|
||||
$tmp2$$Register /* tmp1 */,
|
||||
$tmp3$$Register /* tmp2 */);
|
||||
$tmp2$$Register /* tmp1 */);
|
||||
%}
|
||||
ins_pipe(pipe_cmpxchg);
|
||||
%}
|
||||
@ -279,8 +272,7 @@ instruct g1CompareAndSwapN(rRegI res, indirect mem, rRegN newval, rRegP tmp1, rR
|
||||
write_barrier_post(masm, this,
|
||||
$mem$$Register /* store_addr */,
|
||||
$tmp1$$Register /* new_val */,
|
||||
$tmp2$$Register /* tmp1 */,
|
||||
$tmp3$$Register /* tmp2 */);
|
||||
$tmp2$$Register /* tmp1 */);
|
||||
%}
|
||||
ins_pipe(pipe_cmpxchg);
|
||||
%}
|
||||
@ -303,8 +295,7 @@ instruct g1GetAndSetP(indirect mem, rRegP newval, rRegP tmp1, rRegP tmp2, rRegP
|
||||
write_barrier_post(masm, this,
|
||||
$mem$$Register /* store_addr */,
|
||||
$tmp1$$Register /* new_val */,
|
||||
$tmp2$$Register /* tmp1 */,
|
||||
$tmp3$$Register /* tmp2 */);
|
||||
$tmp2$$Register /* tmp1 */);
|
||||
%}
|
||||
ins_pipe(pipe_cmpxchg);
|
||||
%}
|
||||
@ -328,8 +319,7 @@ instruct g1GetAndSetN(indirect mem, rRegN newval, rRegP tmp1, rRegP tmp2, rRegP
|
||||
write_barrier_post(masm, this,
|
||||
$mem$$Register /* store_addr */,
|
||||
$tmp1$$Register /* new_val */,
|
||||
$tmp2$$Register /* tmp1 */,
|
||||
$tmp3$$Register /* tmp2 */);
|
||||
$tmp2$$Register /* tmp1 */);
|
||||
%}
|
||||
ins_pipe(pipe_cmpxchg);
|
||||
%}
|
||||
|
||||
@ -1365,7 +1365,6 @@ void AOTCodeAddressTable::init_extrs() {
|
||||
#endif // COMPILER2
|
||||
|
||||
#if INCLUDE_G1GC
|
||||
SET_ADDRESS(_extrs, G1BarrierSetRuntime::write_ref_field_post_entry);
|
||||
SET_ADDRESS(_extrs, G1BarrierSetRuntime::write_ref_field_pre_entry);
|
||||
#endif
|
||||
#if INCLUDE_SHENANDOAHGC
|
||||
|
||||
@ -23,12 +23,15 @@
|
||||
*/
|
||||
|
||||
#include "c1/c1_CodeStubs.hpp"
|
||||
#include "c1/c1_LIRAssembler.hpp"
|
||||
#include "c1/c1_LIRGenerator.hpp"
|
||||
#include "c1/c1_MacroAssembler.hpp"
|
||||
#include "gc/g1/c1/g1BarrierSetC1.hpp"
|
||||
#include "gc/g1/g1BarrierSet.hpp"
|
||||
#include "gc/g1/g1BarrierSetAssembler.hpp"
|
||||
#include "gc/g1/g1HeapRegion.hpp"
|
||||
#include "gc/g1/g1ThreadLocalData.hpp"
|
||||
#include "utilities/formatBuffer.hpp"
|
||||
#include "utilities/macros.hpp"
|
||||
|
||||
#ifdef ASSERT
|
||||
@ -42,11 +45,6 @@ void G1PreBarrierStub::emit_code(LIR_Assembler* ce) {
|
||||
bs->gen_pre_barrier_stub(ce, this);
|
||||
}
|
||||
|
||||
void G1PostBarrierStub::emit_code(LIR_Assembler* ce) {
|
||||
G1BarrierSetAssembler* bs = (G1BarrierSetAssembler*)BarrierSet::barrier_set()->barrier_set_assembler();
|
||||
bs->gen_post_barrier_stub(ce, this);
|
||||
}
|
||||
|
||||
void G1BarrierSetC1::pre_barrier(LIRAccess& access, LIR_Opr addr_opr,
|
||||
LIR_Opr pre_val, CodeEmitInfo* info) {
|
||||
LIRGenerator* gen = access.gen();
|
||||
@ -114,6 +112,87 @@ void G1BarrierSetC1::pre_barrier(LIRAccess& access, LIR_Opr addr_opr,
|
||||
__ branch_destination(slow->continuation());
|
||||
}
|
||||
|
||||
class LIR_OpG1PostBarrier : public LIR_Op {
|
||||
friend class LIR_OpVisitState;
|
||||
|
||||
private:
|
||||
LIR_Opr _addr;
|
||||
LIR_Opr _new_val;
|
||||
LIR_Opr _thread;
|
||||
LIR_Opr _tmp1;
|
||||
LIR_Opr _tmp2;
|
||||
|
||||
public:
|
||||
LIR_OpG1PostBarrier(LIR_Opr addr,
|
||||
LIR_Opr new_val,
|
||||
LIR_Opr thread,
|
||||
LIR_Opr tmp1,
|
||||
LIR_Opr tmp2)
|
||||
: LIR_Op(lir_none, lir_none, nullptr),
|
||||
_addr(addr),
|
||||
_new_val(new_val),
|
||||
_thread(thread),
|
||||
_tmp1(tmp1),
|
||||
_tmp2(tmp2)
|
||||
{}
|
||||
|
||||
virtual void visit(LIR_OpVisitState* state) {
|
||||
state->do_input(_addr);
|
||||
state->do_input(_new_val);
|
||||
state->do_input(_thread);
|
||||
|
||||
// Use temps to enforce different registers.
|
||||
state->do_temp(_addr);
|
||||
state->do_temp(_new_val);
|
||||
state->do_temp(_thread);
|
||||
state->do_temp(_tmp1);
|
||||
state->do_temp(_tmp2);
|
||||
|
||||
if (_info != nullptr) {
|
||||
state->do_info(_info);
|
||||
}
|
||||
}
|
||||
|
||||
virtual void emit_code(LIR_Assembler* ce) {
|
||||
if (_info != nullptr) {
|
||||
ce->add_debug_info_for_null_check_here(_info);
|
||||
}
|
||||
|
||||
Register addr = _addr->as_pointer_register();
|
||||
Register new_val = _new_val->as_pointer_register();
|
||||
Register thread = _thread->as_pointer_register();
|
||||
Register tmp1 = _tmp1->as_pointer_register();
|
||||
Register tmp2 = _tmp2->as_pointer_register();
|
||||
|
||||
// This may happen for a store of x.a = x - we do not need a post barrier for those
|
||||
// as the cross-region test will always exit early anyway.
|
||||
// The post barrier implementations can assume that addr and new_val are different
|
||||
// then.
|
||||
if (addr == new_val) {
|
||||
ce->masm()->block_comment(err_msg("same addr/new_val due to self-referential store with imprecise card mark %s", addr->name()));
|
||||
return;
|
||||
}
|
||||
|
||||
G1BarrierSetAssembler* bs_asm = static_cast<G1BarrierSetAssembler*>(BarrierSet::barrier_set()->barrier_set_assembler());
|
||||
bs_asm->g1_write_barrier_post_c1(ce->masm(), addr, new_val, thread, tmp1, tmp2);
|
||||
}
|
||||
|
||||
virtual void print_instr(outputStream* out) const {
|
||||
_addr->print(out); out->print(" ");
|
||||
_new_val->print(out); out->print(" ");
|
||||
_thread->print(out); out->print(" ");
|
||||
_tmp1->print(out); out->print(" ");
|
||||
_tmp2->print(out); out->print(" ");
|
||||
out->cr();
|
||||
}
|
||||
|
||||
#ifndef PRODUCT
|
||||
virtual const char* name() const {
|
||||
return "lir_g1_post_barrier";
|
||||
}
|
||||
#endif // PRODUCT
|
||||
};
|
||||
|
||||
void G1BarrierSetC1::post_barrier(LIRAccess& access, LIR_Opr addr, LIR_Opr new_val) {
  LIRGenerator* gen = access.gen();
  DecoratorSet decorators = access.decorators();
@ -150,29 +229,11 @@ void G1BarrierSetC1::post_barrier(LIRAccess& access, LIR_Opr addr, LIR_Opr new_v
  }
  assert(addr->is_register(), "must be a register at this point");

  LIR_Opr xor_res = gen->new_pointer_register();
  LIR_Opr xor_shift_res = gen->new_pointer_register();
  if (two_operand_lir_form) {
    __ move(addr, xor_res);
    __ logical_xor(xor_res, new_val, xor_res);
    __ move(xor_res, xor_shift_res);
    __ unsigned_shift_right(xor_shift_res,
                            LIR_OprFact::intConst(checked_cast<jint>(G1HeapRegion::LogOfHRGrainBytes)),
                            xor_shift_res,
                            LIR_Opr::illegalOpr());
  } else {
    __ logical_xor(addr, new_val, xor_res);
    __ unsigned_shift_right(xor_res,
                            LIR_OprFact::intConst(checked_cast<jint>(G1HeapRegion::LogOfHRGrainBytes)),
                            xor_shift_res,
                            LIR_Opr::illegalOpr());
  }

  __ cmp(lir_cond_notEqual, xor_shift_res, LIR_OprFact::intptrConst(NULL_WORD));

  CodeStub* slow = new G1PostBarrierStub(addr, new_val);
  __ branch(lir_cond_notEqual, slow);
  __ branch_destination(slow->continuation());
  __ append(new LIR_OpG1PostBarrier(addr,
                                    new_val,
                                    gen->getThreadPointer() /* thread */,
                                    gen->new_pointer_register() /* tmp1 */,
                                    gen->new_pointer_register() /* tmp2 */));
}

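Where the old code expanded the region-crossing test into generic LIR (the xor/shift/compare above) and branched to a G1PostBarrierStub, the new code appends a single pseudo-op and lets each backend expand the whole barrier at code-emission time. The pseudo-op's emit_code (see LIR_OpG1PostBarrier above) also skips self-referential stores, which is safe for the following reason:

    // For x.a = x with an imprecise (object-start) card mark, addr == new_val, so the
    // region-crossing filter
    //     ((addr ^ new_val) >> G1HeapRegion::LogOfHRGrainBytes) == 0
    // is trivially true and the barrier would fall through without marking anything anyway;
    // not emitting it at all just saves code size.
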
void G1BarrierSetC1::load_at_resolved(LIRAccess& access, LIR_Opr result) {
|
||||
@ -207,20 +268,9 @@ class C1G1PreBarrierCodeGenClosure : public StubAssemblerCodeGenClosure {
|
||||
}
|
||||
};
|
||||
|
||||
class C1G1PostBarrierCodeGenClosure : public StubAssemblerCodeGenClosure {
|
||||
virtual OopMapSet* generate_code(StubAssembler* sasm) {
|
||||
G1BarrierSetAssembler* bs = (G1BarrierSetAssembler*)BarrierSet::barrier_set()->barrier_set_assembler();
|
||||
bs->generate_c1_post_barrier_runtime_stub(sasm);
|
||||
return nullptr;
|
||||
}
|
||||
};
|
||||
|
||||
bool G1BarrierSetC1::generate_c1_runtime_stubs(BufferBlob* buffer_blob) {
|
||||
C1G1PreBarrierCodeGenClosure pre_code_gen_cl;
|
||||
C1G1PostBarrierCodeGenClosure post_code_gen_cl;
|
||||
_pre_barrier_c1_runtime_code_blob = Runtime1::generate_blob(buffer_blob, StubId::NO_STUBID, "g1_pre_barrier_slow",
|
||||
false, &pre_code_gen_cl);
|
||||
_post_barrier_c1_runtime_code_blob = Runtime1::generate_blob(buffer_blob, StubId::NO_STUBID, "g1_post_barrier_slow",
|
||||
false, &post_code_gen_cl);
|
||||
return _pre_barrier_c1_runtime_code_blob != nullptr && _post_barrier_c1_runtime_code_blob != nullptr;
|
||||
return _pre_barrier_c1_runtime_code_blob != nullptr;
|
||||
}
|
||||
|
||||
@ -91,40 +91,11 @@ class G1PreBarrierStub: public CodeStub {
|
||||
#endif // PRODUCT
|
||||
};
|
||||
|
||||
class G1PostBarrierStub: public CodeStub {
|
||||
friend class G1BarrierSetC1;
|
||||
private:
|
||||
LIR_Opr _addr;
|
||||
LIR_Opr _new_val;
|
||||
|
||||
public:
|
||||
// addr (the address of the object head) and new_val must be registers.
|
||||
G1PostBarrierStub(LIR_Opr addr, LIR_Opr new_val): _addr(addr), _new_val(new_val) {
|
||||
FrameMap* f = Compilation::current()->frame_map();
|
||||
f->update_reserved_argument_area_size(2 * BytesPerWord);
|
||||
}
|
||||
|
||||
LIR_Opr addr() const { return _addr; }
|
||||
LIR_Opr new_val() const { return _new_val; }
|
||||
|
||||
virtual void emit_code(LIR_Assembler* e);
|
||||
virtual void visit(LIR_OpVisitState* visitor) {
|
||||
// don't pass in the code emit info since it's processed in the fast path
|
||||
visitor->do_slow_case();
|
||||
visitor->do_input(_addr);
|
||||
visitor->do_input(_new_val);
|
||||
}
|
||||
#ifndef PRODUCT
|
||||
virtual void print_name(outputStream* out) const { out->print("G1PostBarrierStub"); }
|
||||
#endif // PRODUCT
|
||||
};
|
||||
|
||||
class CodeBlob;
|
||||
|
||||
class G1BarrierSetC1 : public ModRefBarrierSetC1 {
|
||||
protected:
|
||||
CodeBlob* _pre_barrier_c1_runtime_code_blob;
|
||||
CodeBlob* _post_barrier_c1_runtime_code_blob;
|
||||
|
||||
virtual void pre_barrier(LIRAccess& access, LIR_Opr addr_opr,
|
||||
LIR_Opr pre_val, CodeEmitInfo* info);
|
||||
@ -134,11 +105,9 @@ class G1BarrierSetC1 : public ModRefBarrierSetC1 {
|
||||
|
||||
public:
|
||||
G1BarrierSetC1()
|
||||
: _pre_barrier_c1_runtime_code_blob(nullptr),
|
||||
_post_barrier_c1_runtime_code_blob(nullptr) {}
|
||||
: _pre_barrier_c1_runtime_code_blob(nullptr) {}
|
||||
|
||||
CodeBlob* pre_barrier_c1_runtime_code_blob() { return _pre_barrier_c1_runtime_code_blob; }
|
||||
CodeBlob* post_barrier_c1_runtime_code_blob() { return _post_barrier_c1_runtime_code_blob; }
|
||||
|
||||
virtual bool generate_c1_runtime_stubs(BufferBlob* buffer_blob);
|
||||
};
|
||||
|
||||
@ -298,7 +298,13 @@ uint G1BarrierSetC2::estimated_barrier_size(const Node* node) const {
|
||||
nodes += 6;
|
||||
}
|
||||
if ((barrier_data & G1C2BarrierPost) != 0) {
|
||||
nodes += 60;
|
||||
// Approximate the number of nodes needed; an if costs 4 nodes (Cmp, Bool,
|
||||
// If, If projection), any other (Assembly) instruction is approximated with
|
||||
// a cost of 1.
|
||||
nodes += 4 // base cost for the card write containing getting base offset, address calculation and the card write;
|
||||
+ 6 // same region check: Uncompress (new_val) oop, xor, shr, (cmp), jmp
|
||||
+ 4 // new_val is null check
|
||||
+ (UseCondCardMark ? 4 : 0); // card not clean check.
|
||||
}
|
||||
return nodes;
|
||||
}
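The new estimate is just a sum of small per-check costs instead of the old flat 60 nodes. A minimal sketch of the same arithmetic, not part of the patch and with UseCondCardMark assumed to be a plain bool:

// Sketch only: mirrors the cost model above, assuming an if costs 4 nodes
// and any other instruction costs 1.
static unsigned estimated_post_barrier_nodes(bool use_cond_card_mark) {
  unsigned nodes = 4;                   // card address computation and the card write
  nodes += 6;                           // same-region check (uncompress, xor, shr, cmp, jmp)
  nodes += 4;                           // new_val-is-null check
  nodes += use_cond_card_mark ? 4 : 0;  // card-not-clean check
  return nodes;                         // 14 or 18 nodes in total
}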
@ -386,8 +392,9 @@ public:
}

bool needs_liveness_data(const MachNode* mach) const {
return G1PreBarrierStubC2::needs_barrier(mach) ||
G1PostBarrierStubC2::needs_barrier(mach);
// Liveness data is only required to compute registers that must be preserved
// across the runtime call in the pre-barrier stub.
return G1BarrierStubC2::needs_pre_barrier(mach);
}

bool needs_livein_data() const {
@ -401,10 +408,22 @@ static G1BarrierSetC2State* barrier_set_state() {

G1BarrierStubC2::G1BarrierStubC2(const MachNode* node) : BarrierStubC2(node) {}

bool G1BarrierStubC2::needs_pre_barrier(const MachNode* node) {
return (node->barrier_data() & G1C2BarrierPre) != 0;
}

bool G1BarrierStubC2::needs_post_barrier(const MachNode* node) {
return (node->barrier_data() & G1C2BarrierPost) != 0;
}

bool G1BarrierStubC2::post_new_val_may_be_null(const MachNode* node) {
return (node->barrier_data() & G1C2BarrierPostNotNull) == 0;
}

G1PreBarrierStubC2::G1PreBarrierStubC2(const MachNode* node) : G1BarrierStubC2(node) {}

bool G1PreBarrierStubC2::needs_barrier(const MachNode* node) {
return (node->barrier_data() & G1C2BarrierPre) != 0;
return needs_pre_barrier(node);
}

G1PreBarrierStubC2* G1PreBarrierStubC2::create(const MachNode* node) {
@ -448,48 +467,6 @@ void G1PreBarrierStubC2::emit_code(MacroAssembler& masm) {
bs->generate_c2_pre_barrier_stub(&masm, this);
}

G1PostBarrierStubC2::G1PostBarrierStubC2(const MachNode* node) : G1BarrierStubC2(node) {}

bool G1PostBarrierStubC2::needs_barrier(const MachNode* node) {
return (node->barrier_data() & G1C2BarrierPost) != 0;
}

G1PostBarrierStubC2* G1PostBarrierStubC2::create(const MachNode* node) {
G1PostBarrierStubC2* const stub = new (Compile::current()->comp_arena()) G1PostBarrierStubC2(node);
if (!Compile::current()->output()->in_scratch_emit_size()) {
barrier_set_state()->stubs()->append(stub);
}
return stub;
}

void G1PostBarrierStubC2::initialize_registers(Register thread, Register tmp1, Register tmp2, Register tmp3) {
_thread = thread;
_tmp1 = tmp1;
_tmp2 = tmp2;
_tmp3 = tmp3;
}

Register G1PostBarrierStubC2::thread() const {
return _thread;
}

Register G1PostBarrierStubC2::tmp1() const {
return _tmp1;
}

Register G1PostBarrierStubC2::tmp2() const {
return _tmp2;
}

Register G1PostBarrierStubC2::tmp3() const {
return _tmp3;
}

void G1PostBarrierStubC2::emit_code(MacroAssembler& masm) {
G1BarrierSetAssembler* bs = static_cast<G1BarrierSetAssembler*>(BarrierSet::barrier_set()->barrier_set_assembler());
bs->generate_c2_post_barrier_stub(&masm, this);
}

void* G1BarrierSetC2::create_barrier_state(Arena* comp_arena) const {
return new (comp_arena) G1BarrierSetC2State(comp_arena);
}

@ -37,6 +37,10 @@ const int G1C2BarrierPostNotNull = 4;

class G1BarrierStubC2 : public BarrierStubC2 {
public:
static bool needs_pre_barrier(const MachNode* node);
static bool needs_post_barrier(const MachNode* node);
static bool post_new_val_may_be_null(const MachNode* node);

G1BarrierStubC2(const MachNode* node);
virtual void emit_code(MacroAssembler& masm) = 0;
};
@ -64,27 +68,6 @@ public:
virtual void emit_code(MacroAssembler& masm);
};

class G1PostBarrierStubC2 : public G1BarrierStubC2 {
private:
Register _thread;
Register _tmp1;
Register _tmp2;
Register _tmp3;

protected:
G1PostBarrierStubC2(const MachNode* node);

public:
static bool needs_barrier(const MachNode* node);
static G1PostBarrierStubC2* create(const MachNode* node);
void initialize_registers(Register thread, Register tmp1 = noreg, Register tmp2 = noreg, Register tmp3 = noreg);
Register thread() const;
Register tmp1() const;
Register tmp2() const;
Register tmp3() const;
virtual void emit_code(MacroAssembler& masm);
};

class G1BarrierSetC2: public CardTableBarrierSetC2 {
private:
void analyze_dominating_barriers() const;

@ -262,9 +262,6 @@ HeapWord* G1Allocator::survivor_attempt_allocation(uint node_index,
}
}
}
if (result != nullptr) {
_g1h->dirty_young_block(result, *actual_word_size);
}
return result;
}


@ -37,12 +37,10 @@
// They were chosen by running GCOld and SPECjbb on debris with different
// numbers of GC threads and choosing them based on the results

static double cost_per_logged_card_ms_defaults[] = {
0.01, 0.005, 0.005, 0.003, 0.003, 0.002, 0.002, 0.0015
};
static double cost_per_pending_card_ms_default = 0.01;

// all the same
static double young_card_scan_to_merge_ratio_defaults[] = {
static double young_card_merge_to_scan_ratio_defaults[] = {
1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0
};

@ -78,8 +76,7 @@ G1Analytics::G1Analytics(const G1Predictions* predictor) :
_concurrent_gc_cpu_time_ms(),
_concurrent_refine_rate_ms_seq(TruncatedSeqLength),
_dirtied_cards_rate_ms_seq(TruncatedSeqLength),
_dirtied_cards_in_thread_buffers_seq(TruncatedSeqLength),
_card_scan_to_merge_ratio_seq(TruncatedSeqLength),
_card_merge_to_scan_ratio_seq(TruncatedSeqLength),
_cost_per_card_scan_ms_seq(TruncatedSeqLength),
_cost_per_card_merge_ms_seq(TruncatedSeqLength),
_cost_per_code_root_ms_seq(TruncatedSeqLength),
@ -87,6 +84,7 @@ G1Analytics::G1Analytics(const G1Predictions* predictor) :
_pending_cards_seq(TruncatedSeqLength),
_card_rs_length_seq(TruncatedSeqLength),
_code_root_rs_length_seq(TruncatedSeqLength),
_merge_refinement_table_ms_seq(TruncatedSeqLength),
_constant_other_time_ms_seq(TruncatedSeqLength),
_young_other_cost_per_region_ms_seq(TruncatedSeqLength),
_non_young_other_cost_per_region_ms_seq(TruncatedSeqLength),
@ -100,17 +98,17 @@ G1Analytics::G1Analytics(const G1Predictions* predictor) :

uint index = MIN2(ParallelGCThreads - 1, 7u);

// Start with inverse of maximum STW cost.
_concurrent_refine_rate_ms_seq.add(1/cost_per_logged_card_ms_defaults[0]);
// Some applications have very low rates for logging cards.
_concurrent_refine_rate_ms_seq.add(1 / cost_per_pending_card_ms_default);
// Some applications have very low rates for dirtying cards.
_dirtied_cards_rate_ms_seq.add(0.0);

_card_scan_to_merge_ratio_seq.set_initial(young_card_scan_to_merge_ratio_defaults[index]);
_card_merge_to_scan_ratio_seq.set_initial(young_card_merge_to_scan_ratio_defaults[index]);
_cost_per_card_scan_ms_seq.set_initial(young_only_cost_per_card_scan_ms_defaults[index]);
_card_rs_length_seq.set_initial(0);
_code_root_rs_length_seq.set_initial(0);
_cost_per_byte_copied_ms_seq.set_initial(cost_per_byte_ms_defaults[index]);

_merge_refinement_table_ms_seq.add(0);
_constant_other_time_ms_seq.add(constant_other_time_ms_defaults[index]);
_young_other_cost_per_region_ms_seq.add(young_other_cost_per_region_ms_defaults[index]);
_non_young_other_cost_per_region_ms_seq.add(non_young_other_cost_per_region_ms_defaults[index]);
@ -196,10 +194,6 @@ void G1Analytics::report_dirtied_cards_rate_ms(double cards_per_ms) {
_dirtied_cards_rate_ms_seq.add(cards_per_ms);
}

void G1Analytics::report_dirtied_cards_in_thread_buffers(size_t cards) {
_dirtied_cards_in_thread_buffers_seq.add(double(cards));
}

void G1Analytics::report_cost_per_card_scan_ms(double cost_per_card_ms, bool for_young_only_phase) {
_cost_per_card_scan_ms_seq.add(cost_per_card_ms, for_young_only_phase);
}
@ -212,8 +206,8 @@ void G1Analytics::report_cost_per_code_root_scan_ms(double cost_per_code_root_ms
_cost_per_code_root_ms_seq.add(cost_per_code_root_ms, for_young_only_phase);
}

void G1Analytics::report_card_scan_to_merge_ratio(double merge_to_scan_ratio, bool for_young_only_phase) {
_card_scan_to_merge_ratio_seq.add(merge_to_scan_ratio, for_young_only_phase);
void G1Analytics::report_card_merge_to_scan_ratio(double merge_to_scan_ratio, bool for_young_only_phase) {
_card_merge_to_scan_ratio_seq.add(merge_to_scan_ratio, for_young_only_phase);
}

void G1Analytics::report_cost_per_byte_ms(double cost_per_byte_ms, bool for_young_only_phase) {
@ -228,6 +222,10 @@ void G1Analytics::report_non_young_other_cost_per_region_ms(double other_cost_pe
_non_young_other_cost_per_region_ms_seq.add(other_cost_per_region_ms);
}

void G1Analytics::report_merge_refinement_table_time_ms(double merge_refinement_table_time_ms) {
_merge_refinement_table_ms_seq.add(merge_refinement_table_time_ms);
}

void G1Analytics::report_constant_other_time_ms(double constant_other_time_ms) {
_constant_other_time_ms_seq.add(constant_other_time_ms);
}
@ -260,12 +258,8 @@ double G1Analytics::predict_dirtied_cards_rate_ms() const {
return predict_zero_bounded(&_dirtied_cards_rate_ms_seq);
}

size_t G1Analytics::predict_dirtied_cards_in_thread_buffers() const {
return predict_size(&_dirtied_cards_in_thread_buffers_seq);
}

size_t G1Analytics::predict_scan_card_num(size_t card_rs_length, bool for_young_only_phase) const {
return card_rs_length * predict_in_unit_interval(&_card_scan_to_merge_ratio_seq, for_young_only_phase);
return card_rs_length * predict_in_unit_interval(&_card_merge_to_scan_ratio_seq, for_young_only_phase);
}

double G1Analytics::predict_card_merge_time_ms(size_t card_num, bool for_young_only_phase) const {
@ -284,6 +278,10 @@ double G1Analytics::predict_object_copy_time_ms(size_t bytes_to_copy, bool for_y
return bytes_to_copy * predict_zero_bounded(&_cost_per_byte_copied_ms_seq, for_young_only_phase);
}

double G1Analytics::predict_merge_refinement_table_time_ms() const {
return predict_zero_bounded(&_merge_refinement_table_ms_seq);
}

double G1Analytics::predict_constant_other_time_ms() const {
return predict_zero_bounded(&_constant_other_time_ms_seq);
}
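The renamed merge-to-scan ratio is applied as a plain multiplication over the predicted remembered-set length. A minimal sketch of that prediction step, not taken from the patch, assuming the ratio is already clamped to [0, 1]:

// Sketch: predicted number of cards to scan out of a remembered set of
// card_rs_length cards, given a merge-to-scan ratio in [0, 1].
static size_t predicted_scan_cards(size_t card_rs_length, double merge_to_scan_ratio) {
  return (size_t)(card_rs_length * merge_to_scan_ratio);
}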

@ -1,5 +1,5 @@
/*
* Copyright (c) 2016, 2022, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -56,14 +56,13 @@ class G1Analytics: public CHeapObj<mtGC> {

TruncatedSeq _concurrent_refine_rate_ms_seq;
TruncatedSeq _dirtied_cards_rate_ms_seq;
TruncatedSeq _dirtied_cards_in_thread_buffers_seq;
// The ratio between the number of scanned cards and actually merged cards, for
// young-only and mixed gcs.
G1PhaseDependentSeq _card_scan_to_merge_ratio_seq;
// The ratio between the number of merged cards to actually scanned cards for
// card based remembered sets, for young-only and mixed gcs.
G1PhaseDependentSeq _card_merge_to_scan_ratio_seq;

// The cost to scan a card during young-only and mixed gcs in ms.
G1PhaseDependentSeq _cost_per_card_scan_ms_seq;
// The cost to merge a card during young-only and mixed gcs in ms.
// The cost to merge a card from the remembered sets for non-young regions in ms.
G1PhaseDependentSeq _cost_per_card_merge_ms_seq;
// The cost to scan entries in the code root remembered set in ms.
G1PhaseDependentSeq _cost_per_code_root_ms_seq;
@ -74,6 +73,8 @@ class G1Analytics: public CHeapObj<mtGC> {
G1PhaseDependentSeq _card_rs_length_seq;
G1PhaseDependentSeq _code_root_rs_length_seq;

// Prediction for merging the refinement table to the card table during GC.
TruncatedSeq _merge_refinement_table_ms_seq;
TruncatedSeq _constant_other_time_ms_seq;
TruncatedSeq _young_other_cost_per_region_ms_seq;
TruncatedSeq _non_young_other_cost_per_region_ms_seq;
@ -149,14 +150,14 @@ public:
void report_alloc_rate_ms(double alloc_rate);
void report_concurrent_refine_rate_ms(double cards_per_ms);
void report_dirtied_cards_rate_ms(double cards_per_ms);
void report_dirtied_cards_in_thread_buffers(size_t num_cards);
void report_cost_per_card_scan_ms(double cost_per_remset_card_ms, bool for_young_only_phase);
void report_cost_per_card_merge_ms(double cost_per_card_ms, bool for_young_only_phase);
void report_cost_per_code_root_scan_ms(double cost_per_code_root_ms, bool for_young_only_phase);
void report_card_scan_to_merge_ratio(double cards_per_entry_ratio, bool for_young_only_phase);
void report_card_merge_to_scan_ratio(double merge_to_scan_ratio, bool for_young_only_phase);
void report_cost_per_byte_ms(double cost_per_byte_ms, bool for_young_only_phase);
void report_young_other_cost_per_region_ms(double other_cost_per_region_ms);
void report_non_young_other_cost_per_region_ms(double other_cost_per_region_ms);
void report_merge_refinement_table_time_ms(double pending_card_merge_time_ms);
void report_constant_other_time_ms(double constant_other_time_ms);
void report_pending_cards(double pending_cards, bool for_young_only_phase);
void report_card_rs_length(double card_rs_length, bool for_young_only_phase);
@ -167,7 +168,6 @@ public:

double predict_concurrent_refine_rate_ms() const;
double predict_dirtied_cards_rate_ms() const;
size_t predict_dirtied_cards_in_thread_buffers() const;

// Predict how many of the given remembered set of length card_rs_length will add to
// the number of total cards scanned.
@ -180,6 +180,7 @@ public:

double predict_object_copy_time_ms(size_t bytes_to_copy, bool for_young_only_phase) const;

double predict_merge_refinement_table_time_ms() const;
double predict_constant_other_time_ms() const;

double predict_young_other_time_ms(size_t young_num) const;

@ -68,6 +68,12 @@ void G1Arguments::initialize_alignments() {
if (FLAG_IS_DEFAULT(G1EagerReclaimRemSetThreshold)) {
FLAG_SET_ERGO(G1EagerReclaimRemSetThreshold, G1RemSetArrayOfCardsEntries);
}
// G1 prefers to use conditional card marking to avoid overwriting cards that
// have already been found to contain a to-collection set reference. This reduces
// refinement effort.
if (FLAG_IS_DEFAULT(UseCondCardMark)) {
FLAG_SET_ERGO(UseCondCardMark, true);
}
}

size_t G1Arguments::conservative_max_heap_alignment() {
@ -241,9 +247,8 @@ void G1Arguments::initialize() {

// Verify that the maximum parallelism isn't too high to eventually overflow
// the refcount in G1CardSetContainer.
uint max_parallel_refinement_threads = G1ConcRefinementThreads + G1DirtyCardQueueSet::num_par_ids();
uint const divisor = 3; // Safe divisor; we increment by 2 for each claim, but there is a small initial value.
if (max_parallel_refinement_threads > UINT_MAX / divisor) {
if (G1ConcRefinementThreads > UINT_MAX / divisor) {
vm_exit_during_initialization("Too large parallelism for remembered sets.");
}


@ -32,12 +32,14 @@
#include "gc/g1/g1ThreadLocalData.hpp"
#include "gc/shared/satbMarkQueue.hpp"
#include "logging/log.hpp"
#include "memory/iterator.hpp"
#include "oops/access.inline.hpp"
#include "oops/compressedOops.inline.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/interfaceSupport.inline.hpp"
#include "runtime/javaThread.hpp"
#include "runtime/orderAccess.hpp"
#include "runtime/threads.hpp"
#include "utilities/macros.hpp"
#ifdef COMPILER1
#include "gc/g1/c1/g1BarrierSetC1.hpp"
@ -49,18 +51,38 @@
class G1BarrierSetC1;
class G1BarrierSetC2;

G1BarrierSet::G1BarrierSet(G1CardTable* card_table) :
G1BarrierSet::G1BarrierSet(G1CardTable* card_table,
G1CardTable* refinement_table) :
CardTableBarrierSet(make_barrier_set_assembler<G1BarrierSetAssembler>(),
make_barrier_set_c1<G1BarrierSetC1>(),
make_barrier_set_c2<G1BarrierSetC2>(),
card_table,
BarrierSet::FakeRtti(BarrierSet::G1BarrierSet)),
_satb_mark_queue_buffer_allocator("SATB Buffer Allocator", G1SATBBufferSize),
_dirty_card_queue_buffer_allocator("DC Buffer Allocator", G1UpdateBufferSize),
_satb_mark_queue_set(&_satb_mark_queue_buffer_allocator),
_dirty_card_queue_set(&_dirty_card_queue_buffer_allocator)
_refinement_table(refinement_table)
{}

G1BarrierSet::~G1BarrierSet() {
delete _refinement_table;
}

void G1BarrierSet::swap_global_card_table() {
G1CardTable* temp = static_cast<G1CardTable*>(_card_table);
_card_table = _refinement_table;
_refinement_table = temp;
}

void G1BarrierSet::update_card_table_base(Thread* thread) {
#ifdef ASSERT
{
ResourceMark rm;
assert(thread->is_Java_thread(), "may only update card table base of JavaThreads, not %s", thread->name());
}
#endif
G1ThreadLocalData::set_byte_map_base(thread, _card_table->byte_map_base());
}

template <class T> void
G1BarrierSet::write_ref_array_pre_work(T* dst, size_t count) {
G1SATBMarkQueueSet& queue_set = G1BarrierSet::satb_mark_queue_set();
@ -89,28 +111,14 @@ void G1BarrierSet::write_ref_array_pre(narrowOop* dst, size_t count, bool dest_u
}
}

void G1BarrierSet::write_ref_field_post_slow(volatile CardValue* byte) {
// In the slow path, we know a card is not young
assert(*byte != G1CardTable::g1_young_card_val(), "slow path invoked without filtering");
OrderAccess::storeload();
if (*byte != G1CardTable::dirty_card_val()) {
*byte = G1CardTable::dirty_card_val();
Thread* thr = Thread::current();
G1DirtyCardQueue& queue = G1ThreadLocalData::dirty_card_queue(thr);
G1BarrierSet::dirty_card_queue_set().enqueue(queue, byte);
}
}

void G1BarrierSet::write_region(JavaThread* thread, MemRegion mr) {
if (mr.is_empty()) {
return;
}
volatile CardValue* byte = _card_table->byte_for(mr.start());
CardValue* last_byte = _card_table->byte_for(mr.last());

// skip young gen cards
if (*byte == G1CardTable::g1_young_card_val()) {
// MemRegion should not span multiple regions for the young gen.
// Skip writes to young gen.
if (G1CollectedHeap::heap()->heap_region_containing(mr.start())->is_young()) {
// MemRegion should not span multiple regions for arrays in young gen.
DEBUG_ONLY(G1HeapRegion* containing_hr = G1CollectedHeap::heap()->heap_region_containing(mr.start());)
assert(containing_hr->is_young(), "it should be young");
assert(containing_hr->is_in(mr.start()), "it should contain start");
@ -118,16 +126,25 @@ void G1BarrierSet::write_region(JavaThread* thread, MemRegion mr) {
return;
}

OrderAccess::storeload();
// Enqueue if necessary.
G1DirtyCardQueueSet& qset = G1BarrierSet::dirty_card_queue_set();
G1DirtyCardQueue& queue = G1ThreadLocalData::dirty_card_queue(thread);
// We need to make sure that we get the start/end byte information for the area
// to mark from the same card table to avoid getting confused in the mark loop
// further below - we might execute while the global card table is being switched.
//
// It does not matter which card table we write to: at worst we may write to the
// new card table (after the switching), which means that we will catch the
// marks next time.
// If we write to the old card table (after the switching, then the refinement
// table) the oncoming handshake will do the memory synchronization.
CardTable* card_table = AtomicAccess::load(&_card_table);

volatile CardValue* byte = card_table->byte_for(mr.start());
CardValue* last_byte = card_table->byte_for(mr.last());

// Dirty cards only if necessary.
for (; byte <= last_byte; byte++) {
CardValue bv = *byte;
assert(bv != G1CardTable::g1_young_card_val(), "Invalid card");
if (bv != G1CardTable::dirty_card_val()) {
if (bv == G1CardTable::clean_card_val()) {
*byte = G1CardTable::dirty_card_val();
qset.enqueue(queue, byte);
}
}
}
@ -148,14 +165,15 @@ void G1BarrierSet::on_thread_attach(Thread* thread) {
assert(!satbq.is_active(), "SATB queue should not be active");
assert(satbq.buffer() == nullptr, "SATB queue should not have a buffer");
assert(satbq.index() == 0, "SATB queue index should be zero");
G1DirtyCardQueue& dirtyq = G1ThreadLocalData::dirty_card_queue(thread);
assert(dirtyq.buffer() == nullptr, "Dirty Card queue should not have a buffer");
assert(dirtyq.index() == 0, "Dirty Card queue index should be zero");

// If we are creating the thread during a marking cycle, we should
// set the active field of the SATB queue to true. That involves
// copying the global is_active value to this thread's queue.
satbq.set_active(_satb_mark_queue_set.is_active());

if (thread->is_Java_thread()) {
assert(Threads_lock->is_locked(), "must be, synchronization with refinement.");
update_card_table_base(thread);
}
}

void G1BarrierSet::on_thread_detach(Thread* thread) {
@ -165,14 +183,13 @@ void G1BarrierSet::on_thread_detach(Thread* thread) {
SATBMarkQueue& queue = G1ThreadLocalData::satb_mark_queue(thread);
G1BarrierSet::satb_mark_queue_set().flush_queue(queue);
}
{
G1DirtyCardQueue& queue = G1ThreadLocalData::dirty_card_queue(thread);
G1DirtyCardQueueSet& qset = G1BarrierSet::dirty_card_queue_set();
qset.flush_queue(queue);
qset.record_detached_refinement_stats(queue.refinement_stats());
}
{
G1RegionPinCache& cache = G1ThreadLocalData::pin_count_cache(thread);
cache.flush();
}
}

void G1BarrierSet::print_on(outputStream* st) const {
_card_table->print_on(st, "Card");
_refinement_table->print_on(st, "Refinement");
}
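The swap itself is only a pointer exchange; what makes it safe is that every JavaThread's cached card table base is refreshed afterwards (update_card_table_base) under the Threads_lock or a handshake. A stand-alone sketch of the idea, using std::atomic instead of the VM's thread and handshake machinery, purely for illustration:

#include <atomic>
#include <cstdint>

// Sketch only: two card tables, one exposed to "mutators", one to "refinement".
struct TwoTableBarrier {
  std::atomic<uint8_t*> card_table;   // table mutators currently dirty
  uint8_t* refinement_table;          // table refinement threads scan and clean

  // Swap the roles of the two tables (done while refinement is quiescent).
  void swap_global_card_table() {
    uint8_t* old_table = card_table.exchange(refinement_table);
    refinement_table = old_table;
  }

  // Each "thread" caches the current table base; in the real implementation a
  // handshake re-runs this for every JavaThread after the swap.
  void update_thread_base(uint8_t** thread_local_base) {
    *thread_local_base = card_table.load();
  }
};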

@ -1,5 +1,5 @@
/*
* Copyright (c) 2001, 2024, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2001, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -25,32 +25,65 @@
#ifndef SHARE_GC_G1_G1BARRIERSET_HPP
#define SHARE_GC_G1_G1BARRIERSET_HPP

#include "gc/g1/g1DirtyCardQueue.hpp"
#include "gc/g1/g1SATBMarkQueueSet.hpp"
#include "gc/shared/bufferNode.hpp"
#include "gc/shared/cardTable.hpp"
#include "gc/shared/cardTableBarrierSet.hpp"

class G1CardTable;
class Thread;

// This barrier is specialized to use a logging barrier to support
// snapshot-at-the-beginning marking.

// This barrier set is specialized to manage two card tables:
// * one the mutator is currently working on ("card table")
// * one the refinement threads or GC during pause are working on ("refinement table")
//
// The card table acts like a regular card table where the mutator dirties cards
// containing potentially interesting references.
//
// When the amount of dirty cards on the card table exceeds a threshold, G1 swaps
// the card tables and has the refinement threads reduce them by "refining"
// them.
// I.e. refinement looks at all dirty cards on the refinement table, and updates
// the remembered sets accordingly, clearing the cards on the refinement table.
//
// Meanwhile the mutator continues dirtying the now empty card table.
//
// This separation of data the mutator and refinement threads are working on
// removes the need for any fine-grained (per mutator write) synchronization between
// them, keeping the write barrier simple.
//
// The refinement threads mark cards in the current collection set specially on the
// card table - this is fine wrt synchronization with the mutator, because at
// most the mutator will overwrite it again if there is a race, as G1 will scan the
// entire card either way during the GC pause.
//
// During garbage collection, if the refinement table is known to be non-empty, G1
// merges it back (and cleaning it) to the card table which is scanned for dirty
// cards.
//
class G1BarrierSet: public CardTableBarrierSet {
friend class VMStructs;
private:
BufferNode::Allocator _satb_mark_queue_buffer_allocator;
BufferNode::Allocator _dirty_card_queue_buffer_allocator;
G1SATBMarkQueueSet _satb_mark_queue_set;
G1DirtyCardQueueSet _dirty_card_queue_set;

G1CardTable* _refinement_table;

public:
G1BarrierSet(G1CardTable* card_table, G1CardTable* refinement_table);
virtual ~G1BarrierSet();

static G1BarrierSet* g1_barrier_set() {
return barrier_set_cast<G1BarrierSet>(BarrierSet::barrier_set());
}

public:
G1BarrierSet(G1CardTable* table);
~G1BarrierSet() { }
G1CardTable* refinement_table() const { return _refinement_table; }

// Swap the global card table references, without synchronization.
void swap_global_card_table();

// Update the given thread's card table (byte map) base to the current card table's.
void update_card_table_base(Thread* thread);

virtual bool card_mark_must_follow_store() const {
return true;
@ -74,9 +107,8 @@ class G1BarrierSet: public CardTableBarrierSet {
inline void write_region(MemRegion mr);
void write_region(JavaThread* thread, MemRegion mr);

template <DecoratorSet decorators, typename T>
template <DecoratorSet decorators = DECORATORS_NONE, typename T>
void write_ref_field_post(T* field);
void write_ref_field_post_slow(volatile CardValue* byte);

virtual void on_thread_create(Thread* thread);
virtual void on_thread_destroy(Thread* thread);
@ -87,9 +119,7 @@ class G1BarrierSet: public CardTableBarrierSet {
return g1_barrier_set()->_satb_mark_queue_set;
}

static G1DirtyCardQueueSet& dirty_card_queue_set() {
return g1_barrier_set()->_dirty_card_queue_set;
}
virtual void print_on(outputStream* st) const;

// Callbacks for runtime accesses.
template <DecoratorSet decorators, typename BarrierSetT = G1BarrierSet>

@ -75,9 +75,8 @@ inline void G1BarrierSet::write_region(MemRegion mr) {
template <DecoratorSet decorators, typename T>
inline void G1BarrierSet::write_ref_field_post(T* field) {
volatile CardValue* byte = _card_table->byte_for(field);
if (*byte != G1CardTable::g1_young_card_val()) {
// Take a slow path for cards in old
write_ref_field_post_slow(byte);
if (*byte == G1CardTable::clean_card_val()) {
*byte = G1CardTable::dirty_card_val();
}
}

@ -127,7 +126,7 @@ inline void G1BarrierSet::AccessBarrier<decorators, BarrierSetT>::
oop_store_not_in_heap(T* addr, oop new_value) {
// Apply SATB barriers for all non-heap references, to allow
// concurrent scanning of such references.
G1BarrierSet *bs = barrier_set_cast<G1BarrierSet>(BarrierSet::barrier_set());
G1BarrierSet *bs = g1_barrier_set();
bs->write_ref_field_pre<decorators>(addr);
Raw::oop_store(addr, new_value);
}

@ -29,17 +29,17 @@
#include "utilities/macros.hpp"

void G1BarrierSetRuntime::write_ref_array_pre_oop_entry(oop* dst, size_t length) {
G1BarrierSet *bs = barrier_set_cast<G1BarrierSet>(BarrierSet::barrier_set());
G1BarrierSet *bs = G1BarrierSet::g1_barrier_set();
bs->write_ref_array_pre(dst, length, false);
}

void G1BarrierSetRuntime::write_ref_array_pre_narrow_oop_entry(narrowOop* dst, size_t length) {
G1BarrierSet *bs = barrier_set_cast<G1BarrierSet>(BarrierSet::barrier_set());
G1BarrierSet *bs = G1BarrierSet::g1_barrier_set();
bs->write_ref_array_pre(dst, length, false);
}

void G1BarrierSetRuntime::write_ref_array_post_entry(HeapWord* dst, size_t length) {
G1BarrierSet *bs = barrier_set_cast<G1BarrierSet>(BarrierSet::barrier_set());
G1BarrierSet *bs = G1BarrierSet::g1_barrier_set();
bs->G1BarrierSet::write_ref_array(dst, length);
}

@ -53,14 +53,6 @@ JRT_LEAF(void, G1BarrierSetRuntime::write_ref_field_pre_entry(oopDesc* orig, Jav
G1BarrierSet::satb_mark_queue_set().enqueue_known_active(queue, orig);
JRT_END

// G1 post write barrier slowpath
JRT_LEAF(void, G1BarrierSetRuntime::write_ref_field_post_entry(volatile G1CardTable::CardValue* card_addr,
JavaThread* thread))
assert(thread == JavaThread::current(), "pre-condition");
G1DirtyCardQueue& queue = G1ThreadLocalData::dirty_card_queue(thread);
G1BarrierSet::dirty_card_queue_set().enqueue(queue, card_addr);
JRT_END

JRT_LEAF(void, G1BarrierSetRuntime::clone(oopDesc* src, oopDesc* dst, size_t size))
HeapAccess<>::clone(src, dst, size);
JRT_END

@ -1,5 +1,5 @@
/*
* Copyright (c) 2018, 2024, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -47,7 +47,6 @@ public:

// C2 slow-path runtime calls.
static void write_ref_field_pre_entry(oopDesc* orig, JavaThread *thread);
static void write_ref_field_post_entry(volatile CardValue* card_addr, JavaThread* thread);

static address clone_addr();
};

@ -28,18 +28,37 @@
#include "logging/log.hpp"
#include "runtime/os.hpp"

void G1CardTable::g1_mark_as_young(const MemRegion& mr) {
CardValue *const first = byte_for(mr.start());
CardValue *const last = byte_after(mr.last());
void G1CardTable::verify_region(MemRegion mr, CardValue val, bool val_equals) {
if (mr.is_empty()) {
return;
}
CardValue* start = byte_for(mr.start());
CardValue* end = byte_for(mr.last());

memset_with_concurrent_readers(first, g1_young_gen, pointer_delta(last, first, sizeof(CardValue)));
}
G1CollectedHeap* g1h = G1CollectedHeap::heap();
G1HeapRegion* r = g1h->heap_region_containing(mr.start());

#ifndef PRODUCT
void G1CardTable::verify_g1_young_region(MemRegion mr) {
verify_region(mr, g1_young_gen, true);
assert(r == g1h->heap_region_containing(mr.last()), "MemRegion crosses region");

bool failures = false;
for (CardValue* curr = start; curr <= end; ++curr) {
CardValue curr_val = *curr;
bool failed = (val_equals) ? (curr_val != val) : (curr_val == val);
if (failed) {
if (!failures) {
log_error(gc, verify)("== CT verification failed: [" PTR_FORMAT "," PTR_FORMAT "] r: %d (%s) %sexpecting value: %d",
p2i(start), p2i(end), r->hrm_index(), r->get_short_type_str(),
(val_equals) ? "" : "not ", val);
failures = true;
}
log_error(gc, verify)("== card " PTR_FORMAT " [" PTR_FORMAT "," PTR_FORMAT "], val: %d",
p2i(curr), p2i(addr_for(curr)),
p2i((HeapWord*) (((size_t) addr_for(curr)) + _card_size)),
(int) curr_val);
}
}
guarantee(!failures, "there should not have been any failures");
}
#endif

void G1CardTableChangedListener::on_commit(uint start_idx, size_t num_regions, bool zero_filled) {
// Default value for a clean card on the card table is -1. So we cannot take advantage of the zero_filled parameter.
@ -74,6 +93,5 @@ void G1CardTable::initialize(G1RegionToSpaceMapper* mapper) {
}

bool G1CardTable::is_in_young(const void* p) const {
volatile CardValue* card = byte_for(p);
return *card == G1CardTable::g1_young_card_val();
return G1CollectedHeap::heap()->heap_region_containing(p)->is_young();
}

@ -1,5 +1,5 @@
/*
* Copyright (c) 2001, 2024, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2001, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -52,8 +52,6 @@ class G1CardTable : public CardTable {

public:
enum G1CardValues {
g1_young_gen = CT_MR_BS_last_reserved << 1,

// During evacuation we use the card table to consolidate the cards we need to
// scan for roots onto the card table from the various sources. Further it is
// used to record already completely scanned cards to avoid re-scanning them
@ -63,18 +61,43 @@
// The merge at the start of each evacuation round simply sets cards to dirty
// that are clean; scanned cards are set to 0x1.
//
// This means that the LSB determines what to do with the card during evacuation
// given the following possible values:
// This means that the LSB determines whether the card is clean or non-clean
// (LSB is 1 -> clean, LSB is 0 -> non-clean) given the following possible values:
//
// 11111111 - clean, do not scan
// 00000001 - already scanned, do not scan
// xxxxxxx1 - clean, already scanned, do not scan again (during GC only).
// 00000100 - dirty, needs to be scanned, dirty from remembered set (during GC only)
// 00000010 - dirty, needs to be scanned, contains reference to collection set.
// 00000000 - dirty, needs to be scanned.
//
g1_card_already_scanned = 0x1
// g1_to_cset_card and g1_from_remset_card are both used for optimization and
// needed for more accurate prediction of card generation rate.
//
// g1_to_cset_card allows to separate dirty card generation rate by the mutator
// (which just dirties cards) from cards that will be scanned during next garbage
// collection anyway.
// Further it allows the optimization to not refine them, assuming that their
// references to young gen do not change, and to not add this card to any other
// remembered set.
// This color is sticky during mutator time: refinement threads encountering
// this card on the refinement table will just copy it over to the regular card
// table without re-refining this card. This saves on refinement effort spent
// on that card because most of the time already found interesting references
// stay interesting.
//
// g1_from_remset_card allows separation of cards generated by the mutator from
// cards in the remembered set, again to make mutator dirty card generation
// prediction more accurate.
//
// More accurate prediction allows better (less wasteful) refinement control.
g1_dirty_card = dirty_card,
g1_card_already_scanned = 0x1,
g1_to_cset_card = 0x2,
g1_from_remset_card = 0x4
};

static const size_t WordAllClean = SIZE_MAX;
static const size_t WordAllDirty = 0;
static const size_t WordAllFromRemset = (SIZE_MAX / 255) * g1_from_remset_card;

STATIC_ASSERT(BitsPerByte == 8);
static const size_t WordAlreadyScanned = (SIZE_MAX / 255) * g1_card_already_scanned;
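The (SIZE_MAX / 255) multiplier simply replicates a one-byte card value into every byte of a word, and the LSB convention lets "clean or already scanned" be tested with a single bit. A small stand-alone sketch of both tricks; the values mirror the enum above, but the code is illustrative only and not part of the patch:

#include <cstdint>

// LSB = 1 means "clean or already scanned", LSB = 0 means "needs scanning".
constexpr bool needs_scan(uint8_t card) {
  return (card & 0x1) == 0;   // true for 0x00 (dirty), 0x02, 0x04; false for 0xff, 0x01
}

// Replicate a byte value into every byte of a 64-bit word: 0x01 -> 0x0101...01.
constexpr uint64_t expand_to_word(uint8_t v) {
  return (UINT64_MAX / 255) * v;
}

static_assert(needs_scan(0x00) && needs_scan(0x02) && needs_scan(0x04), "dirty variants");
static_assert(!needs_scan(0xff) && !needs_scan(0x01), "clean and already-scanned");
static_assert(expand_to_word(0x01) == 0x0101010101010101ULL, "byte replicated across the word");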
@ -83,27 +106,27 @@ public:
_listener.set_card_table(this);
}

static CardValue g1_young_card_val() { return g1_young_gen; }
static CardValue g1_scanned_card_val() { return g1_card_already_scanned; }

void verify_g1_young_region(MemRegion mr) PRODUCT_RETURN;
void g1_mark_as_young(const MemRegion& mr);
void verify_region(MemRegion mr, CardValue val, bool val_equals) override;

size_t index_for_cardvalue(CardValue const* p) const {
return pointer_delta(p, _byte_map, sizeof(CardValue));
}

// Mark the given card as Dirty if it is Clean. Returns whether the card was
// Mark the given card as From Remset if it is Clean. Returns whether the card was
// Clean before this operation. This result may be inaccurate as it does not
// perform the dirtying atomically.
inline bool mark_clean_as_dirty(CardValue* card);
inline bool mark_clean_as_from_remset(CardValue* card);

// Change Clean cards in a (large) area on the card table as Dirty, preserving
// already scanned cards. Assumes that most cards in that area are Clean.
inline void mark_range_dirty(size_t start_card_index, size_t num_cards);
// Change Clean cards in a (large) area on the card table as From_Remset, preserving
// cards already marked otherwise. Assumes that most cards in that area are Clean.
// Not atomic.
inline size_t mark_clean_range_as_from_remset(size_t start_card_index, size_t num_cards);

// Change the given range of dirty cards to "which". All of these cards must be Dirty.
inline void change_dirty_cards_to(CardValue* start_card, CardValue* end_card, CardValue which);
// Change the given range of dirty cards to "which". All of these cards must be non-clean.
// Returns the number of pending cards found.
inline size_t change_dirty_cards_to(CardValue* start_card, CardValue* end_card, CardValue which);

inline uint region_idx_for(CardValue* p);


@ -1,5 +1,5 @@
/*
* Copyright (c) 2001, 2024, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2001, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -28,25 +28,39 @@
#include "gc/g1/g1CardTable.hpp"

#include "gc/g1/g1HeapRegion.hpp"
#include "utilities/population_count.hpp"

inline uint G1CardTable::region_idx_for(CardValue* p) {
size_t const card_idx = pointer_delta(p, _byte_map, sizeof(CardValue));
return (uint)(card_idx >> G1HeapRegion::LogCardsPerRegion);
}

inline bool G1CardTable::mark_clean_as_dirty(CardValue* card) {
inline bool G1CardTable::mark_clean_as_from_remset(CardValue* card) {
CardValue value = *card;
if (value == clean_card_val()) {
*card = dirty_card_val();
*card = g1_from_remset_card;
return true;
}
return false;
}

inline void G1CardTable::mark_range_dirty(size_t start_card_index, size_t num_cards) {
// Returns bits from a where mask is 0, and bits from b where mask is 1.
//
// Example:
// a = 0xAAAAAAAA
// b = 0xBBBBBBBB
// mask = 0xFF00FF00
// result = 0xBBAABBAA
inline size_t blend(size_t a, size_t b, size_t mask) {
return (a & ~mask) | (b & mask);
}

inline size_t G1CardTable::mark_clean_range_as_from_remset(size_t start_card_index, size_t num_cards) {
assert(is_aligned(start_card_index, sizeof(size_t)), "Start card index must be aligned.");
assert(is_aligned(num_cards, sizeof(size_t)), "Number of cards to change must be evenly divisible.");

size_t result = 0;

size_t const num_chunks = num_cards / sizeof(size_t);

size_t* cur_word = (size_t*)&_byte_map[start_card_index];
@ -54,31 +68,33 @@ inline void G1CardTable::mark_range_dirty(size_t start_card_index, size_t num_ca
while (cur_word < end_word_map) {
size_t value = *cur_word;
if (value == WordAllClean) {
*cur_word = WordAllDirty;
} else if (value == WordAllDirty) {
// do nothing.
*cur_word = WordAllFromRemset;
result += sizeof(size_t);
} else if ((value & WordAlreadyScanned) == 0) {
// Do nothing if there is no "Clean" card in it.
} else {
// There is a mix of cards in there. Tread slowly.
CardValue* cur = (CardValue*)cur_word;
for (size_t i = 0; i < sizeof(size_t); i++) {
CardValue value = *cur;
if (value == clean_card_val()) {
*cur = dirty_card_val();
}
cur++;
}
// There is a mix of cards in there. Tread "slowly".
size_t clean_card_mask = (value & WordAlreadyScanned) * 0xff; // All "Clean" cards have 0xff, all other places 0x00 now.
result += population_count(clean_card_mask) / BitsPerByte;
*cur_word = blend(value, WordAllFromRemset, clean_card_mask);
}
cur_word++;
}
return result;
}

inline void G1CardTable::change_dirty_cards_to(CardValue* start_card, CardValue* end_card, CardValue which) {
inline size_t G1CardTable::change_dirty_cards_to(CardValue* start_card, CardValue* end_card, CardValue which) {
size_t result = 0;
for (CardValue* i_card = start_card; i_card < end_card; ++i_card) {
CardValue value = *i_card;
assert(value == dirty_card_val(),
assert((value & g1_card_already_scanned) == 0,
"Must have been dirty %d start " PTR_FORMAT " " PTR_FORMAT, value, p2i(start_card), p2i(end_card));
if (value == g1_dirty_card) {
result++;
}
*i_card = which;
}
return result;
}

#endif /* SHARE_GC_G1_G1CARDTABLE_INLINE_HPP */
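The word-wise path above relies on two observations: (value & WordAlreadyScanned) leaves a 0x01 in exactly those byte positions that hold a clean (0xff) card, and multiplying that by 0xff turns it into a full byte mask suitable for blend(). A self-contained sketch with concrete numbers; uint64_t is used instead of size_t so the example is platform-independent, and it is illustrative only:

#include <cstdint>

constexpr uint64_t blend(uint64_t a, uint64_t b, uint64_t mask) {
  return (a & ~mask) | (b & mask);   // bits from b where mask is 1, else from a
}

constexpr uint64_t kLsbPerByte = UINT64_MAX / 255;   // 0x0101...01, like WordAlreadyScanned

// Given a word of eight card values, compute a mask selecting the clean (0xff) bytes.
constexpr uint64_t clean_byte_mask(uint64_t word) {
  return (word & kLsbPerByte) * 0xff;
}

// Example word, low byte first: clean (0xff), dirty (0x00), to-cset (0x02), clean (0xff).
constexpr uint64_t example = 0x00000000ff0200ffULL;
static_assert(clean_byte_mask(example) == 0x00000000ff0000ffULL, "only clean bytes selected");
static_assert(blend(example, 0x0404040404040404ULL, clean_byte_mask(example))
              == 0x0000000004020004ULL, "clean bytes rewritten to 0x04, others untouched");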

97
src/hotspot/share/gc/g1/g1CardTableClaimTable.cpp
Normal file
@ -0,0 +1,97 @@
/*
* Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/

#include "gc/g1/g1CardTableClaimTable.inline.hpp"
#include "gc/g1/g1CollectedHeap.inline.hpp"
#include "gc/g1/g1HeapRegion.inline.hpp"
#include "gc/shared/workerThread.hpp"
#include "memory/allocation.hpp"
#include "utilities/checkedCast.hpp"
#include "utilities/powerOfTwo.hpp"

G1CardTableClaimTable::G1CardTableClaimTable(uint chunks_per_region) :
_max_reserved_regions(0),
_card_claims(nullptr),
_cards_per_chunk(checked_cast<uint>(G1HeapRegion::CardsPerRegion / chunks_per_region))
{
guarantee(chunks_per_region > 0, "%u chunks per region", chunks_per_region);
}

G1CardTableClaimTable::~G1CardTableClaimTable() {
FREE_C_HEAP_ARRAY(uint, _card_claims);
}

void G1CardTableClaimTable::initialize(uint max_reserved_regions) {
assert(_card_claims == nullptr, "Must not be initialized twice");
_card_claims = NEW_C_HEAP_ARRAY(uint, max_reserved_regions, mtGC);
_max_reserved_regions = max_reserved_regions;
reset_all_to_unclaimed();
}

void G1CardTableClaimTable::reset_all_to_unclaimed() {
for (uint i = 0; i < _max_reserved_regions; i++) {
_card_claims[i] = 0;
}
}

void G1CardTableClaimTable::reset_all_to_claimed() {
for (uint i = 0; i < _max_reserved_regions; i++) {
_card_claims[i] = (uint)G1HeapRegion::CardsPerRegion;
}
}

void G1CardTableClaimTable::heap_region_iterate_from_worker_offset(G1HeapRegionClosure* cl, uint worker_id, uint max_workers) {
// Every worker will actually look at all regions, skipping over regions that
// are completed.
const size_t n_regions = _max_reserved_regions;
const uint start_index = (uint)(worker_id * n_regions / max_workers);

for (uint count = 0; count < n_regions; count++) {
const uint index = (start_index + count) % n_regions;
assert(index < n_regions, "sanity");
// Skip over fully processed regions
if (!has_unclaimed_cards(index)) {
continue;
}
G1HeapRegion* r = G1CollectedHeap::heap()->region_at(index);
bool res = cl->do_heap_region(r);
if (res) {
return;
}
}
}

G1CardTableChunkClaimer::G1CardTableChunkClaimer(G1CardTableClaimTable* scan_state, uint region_idx) :
_claim_values(scan_state),
_region_idx(region_idx),
_cur_claim(0) {
guarantee(size() <= G1HeapRegion::CardsPerRegion, "Should not claim more space than possible.");
}

G1ChunkScanner::G1ChunkScanner(CardValue* const start_card, CardValue* const end_card) :
_start_card(start_card),
_end_card(end_card) {
assert(is_word_aligned(start_card), "precondition");
assert(is_word_aligned(end_card), "precondition");
}
137
src/hotspot/share/gc/g1/g1CardTableClaimTable.hpp
Normal file
@ -0,0 +1,137 @@
/*
* Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/

#ifndef SHARE_GC_G1_G1CARDTABLECLAIMTABLE_HPP
#define SHARE_GC_G1_G1CARDTABLECLAIMTABLE_HPP

#include "gc/g1/g1CardTable.hpp"
#include "memory/allocation.hpp"

class G1HeapRegionClosure;

// Helper class representing claim values for the cards in the card table corresponding
// to a region.
// I.e. for every region this class stores an atomic counter that represents the
// number of cards from 0 to the number of cards per region already claimed for
// this region.
// If the claimed value is >= the number of cards of a region, the region can be
// considered fully claimed.
//
// Claiming works on full region (all cards in region) or a range of contiguous cards
// (chunk). Chunk size is given at construction time.
class G1CardTableClaimTable : public CHeapObj<mtGC> {
uint _max_reserved_regions;

// Card table iteration claim values for every heap region, from 0 (completely unclaimed)
// to (>=) G1HeapRegion::CardsPerRegion (completely claimed).
uint volatile* _card_claims;

uint _cards_per_chunk; // For conversion between card index and chunk index.

// Claim increment number of cards, returning the previous claim value.
inline uint claim_cards(uint region, uint increment);

public:
G1CardTableClaimTable(uint chunks_per_region);
~G1CardTableClaimTable();

// Allocates the data structure and initializes the claims to unclaimed.
void initialize(uint max_reserved_regions);

void reset_all_to_unclaimed();
void reset_all_to_claimed();

inline bool has_unclaimed_cards(uint region);
inline void reset_to_unclaimed(uint region);

// Claims all cards in that region, returning the previous claim value.
inline uint claim_all_cards(uint region);

// Claim a single chunk in that region, returning the previous claim value.
inline uint claim_chunk(uint region);
inline uint cards_per_chunk() const;

size_t max_reserved_regions() { return _max_reserved_regions; }

void heap_region_iterate_from_worker_offset(G1HeapRegionClosure* cl, uint worker_id, uint max_workers);
};

// Helper class to claim dirty chunks within the card table for a given region.
class G1CardTableChunkClaimer {
G1CardTableClaimTable* _claim_values;

uint _region_idx;
uint _cur_claim;

public:
G1CardTableChunkClaimer(G1CardTableClaimTable* claim_table, uint region_idx);

inline bool has_next();

inline uint value() const;
inline uint size() const;
};

// Helper class to locate consecutive dirty cards inside a range of cards.
class G1ChunkScanner {
using Word = size_t;
using CardValue = G1CardTable::CardValue;

CardValue* const _start_card;
CardValue* const _end_card;

static const size_t ExpandedToScanMask = G1CardTable::WordAlreadyScanned;
static const size_t ToScanMask = G1CardTable::g1_card_already_scanned;

inline bool is_card_dirty(const CardValue* const card) const;

inline bool is_word_aligned(const void* const addr) const;

inline CardValue* find_first_dirty_card(CardValue* i_card) const;
inline CardValue* find_first_non_dirty_card(CardValue* i_card) const;

public:
G1ChunkScanner(CardValue* const start_card, CardValue* const end_card);

template<typename Func>
void on_dirty_cards(Func&& f) {
for (CardValue* cur_card = _start_card; cur_card < _end_card; /* empty */) {
CardValue* dirty_l = find_first_dirty_card(cur_card);
CardValue* dirty_r = find_first_non_dirty_card(dirty_l);

assert(dirty_l <= dirty_r, "inv");

if (dirty_l == dirty_r) {
assert(dirty_r == _end_card, "finished the entire chunk");
return;
}

f(dirty_l, dirty_r);

cur_card = dirty_r + 1;
}
}
};

#endif // SHARE_GC_G1_G1CARDTABLECLAIMTABLE_HPP
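Work distribution through this table is essentially one atomic counter per region: each claimer does a fetch-and-add of the chunk size and, if the old value is still in range, owns the cards [old, old + chunk). A minimal stand-alone sketch of that pattern with fixed, made-up sizes; it is not the VM code:

#include <atomic>
#include <cstdio>

constexpr unsigned kCardsPerRegion = 512;   // assumed value, for illustration only
constexpr unsigned kCardsPerChunk  = 64;

std::atomic<unsigned> claim{0};   // one such counter per region in the real table

// Returns true and a chunk start while unclaimed cards remain, like has_next().
bool claim_next_chunk(unsigned& chunk_start) {
  chunk_start = claim.fetch_add(kCardsPerChunk, std::memory_order_relaxed);
  return chunk_start < kCardsPerRegion;
}

int main() {
  unsigned start;
  while (claim_next_chunk(start)) {
    std::printf("claimed cards [%u, %u)\n", start, start + kCardsPerChunk);
  }
  return 0;
}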
|
||||
128
src/hotspot/share/gc/g1/g1CardTableClaimTable.inline.hpp
Normal file
128
src/hotspot/share/gc/g1/g1CardTableClaimTable.inline.hpp
Normal file
@ -0,0 +1,128 @@
/*
 * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef SHARE_GC_G1_G1CARDTABLECLAIMTABLE_INLINE_HPP
#define SHARE_GC_G1_G1CARDTABLECLAIMTABLE_INLINE_HPP

#include "gc/g1/g1CardTableClaimTable.hpp"

#include "gc/g1/g1CollectedHeap.inline.hpp"
#include "gc/g1/g1HeapRegion.inline.hpp"
#include "runtime/atomicAccess.hpp"

bool G1CardTableClaimTable::has_unclaimed_cards(uint region) {
  assert(region < _max_reserved_regions, "Tried to access invalid region %u", region);
  return AtomicAccess::load(&_card_claims[region]) < G1HeapRegion::CardsPerRegion;
}

void G1CardTableClaimTable::reset_to_unclaimed(uint region) {
  assert(region < _max_reserved_regions, "Tried to access invalid region %u", region);
  AtomicAccess::store(&_card_claims[region], 0u);
}

uint G1CardTableClaimTable::claim_cards(uint region, uint increment) {
  assert(region < _max_reserved_regions, "Tried to access invalid region %u", region);
  return AtomicAccess::fetch_then_add(&_card_claims[region], increment, memory_order_relaxed);
}

uint G1CardTableClaimTable::claim_chunk(uint region) {
  assert(region < _max_reserved_regions, "Tried to access invalid region %u", region);
  return AtomicAccess::fetch_then_add(&_card_claims[region], cards_per_chunk(), memory_order_relaxed);
}

uint G1CardTableClaimTable::claim_all_cards(uint region) {
  return claim_cards(region, (uint)G1HeapRegion::CardsPerRegion);
}

uint G1CardTableClaimTable::cards_per_chunk() const { return _cards_per_chunk; }

bool G1CardTableChunkClaimer::has_next() {
  _cur_claim = _claim_values->claim_chunk(_region_idx);
  return (_cur_claim < G1HeapRegion::CardsPerRegion);
}

uint G1CardTableChunkClaimer::value() const { return _cur_claim; }
uint G1CardTableChunkClaimer::size() const { return _claim_values->cards_per_chunk(); }
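
The claim table above is what lets several refinement workers sweep the same region without locking: claim_chunk() uses AtomicAccess::fetch_then_add(), so every caller of has_next() receives a distinct, non-overlapping chunk of cards. The following sketch is illustrative only and not part of the patch; the G1CardTableChunkClaimer constructor arguments are assumed, and only has_next()/value()/size() are taken from the code above.

// Illustrative only: per-worker sweep loop over one region's cards.
void worker_sweep_region(G1CardTableClaimTable* claims,
                         uint region_idx,
                         G1CardTable::CardValue* region_cards) {
  G1CardTableChunkClaimer claimer(claims, region_idx);  // assumed constructor shape
  while (claimer.has_next()) {
    G1CardTable::CardValue* chunk_start = region_cards + claimer.value();
    G1CardTable::CardValue* chunk_end = chunk_start + claimer.size();
    G1ChunkScanner scanner(chunk_start, chunk_end);
    scanner.on_dirty_cards([&](G1CardTable::CardValue* from, G1CardTable::CardValue* to) {
      // Refine every card in the maximal dirty run [from, to).
    });
  }
}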
bool G1ChunkScanner::is_card_dirty(const CardValue* const card) const {
  return (*card & ToScanMask) == 0;
}

bool G1ChunkScanner::is_word_aligned(const void* const addr) const {
  return ((uintptr_t)addr) % sizeof(Word) == 0;
}

G1CardTable::CardValue* G1ChunkScanner::find_first_dirty_card(CardValue* i_card) const {
  while (!is_word_aligned(i_card)) {
    if (is_card_dirty(i_card)) {
      return i_card;
    }
    i_card++;
  }

  for (/* empty */; i_card < _end_card; i_card += sizeof(Word)) {
    Word word_value = *reinterpret_cast<Word*>(i_card);
    bool has_dirty_cards_in_word = (~word_value & ExpandedToScanMask) != 0;

    if (has_dirty_cards_in_word) {
      for (uint i = 0; i < sizeof(Word); ++i) {
        if (is_card_dirty(i_card)) {
          return i_card;
        }
        i_card++;
      }
      ShouldNotReachHere();
    }
  }

  return _end_card;
}

G1CardTable::CardValue* G1ChunkScanner::find_first_non_dirty_card(CardValue* i_card) const {
  while (!is_word_aligned(i_card)) {
    if (!is_card_dirty(i_card)) {
      return i_card;
    }
    i_card++;
  }

  for (/* empty */; i_card < _end_card; i_card += sizeof(Word)) {
    Word word_value = *reinterpret_cast<Word*>(i_card);
    bool all_cards_dirty = (word_value & ExpandedToScanMask) == 0;

    if (!all_cards_dirty) {
      for (uint i = 0; i < sizeof(Word); ++i) {
        if (!is_card_dirty(i_card)) {
          return i_card;
        }
        i_card++;
      }
      ShouldNotReachHere();
    }
  }

  return _end_card;
}

#endif // SHARE_GC_G1_G1CARDTABLECLAIMTABLE_INLINE_HPP
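
The word-at-a-time probe in find_first_dirty_card() relies on card values encoding "already scanned" (clean) as a set bit and "to scan" (dirty) as that bit being clear, so a single word load tests sizeof(Word) cards at once. Below is a self-contained model of that predicate; the per-card bit value 0x1 is assumed purely for the demo, the real constants being G1CardTable::g1_card_already_scanned and G1CardTable::WordAlreadyScanned.

// Illustrative only; standalone model of the word probe used above.
#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  using Word = uint64_t;
  const uint8_t ToScanMask = 0x1;                        // assumed per-card "already scanned" bit
  const Word ExpandedToScanMask = 0x0101010101010101ULL; // that bit replicated into every byte

  // Eight consecutive card entries; a card is dirty when its ToScanMask bit is clear.
  uint8_t cards[sizeof(Word)] = {0x1, 0x1, 0x1, 0x0, 0x1, 0x1, 0x1, 0x1};
  Word word_value;
  memcpy(&word_value, cards, sizeof(word_value));

  // Same predicate as find_first_dirty_card(): is any of the eight cards dirty?
  bool has_dirty_cards_in_word = (~word_value & ExpandedToScanMask) != 0;
  printf("dirty card present: %s\n", has_dirty_cards_in_word ? "yes" : "no"); // prints "yes"
  return 0;
}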
|
||||
@ -38,7 +38,6 @@
|
||||
#include "gc/g1/g1ConcurrentMarkThread.inline.hpp"
|
||||
#include "gc/g1/g1ConcurrentRefine.hpp"
|
||||
#include "gc/g1/g1ConcurrentRefineThread.hpp"
|
||||
#include "gc/g1/g1DirtyCardQueue.hpp"
|
||||
#include "gc/g1/g1EvacStats.inline.hpp"
|
||||
#include "gc/g1/g1FullCollector.hpp"
|
||||
#include "gc/g1/g1GCCounters.hpp"
|
||||
@ -60,10 +59,10 @@
|
||||
#include "gc/g1/g1ParScanThreadState.inline.hpp"
|
||||
#include "gc/g1/g1PeriodicGCTask.hpp"
|
||||
#include "gc/g1/g1Policy.hpp"
|
||||
#include "gc/g1/g1RedirtyCardsQueue.hpp"
|
||||
#include "gc/g1/g1RegionPinCache.inline.hpp"
|
||||
#include "gc/g1/g1RegionToSpaceMapper.hpp"
|
||||
#include "gc/g1/g1RemSet.hpp"
|
||||
#include "gc/g1/g1ReviseYoungLengthTask.hpp"
|
||||
#include "gc/g1/g1RootClosures.hpp"
|
||||
#include "gc/g1/g1RootProcessor.hpp"
|
||||
#include "gc/g1/g1SATBMarkQueueSet.hpp"
|
||||
@ -111,6 +110,7 @@
|
||||
#include "runtime/init.hpp"
|
||||
#include "runtime/java.hpp"
|
||||
#include "runtime/orderAccess.hpp"
|
||||
#include "runtime/threads.hpp"
|
||||
#include "runtime/threadSMR.hpp"
|
||||
#include "runtime/vmThread.hpp"
|
||||
#include "utilities/align.hpp"
@@ -146,7 +146,7 @@ void G1CollectedHeap::run_batch_task(G1BatchedTask* cl) {
  workers()->run_task(cl, num_workers);
}

uint G1CollectedHeap::get_chunks_per_region() {
uint G1CollectedHeap::get_chunks_per_region_for_scan() {
  uint log_region_size = G1HeapRegion::LogOfHRGrainBytes;
  // Limit the expected input values to current known possible values of the
  // (log) region size. Adjust as necessary after testing if changing the permissible
@@ -156,6 +156,18 @@ uint G1CollectedHeap::get_chunks_per_region() {
  return 1u << (log_region_size / 2 - 4);
}
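
As a worked example of the sizing formula just above (the chunk counts match the comment on the declaration in g1CollectedHeap.hpp; the 512-byte card size is the default and is assumed here only for the per-chunk card count):

// Illustrative only:
//   1M region  (log_region_size = 20): 1u << (20 / 2 - 4) = 1u << 6 =  64 chunks
//   4M region  (log_region_size = 22): 1u << (22 / 2 - 4) = 1u << 7 = 128 chunks
//  16M region  (log_region_size = 24): 1u << (24 / 2 - 4) = 1u << 8 = 256 chunks
// With 512-byte cards a 1M region covers 2048 cards, so each of its 64 scan chunks
// claims 32 consecutive cards.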
uint G1CollectedHeap::get_chunks_per_region_for_merge() {
  uint log_region_size = G1HeapRegion::LogOfHRGrainBytes;
  // Limit the expected input values to current known possible values of the
  // (log) region size. Adjust as necessary after testing if changing the permissible
  // values for region size.
  assert(log_region_size >= 20 && log_region_size <= 29,
         "expected value in [20,29], but got %u", log_region_size);

  uint half_log_region_size = (log_region_size + 1) / 2;
  return 1 << (half_log_region_size - 9);
}
|
||||
|
||||
G1HeapRegion* G1CollectedHeap::new_heap_region(uint hrs_index,
|
||||
MemRegion mr) {
|
||||
return new G1HeapRegion(hrs_index, bot(), mr, &_card_set_config);
|
||||
@ -614,7 +626,6 @@ inline HeapWord* G1CollectedHeap::attempt_allocation(size_t min_word_size,
|
||||
assert_heap_not_locked();
|
||||
if (result != nullptr) {
|
||||
assert(*actual_word_size != 0, "Actual size must have been set here");
|
||||
dirty_young_block(result, *actual_word_size);
|
||||
} else {
|
||||
*actual_word_size = 0;
|
||||
}
|
||||
@ -809,11 +820,27 @@ void G1CollectedHeap::prepare_for_mutator_after_full_collection(size_t allocatio
|
||||
}
|
||||
|
||||
void G1CollectedHeap::abort_refinement() {
|
||||
// Discard all remembered set updates and reset refinement statistics.
|
||||
G1BarrierSet::dirty_card_queue_set().abandon_logs_and_stats();
|
||||
assert(G1BarrierSet::dirty_card_queue_set().num_cards() == 0,
|
||||
"DCQS should be empty");
|
||||
concurrent_refine()->get_and_reset_refinement_stats();
|
||||
G1ConcurrentRefineSweepState& sweep_state = concurrent_refine()->sweep_state();
|
||||
if (sweep_state.is_in_progress()) {
|
||||
|
||||
if (!sweep_state.are_java_threads_synched()) {
|
||||
// Synchronize Java threads with the global card table that has already been swapped.
|
||||
class SwapThreadCardTableClosure : public ThreadClosure {
|
||||
public:
|
||||
|
||||
virtual void do_thread(Thread* t) {
|
||||
G1BarrierSet* bs = G1BarrierSet::g1_barrier_set();
|
||||
bs->update_card_table_base(t);
|
||||
}
|
||||
} cl;
|
||||
Threads::java_threads_do(&cl);
|
||||
}
|
||||
|
||||
// Record any available refinement statistics.
|
||||
policy()->record_refinement_stats(sweep_state.stats());
|
||||
sweep_state.complete_work(false /* concurrent */, false /* print_log */);
|
||||
}
|
||||
sweep_state.reset_stats();
|
||||
}
|
||||
|
||||
void G1CollectedHeap::verify_after_full_collection() {
|
||||
@ -825,6 +852,7 @@ void G1CollectedHeap::verify_after_full_collection() {
|
||||
}
|
||||
_hrm.verify_optional();
|
||||
_verifier->verify_region_sets_optional();
|
||||
_verifier->verify_card_tables_clean(true /* both_card_tables */);
|
||||
_verifier->verify_after_gc();
|
||||
_verifier->verify_bitmap_clear(false /* above_tams_only */);
|
||||
|
||||
@ -1168,8 +1196,13 @@ G1CollectedHeap::G1CollectedHeap() :
|
||||
_service_thread(nullptr),
|
||||
_periodic_gc_task(nullptr),
|
||||
_free_arena_memory_task(nullptr),
|
||||
_revise_young_length_task(nullptr),
|
||||
_workers(nullptr),
|
||||
_card_table(nullptr),
|
||||
_refinement_epoch(0),
|
||||
_last_synchronized_start(0),
|
||||
_last_refinement_epoch_start(0),
|
||||
_yield_duration_in_refinement_epoch(0),
|
||||
_last_safepoint_refinement_epoch(0),
|
||||
_collection_pause_end(Ticks::now()),
|
||||
_old_set("Old Region Set", new OldRegionSetChecker()),
|
||||
_humongous_set("Humongous Region Set", new HumongousRegionSetChecker()),
|
||||
@ -1289,7 +1322,7 @@ G1RegionToSpaceMapper* G1CollectedHeap::create_aux_memory_mapper(const char* des
|
||||
|
||||
jint G1CollectedHeap::initialize_concurrent_refinement() {
|
||||
jint ecode = JNI_OK;
|
||||
_cr = G1ConcurrentRefine::create(policy(), &ecode);
|
||||
_cr = G1ConcurrentRefine::create(this, &ecode);
|
||||
return ecode;
|
||||
}
|
||||
|
||||
@ -1345,18 +1378,12 @@ jint G1CollectedHeap::initialize() {
|
||||
initialize_reserved_region(heap_rs);
|
||||
|
||||
// Create the barrier set for the entire reserved region.
|
||||
G1CardTable* ct = new G1CardTable(_reserved);
|
||||
G1BarrierSet* bs = new G1BarrierSet(ct);
|
||||
G1CardTable* card_table = new G1CardTable(_reserved);
|
||||
G1CardTable* refinement_table = new G1CardTable(_reserved);
|
||||
|
||||
G1BarrierSet* bs = new G1BarrierSet(card_table, refinement_table);
|
||||
bs->initialize();
|
||||
assert(bs->is_a(BarrierSet::G1BarrierSet), "sanity");
|
||||
BarrierSet::set_barrier_set(bs);
|
||||
_card_table = ct;
|
||||
|
||||
{
|
||||
G1SATBMarkQueueSet& satbqs = bs->satb_mark_queue_set();
|
||||
satbqs.set_process_completed_buffers_threshold(G1SATBProcessCompletedThreshold);
|
||||
satbqs.set_buffer_enqueue_threshold_percentage(G1SATBBufferEnqueueingThresholdPercent);
|
||||
}
|
||||
|
||||
// Create space mappers.
|
||||
size_t page_size = heap_rs.page_size();
|
||||
@ -1391,12 +1418,26 @@ jint G1CollectedHeap::initialize() {
|
||||
G1CardTable::compute_size(heap_rs.size() / HeapWordSize),
|
||||
G1CardTable::heap_map_factor());
|
||||
|
||||
G1RegionToSpaceMapper* refinement_cards_storage =
|
||||
create_aux_memory_mapper("Refinement Card Table",
|
||||
G1CardTable::compute_size(heap_rs.size() / HeapWordSize),
|
||||
G1CardTable::heap_map_factor());
|
||||
|
||||
size_t bitmap_size = G1CMBitMap::compute_size(heap_rs.size());
|
||||
G1RegionToSpaceMapper* bitmap_storage =
|
||||
create_aux_memory_mapper("Mark Bitmap", bitmap_size, G1CMBitMap::heap_map_factor());
|
||||
|
||||
_hrm.initialize(heap_storage, bitmap_storage, bot_storage, cardtable_storage);
|
||||
_card_table->initialize(cardtable_storage);
|
||||
_hrm.initialize(heap_storage, bitmap_storage, bot_storage, cardtable_storage, refinement_cards_storage);
|
||||
card_table->initialize(cardtable_storage);
|
||||
refinement_table->initialize(refinement_cards_storage);
|
||||
|
||||
BarrierSet::set_barrier_set(bs);
|
||||
|
||||
{
|
||||
G1SATBMarkQueueSet& satbqs = bs->satb_mark_queue_set();
|
||||
satbqs.set_process_completed_buffers_threshold(G1SATBProcessCompletedThreshold);
|
||||
satbqs.set_buffer_enqueue_threshold_percentage(G1SATBBufferEnqueueingThresholdPercent);
|
||||
}
|
||||
|
||||
// 6843694 - ensure that the maximum region index can fit
|
||||
// in the remembered set structures.
|
||||
@ -1408,7 +1449,7 @@ jint G1CollectedHeap::initialize() {
|
||||
guarantee((uintptr_t)(heap_rs.base()) >= G1CardTable::card_size(), "Java heap must not start within the first card.");
|
||||
G1FromCardCache::initialize(max_num_regions());
|
||||
// Also create a G1 rem set.
|
||||
_rem_set = new G1RemSet(this, _card_table);
|
||||
_rem_set = new G1RemSet(this);
|
||||
_rem_set->initialize(max_num_regions());
|
||||
|
||||
size_t max_cards_per_region = ((size_t)1 << (sizeof(CardIdx_t)*BitsPerByte-1)) - 1;
|
||||
@ -1467,6 +1508,11 @@ jint G1CollectedHeap::initialize() {
|
||||
_free_arena_memory_task = new G1MonotonicArenaFreeMemoryTask("Card Set Free Memory Task");
|
||||
_service_thread->register_task(_free_arena_memory_task);
|
||||
|
||||
if (policy()->use_adaptive_young_list_length()) {
|
||||
_revise_young_length_task = new G1ReviseYoungLengthTask("Revise Young Length List Task");
|
||||
_service_thread->register_task(_revise_young_length_task);
|
||||
}
|
||||
|
||||
// Here we allocate the dummy G1HeapRegion that is required by the
|
||||
// G1AllocRegion class.
|
||||
G1HeapRegion* dummy_region = _hrm.get_dummy_region();
|
||||
@ -1495,6 +1541,7 @@ jint G1CollectedHeap::initialize() {
|
||||
CPUTimeCounters::create_counter(CPUTimeGroups::CPUTimeType::gc_parallel_workers);
|
||||
CPUTimeCounters::create_counter(CPUTimeGroups::CPUTimeType::gc_conc_mark);
|
||||
CPUTimeCounters::create_counter(CPUTimeGroups::CPUTimeType::gc_conc_refine);
|
||||
CPUTimeCounters::create_counter(CPUTimeGroups::CPUTimeType::gc_conc_refine_control);
|
||||
CPUTimeCounters::create_counter(CPUTimeGroups::CPUTimeType::gc_service);
|
||||
|
||||
G1InitLogger::print();
|
||||
@ -1519,12 +1566,35 @@ void G1CollectedHeap::stop() {
|
||||
|
||||
void G1CollectedHeap::safepoint_synchronize_begin() {
|
||||
SuspendibleThreadSet::synchronize();
|
||||
|
||||
_last_synchronized_start = os::elapsed_counter();
|
||||
}
|
||||
|
||||
void G1CollectedHeap::safepoint_synchronize_end() {
|
||||
jlong now = os::elapsed_counter();
|
||||
jlong synchronize_duration = now - _last_synchronized_start;
|
||||
|
||||
if (_last_safepoint_refinement_epoch == _refinement_epoch) {
|
||||
_yield_duration_in_refinement_epoch += synchronize_duration;
|
||||
} else {
|
||||
_last_refinement_epoch_start = now;
|
||||
_last_safepoint_refinement_epoch = _refinement_epoch;
|
||||
_yield_duration_in_refinement_epoch = 0;
|
||||
}
|
||||
|
||||
SuspendibleThreadSet::desynchronize();
|
||||
}
|
||||
|
||||
void G1CollectedHeap::set_last_refinement_epoch_start(jlong epoch_start, jlong last_yield_duration) {
|
||||
_last_refinement_epoch_start = epoch_start;
|
||||
guarantee(_yield_duration_in_refinement_epoch >= last_yield_duration, "should be");
|
||||
_yield_duration_in_refinement_epoch -= last_yield_duration;
|
||||
}
|
||||
|
||||
jlong G1CollectedHeap::yield_duration_in_refinement_epoch() {
|
||||
return _yield_duration_in_refinement_epoch;
|
||||
}
|
||||
|
||||
void G1CollectedHeap::post_initialize() {
|
||||
CollectedHeap::post_initialize();
|
||||
ref_processing_init();
|
||||
@ -2336,6 +2406,7 @@ void G1CollectedHeap::gc_epilogue(bool full) {
|
||||
&_collection_set_candidates_card_set_stats);
|
||||
|
||||
update_perf_counter_cpu_time();
|
||||
_refinement_epoch++;
|
||||
}
|
||||
|
||||
uint G1CollectedHeap::uncommit_regions(uint region_limit) {
|
||||
@ -2468,7 +2539,6 @@ void G1CollectedHeap::verify_before_young_collection(G1HeapVerifier::G1VerifyTyp
|
||||
Ticks start = Ticks::now();
|
||||
_verifier->prepare_for_verify();
|
||||
_verifier->verify_region_sets_optional();
|
||||
_verifier->verify_dirty_young_regions();
|
||||
_verifier->verify_before_gc();
|
||||
verify_numa_regions("GC Start");
|
||||
phase_times()->record_verify_before_time_ms((Ticks::now() - start).seconds() * MILLIUNITS);
|
||||
@ -2734,6 +2804,11 @@ void G1CollectedHeap::free_region(G1HeapRegion* hr, G1FreeRegionList* free_list)
|
||||
if (free_list != nullptr) {
|
||||
free_list->add_ordered(hr);
|
||||
}
|
||||
if (VerifyDuringGC) {
|
||||
// Card and refinement table must be clear for freed regions.
|
||||
card_table()->verify_region(MemRegion(hr->bottom(), hr->end()), G1CardTable::clean_card_val(), true);
|
||||
refinement_table()->verify_region(MemRegion(hr->bottom(), hr->end()), G1CardTable::clean_card_val(), true);
|
||||
}
|
||||
}
|
||||
|
||||
void G1CollectedHeap::retain_region(G1HeapRegion* hr) {
|
||||
|
||||
@ -75,6 +75,7 @@ class G1GCPhaseTimes;
|
||||
class G1HeapSizingPolicy;
|
||||
class G1NewTracer;
|
||||
class G1RemSet;
|
||||
class G1ReviseYoungLengthTask;
|
||||
class G1ServiceTask;
|
||||
class G1ServiceThread;
|
||||
class GCMemoryManager;
|
||||
@ -171,9 +172,23 @@ private:
|
||||
G1ServiceThread* _service_thread;
|
||||
G1ServiceTask* _periodic_gc_task;
|
||||
G1MonotonicArenaFreeMemoryTask* _free_arena_memory_task;
|
||||
G1ReviseYoungLengthTask* _revise_young_length_task;
|
||||
|
||||
WorkerThreads* _workers;
|
||||
G1CardTable* _card_table;
|
||||
|
||||
// The current epoch for refinement, i.e. the number of times the card tables
|
||||
// have been swapped by a garbage collection.
|
||||
// Used for detecting whether concurrent refinement has been interrupted by a
|
||||
// garbage collection.
|
||||
size_t _refinement_epoch;
|
||||
|
||||
// The following members are for tracking safepoint durations between garbage
|
||||
// collections.
|
||||
jlong _last_synchronized_start;
|
||||
|
||||
jlong _last_refinement_epoch_start;
|
||||
jlong _yield_duration_in_refinement_epoch; // Time spent in safepoints since beginning of last refinement epoch.
|
||||
size_t _last_safepoint_refinement_epoch; // Refinement epoch before last safepoint.
|
||||
|
||||
Ticks _collection_pause_end;
|
||||
|
||||
@ -541,12 +556,17 @@ public:
|
||||
void run_batch_task(G1BatchedTask* cl);
|
||||
|
||||
// Return "optimal" number of chunks per region we want to use for claiming areas
|
||||
// within a region to claim.
|
||||
// within a region to claim during card table scanning.
|
||||
// The returned value is a trade-off between granularity of work distribution and
|
||||
// memory usage and maintenance costs of that table.
|
||||
// Testing showed that 64 chunks for 1M/2M regions, 128 for 4M/8M regions, 256 for 16M/32M regions,
// and so on is a good trade-off.
|
||||
static uint get_chunks_per_region();
|
||||
static uint get_chunks_per_region_for_scan();
|
||||
// Return "optimal" number of chunks per region we want to use for claiming areas
|
||||
// within a region to claim during card table merging.
|
||||
// This is much smaller than for scanning as the merge work is much smaller.
|
||||
// Currently 1 for 1M regions, 2 for 2/4M regions, 4 for 8/16M regions and so on.
|
||||
static uint get_chunks_per_region_for_merge();
|
||||
|
||||
G1Allocator* allocator() {
|
||||
return _allocator;
|
||||
@ -687,11 +707,6 @@ public:
|
||||
|
||||
// Add the given region to the retained regions collection set candidates.
|
||||
void retain_region(G1HeapRegion* hr);
|
||||
// It dirties the cards that cover the block so that the post
|
||||
// write barrier never queues anything when updating objects on this
|
||||
// block. It is assumed (and in fact we assert) that the block
|
||||
// belongs to a young region.
|
||||
inline void dirty_young_block(HeapWord* start, size_t word_size);
|
||||
|
||||
// Frees a humongous region by collapsing it into individual regions
|
||||
// and calling free_region() for each of them. The freed regions
|
||||
@ -905,6 +920,10 @@ public:
|
||||
void safepoint_synchronize_begin() override;
|
||||
void safepoint_synchronize_end() override;
|
||||
|
||||
jlong last_refinement_epoch_start() const { return _last_refinement_epoch_start; }
|
||||
void set_last_refinement_epoch_start(jlong epoch_start, jlong last_yield_duration);
|
||||
jlong yield_duration_in_refinement_epoch();
|
||||
|
||||
// Does operations required after initialization has been done.
|
||||
void post_initialize() override;
|
||||
|
||||
@ -1069,7 +1088,16 @@ public:
|
||||
}
|
||||
|
||||
G1CardTable* card_table() const {
|
||||
return _card_table;
|
||||
return static_cast<G1CardTable*>(G1BarrierSet::g1_barrier_set()->card_table());
|
||||
}
|
||||
|
||||
G1CardTable* refinement_table() const {
|
||||
return G1BarrierSet::g1_barrier_set()->refinement_table();
|
||||
}
|
||||
|
||||
G1CardTable::CardValue* card_table_base() const {
|
||||
assert(card_table() != nullptr, "must be");
|
||||
return card_table()->byte_map_base();
|
||||
}
|
||||
|
||||
// Iteration functions.
|
||||
|
||||
@ -149,30 +149,6 @@ inline void G1CollectedHeap::old_set_remove(G1HeapRegion* hr) {
|
||||
_old_set.remove(hr);
|
||||
}
|
||||
|
||||
// It dirties the cards that cover the block so that the post
|
||||
// write barrier never queues anything when updating objects on this
|
||||
// block. It is assumed (and in fact we assert) that the block
|
||||
// belongs to a young region.
|
||||
inline void
|
||||
G1CollectedHeap::dirty_young_block(HeapWord* start, size_t word_size) {
|
||||
assert_heap_not_locked();
|
||||
|
||||
// Assign the containing region to containing_hr so that we don't
|
||||
// have to keep calling heap_region_containing() in the
|
||||
// asserts below.
|
||||
DEBUG_ONLY(G1HeapRegion* containing_hr = heap_region_containing(start);)
|
||||
assert(word_size > 0, "pre-condition");
|
||||
assert(containing_hr->is_in(start), "it should contain start");
|
||||
assert(containing_hr->is_young(), "it should be young");
|
||||
assert(!containing_hr->is_humongous(), "it should not be humongous");
|
||||
|
||||
HeapWord* end = start + word_size;
|
||||
assert(containing_hr->is_in(end - 1), "it should also contain end - 1");
|
||||
|
||||
MemRegion mr(start, end);
|
||||
card_table()->g1_mark_as_young(mr);
|
||||
}
|
||||
|
||||
inline G1ScannerTasksQueueSet* G1CollectedHeap::task_queues() const {
|
||||
return _task_queues;
|
||||
}
|
||||
|
||||
@ -308,7 +308,8 @@ double G1CollectionSet::finalize_young_part(double target_pause_time_ms, G1Survi
|
||||
guarantee(target_pause_time_ms > 0.0,
|
||||
"target_pause_time_ms = %1.6lf should be positive", target_pause_time_ms);
|
||||
|
||||
size_t pending_cards = _policy->pending_cards_at_gc_start();
|
||||
bool in_young_only_phase = _policy->collector_state()->in_young_only_phase();
|
||||
size_t pending_cards = _policy->analytics()->predict_pending_cards(in_young_only_phase);
|
||||
|
||||
log_trace(gc, ergo, cset)("Start choosing CSet. Pending cards: %zu target pause time: %1.2fms",
|
||||
pending_cards, target_pause_time_ms);
|
||||
@ -323,10 +324,8 @@ double G1CollectionSet::finalize_young_part(double target_pause_time_ms, G1Survi
|
||||
|
||||
verify_young_cset_indices();
|
||||
|
||||
size_t num_young_cards = _g1h->young_regions_cardset()->occupied();
|
||||
_policy->record_card_rs_length(num_young_cards);
|
||||
|
||||
double predicted_base_time_ms = _policy->predict_base_time_ms(pending_cards, num_young_cards);
|
||||
size_t card_rs_length = _policy->analytics()->predict_card_rs_length(in_young_only_phase);
|
||||
double predicted_base_time_ms = _policy->predict_base_time_ms(pending_cards, card_rs_length);
|
||||
// Base time already includes the whole remembered set related time, so do not add that here
|
||||
// again.
|
||||
double predicted_eden_time = _policy->predict_young_region_other_time_ms(eden_region_length) +
|
||||
|
||||
@ -27,6 +27,7 @@
|
||||
#include "gc/g1/g1BarrierSet.hpp"
|
||||
#include "gc/g1/g1BatchedTask.hpp"
|
||||
#include "gc/g1/g1CardSetMemory.hpp"
|
||||
#include "gc/g1/g1CardTableClaimTable.inline.hpp"
|
||||
#include "gc/g1/g1CollectedHeap.inline.hpp"
|
||||
#include "gc/g1/g1CollectionSetChooser.hpp"
|
||||
#include "gc/g1/g1CollectorState.hpp"
|
||||
@ -34,7 +35,7 @@
|
||||
#include "gc/g1/g1ConcurrentMarkRemarkTasks.hpp"
|
||||
#include "gc/g1/g1ConcurrentMarkThread.inline.hpp"
|
||||
#include "gc/g1/g1ConcurrentRebuildAndScrub.hpp"
|
||||
#include "gc/g1/g1DirtyCardQueue.hpp"
|
||||
#include "gc/g1/g1ConcurrentRefine.hpp"
|
||||
#include "gc/g1/g1HeapRegion.inline.hpp"
|
||||
#include "gc/g1/g1HeapRegionManager.hpp"
|
||||
#include "gc/g1/g1HeapRegionPrinter.hpp"
|
||||
@ -483,7 +484,7 @@ G1ConcurrentMark::G1ConcurrentMark(G1CollectedHeap* g1h,
|
||||
|
||||
// _finger set in set_non_marking_state
|
||||
|
||||
_worker_id_offset(G1DirtyCardQueueSet::num_par_ids() + G1ConcRefinementThreads),
|
||||
_worker_id_offset(G1ConcRefinementThreads), // The refinement control thread does not refine cards, so it's just the worker threads.
|
||||
_max_num_tasks(MAX2(ConcGCThreads, ParallelGCThreads)),
|
||||
// _num_active_tasks set in set_non_marking_state()
|
||||
// _tasks set inside the constructor
|
||||
@ -1141,7 +1142,7 @@ void G1ConcurrentMark::mark_from_roots() {
|
||||
// worker threads may currently exist and more may not be
|
||||
// available.
|
||||
active_workers = _concurrent_workers->set_active_workers(active_workers);
|
||||
log_info(gc, task)("Using %u workers of %u for marking", active_workers, _concurrent_workers->max_workers());
|
||||
log_info(gc, task)("Concurrent Mark Using %u of %u Workers", active_workers, _concurrent_workers->max_workers());
|
||||
|
||||
_num_concurrent_workers = active_workers;
|
||||
|
||||
|
||||
@ -580,6 +580,8 @@ public:
|
||||
// TARS for the given region during remembered set rebuilding.
|
||||
inline HeapWord* top_at_rebuild_start(G1HeapRegion* r) const;
|
||||
|
||||
uint worker_id_offset() const { return _worker_id_offset; }
|
||||
|
||||
// Clear statistics gathered during the concurrent cycle for the given region after
|
||||
// it has been reclaimed.
|
||||
void clear_statistics(G1HeapRegion* r);
|
||||
|
||||
@ -25,6 +25,7 @@
|
||||
#include "gc/g1/g1CollectedHeap.inline.hpp"
|
||||
#include "gc/g1/g1ConcurrentMark.inline.hpp"
|
||||
#include "gc/g1/g1ConcurrentMarkRemarkTasks.hpp"
|
||||
#include "gc/g1/g1ConcurrentRefine.hpp"
|
||||
#include "gc/g1/g1HeapRegion.inline.hpp"
|
||||
#include "gc/g1/g1HeapRegionPrinter.hpp"
|
||||
#include "gc/g1/g1RemSetTrackingPolicy.hpp"
|
||||
@ -54,15 +55,16 @@ struct G1UpdateRegionLivenessAndSelectForRebuildTask::G1OnRegionClosure : public
|
||||
_num_humongous_regions_removed(0),
|
||||
_local_cleanup_list(local_cleanup_list) {}
|
||||
|
||||
void reclaim_empty_region(G1HeapRegion* hr) {
|
||||
void reclaim_empty_region_common(G1HeapRegion* hr) {
|
||||
assert(!hr->has_pinned_objects(), "precondition");
|
||||
assert(hr->used() > 0, "precondition");
|
||||
|
||||
_freed_bytes += hr->used();
|
||||
hr->set_containing_set(nullptr);
|
||||
hr->clear_cardtable();
|
||||
hr->clear_both_card_tables();
|
||||
_cm->clear_statistics(hr);
|
||||
G1HeapRegionPrinter::mark_reclaim(hr);
|
||||
_g1h->concurrent_refine()->notify_region_reclaimed(hr);
|
||||
}
|
||||
|
||||
void reclaim_empty_humongous_region(G1HeapRegion* hr) {
|
||||
@ -71,8 +73,8 @@ struct G1UpdateRegionLivenessAndSelectForRebuildTask::G1OnRegionClosure : public
|
||||
auto on_humongous_region = [&] (G1HeapRegion* hr) {
|
||||
assert(hr->is_humongous(), "precondition");
|
||||
|
||||
reclaim_empty_region(hr);
|
||||
_num_humongous_regions_removed++;
|
||||
reclaim_empty_region_common(hr);
|
||||
_g1h->free_humongous_region(hr, _local_cleanup_list);
|
||||
};
|
||||
|
||||
@ -82,8 +84,8 @@ struct G1UpdateRegionLivenessAndSelectForRebuildTask::G1OnRegionClosure : public
|
||||
void reclaim_empty_old_region(G1HeapRegion* hr) {
|
||||
assert(hr->is_old(), "precondition");
|
||||
|
||||
reclaim_empty_region(hr);
|
||||
_num_old_regions_removed++;
|
||||
reclaim_empty_region_common(hr);
|
||||
_g1h->free_region(hr, _local_cleanup_list);
|
||||
}
|
||||
|
||||
|
||||
@ -245,7 +245,7 @@ class G1RebuildRSAndScrubTask : public WorkerTask {
|
||||
G1RebuildRSAndScrubRegionClosure(G1ConcurrentMark* cm, bool should_rebuild_remset, uint worker_id) :
|
||||
_cm(cm),
|
||||
_bitmap(_cm->mark_bitmap()),
|
||||
_rebuild_closure(G1CollectedHeap::heap(), worker_id),
|
||||
_rebuild_closure(G1CollectedHeap::heap(), worker_id + cm->worker_id_offset()),
|
||||
_should_rebuild_remset(should_rebuild_remset),
|
||||
_processed_words(0) { }
|
||||
|
||||
|
||||
@ -22,15 +22,20 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include "gc/g1/g1Analytics.hpp"
|
||||
#include "gc/g1/g1BarrierSet.hpp"
|
||||
#include "gc/g1/g1CardTableClaimTable.inline.hpp"
|
||||
#include "gc/g1/g1CollectedHeap.inline.hpp"
|
||||
#include "gc/g1/g1CollectionSet.hpp"
|
||||
#include "gc/g1/g1ConcurrentRefine.hpp"
|
||||
#include "gc/g1/g1ConcurrentRefineSweepTask.hpp"
|
||||
#include "gc/g1/g1ConcurrentRefineThread.hpp"
|
||||
#include "gc/g1/g1DirtyCardQueue.hpp"
|
||||
#include "gc/g1/g1HeapRegion.inline.hpp"
|
||||
#include "gc/g1/g1HeapRegionRemSet.inline.hpp"
|
||||
#include "gc/g1/g1Policy.hpp"
|
||||
#include "gc/shared/gc_globals.hpp"
|
||||
#include "gc/shared/gcTraceTime.inline.hpp"
|
||||
#include "gc/shared/workerThread.hpp"
|
||||
#include "logging/log.hpp"
|
||||
#include "memory/allocation.inline.hpp"
|
||||
#include "memory/iterator.hpp"
|
||||
@ -38,17 +43,15 @@
|
||||
#include "runtime/mutexLocker.hpp"
|
||||
#include "utilities/debug.hpp"
|
||||
#include "utilities/globalDefinitions.hpp"
|
||||
#include "utilities/ticks.hpp"
|
||||
|
||||
#include <math.h>
|
||||
|
||||
G1ConcurrentRefineThread* G1ConcurrentRefineThreadControl::create_refinement_thread(uint worker_id, bool initializing) {
|
||||
G1ConcurrentRefineThread* G1ConcurrentRefineThreadControl::create_refinement_thread() {
|
||||
G1ConcurrentRefineThread* result = nullptr;
|
||||
if (initializing || !InjectGCWorkerCreationFailure) {
|
||||
result = G1ConcurrentRefineThread::create(_cr, worker_id);
|
||||
}
|
||||
result = G1ConcurrentRefineThread::create(_cr);
|
||||
if (result == nullptr || result->osthread() == nullptr) {
|
||||
log_warning(gc)("Failed to create refinement thread %u, no more %s",
|
||||
worker_id,
|
||||
log_warning(gc)("Failed to create refinement control thread, no more %s",
|
||||
result == nullptr ? "memory" : "OS threads");
|
||||
if (result != nullptr) {
|
||||
delete result;
|
||||
@ -60,106 +63,392 @@ G1ConcurrentRefineThread* G1ConcurrentRefineThreadControl::create_refinement_thr
|
||||
|
||||
G1ConcurrentRefineThreadControl::G1ConcurrentRefineThreadControl(uint max_num_threads) :
|
||||
_cr(nullptr),
|
||||
_threads(max_num_threads)
|
||||
_control_thread(nullptr),
|
||||
_workers(nullptr),
|
||||
_max_num_threads(max_num_threads)
|
||||
{}
|
||||
|
||||
G1ConcurrentRefineThreadControl::~G1ConcurrentRefineThreadControl() {
|
||||
while (_threads.is_nonempty()) {
|
||||
delete _threads.pop();
|
||||
}
|
||||
}
|
||||
|
||||
bool G1ConcurrentRefineThreadControl::ensure_threads_created(uint worker_id, bool initializing) {
|
||||
assert(worker_id < max_num_threads(), "precondition");
|
||||
|
||||
while ((uint)_threads.length() <= worker_id) {
|
||||
G1ConcurrentRefineThread* rt = create_refinement_thread(_threads.length(), initializing);
|
||||
if (rt == nullptr) {
|
||||
return false;
|
||||
}
|
||||
_threads.push(rt);
|
||||
}
|
||||
|
||||
return true;
|
||||
delete _control_thread;
|
||||
delete _workers;
|
||||
}
|
||||
|
||||
jint G1ConcurrentRefineThreadControl::initialize(G1ConcurrentRefine* cr) {
|
||||
assert(cr != nullptr, "G1ConcurrentRefine must not be null");
|
||||
_cr = cr;
|
||||
|
||||
if (max_num_threads() > 0) {
|
||||
_threads.push(create_refinement_thread(0, true));
|
||||
if (_threads.at(0) == nullptr) {
|
||||
vm_shutdown_during_initialization("Could not allocate primary refinement thread");
|
||||
if (is_refinement_enabled()) {
|
||||
_control_thread = create_refinement_thread();
|
||||
if (_control_thread == nullptr) {
|
||||
vm_shutdown_during_initialization("Could not allocate refinement control thread");
|
||||
return JNI_ENOMEM;
|
||||
}
|
||||
|
||||
if (!UseDynamicNumberOfGCThreads) {
|
||||
if (!ensure_threads_created(max_num_threads() - 1, true)) {
|
||||
vm_shutdown_during_initialization("Could not allocate refinement threads");
|
||||
return JNI_ENOMEM;
|
||||
}
|
||||
}
|
||||
_workers = new WorkerThreads("G1 Refinement Workers", max_num_threads());
|
||||
_workers->initialize_workers();
|
||||
}
|
||||
|
||||
return JNI_OK;
|
||||
}
|
||||
|
||||
#ifdef ASSERT
|
||||
void G1ConcurrentRefineThreadControl::assert_current_thread_is_primary_refinement_thread() const {
|
||||
assert(Thread::current() == _threads.at(0), "Not primary thread");
|
||||
void G1ConcurrentRefineThreadControl::assert_current_thread_is_control_refinement_thread() const {
|
||||
assert(Thread::current() == _control_thread, "Not refinement control thread");
|
||||
}
|
||||
#endif // ASSERT
|
||||
|
||||
bool G1ConcurrentRefineThreadControl::activate(uint worker_id) {
|
||||
if (ensure_threads_created(worker_id, false)) {
|
||||
_threads.at(worker_id)->activate();
|
||||
return true;
|
||||
}
|
||||
void G1ConcurrentRefineThreadControl::activate() {
|
||||
_control_thread->activate();
|
||||
}
|
||||
|
||||
return false;
|
||||
void G1ConcurrentRefineThreadControl::run_task(WorkerTask* task, uint num_workers) {
|
||||
assert(num_workers >= 1, "must be");
|
||||
|
||||
WithActiveWorkers w(_workers, num_workers);
|
||||
_workers->run_task(task);
|
||||
}
|
||||
|
||||
void G1ConcurrentRefineThreadControl::control_thread_do(ThreadClosure* tc) {
|
||||
if (is_refinement_enabled()) {
|
||||
tc->do_thread(_control_thread);
|
||||
}
|
||||
}
|
||||
|
||||
void G1ConcurrentRefineThreadControl::worker_threads_do(ThreadClosure* tc) {
|
||||
for (G1ConcurrentRefineThread* t : _threads) {
|
||||
tc->do_thread(t);
|
||||
if (is_refinement_enabled()) {
|
||||
_workers->threads_do(tc);
|
||||
}
|
||||
}
|
||||
|
||||
void G1ConcurrentRefineThreadControl::stop() {
|
||||
for (G1ConcurrentRefineThread* t : _threads) {
|
||||
t->stop();
|
||||
if (is_refinement_enabled()) {
|
||||
_control_thread->stop();
|
||||
}
|
||||
}
|
||||
|
||||
G1ConcurrentRefineSweepState::G1ConcurrentRefineSweepState(uint max_reserved_regions) :
|
||||
_state(State::Idle),
|
||||
_sweep_table(new G1CardTableClaimTable(G1CollectedHeap::get_chunks_per_region_for_merge())),
|
||||
_stats()
|
||||
{
|
||||
_sweep_table->initialize(max_reserved_regions);
|
||||
}
|
||||
|
||||
G1ConcurrentRefineSweepState::~G1ConcurrentRefineSweepState() {
|
||||
delete _sweep_table;
|
||||
}
|
||||
|
||||
void G1ConcurrentRefineSweepState::set_state_start_time() {
|
||||
_state_start[static_cast<uint>(_state)] = Ticks::now();
|
||||
}
|
||||
|
||||
Tickspan G1ConcurrentRefineSweepState::get_duration(State start, State end) {
|
||||
return _state_start[static_cast<uint>(end)] - _state_start[static_cast<uint>(start)];
|
||||
}
|
||||
|
||||
void G1ConcurrentRefineSweepState::reset_stats() {
|
||||
stats()->reset();
|
||||
}
|
||||
|
||||
void G1ConcurrentRefineSweepState::add_yield_during_sweep_duration(jlong duration) {
|
||||
stats()->inc_yield_during_sweep_duration(duration);
|
||||
}
|
||||
|
||||
bool G1ConcurrentRefineSweepState::advance_state(State next_state) {
|
||||
bool result = is_in_progress();
|
||||
if (result) {
|
||||
_state = next_state;
|
||||
} else {
|
||||
_state = State::Idle;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
void G1ConcurrentRefineSweepState::assert_state(State expected) {
|
||||
assert(_state == expected, "must be %s but is %s", state_name(expected), state_name(_state));
|
||||
}
|
||||
|
||||
void G1ConcurrentRefineSweepState::start_work() {
|
||||
assert_state(State::Idle);
|
||||
|
||||
set_state_start_time();
|
||||
|
||||
_stats.reset();
|
||||
|
||||
_state = State::SwapGlobalCT;
|
||||
}
|
||||
|
||||
bool G1ConcurrentRefineSweepState::swap_global_card_table() {
|
||||
assert_state(State::SwapGlobalCT);
|
||||
|
||||
GCTraceTime(Info, gc, refine) tm("Concurrent Refine Global Card Table Swap");
|
||||
set_state_start_time();
|
||||
|
||||
{
|
||||
// We can't have any new threads in the process of being created while we
// swap the card table, because we read the current card table state during
// initialization.
// A safepoint may occur during that time, so leave the STS temporarily.
SuspendibleThreadSetLeaver sts_leave;

MutexLocker mu(Threads_lock);
// A GC that advanced the epoch might have happened, which already switched
// the global card table. In that case do nothing.
|
||||
if (is_in_progress()) {
|
||||
G1BarrierSet::g1_barrier_set()->swap_global_card_table();
|
||||
}
|
||||
}
|
||||
|
||||
return advance_state(State::SwapJavaThreadsCT);
|
||||
}
|
||||
|
||||
bool G1ConcurrentRefineSweepState::swap_java_threads_ct() {
|
||||
assert_state(State::SwapJavaThreadsCT);
|
||||
|
||||
GCTraceTime(Info, gc, refine) tm("Concurrent Refine Java Thread CT swap");
|
||||
|
||||
set_state_start_time();
|
||||
|
||||
{
|
||||
// Need to leave the STS to avoid potential deadlock in the handshake.
|
||||
SuspendibleThreadSetLeaver sts;
|
||||
|
||||
class G1SwapThreadCardTableClosure : public HandshakeClosure {
|
||||
public:
|
||||
G1SwapThreadCardTableClosure() : HandshakeClosure("G1 Java Thread CT swap") { }
|
||||
|
||||
virtual void do_thread(Thread* thread) {
|
||||
G1BarrierSet* bs = G1BarrierSet::g1_barrier_set();
|
||||
bs->update_card_table_base(thread);
|
||||
}
|
||||
} cl;
|
||||
Handshake::execute(&cl);
|
||||
}
|
||||
|
||||
return advance_state(State::SynchronizeGCThreads);
|
||||
}
|
||||
|
||||
bool G1ConcurrentRefineSweepState::swap_gc_threads_ct() {
|
||||
assert_state(State::SynchronizeGCThreads);
|
||||
|
||||
GCTraceTime(Info, gc, refine) tm("Concurrent Refine GC Thread CT swap");
|
||||
|
||||
set_state_start_time();
|
||||
|
||||
{
|
||||
class RendezvousGCThreads: public VM_Operation {
|
||||
public:
|
||||
VMOp_Type type() const { return VMOp_G1RendezvousGCThreads; }
|
||||
|
||||
virtual bool evaluate_at_safepoint() const {
|
||||
// We only care about synchronizing the GC threads.
|
||||
// Leave the Java threads running.
|
||||
return false;
|
||||
}
|
||||
|
||||
virtual bool skip_thread_oop_barriers() const {
|
||||
fatal("Concurrent VMOps should not call this");
|
||||
return true;
|
||||
}
|
||||
|
||||
void doit() {
|
||||
// Light weight "handshake" of the GC threads for memory synchronization;
|
||||
// both changes to the Java heap need to be synchronized as well as the
|
||||
// previous global card table reference change, so that no GC thread
|
||||
// accesses the wrong card table.
|
||||
// For example in the rebuild remset process the marking threads write
|
||||
// marks into the card table, and that card table reference must be the
|
||||
// correct one.
|
||||
SuspendibleThreadSet::synchronize();
|
||||
SuspendibleThreadSet::desynchronize();
|
||||
};
|
||||
} op;
|
||||
|
||||
SuspendibleThreadSetLeaver sts_leave;
|
||||
VMThread::execute(&op);
|
||||
}
|
||||
|
||||
return advance_state(State::SnapshotHeap);
|
||||
}
|
||||
|
||||
void G1ConcurrentRefineSweepState::snapshot_heap(bool concurrent) {
|
||||
if (concurrent) {
|
||||
GCTraceTime(Info, gc, refine) tm("Concurrent Refine Snapshot Heap");
|
||||
|
||||
assert_state(State::SnapshotHeap);
|
||||
|
||||
set_state_start_time();
|
||||
|
||||
snapshot_heap_inner();
|
||||
|
||||
advance_state(State::SweepRT);
|
||||
} else {
|
||||
assert_state(State::Idle);
|
||||
assert_at_safepoint();
|
||||
|
||||
snapshot_heap_inner();
|
||||
}
|
||||
}
|
||||
|
||||
void G1ConcurrentRefineSweepState::sweep_refinement_table_start() {
|
||||
assert_state(State::SweepRT);
|
||||
|
||||
set_state_start_time();
|
||||
}
|
||||
|
||||
bool G1ConcurrentRefineSweepState::sweep_refinement_table_step() {
|
||||
assert_state(State::SweepRT);
|
||||
|
||||
GCTraceTime(Info, gc, refine) tm("Concurrent Refine Table Step");
|
||||
|
||||
G1ConcurrentRefine* cr = G1CollectedHeap::heap()->concurrent_refine();
|
||||
|
||||
G1ConcurrentRefineSweepTask task(_sweep_table, &_stats, cr->num_threads_wanted());
|
||||
cr->run_with_refinement_workers(&task);
|
||||
|
||||
if (task.sweep_completed()) {
|
||||
advance_state(State::CompleteRefineWork);
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
bool G1ConcurrentRefineSweepState::complete_work(bool concurrent, bool print_log) {
|
||||
if (concurrent) {
|
||||
assert_state(State::CompleteRefineWork);
|
||||
} else {
|
||||
// May have been forced to complete at any other time.
|
||||
assert(is_in_progress() && _state != State::CompleteRefineWork, "must be but is %s", state_name(_state));
|
||||
}
|
||||
|
||||
set_state_start_time();
|
||||
|
||||
if (print_log) {
|
||||
G1ConcurrentRefineStats* s = &_stats;
|
||||
|
||||
log_debug(gc, refine)("Refinement took %.2fms (pre-sweep %.2fms card refine %.2f) "
|
||||
"(scanned %zu clean %zu (%.2f%%) not_clean %zu (%.2f%%) not_parsable %zu "
|
||||
"refers_to_cset %zu (%.2f%%) still_refers_to_cset %zu (%.2f%%) no_cross_region %zu pending %zu)",
|
||||
get_duration(State::Idle, _state).seconds() * 1000.0,
|
||||
get_duration(State::Idle, State::SweepRT).seconds() * 1000.0,
|
||||
TimeHelper::counter_to_millis(s->refine_duration()),
|
||||
s->cards_scanned(),
|
||||
s->cards_clean(),
|
||||
percent_of(s->cards_clean(), s->cards_scanned()),
|
||||
s->cards_not_clean(),
|
||||
percent_of(s->cards_not_clean(), s->cards_scanned()),
|
||||
s->cards_not_parsable(),
|
||||
s->cards_refer_to_cset(),
|
||||
percent_of(s->cards_refer_to_cset(), s->cards_not_clean()),
|
||||
s->cards_already_refer_to_cset(),
|
||||
percent_of(s->cards_already_refer_to_cset(), s->cards_not_clean()),
|
||||
s->cards_no_cross_region(),
|
||||
s->cards_pending()
|
||||
);
|
||||
}
|
||||
|
||||
bool has_sweep_rt_work = _state == State::SweepRT;
|
||||
|
||||
advance_state(State::Idle);
|
||||
return has_sweep_rt_work;
|
||||
}
|
||||
|
||||
void G1ConcurrentRefineSweepState::snapshot_heap_inner() {
|
||||
// G1CollectedHeap::heap_region_iterate() below will only visit currently committed
// regions. Initialize all entries in the state table here and later in this method
// selectively enable the regions that we are interested in. This way regions
// committed later will automatically be excluded from iteration.
// Their refinement table must be completely empty anyway.
|
||||
_sweep_table->reset_all_to_claimed();
|
||||
|
||||
class SnapshotRegionsClosure : public G1HeapRegionClosure {
|
||||
G1CardTableClaimTable* _sweep_table;
|
||||
|
||||
public:
|
||||
SnapshotRegionsClosure(G1CardTableClaimTable* sweep_table) : G1HeapRegionClosure(), _sweep_table(sweep_table) { }
|
||||
|
||||
bool do_heap_region(G1HeapRegion* r) override {
|
||||
if (!r->is_free()) {
|
||||
// Need to scan all parts of non-free regions, so reset the claim.
|
||||
// No need for synchronization: we are only interested in regions
|
||||
// that were allocated before the handshake; the handshake makes such
|
||||
// regions' metadata visible to all threads, and we do not care about
|
||||
// humongous regions that were allocated afterwards.
|
||||
_sweep_table->reset_to_unclaimed(r->hrm_index());
|
||||
}
|
||||
return false;
|
||||
}
|
||||
} cl(_sweep_table);
|
||||
G1CollectedHeap::heap()->heap_region_iterate(&cl);
|
||||
}
|
||||
|
||||
bool G1ConcurrentRefineSweepState::is_in_progress() const {
|
||||
return _state != State::Idle;
|
||||
}
|
||||
|
||||
bool G1ConcurrentRefineSweepState::are_java_threads_synched() const {
|
||||
return _state > State::SwapJavaThreadsCT || !is_in_progress();
|
||||
}
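
To summarize the state machine implemented above, here is a sketch of the order in which the refinement control thread could drive one sweep. It is illustrative only (run_one_refinement_sweep is a made-up name, and the real driver also handles thread-count adjustment and yielding); the calls and state transitions are the ones defined in this file.

// Illustrative only: one complete sweep, assuming no GC interrupts it.
void run_one_refinement_sweep(G1ConcurrentRefineSweepState& state) {
  state.start_work();                             // Idle -> SwapGlobalCT
  if (!state.swap_global_card_table()) return;    // -> SwapJavaThreadsCT (false: a GC reset us to Idle)
  if (!state.swap_java_threads_ct()) return;      // -> SynchronizeGCThreads
  if (!state.swap_gc_threads_ct()) return;        // -> SnapshotHeap
  state.snapshot_heap(true /* concurrent */);     // -> SweepRT
  state.sweep_refinement_table_start();
  // Each step runs one G1ConcurrentRefineSweepTask on the refinement workers;
  // the real control thread yields between steps.
  while (!state.sweep_refinement_table_step()) { }
  state.complete_work(true /* concurrent */, true /* print_log */);  // -> Idle
}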
|
||||
|
||||
uint64_t G1ConcurrentRefine::adjust_threads_period_ms() const {
|
||||
// Instead of a fixed value, this could be a command line option. But then
|
||||
// we might also want to allow configuration of adjust_threads_wait_ms().
|
||||
return 50;
|
||||
|
||||
// Use a prime number close to 50ms, different to other components that derive
|
||||
// their wait time from the try_get_available_bytes_estimate() call to minimize
|
||||
// interference.
|
||||
return 53;
|
||||
}
|
||||
|
||||
static size_t minimum_pending_cards_target() {
|
||||
// One buffer per thread.
|
||||
return ParallelGCThreads * G1UpdateBufferSize;
|
||||
return ParallelGCThreads * G1PerThreadPendingCardThreshold;
|
||||
}
|
||||
|
||||
G1ConcurrentRefine::G1ConcurrentRefine(G1Policy* policy) :
|
||||
_policy(policy),
|
||||
_threads_wanted(0),
|
||||
G1ConcurrentRefine::G1ConcurrentRefine(G1CollectedHeap* g1h) :
|
||||
_policy(g1h->policy()),
|
||||
_num_threads_wanted(0),
|
||||
_pending_cards_target(PendingCardsTargetUninitialized),
|
||||
_last_adjust(),
|
||||
_needs_adjust(false),
|
||||
_threads_needed(policy, adjust_threads_period_ms()),
|
||||
_heap_was_locked(false),
|
||||
_threads_needed(g1h->policy(), adjust_threads_period_ms()),
|
||||
_thread_control(G1ConcRefinementThreads),
|
||||
_dcqs(G1BarrierSet::dirty_card_queue_set())
|
||||
{}
|
||||
_sweep_state(g1h->max_num_regions())
|
||||
{ }
|
||||
|
||||
jint G1ConcurrentRefine::initialize() {
|
||||
return _thread_control.initialize(this);
|
||||
}
|
||||
|
||||
G1ConcurrentRefine* G1ConcurrentRefine::create(G1Policy* policy, jint* ecode) {
|
||||
G1ConcurrentRefine* cr = new G1ConcurrentRefine(policy);
|
||||
G1ConcurrentRefineSweepState& G1ConcurrentRefine::sweep_state_for_merge() {
|
||||
bool has_sweep_claims = sweep_state().complete_work(false /* concurrent */);
|
||||
if (has_sweep_claims) {
|
||||
log_debug(gc, refine)("Continue existing work");
|
||||
} else {
|
||||
// Refinement has been interrupted without having a snapshot. There may
// be a mix of already-swapped and not-yet-swapped card tables assigned to threads,
// so they might have already dirtied the swapped card tables.
// Conservatively scan all (non-free, committed) regions' card tables,
// creating the snapshot right now.
|
||||
log_debug(gc, refine)("Create work from scratch");
|
||||
|
||||
sweep_state().snapshot_heap(false /* concurrent */);
|
||||
}
|
||||
return sweep_state();
|
||||
}
|
||||
|
||||
void G1ConcurrentRefine::run_with_refinement_workers(WorkerTask* task) {
|
||||
_thread_control.run_task(task, num_threads_wanted());
|
||||
}
|
||||
|
||||
void G1ConcurrentRefine::notify_region_reclaimed(G1HeapRegion* r) {
|
||||
assert_at_safepoint();
|
||||
if (_sweep_state.is_in_progress()) {
|
||||
_sweep_state.sweep_table()->claim_all_cards(r->hrm_index());
|
||||
}
|
||||
}
|
||||
|
||||
G1ConcurrentRefine* G1ConcurrentRefine::create(G1CollectedHeap* g1h, jint* ecode) {
|
||||
G1ConcurrentRefine* cr = new G1ConcurrentRefine(g1h);
|
||||
*ecode = cr->initialize();
|
||||
if (*ecode != 0) {
|
||||
delete cr;
|
||||
@ -176,25 +465,31 @@ G1ConcurrentRefine::~G1ConcurrentRefine() {
|
||||
}
|
||||
|
||||
void G1ConcurrentRefine::threads_do(ThreadClosure *tc) {
|
||||
worker_threads_do(tc);
|
||||
control_thread_do(tc);
|
||||
}
|
||||
|
||||
void G1ConcurrentRefine::worker_threads_do(ThreadClosure *tc) {
|
||||
_thread_control.worker_threads_do(tc);
|
||||
}
|
||||
|
||||
void G1ConcurrentRefine::update_pending_cards_target(double logged_cards_time_ms,
|
||||
size_t processed_logged_cards,
|
||||
size_t predicted_thread_buffer_cards,
|
||||
void G1ConcurrentRefine::control_thread_do(ThreadClosure *tc) {
|
||||
_thread_control.control_thread_do(tc);
|
||||
}
|
||||
|
||||
void G1ConcurrentRefine::update_pending_cards_target(double pending_cards_time_ms,
|
||||
size_t processed_pending_cards,
|
||||
double goal_ms) {
|
||||
size_t minimum = minimum_pending_cards_target();
|
||||
if ((processed_logged_cards < minimum) || (logged_cards_time_ms == 0.0)) {
|
||||
log_debug(gc, ergo, refine)("Unchanged pending cards target: %zu",
|
||||
_pending_cards_target);
|
||||
if ((processed_pending_cards < minimum) || (pending_cards_time_ms == 0.0)) {
|
||||
log_debug(gc, ergo, refine)("Unchanged pending cards target: %zu (processed %zu minimum %zu time %1.2f)",
|
||||
_pending_cards_target, processed_pending_cards, minimum, pending_cards_time_ms);
|
||||
return;
|
||||
}
|
||||
|
||||
// Base the pending cards budget on the measured rate.
|
||||
double rate = processed_logged_cards / logged_cards_time_ms;
|
||||
size_t budget = static_cast<size_t>(goal_ms * rate);
|
||||
// Deduct predicted cards in thread buffers to get target.
|
||||
size_t new_target = budget - MIN2(budget, predicted_thread_buffer_cards);
|
||||
double rate = processed_pending_cards / pending_cards_time_ms;
|
||||
size_t new_target = static_cast<size_t>(goal_ms * rate);
|
||||
// Add some hysteresis with previous values.
|
||||
if (is_pending_cards_target_initialized()) {
|
||||
new_target = (new_target + _pending_cards_target) / 2;
|
||||
@ -205,46 +500,36 @@ void G1ConcurrentRefine::update_pending_cards_target(double logged_cards_time_ms
|
||||
log_debug(gc, ergo, refine)("New pending cards target: %zu", new_target);
|
||||
}
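
A quick worked example of the target update above, with purely illustrative numbers:

// Illustrative only: suppose the last GC processed 20000 pending cards in 10.0 ms,
// the goal is 2.0 ms, and the previous _pending_cards_target was 6000.
//   rate       = 20000 / 10.0 ms   = 2000 cards/ms
//   new_target = 2.0 ms * 2000     = 4000 cards
//   hysteresis = (4000 + 6000) / 2 = 5000  -> the new pending cards target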
|
||||
|
||||
void G1ConcurrentRefine::adjust_after_gc(double logged_cards_time_ms,
|
||||
size_t processed_logged_cards,
|
||||
size_t predicted_thread_buffer_cards,
|
||||
void G1ConcurrentRefine::adjust_after_gc(double pending_cards_time_ms,
|
||||
size_t processed_pending_cards,
|
||||
double goal_ms) {
|
||||
if (!G1UseConcRefinement) return;
|
||||
if (!G1UseConcRefinement) {
|
||||
return;
|
||||
}
|
||||
|
||||
update_pending_cards_target(logged_cards_time_ms,
|
||||
processed_logged_cards,
|
||||
predicted_thread_buffer_cards,
|
||||
update_pending_cards_target(pending_cards_time_ms,
|
||||
processed_pending_cards,
|
||||
goal_ms);
|
||||
if (_thread_control.max_num_threads() == 0) {
|
||||
// If no refinement threads then the mutator threshold is the target.
|
||||
_dcqs.set_mutator_refinement_threshold(_pending_cards_target);
|
||||
} else {
|
||||
// Provisionally make the mutator threshold unlimited, to be updated by
|
||||
// the next periodic adjustment. Because card state may have changed
|
||||
// drastically, record that adjustment is needed and kick the primary
|
||||
// thread, in case it is waiting.
|
||||
_dcqs.set_mutator_refinement_threshold(SIZE_MAX);
|
||||
if (_thread_control.is_refinement_enabled()) {
|
||||
_needs_adjust = true;
|
||||
if (is_pending_cards_target_initialized()) {
|
||||
_thread_control.activate(0);
|
||||
_thread_control.activate();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Wake up the primary thread less frequently when the time available until
|
||||
// the next GC is longer. But don't increase the wait time too rapidly.
|
||||
// This reduces the number of primary thread wakeups that just immediately
|
||||
// go back to waiting, while still being responsive to behavior changes.
|
||||
static uint64_t compute_adjust_wait_time_ms(double available_ms) {
|
||||
return static_cast<uint64_t>(sqrt(available_ms) * 4.0);
|
||||
}
|
||||
|
||||
uint64_t G1ConcurrentRefine::adjust_threads_wait_ms() const {
|
||||
assert_current_thread_is_primary_refinement_thread();
|
||||
assert_current_thread_is_control_refinement_thread();
|
||||
if (is_pending_cards_target_initialized()) {
|
||||
double available_ms = _threads_needed.predicted_time_until_next_gc_ms();
|
||||
uint64_t wait_time_ms = compute_adjust_wait_time_ms(available_ms);
|
||||
return MAX2(wait_time_ms, adjust_threads_period_ms());
|
||||
// Retry as soon as possible when the reason for not getting a prediction was that
// we temporarily could not take the heap lock. Otherwise we might wait too long
// until we get back here.
|
||||
if (_heap_was_locked) {
|
||||
return 1;
|
||||
}
|
||||
double available_time_ms = _threads_needed.predicted_time_until_next_gc_ms();
|
||||
|
||||
return _policy->adjust_wait_time_ms(available_time_ms, adjust_threads_period_ms());
|
||||
} else {
|
||||
// If target not yet initialized then wait forever (until explicitly
|
||||
// activated). This happens during startup, when we don't bother with
|
||||
@ -253,185 +538,74 @@ uint64_t G1ConcurrentRefine::adjust_threads_wait_ms() const {
|
||||
}
|
||||
}
|
||||
|
||||
class G1ConcurrentRefine::RemSetSamplingClosure : public G1HeapRegionClosure {
|
||||
size_t _sampled_code_root_rs_length;
|
||||
bool G1ConcurrentRefine::adjust_num_threads_periodically() {
|
||||
assert_current_thread_is_control_refinement_thread();
|
||||
|
||||
public:
|
||||
RemSetSamplingClosure() :
|
||||
_sampled_code_root_rs_length(0) {}
|
||||
|
||||
bool do_heap_region(G1HeapRegion* r) override {
|
||||
G1HeapRegionRemSet* rem_set = r->rem_set();
|
||||
_sampled_code_root_rs_length += rem_set->code_roots_list_length();
|
||||
return false;
|
||||
}
|
||||
|
||||
size_t sampled_code_root_rs_length() const { return _sampled_code_root_rs_length; }
|
||||
};
|
||||
|
||||
// Adjust the target length (in regions) of the young gen, based on the
|
||||
// current length of the remembered sets.
|
||||
//
|
||||
// At the end of the GC G1 determines the length of the young gen based on
|
||||
// how much time the next GC can take, and when the next GC may occur
|
||||
// according to the MMU.
|
||||
//
|
||||
// The assumption is that a significant part of the GC is spent on scanning
|
||||
// the remembered sets (and many other components), so this thread constantly
|
||||
// reevaluates the prediction for the remembered set scanning costs, and potentially
|
||||
// resizes the young gen. This may do a premature GC or even increase the young
|
||||
// gen size to keep pause time length goal.
|
||||
void G1ConcurrentRefine::adjust_young_list_target_length() {
|
||||
if (_policy->use_adaptive_young_list_length()) {
|
||||
G1CollectedHeap* g1h = G1CollectedHeap::heap();
|
||||
G1CollectionSet* cset = g1h->collection_set();
|
||||
RemSetSamplingClosure cl;
|
||||
cset->iterate(&cl);
|
||||
|
||||
size_t card_rs_length = g1h->young_regions_cardset()->occupied();
|
||||
|
||||
size_t sampled_code_root_rs_length = cl.sampled_code_root_rs_length();
|
||||
_policy->revise_young_list_target_length(card_rs_length, sampled_code_root_rs_length);
|
||||
}
|
||||
}
|
||||
|
||||
bool G1ConcurrentRefine::adjust_threads_periodically() {
|
||||
assert_current_thread_is_primary_refinement_thread();
|
||||
|
||||
// Check whether it's time to do a periodic adjustment.
|
||||
_heap_was_locked = false;
|
||||
// Check whether it's time to do a periodic adjustment if there is no explicit
|
||||
// request pending. We might have spuriously woken up.
|
||||
if (!_needs_adjust) {
|
||||
Tickspan since_adjust = Ticks::now() - _last_adjust;
|
||||
if (since_adjust.milliseconds() >= adjust_threads_period_ms()) {
|
||||
_needs_adjust = true;
|
||||
if (since_adjust.milliseconds() < adjust_threads_period_ms()) {
|
||||
_num_threads_wanted = 0;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// If needed, try to adjust threads wanted.
|
||||
if (_needs_adjust) {
|
||||
// Getting used young bytes requires holding Heap_lock. But we can't use
|
||||
// normal lock and block until available. Blocking on the lock could
|
||||
// deadlock with a GC VMOp that is holding the lock and requesting a
|
||||
// safepoint. Instead try to lock, and if fail then skip adjustment for
|
||||
// this iteration of the thread, do some refinement work, and retry the
|
||||
// adjustment later.
|
||||
if (Heap_lock->try_lock()) {
|
||||
size_t used_bytes = _policy->estimate_used_young_bytes_locked();
|
||||
Heap_lock->unlock();
|
||||
adjust_young_list_target_length();
|
||||
size_t young_bytes = _policy->young_list_target_length() * G1HeapRegion::GrainBytes;
|
||||
size_t available_bytes = young_bytes - MIN2(young_bytes, used_bytes);
|
||||
adjust_threads_wanted(available_bytes);
|
||||
_needs_adjust = false;
|
||||
_last_adjust = Ticks::now();
|
||||
return true;
|
||||
}
|
||||
// Reset pending request.
|
||||
_needs_adjust = false;
|
||||
size_t available_bytes = 0;
|
||||
if (_policy->try_get_available_bytes_estimate(available_bytes)) {
|
||||
adjust_threads_wanted(available_bytes);
|
||||
_last_adjust = Ticks::now();
|
||||
} else {
|
||||
_heap_was_locked = true;
|
||||
// Defer adjustment to next time.
|
||||
_needs_adjust = true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool G1ConcurrentRefine::is_in_last_adjustment_period() const {
|
||||
return _threads_needed.predicted_time_until_next_gc_ms() <= adjust_threads_period_ms();
|
||||
return (_num_threads_wanted > 0) && !heap_was_locked();
|
||||
}
|
||||
|
||||
void G1ConcurrentRefine::adjust_threads_wanted(size_t available_bytes) {
  assert_current_thread_is_primary_refinement_thread();
  size_t num_cards = _dcqs.num_cards();
  size_t mutator_threshold = SIZE_MAX;
  uint old_wanted = AtomicAccess::load(&_threads_wanted);
  assert_current_thread_is_control_refinement_thread();

  _threads_needed.update(old_wanted,
  G1Policy* policy = G1CollectedHeap::heap()->policy();
  const G1Analytics* analytics = policy->analytics();

  size_t num_cards = policy->current_pending_cards();

  _threads_needed.update(_num_threads_wanted,
                         available_bytes,
                         num_cards,
                         _pending_cards_target);
  uint new_wanted = _threads_needed.threads_needed();
  if (new_wanted > _thread_control.max_num_threads()) {
    // If running all the threads can't reach goal, turn on refinement by
    // mutator threads. Using target as the threshold may be stronger
    // than required, but will do the most to get us under goal, and we'll
    // reevaluate with the next adjustment.
    mutator_threshold = _pending_cards_target;
    // Bound the wanted threads by maximum available.
    new_wanted = _thread_control.max_num_threads();
  } else if (is_in_last_adjustment_period()) {
    // If very little time remains until GC, enable mutator refinement. If
    // the target has been reached, this keeps the number of pending cards on
    // target even if refinement threads deactivate in the meantime. And if
    // the target hasn't been reached, this prevents things from getting
    // worse.
    mutator_threshold = _pending_cards_target;
  }
  AtomicAccess::store(&_threads_wanted, new_wanted);
  _dcqs.set_mutator_refinement_threshold(mutator_threshold);
  log_debug(gc, refine)("Concurrent refinement: wanted %u, cards: %zu, "
                        "predicted: %zu, time: %1.2fms",

  _num_threads_wanted = new_wanted;

  log_debug(gc, refine)("Concurrent refinement: wanted %u, pending cards: %zu (pending-from-gc %zu), "
                        "predicted: %zu, goal %zu, time-until-next-gc: %1.2fms pred-refine-rate %1.2fc/ms log-rate %1.2fc/ms",
                        new_wanted,
                        num_cards,
                        G1CollectedHeap::heap()->policy()->pending_cards_from_gc(),
                        _threads_needed.predicted_cards_at_next_gc(),
                        _threads_needed.predicted_time_until_next_gc_ms());
  // Activate newly wanted threads. The current thread is the primary
  // refinement thread, so is already active.
  for (uint i = MAX2(old_wanted, 1u); i < new_wanted; ++i) {
    if (!_thread_control.activate(i)) {
      // Failed to allocate and activate thread. Stop trying to activate, and
      // instead use mutator threads to make up the gap.
      AtomicAccess::store(&_threads_wanted, i);
      _dcqs.set_mutator_refinement_threshold(_pending_cards_target);
      break;
    }
  }
}
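A simplified model, using hypothetical names, of the threshold decision above: the dedicated threads are capped at the maximum, and mutator refinement is enabled when even all of them are predicted to miss the goal, or when a GC is imminent. The real code additionally activates the newly wanted threads.

#include <cstddef>
#include <cstdint>
#include <limits>

// Simplified, illustrative model of the decision above; all names are hypothetical.
struct ThreadPlan {
  uint32_t threads_wanted;   // Number of dedicated refinement threads to run.
  size_t mutator_threshold;  // Pending-card count above which mutators refine.
};

ThreadPlan plan_refinement(uint32_t needed, uint32_t max_threads,
                           size_t pending_cards_target, bool gc_imminent) {
  ThreadPlan plan{needed, std::numeric_limits<size_t>::max()};
  if (needed > max_threads) {
    // Dedicated threads alone cannot reach the goal: cap them and let
    // mutator threads help once the target is exceeded.
    plan.threads_wanted = max_threads;
    plan.mutator_threshold = pending_cards_target;
  } else if (gc_imminent) {
    // Close to the next GC, hold the line at the target even if the
    // dedicated threads deactivate in the meantime.
    plan.mutator_threshold = pending_cards_target;
  }
  return plan;
}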
|
||||
|
||||
void G1ConcurrentRefine::reduce_threads_wanted() {
  assert_current_thread_is_primary_refinement_thread();
  if (!_needs_adjust) { // Defer if adjustment request is active.
    uint wanted = AtomicAccess::load(&_threads_wanted);
    if (wanted > 0) {
      AtomicAccess::store(&_threads_wanted, --wanted);
    }
    // If very little time remains until GC, enable mutator refinement. If
    // the target has been reached, this keeps the number of pending cards on
    // target even as refinement threads deactivate in the meantime.
    if (is_in_last_adjustment_period()) {
      _dcqs.set_mutator_refinement_threshold(_pending_cards_target);
    }
  }
}

bool G1ConcurrentRefine::is_thread_wanted(uint worker_id) const {
  return worker_id < AtomicAccess::load(&_threads_wanted);
                        _pending_cards_target,
                        _threads_needed.predicted_time_until_next_gc_ms(),
                        analytics->predict_concurrent_refine_rate_ms(),
                        analytics->predict_dirtied_cards_rate_ms()
                        );
}

bool G1ConcurrentRefine::is_thread_adjustment_needed() const {
  assert_current_thread_is_primary_refinement_thread();
  assert_current_thread_is_control_refinement_thread();
  return _needs_adjust;
}
|
||||
|
||||
void G1ConcurrentRefine::record_thread_adjustment_needed() {
  assert_current_thread_is_primary_refinement_thread();
  assert_current_thread_is_control_refinement_thread();
  _needs_adjust = true;
}

G1ConcurrentRefineStats G1ConcurrentRefine::get_and_reset_refinement_stats() {
  struct CollectStats : public ThreadClosure {
    G1ConcurrentRefineStats _total_stats;
    virtual void do_thread(Thread* t) {
      G1ConcurrentRefineThread* crt = static_cast<G1ConcurrentRefineThread*>(t);
      G1ConcurrentRefineStats& stats = *crt->refinement_stats();
      _total_stats += stats;
      stats.reset();
    }
  } collector;
  threads_do(&collector);
  return collector._total_stats;
}

uint G1ConcurrentRefine::worker_id_offset() {
  return G1DirtyCardQueueSet::num_par_ids();
}

bool G1ConcurrentRefine::try_refinement_step(uint worker_id,
                                             size_t stop_at,
                                             G1ConcurrentRefineStats* stats) {
  uint adjusted_id = worker_id + worker_id_offset();
  return _dcqs.refine_completed_buffer_concurrently(adjusted_id, stop_at, stats);
}
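The stats collection above follows the usual accumulate-and-reset closure pattern: a small closure visits every refinement thread, adds its statistics into a running total, and resets the per-thread counters. The sketch below shows the same idea on invented types (Counter, Visitor, visit_all); it is not the HotSpot API.

#include <cstddef>
#include <vector>

// Hypothetical stand-ins for G1ConcurrentRefineStats and threads_do().
struct Counter { size_t cards = 0; void reset() { cards = 0; } };

struct Visitor { virtual void visit(Counter& c) = 0; virtual ~Visitor() = default; };

void visit_all(std::vector<Counter>& per_thread, Visitor& v) {
  for (Counter& c : per_thread) v.visit(c);
}

// Accumulate every per-thread counter into a total, resetting each source so
// the next collection interval starts from zero, mirroring the closure above.
Counter drain_totals(std::vector<Counter>& per_thread) {
  struct Collect : Visitor {
    Counter total;
    void visit(Counter& c) override { total.cards += c.cards; c.reset(); }
  } collector;
  visit_all(per_thread, collector);
  return collector.total;
}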
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2001, 2023, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2001, 2025, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -34,23 +34,28 @@
|
||||
#include "utilities/macros.hpp"
|
||||
|
||||
// Forward decl
|
||||
class G1CardTableClaimTable;
|
||||
class G1CollectedHeap;
|
||||
class G1ConcurrentRefine;
|
||||
class G1ConcurrentRefineThread;
|
||||
class G1DirtyCardQueueSet;
|
||||
class G1HeapRegion;
|
||||
class G1Policy;
|
||||
class ThreadClosure;
|
||||
class WorkerTask;
|
||||
class WorkerThreads;
|
||||
|
||||
// Helper class for refinement thread management. Used to start, stop and
|
||||
// iterate over them.
|
||||
class G1ConcurrentRefineThreadControl {
|
||||
G1ConcurrentRefine* _cr;
|
||||
GrowableArrayCHeap<G1ConcurrentRefineThread*, mtGC> _threads;
|
||||
G1ConcurrentRefineThread* _control_thread;
|
||||
|
||||
WorkerThreads* _workers;
|
||||
uint _max_num_threads;
|
||||
|
||||
// Create the refinement thread for the given worker id.
|
||||
// If initializing is true, ignore InjectGCWorkerCreationFailure.
|
||||
G1ConcurrentRefineThread* create_refinement_thread(uint worker_id, bool initializing);
|
||||
|
||||
bool ensure_threads_created(uint worker_id, bool initializing);
|
||||
G1ConcurrentRefineThread* create_refinement_thread();
|
||||
|
||||
NONCOPYABLE(G1ConcurrentRefineThreadControl);
|
||||
|
||||
@ -60,21 +65,119 @@ public:
|
||||
|
||||
jint initialize(G1ConcurrentRefine* cr);
|
||||
|
||||
void assert_current_thread_is_primary_refinement_thread() const NOT_DEBUG_RETURN;
|
||||
void assert_current_thread_is_control_refinement_thread() const NOT_DEBUG_RETURN;
|
||||
|
||||
uint max_num_threads() const { return _threads.capacity(); }
|
||||
uint max_num_threads() const { return _max_num_threads; }
|
||||
bool is_refinement_enabled() const { return _max_num_threads > 0; }
|
||||
|
||||
// Activate the indicated thread. If the thread has not yet been allocated,
|
||||
// allocate and then activate. If allocation is needed and fails, return
|
||||
// false. Otherwise return true.
|
||||
// precondition: worker_id < max_num_threads().
|
||||
// precondition: current thread is not the designated worker.
|
||||
bool activate(uint worker_id);
|
||||
// Activate the control thread.
|
||||
void activate();
|
||||
|
||||
void run_task(WorkerTask* task, uint num_workers);
|
||||
|
||||
void control_thread_do(ThreadClosure* tc);
|
||||
void worker_threads_do(ThreadClosure* tc);
|
||||
void stop();
|
||||
};
|
||||
|
||||
// Tracks the current state of re-examining the dirty cards from idle to completion
// (and reset back to idle).
//
// The process steps are as follows:
//
// 1) Swap global card table pointers
//
// 2) Swap Java Thread's card table pointers
//
// 3) Synchronize GC Threads
//    Ensures memory visibility
//
// After this point mutator threads should not mark the refinement table.
//
// 4) Snapshot the heap
//    Determines which regions need to be swept.
//
// 5) Sweep Refinement table
//    Examines non-Clean cards on the refinement table.
//
// 6) Completion Work
//    Calculates statistics about the process to be used in various parts of
//    the garbage collection.
//
// All but step 4 are interruptible by safepoints. In case of a garbage collection,
// the garbage collection will interrupt this process, and go to Idle state.
//
|
||||
class G1ConcurrentRefineSweepState {
|
||||
|
||||
enum class State : uint {
|
||||
Idle, // Refinement is doing nothing.
|
||||
SwapGlobalCT, // Swap global card table.
|
||||
SwapJavaThreadsCT, // Swap java thread's card tables.
|
||||
SynchronizeGCThreads, // Synchronize GC thread's memory view.
|
||||
SnapshotHeap, // Take a snapshot of the region's top() values.
|
||||
SweepRT, // Sweep the refinement table for pending (dirty) cards.
|
||||
CompleteRefineWork, // Cleanup of refinement work, reset to idle.
|
||||
Last
|
||||
} _state;
|
||||
|
||||
static const char* state_name(State state) {
|
||||
static const char* _state_names[] = {
|
||||
"Idle",
|
||||
"Swap Global Card Table",
|
||||
"Swap JavaThread Card Table",
|
||||
"Synchronize GC Threads",
|
||||
"Snapshot Heap",
|
||||
"Sweep Refinement Table",
|
||||
"Complete Sweep Work"
|
||||
};
|
||||
|
||||
return _state_names[static_cast<uint>(state)];
|
||||
}
|
||||
|
||||
// Current heap snapshot.
|
||||
G1CardTableClaimTable* _sweep_table;
|
||||
|
||||
// Start times for all states.
|
||||
Ticks _state_start[static_cast<uint>(State::Last)];
|
||||
|
||||
void set_state_start_time();
|
||||
Tickspan get_duration(State start, State end);
|
||||
|
||||
G1ConcurrentRefineStats _stats;
|
||||
|
||||
// Advances the state to next_state if not interrupted by a changed epoch. Returns
|
||||
// to Idle otherwise.
|
||||
bool advance_state(State next_state);
|
||||
|
||||
void assert_state(State expected);
|
||||
|
||||
void snapshot_heap_inner();
|
||||
|
||||
public:
|
||||
G1ConcurrentRefineSweepState(uint max_reserved_regions);
|
||||
~G1ConcurrentRefineSweepState();
|
||||
|
||||
void start_work();
|
||||
|
||||
bool swap_global_card_table();
|
||||
bool swap_java_threads_ct();
|
||||
bool swap_gc_threads_ct();
|
||||
void snapshot_heap(bool concurrent = true);
|
||||
void sweep_refinement_table_start();
|
||||
bool sweep_refinement_table_step();
|
||||
|
||||
bool complete_work(bool concurrent, bool print_log = true);
|
||||
|
||||
G1CardTableClaimTable* sweep_table() { return _sweep_table; }
|
||||
G1ConcurrentRefineStats* stats() { return &_stats; }
|
||||
void reset_stats();
|
||||
|
||||
void add_yield_during_sweep_duration(jlong duration);
|
||||
|
||||
bool is_in_progress() const;
|
||||
bool are_java_threads_synched() const;
|
||||
};
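For orientation, a condensed, illustrative driver for a state machine like the one above, assuming each phase reports whether a safepoint or GC interrupted it, in which case the process falls back to Idle. The SweepPhase names and the step() body are simplified stand-ins; the real loop lives in the refinement control thread's do_refinement() shown further below.

// Condensed, illustrative driver; names and the step() body are stand-ins.
enum class SweepPhase { SwapGlobal, SwapJavaThreads, SyncGCThreads,
                        Snapshot, Sweep, Complete, Idle };

struct SweepMachine {
  SweepPhase phase = SweepPhase::Idle;
  // Returns false if a safepoint/GC interrupted this phase (placeholder body).
  bool step(SweepPhase p) { phase = p; return true; }
};

// Drive one full refinement pass; fall back to Idle if any phase is interrupted.
bool run_sweep(SweepMachine& m) {
  const SweepPhase order[] = { SweepPhase::SwapGlobal, SweepPhase::SwapJavaThreads,
                               SweepPhase::SyncGCThreads, SweepPhase::Snapshot,
                               SweepPhase::Sweep, SweepPhase::Complete };
  for (SweepPhase p : order) {
    if (!m.step(p)) {
      m.phase = SweepPhase::Idle;  // Interrupted; a GC will redo the work.
      return false;
    }
  }
  m.phase = SweepPhase::Idle;  // Pass finished; reset for the next cycle.
  return true;
}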
|
||||
|
||||
// Controls concurrent refinement.
|
||||
//
|
||||
// Mutator threads produce dirty cards, which need to be examined for updates
|
||||
@ -84,49 +187,43 @@ public:
|
||||
// pending dirty cards at the start of a GC can be processed within that time
|
||||
// budget.
|
||||
//
|
||||
// Concurrent refinement is performed by a combination of dedicated threads
|
||||
// and by mutator threads as they produce dirty cards. If configured to not
|
||||
// have any dedicated threads (-XX:G1ConcRefinementThreads=0) then all
|
||||
// concurrent refinement work is performed by mutator threads. When there are
|
||||
// dedicated threads, they generally do most of the concurrent refinement
|
||||
// work, to minimize throughput impact of refinement work on mutator threads.
|
||||
// Concurrent refinement is performed by a set of dedicated threads. If configured
|
||||
// to not have any dedicated threads (-XX:G1ConcRefinementThreads=0) then no
|
||||
// refinement work is performed at all.
|
||||
//
|
||||
// This class determines the target number of dirty cards pending for the next
|
||||
// GC. It also owns the dedicated refinement threads and controls their
|
||||
// activation in order to achieve that target.
|
||||
//
|
||||
// There are two kinds of dedicated refinement threads, a single primary
|
||||
// thread and some number of secondary threads. When active, all refinement
|
||||
// threads take buffers of dirty cards from the dirty card queue and process
|
||||
// them. Between buffers they query this owning object to find out whether
|
||||
// they should continue running, deactivating themselves if not.
|
||||
// There are two kinds of dedicated refinement threads, a single control
|
||||
// thread and some number of refinement worker threads.
|
||||
// The control thread determines whether there is need to do work, and then starts
|
||||
// an appropriate number of refinement worker threads to get back to the target
|
||||
// number of pending dirty cards.
|
||||
//
|
||||
// The control thread wakes up periodically to check whether there is need to do refinement
|
||||
// work, starting the refinement process as necessary.
|
||||
//
|
||||
// The primary thread drives the control system that determines how many
|
||||
// refinement threads should be active. If inactive, it wakes up periodically
|
||||
// to recalculate the number of active threads needed, and activates
|
||||
// additional threads as necessary. While active it also periodically
|
||||
// recalculates the number wanted and activates more threads if needed. It
|
||||
// also reduces the number of wanted threads when the target has been reached,
|
||||
// triggering deactivations.
|
||||
class G1ConcurrentRefine : public CHeapObj<mtGC> {
|
||||
G1Policy* _policy;
|
||||
volatile uint _threads_wanted;
|
||||
volatile uint _num_threads_wanted;
|
||||
size_t _pending_cards_target;
|
||||
Ticks _last_adjust;
|
||||
Ticks _last_deactivate;
|
||||
bool _needs_adjust;
|
||||
bool _heap_was_locked; // The heap has been locked the last time we tried to adjust the number of refinement threads.
|
||||
|
||||
G1ConcurrentRefineThreadsNeeded _threads_needed;
|
||||
G1ConcurrentRefineThreadControl _thread_control;
|
||||
G1DirtyCardQueueSet& _dcqs;
|
||||
|
||||
G1ConcurrentRefine(G1Policy* policy);
|
||||
G1ConcurrentRefineSweepState _sweep_state;
|
||||
|
||||
static uint worker_id_offset();
|
||||
G1ConcurrentRefine(G1CollectedHeap* g1h);
|
||||
|
||||
jint initialize();
|
||||
|
||||
void assert_current_thread_is_primary_refinement_thread() const {
|
||||
_thread_control.assert_current_thread_is_primary_refinement_thread();
|
||||
void assert_current_thread_is_control_refinement_thread() const {
|
||||
_thread_control.assert_current_thread_is_control_refinement_thread();
|
||||
}
|
||||
|
||||
// For the first few collection cycles we don't have a target (and so don't
|
||||
@ -138,16 +235,11 @@ class G1ConcurrentRefine : public CHeapObj<mtGC> {
|
||||
return _pending_cards_target != PendingCardsTargetUninitialized;
|
||||
}
|
||||
|
||||
void update_pending_cards_target(double logged_cards_scan_time_ms,
|
||||
size_t processed_logged_cards,
|
||||
size_t predicted_thread_buffer_cards,
|
||||
void update_pending_cards_target(double pending_cards_scan_time_ms,
|
||||
size_t processed_pending_cards,
|
||||
double goal_ms);
|
||||
|
||||
uint64_t adjust_threads_period_ms() const;
|
||||
bool is_in_last_adjustment_period() const;
|
||||
|
||||
class RemSetSamplingClosure; // Helper class for adjusting young length.
|
||||
void adjust_young_list_target_length();
|
||||
|
||||
void adjust_threads_wanted(size_t available_bytes);
|
||||
|
||||
@ -156,67 +248,66 @@ class G1ConcurrentRefine : public CHeapObj<mtGC> {
|
||||
public:
|
||||
~G1ConcurrentRefine();
|
||||
|
||||
G1ConcurrentRefineSweepState& sweep_state() { return _sweep_state; }
|
||||
|
||||
G1ConcurrentRefineSweepState& sweep_state_for_merge();
|
||||
|
||||
void run_with_refinement_workers(WorkerTask* task);
|
||||
|
||||
void notify_region_reclaimed(G1HeapRegion* r);
|
||||
|
||||
// Returns a G1ConcurrentRefine instance if succeeded to create/initialize the
|
||||
// G1ConcurrentRefine instance. Otherwise, returns null with error code.
|
||||
static G1ConcurrentRefine* create(G1Policy* policy, jint* ecode);
|
||||
static G1ConcurrentRefine* create(G1CollectedHeap* g1h, jint* ecode);
|
||||
|
||||
// Stop all the refinement threads.
|
||||
void stop();
|
||||
|
||||
// Called at the end of a GC to prepare for refinement during the next
|
||||
// concurrent phase. Updates the target for the number of pending dirty
|
||||
// cards. Updates the mutator refinement threshold. Ensures the primary
|
||||
// refinement thread (if it exists) is active, so it will adjust the number
|
||||
// cards. Updates the mutator refinement threshold. Ensures the refinement
|
||||
// control thread (if it exists) is active, so it will adjust the number
|
||||
// of running threads.
|
||||
void adjust_after_gc(double logged_cards_scan_time_ms,
|
||||
size_t processed_logged_cards,
|
||||
size_t predicted_thread_buffer_cards,
|
||||
void adjust_after_gc(double pending_cards_scan_time_ms,
|
||||
size_t processed_pending_cards,
|
||||
double goal_ms);
|
||||
|
||||
// Target number of pending dirty cards at the start of the next GC.
|
||||
size_t pending_cards_target() const { return _pending_cards_target; }
|
||||
|
||||
// May recalculate the number of refinement threads that should be active in
|
||||
// order to meet the pending cards target. Returns true if adjustment was
|
||||
// performed, and clears any pending request. Returns false if the
|
||||
// adjustment period has not expired, or because a timed or requested
|
||||
// adjustment could not be performed immediately and so was deferred.
|
||||
// precondition: current thread is the primary refinement thread.
|
||||
bool adjust_threads_periodically();
|
||||
// Recalculates the number of refinement threads that should be active in
|
||||
// order to meet the pending cards target.
|
||||
// Returns true if it could recalculate the number of threads and
|
||||
// refinement threads should be started.
|
||||
// Returns false if the adjustment period has not expired, or because a timed
|
||||
// or requested adjustment could not be performed immediately and so was deferred.
|
||||
bool adjust_num_threads_periodically();
|
||||
|
||||
// The amount of time (in ms) the primary refinement thread should sleep
|
||||
// The amount of time (in ms) the refinement control thread should sleep
|
||||
// when it is inactive. It requests adjustment whenever it is reactivated.
|
||||
// precondition: current thread is the primary refinement thread.
|
||||
// precondition: current thread is the refinement control thread.
|
||||
uint64_t adjust_threads_wait_ms() const;
|
||||
|
||||
// Record a request for thread adjustment as soon as possible.
|
||||
// precondition: current thread is the primary refinement thread.
|
||||
// precondition: current thread is the refinement control thread.
|
||||
void record_thread_adjustment_needed();
|
||||
|
||||
// Test whether there is a pending request for thread adjustment.
|
||||
// precondition: current thread is the primary refinement thread.
|
||||
// precondition: current thread is the refinement control thread.
|
||||
bool is_thread_adjustment_needed() const;
|
||||
|
||||
// Reduce the number of active threads wanted.
|
||||
// precondition: current thread is the primary refinement thread.
|
||||
void reduce_threads_wanted();
|
||||
// Indicate that last refinement adjustment had been deferred due to not
|
||||
// obtaining the heap lock.
|
||||
bool heap_was_locked() const { return _heap_was_locked; }
|
||||
|
||||
// Test whether the thread designated by worker_id should be active.
|
||||
bool is_thread_wanted(uint worker_id) const;
|
||||
|
||||
// Return total of concurrent refinement stats for the
|
||||
// ConcurrentRefineThreads. Also reset the stats for the threads.
|
||||
G1ConcurrentRefineStats get_and_reset_refinement_stats();
|
||||
|
||||
// Perform a single refinement step; called by the refinement
|
||||
// threads. Returns true if there was refinement work available.
|
||||
// Updates stats.
|
||||
bool try_refinement_step(uint worker_id,
|
||||
size_t stop_at,
|
||||
G1ConcurrentRefineStats* stats);
|
||||
uint num_threads_wanted() const { return _num_threads_wanted; }
|
||||
uint max_num_threads() const { return _thread_control.max_num_threads(); }
|
||||
|
||||
// Iterate over all concurrent refinement threads applying the given closure.
|
||||
void threads_do(ThreadClosure *tc);
|
||||
// Iterate over specific refinement threads applying the given closure.
|
||||
void worker_threads_do(ThreadClosure *tc);
|
||||
void control_thread_do(ThreadClosure *tc);
|
||||
};
|
||||
|
||||
#endif // SHARE_GC_G1_G1CONCURRENTREFINE_HPP
|
||||
|
||||
@ -23,41 +23,33 @@
|
||||
*/
|
||||
|
||||
#include "gc/g1/g1ConcurrentRefineStats.hpp"
|
||||
#include "runtime/atomicAccess.hpp"
|
||||
#include "runtime/timer.hpp"
|
||||
|
||||
G1ConcurrentRefineStats::G1ConcurrentRefineStats() :
|
||||
_refinement_time(),
|
||||
_refined_cards(0),
|
||||
_precleaned_cards(0),
|
||||
_dirtied_cards(0)
|
||||
_sweep_duration(0),
|
||||
_yield_during_sweep_duration(0),
|
||||
_cards_scanned(0),
|
||||
_cards_clean(0),
|
||||
_cards_not_parsable(0),
|
||||
_cards_already_refer_to_cset(0),
|
||||
_cards_refer_to_cset(0),
|
||||
_cards_no_cross_region(0),
|
||||
_refine_duration(0)
|
||||
{}
|
||||
|
||||
double G1ConcurrentRefineStats::refinement_rate_ms() const {
|
||||
// Report 0 when no time recorded because no refinement performed.
|
||||
double secs = refinement_time().seconds();
|
||||
return (secs > 0) ? (refined_cards() / (secs * MILLIUNITS)) : 0.0;
|
||||
}
|
||||
void G1ConcurrentRefineStats::add_atomic(G1ConcurrentRefineStats* other) {
|
||||
AtomicAccess::add(&_sweep_duration, other->_sweep_duration, memory_order_relaxed);
|
||||
AtomicAccess::add(&_yield_during_sweep_duration, other->_yield_during_sweep_duration, memory_order_relaxed);
|
||||
|
||||
G1ConcurrentRefineStats&
|
||||
G1ConcurrentRefineStats::operator+=(const G1ConcurrentRefineStats& other) {
|
||||
_refinement_time += other._refinement_time;
|
||||
_refined_cards += other._refined_cards;
|
||||
_precleaned_cards += other._precleaned_cards;
|
||||
_dirtied_cards += other._dirtied_cards;
|
||||
return *this;
|
||||
}
|
||||
AtomicAccess::add(&_cards_scanned, other->_cards_scanned, memory_order_relaxed);
|
||||
AtomicAccess::add(&_cards_clean, other->_cards_clean, memory_order_relaxed);
|
||||
AtomicAccess::add(&_cards_not_parsable, other->_cards_not_parsable, memory_order_relaxed);
|
||||
AtomicAccess::add(&_cards_already_refer_to_cset, other->_cards_already_refer_to_cset, memory_order_relaxed);
|
||||
AtomicAccess::add(&_cards_refer_to_cset, other->_cards_refer_to_cset, memory_order_relaxed);
|
||||
AtomicAccess::add(&_cards_no_cross_region, other->_cards_no_cross_region, memory_order_relaxed);
|
||||
|
||||
template<typename T>
|
||||
static T clipped_sub(T x, T y) {
|
||||
return (x < y) ? T() : (x - y);
|
||||
}
|
||||
|
||||
G1ConcurrentRefineStats&
|
||||
G1ConcurrentRefineStats::operator-=(const G1ConcurrentRefineStats& other) {
|
||||
_refinement_time = clipped_sub(_refinement_time, other._refinement_time);
|
||||
_refined_cards = clipped_sub(_refined_cards, other._refined_cards);
|
||||
_precleaned_cards = clipped_sub(_precleaned_cards, other._precleaned_cards);
|
||||
_dirtied_cards = clipped_sub(_dirtied_cards, other._dirtied_cards);
|
||||
return *this;
|
||||
AtomicAccess::add(&_refine_duration, other->_refine_duration, memory_order_relaxed);
|
||||
}
|
||||
|
||||
void G1ConcurrentRefineStats::reset() {
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2020, 2021, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2020, 2025, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -33,47 +33,56 @@
|
||||
// Used for collecting per-thread statistics and for summaries over a
|
||||
// collection of threads.
|
||||
class G1ConcurrentRefineStats : public CHeapObj<mtGC> {
|
||||
Tickspan _refinement_time;
|
||||
size_t _refined_cards;
|
||||
size_t _precleaned_cards;
|
||||
size_t _dirtied_cards;
|
||||
jlong _sweep_duration; // Time spent sweeping the table finding non-clean cards
|
||||
// and refining them.
|
||||
jlong _yield_during_sweep_duration; // Time spent yielding during the sweep (not doing the sweep).
|
||||
|
||||
size_t _cards_scanned; // Total number of cards scanned.
|
||||
size_t _cards_clean; // Number of cards found clean.
|
||||
size_t _cards_not_parsable; // Number of cards we could not parse and left unrefined.
|
||||
size_t _cards_already_refer_to_cset;// Number of cards found to already refer to the collection set.
|
||||
size_t _cards_refer_to_cset; // Number of dirty cards that were recently found to contain a to-cset reference.
|
||||
size_t _cards_no_cross_region; // Number of dirty cards that were dirtied, but then cleaned again by the mutator.
|
||||
|
||||
jlong _refine_duration; // Time spent during actual refinement.
|
||||
|
||||
public:
|
||||
G1ConcurrentRefineStats();
|
||||
|
||||
// Time spent performing concurrent refinement.
|
||||
Tickspan refinement_time() const { return _refinement_time; }
|
||||
// Time spent sweeping the refinement table (includes actual refinement,
|
||||
// but not yield time).
|
||||
jlong sweep_duration() const { return _sweep_duration - _yield_during_sweep_duration; }
|
||||
jlong yield_during_sweep_duration() const { return _yield_during_sweep_duration; }
|
||||
jlong refine_duration() const { return _refine_duration; }
|
||||
|
||||
// Number of refined cards.
|
||||
size_t refined_cards() const { return _refined_cards; }
|
||||
size_t refined_cards() const { return cards_not_clean(); }
|
||||
|
||||
// Refinement rate, in cards per ms.
|
||||
double refinement_rate_ms() const;
|
||||
size_t cards_scanned() const { return _cards_scanned; }
|
||||
size_t cards_clean() const { return _cards_clean; }
|
||||
size_t cards_not_clean() const { return _cards_scanned - _cards_clean; }
|
||||
size_t cards_not_parsable() const { return _cards_not_parsable; }
|
||||
size_t cards_already_refer_to_cset() const { return _cards_already_refer_to_cset; }
|
||||
size_t cards_refer_to_cset() const { return _cards_refer_to_cset; }
|
||||
size_t cards_no_cross_region() const { return _cards_no_cross_region; }
|
||||
// Number of cards that were marked dirty and in need of refinement. This includes cards recently
|
||||
// found to refer to the collection set as they originally were dirty.
|
||||
size_t cards_pending() const { return cards_not_clean() - _cards_already_refer_to_cset; }
|
||||
|
||||
// Number of cards for which refinement was skipped because some other
|
||||
// thread had already refined them.
|
||||
size_t precleaned_cards() const { return _precleaned_cards; }
|
||||
size_t cards_to_cset() const { return _cards_already_refer_to_cset + _cards_refer_to_cset; }
|
||||
|
||||
// Number of cards marked dirty and in need of refinement.
|
||||
size_t dirtied_cards() const { return _dirtied_cards; }
|
||||
void inc_sweep_time(jlong t) { _sweep_duration += t; }
|
||||
void inc_yield_during_sweep_duration(jlong t) { _yield_during_sweep_duration += t; }
|
||||
void inc_refine_duration(jlong t) { _refine_duration += t; }
|
||||
|
||||
void inc_refinement_time(Tickspan t) { _refinement_time += t; }
|
||||
void inc_refined_cards(size_t cards) { _refined_cards += cards; }
|
||||
void inc_precleaned_cards(size_t cards) { _precleaned_cards += cards; }
|
||||
void inc_dirtied_cards(size_t cards) { _dirtied_cards += cards; }
|
||||
void inc_cards_scanned(size_t increment) { _cards_scanned += increment; }
|
||||
void inc_cards_clean(size_t increment) { _cards_clean += increment; }
|
||||
void inc_cards_not_parsable() { _cards_not_parsable++; }
|
||||
void inc_cards_already_refer_to_cset() { _cards_already_refer_to_cset++; }
|
||||
void inc_cards_refer_to_cset() { _cards_refer_to_cset++; }
|
||||
void inc_cards_no_cross_region() { _cards_no_cross_region++; }
|
||||
|
||||
G1ConcurrentRefineStats& operator+=(const G1ConcurrentRefineStats& other);
|
||||
G1ConcurrentRefineStats& operator-=(const G1ConcurrentRefineStats& other);
|
||||
|
||||
friend G1ConcurrentRefineStats operator+(G1ConcurrentRefineStats x,
|
||||
const G1ConcurrentRefineStats& y) {
|
||||
return x += y;
|
||||
}
|
||||
|
||||
friend G1ConcurrentRefineStats operator-(G1ConcurrentRefineStats x,
|
||||
const G1ConcurrentRefineStats& y) {
|
||||
return x -= y;
|
||||
}
|
||||
void add_atomic(G1ConcurrentRefineStats* other);
|
||||
|
||||
void reset();
|
||||
};
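The derived accessors above combine the raw counters, and the relationships are easy to misread, so here is the same arithmetic spelled out on plain integers. The concrete numbers are invented purely for illustration.

#include <cassert>
#include <cstddef>

int main() {
  // Invented example values, mirroring the accessors above.
  size_t cards_scanned               = 1000;
  size_t cards_clean                 = 700;
  size_t cards_already_refer_to_cset = 50;
  size_t cards_refer_to_cset         = 120;

  size_t cards_not_clean = cards_scanned - cards_clean;                        // 300: refined cards
  size_t cards_pending   = cards_not_clean - cards_already_refer_to_cset;      // 250: newly dirtied work
  size_t cards_to_cset   = cards_already_refer_to_cset + cards_refer_to_cset;  // 170

  assert(cards_not_clean == 300 && cards_pending == 250 && cards_to_cset == 170);
  return 0;
}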
|
||||
|
||||
191
src/hotspot/share/gc/g1/g1ConcurrentRefineSweepTask.cpp
Normal file
@ -0,0 +1,191 @@
|
||||
/*
|
||||
* Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "gc/g1/g1CardTableClaimTable.inline.hpp"
|
||||
#include "gc/g1/g1CollectedHeap.inline.hpp"
|
||||
#include "gc/g1/g1ConcurrentRefineSweepTask.hpp"
|
||||
|
||||
class G1RefineRegionClosure : public G1HeapRegionClosure {
|
||||
using CardValue = G1CardTable::CardValue;
|
||||
|
||||
G1RemSet* _rem_set;
|
||||
G1CardTableClaimTable* _scan_state;
|
||||
|
||||
uint _worker_id;
|
||||
|
||||
size_t _num_collections_at_start;
|
||||
|
||||
bool has_work(G1HeapRegion* r) {
|
||||
return _scan_state->has_unclaimed_cards(r->hrm_index());
|
||||
}
|
||||
|
||||
void verify_card_pair_refers_to_same_card(CardValue* source_card, CardValue* dest_card) {
|
||||
#ifdef ASSERT
|
||||
G1CollectedHeap* g1h = G1CollectedHeap::heap();
|
||||
G1HeapRegion* refinement_r = g1h->heap_region_containing(g1h->refinement_table()->addr_for(source_card));
|
||||
G1HeapRegion* card_r = g1h->heap_region_containing(g1h->card_table()->addr_for(dest_card));
|
||||
size_t refinement_i = g1h->refinement_table()->index_for_cardvalue(source_card);
|
||||
size_t card_i = g1h->card_table()->index_for_cardvalue(dest_card);
|
||||
|
||||
assert(refinement_r == card_r, "not same region source %u (%zu) dest %u (%zu) ", refinement_r->hrm_index(), refinement_i, card_r->hrm_index(), card_i);
|
||||
assert(refinement_i == card_i, "indexes are not same %zu %zu", refinement_i, card_i);
|
||||
#endif
|
||||
}
|
||||
|
||||
void do_dirty_card(CardValue* source_card, CardValue* dest_card) {
|
||||
verify_card_pair_refers_to_same_card(source_card, dest_card);
|
||||
|
||||
G1RemSet::RefineResult res = _rem_set->refine_card_concurrently(source_card, _worker_id);
|
||||
// Gather statistics based on the result.
|
||||
switch (res) {
|
||||
case G1RemSet::HasRefToCSet: {
|
||||
*dest_card = G1CardTable::g1_to_cset_card;
|
||||
_refine_stats.inc_cards_refer_to_cset();
|
||||
break;
|
||||
}
|
||||
case G1RemSet::AlreadyToCSet: {
|
||||
*dest_card = G1CardTable::g1_to_cset_card;
|
||||
_refine_stats.inc_cards_already_refer_to_cset();
|
||||
break;
|
||||
}
|
||||
case G1RemSet::NoCrossRegion: {
|
||||
_refine_stats.inc_cards_no_cross_region();
|
||||
break;
|
||||
}
|
||||
case G1RemSet::CouldNotParse: {
|
||||
// Could not refine - redirty with the original value.
|
||||
*dest_card = *source_card;
|
||||
_refine_stats.inc_cards_not_parsable();
|
||||
break;
|
||||
}
|
||||
case G1RemSet::HasRefToOld : break; // Nothing special to do.
|
||||
}
|
||||
// Clean card on source card table.
|
||||
*source_card = G1CardTable::clean_card_val();
|
||||
}
|
||||
|
||||
void do_claimed_block(CardValue* dirty_l, CardValue* dirty_r, CardValue* dest_card) {
|
||||
for (CardValue* source = dirty_l; source < dirty_r; ++source, ++dest_card) {
|
||||
do_dirty_card(source, dest_card);
|
||||
}
|
||||
}
|
||||
|
||||
public:
|
||||
bool _completed;
|
||||
G1ConcurrentRefineStats _refine_stats;
|
||||
|
||||
G1RefineRegionClosure(uint worker_id, G1CardTableClaimTable* scan_state) :
|
||||
G1HeapRegionClosure(),
|
||||
_rem_set(G1CollectedHeap::heap()->rem_set()),
|
||||
_scan_state(scan_state),
|
||||
_worker_id(worker_id),
|
||||
_completed(true),
|
||||
_refine_stats() { }
|
||||
|
||||
bool do_heap_region(G1HeapRegion* r) override {
|
||||
|
||||
if (!has_work(r)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
G1CollectedHeap* g1h = G1CollectedHeap::heap();
|
||||
|
||||
if (r->is_young()) {
|
||||
if (_scan_state->claim_all_cards(r->hrm_index()) == 0) {
|
||||
// Clear the pre-dirtying information.
|
||||
r->clear_refinement_table();
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
G1CardTable* card_table = g1h->card_table();
|
||||
G1CardTable* refinement_table = g1h->refinement_table();
|
||||
|
||||
G1CardTableChunkClaimer claim(_scan_state, r->hrm_index());
|
||||
|
||||
size_t const region_card_base_idx = (size_t)r->hrm_index() << G1HeapRegion::LogCardsPerRegion;
|
||||
|
||||
while (claim.has_next()) {
|
||||
size_t const start_idx = region_card_base_idx + claim.value();
|
||||
CardValue* const start_card = refinement_table->byte_for_index(start_idx);
|
||||
CardValue* const end_card = start_card + claim.size();
|
||||
|
||||
CardValue* dest_card = card_table->byte_for_index(start_idx);
|
||||
|
||||
G1ChunkScanner scanner{start_card, end_card};
|
||||
|
||||
size_t num_dirty_cards = 0;
|
||||
scanner.on_dirty_cards([&] (CardValue* dirty_l, CardValue* dirty_r) {
|
||||
jlong refine_start = os::elapsed_counter();
|
||||
|
||||
do_claimed_block(dirty_l, dirty_r, dest_card + pointer_delta(dirty_l, start_card, sizeof(CardValue)));
|
||||
num_dirty_cards += pointer_delta(dirty_r, dirty_l, sizeof(CardValue));
|
||||
|
||||
_refine_stats.inc_refine_duration(os::elapsed_counter() - refine_start);
|
||||
});
|
||||
|
||||
if (VerifyDuringGC) {
|
||||
for (CardValue* i = start_card; i < end_card; ++i) {
|
||||
guarantee(*i == G1CardTable::clean_card_val(), "must be");
|
||||
}
|
||||
}
|
||||
|
||||
_refine_stats.inc_cards_scanned(claim.size());
|
||||
_refine_stats.inc_cards_clean(claim.size() - num_dirty_cards);
|
||||
|
||||
if (SuspendibleThreadSet::should_yield()) {
|
||||
_completed = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return !_completed;
|
||||
}
|
||||
};
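The closure above receives maximal runs of non-clean cards from G1ChunkScanner and processes each run. The sketch below shows the run-finding idea on a plain byte array; the CLEAN value and callback signature are simplified assumptions, and the word-sized skipping of fully clean chunks done by the real scanner is omitted.

#include <cstdint>
#include <functional>

// Simplified run finder: invoke cb(l, r) for each maximal run of non-clean
// card bytes in [start, end). CLEAN mirrors a "clean card" value.
inline void for_each_dirty_run(uint8_t* start, uint8_t* end, uint8_t CLEAN,
                               const std::function<void(uint8_t*, uint8_t*)>& cb) {
  uint8_t* cur = start;
  while (cur < end) {
    while (cur < end && *cur == CLEAN) ++cur;     // Skip clean cards.
    uint8_t* run_start = cur;
    while (cur < end && *cur != CLEAN) ++cur;     // Extend the dirty run.
    if (run_start < cur) cb(run_start, cur);      // Hand the run to the caller.
  }
}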
|
||||
|
||||
G1ConcurrentRefineSweepTask::G1ConcurrentRefineSweepTask(G1CardTableClaimTable* scan_state,
|
||||
G1ConcurrentRefineStats* stats,
|
||||
uint max_workers) :
|
||||
WorkerTask("G1 Refine Task"),
|
||||
_scan_state(scan_state),
|
||||
_stats(stats),
|
||||
_max_workers(max_workers),
|
||||
_sweep_completed(true)
|
||||
{ }
|
||||
|
||||
void G1ConcurrentRefineSweepTask::work(uint worker_id) {
|
||||
jlong start = os::elapsed_counter();
|
||||
|
||||
G1RefineRegionClosure sweep_cl(worker_id, _scan_state);
|
||||
_scan_state->heap_region_iterate_from_worker_offset(&sweep_cl, worker_id, _max_workers);
|
||||
|
||||
if (!sweep_cl._completed) {
|
||||
_sweep_completed = false;
|
||||
}
|
||||
|
||||
sweep_cl._refine_stats.inc_sweep_time(os::elapsed_counter() - start);
|
||||
_stats->add_atomic(&sweep_cl._refine_stats);
|
||||
}
|
||||
|
||||
bool G1ConcurrentRefineSweepTask::sweep_completed() const { return _sweep_completed; }
|
||||
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2019, 2023, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -22,20 +22,27 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef SHARE_GC_SHARED_BUFFERNODELIST_HPP
|
||||
#define SHARE_GC_SHARED_BUFFERNODELIST_HPP
|
||||
#ifndef SHARE_GC_G1_G1CONCURRENTREFINESWEEPTASK_HPP
|
||||
#define SHARE_GC_G1_G1CONCURRENTREFINESWEEPTASK_HPP
|
||||
|
||||
#include "utilities/globalDefinitions.hpp"
|
||||
#include "gc/g1/g1ConcurrentRefineStats.hpp"
|
||||
#include "gc/shared/workerThread.hpp"
|
||||
|
||||
class BufferNode;
|
||||
class G1CardTableClaimTable;
|
||||
|
||||
struct BufferNodeList {
|
||||
BufferNode* _head; // First node in list or null if empty.
|
||||
BufferNode* _tail; // Last node in list or null if empty.
|
||||
size_t _entry_count; // Sum of entries in nodes in list.
|
||||
class G1ConcurrentRefineSweepTask : public WorkerTask {
|
||||
G1CardTableClaimTable* _scan_state;
|
||||
G1ConcurrentRefineStats* _stats;
|
||||
uint _max_workers;
|
||||
bool _sweep_completed;
|
||||
|
||||
BufferNodeList();
|
||||
BufferNodeList(BufferNode* head, BufferNode* tail, size_t entry_count);
|
||||
public:
|
||||
|
||||
G1ConcurrentRefineSweepTask(G1CardTableClaimTable* scan_state, G1ConcurrentRefineStats* stats, uint max_workers);
|
||||
|
||||
void work(uint worker_id) override;
|
||||
|
||||
bool sweep_completed() const;
|
||||
};
|
||||
|
||||
#endif // SHARE_GC_SHARED_BUFFERNODELIST_HPP
|
||||
#endif /* SHARE_GC_G1_G1CONCURRENTREFINESWEEPTASK_HPP */
|
||||
@ -23,10 +23,13 @@
|
||||
*/
|
||||
|
||||
#include "gc/g1/g1BarrierSet.hpp"
|
||||
#include "gc/g1/g1CardTableClaimTable.inline.hpp"
|
||||
#include "gc/g1/g1CollectedHeap.inline.hpp"
|
||||
#include "gc/g1/g1ConcurrentRefine.hpp"
|
||||
#include "gc/g1/g1ConcurrentRefineStats.hpp"
|
||||
#include "gc/g1/g1ConcurrentRefineSweepTask.hpp"
|
||||
#include "gc/g1/g1ConcurrentRefineThread.hpp"
|
||||
#include "gc/g1/g1DirtyCardQueue.hpp"
|
||||
#include "gc/shared/gcTraceTime.inline.hpp"
|
||||
#include "gc/shared/suspendibleThreadSet.hpp"
|
||||
#include "logging/log.hpp"
|
||||
#include "runtime/cpuTimeCounters.hpp"
|
||||
@ -38,60 +41,61 @@
|
||||
#include "utilities/globalDefinitions.hpp"
|
||||
#include "utilities/ticks.hpp"
|
||||
|
||||
G1ConcurrentRefineThread::G1ConcurrentRefineThread(G1ConcurrentRefine* cr, uint worker_id) :
|
||||
G1ConcurrentRefineThread::G1ConcurrentRefineThread(G1ConcurrentRefine* cr) :
|
||||
ConcurrentGCThread(),
|
||||
_notifier(Mutex::nosafepoint, FormatBuffer<>("G1 Refine#%d", worker_id), true),
|
||||
_notifier(Mutex::nosafepoint, "G1 Refine Control", true),
|
||||
_requested_active(false),
|
||||
_refinement_stats(),
|
||||
_worker_id(worker_id),
|
||||
_cr(cr)
|
||||
{
|
||||
// set name
|
||||
set_name("G1 Refine#%d", worker_id);
|
||||
set_name("G1 Refine Control");
|
||||
}
|
||||
|
||||
void G1ConcurrentRefineThread::run_service() {
|
||||
while (wait_for_completed_buffers()) {
|
||||
while (wait_for_work()) {
|
||||
SuspendibleThreadSetJoiner sts_join;
|
||||
G1ConcurrentRefineStats active_stats_start = _refinement_stats;
|
||||
report_active("Activated");
|
||||
while (!should_terminate()) {
|
||||
if (sts_join.should_yield()) {
|
||||
report_inactive("Paused", _refinement_stats - active_stats_start);
|
||||
report_inactive("Paused");
|
||||
sts_join.yield();
|
||||
// Reset after yield rather than accumulating across yields, else a
|
||||
// very long running thread could overflow.
|
||||
active_stats_start = _refinement_stats;
|
||||
report_active("Resumed");
|
||||
} else if (maybe_deactivate()) {
|
||||
break;
|
||||
}
|
||||
// Check whether we want to do refinement. If not, do not do any refinement
// this round. This thread may have just woken up but no threads are currently
|
||||
// needed, which is common. In this case we want to just go back to
|
||||
// waiting, with a minimum of fuss; in particular, don't do any "premature"
|
||||
// refinement. However, adjustment may be pending but temporarily
|
||||
// blocked. In that case we wait for adjustment to succeed.
|
||||
Ticks adjust_start = Ticks::now();
|
||||
if (cr()->adjust_num_threads_periodically()) {
|
||||
GCTraceTime(Info, gc, refine) tm("Concurrent Refine Cycle");
|
||||
do_refinement();
|
||||
} else {
|
||||
do_refinement_step();
|
||||
log_debug(gc, refine)("Concurrent Refine Adjust Only (#threads wanted: %u adjustment_needed: %s wait_for_heap_lock: %s) %.2fms",
|
||||
cr()->num_threads_wanted(),
|
||||
BOOL_TO_STR(cr()->is_thread_adjustment_needed()),
|
||||
BOOL_TO_STR(cr()->heap_was_locked()),
|
||||
(Ticks::now() - adjust_start).seconds() * MILLIUNITS);
|
||||
|
||||
deactivate();
|
||||
break;
|
||||
}
|
||||
}
|
||||
report_inactive("Deactivated", _refinement_stats - active_stats_start);
|
||||
report_inactive("Deactivated");
|
||||
update_perf_counter_cpu_time();
|
||||
}
|
||||
|
||||
log_debug(gc, refine)("Stopping %d", _worker_id);
|
||||
log_debug(gc, refine)("Stopping %s", name());
|
||||
}
|
||||
|
||||
void G1ConcurrentRefineThread::report_active(const char* reason) const {
|
||||
log_trace(gc, refine)("%s worker %u, current: %zu",
|
||||
reason,
|
||||
_worker_id,
|
||||
G1BarrierSet::dirty_card_queue_set().num_cards());
|
||||
log_trace(gc, refine)("%s active (%s)", name(), reason);
|
||||
}
|
||||
|
||||
void G1ConcurrentRefineThread::report_inactive(const char* reason,
|
||||
const G1ConcurrentRefineStats& stats) const {
|
||||
log_trace(gc, refine)
|
||||
("%s worker %u, cards: %zu, refined %zu, rate %1.2fc/ms",
|
||||
reason,
|
||||
_worker_id,
|
||||
G1BarrierSet::dirty_card_queue_set().num_cards(),
|
||||
stats.refined_cards(),
|
||||
stats.refinement_rate_ms());
|
||||
void G1ConcurrentRefineThread::report_inactive(const char* reason) const {
|
||||
log_trace(gc, refine)("%s inactive (%s)", name(), reason);
|
||||
}
|
||||
|
||||
void G1ConcurrentRefineThread::activate() {
|
||||
@ -103,21 +107,12 @@ void G1ConcurrentRefineThread::activate() {
|
||||
}
|
||||
}
|
||||
|
||||
bool G1ConcurrentRefineThread::maybe_deactivate() {
|
||||
bool G1ConcurrentRefineThread::deactivate() {
|
||||
assert(this == Thread::current(), "precondition");
|
||||
if (cr()->is_thread_wanted(_worker_id)) {
|
||||
return false;
|
||||
} else {
|
||||
MutexLocker ml(&_notifier, Mutex::_no_safepoint_check_flag);
|
||||
bool requested = _requested_active;
|
||||
_requested_active = false;
|
||||
return !requested; // Deactivate only if not recently requested active.
|
||||
}
|
||||
}
|
||||
|
||||
bool G1ConcurrentRefineThread::try_refinement_step(size_t stop_at) {
|
||||
assert(this == Thread::current(), "precondition");
|
||||
return _cr->try_refinement_step(_worker_id, stop_at, &_refinement_stats);
|
||||
MutexLocker ml(&_notifier, Mutex::_no_safepoint_check_flag);
|
||||
bool requested = _requested_active;
|
||||
_requested_active = false;
|
||||
return !requested; // Deactivate only if not recently requested active.
|
||||
}
|
||||
|
||||
void G1ConcurrentRefineThread::stop_service() {
|
||||
@ -128,23 +123,9 @@ jlong G1ConcurrentRefineThread::cpu_time() {
|
||||
return os::thread_cpu_time(this);
|
||||
}
|
||||
|
||||
// The (single) primary thread drives the controller for the refinement threads.
|
||||
class G1PrimaryConcurrentRefineThread final : public G1ConcurrentRefineThread {
|
||||
bool wait_for_completed_buffers() override;
|
||||
bool maybe_deactivate() override;
|
||||
void do_refinement_step() override;
|
||||
// Updates jstat cpu usage for all refinement threads.
|
||||
void update_perf_counter_cpu_time() override;
|
||||
|
||||
public:
|
||||
G1PrimaryConcurrentRefineThread(G1ConcurrentRefine* cr) :
|
||||
G1ConcurrentRefineThread(cr, 0)
|
||||
{}
|
||||
};
|
||||
|
||||
// When inactive, the primary thread periodically wakes up and requests
|
||||
// adjustment of the number of active refinement threads.
|
||||
bool G1PrimaryConcurrentRefineThread::wait_for_completed_buffers() {
|
||||
// When inactive, the control thread periodically wakes up to check if there is
|
||||
// refinement work pending.
|
||||
bool G1ConcurrentRefineThread::wait_for_work() {
|
||||
assert(this == Thread::current(), "precondition");
|
||||
MonitorLocker ml(notifier(), Mutex::_no_safepoint_check_flag);
|
||||
if (!requested_active() && !should_terminate()) {
|
||||
@ -157,78 +138,115 @@ bool G1PrimaryConcurrentRefineThread::wait_for_completed_buffers() {
|
||||
return !should_terminate();
|
||||
}
|
||||
|
||||
bool G1PrimaryConcurrentRefineThread::maybe_deactivate() {
|
||||
// Don't deactivate while needing to adjust the number of active threads.
|
||||
return !cr()->is_thread_adjustment_needed() &&
|
||||
G1ConcurrentRefineThread::maybe_deactivate();
|
||||
void G1ConcurrentRefineThread::do_refinement() {
|
||||
G1ConcurrentRefineSweepState& state = _cr->sweep_state();
|
||||
|
||||
state.start_work();
|
||||
|
||||
// Swap card tables.
|
||||
|
||||
// 1. Global card table
|
||||
if (!state.swap_global_card_table()) {
|
||||
log_debug(gc, refine)("GC pause after Global Card Table Swap");
|
||||
return;
|
||||
}
|
||||
|
||||
// 2. Java threads
|
||||
if (!state.swap_java_threads_ct()) {
|
||||
log_debug(gc, refine)("GC pause after Java Thread CT swap");
|
||||
return;
|
||||
}
|
||||
|
||||
// 3. GC threads
|
||||
if (!state.swap_gc_threads_ct()) {
|
||||
log_debug(gc, refine)("GC pause after GC Thread CT swap");
|
||||
return;
|
||||
}
|
||||
|
||||
G1CollectedHeap* g1h = G1CollectedHeap::heap();
|
||||
jlong epoch_yield_duration = g1h->yield_duration_in_refinement_epoch();
|
||||
jlong next_epoch_start = os::elapsed_counter();
|
||||
|
||||
jlong total_yield_during_sweep_duration = 0;
|
||||
|
||||
// 4. Snapshot heap.
|
||||
state.snapshot_heap();
|
||||
|
||||
// 5. Sweep refinement table until done
|
||||
bool interrupted_by_gc = false;
|
||||
|
||||
log_info(gc, task)("Concurrent Refine Sweep Using %u of %u Workers", _cr->num_threads_wanted(), _cr->max_num_threads());
|
||||
|
||||
state.sweep_refinement_table_start();
|
||||
while (true) {
|
||||
bool completed = state.sweep_refinement_table_step();
|
||||
|
||||
if (completed) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (SuspendibleThreadSet::should_yield()) {
|
||||
jlong yield_during_sweep_start = os::elapsed_counter();
|
||||
SuspendibleThreadSet::yield();
|
||||
|
||||
// The yielding may have completed the task, check.
|
||||
if (!state.is_in_progress()) {
|
||||
log_debug(gc, refine)("GC completed sweeping, aborting concurrent operation");
|
||||
interrupted_by_gc = true;
|
||||
break;
|
||||
} else {
|
||||
jlong yield_during_sweep_duration = os::elapsed_counter() - yield_during_sweep_start;
|
||||
log_debug(gc, refine)("Yielded from card table sweeping for %.2fms, no GC inbetween, continue",
|
||||
TimeHelper::counter_to_millis(yield_during_sweep_duration));
|
||||
total_yield_during_sweep_duration += yield_during_sweep_duration;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!interrupted_by_gc) {
|
||||
GCTraceTime(Info, gc, refine) tm("Concurrent Refine Complete Work");
|
||||
|
||||
state.add_yield_during_sweep_duration(total_yield_during_sweep_duration);
|
||||
|
||||
state.complete_work(true);
|
||||
|
||||
G1CollectedHeap* g1h = G1CollectedHeap::heap();
|
||||
G1Policy* policy = g1h->policy();
|
||||
G1ConcurrentRefineStats* stats = state.stats();
|
||||
policy->record_refinement_stats(stats);
|
||||
|
||||
{
|
||||
// The young gen revising mechanism reads the predictor and the values set
|
||||
// here. Avoid inconsistencies by locking.
|
||||
MutexLocker x(G1ReviseYoungLength_lock, Mutex::_no_safepoint_check_flag);
|
||||
policy->record_dirtying_stats(TimeHelper::counter_to_millis(G1CollectedHeap::heap()->last_refinement_epoch_start()),
|
||||
TimeHelper::counter_to_millis(next_epoch_start),
|
||||
stats->cards_pending(),
|
||||
TimeHelper::counter_to_millis(epoch_yield_duration),
|
||||
0 /* pending_cards_from_gc */,
|
||||
stats->cards_to_cset());
|
||||
G1CollectedHeap::heap()->set_last_refinement_epoch_start(next_epoch_start, epoch_yield_duration);
|
||||
}
|
||||
stats->reset();
|
||||
}
|
||||
}
|
||||
|
||||
void G1PrimaryConcurrentRefineThread::do_refinement_step() {
|
||||
// Try adjustment first. If it succeeds then don't do any refinement this
|
||||
// round. This thread may have just woken up but no threads are currently
|
||||
// needed, which is common. In this case we want to just go back to
|
||||
// waiting, with a minimum of fuss; in particular, don't do any "premature"
|
||||
// refinement. However, adjustment may be pending but temporarily
|
||||
// blocked. In that case we *do* try refinement, rather than possibly
|
||||
// uselessly spinning while waiting for adjustment to succeed.
|
||||
if (!cr()->adjust_threads_periodically()) {
|
||||
// No adjustment, so try refinement, with the target as a cutoff.
|
||||
if (!try_refinement_step(cr()->pending_cards_target())) {
|
||||
// Refinement was cut off, so proceed with fewer threads.
|
||||
cr()->reduce_threads_wanted();
|
||||
void G1ConcurrentRefineThread::update_perf_counter_cpu_time() {
|
||||
// The control thread is responsible for updating the CPU time for all workers.
|
||||
if (UsePerfData) {
|
||||
{
|
||||
ThreadTotalCPUTimeClosure tttc(CPUTimeGroups::CPUTimeType::gc_conc_refine);
|
||||
cr()->worker_threads_do(&tttc);
|
||||
}
|
||||
{
|
||||
ThreadTotalCPUTimeClosure tttc(CPUTimeGroups::CPUTimeType::gc_conc_refine_control);
|
||||
cr()->control_thread_do(&tttc);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void G1PrimaryConcurrentRefineThread::update_perf_counter_cpu_time() {
|
||||
if (UsePerfData) {
|
||||
ThreadTotalCPUTimeClosure tttc(CPUTimeGroups::CPUTimeType::gc_conc_refine);
|
||||
cr()->threads_do(&tttc);
|
||||
}
|
||||
}
|
||||
|
||||
class G1SecondaryConcurrentRefineThread final : public G1ConcurrentRefineThread {
|
||||
bool wait_for_completed_buffers() override;
|
||||
void do_refinement_step() override;
|
||||
void update_perf_counter_cpu_time() override { /* Nothing to do. The primary thread does all the work. */ }
|
||||
|
||||
public:
|
||||
G1SecondaryConcurrentRefineThread(G1ConcurrentRefine* cr, uint worker_id) :
|
||||
G1ConcurrentRefineThread(cr, worker_id)
|
||||
{
|
||||
assert(worker_id > 0, "precondition");
|
||||
}
|
||||
};
|
||||
|
||||
bool G1SecondaryConcurrentRefineThread::wait_for_completed_buffers() {
|
||||
assert(this == Thread::current(), "precondition");
|
||||
MonitorLocker ml(notifier(), Mutex::_no_safepoint_check_flag);
|
||||
while (!requested_active() && !should_terminate()) {
|
||||
ml.wait();
|
||||
}
|
||||
return !should_terminate();
|
||||
}
|
||||
|
||||
void G1SecondaryConcurrentRefineThread::do_refinement_step() {
|
||||
assert(this == Thread::current(), "precondition");
|
||||
// Secondary threads ignore the target and just drive the number of pending
|
||||
// dirty cards down. The primary thread is responsible for noticing the
|
||||
// target has been reached and reducing the number of wanted threads. This
|
||||
// makes the control of wanted threads all under the primary, while avoiding
|
||||
// useless spinning by secondary threads until the primary thread notices.
|
||||
// (Useless spinning is still possible if there are no pending cards, but
|
||||
// that should rarely happen.)
|
||||
try_refinement_step(0);
|
||||
}
|
||||
|
||||
G1ConcurrentRefineThread*
|
||||
G1ConcurrentRefineThread::create(G1ConcurrentRefine* cr, uint worker_id) {
|
||||
G1ConcurrentRefineThread* crt;
|
||||
if (worker_id == 0) {
|
||||
crt = new (std::nothrow) G1PrimaryConcurrentRefineThread(cr);
|
||||
} else {
|
||||
crt = new (std::nothrow) G1SecondaryConcurrentRefineThread(cr, worker_id);
|
||||
}
|
||||
G1ConcurrentRefineThread* G1ConcurrentRefineThread::create(G1ConcurrentRefine* cr) {
|
||||
G1ConcurrentRefineThread* crt = new (std::nothrow) G1ConcurrentRefineThread(cr);
|
||||
if (crt != nullptr) {
|
||||
crt->create_and_start();
|
||||
}
|
||||
|
||||
@ -33,8 +33,8 @@
|
||||
// Forward Decl.
|
||||
class G1ConcurrentRefine;
|
||||
|
||||
// One or more G1 Concurrent Refinement Threads may be active if concurrent
|
||||
// refinement is in progress.
|
||||
// Concurrent refinement control thread watching card mark accrual on the card table
|
||||
// and starting refinement work.
|
||||
class G1ConcurrentRefineThread: public ConcurrentGCThread {
|
||||
friend class VMStructs;
|
||||
friend class G1CollectedHeap;
|
||||
@ -42,43 +42,34 @@ class G1ConcurrentRefineThread: public ConcurrentGCThread {
|
||||
Monitor _notifier;
|
||||
bool _requested_active;
|
||||
|
||||
G1ConcurrentRefineStats _refinement_stats;
|
||||
|
||||
uint _worker_id;
|
||||
|
||||
G1ConcurrentRefine* _cr;
|
||||
|
||||
NONCOPYABLE(G1ConcurrentRefineThread);
|
||||
|
||||
protected:
|
||||
G1ConcurrentRefineThread(G1ConcurrentRefine* cr, uint worker_id);
|
||||
G1ConcurrentRefineThread(G1ConcurrentRefine* cr);
|
||||
|
||||
Monitor* notifier() { return &_notifier; }
|
||||
bool requested_active() const { return _requested_active; }
|
||||
|
||||
// Returns !should_terminate().
|
||||
// precondition: this is the current thread.
|
||||
virtual bool wait_for_completed_buffers() = 0;
|
||||
bool wait_for_work();
|
||||
|
||||
// Deactivate if appropriate. Returns true if deactivated.
|
||||
// precondition: this is the current thread.
|
||||
virtual bool maybe_deactivate();
|
||||
bool deactivate();
|
||||
|
||||
// Attempt to do some refinement work.
|
||||
// precondition: this is the current thread.
|
||||
virtual void do_refinement_step() = 0;
|
||||
// Swap card table and do a complete re-examination/refinement pass over the
|
||||
// refinement table.
|
||||
void do_refinement();
|
||||
|
||||
// Update concurrent refine threads cpu time stats.
|
||||
virtual void update_perf_counter_cpu_time() = 0;
|
||||
|
||||
// Helper for do_refinement_step implementations. Try to perform some
|
||||
// refinement work, limited by stop_at. Returns true if any refinement work
|
||||
// was performed, false if no work available per stop_at.
|
||||
// precondition: this is the current thread.
|
||||
bool try_refinement_step(size_t stop_at);
|
||||
void update_perf_counter_cpu_time();
|
||||
|
||||
void report_active(const char* reason) const;
|
||||
void report_inactive(const char* reason, const G1ConcurrentRefineStats& stats) const;
|
||||
void report_inactive(const char* reason) const;
|
||||
|
||||
G1ConcurrentRefine* cr() const { return _cr; }
|
||||
|
||||
@ -86,23 +77,12 @@ protected:
|
||||
void stop_service() override;
|
||||
|
||||
public:
|
||||
static G1ConcurrentRefineThread* create(G1ConcurrentRefine* cr, uint worker_id);
|
||||
virtual ~G1ConcurrentRefineThread() = default;
|
||||
|
||||
uint worker_id() const { return _worker_id; }
|
||||
static G1ConcurrentRefineThread* create(G1ConcurrentRefine* cr);
|
||||
|
||||
// Activate this thread.
|
||||
// precondition: this is not the current thread.
|
||||
void activate();
|
||||
|
||||
G1ConcurrentRefineStats* refinement_stats() {
|
||||
return &_refinement_stats;
|
||||
}
|
||||
|
||||
const G1ConcurrentRefineStats* refinement_stats() const {
|
||||
return &_refinement_stats;
|
||||
}
|
||||
|
||||
// Total cpu time spent in this thread so far.
|
||||
jlong cpu_time();
|
||||
};
|
||||
|
||||
@ -45,48 +45,22 @@ G1ConcurrentRefineThreadsNeeded::G1ConcurrentRefineThreadsNeeded(G1Policy* polic
//
// 1. Minimize the number of refinement threads running at once.
//
// 2. Minimize the number of activations and deactivations for the
// refinement threads that run.
//
// 3. Delay performing refinement work. Having more dirty cards waiting to
// 2. Delay performing refinement work. Having more dirty cards waiting to
// be refined can be beneficial, as further writes to the same card don't
// create more work.
void G1ConcurrentRefineThreadsNeeded::update(uint active_threads,
size_t available_bytes,
size_t num_cards,
size_t target_num_cards) {
_predicted_time_until_next_gc_ms = _policy->predict_time_to_next_gc_ms(available_bytes);

// Estimate number of cards that need to be processed before next GC.
const G1Analytics* analytics = _policy->analytics();

// Estimate time until next GC, based on remaining bytes available for
// allocation and the allocation rate.
double alloc_region_rate = analytics->predict_alloc_rate_ms();
double alloc_bytes_rate = alloc_region_rate * G1HeapRegion::GrainBytes;
if (alloc_bytes_rate == 0.0) {
// A zero rate indicates we don't yet have data to use for predictions.
// Since we don't have any idea how long until the next GC, use a time of
// zero.
_predicted_time_until_next_gc_ms = 0.0;
} else {
// If the heap size is large and the allocation rate is small, we can get
// a predicted time until next GC that is so large it can cause problems
// (such as overflow) in other calculations. Limit the prediction to one
// hour, which is still large in this context.
const double one_hour_ms = 60.0 * 60.0 * MILLIUNITS;
double raw_time_ms = available_bytes / alloc_bytes_rate;
_predicted_time_until_next_gc_ms = MIN2(raw_time_ms, one_hour_ms);
}
double incoming_rate = analytics->predict_dirtied_cards_rate_ms();
double raw_cards = incoming_rate * _predicted_time_until_next_gc_ms;
size_t incoming_cards = static_cast<size_t>(raw_cards);

// Estimate number of cards that need to be processed before next GC. There
// are no incoming cards when time is short, because in that case the
// controller activates refinement by mutator threads to stay on target even
// if threads deactivate in the meantime. This also covers the case of not
// having a real prediction of time until GC.
size_t incoming_cards = 0;
if (_predicted_time_until_next_gc_ms > _update_period_ms) {
double incoming_rate = analytics->predict_dirtied_cards_rate_ms();
double raw_cards = incoming_rate * _predicted_time_until_next_gc_ms;
incoming_cards = static_cast<size_t>(raw_cards);
}
size_t total_cards = num_cards + incoming_cards;
_predicted_cards_at_next_gc = total_cards;

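As a rough worked example of the estimate above (all numbers are illustrative, not taken from the patch):

// With ~2048 MB still available and an allocation rate of 0.2 MB/ms, the
// predicted time to the next GC is 2048 / 0.2 = 10240 ms. A dirtying rate of
// 50 cards/ms then predicts 50 * 10240 = 512000 incoming cards, which are
// added to num_cards to give _predicted_cards_at_next_gc.
double example_time_ms  = 2048.0 / 0.2;                                // 10240 ms
size_t example_incoming = static_cast<size_t>(50.0 * example_time_ms); // 512000 cards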
@ -100,9 +74,8 @@ void G1ConcurrentRefineThreadsNeeded::update(uint active_threads,
|
||||
// The calculation of the number of threads needed isn't very stable when
|
||||
// time is short, and can lead to starting up lots of threads for not much
|
||||
// profit. If we're in the last update period, don't change the number of
|
||||
// threads running, other than to treat the current thread as running. That
|
||||
// might not be sufficient, but hopefully we were already reasonably close.
|
||||
// We won't accumulate more because mutator refinement will be activated.
|
||||
// threads needed. That might not be sufficient, but hopefully we were
|
||||
// already reasonably close.
|
||||
if (_predicted_time_until_next_gc_ms <= _update_period_ms) {
|
||||
_threads_needed = MAX2(active_threads, 1u);
|
||||
return;
|
||||
@ -133,11 +106,12 @@ void G1ConcurrentRefineThreadsNeeded::update(uint active_threads,
// close to the next GC we want to drive toward the target, so round up
// then. The rest of the time we round to nearest, trying to remain near
// the middle of the range.
double rthreads = nthreads;
if (_predicted_time_until_next_gc_ms <= _update_period_ms * 5.0) {
nthreads = ::ceil(nthreads);
rthreads = ::ceil(nthreads);
} else {
nthreads = ::round(nthreads);
rthreads = ::round(nthreads);
}

_threads_needed = static_cast<uint>(MIN2<size_t>(nthreads, UINT_MAX));
_threads_needed = static_cast<uint>(MIN2<size_t>(rthreads, UINT_MAX));
}

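For instance (the 2.3 is an example value only, not from the patch), the rounding policy behaves as follows:

bool   near_gc  = _predicted_time_until_next_gc_ms <= _update_period_ms * 5.0;
double nthreads = 2.3;                          // example raw estimate
double rthreads = near_gc ? ::ceil(nthreads)    // 3.0: drive toward the target
                          : ::round(nthreads);  // 2.0: stay near mid-range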
@ -1,599 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2001, 2025, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "gc/g1/g1BarrierSet.inline.hpp"
|
||||
#include "gc/g1/g1CardTableEntryClosure.hpp"
|
||||
#include "gc/g1/g1CollectedHeap.inline.hpp"
|
||||
#include "gc/g1/g1ConcurrentRefineStats.hpp"
|
||||
#include "gc/g1/g1ConcurrentRefineThread.hpp"
|
||||
#include "gc/g1/g1DirtyCardQueue.hpp"
|
||||
#include "gc/g1/g1FreeIdSet.hpp"
|
||||
#include "gc/g1/g1HeapRegionRemSet.inline.hpp"
|
||||
#include "gc/g1/g1RedirtyCardsQueue.hpp"
|
||||
#include "gc/g1/g1RemSet.hpp"
|
||||
#include "gc/g1/g1ThreadLocalData.hpp"
|
||||
#include "gc/shared/bufferNode.hpp"
|
||||
#include "gc/shared/bufferNodeList.hpp"
|
||||
#include "gc/shared/suspendibleThreadSet.hpp"
|
||||
#include "memory/iterator.hpp"
|
||||
#include "runtime/atomicAccess.hpp"
|
||||
#include "runtime/javaThread.hpp"
|
||||
#include "runtime/mutex.hpp"
|
||||
#include "runtime/mutexLocker.hpp"
|
||||
#include "runtime/os.hpp"
|
||||
#include "runtime/safepoint.hpp"
|
||||
#include "runtime/threads.hpp"
|
||||
#include "runtime/threadSMR.hpp"
|
||||
#include "utilities/globalCounter.inline.hpp"
|
||||
#include "utilities/macros.hpp"
|
||||
#include "utilities/nonblockingQueue.inline.hpp"
|
||||
#include "utilities/pair.hpp"
|
||||
#include "utilities/quickSort.hpp"
|
||||
#include "utilities/ticks.hpp"
|
||||
|
||||
G1DirtyCardQueue::G1DirtyCardQueue(G1DirtyCardQueueSet* qset) :
|
||||
PtrQueue(qset),
|
||||
_refinement_stats(new G1ConcurrentRefineStats())
|
||||
{ }
|
||||
|
||||
G1DirtyCardQueue::~G1DirtyCardQueue() {
|
||||
delete _refinement_stats;
|
||||
}
|
||||
|
||||
// Assumed to be zero by concurrent threads.
|
||||
static uint par_ids_start() { return 0; }
|
||||
|
||||
G1DirtyCardQueueSet::G1DirtyCardQueueSet(BufferNode::Allocator* allocator) :
|
||||
PtrQueueSet(allocator),
|
||||
_num_cards(0),
|
||||
_mutator_refinement_threshold(SIZE_MAX),
|
||||
_completed(),
|
||||
_paused(),
|
||||
_free_ids(par_ids_start(), num_par_ids()),
|
||||
_detached_refinement_stats()
|
||||
{}
|
||||
|
||||
G1DirtyCardQueueSet::~G1DirtyCardQueueSet() {
|
||||
abandon_completed_buffers();
|
||||
}
|
||||
|
||||
// Determines how many mutator threads can process the buffers in parallel.
|
||||
uint G1DirtyCardQueueSet::num_par_ids() {
|
||||
return (uint)os::initial_active_processor_count();
|
||||
}
|
||||
|
||||
void G1DirtyCardQueueSet::flush_queue(G1DirtyCardQueue& queue) {
|
||||
if (queue.buffer() != nullptr) {
|
||||
G1ConcurrentRefineStats* stats = queue.refinement_stats();
|
||||
stats->inc_dirtied_cards(queue.size());
|
||||
}
|
||||
PtrQueueSet::flush_queue(queue);
|
||||
}
|
||||
|
||||
void G1DirtyCardQueueSet::enqueue(G1DirtyCardQueue& queue,
|
||||
volatile CardValue* card_ptr) {
|
||||
CardValue* value = const_cast<CardValue*>(card_ptr);
|
||||
if (!try_enqueue(queue, value)) {
|
||||
handle_zero_index(queue);
|
||||
retry_enqueue(queue, value);
|
||||
}
|
||||
}
|
||||
|
||||
void G1DirtyCardQueueSet::handle_zero_index(G1DirtyCardQueue& queue) {
|
||||
assert(queue.index() == 0, "precondition");
|
||||
BufferNode* old_node = exchange_buffer_with_new(queue);
|
||||
if (old_node != nullptr) {
|
||||
assert(old_node->index() == 0, "invariant");
|
||||
G1ConcurrentRefineStats* stats = queue.refinement_stats();
|
||||
stats->inc_dirtied_cards(old_node->capacity());
|
||||
handle_completed_buffer(old_node, stats);
|
||||
}
|
||||
}
|
||||
|
||||
void G1DirtyCardQueueSet::handle_zero_index_for_thread(Thread* t) {
|
||||
G1DirtyCardQueue& queue = G1ThreadLocalData::dirty_card_queue(t);
|
||||
G1BarrierSet::dirty_card_queue_set().handle_zero_index(queue);
|
||||
}
|
||||
|
||||
size_t G1DirtyCardQueueSet::num_cards() const {
|
||||
return AtomicAccess::load(&_num_cards);
|
||||
}
|
||||
|
||||
void G1DirtyCardQueueSet::enqueue_completed_buffer(BufferNode* cbn) {
|
||||
assert(cbn != nullptr, "precondition");
|
||||
// Increment _num_cards before adding to queue, so queue removal doesn't
|
||||
// need to deal with _num_cards possibly going negative.
|
||||
AtomicAccess::add(&_num_cards, cbn->size());
|
||||
// Perform push in CS. The old tail may be popped while the push is
|
||||
// observing it (attaching it to the new buffer). We need to ensure it
|
||||
// can't be reused until the push completes, to avoid ABA problems.
|
||||
GlobalCounter::CriticalSection cs(Thread::current());
|
||||
_completed.push(*cbn);
|
||||
}
|
||||
|
||||
// Thread-safe attempt to remove and return the first buffer from
// the _completed queue, using NonblockingQueue::try_pop() underneath.
// It may return null even though the queue is non-empty, if a concurrent
// push/append operation is in progress at the same time.
BufferNode* G1DirtyCardQueueSet::dequeue_completed_buffer() {
|
||||
Thread* current_thread = Thread::current();
|
||||
BufferNode* result = nullptr;
|
||||
while (true) {
|
||||
// Use GlobalCounter critical section to avoid ABA problem.
|
||||
// The release of a buffer to its allocator's free list uses
|
||||
// GlobalCounter::write_synchronize() to coordinate with this
|
||||
// dequeuing operation.
|
||||
// We use a CS per iteration, rather than over the whole loop,
|
||||
// because we're not guaranteed to make progress. Lingering in
|
||||
// one CS could defer releasing buffer to the free list for reuse,
|
||||
// leading to excessive allocations.
|
||||
GlobalCounter::CriticalSection cs(current_thread);
|
||||
if (_completed.try_pop(&result)) return result;
|
||||
}
|
||||
}
|
||||
|
||||
BufferNode* G1DirtyCardQueueSet::get_completed_buffer() {
|
||||
BufferNode* result = dequeue_completed_buffer();
|
||||
if (result == nullptr) { // Unlikely if no paused buffers.
|
||||
enqueue_previous_paused_buffers();
|
||||
result = dequeue_completed_buffer();
|
||||
if (result == nullptr) return nullptr;
|
||||
}
|
||||
AtomicAccess::sub(&_num_cards, result->size());
|
||||
return result;
|
||||
}
|
||||
|
||||
#ifdef ASSERT
|
||||
void G1DirtyCardQueueSet::verify_num_cards() const {
|
||||
size_t actual = 0;
|
||||
for (BufferNode* cur = _completed.first();
|
||||
!_completed.is_end(cur);
|
||||
cur = cur->next()) {
|
||||
actual += cur->size();
|
||||
}
|
||||
assert(actual == AtomicAccess::load(&_num_cards),
|
||||
"Num entries in completed buffers should be %zu but are %zu",
|
||||
AtomicAccess::load(&_num_cards), actual);
|
||||
}
|
||||
#endif // ASSERT
|
||||
|
||||
G1DirtyCardQueueSet::PausedBuffers::PausedList::PausedList() :
|
||||
_head(nullptr), _tail(nullptr),
|
||||
_safepoint_id(SafepointSynchronize::safepoint_id())
|
||||
{}
|
||||
|
||||
#ifdef ASSERT
|
||||
G1DirtyCardQueueSet::PausedBuffers::PausedList::~PausedList() {
|
||||
assert(AtomicAccess::load(&_head) == nullptr, "precondition");
|
||||
assert(_tail == nullptr, "precondition");
|
||||
}
|
||||
#endif // ASSERT
|
||||
|
||||
bool G1DirtyCardQueueSet::PausedBuffers::PausedList::is_next() const {
|
||||
assert_not_at_safepoint();
|
||||
return _safepoint_id == SafepointSynchronize::safepoint_id();
|
||||
}
|
||||
|
||||
void G1DirtyCardQueueSet::PausedBuffers::PausedList::add(BufferNode* node) {
|
||||
assert_not_at_safepoint();
|
||||
assert(is_next(), "precondition");
|
||||
BufferNode* old_head = AtomicAccess::xchg(&_head, node);
|
||||
if (old_head == nullptr) {
|
||||
assert(_tail == nullptr, "invariant");
|
||||
_tail = node;
|
||||
} else {
|
||||
node->set_next(old_head);
|
||||
}
|
||||
}
|
||||
|
||||
G1DirtyCardQueueSet::HeadTail G1DirtyCardQueueSet::PausedBuffers::PausedList::take() {
|
||||
BufferNode* head = AtomicAccess::load(&_head);
|
||||
BufferNode* tail = _tail;
|
||||
AtomicAccess::store(&_head, (BufferNode*)nullptr);
|
||||
_tail = nullptr;
|
||||
return HeadTail(head, tail);
|
||||
}
|
||||
|
||||
G1DirtyCardQueueSet::PausedBuffers::PausedBuffers() : _plist(nullptr) {}
|
||||
|
||||
#ifdef ASSERT
|
||||
G1DirtyCardQueueSet::PausedBuffers::~PausedBuffers() {
|
||||
assert(AtomicAccess::load(&_plist) == nullptr, "invariant");
|
||||
}
|
||||
#endif // ASSERT
|
||||
|
||||
void G1DirtyCardQueueSet::PausedBuffers::add(BufferNode* node) {
|
||||
assert_not_at_safepoint();
|
||||
PausedList* plist = AtomicAccess::load_acquire(&_plist);
|
||||
if (plist == nullptr) {
|
||||
// Try to install a new next list.
|
||||
plist = new PausedList();
|
||||
PausedList* old_plist = AtomicAccess::cmpxchg(&_plist, (PausedList*)nullptr, plist);
|
||||
if (old_plist != nullptr) {
|
||||
// Some other thread installed a new next list. Use it instead.
|
||||
delete plist;
|
||||
plist = old_plist;
|
||||
}
|
||||
}
|
||||
assert(plist->is_next(), "invariant");
|
||||
plist->add(node);
|
||||
}
|
||||
|
||||
G1DirtyCardQueueSet::HeadTail G1DirtyCardQueueSet::PausedBuffers::take_previous() {
|
||||
assert_not_at_safepoint();
|
||||
PausedList* previous;
|
||||
{
|
||||
// Deal with plist in a critical section, to prevent it from being
|
||||
// deleted out from under us by a concurrent take_previous().
|
||||
GlobalCounter::CriticalSection cs(Thread::current());
|
||||
previous = AtomicAccess::load_acquire(&_plist);
|
||||
if ((previous == nullptr) || // Nothing to take.
|
||||
previous->is_next() || // Not from a previous safepoint.
|
||||
// Some other thread stole it.
|
||||
(AtomicAccess::cmpxchg(&_plist, previous, (PausedList*)nullptr) != previous)) {
|
||||
return HeadTail();
|
||||
}
|
||||
}
|
||||
// We now own previous.
|
||||
HeadTail result = previous->take();
|
||||
// There might be other threads examining previous (in concurrent
|
||||
// take_previous()). Synchronize to wait until any such threads are
|
||||
// done with such examination before deleting.
|
||||
GlobalCounter::write_synchronize();
|
||||
delete previous;
|
||||
return result;
|
||||
}
|
||||
|
||||
G1DirtyCardQueueSet::HeadTail G1DirtyCardQueueSet::PausedBuffers::take_all() {
|
||||
assert_at_safepoint();
|
||||
HeadTail result;
|
||||
PausedList* plist = AtomicAccess::load(&_plist);
|
||||
if (plist != nullptr) {
|
||||
AtomicAccess::store(&_plist, (PausedList*)nullptr);
|
||||
result = plist->take();
|
||||
delete plist;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
void G1DirtyCardQueueSet::record_paused_buffer(BufferNode* node) {
|
||||
assert_not_at_safepoint();
|
||||
assert(node->next() == nullptr, "precondition");
|
||||
// Ensure there aren't any paused buffers from a previous safepoint.
|
||||
enqueue_previous_paused_buffers();
|
||||
// Cards for paused buffers are included in count, to contribute to
|
||||
// notification checking after the coming safepoint if it doesn't GC.
|
||||
// Note that this means the queue's _num_cards differs from the number
|
||||
// of cards in the queued buffers when there are paused buffers.
|
||||
AtomicAccess::add(&_num_cards, node->size());
|
||||
_paused.add(node);
|
||||
}
|
||||
|
||||
void G1DirtyCardQueueSet::enqueue_paused_buffers_aux(const HeadTail& paused) {
|
||||
if (paused._head != nullptr) {
|
||||
assert(paused._tail != nullptr, "invariant");
|
||||
// Cards from paused buffers are already recorded in the queue count.
|
||||
_completed.append(*paused._head, *paused._tail);
|
||||
}
|
||||
}
|
||||
|
||||
void G1DirtyCardQueueSet::enqueue_previous_paused_buffers() {
|
||||
assert_not_at_safepoint();
|
||||
enqueue_paused_buffers_aux(_paused.take_previous());
|
||||
}
|
||||
|
||||
void G1DirtyCardQueueSet::enqueue_all_paused_buffers() {
|
||||
assert_at_safepoint();
|
||||
enqueue_paused_buffers_aux(_paused.take_all());
|
||||
}
|
||||
|
||||
void G1DirtyCardQueueSet::abandon_completed_buffers() {
|
||||
BufferNodeList list = take_all_completed_buffers();
|
||||
BufferNode* buffers_to_delete = list._head;
|
||||
while (buffers_to_delete != nullptr) {
|
||||
BufferNode* bn = buffers_to_delete;
|
||||
buffers_to_delete = bn->next();
|
||||
bn->set_next(nullptr);
|
||||
deallocate_buffer(bn);
|
||||
}
|
||||
}
|
||||
|
||||
// Merge lists of buffers. The source queue set is emptied as a
|
||||
// result. The queue sets must share the same allocator.
|
||||
void G1DirtyCardQueueSet::merge_bufferlists(G1RedirtyCardsQueueSet* src) {
|
||||
assert(allocator() == src->allocator(), "precondition");
|
||||
const BufferNodeList from = src->take_all_completed_buffers();
|
||||
if (from._head != nullptr) {
|
||||
AtomicAccess::add(&_num_cards, from._entry_count);
|
||||
_completed.append(*from._head, *from._tail);
|
||||
}
|
||||
}
|
||||
|
||||
BufferNodeList G1DirtyCardQueueSet::take_all_completed_buffers() {
|
||||
enqueue_all_paused_buffers();
|
||||
verify_num_cards();
|
||||
Pair<BufferNode*, BufferNode*> pair = _completed.take_all();
|
||||
size_t num_cards = AtomicAccess::load(&_num_cards);
|
||||
AtomicAccess::store(&_num_cards, size_t(0));
|
||||
return BufferNodeList(pair.first, pair.second, num_cards);
|
||||
}
|
||||
|
||||
class G1RefineBufferedCards : public StackObj {
|
||||
BufferNode* const _node;
|
||||
CardTable::CardValue** const _node_buffer;
|
||||
const size_t _node_buffer_capacity;
|
||||
const uint _worker_id;
|
||||
G1ConcurrentRefineStats* _stats;
|
||||
G1RemSet* const _g1rs;
|
||||
|
||||
static inline ptrdiff_t compare_cards(const CardTable::CardValue* p1,
const CardTable::CardValue* p2) {
return p2 - p1;
}

// Sorts the cards from start_index to _node_buffer_capacity in *decreasing*
// address order. Tests showed that this order is preferable to not sorting
// or increasing address order.
void sort_cards(size_t start_index) {
QuickSort::sort(&_node_buffer[start_index],
_node_buffer_capacity - start_index,
compare_cards);
}

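A quick check of the comparator's direction (editorial illustration; the addresses are made up):

// compare_cards(p1, p2) returns p2 - p1, so for p1 = (CardValue*)0x1000 and
// p2 = (CardValue*)0x2000 the result is positive, p1 is ordered after p2, and
// the buffer ends up sorted in decreasing address order as described above.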
// Returns the index to the first clean card in the buffer.
|
||||
size_t clean_cards() {
|
||||
const size_t start = _node->index();
|
||||
assert(start <= _node_buffer_capacity, "invariant");
|
||||
|
||||
// Two-fingered compaction algorithm similar to the filtering mechanism in
// SATBMarkQueue. The main difference is that clean_card_before_refine()
// could change the buffer element in-place.
// We don't check for SuspendibleThreadSet::should_yield(), because
// cleaning and redirtying the cards is fast.
CardTable::CardValue** src = &_node_buffer[start];
|
||||
CardTable::CardValue** dst = &_node_buffer[_node_buffer_capacity];
|
||||
assert(src <= dst, "invariant");
|
||||
for ( ; src < dst; ++src) {
|
||||
// Search low to high for a card to keep.
|
||||
if (_g1rs->clean_card_before_refine(src)) {
|
||||
// Found keeper. Search high to low for a card to discard.
|
||||
while (src < --dst) {
|
||||
if (!_g1rs->clean_card_before_refine(dst)) {
|
||||
*dst = *src; // Replace discard with keeper.
|
||||
break;
|
||||
}
|
||||
}
|
||||
// If discard search failed (src == dst), the outer loop will also end.
|
||||
}
|
||||
}
|
||||
|
||||
// dst points to the first retained clean card, or the end of the buffer
|
||||
// if all the cards were discarded.
|
||||
const size_t first_clean = dst - _node_buffer;
|
||||
assert(first_clean >= start && first_clean <= _node_buffer_capacity, "invariant");
|
||||
// Discarded cards are considered as refined.
|
||||
_stats->inc_refined_cards(first_clean - start);
|
||||
_stats->inc_precleaned_cards(first_clean - start);
|
||||
return first_clean;
|
||||
}
|
||||
|
||||
bool refine_cleaned_cards(size_t start_index) {
|
||||
bool result = true;
|
||||
size_t i = start_index;
|
||||
for ( ; i < _node_buffer_capacity; ++i) {
|
||||
if (SuspendibleThreadSet::should_yield()) {
|
||||
redirty_unrefined_cards(i);
|
||||
result = false;
|
||||
break;
|
||||
}
|
||||
_g1rs->refine_card_concurrently(_node_buffer[i], _worker_id);
|
||||
}
|
||||
_node->set_index(i);
|
||||
_stats->inc_refined_cards(i - start_index);
|
||||
return result;
|
||||
}
|
||||
|
||||
void redirty_unrefined_cards(size_t start) {
|
||||
for ( ; start < _node_buffer_capacity; ++start) {
|
||||
*_node_buffer[start] = G1CardTable::dirty_card_val();
|
||||
}
|
||||
}
|
||||
|
||||
public:
|
||||
G1RefineBufferedCards(BufferNode* node,
|
||||
uint worker_id,
|
||||
G1ConcurrentRefineStats* stats) :
|
||||
_node(node),
|
||||
_node_buffer(reinterpret_cast<CardTable::CardValue**>(BufferNode::make_buffer_from_node(node))),
|
||||
_node_buffer_capacity(node->capacity()),
|
||||
_worker_id(worker_id),
|
||||
_stats(stats),
|
||||
_g1rs(G1CollectedHeap::heap()->rem_set()) {}
|
||||
|
||||
bool refine() {
|
||||
size_t first_clean_index = clean_cards();
|
||||
if (first_clean_index == _node_buffer_capacity) {
|
||||
_node->set_index(first_clean_index);
|
||||
return true;
|
||||
}
|
||||
// This fence serves two purposes. First, the cards must be cleaned
// before processing the contents. Second, we can't proceed with
// processing a region until after the read of the region's top in
// collect_and_clean_cards(), for synchronization with possibly concurrent
// humongous object allocation (see comment at the StoreStore fence before
// setting the regions' tops in humongous allocation path).
// It's okay that reading the region's top and reading the region's type are
// racy with respect to each other. We need both set, in any order, to proceed.
OrderAccess::fence();
|
||||
sort_cards(first_clean_index);
|
||||
return refine_cleaned_cards(first_clean_index);
|
||||
}
|
||||
};
|
||||
|
||||
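The two-fingered compaction used by G1RefineBufferedCards::clean_cards() above can be illustrated on a plain array. The following is a standalone sketch, not part of the patch; keep() stands in for clean_card_before_refine(), and the return value is the index of the first retained element:

static size_t two_finger_compact(int* buf, size_t start, size_t capacity,
                                 bool (*keep)(int)) {
  int* src = buf + start;
  int* dst = buf + capacity;
  for ( ; src < dst; ++src) {
    // Search low to high for an element to keep.
    if (keep(*src)) {
      // Found a keeper; search high to low for an element to discard.
      while (src < --dst) {
        if (!keep(*dst)) {
          *dst = *src; // Replace the discarded slot with the keeper.
          break;
        }
      }
      // If the discard search failed (src == dst), the outer loop ends too.
    }
  }
  // dst points at the first retained element, or buf + capacity if none.
  return dst - buf;
}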
bool G1DirtyCardQueueSet::refine_buffer(BufferNode* node,
|
||||
uint worker_id,
|
||||
G1ConcurrentRefineStats* stats) {
|
||||
Ticks start_time = Ticks::now();
|
||||
G1RefineBufferedCards buffered_cards(node, worker_id, stats);
|
||||
bool result = buffered_cards.refine();
|
||||
stats->inc_refinement_time(Ticks::now() - start_time);
|
||||
return result;
|
||||
}
|
||||
|
||||
void G1DirtyCardQueueSet::handle_refined_buffer(BufferNode* node,
|
||||
bool fully_processed) {
|
||||
if (fully_processed) {
|
||||
assert(node->is_empty(), "Buffer not fully consumed: index: %zu, size: %zu",
|
||||
node->index(), node->capacity());
|
||||
deallocate_buffer(node);
|
||||
} else {
|
||||
assert(!node->is_empty(), "Buffer fully consumed.");
|
||||
// Buffer incompletely processed because there is a pending safepoint.
|
||||
// Record partially processed buffer, to be finished later.
|
||||
record_paused_buffer(node);
|
||||
}
|
||||
}
|
||||
|
||||
void G1DirtyCardQueueSet::handle_completed_buffer(BufferNode* new_node,
|
||||
G1ConcurrentRefineStats* stats) {
|
||||
enqueue_completed_buffer(new_node);
|
||||
|
||||
// No need for mutator refinement if number of cards is below limit.
|
||||
if (AtomicAccess::load(&_num_cards) <= AtomicAccess::load(&_mutator_refinement_threshold)) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Don't try to process a buffer that will just get immediately paused.
|
||||
// When going into a safepoint it's just a waste of effort.
|
||||
// When coming out of a safepoint, Java threads may be running before the
|
||||
// yield request (for non-Java threads) has been cleared.
|
||||
if (SuspendibleThreadSet::should_yield()) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Only Java threads perform mutator refinement.
|
||||
if (!Thread::current()->is_Java_thread()) {
|
||||
return;
|
||||
}
|
||||
|
||||
BufferNode* node = get_completed_buffer();
|
||||
if (node == nullptr) return; // Didn't get a buffer to process.
|
||||
|
||||
// Refine cards in buffer.
|
||||
|
||||
uint worker_id = _free_ids.claim_par_id(); // temporarily claim an id
|
||||
bool fully_processed = refine_buffer(node, worker_id, stats);
|
||||
_free_ids.release_par_id(worker_id); // release the id
|
||||
|
||||
// Deal with buffer after releasing id, to let another thread use id.
|
||||
handle_refined_buffer(node, fully_processed);
|
||||
}
|
||||
|
||||
bool G1DirtyCardQueueSet::refine_completed_buffer_concurrently(uint worker_id,
|
||||
size_t stop_at,
|
||||
G1ConcurrentRefineStats* stats) {
|
||||
// Not enough cards to trigger processing.
|
||||
if (AtomicAccess::load(&_num_cards) <= stop_at) return false;
|
||||
|
||||
BufferNode* node = get_completed_buffer();
|
||||
if (node == nullptr) return false; // Didn't get a buffer to process.
|
||||
|
||||
bool fully_processed = refine_buffer(node, worker_id, stats);
|
||||
handle_refined_buffer(node, fully_processed);
|
||||
return true;
|
||||
}
|
||||
|
||||
void G1DirtyCardQueueSet::abandon_logs_and_stats() {
|
||||
assert_at_safepoint();
|
||||
|
||||
// Disable mutator refinement until concurrent refinement decides otherwise.
|
||||
set_mutator_refinement_threshold(SIZE_MAX);
|
||||
|
||||
// Iterate over all the threads, resetting per-thread queues and stats.
|
||||
struct AbandonThreadLogClosure : public ThreadClosure {
|
||||
G1DirtyCardQueueSet& _qset;
|
||||
AbandonThreadLogClosure(G1DirtyCardQueueSet& qset) : _qset(qset) {}
|
||||
virtual void do_thread(Thread* t) {
|
||||
G1DirtyCardQueue& queue = G1ThreadLocalData::dirty_card_queue(t);
|
||||
_qset.reset_queue(queue);
|
||||
queue.refinement_stats()->reset();
|
||||
}
|
||||
} closure(*this);
|
||||
Threads::threads_do(&closure);
|
||||
|
||||
enqueue_all_paused_buffers();
|
||||
abandon_completed_buffers();
|
||||
|
||||
// Reset stats from detached threads.
|
||||
MutexLocker ml(G1DetachedRefinementStats_lock, Mutex::_no_safepoint_check_flag);
|
||||
_detached_refinement_stats.reset();
|
||||
}
|
||||
|
||||
void G1DirtyCardQueueSet::update_refinement_stats(G1ConcurrentRefineStats& stats) {
|
||||
assert_at_safepoint();
|
||||
|
||||
_concatenated_refinement_stats = stats;
|
||||
|
||||
enqueue_all_paused_buffers();
|
||||
verify_num_cards();
|
||||
|
||||
// Collect and reset stats from detached threads.
|
||||
MutexLocker ml(G1DetachedRefinementStats_lock, Mutex::_no_safepoint_check_flag);
|
||||
_concatenated_refinement_stats += _detached_refinement_stats;
|
||||
_detached_refinement_stats.reset();
|
||||
}
|
||||
|
||||
G1ConcurrentRefineStats G1DirtyCardQueueSet::concatenate_log_and_stats(Thread* thread) {
|
||||
assert_at_safepoint();
|
||||
|
||||
G1DirtyCardQueue& queue = G1ThreadLocalData::dirty_card_queue(thread);
|
||||
// Flush the buffer if non-empty. Flush before accumulating and
|
||||
// resetting stats, since flushing may modify the stats.
|
||||
if (!queue.is_empty()) {
|
||||
flush_queue(queue);
|
||||
}
|
||||
|
||||
G1ConcurrentRefineStats result = *queue.refinement_stats();
|
||||
queue.refinement_stats()->reset();
|
||||
return result;
|
||||
}
|
||||
|
||||
G1ConcurrentRefineStats G1DirtyCardQueueSet::concatenated_refinement_stats() const {
|
||||
assert_at_safepoint();
|
||||
return _concatenated_refinement_stats;
|
||||
}
|
||||
|
||||
void G1DirtyCardQueueSet::record_detached_refinement_stats(G1ConcurrentRefineStats* stats) {
|
||||
MutexLocker ml(G1DetachedRefinementStats_lock, Mutex::_no_safepoint_check_flag);
|
||||
_detached_refinement_stats += *stats;
|
||||
stats->reset();
|
||||
}
|
||||
|
||||
size_t G1DirtyCardQueueSet::mutator_refinement_threshold() const {
|
||||
return AtomicAccess::load(&_mutator_refinement_threshold);
|
||||
}
|
||||
|
||||
void G1DirtyCardQueueSet::set_mutator_refinement_threshold(size_t value) {
|
||||
AtomicAccess::store(&_mutator_refinement_threshold, value);
|
||||
}
|
||||
@ -1,302 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2001, 2024, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef SHARE_GC_G1_G1DIRTYCARDQUEUE_HPP
|
||||
#define SHARE_GC_G1_G1DIRTYCARDQUEUE_HPP
|
||||
|
||||
#include "gc/g1/g1CardTable.hpp"
|
||||
#include "gc/g1/g1ConcurrentRefineStats.hpp"
|
||||
#include "gc/g1/g1FreeIdSet.hpp"
|
||||
#include "gc/shared/bufferNode.hpp"
|
||||
#include "gc/shared/bufferNodeList.hpp"
|
||||
#include "gc/shared/ptrQueue.hpp"
|
||||
#include "memory/allocation.hpp"
|
||||
#include "memory/padded.hpp"
|
||||
#include "utilities/nonblockingQueue.hpp"
|
||||
|
||||
class G1PrimaryConcurrentRefineThread;
|
||||
class G1DirtyCardQueueSet;
|
||||
class G1RedirtyCardsQueueSet;
|
||||
class Thread;
|
||||
|
||||
// A PtrQueue whose elements are pointers to (dirty) cards in the card table.
class G1DirtyCardQueue: public PtrQueue {
|
||||
G1ConcurrentRefineStats* _refinement_stats;
|
||||
|
||||
public:
|
||||
G1DirtyCardQueue(G1DirtyCardQueueSet* qset);
|
||||
|
||||
// Flush before destroying; queue may be used to capture pending work while
|
||||
// doing something else, with auto-flush on completion.
|
||||
~G1DirtyCardQueue();
|
||||
|
||||
G1ConcurrentRefineStats* refinement_stats() const {
|
||||
return _refinement_stats;
|
||||
}
|
||||
|
||||
// Compiler support.
|
||||
static ByteSize byte_offset_of_index() {
|
||||
return PtrQueue::byte_offset_of_index<G1DirtyCardQueue>();
|
||||
}
|
||||
using PtrQueue::byte_width_of_index;
|
||||
|
||||
static ByteSize byte_offset_of_buf() {
|
||||
return PtrQueue::byte_offset_of_buf<G1DirtyCardQueue>();
|
||||
}
|
||||
using PtrQueue::byte_width_of_buf;
|
||||
|
||||
};
|
||||
|
||||
class G1DirtyCardQueueSet: public PtrQueueSet {
|
||||
// Head and tail of a list of BufferNodes, linked through their next()
|
||||
// fields. Similar to BufferNodeList, but without the _entry_count.
|
||||
struct HeadTail {
|
||||
BufferNode* _head;
|
||||
BufferNode* _tail;
|
||||
HeadTail() : _head(nullptr), _tail(nullptr) {}
|
||||
HeadTail(BufferNode* head, BufferNode* tail) : _head(head), _tail(tail) {}
|
||||
};
|
||||
|
||||
// Concurrent refinement may stop processing in the middle of a buffer if
// there is a pending safepoint, to avoid long delays to safepoint. A
// partially processed buffer needs to be recorded for processing by the
// safepoint if it's a GC safepoint; otherwise it needs to be recorded for
// further concurrent refinement work after the safepoint. But if the
// buffer was obtained from the completed buffer queue then it can't simply
// be added back to the queue, as that would introduce a new source of ABA
// for the queue.
//
// The PausedBuffer object is used to record such buffers for the upcoming
// safepoint, and provides access to the buffers recorded for previous
// safepoints. Before obtaining a buffer from the completed buffers queue,
// we first transfer any buffers from previous safepoints to the queue.
// This is ABA-safe because threads cannot be in the midst of a queue pop
// across a safepoint.
//
// The paused buffers are conceptually an extension of the completed buffers
// queue, and operations which need to deal with all of the queued buffers
// (such as concatenating or abandoning logs) also need to deal with any
// paused buffers. In general, if a safepoint performs a GC then the paused
// buffers will be processed as part of it, and there won't be any paused
// buffers after a GC safepoint.
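// A hypothetical timeline, for illustration, using the methods declared below:
// 1. A refinement thread pops a buffer, partially refines it, notices a
//    pending safepoint, and hands the remainder to record_paused_buffer().
// 2. If the safepoint is a GC, enqueue_all_paused_buffers() folds the paused
//    buffers back into the completed queue for the GC to process.
// 3. Otherwise, before obtaining another buffer a thread first calls
//    enqueue_previous_paused_buffers(); since no pop can span a safepoint,
//    re-queuing the nodes cannot reintroduce the ABA problem.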
class PausedBuffers {
|
||||
class PausedList : public CHeapObj<mtGC> {
|
||||
BufferNode* volatile _head;
|
||||
BufferNode* _tail;
|
||||
size_t _safepoint_id;
|
||||
|
||||
NONCOPYABLE(PausedList);
|
||||
|
||||
public:
|
||||
PausedList();
|
||||
DEBUG_ONLY(~PausedList();)
|
||||
|
||||
// Return true if this list was created to hold buffers for the
|
||||
// next safepoint.
|
||||
// precondition: not at safepoint.
|
||||
bool is_next() const;
|
||||
|
||||
// Thread-safe add the buffer to the list.
|
||||
// precondition: not at safepoint.
|
||||
// precondition: is_next().
|
||||
void add(BufferNode* node);
|
||||
|
||||
// Take all the buffers from the list. Not thread-safe.
|
||||
HeadTail take();
|
||||
};
|
||||
|
||||
// The most recently created list, which might be for either the next or
|
||||
// a previous safepoint, or might be null if the next list hasn't been
|
||||
// created yet. We only need one list because of the requirement that
|
||||
// threads calling add() must first ensure there are no paused buffers
|
||||
// from a previous safepoint. There might be many list instances existing
|
||||
// at the same time though; there can be many threads competing to create
|
||||
// and install the next list, and meanwhile there can be a thread dealing
|
||||
// with the previous list.
|
||||
PausedList* volatile _plist;
|
||||
DEFINE_PAD_MINUS_SIZE(1, DEFAULT_PADDING_SIZE, sizeof(PausedList*));
|
||||
|
||||
NONCOPYABLE(PausedBuffers);
|
||||
|
||||
public:
|
||||
PausedBuffers();
|
||||
DEBUG_ONLY(~PausedBuffers();)
|
||||
|
||||
// Thread-safe add the buffer to paused list for next safepoint.
|
||||
// precondition: not at safepoint.
|
||||
// precondition: does not have paused buffers from a previous safepoint.
|
||||
void add(BufferNode* node);
|
||||
|
||||
// Thread-safe take all paused buffers for previous safepoints.
|
||||
// precondition: not at safepoint.
|
||||
HeadTail take_previous();
|
||||
|
||||
// Take all the paused buffers.
|
||||
// precondition: at safepoint.
|
||||
HeadTail take_all();
|
||||
};
|
||||
|
||||
DEFINE_PAD_MINUS_SIZE(0, DEFAULT_PADDING_SIZE, 0);
|
||||
// Upper bound on the number of cards in the completed and paused buffers.
|
||||
volatile size_t _num_cards;
|
||||
DEFINE_PAD_MINUS_SIZE(1, DEFAULT_PADDING_SIZE, sizeof(size_t));
|
||||
// If the queue contains more cards than configured here, the
|
||||
// mutator must start doing some of the concurrent refinement work.
|
||||
volatile size_t _mutator_refinement_threshold;
|
||||
DEFINE_PAD_MINUS_SIZE(2, DEFAULT_PADDING_SIZE, sizeof(size_t));
|
||||
// Buffers ready for refinement.
|
||||
// NonblockingQueue has inner padding of one cache line.
|
||||
NonblockingQueue<BufferNode, &BufferNode::next_ptr> _completed;
|
||||
// Add a trailer padding after NonblockingQueue.
|
||||
DEFINE_PAD_MINUS_SIZE(3, DEFAULT_PADDING_SIZE, sizeof(BufferNode*));
|
||||
// Buffers for which refinement is temporarily paused.
|
||||
// PausedBuffers has inner padding, including trailer.
|
||||
PausedBuffers _paused;
|
||||
|
||||
G1FreeIdSet _free_ids;
|
||||
|
||||
G1ConcurrentRefineStats _concatenated_refinement_stats;
|
||||
G1ConcurrentRefineStats _detached_refinement_stats;
|
||||
|
||||
// Verify _num_cards == sum of cards in the completed queue.
|
||||
void verify_num_cards() const NOT_DEBUG_RETURN;
|
||||
|
||||
// Thread-safe add a buffer to paused list for next safepoint.
|
||||
// precondition: not at safepoint.
|
||||
void record_paused_buffer(BufferNode* node);
|
||||
void enqueue_paused_buffers_aux(const HeadTail& paused);
|
||||
// Thread-safe transfer paused buffers for previous safepoints to the queue.
|
||||
// precondition: not at safepoint.
|
||||
void enqueue_previous_paused_buffers();
|
||||
// Transfer all paused buffers to the queue.
|
||||
// precondition: at safepoint.
|
||||
void enqueue_all_paused_buffers();
|
||||
|
||||
void abandon_completed_buffers();
|
||||
|
||||
// Refine the cards in "node" from its index to buffer_capacity.
|
||||
// Stops processing if SuspendibleThreadSet::should_yield() is true.
|
||||
// Returns true if the entire buffer was processed, false if there
|
||||
// is a pending yield request. The node's index is updated to exclude
|
||||
// the processed elements, e.g. up to the element before processing
|
||||
// stopped, or one past the last element if the entire buffer was
|
||||
// processed. Updates stats.
|
||||
bool refine_buffer(BufferNode* node,
|
||||
uint worker_id,
|
||||
G1ConcurrentRefineStats* stats);
|
||||
|
||||
// Deal with buffer after a call to refine_buffer. If fully processed,
|
||||
// deallocate the buffer. Otherwise, record it as paused.
|
||||
void handle_refined_buffer(BufferNode* node, bool fully_processed);
|
||||
|
||||
// Thread-safe attempt to remove and return the first buffer from
|
||||
// the _completed queue.
|
||||
// Returns null if the queue is empty, or if a concurrent push/append
// interferes. Uses a GlobalCounter critical section to avoid the ABA problem.
BufferNode* dequeue_completed_buffer();
|
||||
// Remove and return a completed buffer from the list, or return null
|
||||
// if none available.
|
||||
BufferNode* get_completed_buffer();
|
||||
|
||||
// Called when queue is full or has no buffer.
|
||||
void handle_zero_index(G1DirtyCardQueue& queue);
|
||||
|
||||
// Enqueue the buffer, and optionally perform refinement by the mutator.
|
||||
// Mutator refinement is only done by Java threads, and only if there
|
||||
// are more than mutator_refinement_threshold cards in the completed buffers.
|
||||
// Updates stats.
|
||||
//
|
||||
// Mutator refinement, if performed, stops processing a buffer if
|
||||
// SuspendibleThreadSet::should_yield(), recording the incompletely
|
||||
// processed buffer for later processing of the remainder.
|
||||
void handle_completed_buffer(BufferNode* node, G1ConcurrentRefineStats* stats);
|
||||
|
||||
public:
|
||||
G1DirtyCardQueueSet(BufferNode::Allocator* allocator);
|
||||
~G1DirtyCardQueueSet();
|
||||
|
||||
// The number of parallel ids that can be claimed to allow collector or
|
||||
// mutator threads to do card-processing work.
|
||||
static uint num_par_ids();
|
||||
|
||||
static void handle_zero_index_for_thread(Thread* t);
|
||||
|
||||
virtual void enqueue_completed_buffer(BufferNode* node);
|
||||
|
||||
// Upper bound on the number of cards currently in this queue set.
|
||||
// Read without synchronization. The value may be high because there
|
||||
// is a concurrent modification of the set of buffers.
|
||||
size_t num_cards() const;
|
||||
|
||||
void merge_bufferlists(G1RedirtyCardsQueueSet* src);
|
||||
|
||||
BufferNodeList take_all_completed_buffers();
|
||||
|
||||
void flush_queue(G1DirtyCardQueue& queue);
|
||||
|
||||
using CardValue = G1CardTable::CardValue;
|
||||
void enqueue(G1DirtyCardQueue& queue, volatile CardValue* card_ptr);
|
||||
|
||||
// If there are more than stop_at cards in the completed buffers, pop
|
||||
// a buffer, refine its contents, and return true. Otherwise return
|
||||
// false. Updates stats.
|
||||
//
|
||||
// Stops processing a buffer if SuspendibleThreadSet::should_yield(),
|
||||
// recording the incompletely processed buffer for later processing of
|
||||
// the remainder.
|
||||
bool refine_completed_buffer_concurrently(uint worker_id,
|
||||
size_t stop_at,
|
||||
G1ConcurrentRefineStats* stats);
|
||||
|
||||
// If a full collection is happening, reset per-thread refinement stats and
|
||||
// partial logs, and release completed logs. The full collection will make
|
||||
// them all irrelevant.
|
||||
// precondition: at safepoint.
|
||||
void abandon_logs_and_stats();
|
||||
|
||||
// Update global refinement statistics with the ones given and the ones from
|
||||
// detached threads.
|
||||
// precondition: at safepoint.
|
||||
void update_refinement_stats(G1ConcurrentRefineStats& stats);
|
||||
// Add the given thread's partial logs to the global list and return and reset
|
||||
// its refinement stats.
|
||||
// precondition: at safepoint.
|
||||
G1ConcurrentRefineStats concatenate_log_and_stats(Thread* thread);
|
||||
|
||||
// Return the total of mutator refinement stats for all threads.
|
||||
// precondition: at safepoint.
|
||||
// precondition: only call after concatenate_log_and_stats.
G1ConcurrentRefineStats concatenated_refinement_stats() const;
|
||||
|
||||
// Accumulate refinement stats from threads that are detaching.
|
||||
void record_detached_refinement_stats(G1ConcurrentRefineStats* stats);
|
||||
|
||||
// Number of cards above which mutator threads should do refinement.
|
||||
size_t mutator_refinement_threshold() const;
|
||||
|
||||
// Set number of cards above which mutator threads should do refinement.
|
||||
void set_mutator_refinement_threshold(size_t value);
|
||||
};
|
||||
|
||||
#endif // SHARE_GC_G1_G1DIRTYCARDQUEUE_HPP
|
||||
@ -22,8 +22,6 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include "gc/g1/g1ConcurrentRefine.hpp"
|
||||
#include "gc/g1/g1DirtyCardQueue.hpp"
|
||||
#include "gc/g1/g1FromCardCache.hpp"
|
||||
#include "gc/shared/gc_globals.hpp"
|
||||
#include "memory/padded.inline.hpp"
|
||||
@ -80,7 +78,7 @@ void G1FromCardCache::print(outputStream* out) {
|
||||
#endif
|
||||
|
||||
uint G1FromCardCache::num_par_rem_sets() {
|
||||
return G1DirtyCardQueueSet::num_par_ids() + G1ConcRefinementThreads + MAX2(ConcGCThreads, ParallelGCThreads);
|
||||
return G1ConcRefinementThreads + ConcGCThreads;
|
||||
}
|
||||
|
||||
void G1FromCardCache::clear(uint region_idx) {
|
||||
|
||||
@ -147,6 +147,10 @@ void G1FullGCCompactTask::free_non_overlapping_regions(uint src_start_idx, uint
|
||||
|
||||
for (uint i = non_overlapping_start; i <= src_end_idx; ++i) {
|
||||
G1HeapRegion* hr = _g1h->region_at(i);
|
||||
if (VerifyDuringGC) {
|
||||
// Satisfy some asserts in free_..._region
|
||||
hr->clear_both_card_tables();
|
||||
}
|
||||
_g1h->free_humongous_region(hr, nullptr);
|
||||
}
|
||||
}
|
||||
|
||||
@ -35,6 +35,10 @@
|
||||
#include "gc/shared/fullGCForwarding.inline.hpp"
|
||||
|
||||
void G1DetermineCompactionQueueClosure::free_empty_humongous_region(G1HeapRegion* hr) {
|
||||
if (VerifyDuringGC) {
|
||||
// Satisfy some asserts in free_..._region.
|
||||
hr->clear_both_card_tables();
|
||||
}
|
||||
_g1h->free_humongous_region(hr, nullptr);
|
||||
_collector->set_free(hr->hrm_index());
|
||||
add_to_compaction_queue(hr);
|
||||
|
||||
@ -32,7 +32,7 @@ G1FullGCResetMetadataTask::G1ResetMetadataClosure::G1ResetMetadataClosure(G1Full
|
||||
|
||||
void G1FullGCResetMetadataTask::G1ResetMetadataClosure::reset_region_metadata(G1HeapRegion* hr) {
|
||||
hr->rem_set()->clear();
|
||||
hr->clear_cardtable();
|
||||
hr->clear_both_card_tables();
|
||||
}
|
||||
|
||||
bool G1FullGCResetMetadataTask::G1ResetMetadataClosure::do_heap_region(G1HeapRegion* hr) {
|
||||
|
||||
@ -50,8 +50,7 @@ G1GCPhaseTimes::G1GCPhaseTimes(STWGCTimer* gc_timer, uint max_gc_threads) :
|
||||
{
|
||||
assert(max_gc_threads > 0, "Must have some GC threads");
|
||||
|
||||
_gc_par_phases[RetireTLABsAndFlushLogs] = new WorkerDataArray<double>("RetireTLABsAndFlushLogs", "JT Retire TLABs And Flush Logs (ms):", max_gc_threads);
|
||||
_gc_par_phases[NonJavaThreadFlushLogs] = new WorkerDataArray<double>("NonJavaThreadFlushLogs", "Non-JT Flush Logs (ms):", max_gc_threads);
|
||||
_gc_par_phases[RetireTLABs] = new WorkerDataArray<double>("RetireTLABs", "JavaThread Retire TLABs (ms):", max_gc_threads);
|
||||
|
||||
_gc_par_phases[GCWorkerStart] = new WorkerDataArray<double>("GCWorkerStart", "GC Worker Start (ms):", max_gc_threads);
|
||||
_gc_par_phases[ExtRootScan] = new WorkerDataArray<double>("ExtRootScan", "Ext Root Scanning (ms):", max_gc_threads);
|
||||
@ -83,7 +82,7 @@ G1GCPhaseTimes::G1GCPhaseTimes(STWGCTimer* gc_timer, uint max_gc_threads) :
|
||||
_gc_par_phases[OptMergeRS]->create_thread_work_items(GCMergeRSWorkItemsStrings[i], i);
|
||||
}
|
||||
|
||||
_gc_par_phases[MergeLB] = new WorkerDataArray<double>("MergeLB", "Log Buffers (ms):", max_gc_threads);
|
||||
_gc_par_phases[SweepRT] = new WorkerDataArray<double>("SweepRT", "Sweep (ms):", max_gc_threads);
|
||||
_gc_par_phases[ScanHR] = new WorkerDataArray<double>("ScanHR", "Scan Heap Roots (ms):", max_gc_threads);
|
||||
_gc_par_phases[OptScanHR] = new WorkerDataArray<double>("OptScanHR", "Optional Scan Heap Roots (ms):", max_gc_threads);
|
||||
_gc_par_phases[CodeRoots] = new WorkerDataArray<double>("CodeRoots", "Code Root Scan (ms):", max_gc_threads);
|
||||
@ -98,7 +97,7 @@ G1GCPhaseTimes::G1GCPhaseTimes(STWGCTimer* gc_timer, uint max_gc_threads) :
|
||||
_gc_par_phases[MergePSS] = new WorkerDataArray<double>("MergePSS", "Merge Per-Thread State (ms):", max_gc_threads);
|
||||
_gc_par_phases[RestoreEvacuationFailedRegions] = new WorkerDataArray<double>("RestoreEvacuationFailedRegions", "Restore Evacuation Failed Regions (ms):", max_gc_threads);
|
||||
_gc_par_phases[RemoveSelfForwards] = new WorkerDataArray<double>("RemoveSelfForwards", "Remove Self Forwards (ms):", max_gc_threads);
|
||||
_gc_par_phases[ClearCardTable] = new WorkerDataArray<double>("ClearLoggedCards", "Clear Logged Cards (ms):", max_gc_threads);
|
||||
_gc_par_phases[ClearCardTable] = new WorkerDataArray<double>("ClearPendingCards", "Clear Pending Cards (ms):", max_gc_threads);
|
||||
_gc_par_phases[RecalculateUsed] = new WorkerDataArray<double>("RecalculateUsed", "Recalculate Used Memory (ms):", max_gc_threads);
|
||||
#if COMPILER2_OR_JVMCI
|
||||
_gc_par_phases[UpdateDerivedPointers] = new WorkerDataArray<double>("UpdateDerivedPointers", "Update Derived Pointers (ms):", max_gc_threads);
|
||||
@ -107,11 +106,15 @@ G1GCPhaseTimes::G1GCPhaseTimes(STWGCTimer* gc_timer, uint max_gc_threads) :
|
||||
_gc_par_phases[ResetPartialArrayStateManager] = new WorkerDataArray<double>("ResetPartialArrayStateManager", "Reset Partial Array State Manager (ms):", max_gc_threads);
|
||||
_gc_par_phases[ProcessEvacuationFailedRegions] = new WorkerDataArray<double>("ProcessEvacuationFailedRegions", "Process Evacuation Failed Regions (ms):", max_gc_threads);
|
||||
|
||||
_gc_par_phases[ScanHR]->create_thread_work_items("Pending Cards:", ScanHRPendingCards);
|
||||
_gc_par_phases[ScanHR]->create_thread_work_items("Scanned Empty:", ScanHRScannedEmptyCards);
|
||||
_gc_par_phases[ScanHR]->create_thread_work_items("Scanned Cards:", ScanHRScannedCards);
|
||||
_gc_par_phases[ScanHR]->create_thread_work_items("Scanned Blocks:", ScanHRScannedBlocks);
|
||||
_gc_par_phases[ScanHR]->create_thread_work_items("Claimed Chunks:", ScanHRClaimedChunks);
|
||||
_gc_par_phases[ScanHR]->create_thread_work_items("Found Roots:", ScanHRFoundRoots);
|
||||
|
||||
_gc_par_phases[OptScanHR]->create_thread_work_items("Pending Cards:", ScanHRPendingCards);
|
||||
_gc_par_phases[OptScanHR]->create_thread_work_items("Scanned Empty:", ScanHRScannedEmptyCards);
|
||||
_gc_par_phases[OptScanHR]->create_thread_work_items("Scanned Cards:", ScanHRScannedCards);
|
||||
_gc_par_phases[OptScanHR]->create_thread_work_items("Scanned Blocks:", ScanHRScannedBlocks);
|
||||
_gc_par_phases[OptScanHR]->create_thread_work_items("Claimed Chunks:", ScanHRClaimedChunks);
|
||||
@ -119,9 +122,6 @@ G1GCPhaseTimes::G1GCPhaseTimes(STWGCTimer* gc_timer, uint max_gc_threads) :
|
||||
_gc_par_phases[OptScanHR]->create_thread_work_items("Scanned Refs:", ScanHRScannedOptRefs);
|
||||
_gc_par_phases[OptScanHR]->create_thread_work_items("Used Memory:", ScanHRUsedMemory);
|
||||
|
||||
_gc_par_phases[MergeLB]->create_thread_work_items("Dirty Cards:", MergeLBDirtyCards);
|
||||
_gc_par_phases[MergeLB]->create_thread_work_items("Skipped Cards:", MergeLBSkippedCards);
|
||||
|
||||
_gc_par_phases[CodeRoots]->create_thread_work_items("Scanned Nmethods:", CodeRootsScannedNMethods);
|
||||
|
||||
_gc_par_phases[OptCodeRoots]->create_thread_work_items("Scanned Nmethods:", CodeRootsScannedNMethods);
|
||||
@ -129,7 +129,10 @@ G1GCPhaseTimes::G1GCPhaseTimes(STWGCTimer* gc_timer, uint max_gc_threads) :
|
||||
_gc_par_phases[MergePSS]->create_thread_work_items("Copied Bytes:", MergePSSCopiedBytes);
|
||||
_gc_par_phases[MergePSS]->create_thread_work_items("LAB Waste:", MergePSSLABWasteBytes);
|
||||
_gc_par_phases[MergePSS]->create_thread_work_items("LAB Undo Waste:", MergePSSLABUndoWasteBytes);
|
||||
_gc_par_phases[MergePSS]->create_thread_work_items("Evac Fail Extra Cards:", MergePSSEvacFailExtra);
|
||||
_gc_par_phases[MergePSS]->create_thread_work_items("Pending Cards:", MergePSSPendingCards);
|
||||
_gc_par_phases[MergePSS]->create_thread_work_items("To-Young-Gen Cards:", MergePSSToYoungGenCards);
|
||||
_gc_par_phases[MergePSS]->create_thread_work_items("Evac-Fail Cards:", MergePSSEvacFail);
|
||||
_gc_par_phases[MergePSS]->create_thread_work_items("Marked Cards:", MergePSSMarked);
|
||||
|
||||
_gc_par_phases[RestoreEvacuationFailedRegions]->create_thread_work_items("Evacuation Failed Regions:", RestoreEvacFailureRegionsEvacFailedNum);
|
||||
_gc_par_phases[RestoreEvacuationFailedRegions]->create_thread_work_items("Pinned Regions:", RestoreEvacFailureRegionsPinnedNum);
|
||||
@ -150,9 +153,6 @@ G1GCPhaseTimes::G1GCPhaseTimes(STWGCTimer* gc_timer, uint max_gc_threads) :
|
||||
|
||||
_gc_par_phases[OptTermination]->create_thread_work_items("Optional Termination Attempts:");
|
||||
|
||||
_gc_par_phases[RedirtyCards] = new WorkerDataArray<double>("RedirtyCards", "Redirty Logged Cards (ms):", max_gc_threads);
|
||||
_gc_par_phases[RedirtyCards]->create_thread_work_items("Redirtied Cards:");
|
||||
|
||||
_gc_par_phases[ResizeThreadLABs] = new WorkerDataArray<double>("ResizeTLABs", "Resize TLABs (ms):", max_gc_threads);
|
||||
|
||||
_gc_par_phases[FreeCollectionSet] = new WorkerDataArray<double>("FreeCSet", "Free Collection Set (ms):", max_gc_threads);
|
||||
@ -171,9 +171,9 @@ void G1GCPhaseTimes::reset() {
|
||||
_cur_optional_evac_time_ms = 0.0;
|
||||
_cur_collection_nmethod_list_cleanup_time_ms = 0.0;
|
||||
_cur_merge_heap_roots_time_ms = 0.0;
|
||||
_cur_merge_refinement_table_time_ms = 0.0;
|
||||
_cur_optional_merge_heap_roots_time_ms = 0.0;
|
||||
_cur_prepare_merge_heap_roots_time_ms = 0.0;
|
||||
_cur_distribute_log_buffers_time_ms = 0.0;
|
||||
_cur_optional_prepare_merge_heap_roots_time_ms = 0.0;
|
||||
_cur_pre_evacuate_prepare_time_ms = 0.0;
|
||||
_cur_post_evacuate_cleanup_1_time_ms = 0.0;
|
||||
@ -249,7 +249,7 @@ void G1GCPhaseTimes::record_gc_pause_end() {
|
||||
ASSERT_PHASE_UNINITIALIZED(MergeER);
|
||||
ASSERT_PHASE_UNINITIALIZED(MergeRS);
|
||||
ASSERT_PHASE_UNINITIALIZED(OptMergeRS);
|
||||
ASSERT_PHASE_UNINITIALIZED(MergeLB);
|
||||
ASSERT_PHASE_UNINITIALIZED(SweepRT);
|
||||
ASSERT_PHASE_UNINITIALIZED(ScanHR);
|
||||
ASSERT_PHASE_UNINITIALIZED(CodeRoots);
|
||||
ASSERT_PHASE_UNINITIALIZED(OptCodeRoots);
|
||||
@ -425,8 +425,7 @@ double G1GCPhaseTimes::print_pre_evacuate_collection_set() const {
|
||||
}
|
||||
|
||||
debug_time("Pre Evacuate Prepare", _cur_pre_evacuate_prepare_time_ms);
|
||||
debug_phase(_gc_par_phases[RetireTLABsAndFlushLogs], 1);
|
||||
debug_phase(_gc_par_phases[NonJavaThreadFlushLogs], 1);
|
||||
debug_phase(_gc_par_phases[RetireTLABs], 1);
|
||||
debug_time("Choose Collection Set", (_recorded_young_cset_choice_time_ms + _recorded_non_young_cset_choice_time_ms));
|
||||
debug_time("Region Register", _cur_region_register_time);
|
||||
|
||||
@ -458,8 +457,8 @@ double G1GCPhaseTimes::print_evacuate_initial_collection_set() const {
|
||||
debug_time("Prepare Merge Heap Roots", _cur_prepare_merge_heap_roots_time_ms);
|
||||
debug_phase_merge_remset();
|
||||
|
||||
debug_time("Distribute Log Buffers", _cur_distribute_log_buffers_time_ms);
|
||||
debug_phase(_gc_par_phases[MergeLB]);
|
||||
debug_time("Merge Refinement Table", _cur_merge_refinement_table_time_ms);
|
||||
debug_phase(_gc_par_phases[SweepRT], 1);
|
||||
|
||||
info_time("Evacuate Collection Set", _cur_collection_initial_evac_time_ms);
|
||||
|
||||
@ -521,7 +520,6 @@ double G1GCPhaseTimes::print_post_evacuate_collection_set(bool evacuation_failed
|
||||
if (G1CollectedHeap::heap()->should_sample_collection_set_candidates()) {
|
||||
debug_phase(_gc_par_phases[SampleCollectionSetCandidates], 1);
|
||||
}
|
||||
debug_phase(_gc_par_phases[RedirtyCards], 1);
|
||||
if (UseTLAB && ResizeTLAB) {
|
||||
debug_phase(_gc_par_phases[ResizeThreadLABs], 1);
|
||||
}
|
||||
|
||||
@ -46,8 +46,7 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> {
|
||||
|
||||
public:
|
||||
enum GCParPhases {
|
||||
RetireTLABsAndFlushLogs,
|
||||
NonJavaThreadFlushLogs,
|
||||
RetireTLABs,
|
||||
GCWorkerStart,
|
||||
ExtRootScan,
|
||||
ThreadRoots,
|
||||
@ -59,7 +58,7 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> {
|
||||
MergeER = StrongOopStorageSetRoots + EnumRange<OopStorageSet::StrongId>().size(),
|
||||
MergeRS,
|
||||
OptMergeRS,
|
||||
MergeLB,
|
||||
SweepRT,
|
||||
ScanHR,
|
||||
OptScanHR,
|
||||
CodeRoots,
|
||||
@ -71,7 +70,6 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> {
|
||||
Other,
|
||||
GCWorkerTotal,
|
||||
GCWorkerEnd,
|
||||
RedirtyCards,
|
||||
FreeCollectionSet,
|
||||
YoungFreeCSet,
|
||||
NonYoungFreeCSet,
|
||||
@ -111,16 +109,19 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> {
|
||||
MergeRSHowlArrayOfCards,
|
||||
MergeRSHowlBitmap,
|
||||
MergeRSHowlFull,
|
||||
MergeRSCards,
|
||||
MergeRSFromRemSetCards,
|
||||
MergeRSTotalCards,
|
||||
MergeRSContainersSentinel
|
||||
};
|
||||
|
||||
static constexpr const char* GCMergeRSWorkItemsStrings[MergeRSContainersSentinel] =
|
||||
{ "Merged Inline:", "Merged ArrayOfCards:", "Merged Howl:", "Merged Full:",
|
||||
"Merged Howl Inline:", "Merged Howl ArrayOfCards:", "Merged Howl BitMap:", "Merged Howl Full:",
|
||||
"Merged Cards:" };
|
||||
"Merged From RS Cards:", "Total Cards:" };
|
||||
|
||||
enum GCScanHRWorkItems {
|
||||
ScanHRPendingCards,
|
||||
ScanHRScannedEmptyCards,
|
||||
ScanHRScannedCards,
|
||||
ScanHRScannedBlocks,
|
||||
ScanHRClaimedChunks,
|
||||
@ -129,11 +130,6 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> {
|
||||
ScanHRUsedMemory
|
||||
};
|
||||
|
||||
enum GCMergeLBWorkItems {
|
||||
MergeLBDirtyCards,
|
||||
MergeLBSkippedCards
|
||||
};
|
||||
|
||||
enum GCCodeRootsWorkItems {
|
||||
CodeRootsScannedNMethods
|
||||
};
|
||||
@ -143,7 +139,10 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> {
|
||||
MergePSSLABSize,
|
||||
MergePSSLABWasteBytes,
|
||||
MergePSSLABUndoWasteBytes,
|
||||
MergePSSEvacFailExtra
|
||||
MergePSSPendingCards, // Cards to be scanned that were generated by the GC (from cross-references and evacuation failure).
MergePSSToYoungGenCards, // To-young-gen cards generated by GC.
MergePSSEvacFail, // Dirty cards generated by evacuation failure.
MergePSSMarked, // Total newly marked cards.
|
||||
};
|
||||
|
||||
enum RestoreEvacFailureRegionsWorkItems {
|
||||
@ -176,9 +175,9 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> {
|
||||
double _cur_collection_nmethod_list_cleanup_time_ms;
|
||||
|
||||
double _cur_merge_heap_roots_time_ms;
|
||||
// Merge refinement table time. Note that this time is included in _cur_merge_heap_roots_time_ms.
|
||||
double _cur_merge_refinement_table_time_ms;
|
||||
double _cur_optional_merge_heap_roots_time_ms;
|
||||
// Included in above merge and optional-merge time.
|
||||
double _cur_distribute_log_buffers_time_ms;
|
||||
|
||||
double _cur_prepare_merge_heap_roots_time_ms;
|
||||
double _cur_optional_prepare_merge_heap_roots_time_ms;
|
||||
@ -302,6 +301,10 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> {
|
||||
_cur_merge_heap_roots_time_ms += ms;
|
||||
}
|
||||
|
||||
void record_merge_refinement_table_time(double ms) {
|
||||
_cur_merge_refinement_table_time_ms = ms;
|
||||
}
|
||||
|
||||
void record_or_add_optional_merge_heap_roots_time(double ms) {
|
||||
_cur_optional_merge_heap_roots_time_ms += ms;
|
||||
}
|
||||
@ -310,10 +313,6 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> {
|
||||
_cur_prepare_merge_heap_roots_time_ms += ms;
|
||||
}
|
||||
|
||||
void record_distribute_log_buffers_time_ms(double ms) {
|
||||
_cur_distribute_log_buffers_time_ms += ms;
|
||||
}
|
||||
|
||||
void record_or_add_optional_prepare_merge_heap_roots_time(double ms) {
|
||||
_cur_optional_prepare_merge_heap_roots_time_ms += ms;
|
||||
}
|
||||
@ -382,10 +381,6 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> {
|
||||
_recorded_prepare_heap_roots_time_ms = recorded_prepare_heap_roots_time_ms;
|
||||
}
|
||||
|
||||
double cur_distribute_log_buffers_time_ms() {
|
||||
return _cur_distribute_log_buffers_time_ms;
|
||||
}
|
||||
|
||||
double cur_collection_par_time_ms() {
|
||||
return _cur_collection_initial_evac_time_ms +
|
||||
_cur_optional_evac_time_ms +
|
||||
@ -396,6 +391,10 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> {
|
||||
_cur_collection_nmethod_list_cleanup_time_ms;
|
||||
}
|
||||
|
||||
double cur_merge_refinement_table_time() const {
|
||||
return _cur_merge_refinement_table_time_ms;
|
||||
}
|
||||
|
||||
double cur_resize_heap_time_ms() {
|
||||
return _cur_resize_heap_time_ms;
|
||||
}
|
||||
|
||||
@ -39,6 +39,7 @@
|
||||
#include "logging/log.hpp"
|
||||
#include "logging/logStream.hpp"
|
||||
#include "memory/iterator.inline.hpp"
|
||||
#include "memory/memRegion.hpp"
|
||||
#include "memory/resourceArea.hpp"
|
||||
#include "oops/access.inline.hpp"
|
||||
#include "oops/compressedOops.inline.hpp"
|
||||
@ -137,11 +138,21 @@ void G1HeapRegion::hr_clear(bool clear_space) {
|
||||
if (clear_space) clear(SpaceDecorator::Mangle);
|
||||
}
|
||||
|
||||
void G1HeapRegion::clear_cardtable() {
|
||||
void G1HeapRegion::clear_card_table() {
|
||||
G1CardTable* ct = G1CollectedHeap::heap()->card_table();
|
||||
ct->clear_MemRegion(MemRegion(bottom(), end()));
|
||||
}
|
||||
|
||||
void G1HeapRegion::clear_refinement_table() {
|
||||
G1CardTable* ct = G1CollectedHeap::heap()->refinement_table();
|
||||
ct->clear_MemRegion(MemRegion(bottom(), end()));
|
||||
}
|
||||
|
||||
void G1HeapRegion::clear_both_card_tables() {
|
||||
clear_card_table();
|
||||
clear_refinement_table();
|
||||
}
|
||||
|
||||
void G1HeapRegion::set_free() {
|
||||
if (!is_free()) {
|
||||
report_region_type_change(G1HeapRegionTraceType::Free);
|
||||
@ -591,8 +602,12 @@ class G1VerifyLiveAndRemSetClosure : public BasicOopIterateClosure {
|
||||
|
||||
G1HeapRegion* _from;
|
||||
G1HeapRegion* _to;
|
||||
CardValue _cv_obj;
|
||||
CardValue _cv_field;
|
||||
|
||||
CardValue _cv_obj_ct; // In card table.
|
||||
CardValue _cv_field_ct;
|
||||
|
||||
CardValue _cv_obj_rt; // In refinement table.
|
||||
CardValue _cv_field_rt;
|
||||
|
||||
RemSetChecker(G1VerifyFailureCounter* failures, oop containing_obj, T* p, oop obj)
|
||||
: Checker<T>(failures, containing_obj, p, obj) {
|
||||
@ -600,19 +615,23 @@ class G1VerifyLiveAndRemSetClosure : public BasicOopIterateClosure {
|
||||
_to = this->_g1h->heap_region_containing(obj);
|
||||
|
||||
CardTable* ct = this->_g1h->card_table();
|
||||
_cv_obj = *ct->byte_for_const(this->_containing_obj);
|
||||
_cv_field = *ct->byte_for_const(p);
|
||||
_cv_obj_ct = *ct->byte_for_const(this->_containing_obj);
|
||||
_cv_field_ct = *ct->byte_for_const(p);
|
||||
|
||||
ct = this->_g1h->refinement_table();
|
||||
_cv_obj_rt = *ct->byte_for_const(this->_containing_obj);
|
||||
_cv_field_rt = *ct->byte_for_const(p);
|
||||
}
|
||||
|
||||
bool failed() const {
|
||||
if (_from != _to && !_from->is_young() &&
|
||||
_to->rem_set()->is_complete() &&
|
||||
_from->rem_set()->cset_group() != _to->rem_set()->cset_group()) {
|
||||
const CardValue dirty = G1CardTable::dirty_card_val();
|
||||
const CardValue clean = G1CardTable::clean_card_val();
|
||||
return !(_to->rem_set()->contains_reference(this->_p) ||
|
||||
(this->_containing_obj->is_objArray() ?
|
||||
_cv_field == dirty :
|
||||
_cv_obj == dirty || _cv_field == dirty));
|
||||
(_cv_field_ct != clean || _cv_field_rt != clean) :
|
||||
(_cv_obj_ct != clean || _cv_field_ct != clean || _cv_obj_rt != clean || _cv_field_rt != clean)));
|
||||
}
|
||||
return false;
|
||||
}
|
||||
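// Illustrative sketch (not part of this change): the verification predicate above,
// restated over both tables. All names are local to the sketch; only the remset
// query and the clean-card comparisons are taken from the code above.
static bool missing_remset_entry(bool in_remset, bool is_obj_array,
                                 uint8_t cv_obj_ct, uint8_t cv_field_ct,
                                 uint8_t cv_obj_rt, uint8_t cv_field_rt,
                                 uint8_t clean) {
  // A cross-group reference is accounted for if the target remset contains it, or a
  // covering card is non-clean in either the card table or the refinement table.
  bool field_covered = (cv_field_ct != clean) || (cv_field_rt != clean);
  bool obj_covered = (cv_obj_ct != clean) || (cv_obj_rt != clean);
  bool covered = is_obj_array ? field_covered : (obj_covered || field_covered);
  return !(in_remset || covered);
}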
@ -630,7 +649,8 @@ class G1VerifyLiveAndRemSetClosure : public BasicOopIterateClosure {
|
||||
log.error("Missing rem set entry:");
|
||||
this->print_containing_obj(&ls, _from);
|
||||
this->print_referenced_obj(&ls, _to, "");
|
||||
log.error("Obj head CV = %d, field CV = %d.", _cv_obj, _cv_field);
|
||||
log.error("CT obj head CV = %d, field CV = %d.", _cv_obj_ct, _cv_field_ct);
|
||||
log.error("RT Obj head CV = %d, field CV = %d.", _cv_obj_rt, _cv_field_rt);
|
||||
log.error("----------");
|
||||
}
|
||||
};
|
||||
|
||||
@ -42,7 +42,6 @@ class G1CollectedHeap;
|
||||
class G1CMBitMap;
|
||||
class G1CSetCandidateGroup;
|
||||
class G1Predictions;
|
||||
class G1HeapRegion;
|
||||
class G1HeapRegionRemSet;
|
||||
class G1HeapRegionSetBase;
|
||||
class nmethod;
|
||||
@ -478,7 +477,10 @@ public:
|
||||
// Callers must ensure this is not called by multiple threads at the same time.
|
||||
void hr_clear(bool clear_space);
|
||||
// Clear the card table corresponding to this region.
|
||||
void clear_cardtable();
|
||||
void clear_card_table();
|
||||
void clear_refinement_table();
|
||||
|
||||
void clear_both_card_tables();
|
||||
|
||||
// Notify the region that an evacuation failure occurred for an object within this
|
||||
// region.
|
||||
|
||||
@ -63,7 +63,8 @@ public:
|
||||
|
||||
G1HeapRegionManager::G1HeapRegionManager() :
|
||||
_bot_mapper(nullptr),
|
||||
_cardtable_mapper(nullptr),
|
||||
_card_table_mapper(nullptr),
|
||||
_refinement_table_mapper(nullptr),
|
||||
_committed_map(),
|
||||
_next_highest_used_hrm_index(0),
|
||||
_regions(), _heap_mapper(nullptr),
|
||||
@ -74,7 +75,8 @@ G1HeapRegionManager::G1HeapRegionManager() :
|
||||
void G1HeapRegionManager::initialize(G1RegionToSpaceMapper* heap_storage,
|
||||
G1RegionToSpaceMapper* bitmap,
|
||||
G1RegionToSpaceMapper* bot,
|
||||
G1RegionToSpaceMapper* cardtable) {
|
||||
G1RegionToSpaceMapper* card_table,
|
||||
G1RegionToSpaceMapper* refinement_table) {
|
||||
_next_highest_used_hrm_index = 0;
|
||||
|
||||
_heap_mapper = heap_storage;
|
||||
@ -82,7 +84,8 @@ void G1HeapRegionManager::initialize(G1RegionToSpaceMapper* heap_storage,
|
||||
_bitmap_mapper = bitmap;
|
||||
|
||||
_bot_mapper = bot;
|
||||
_cardtable_mapper = cardtable;
|
||||
_card_table_mapper = card_table;
|
||||
_refinement_table_mapper = refinement_table;
|
||||
|
||||
_regions.initialize(heap_storage->reserved(), G1HeapRegion::GrainBytes);
|
||||
|
||||
@ -186,7 +189,8 @@ void G1HeapRegionManager::commit_regions(uint index, size_t num_regions, WorkerT
|
||||
_bitmap_mapper->commit_regions(index, num_regions, pretouch_workers);
|
||||
|
||||
_bot_mapper->commit_regions(index, num_regions, pretouch_workers);
|
||||
_cardtable_mapper->commit_regions(index, num_regions, pretouch_workers);
|
||||
_card_table_mapper->commit_regions(index, num_regions, pretouch_workers);
|
||||
_refinement_table_mapper->commit_regions(index, num_regions, pretouch_workers);
|
||||
}
|
||||
|
||||
void G1HeapRegionManager::uncommit_regions(uint start, uint num_regions) {
|
||||
@ -209,7 +213,8 @@ void G1HeapRegionManager::uncommit_regions(uint start, uint num_regions) {
|
||||
_bitmap_mapper->uncommit_regions(start, num_regions);
|
||||
|
||||
_bot_mapper->uncommit_regions(start, num_regions);
|
||||
_cardtable_mapper->uncommit_regions(start, num_regions);
|
||||
_card_table_mapper->uncommit_regions(start, num_regions);
|
||||
_refinement_table_mapper->uncommit_regions(start, num_regions);
|
||||
|
||||
_committed_map.uncommit(start, end);
|
||||
}
|
||||
@ -261,19 +266,23 @@ void G1HeapRegionManager::clear_auxiliary_data_structures(uint start, uint num_r
|
||||
// Signal G1BlockOffsetTable to clear the given regions.
|
||||
_bot_mapper->signal_mapping_changed(start, num_regions);
|
||||
// Signal G1CardTable to clear the given regions.
|
||||
_cardtable_mapper->signal_mapping_changed(start, num_regions);
|
||||
_card_table_mapper->signal_mapping_changed(start, num_regions);
|
||||
// Signal refinement table to clear the given regions.
|
||||
_refinement_table_mapper->signal_mapping_changed(start, num_regions);
|
||||
}
|
||||
|
||||
MemoryUsage G1HeapRegionManager::get_auxiliary_data_memory_usage() const {
|
||||
size_t used_sz =
|
||||
_bitmap_mapper->committed_size() +
|
||||
_bot_mapper->committed_size() +
|
||||
_cardtable_mapper->committed_size();
|
||||
_card_table_mapper->committed_size() +
|
||||
_refinement_table_mapper->committed_size();
|
||||
|
||||
size_t committed_sz =
|
||||
_bitmap_mapper->reserved_size() +
|
||||
_bot_mapper->reserved_size() +
|
||||
_cardtable_mapper->reserved_size();
|
||||
_card_table_mapper->reserved_size() +
|
||||
_refinement_table_mapper->reserved_size();
|
||||
|
||||
return MemoryUsage(0, used_sz, committed_sz, committed_sz);
|
||||
}
|
||||
|
||||
@ -74,7 +74,8 @@ class G1HeapRegionManager: public CHeapObj<mtGC> {
|
||||
friend class G1HeapRegionClaimer;
|
||||
|
||||
G1RegionToSpaceMapper* _bot_mapper;
|
||||
G1RegionToSpaceMapper* _cardtable_mapper;
|
||||
G1RegionToSpaceMapper* _card_table_mapper;
|
||||
G1RegionToSpaceMapper* _refinement_table_mapper;
|
||||
|
||||
// Keeps track of the currently committed regions in the heap. The committed regions
|
||||
// can either be active (ready for use) or inactive (ready for uncommit).
|
||||
@ -161,7 +162,8 @@ public:
|
||||
void initialize(G1RegionToSpaceMapper* heap_storage,
|
||||
G1RegionToSpaceMapper* bitmap,
|
||||
G1RegionToSpaceMapper* bot,
|
||||
G1RegionToSpaceMapper* cardtable);
|
||||
G1RegionToSpaceMapper* card_table,
|
||||
G1RegionToSpaceMapper* refinement_table);
|
||||
|
||||
// Return the "dummy" region used for G1AllocRegion. This is currently a hardwired
|
||||
// new G1HeapRegion that owns G1HeapRegion at index 0. Since at the moment we commit
|
||||
|
||||
@ -42,6 +42,7 @@
|
||||
#include "oops/compressedOops.inline.hpp"
|
||||
#include "oops/oop.inline.hpp"
|
||||
#include "runtime/handles.inline.hpp"
|
||||
#include "runtime/threads.hpp"
|
||||
|
||||
int G1HeapVerifier::_enabled_verification_types = G1HeapVerifier::G1VerifyAll;
|
||||
|
||||
@ -528,6 +529,7 @@ void G1HeapVerifier::verify_before_gc() {
|
||||
|
||||
void G1HeapVerifier::verify_after_gc() {
|
||||
verify(VerifyOption::G1UseConcMarking, "After GC");
|
||||
verify_card_tables_in_sync();
|
||||
}
|
||||
|
||||
void G1HeapVerifier::verify_bitmap_clear(bool from_tams) {
|
||||
@ -556,17 +558,17 @@ void G1HeapVerifier::verify_bitmap_clear(bool from_tams) {
|
||||
G1CollectedHeap::heap()->heap_region_iterate(&cl);
|
||||
}
|
||||
|
||||
#ifndef PRODUCT
|
||||
class G1VerifyCardTableCleanup: public G1HeapRegionClosure {
|
||||
G1HeapVerifier* _verifier;
|
||||
public:
|
||||
G1VerifyCardTableCleanup(G1HeapVerifier* verifier)
|
||||
: _verifier(verifier) { }
|
||||
virtual bool do_heap_region(G1HeapRegion* r) {
|
||||
_verifier->verify_ct_clean_region(r);
|
||||
if (r->is_survivor()) {
|
||||
_verifier->verify_dirty_region(r);
|
||||
_verifier->verify_rt_clean_region(r);
|
||||
} else {
|
||||
_verifier->verify_not_dirty_region(r);
|
||||
_verifier->verify_rt_clean_from_top(r);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
@ -579,14 +581,35 @@ void G1HeapVerifier::verify_card_table_cleanup() {
|
||||
}
|
||||
}
|
||||
|
||||
void G1HeapVerifier::verify_not_dirty_region(G1HeapRegion* hr) {
|
||||
// All of the region should be clean.
|
||||
G1CardTable* ct = _g1h->card_table();
|
||||
MemRegion mr(hr->bottom(), hr->end());
|
||||
ct->verify_not_dirty_region(mr);
|
||||
class G1VerifyCardTablesClean: public G1HeapRegionClosure {
|
||||
G1HeapVerifier* _verifier;
|
||||
bool _both_card_tables;
|
||||
|
||||
public:
|
||||
G1VerifyCardTablesClean(G1HeapVerifier* verifier, bool both_card_tables = true)
|
||||
: _verifier(verifier), _both_card_tables(both_card_tables) { }
|
||||
|
||||
virtual bool do_heap_region(G1HeapRegion* r) {
|
||||
_verifier->verify_rt_clean_region(r); // Must be all Clean from bottom -> end.
|
||||
if (_both_card_tables) {
|
||||
_verifier->verify_ct_clean_region(r);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
void G1HeapVerifier::verify_card_tables_clean(bool both_card_tables) {
|
||||
G1VerifyCardTablesClean cl(this, both_card_tables);
|
||||
_g1h->heap_region_iterate(&cl);
|
||||
}
|
||||
|
||||
void G1HeapVerifier::verify_dirty_region(G1HeapRegion* hr) {
|
||||
void G1HeapVerifier::verify_rt_clean_from_top(G1HeapRegion* hr) {
|
||||
G1CardTable* ct = _g1h->refinement_table();
|
||||
MemRegion mr(align_up(hr->top(), G1CardTable::card_size()), hr->end());
|
||||
ct->verify_region(mr, G1CardTable::clean_card_val(), true);
|
||||
}
|
||||
|
||||
void G1HeapVerifier::verify_rt_dirty_to_dummy_top(G1HeapRegion* hr) {
|
||||
// We cannot guarantee that [bottom(),end()] is dirty. Threads
|
||||
// dirty allocated blocks as they allocate them. The thread that
|
||||
// retires each region and replaces it with a new one will do a
|
||||
@ -594,29 +617,56 @@ void G1HeapVerifier::verify_dirty_region(G1HeapRegion* hr) {
|
||||
// not dirty that area (one less thing to have to do while holding
|
||||
// a lock). So we can only verify that [bottom(),pre_dummy_top()]
|
||||
// is dirty.
|
||||
G1CardTable* ct = _g1h->card_table();
|
||||
G1CardTable* ct = _g1h->refinement_table();
|
||||
MemRegion mr(hr->bottom(), hr->pre_dummy_top());
|
||||
if (hr->is_young()) {
|
||||
ct->verify_g1_young_region(mr);
|
||||
} else {
|
||||
ct->verify_dirty_region(mr);
|
||||
}
|
||||
ct->verify_dirty_region(mr);
|
||||
}
|
||||
|
||||
class G1VerifyDirtyYoungListClosure : public G1HeapRegionClosure {
|
||||
private:
|
||||
G1HeapVerifier* _verifier;
|
||||
public:
|
||||
G1VerifyDirtyYoungListClosure(G1HeapVerifier* verifier) : G1HeapRegionClosure(), _verifier(verifier) { }
|
||||
virtual bool do_heap_region(G1HeapRegion* r) {
|
||||
_verifier->verify_dirty_region(r);
|
||||
return false;
|
||||
}
|
||||
};
|
||||
void G1HeapVerifier::verify_ct_clean_region(G1HeapRegion* hr) {
|
||||
G1CardTable* ct = _g1h->card_table();
|
||||
MemRegion mr(hr->bottom(), hr->end());
|
||||
ct->verify_region(mr, G1CardTable::clean_card_val(), true);
|
||||
}
|
||||
|
||||
void G1HeapVerifier::verify_dirty_young_regions() {
|
||||
G1VerifyDirtyYoungListClosure cl(this);
|
||||
_g1h->collection_set()->iterate(&cl);
|
||||
void G1HeapVerifier::verify_rt_clean_region(G1HeapRegion* hr) {
|
||||
G1CardTable* ct = _g1h->refinement_table();
|
||||
MemRegion mr(hr->bottom(), hr->end());
|
||||
ct->verify_region(mr, G1CardTable::clean_card_val(), true);
|
||||
}
|
||||
|
||||
#ifndef PRODUCT
|
||||
|
||||
void G1HeapVerifier::verify_card_tables_in_sync() {
|
||||
|
||||
// Non-Java thread card tables must be null.
|
||||
class AssertCardTableBaseNull : public ThreadClosure {
|
||||
public:
|
||||
|
||||
void do_thread(Thread* thread) {
|
||||
ResourceMark rm;
|
||||
assert(G1ThreadLocalData::get_byte_map_base(thread) == nullptr, "thread " PTR_FORMAT " (%s) has non-null card table base",
|
||||
p2i(thread), thread->name());
|
||||
}
|
||||
} check_null_cl;
|
||||
|
||||
Threads::non_java_threads_do(&check_null_cl);
|
||||
|
||||
// Java thread card tables must be the same as the global card table.
|
||||
class AssertSameCardTableClosure : public ThreadClosure {
|
||||
public:
|
||||
|
||||
void do_thread(Thread* thread) {
|
||||
G1CardTable::CardValue* global_ct_base = G1CollectedHeap::heap()->card_table_base();
|
||||
G1CardTable::CardValue* cur_ct_base = G1ThreadLocalData::get_byte_map_base(thread);
|
||||
|
||||
ResourceMark rm;
|
||||
assert(cur_ct_base == global_ct_base,
|
||||
"thread " PTR_FORMAT " (%s) has wrong card table base, should be " PTR_FORMAT " is " PTR_FORMAT,
|
||||
p2i(thread), thread->name(), p2i(global_ct_base), p2i(cur_ct_base));
|
||||
}
|
||||
} check_same_cl;
|
||||
|
||||
Threads::java_threads_do(&check_same_cl);
|
||||
}
|
||||
|
||||
class G1CheckRegionAttrTableClosure : public G1HeapRegionClosure {
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2016, 2024, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2016, 2025, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -78,11 +78,16 @@ public:
|
||||
// Do sanity check on the contents of the in-cset fast test table.
|
||||
bool check_region_attr_table() PRODUCT_RETURN_( return true; );
|
||||
|
||||
void verify_card_table_cleanup() PRODUCT_RETURN;
|
||||
void verify_card_table_cleanup();
|
||||
void verify_card_tables_clean(bool both_card_tables);
|
||||
|
||||
void verify_not_dirty_region(G1HeapRegion* hr) PRODUCT_RETURN;
|
||||
void verify_dirty_region(G1HeapRegion* hr) PRODUCT_RETURN;
|
||||
void verify_dirty_young_regions() PRODUCT_RETURN;
|
||||
void verify_ct_clean_region(G1HeapRegion* hr);
|
||||
void verify_rt_dirty_to_dummy_top(G1HeapRegion* hr);
|
||||
void verify_rt_clean_from_top(G1HeapRegion* hr);
|
||||
void verify_rt_clean_region(G1HeapRegion* hr);
|
||||
|
||||
// Verify that the global card table and the thread's card tables are in sync.
|
||||
void verify_card_tables_in_sync() PRODUCT_RETURN;
|
||||
};
|
||||
|
||||
#endif // SHARE_GC_G1_G1HEAPVERIFIER_HPP
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2001, 2024, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2001, 2025, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -86,19 +86,19 @@ public:
|
||||
|
||||
// This closure is applied to the fields of the objects that have just been copied during evacuation.
|
||||
class G1ScanEvacuatedObjClosure : public G1ScanClosureBase {
|
||||
friend class G1SkipCardEnqueueSetter;
|
||||
friend class G1SkipCardMarkSetter;
|
||||
|
||||
enum SkipCardEnqueueTristate {
|
||||
enum SkipCardMarkTristate {
|
||||
False = 0,
|
||||
True,
|
||||
Uninitialized
|
||||
};
|
||||
|
||||
SkipCardEnqueueTristate _skip_card_enqueue;
|
||||
SkipCardMarkTristate _skip_card_mark;
|
||||
|
||||
public:
|
||||
G1ScanEvacuatedObjClosure(G1CollectedHeap* g1h, G1ParScanThreadState* par_scan_state) :
|
||||
G1ScanClosureBase(g1h, par_scan_state), _skip_card_enqueue(Uninitialized) { }
|
||||
G1ScanClosureBase(g1h, par_scan_state), _skip_card_mark(Uninitialized) { }
|
||||
|
||||
template <class T> void do_oop_work(T* p);
|
||||
virtual void do_oop(oop* p) { do_oop_work(p); }
|
||||
@ -109,22 +109,22 @@ public:
|
||||
}
|
||||
|
||||
#ifdef ASSERT
|
||||
bool skip_card_enqueue_set() const { return _skip_card_enqueue != Uninitialized; }
|
||||
bool skip_card_mark_set() const { return _skip_card_mark != Uninitialized; }
|
||||
#endif
|
||||
};
|
||||
|
||||
// RAII object to properly set the _skip_card_enqueue field in G1ScanEvacuatedObjClosure.
|
||||
class G1SkipCardEnqueueSetter : public StackObj {
|
||||
// RAII object to properly set the _skip_card_mark field in G1ScanEvacuatedObjClosure.
|
||||
class G1SkipCardMarkSetter : public StackObj {
|
||||
G1ScanEvacuatedObjClosure* _closure;
|
||||
|
||||
public:
|
||||
G1SkipCardEnqueueSetter(G1ScanEvacuatedObjClosure* closure, bool skip_card_enqueue) : _closure(closure) {
|
||||
assert(_closure->_skip_card_enqueue == G1ScanEvacuatedObjClosure::Uninitialized, "Must not be set");
|
||||
_closure->_skip_card_enqueue = skip_card_enqueue ? G1ScanEvacuatedObjClosure::True : G1ScanEvacuatedObjClosure::False;
|
||||
G1SkipCardMarkSetter(G1ScanEvacuatedObjClosure* closure, bool skip_card_mark) : _closure(closure) {
|
||||
assert(_closure->_skip_card_mark == G1ScanEvacuatedObjClosure::Uninitialized, "Must not be set");
|
||||
_closure->_skip_card_mark = skip_card_mark ? G1ScanEvacuatedObjClosure::True : G1ScanEvacuatedObjClosure::False;
|
||||
}
|
||||
|
||||
~G1SkipCardEnqueueSetter() {
|
||||
DEBUG_ONLY(_closure->_skip_card_enqueue = G1ScanEvacuatedObjClosure::Uninitialized;)
|
||||
~G1SkipCardMarkSetter() {
|
||||
DEBUG_ONLY(_closure->_skip_card_mark = G1ScanEvacuatedObjClosure::Uninitialized;)
|
||||
}
|
||||
};
|
||||
|
||||
@ -206,13 +206,20 @@ public:
|
||||
class G1ConcurrentRefineOopClosure: public BasicOopIterateClosure {
|
||||
G1CollectedHeap* _g1h;
|
||||
uint _worker_id;
|
||||
bool _has_ref_to_cset;
|
||||
bool _has_ref_to_old;
|
||||
|
||||
public:
|
||||
G1ConcurrentRefineOopClosure(G1CollectedHeap* g1h, uint worker_id) :
|
||||
_g1h(g1h),
|
||||
_worker_id(worker_id) {
|
||||
_worker_id(worker_id),
|
||||
_has_ref_to_cset(false),
|
||||
_has_ref_to_old(false) {
|
||||
}
|
||||
|
||||
bool has_ref_to_cset() const { return _has_ref_to_cset; }
|
||||
bool has_ref_to_old() const { return _has_ref_to_old; }
|
||||
|
||||
virtual ReferenceIterationMode reference_iteration_mode() { return DO_FIELDS; }
|
||||
|
||||
template <class T> void do_oop_work(T* p);
|
||||
@ -223,6 +230,7 @@ public:
|
||||
class G1RebuildRemSetClosure : public BasicOopIterateClosure {
|
||||
G1CollectedHeap* _g1h;
|
||||
uint _worker_id;
|
||||
|
||||
public:
|
||||
G1RebuildRemSetClosure(G1CollectedHeap* g1h, uint worker_id) : _g1h(g1h), _worker_id(worker_id) {
|
||||
}
|
||||
|
||||
@ -90,11 +90,11 @@ inline void G1ScanEvacuatedObjClosure::do_oop_work(T* p) {
|
||||
prefetch_and_push(p, obj);
|
||||
} else if (!G1HeapRegion::is_in_same_region(p, obj)) {
|
||||
handle_non_cset_obj_common(region_attr, p, obj);
|
||||
assert(_skip_card_enqueue != Uninitialized, "Scan location has not been initialized.");
|
||||
if (_skip_card_enqueue == True) {
|
||||
assert(_skip_card_mark != Uninitialized, "Scan location has not been initialized.");
|
||||
if (_skip_card_mark == True) {
|
||||
return;
|
||||
}
|
||||
_par_scan_state->enqueue_card_if_tracked(region_attr, p, obj);
|
||||
_par_scan_state->mark_card_if_tracked(region_attr, p, obj);
|
||||
}
|
||||
}
|
||||
|
||||
@ -127,6 +127,11 @@ inline static void check_obj_during_refinement(T* p, oop const obj) {
|
||||
|
||||
template <class T>
|
||||
inline void G1ConcurrentRefineOopClosure::do_oop_work(T* p) {
|
||||
// Early out if we already found a to-young reference.
|
||||
if (_has_ref_to_cset) {
|
||||
return;
|
||||
}
|
||||
|
||||
T o = RawAccess<MO_RELAXED>::oop_load(p);
|
||||
if (CompressedOops::is_null(o)) {
|
||||
return;
|
||||
@ -146,7 +151,12 @@ inline void G1ConcurrentRefineOopClosure::do_oop_work(T* p) {
|
||||
return;
|
||||
}
|
||||
|
||||
G1HeapRegionRemSet* to_rem_set = _g1h->heap_region_containing(obj)->rem_set();
|
||||
G1HeapRegion* to_region = _g1h->heap_region_containing(obj);
|
||||
if (to_region->is_young()) {
|
||||
_has_ref_to_cset = true;
|
||||
return;
|
||||
}
|
||||
G1HeapRegionRemSet* to_rem_set = to_region->rem_set();
|
||||
|
||||
assert(to_rem_set != nullptr, "Need per-region 'into' remsets.");
|
||||
if (to_rem_set->is_tracked()) {
|
||||
@ -154,6 +164,7 @@ inline void G1ConcurrentRefineOopClosure::do_oop_work(T* p) {
|
||||
|
||||
if (from->rem_set()->cset_group() != to_rem_set->cset_group()) {
|
||||
to_rem_set->add_reference(p, _worker_id);
|
||||
_has_ref_to_old = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
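// Illustrative sketch (not part of this change): one plausible way a refinement
// sweep could act on the closure's two result flags for a just-scanned card. The
// caller protocol and the card values used here are assumptions for illustration.
static void settle_card_after_refinement(volatile uint8_t* card,
                                         bool has_ref_to_cset,
                                         uint8_t to_cset_val,
                                         uint8_t clean_val) {
  // Cards with references into the (next) collection set stay marked so the GC's
  // merge phase picks them up; everything else has been transferred into the
  // remembered sets and the card can revert to clean.
  *card = has_ref_to_cset ? to_cset_val : clean_val;
}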
@ -180,7 +191,7 @@ inline void G1ScanCardClosure::do_oop_work(T* p) {
|
||||
_heap_roots_found++;
|
||||
} else if (!G1HeapRegion::is_in_same_region(p, obj)) {
|
||||
handle_non_cset_obj_common(region_attr, p, obj);
|
||||
_par_scan_state->enqueue_card_if_tracked(region_attr, p, obj);
|
||||
_par_scan_state->mark_card_if_tracked(region_attr, p, obj);
|
||||
}
|
||||
}
|
||||
|
||||
@ -272,10 +283,14 @@ template <class T> void G1RebuildRemSetClosure::do_oop_work(T* p) {
|
||||
G1HeapRegion* to = _g1h->heap_region_containing(obj);
|
||||
G1HeapRegionRemSet* rem_set = to->rem_set();
|
||||
if (rem_set->is_tracked()) {
|
||||
G1HeapRegion* from = _g1h->heap_region_containing(p);
|
||||
if (to->is_young()) {
|
||||
G1BarrierSet::g1_barrier_set()->write_ref_field_post(p);
|
||||
} else {
|
||||
G1HeapRegion* from = _g1h->heap_region_containing(p);
|
||||
|
||||
if (from->rem_set()->cset_group() != rem_set->cset_group()) {
|
||||
rem_set->add_reference(p, _worker_id);
|
||||
if (from->rem_set()->cset_group() != rem_set->cset_group()) {
|
||||
rem_set->add_reference(p, _worker_id);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -57,22 +57,21 @@
|
||||
#define MAYBE_INLINE_EVACUATION NOT_DEBUG(inline) DEBUG_ONLY(NOINLINE)
|
||||
|
||||
G1ParScanThreadState::G1ParScanThreadState(G1CollectedHeap* g1h,
|
||||
G1RedirtyCardsQueueSet* rdcqs,
|
||||
uint worker_id,
|
||||
uint num_workers,
|
||||
G1CollectionSet* collection_set,
|
||||
G1EvacFailureRegions* evac_failure_regions)
|
||||
: _g1h(g1h),
|
||||
_task_queue(g1h->task_queue(worker_id)),
|
||||
_rdc_local_qset(rdcqs),
|
||||
_ct(g1h->card_table()),
|
||||
_ct(g1h->refinement_table()),
|
||||
_closures(nullptr),
|
||||
_plab_allocator(nullptr),
|
||||
_age_table(false),
|
||||
_tenuring_threshold(g1h->policy()->tenuring_threshold()),
|
||||
_scanner(g1h, this),
|
||||
_worker_id(worker_id),
|
||||
_last_enqueued_card(SIZE_MAX),
|
||||
_num_cards_marked_dirty(0),
|
||||
_num_cards_marked_to_cset(0),
|
||||
_stack_trim_upper_threshold(GCDrainStackTargetSize * 2 + 1),
|
||||
_stack_trim_lower_threshold(GCDrainStackTargetSize),
|
||||
_trim_ticks(),
|
||||
@ -88,7 +87,7 @@ G1ParScanThreadState::G1ParScanThreadState(G1CollectedHeap* g1h,
|
||||
ALLOCATION_FAILURE_INJECTOR_ONLY(_allocation_failure_inject_counter(0) COMMA)
|
||||
_evacuation_failed_info(),
|
||||
_evac_failure_regions(evac_failure_regions),
|
||||
_evac_failure_enqueued_cards(0)
|
||||
_num_cards_from_evac_failure(0)
|
||||
{
|
||||
// We allocate the number of young gen regions in the collection set plus one
// entry, since entry 0 keeps track of surviving bytes for non-young regions.
|
||||
@ -112,8 +111,7 @@ G1ParScanThreadState::G1ParScanThreadState(G1CollectedHeap* g1h,
|
||||
initialize_numa_stats();
|
||||
}
|
||||
|
||||
size_t G1ParScanThreadState::flush_stats(size_t* surviving_young_words, uint num_workers, BufferNodeList* rdc_buffers) {
|
||||
*rdc_buffers = _rdc_local_qset.flush();
|
||||
size_t G1ParScanThreadState::flush_stats(size_t* surviving_young_words, uint num_workers) {
|
||||
flush_numa_stats();
|
||||
// Update allocation statistics.
|
||||
_plab_allocator->flush_and_retire_stats(num_workers);
|
||||
@ -147,8 +145,16 @@ size_t G1ParScanThreadState::lab_undo_waste_words() const {
|
||||
return _plab_allocator->undo_waste();
|
||||
}
|
||||
|
||||
size_t G1ParScanThreadState::evac_failure_enqueued_cards() const {
|
||||
return _evac_failure_enqueued_cards;
|
||||
size_t G1ParScanThreadState::num_cards_pending() const {
|
||||
return _num_cards_marked_dirty + _num_cards_from_evac_failure;
|
||||
}
|
||||
|
||||
size_t G1ParScanThreadState::num_cards_marked() const {
|
||||
return num_cards_pending() + _num_cards_marked_to_cset;
|
||||
}
|
||||
|
||||
size_t G1ParScanThreadState::num_cards_from_evac_failure() const {
|
||||
return _num_cards_from_evac_failure;
|
||||
}
|
||||
|
||||
#ifdef ASSERT
|
||||
@ -230,7 +236,7 @@ void G1ParScanThreadState::do_partial_array(PartialArrayState* state, bool stole
|
||||
PartialArraySplitter::Claim claim =
|
||||
_partial_array_splitter.claim(state, _task_queue, stolen);
|
||||
G1HeapRegionAttr dest_attr = _g1h->region_attr(to_array);
|
||||
G1SkipCardEnqueueSetter x(&_scanner, dest_attr.is_new_survivor());
|
||||
G1SkipCardMarkSetter x(&_scanner, dest_attr.is_new_survivor());
|
||||
// Process claimed task.
|
||||
to_array->oop_iterate_range(&_scanner,
|
||||
checked_cast<int>(claim._start),
|
||||
@ -250,7 +256,7 @@ void G1ParScanThreadState::start_partial_objarray(oop from_obj,
|
||||
// The source array is unused when processing states.
|
||||
_partial_array_splitter.start(_task_queue, nullptr, to_array, array_length);
|
||||
|
||||
assert(_scanner.skip_card_enqueue_set(), "must be");
|
||||
assert(_scanner.skip_card_mark_set(), "must be");
|
||||
// Process the initial chunk. No need to process the type in the
|
||||
// klass, as it will already be handled by processing the built-in
|
||||
// module.
|
||||
@ -451,7 +457,7 @@ void G1ParScanThreadState::do_iterate_object(oop const obj,
|
||||
_string_dedup_requests.add(old);
|
||||
}
|
||||
|
||||
assert(_scanner.skip_card_enqueue_set(), "must be");
|
||||
assert(_scanner.skip_card_mark_set(), "must be");
|
||||
obj->oop_iterate_backwards(&_scanner, klass);
|
||||
}
|
||||
|
||||
@ -546,7 +552,7 @@ oop G1ParScanThreadState::do_copy_to_survivor_space(G1HeapRegionAttr const regio
|
||||
// Instead, we use dest_attr.is_young() because the two values are always
|
||||
// equal: successfully allocated young regions must be survivor regions.
|
||||
assert(dest_attr.is_young() == _g1h->heap_region_containing(obj)->is_survivor(), "must be");
|
||||
G1SkipCardEnqueueSetter x(&_scanner, dest_attr.is_young());
|
||||
G1SkipCardMarkSetter x(&_scanner, dest_attr.is_young());
|
||||
do_iterate_object(obj, old, klass, region_attr, dest_attr, age);
|
||||
}
|
||||
|
||||
@ -569,7 +575,7 @@ G1ParScanThreadState* G1ParScanThreadStateSet::state_for_worker(uint worker_id)
|
||||
assert(worker_id < _num_workers, "out of bounds access");
|
||||
if (_states[worker_id] == nullptr) {
|
||||
_states[worker_id] =
|
||||
new G1ParScanThreadState(_g1h, rdcqs(),
|
||||
new G1ParScanThreadState(_g1h,
|
||||
worker_id,
|
||||
_num_workers,
|
||||
_collection_set,
|
||||
@ -595,22 +601,24 @@ void G1ParScanThreadStateSet::flush_stats() {
|
||||
// because it resets the PLAB allocator where we get this info from.
|
||||
size_t lab_waste_bytes = pss->lab_waste_words() * HeapWordSize;
|
||||
size_t lab_undo_waste_bytes = pss->lab_undo_waste_words() * HeapWordSize;
|
||||
size_t copied_bytes = pss->flush_stats(_surviving_young_words_total, _num_workers, &_rdc_buffers[worker_id]) * HeapWordSize;
|
||||
size_t evac_fail_enqueued_cards = pss->evac_failure_enqueued_cards();
|
||||
size_t copied_bytes = pss->flush_stats(_surviving_young_words_total, _num_workers) * HeapWordSize;
|
||||
size_t pending_cards = pss->num_cards_pending();
|
||||
size_t to_young_gen_cards = pss->num_cards_marked() - pss->num_cards_pending();
|
||||
size_t evac_failure_cards = pss->num_cards_from_evac_failure();
|
||||
size_t marked_cards = pss->num_cards_marked();
|
||||
|
||||
p->record_or_add_thread_work_item(G1GCPhaseTimes::MergePSS, worker_id, copied_bytes, G1GCPhaseTimes::MergePSSCopiedBytes);
|
||||
p->record_or_add_thread_work_item(G1GCPhaseTimes::MergePSS, worker_id, lab_waste_bytes, G1GCPhaseTimes::MergePSSLABWasteBytes);
|
||||
p->record_or_add_thread_work_item(G1GCPhaseTimes::MergePSS, worker_id, lab_undo_waste_bytes, G1GCPhaseTimes::MergePSSLABUndoWasteBytes);
|
||||
p->record_or_add_thread_work_item(G1GCPhaseTimes::MergePSS, worker_id, evac_fail_enqueued_cards, G1GCPhaseTimes::MergePSSEvacFailExtra);
|
||||
p->record_or_add_thread_work_item(G1GCPhaseTimes::MergePSS, worker_id, pending_cards, G1GCPhaseTimes::MergePSSPendingCards);
|
||||
p->record_or_add_thread_work_item(G1GCPhaseTimes::MergePSS, worker_id, to_young_gen_cards, G1GCPhaseTimes::MergePSSToYoungGenCards);
|
||||
p->record_or_add_thread_work_item(G1GCPhaseTimes::MergePSS, worker_id, evac_failure_cards, G1GCPhaseTimes::MergePSSEvacFail);
|
||||
p->record_or_add_thread_work_item(G1GCPhaseTimes::MergePSS, worker_id, marked_cards, G1GCPhaseTimes::MergePSSMarked);
|
||||
|
||||
delete pss;
|
||||
_states[worker_id] = nullptr;
|
||||
}
|
||||
|
||||
G1DirtyCardQueueSet& dcq = G1BarrierSet::dirty_card_queue_set();
|
||||
dcq.merge_bufferlists(rdcqs());
|
||||
rdcqs()->verify_empty();
|
||||
|
||||
_flushed = true;
|
||||
}
|
||||
|
||||
@ -652,7 +660,7 @@ oop G1ParScanThreadState::handle_evacuation_failure_par(oop old, markWord m, Kla
|
||||
// existing closure to scan evacuated objects; since we are iterating from a
|
||||
// collection set region (i.e. never a Survivor region), we always need to
|
||||
// gather cards for this case.
|
||||
G1SkipCardEnqueueSetter x(&_scanner, false /* skip_card_enqueue */);
|
||||
G1SkipCardMarkSetter x(&_scanner, false /* skip_card_mark */);
|
||||
do_iterate_object(old, old, klass, attr, attr, m.age());
|
||||
}
|
||||
|
||||
@ -709,9 +717,7 @@ G1ParScanThreadStateSet::G1ParScanThreadStateSet(G1CollectedHeap* g1h,
|
||||
G1EvacFailureRegions* evac_failure_regions) :
|
||||
_g1h(g1h),
|
||||
_collection_set(collection_set),
|
||||
_rdcqs(G1BarrierSet::dirty_card_queue_set().allocator()),
|
||||
_states(NEW_C_HEAP_ARRAY(G1ParScanThreadState*, num_workers, mtGC)),
|
||||
_rdc_buffers(NEW_C_HEAP_ARRAY(BufferNodeList, num_workers, mtGC)),
|
||||
_surviving_young_words_total(NEW_C_HEAP_ARRAY(size_t, collection_set->young_region_length() + 1, mtGC)),
|
||||
_num_workers(num_workers),
|
||||
_flushed(false),
|
||||
@ -719,7 +725,6 @@ G1ParScanThreadStateSet::G1ParScanThreadStateSet(G1CollectedHeap* g1h,
|
||||
{
|
||||
for (uint i = 0; i < num_workers; ++i) {
|
||||
_states[i] = nullptr;
|
||||
_rdc_buffers[i] = BufferNodeList();
|
||||
}
|
||||
memset(_surviving_young_words_total, 0, (collection_set->young_region_length() + 1) * sizeof(size_t));
|
||||
}
|
||||
@ -728,7 +733,6 @@ G1ParScanThreadStateSet::~G1ParScanThreadStateSet() {
|
||||
assert(_flushed, "thread local state from the per thread states should have been flushed");
|
||||
FREE_C_HEAP_ARRAY(G1ParScanThreadState*, _states);
|
||||
FREE_C_HEAP_ARRAY(size_t, _surviving_young_words_total);
|
||||
FREE_C_HEAP_ARRAY(BufferNodeList, _rdc_buffers);
|
||||
}
|
||||
|
||||
#if TASKQUEUE_STATS
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2014, 2024, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2014, 2025, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -27,7 +27,6 @@
|
||||
|
||||
#include "gc/g1/g1CollectedHeap.hpp"
|
||||
#include "gc/g1/g1OopClosures.hpp"
|
||||
#include "gc/g1/g1RedirtyCardsQueue.hpp"
|
||||
#include "gc/g1/g1YoungGCAllocationFailureInjector.hpp"
|
||||
#include "gc/shared/ageTable.hpp"
|
||||
#include "gc/shared/copyFailedInfo.hpp"
|
||||
@ -52,7 +51,6 @@ class outputStream;
|
||||
class G1ParScanThreadState : public CHeapObj<mtGC> {
|
||||
G1CollectedHeap* _g1h;
|
||||
G1ScannerTasksQueue* _task_queue;
|
||||
G1RedirtyCardsLocalQueueSet _rdc_local_qset;
|
||||
G1CardTable* _ct;
|
||||
G1EvacuationRootClosures* _closures;
|
||||
|
||||
@ -65,9 +63,8 @@ class G1ParScanThreadState : public CHeapObj<mtGC> {
|
||||
|
||||
uint _worker_id;
|
||||
|
||||
// Remember the last enqueued card to avoid enqueuing the same card over and over;
|
||||
// since we only ever scan a card once, this is sufficient.
|
||||
size_t _last_enqueued_card;
|
||||
size_t _num_cards_marked_dirty;
|
||||
size_t _num_cards_marked_to_cset;
|
||||
|
||||
// Upper and lower threshold to start and end work queue draining.
|
||||
uint const _stack_trim_upper_threshold;
|
||||
@ -104,22 +101,19 @@ class G1ParScanThreadState : public CHeapObj<mtGC> {
|
||||
|
||||
EvacuationFailedInfo _evacuation_failed_info;
|
||||
G1EvacFailureRegions* _evac_failure_regions;
|
||||
// Number of additional cards into evacuation failed regions enqueued into
|
||||
// the local DCQS. This is an approximation, as cards that would be added later
|
||||
// outside of evacuation failure will not be subtracted again.
|
||||
size_t _evac_failure_enqueued_cards;
|
||||
// Number of additional cards into evacuation failed regions.
|
||||
size_t _num_cards_from_evac_failure;
|
||||
|
||||
// Enqueue the card if not already in the set; this is a best-effort attempt on
|
||||
// Mark the card if not already in the set; this is a best-effort attempt on
|
||||
// detecting duplicates.
|
||||
template <class T> bool enqueue_if_new(T* p);
|
||||
// Enqueue the card of p into the (evacuation failed) region.
|
||||
template <class T> void enqueue_card_into_evac_fail_region(T* p, oop obj);
|
||||
template <class T> bool mark_if_new(T* p, bool into_survivor);
|
||||
// Mark the card of p into the (evacuation failed) region.
|
||||
template <class T> void mark_card_into_evac_fail_region(T* p, oop obj);
|
||||
|
||||
bool inject_allocation_failure(uint region_idx) ALLOCATION_FAILURE_INJECTOR_RETURN_( return false; );
|
||||
|
||||
public:
|
||||
G1ParScanThreadState(G1CollectedHeap* g1h,
|
||||
G1RedirtyCardsQueueSet* rdcqs,
|
||||
uint worker_id,
|
||||
uint num_workers,
|
||||
G1CollectionSet* collection_set,
|
||||
@ -139,16 +133,16 @@ public:
|
||||
|
||||
void push_on_queue(ScannerTask task);
|
||||
|
||||
// Apply the post barrier to the given reference field. Enqueues the card of p
|
||||
// Apply the post barrier to the given reference field. Marks the card of p
|
||||
// if the barrier does not filter out the reference for some reason (e.g.
|
||||
// p and q are in the same region, p is in survivor, p is in collection set)
|
||||
// To be called during GC if nothing particular about p and obj are known.
|
||||
template <class T> void write_ref_field_post(T* p, oop obj);
|
||||
|
||||
// Enqueue the card if the reference's target region's remembered set is tracked.
|
||||
// Mark the card if the reference's target region's remembered set is tracked.
|
||||
// Assumes that a significant amount of pre-filtering (like done by
|
||||
// write_ref_field_post() above) has already been performed.
|
||||
template <class T> void enqueue_card_if_tracked(G1HeapRegionAttr region_attr, T* p, oop o);
|
||||
template <class T> void mark_card_if_tracked(G1HeapRegionAttr region_attr, T* p, oop o);
|
||||
|
||||
G1EvacuationRootClosures* closures() { return _closures; }
|
||||
uint worker_id() { return _worker_id; }
|
||||
@ -156,11 +150,22 @@ public:
|
||||
size_t lab_waste_words() const;
|
||||
size_t lab_undo_waste_words() const;
|
||||
|
||||
size_t evac_failure_enqueued_cards() const;
// Newly marked cards during this garbage collection, to be refined concurrently
// later. Contains both marks generated by new cross-region references as well
// as cards for references into evacuation failed regions.
// Does not contain cards into the next collection set (e.g. survivors) - they will not
// be refined concurrently. Calculation is done on a best-effort basis.
size_t num_cards_pending() const;
// Number of cards newly generated by references into evacuation failed regions.
// Calculation is done on a best-effort basis.
size_t num_cards_from_evac_failure() const;
// Sum of cards marked by evacuation. Contains both pending cards as well as cards
// into the next collection set (e.g. survivors).
size_t num_cards_marked() const;
|
||||
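// Illustrative worked example (not part of this change) of how the accessors above
// relate, using hypothetical per-thread tallies dirty = 120, to-cset = 45,
// evac-fail = 10:
//   num_cards_pending()           = dirty + evac-fail = 130
//   num_cards_from_evac_failure() = evac-fail         =  10
//   num_cards_marked()            = pending + to-cset = 175
// flush_stats() then reports to-young-gen cards as marked - pending = 45.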
|
||||
// Pass locally gathered statistics to global state. Returns the total number of
|
||||
// HeapWords copied.
|
||||
size_t flush_stats(size_t* surviving_young_words, uint num_workers, BufferNodeList* buffer_log);
|
||||
size_t flush_stats(size_t* surviving_young_words, uint num_workers);
|
||||
|
||||
#if TASKQUEUE_STATS
|
||||
PartialArrayTaskStats* partial_array_task_stats();
|
||||
@ -249,9 +254,7 @@ public:
|
||||
class G1ParScanThreadStateSet : public StackObj {
|
||||
G1CollectedHeap* _g1h;
|
||||
G1CollectionSet* _collection_set;
|
||||
G1RedirtyCardsQueueSet _rdcqs;
|
||||
G1ParScanThreadState** _states;
|
||||
BufferNodeList* _rdc_buffers;
|
||||
size_t* _surviving_young_words_total;
|
||||
uint _num_workers;
|
||||
bool _flushed;
|
||||
@ -264,9 +267,6 @@ class G1ParScanThreadStateSet : public StackObj {
|
||||
G1EvacFailureRegions* evac_failure_regions);
|
||||
~G1ParScanThreadStateSet();
|
||||
|
||||
G1RedirtyCardsQueueSet* rdcqs() { return &_rdcqs; }
|
||||
BufferNodeList* rdc_buffers() { return _rdc_buffers; }
|
||||
|
||||
void flush_stats();
|
||||
void record_unused_optional_region(G1HeapRegion* hr);
|
||||
#if TASKQUEUE_STATS
|
||||
|
||||
@ -96,25 +96,24 @@ G1OopStarChunkedList* G1ParScanThreadState::oops_into_optional_region(const G1He
|
||||
return &_oops_into_optional_regions[hr->index_in_opt_cset()];
|
||||
}
|
||||
|
||||
template <class T> bool G1ParScanThreadState::enqueue_if_new(T* p) {
size_t card_index = ct()->index_for(p);
// If the card hasn't been added to the buffer, do it.
if (_last_enqueued_card != card_index) {
_rdc_local_qset.enqueue(ct()->byte_for_index(card_index));
_last_enqueued_card = card_index;
template <class T> bool G1ParScanThreadState::mark_if_new(T* p, bool into_new_survivor) {
G1CardTable::CardValue* card = ct()->byte_for(p);
G1CardTable::CardValue value = *card;
if (value == G1CardTable::clean_card_val()) {
*card = into_new_survivor ? G1CardTable::g1_to_cset_card : G1CardTable::g1_dirty_card;
return true;
} else {
return false;
}
}
|
||||
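// Illustrative sketch (not part of this change): the same fast path as mark_if_new()
// above, written out with the usual card-table address arithmetic; the shift and card
// values are stand-in parameters, not G1's actual constants.
static bool mark_card_for(uint8_t* byte_map_base, const void* p, bool into_new_survivor,
                          uint8_t clean, uint8_t dirty, uint8_t to_cset, unsigned card_shift) {
  uint8_t* card = byte_map_base + (reinterpret_cast<uintptr_t>(p) >> card_shift);
  if (*card != clean) {
    return false; // Already marked, e.g. by an earlier reference into the same card.
  }
  // To-cset (survivor) cards are kept distinct because they are not refined concurrently.
  *card = into_new_survivor ? to_cset : dirty;
  return true;
}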
|
||||
template <class T> void G1ParScanThreadState::enqueue_card_into_evac_fail_region(T* p, oop obj) {
template <class T> void G1ParScanThreadState::mark_card_into_evac_fail_region(T* p, oop obj) {
assert(!G1HeapRegion::is_in_same_region(p, obj), "Should have filtered out cross-region references already.");
assert(!_g1h->heap_region_containing(p)->is_survivor(), "Should have filtered out from-newly allocated survivor references already.");
assert(_g1h->heap_region_containing(obj)->in_collection_set(), "Only for enqueueing references into collection set regions");

if (enqueue_if_new(p)) {
_evac_failure_enqueued_cards++;
if (mark_if_new(p, false /* into_new_survivor */)) { // The reference is never into survivor regions.
_num_cards_from_evac_failure++;
}
}
|
||||
|
||||
@ -137,18 +136,18 @@ template <class T> void G1ParScanThreadState::write_ref_field_post(T* p, oop obj
|
||||
if (dest_attr.is_in_cset()) {
|
||||
assert(obj->is_forwarded(), "evac-failed but not forwarded: " PTR_FORMAT, p2i(obj));
|
||||
assert(obj->forwardee() == obj, "evac-failed but not self-forwarded: " PTR_FORMAT, p2i(obj));
|
||||
enqueue_card_into_evac_fail_region(p, obj);
|
||||
mark_card_into_evac_fail_region(p, obj);
|
||||
return;
|
||||
}
|
||||
enqueue_card_if_tracked(dest_attr, p, obj);
|
||||
mark_card_if_tracked(dest_attr, p, obj);
|
||||
}
|
||||
|
||||
template <class T> void G1ParScanThreadState::enqueue_card_if_tracked(G1HeapRegionAttr region_attr, T* p, oop o) {
|
||||
template <class T> void G1ParScanThreadState::mark_card_if_tracked(G1HeapRegionAttr region_attr, T* p, oop o) {
|
||||
assert(!G1HeapRegion::is_in_same_region(p, o), "Should have filtered out cross-region references already.");
|
||||
assert(!_g1h->heap_region_containing(p)->is_survivor(), "Should have filtered out from-newly allocated survivor references already.");
|
||||
// We relabel all regions that failed evacuation as old gen without remembered sets,
// and so pre-filter them out in the caller.
|
||||
assert(!_g1h->heap_region_containing(o)->in_collection_set(), "Should not try to enqueue reference into collection set region");
|
||||
assert(!_g1h->heap_region_containing(o)->in_collection_set(), "Should not try to mark reference into collection set region");
|
||||
|
||||
#ifdef ASSERT
|
||||
G1HeapRegion* const hr_obj = _g1h->heap_region_containing(o);
|
||||
@ -161,7 +160,14 @@ template <class T> void G1ParScanThreadState::enqueue_card_if_tracked(G1HeapRegi
|
||||
if (!region_attr.remset_is_tracked()) {
|
||||
return;
|
||||
}
|
||||
enqueue_if_new(p);
|
||||
bool into_survivor = region_attr.is_new_survivor();
|
||||
if (mark_if_new(p, into_survivor)) {
|
||||
if (into_survivor) {
|
||||
_num_cards_marked_to_cset++;
|
||||
} else {
|
||||
_num_cards_marked_dirty++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif // SHARE_GC_G1_G1PARSCANTHREADSTATE_INLINE_HPP
|
||||
|
||||
@ -67,8 +67,7 @@ G1Policy::G1Policy(STWGCTimer* gc_timer) :
|
||||
_reserve_regions(0),
|
||||
_young_gen_sizer(),
|
||||
_free_regions_at_end_of_collection(0),
|
||||
_card_rs_length(0),
|
||||
_pending_cards_at_gc_start(0),
|
||||
_pending_cards_from_gc(0),
|
||||
_concurrent_start_to_mixed(),
|
||||
_collection_set(nullptr),
|
||||
_g1h(nullptr),
|
||||
@ -553,12 +552,9 @@ G1GCPhaseTimes* G1Policy::phase_times() const {
|
||||
return _phase_times;
|
||||
}
|
||||
|
||||
void G1Policy::revise_young_list_target_length(size_t card_rs_length, size_t code_root_rs_length) {
|
||||
void G1Policy::revise_young_list_target_length(size_t pending_cards, size_t card_rs_length, size_t code_root_rs_length) {
|
||||
guarantee(use_adaptive_young_list_length(), "should not call this otherwise" );
|
||||
|
||||
size_t thread_buffer_cards = _analytics->predict_dirtied_cards_in_thread_buffers();
|
||||
G1DirtyCardQueueSet& dcqs = G1BarrierSet::dirty_card_queue_set();
|
||||
size_t pending_cards = dcqs.num_cards() + thread_buffer_cards;
|
||||
update_young_length_bounds(pending_cards, card_rs_length, code_root_rs_length);
|
||||
}
|
||||
|
||||
@ -567,7 +563,7 @@ void G1Policy::record_full_collection_start() {
|
||||
// Release the future to-space so that it is available for compaction into.
|
||||
collector_state()->set_in_young_only_phase(false);
|
||||
collector_state()->set_in_full_gc(true);
|
||||
_pending_cards_at_gc_start = 0;
|
||||
_collection_set->abandon_all_candidates();
|
||||
}
|
||||
|
||||
void G1Policy::record_full_collection_end() {
|
||||
@ -600,59 +596,70 @@ void G1Policy::record_full_collection_end() {
|
||||
record_pause(G1GCPauseType::FullGC, start_time_sec, end_sec);
|
||||
}
|
||||
|
||||
static void log_refinement_stats(const char* kind, const G1ConcurrentRefineStats& stats) {
|
||||
static void log_refinement_stats(const G1ConcurrentRefineStats& stats) {
|
||||
log_debug(gc, refine, stats)
|
||||
("%s refinement: %.2fms, refined: %zu"
|
||||
", precleaned: %zu, dirtied: %zu",
|
||||
kind,
|
||||
stats.refinement_time().seconds() * MILLIUNITS,
|
||||
("Refinement: sweep: %.2fms, yield: %.2fms refined: %zu, dirtied: %zu",
|
||||
TimeHelper::counter_to_millis(stats.sweep_duration()),
|
||||
TimeHelper::counter_to_millis(stats.yield_during_sweep_duration()),
|
||||
stats.refined_cards(),
|
||||
stats.precleaned_cards(),
|
||||
stats.dirtied_cards());
|
||||
stats.cards_pending());
|
||||
}
|
||||
|
||||
void G1Policy::record_concurrent_refinement_stats(size_t pending_cards,
|
||||
size_t thread_buffer_cards) {
|
||||
_pending_cards_at_gc_start = pending_cards;
|
||||
_analytics->report_dirtied_cards_in_thread_buffers(thread_buffer_cards);
|
||||
|
||||
// Collect per-thread stats, mostly from mutator activity.
|
||||
G1DirtyCardQueueSet& dcqs = G1BarrierSet::dirty_card_queue_set();
|
||||
G1ConcurrentRefineStats mut_stats = dcqs.concatenated_refinement_stats();
|
||||
|
||||
// Collect specialized concurrent refinement thread stats.
|
||||
G1ConcurrentRefine* cr = _g1h->concurrent_refine();
|
||||
G1ConcurrentRefineStats cr_stats = cr->get_and_reset_refinement_stats();
|
||||
|
||||
G1ConcurrentRefineStats total_stats = mut_stats + cr_stats;
|
||||
|
||||
log_refinement_stats("Mutator", mut_stats);
|
||||
log_refinement_stats("Concurrent", cr_stats);
|
||||
log_refinement_stats("Total", total_stats);
|
||||
void G1Policy::record_refinement_stats(G1ConcurrentRefineStats* refine_stats) {
|
||||
log_refinement_stats(*refine_stats);
|
||||
|
||||
// Record the rate at which cards were refined.
|
||||
// Don't update the rate if the current sample is empty or time is zero.
|
||||
Tickspan refinement_time = total_stats.refinement_time();
|
||||
size_t refined_cards = total_stats.refined_cards();
|
||||
if ((refined_cards > 0) && (refinement_time > Tickspan())) {
|
||||
double rate = refined_cards / (refinement_time.seconds() * MILLIUNITS);
|
||||
// Don't update the rate if the current sample is empty or time is zero (which is
|
||||
// the case during GC).
|
||||
double refinement_time = TimeHelper::counter_to_millis(refine_stats->sweep_duration());
|
||||
size_t refined_cards = refine_stats->refined_cards();
|
||||
if ((refined_cards > 0) && (refinement_time > 0)) {
|
||||
double rate = refined_cards / refinement_time;
|
||||
_analytics->report_concurrent_refine_rate_ms(rate);
|
||||
log_debug(gc, refine, stats)("Concurrent refinement rate: %.2f cards/ms", rate);
|
||||
log_debug(gc, refine, stats)("Concurrent refinement rate: %.2f cards/ms predicted: %.2f cards/ms", rate, _analytics->predict_concurrent_refine_rate_ms());
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
static T saturated_sub(T x, T y) {
|
||||
return (x < y) ? T() : (x - y);
|
||||
}
|
||||
|
||||
void G1Policy::record_dirtying_stats(double last_mutator_start_dirty_ms,
|
||||
double last_mutator_end_dirty_ms,
|
||||
size_t pending_cards,
|
||||
double yield_duration_ms,
|
||||
size_t next_pending_cards_from_gc,
|
||||
size_t next_to_collection_set_cards) {
|
||||
assert(SafepointSynchronize::is_at_safepoint() || G1ReviseYoungLength_lock->is_locked(),
|
||||
"must be (at safepoint %s locked %s)",
|
||||
BOOL_TO_STR(SafepointSynchronize::is_at_safepoint()), BOOL_TO_STR(G1ReviseYoungLength_lock->is_locked()));
|
||||
// Record mutator's card logging rate.
|
||||
double mut_start_time = _analytics->prev_collection_pause_end_ms();
|
||||
double mut_end_time = cur_pause_start_sec() * MILLIUNITS;
|
||||
double mut_time = mut_end_time - mut_start_time;
|
||||
|
||||
// Unlike above for conc-refine rate, here we should not require a
|
||||
// non-empty sample, since an application could go some time with only
|
||||
// young-gen or filtered out writes. But we'll ignore unusually short
|
||||
// sample periods, as they may just pollute the predictions.
|
||||
if (mut_time > 1.0) { // Require > 1ms sample time.
|
||||
double dirtied_rate = total_stats.dirtied_cards() / mut_time;
|
||||
double const mutator_dirty_time_ms = (last_mutator_end_dirty_ms - last_mutator_start_dirty_ms) - yield_duration_ms;
|
||||
assert(mutator_dirty_time_ms >= 0.0,
|
||||
"must be (start: %.2f end: %.2f yield: %.2f)",
|
||||
last_mutator_start_dirty_ms, last_mutator_end_dirty_ms, yield_duration_ms);
|
||||
|
||||
if (mutator_dirty_time_ms > 1.0) { // Require > 1ms sample time.
|
||||
// The subtractive term, pending_cards_from_gc(), includes both dirtied and dirty-as-young cards,
// so it can be larger than what is actually considered "pending" (dirty cards only).
|
||||
size_t dirtied_cards = saturated_sub(pending_cards, pending_cards_from_gc());
|
||||
double dirtied_rate = dirtied_cards / mutator_dirty_time_ms;
|
||||
_analytics->report_dirtied_cards_rate_ms(dirtied_rate);
|
||||
log_debug(gc, refine, stats)("Generate dirty cards rate: %.2f cards/ms", dirtied_rate);
|
||||
log_debug(gc, refine, stats)("Generate dirty cards rate: %.2f cards/ms dirtying time %.2f (start %.2f end %.2f yield %.2f) dirtied %zu (pending %zu during_gc %zu)",
|
||||
dirtied_rate,
|
||||
mutator_dirty_time_ms,
|
||||
last_mutator_start_dirty_ms, last_mutator_end_dirty_ms, yield_duration_ms,
|
||||
dirtied_cards, pending_cards, pending_cards_from_gc());
|
||||
}
|
||||
|
||||
_pending_cards_from_gc = next_pending_cards_from_gc;
|
||||
_to_collection_set_cards = next_to_collection_set_cards;
|
||||
}
|
||||
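// Illustrative worked example (not part of this change) for the rate computed above:
// with a window starting at 1000.0 ms, ending at 1250.0 ms, 50.0 ms of yields, and
// 42000 pending cards of which 12000 were produced by the previous GC:
//   mutator_dirty_time_ms = (1250.0 - 1000.0) - 50.0    = 200.0
//   dirtied_cards         = saturated_sub(42000, 12000) = 30000
//   dirtied_rate          = 30000 / 200.0               = 150.0 cards/ms
// which is the value reported to the analytics as the mutator dirtying rate.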
|
||||
bool G1Policy::should_retain_evac_failed_region(uint index) const {
|
||||
@ -761,27 +768,27 @@ bool G1Policy::concurrent_operation_is_full_mark(const char* msg) {
|
||||
((_g1h->gc_cause() != GCCause::_g1_humongous_allocation) || need_to_start_conc_mark(msg));
|
||||
}
|
||||
|
||||
double G1Policy::logged_cards_processing_time() const {
|
||||
double G1Policy::pending_cards_processing_time() const {
|
||||
double all_cards_processing_time = average_time_ms(G1GCPhaseTimes::ScanHR) + average_time_ms(G1GCPhaseTimes::OptScanHR);
|
||||
size_t logged_dirty_cards = phase_times()->sum_thread_work_items(G1GCPhaseTimes::MergeLB, G1GCPhaseTimes::MergeLBDirtyCards);
|
||||
size_t pending_cards = phase_times()->sum_thread_work_items(G1GCPhaseTimes::ScanHR, G1GCPhaseTimes::ScanHRPendingCards) +
|
||||
phase_times()->sum_thread_work_items(G1GCPhaseTimes::OptScanHR, G1GCPhaseTimes::ScanHRPendingCards);
|
||||
size_t scan_heap_roots_cards = phase_times()->sum_thread_work_items(G1GCPhaseTimes::ScanHR, G1GCPhaseTimes::ScanHRScannedCards) +
|
||||
phase_times()->sum_thread_work_items(G1GCPhaseTimes::OptScanHR, G1GCPhaseTimes::ScanHRScannedCards);
|
||||
|
||||
double merge_logged_cards_time = average_time_ms(G1GCPhaseTimes::MergeLB) +
|
||||
phase_times()->cur_distribute_log_buffers_time_ms();
|
||||
double merge_pending_cards_time = phase_times()->cur_merge_refinement_table_time();
|
||||
|
||||
// Approximate the time spent processing cards from log buffers by scaling
|
||||
// the total processing time by the ratio of logged cards to total cards
|
||||
// Approximate the time spent processing pending cards by scaling
// the total processing time by the ratio of pending cards to total cards
|
||||
// processed. There might be duplicate cards in different log buffers,
|
||||
// leading to an overestimate. That effect should be relatively small
|
||||
// unless there are few cards to process, because cards in buffers are
|
||||
// dirtied to limit duplication. Also need to avoid scaling when both
|
||||
// counts are zero, which happens especially during early GCs. So ascribe
|
||||
// all of the time to the logged cards unless there are more total cards.
|
||||
if (logged_dirty_cards >= scan_heap_roots_cards) {
|
||||
return all_cards_processing_time + merge_logged_cards_time;
|
||||
// all of the time to the pending cards unless there are more total cards.
|
||||
if (pending_cards >= scan_heap_roots_cards) {
|
||||
return all_cards_processing_time + merge_pending_cards_time;
|
||||
}
|
||||
return (all_cards_processing_time * logged_dirty_cards / scan_heap_roots_cards) + merge_logged_cards_time;
|
||||
return (all_cards_processing_time * pending_cards / scan_heap_roots_cards) + merge_pending_cards_time;
|
||||
}
|
||||
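// Illustrative worked example (not part of this change) for the scaling above:
// assume ScanHR (+Opt) averaged 8.0 ms, the refinement-table merge took 1.5 ms, and
// 25000 of the 100000 scanned cards were pending. Since pending < scanned:
//   pending_cards_processing_time() = 8.0 * 25000 / 100000 + 1.5 = 2.0 + 1.5 = 3.5 ms
// If pending >= scanned (e.g. a very small collection), all 8.0 ms + 1.5 ms would be
// ascribed to pending cards instead.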
|
||||
// Anything below that is considered to be zero
|
||||
@ -815,6 +822,22 @@ void G1Policy::record_young_collection_end(bool concurrent_operation_is_full_mar
|
||||
// We make the assumption that these are rare.
|
||||
bool update_stats = !allocation_failure;
|
||||
|
||||
size_t const total_cards_scanned = p->sum_thread_work_items(G1GCPhaseTimes::ScanHR, G1GCPhaseTimes::ScanHRScannedCards) +
|
||||
p->sum_thread_work_items(G1GCPhaseTimes::OptScanHR, G1GCPhaseTimes::ScanHRScannedCards);
|
||||
|
||||
// Number of scanned cards with "Dirty" value (and nothing else).
|
||||
size_t const pending_cards_from_refinement_table = p->sum_thread_work_items(G1GCPhaseTimes::ScanHR, G1GCPhaseTimes::ScanHRPendingCards) +
|
||||
p->sum_thread_work_items(G1GCPhaseTimes::OptScanHR, G1GCPhaseTimes::ScanHRPendingCards);
|
||||
// Number of cards actually merged in the Merge RS phase. MergeRSCards below includes the cards from the Eager Reclaim phase.
|
||||
size_t const merged_cards_from_card_rs = p->sum_thread_work_items(G1GCPhaseTimes::MergeRS, G1GCPhaseTimes::MergeRSFromRemSetCards) +
|
||||
p->sum_thread_work_items(G1GCPhaseTimes::OptMergeRS, G1GCPhaseTimes::MergeRSFromRemSetCards);
|
||||
// Number of cards attempted to merge in the Merge RS phase.
|
||||
size_t const total_cards_from_rs = p->sum_thread_work_items(G1GCPhaseTimes::MergeRS, G1GCPhaseTimes::MergeRSTotalCards) +
|
||||
p->sum_thread_work_items(G1GCPhaseTimes::OptMergeRS, G1GCPhaseTimes::MergeRSTotalCards);
|
||||
|
||||
// Cards marked as being into the collection set. May be inaccurate due to races.
|
||||
size_t const total_non_young_rs_cards = MIN2(pending_cards_from_refinement_table + merged_cards_from_card_rs, total_cards_scanned);
|
||||
|
||||
if (update_stats) {
|
||||
// We maintain the invariant that all objects allocated by mutator
|
||||
// threads will be allocated out of eden regions. So, we can use
|
||||
@ -827,6 +850,98 @@ void G1Policy::record_young_collection_end(bool concurrent_operation_is_full_mar
|
||||
uint regions_allocated = _collection_set->eden_region_length();
|
||||
double alloc_rate_ms = (double) regions_allocated / app_time_ms;
|
||||
_analytics->report_alloc_rate_ms(alloc_rate_ms);
|
||||
|
||||
double merge_refinement_table_time = p->cur_merge_refinement_table_time();
|
||||
if (merge_refinement_table_time != 0.0) {
|
||||
_analytics->report_merge_refinement_table_time_ms(merge_refinement_table_time);
|
||||
}
|
||||
if (merged_cards_from_card_rs >= G1NumCardsCostSampleThreshold) {
|
||||
double avg_time_merge_cards = average_time_ms(G1GCPhaseTimes::MergeER) +
|
||||
average_time_ms(G1GCPhaseTimes::MergeRS) +
|
||||
average_time_ms(G1GCPhaseTimes::OptMergeRS);
|
||||
_analytics->report_cost_per_card_merge_ms(avg_time_merge_cards / merged_cards_from_card_rs, is_young_only_pause);
|
||||
log_debug(gc, ergo, cset)("cost per card merge (young %s): avg time %.2f merged cards %zu cost(1m) %.2f pred_cost(1m-yo) %.2f pred_cost(1m-old) %.2f",
|
||||
BOOL_TO_STR(is_young_only_pause),
|
||||
avg_time_merge_cards, merged_cards_from_card_rs, 1e6 * avg_time_merge_cards / merged_cards_from_card_rs, _analytics->predict_card_merge_time_ms(1e6, true), _analytics->predict_card_merge_time_ms(1e6, false));
|
||||
} else {
|
||||
log_debug(gc, ergo, cset)("cost per card merge (young: %s): skipped, total cards %zu", BOOL_TO_STR(is_young_only_pause), total_non_young_rs_cards);
|
||||
}
|
||||
|
||||
// Update prediction for card scan
|
||||
|
||||
if (total_cards_scanned >= G1NumCardsCostSampleThreshold) {
|
||||
double avg_card_scan_time = average_time_ms(G1GCPhaseTimes::ScanHR) +
|
||||
average_time_ms(G1GCPhaseTimes::OptScanHR);
|
||||
|
||||
_analytics->report_cost_per_card_scan_ms(avg_card_scan_time / total_cards_scanned, is_young_only_pause);
|
||||
|
||||
log_debug(gc, ergo, cset)("cost per card scan (young: %s): avg time %.2f total cards %zu cost(1m) %.2f pred_cost(1m-yo) %.2f pred_cost(1m-old) %.2f",
|
||||
BOOL_TO_STR(is_young_only_pause),
|
||||
avg_card_scan_time, total_cards_scanned, 1e6 * avg_card_scan_time / total_cards_scanned, _analytics->predict_card_scan_time_ms(1e6, true), _analytics->predict_card_scan_time_ms(1e6, false));
|
||||
} else {
|
||||
log_debug(gc, ergo, cset)("cost per card scan (young: %s): skipped, total cards %zu", BOOL_TO_STR(is_young_only_pause), total_cards_scanned);
|
||||
}
|
||||
|
||||
// Update prediction for the ratio between cards actually merged onto the card
|
||||
// table from the remembered sets and the total number of cards attempted to
|
||||
// merge.
|
||||
double merge_to_scan_ratio = 1.0;
|
||||
if (total_cards_from_rs > 0) {
|
||||
merge_to_scan_ratio = (double)merged_cards_from_card_rs / total_cards_from_rs;
|
||||
}
|
||||
_analytics->report_card_merge_to_scan_ratio(merge_to_scan_ratio, is_young_only_pause);
|
||||
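A hedged numeric sketch of the ratio reported above; the counts are assumptions for illustration only:

// Of 48000 cards the Merge RS phase attempted to merge, assume 30000 actually
// ended up on the card table; the reported merge-to-scan ratio is then 0.625.
size_t total_cards_from_rs       = 48000;
size_t merged_cards_from_card_rs = 30000;
double merge_to_scan_ratio = (double)merged_cards_from_card_rs / total_cards_from_rs;  // 0.625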
|
||||
// Update prediction for code root scan
|
||||
size_t const total_code_roots_scanned = p->sum_thread_work_items(G1GCPhaseTimes::CodeRoots, G1GCPhaseTimes::CodeRootsScannedNMethods) +
|
||||
p->sum_thread_work_items(G1GCPhaseTimes::OptCodeRoots, G1GCPhaseTimes::CodeRootsScannedNMethods);
|
||||
|
||||
if (total_code_roots_scanned >= G1NumCodeRootsCostSampleThreshold) {
|
||||
double avg_time_code_root_scan = average_time_ms(G1GCPhaseTimes::CodeRoots) +
|
||||
average_time_ms(G1GCPhaseTimes::OptCodeRoots);
|
||||
|
||||
_analytics->report_cost_per_code_root_scan_ms(avg_time_code_root_scan / total_code_roots_scanned, is_young_only_pause);
|
||||
}
|
||||
|
||||
// Update prediction for copy cost per byte
|
||||
size_t copied_bytes = p->sum_thread_work_items(G1GCPhaseTimes::MergePSS, G1GCPhaseTimes::MergePSSCopiedBytes);
|
||||
|
||||
if (copied_bytes > 0) {
|
||||
double avg_copy_time = average_time_ms(G1GCPhaseTimes::ObjCopy) + average_time_ms(G1GCPhaseTimes::OptObjCopy);
|
||||
double cost_per_byte_ms = avg_copy_time / copied_bytes;
|
||||
_analytics->report_cost_per_byte_ms(cost_per_byte_ms, is_young_only_pause);
|
||||
}
|
||||
|
||||
if (_collection_set->young_region_length() > 0) {
|
||||
_analytics->report_young_other_cost_per_region_ms(young_other_time_ms() /
|
||||
_collection_set->young_region_length());
|
||||
}
|
||||
|
||||
if (_collection_set->initial_old_region_length() > 0) {
|
||||
_analytics->report_non_young_other_cost_per_region_ms(non_young_other_time_ms() /
|
||||
_collection_set->initial_old_region_length());
|
||||
}
|
||||
|
||||
_analytics->report_constant_other_time_ms(constant_other_time_ms(pause_time_ms));
|
||||
|
||||
_analytics->report_pending_cards(pending_cards_from_refinement_table, is_young_only_pause);
|
||||
|
||||
_analytics->report_card_rs_length(total_cards_scanned - total_non_young_rs_cards, is_young_only_pause);
|
||||
_analytics->report_code_root_rs_length((double)total_code_roots_scanned, is_young_only_pause);
|
||||
}
|
||||
|
||||
{
|
||||
double mutator_end_time = cur_pause_start_sec() * MILLIUNITS;
|
||||
G1ConcurrentRefineStats* stats = _g1h->concurrent_refine()->sweep_state().stats();
|
||||
// Record any available refinement statistics.
|
||||
record_refinement_stats(stats);
|
||||
|
||||
double yield_duration_ms = TimeHelper::counter_to_millis(_g1h->yield_duration_in_refinement_epoch());
|
||||
record_dirtying_stats(TimeHelper::counter_to_millis(_g1h->last_refinement_epoch_start()),
|
||||
mutator_end_time,
|
||||
pending_cards_from_refinement_table,
|
||||
yield_duration_ms,
|
||||
phase_times()->sum_thread_work_items(G1GCPhaseTimes::MergePSS, G1GCPhaseTimes::MergePSSPendingCards),
|
||||
phase_times()->sum_thread_work_items(G1GCPhaseTimes::MergePSS, G1GCPhaseTimes::MergePSSToYoungGenCards));
|
||||
}
|
||||
|
||||
record_pause(this_pause, start_time_sec, end_time_sec, allocation_failure);
|
||||
@ -857,82 +972,6 @@ void G1Policy::record_young_collection_end(bool concurrent_operation_is_full_mar
|
||||
|
||||
_eden_surv_rate_group->start_adding_regions();
|
||||
|
||||
if (update_stats) {
|
||||
// Update prediction for card merge.
|
||||
size_t const merged_cards_from_log_buffers = p->sum_thread_work_items(G1GCPhaseTimes::MergeLB, G1GCPhaseTimes::MergeLBDirtyCards);
|
||||
// MergeRSCards includes the cards from the Eager Reclaim phase.
|
||||
size_t const merged_cards_from_rs = p->sum_thread_work_items(G1GCPhaseTimes::MergeRS, G1GCPhaseTimes::MergeRSCards) +
|
||||
p->sum_thread_work_items(G1GCPhaseTimes::OptMergeRS, G1GCPhaseTimes::MergeRSCards);
|
||||
size_t const total_cards_merged = merged_cards_from_rs +
|
||||
merged_cards_from_log_buffers;
|
||||
|
||||
if (total_cards_merged >= G1NumCardsCostSampleThreshold) {
|
||||
double avg_time_merge_cards = average_time_ms(G1GCPhaseTimes::MergeER) +
|
||||
average_time_ms(G1GCPhaseTimes::MergeRS) +
|
||||
average_time_ms(G1GCPhaseTimes::MergeLB) +
|
||||
p->cur_distribute_log_buffers_time_ms() +
|
||||
average_time_ms(G1GCPhaseTimes::OptMergeRS);
|
||||
_analytics->report_cost_per_card_merge_ms(avg_time_merge_cards / total_cards_merged, is_young_only_pause);
|
||||
}
|
||||
|
||||
// Update prediction for card scan
|
||||
size_t const total_cards_scanned = p->sum_thread_work_items(G1GCPhaseTimes::ScanHR, G1GCPhaseTimes::ScanHRScannedCards) +
|
||||
p->sum_thread_work_items(G1GCPhaseTimes::OptScanHR, G1GCPhaseTimes::ScanHRScannedCards);
|
||||
|
||||
if (total_cards_scanned >= G1NumCardsCostSampleThreshold) {
|
||||
double avg_time_dirty_card_scan = average_time_ms(G1GCPhaseTimes::ScanHR) +
|
||||
average_time_ms(G1GCPhaseTimes::OptScanHR);
|
||||
|
||||
_analytics->report_cost_per_card_scan_ms(avg_time_dirty_card_scan / total_cards_scanned, is_young_only_pause);
|
||||
}
|
||||
|
||||
// Update prediction for the ratio between cards from the remembered
|
||||
// sets and actually scanned cards from the remembered sets.
|
||||
// Due to duplicates in the log buffers, the number of scanned cards
|
||||
// can be smaller than the cards in the log buffers.
|
||||
const size_t scanned_cards_from_rs = (total_cards_scanned > merged_cards_from_log_buffers) ? total_cards_scanned - merged_cards_from_log_buffers : 0;
|
||||
double scan_to_merge_ratio = 0.0;
|
||||
if (merged_cards_from_rs > 0) {
|
||||
scan_to_merge_ratio = (double)scanned_cards_from_rs / merged_cards_from_rs;
|
||||
}
|
||||
_analytics->report_card_scan_to_merge_ratio(scan_to_merge_ratio, is_young_only_pause);
|
||||
|
||||
// Update prediction for code root scan
|
||||
size_t const total_code_roots_scanned = p->sum_thread_work_items(G1GCPhaseTimes::CodeRoots, G1GCPhaseTimes::CodeRootsScannedNMethods) +
|
||||
p->sum_thread_work_items(G1GCPhaseTimes::OptCodeRoots, G1GCPhaseTimes::CodeRootsScannedNMethods);
|
||||
|
||||
if (total_code_roots_scanned >= G1NumCodeRootsCostSampleThreshold) {
|
||||
double avg_time_code_root_scan = average_time_ms(G1GCPhaseTimes::CodeRoots) +
|
||||
average_time_ms(G1GCPhaseTimes::OptCodeRoots);
|
||||
|
||||
_analytics->report_cost_per_code_root_scan_ms(avg_time_code_root_scan / total_code_roots_scanned, is_young_only_pause);
|
||||
}
|
||||
|
||||
// Update prediction for copy cost per byte
|
||||
size_t copied_bytes = p->sum_thread_work_items(G1GCPhaseTimes::MergePSS, G1GCPhaseTimes::MergePSSCopiedBytes);
|
||||
|
||||
if (copied_bytes > 0) {
|
||||
double cost_per_byte_ms = (average_time_ms(G1GCPhaseTimes::ObjCopy) + average_time_ms(G1GCPhaseTimes::OptObjCopy)) / copied_bytes;
|
||||
_analytics->report_cost_per_byte_ms(cost_per_byte_ms, is_young_only_pause);
|
||||
}
|
||||
|
||||
if (_collection_set->young_region_length() > 0) {
|
||||
_analytics->report_young_other_cost_per_region_ms(young_other_time_ms() /
|
||||
_collection_set->young_region_length());
|
||||
}
|
||||
|
||||
if (_collection_set->initial_old_region_length() > 0) {
|
||||
_analytics->report_non_young_other_cost_per_region_ms(non_young_other_time_ms() /
|
||||
_collection_set->initial_old_region_length());
|
||||
}
|
||||
|
||||
_analytics->report_constant_other_time_ms(constant_other_time_ms(pause_time_ms));
|
||||
|
||||
_analytics->report_pending_cards((double)pending_cards_at_gc_start(), is_young_only_pause);
|
||||
_analytics->report_card_rs_length((double)_card_rs_length, is_young_only_pause);
|
||||
_analytics->report_code_root_rs_length((double)total_code_roots_scanned, is_young_only_pause);
|
||||
}
|
||||
|
||||
assert(!(G1GCPauseTypeHelper::is_concurrent_start_pause(this_pause) && collector_state()->mark_or_rebuild_in_progress()),
|
||||
"If the last pause has been concurrent start, we should not have been in the marking window");
|
||||
if (G1GCPauseTypeHelper::is_concurrent_start_pause(this_pause)) {
|
||||
@ -963,29 +1002,26 @@ void G1Policy::record_young_collection_end(bool concurrent_operation_is_full_mar
|
||||
}
|
||||
|
||||
// Note that _mmu_tracker->max_gc_time() returns the time in seconds.
|
||||
double logged_cards_time_goal_ms = _mmu_tracker->max_gc_time() * MILLIUNITS * G1RSetUpdatingPauseTimePercent / 100.0;
|
||||
double pending_cards_time_goal_ms = _mmu_tracker->max_gc_time() * MILLIUNITS * G1RSetUpdatingPauseTimePercent / 100.0;
|
||||
|
||||
double const logged_cards_time_ms = logged_cards_processing_time();
|
||||
size_t logged_cards =
|
||||
phase_times()->sum_thread_work_items(G1GCPhaseTimes::MergeLB,
|
||||
G1GCPhaseTimes::MergeLBDirtyCards);
|
||||
bool exceeded_goal = logged_cards_time_goal_ms < logged_cards_time_ms;
|
||||
size_t predicted_thread_buffer_cards = _analytics->predict_dirtied_cards_in_thread_buffers();
|
||||
double const pending_cards_time_ms = pending_cards_processing_time();
|
||||
size_t pending_cards = phase_times()->sum_thread_work_items(G1GCPhaseTimes::ScanHR, G1GCPhaseTimes::ScanHRPendingCards) +
|
||||
phase_times()->sum_thread_work_items(G1GCPhaseTimes::OptScanHR, G1GCPhaseTimes::ScanHRPendingCards);
|
||||
|
||||
bool exceeded_goal = pending_cards_time_goal_ms < pending_cards_time_ms;
|
||||
G1ConcurrentRefine* cr = _g1h->concurrent_refine();
|
||||
|
||||
log_debug(gc, ergo, refine)
|
||||
("GC refinement: goal: %zu + %zu / %1.2fms, actual: %zu / %1.2fms, %s",
|
||||
("GC refinement: goal: %zu / %1.2fms, actual: %zu / %1.2fms, %s",
|
||||
cr->pending_cards_target(),
|
||||
predicted_thread_buffer_cards,
|
||||
logged_cards_time_goal_ms,
|
||||
logged_cards,
|
||||
logged_cards_time_ms,
|
||||
pending_cards_time_goal_ms,
|
||||
pending_cards,
|
||||
pending_cards_time_ms,
|
||||
(exceeded_goal ? " (exceeded goal)" : ""));
|
||||
|
||||
cr->adjust_after_gc(logged_cards_time_ms,
|
||||
logged_cards,
|
||||
predicted_thread_buffer_cards,
|
||||
logged_cards_time_goal_ms);
|
||||
cr->adjust_after_gc(pending_cards_time_ms,
|
||||
pending_cards,
|
||||
pending_cards_time_goal_ms);
|
||||
}
|
||||
|
||||
G1IHOPControl* G1Policy::create_ihop_control(const G1OldGenAllocationTracker* old_gen_alloc_tracker,
|
||||
@ -1057,34 +1093,27 @@ double G1Policy::predict_base_time_ms(size_t pending_cards,
|
||||
size_t code_root_rs_length) const {
|
||||
bool in_young_only_phase = collector_state()->in_young_only_phase();
|
||||
|
||||
size_t unique_cards_from_rs = _analytics->predict_scan_card_num(card_rs_length, in_young_only_phase);
|
||||
// Assume that all cards from the log buffers will be scanned, i.e. there are no
|
||||
// duplicates in that set.
|
||||
size_t effective_scanned_cards = unique_cards_from_rs + pending_cards;
|
||||
// Cards from the refinement table and the cards from the young gen remset are
|
||||
// unique to each other as they are located on the card table.
|
||||
size_t effective_scanned_cards = card_rs_length + pending_cards;
|
||||
|
||||
double card_merge_time = _analytics->predict_card_merge_time_ms(pending_cards + card_rs_length, in_young_only_phase);
|
||||
double refinement_table_merge_time = _analytics->predict_merge_refinement_table_time_ms();
|
||||
double card_scan_time = _analytics->predict_card_scan_time_ms(effective_scanned_cards, in_young_only_phase);
|
||||
double code_root_scan_time = _analytics->predict_code_root_scan_time_ms(code_root_rs_length, in_young_only_phase);
|
||||
double constant_other_time = _analytics->predict_constant_other_time_ms();
|
||||
double survivor_evac_time = predict_survivor_regions_evac_time();
|
||||
|
||||
double total_time = card_merge_time + card_scan_time + code_root_scan_time + constant_other_time + survivor_evac_time;
|
||||
double total_time = refinement_table_merge_time + card_scan_time + code_root_scan_time + constant_other_time + survivor_evac_time;
|
||||
|
||||
log_trace(gc, ergo, heap)("Predicted base time: total %f lb_cards %zu card_rs_length %zu effective_scanned_cards %zu "
|
||||
"card_merge_time %f card_scan_time %f code_root_rs_length %zu code_root_scan_time %f "
|
||||
"refinement_table_merge_time %f card_scan_time %f code_root_rs_length %zu code_root_scan_time %f "
|
||||
"constant_other_time %f survivor_evac_time %f",
|
||||
total_time, pending_cards, card_rs_length, effective_scanned_cards,
|
||||
card_merge_time, card_scan_time, code_root_rs_length, code_root_scan_time,
|
||||
refinement_table_merge_time, card_scan_time, code_root_rs_length, code_root_scan_time,
|
||||
constant_other_time, survivor_evac_time);
|
||||
return total_time;
|
||||
}
|
||||
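To make the revised base-time model concrete, here is a minimal sketch with assumed predictor outputs (none of these numbers come from the patch). The notable change is that the old per-card merge term is replaced by a single whole-refinement-table merge prediction:

// Illustrative only; all inputs are assumed.
double refinement_table_merge_time = 0.8;  // ms, predicted merge of the refinement table
double card_scan_time              = 2.4;  // ms, for card_rs_length + pending_cards cards
double code_root_scan_time         = 0.3;  // ms
double constant_other_time         = 1.5;  // ms
double survivor_evac_time          = 3.0;  // ms
double total_time = refinement_table_merge_time + card_scan_time + code_root_scan_time +
                    constant_other_time + survivor_evac_time;  // 8.0 ms predicted base time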
|
||||
double G1Policy::predict_base_time_ms(size_t pending_cards) const {
|
||||
bool for_young_only_phase = collector_state()->in_young_only_phase();
|
||||
size_t card_rs_length = _analytics->predict_card_rs_length(for_young_only_phase);
|
||||
return predict_base_time_ms(pending_cards, card_rs_length);
|
||||
}
|
||||
|
||||
double G1Policy::predict_base_time_ms(size_t pending_cards, size_t card_rs_length) const {
|
||||
bool for_young_only_phase = collector_state()->in_young_only_phase();
|
||||
size_t code_root_rs_length = _analytics->predict_code_root_rs_length(for_young_only_phase);
|
||||
@ -1428,6 +1457,64 @@ size_t G1Policy::allowed_waste_in_collection_set() const {
|
||||
return G1HeapWastePercent * _g1h->capacity() / 100;
|
||||
}
|
||||
|
||||
bool G1Policy::try_get_available_bytes_estimate(size_t& available_bytes) const {
|
||||
// Getting used young bytes requires holding Heap_lock. But we can't take the
// lock normally and block until it is available. Blocking on the lock could
|
||||
// deadlock with a GC VMOp that is holding the lock and requesting a
|
||||
// safepoint. Instead try to lock, and return the result of that attempt,
|
||||
// and the estimate if successful.
|
||||
if (Heap_lock->try_lock()) {
|
||||
size_t used_bytes = estimate_used_young_bytes_locked();
|
||||
Heap_lock->unlock();
|
||||
|
||||
size_t young_bytes = young_list_target_length() * G1HeapRegion::GrainBytes;
|
||||
available_bytes = young_bytes - MIN2(young_bytes, used_bytes);
|
||||
return true;
|
||||
} else {
|
||||
available_bytes = 0;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
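A hypothetical caller-side sketch of how this non-blocking estimate is meant to be consumed; the retry policy shown mirrors the revise-young-length task further below, but the exact numbers are assumptions:

// Hypothetical usage sketch, not code from the patch.
size_t available_bytes;
if (policy->try_get_available_bytes_estimate(available_bytes)) {
  // Got an estimate: plan the next check based on when the next GC is predicted.
  schedule(policy->predict_time_to_next_gc_ms(available_bytes));
} else {
  // Heap_lock was contended; retry almost immediately instead of blocking.
  schedule(1);
}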
|
||||
double G1Policy::predict_time_to_next_gc_ms(size_t available_bytes) const {
|
||||
double alloc_region_rate = _analytics->predict_alloc_rate_ms();
|
||||
double alloc_bytes_rate = alloc_region_rate * G1HeapRegion::GrainBytes;
|
||||
if (alloc_bytes_rate == 0.0) {
|
||||
// A zero rate indicates we don't yet have data to use for predictions.
|
||||
// Since we don't have any idea how long until the next GC, use a time of
|
||||
// zero.
|
||||
return 0.0;
|
||||
} else {
|
||||
// If the heap size is large and the allocation rate is small, we can get
|
||||
// a predicted time until next GC that is so large it can cause problems
|
||||
// (such as overflow) in other calculations. Limit the prediction to one
|
||||
// hour, which is still large in this context.
|
||||
const double one_hour_ms = 60.0 * 60.0 * MILLIUNITS;
|
||||
double raw_time_ms = available_bytes / alloc_bytes_rate;
|
||||
return MIN2(raw_time_ms, one_hour_ms);
|
||||
}
|
||||
}
|
||||
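A worked example of the prediction above; region size and rates are illustrative assumptions:

// Illustrative only. Assume 32 MB regions and an allocation rate of 0.002 regions/ms.
double alloc_region_rate = 0.002;                                    // regions per ms
double alloc_bytes_rate  = alloc_region_rate * 32.0 * 1024 * 1024;   // ~67109 bytes per ms
size_t available_bytes   = 512u * 1024 * 1024;                       // 512 MB left in young gen
double raw_time_ms       = available_bytes / alloc_bytes_rate;       // ~8000 ms
// Well below the one-hour cap, so roughly 8 seconds is returned unchanged.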
|
||||
uint64_t G1Policy::adjust_wait_time_ms(double wait_time_ms, uint64_t min_time_ms) {
|
||||
return MAX2(static_cast<uint64_t>(sqrt(wait_time_ms) * 4.0), min_time_ms);
|
||||
}
|
||||
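The square-root scaling above grows the wait sub-linearly with the predicted distance to the next GC. A small standalone sketch (standard C++ rather than HotSpot macros) with example values:

#include <algorithm>
#include <cmath>
#include <cstdint>

// Illustrative re-statement of the formula: sqrt(wait_time_ms) * 4, floored at min_time_ms.
uint64_t adjusted_wait_ms(double wait_time_ms, uint64_t min_time_ms) {
  return std::max<uint64_t>(static_cast<uint64_t>(std::sqrt(wait_time_ms) * 4.0), min_time_ms);
}
// adjusted_wait_ms( 100.0, 47) ==  47   (raw 40 ms, floored to the 47 ms minimum used below)
// adjusted_wait_ms(2500.0, 47) == 200
// adjusted_wait_ms(8000.0, 47) == 357   (sqrt(8000) * 4 is about 357.8)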
|
||||
double G1Policy::last_mutator_dirty_start_time_ms() {
|
||||
return TimeHelper::counter_to_millis(_g1h->last_refinement_epoch_start());
|
||||
}
|
||||
|
||||
size_t G1Policy::current_pending_cards() {
|
||||
double now = os::elapsedTime() * MILLIUNITS;
|
||||
return _pending_cards_from_gc + _analytics->predict_dirtied_cards_rate_ms() * (now - last_mutator_dirty_start_time_ms());
|
||||
}
|
||||
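A small numeric sketch of the estimate above; all inputs are assumed:

// Cards left over from the last GC plus cards predicted to have been dirtied by the
// mutator since the current refinement epoch started. Numbers are illustrative.
size_t pending_cards_from_gc = 15000;     // _pending_cards_from_gc
double dirtied_cards_rate_ms = 12.5;      // predicted cards dirtied per ms
double elapsed_ms            = 2000.0;    // now - last_mutator_dirty_start_time_ms()
size_t current_pending = pending_cards_from_gc + (size_t)(dirtied_cards_rate_ms * elapsed_ms);  // 40000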
|
||||
size_t G1Policy::current_to_collection_set_cards() {
|
||||
// The incremental part is covered by the dirtied_cards_rate, i.e. pending cards
|
||||
// cover both to-collection-set cards and other interesting cards because we do not
|
||||
// know which is which until we look.
|
||||
return _to_collection_set_cards;
|
||||
}
|
||||
|
||||
uint G1Policy::min_retained_old_cset_length() const {
|
||||
// Guarantee some progress with retained regions regardless of available time by
|
||||
// taking at least one region.
|
||||
|
||||
@ -48,6 +48,7 @@ class G1HeapRegion;
|
||||
class G1CollectionSet;
|
||||
class G1CollectionSetCandidates;
|
||||
class G1CollectionSetChooser;
|
||||
class G1ConcurrentRefineStats;
|
||||
class G1IHOPControl;
|
||||
class G1Analytics;
|
||||
class G1SurvivorRegions;
|
||||
@ -101,9 +102,18 @@ class G1Policy: public CHeapObj<mtGC> {
|
||||
|
||||
uint _free_regions_at_end_of_collection;
|
||||
|
||||
size_t _card_rs_length;
|
||||
|
||||
size_t _pending_cards_at_gc_start;
|
||||
// Tracks the number of cards marked as dirty (only) during garbage collection
|
||||
// (evacuation) on the card table.
|
||||
// This is needed to properly account for those cards in the heuristics that decide
// when to start refinement, which require knowing approximately how many cards are
// currently on the card table.
// After the first completed refinement sweep of the refinement table between two
// garbage collections, this value is reset to zero, as that sweep processed all
// those cards.
|
||||
size_t _pending_cards_from_gc;
|
||||
// Tracks the approximate number of cards found as to-collection-set by either the
|
||||
// garbage collection or the most recent refinement sweep.
|
||||
size_t _to_collection_set_cards;
|
||||
|
||||
G1ConcurrentStartToMixedTimeTracker _concurrent_start_to_mixed;
|
||||
|
||||
@ -111,7 +121,7 @@ class G1Policy: public CHeapObj<mtGC> {
|
||||
return collector_state()->in_young_only_phase() && !collector_state()->mark_or_rebuild_in_progress();
|
||||
}
|
||||
|
||||
double logged_cards_processing_time() const;
|
||||
double pending_cards_processing_time() const;
|
||||
public:
|
||||
const G1Predictions& predictor() const { return _predictor; }
|
||||
const G1Analytics* analytics() const { return const_cast<const G1Analytics*>(_analytics); }
|
||||
@ -129,16 +139,10 @@ public:
|
||||
hr->install_surv_rate_group(_survivor_surv_rate_group);
|
||||
}
|
||||
|
||||
void record_card_rs_length(size_t num_cards) {
|
||||
_card_rs_length = num_cards;
|
||||
}
|
||||
|
||||
double cur_pause_start_sec() const {
|
||||
return _cur_pause_start_sec;
|
||||
}
|
||||
|
||||
double predict_base_time_ms(size_t pending_cards) const;
|
||||
|
||||
double predict_base_time_ms(size_t pending_cards, size_t card_rs_length) const;
|
||||
|
||||
// Base time contains handling remembered sets and constant other time of the
|
||||
@ -239,7 +243,13 @@ private:
|
||||
|
||||
public:
|
||||
size_t predict_bytes_to_copy(G1HeapRegion* hr) const;
|
||||
size_t pending_cards_at_gc_start() const { return _pending_cards_at_gc_start; }
|
||||
|
||||
double last_mutator_dirty_start_time_ms();
|
||||
size_t pending_cards_from_gc() const { return _pending_cards_from_gc; }
|
||||
|
||||
size_t current_pending_cards();
|
||||
|
||||
size_t current_to_collection_set_cards();
|
||||
|
||||
// GC efficiency for collecting the region based on the time estimate for
|
||||
// merging and scanning incoming references.
|
||||
@ -286,7 +296,7 @@ public:
|
||||
// Check the current value of the young list RSet length and
|
||||
// compare it against the last prediction. If the current value is
|
||||
// higher, recalculate the young list target length prediction.
|
||||
void revise_young_list_target_length(size_t card_rs_length, size_t code_root_rs_length);
|
||||
void revise_young_list_target_length(size_t pending_cards, size_t card_rs_length, size_t code_root_rs_length);
|
||||
|
||||
// This should be called after the heap is resized.
|
||||
void record_new_heap_size(uint new_number_of_regions);
|
||||
@ -325,7 +335,6 @@ public:
|
||||
// Amount of allowed waste in bytes in the collection set.
|
||||
size_t allowed_waste_in_collection_set() const;
|
||||
|
||||
|
||||
private:
|
||||
|
||||
// Predict the number of bytes of surviving objects from survivor and old
|
||||
@ -359,17 +368,39 @@ public:
|
||||
|
||||
bool use_adaptive_young_list_length() const;
|
||||
|
||||
// Try to get an estimate of the currently available bytes in the young gen. This
|
||||
// operation considers itself low-priority: if other threads need the resources
|
||||
// required to get the information, return false to indicate that the caller
|
||||
// should retry "soon".
|
||||
bool try_get_available_bytes_estimate(size_t& bytes) const;
|
||||
// Estimate time until next GC, based on remaining bytes available for
|
||||
// allocation and the allocation rate.
|
||||
double predict_time_to_next_gc_ms(size_t available_bytes) const;
|
||||
|
||||
// Adjust wait times so that wakeups become less frequent the further away the next GC is.
// But don't increase the wait time too rapidly, and bound it from below by min_time_ms.
|
||||
// This reduces the number of thread wakeups that just immediately
|
||||
// go back to waiting, while still being responsive to behavior changes.
|
||||
uint64_t adjust_wait_time_ms(double wait_time_ms, uint64_t min_time_ms);
|
||||
|
||||
private:
|
||||
// Return an estimate of the number of bytes used in young gen.
|
||||
// precondition: holding Heap_lock
|
||||
size_t estimate_used_young_bytes_locked() const;
|
||||
|
||||
public:
|
||||
|
||||
void transfer_survivors_to_cset(const G1SurvivorRegions* survivors);
|
||||
|
||||
// Record and log stats and pending cards before not-full collection.
|
||||
// thread_buffer_cards is the number of cards that were in per-thread
|
||||
// buffers. pending_cards includes thread_buffer_cards.
|
||||
void record_concurrent_refinement_stats(size_t pending_cards,
|
||||
size_t thread_buffer_cards);
|
||||
// Record and log stats and pending cards to update predictors.
|
||||
void record_refinement_stats(G1ConcurrentRefineStats* stats);
|
||||
|
||||
void record_dirtying_stats(double last_mutator_start_dirty_ms,
|
||||
double last_mutator_end_dirty_ms,
|
||||
size_t pending_cards,
|
||||
double yield_duration,
|
||||
size_t next_pending_cards_from_gc,
|
||||
size_t next_to_collection_set_cards);
|
||||
|
||||
bool should_retain_evac_failed_region(G1HeapRegion* r) const {
|
||||
return should_retain_evac_failed_region(r->hrm_index());
|
||||
|
||||
@ -1,148 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2019, 2025, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "gc/g1/g1RedirtyCardsQueue.hpp"
|
||||
#include "gc/shared/bufferNode.hpp"
|
||||
#include "runtime/atomicAccess.hpp"
|
||||
#include "utilities/debug.hpp"
|
||||
#include "utilities/macros.hpp"
|
||||
|
||||
// G1RedirtyCardsLocalQueueSet
|
||||
|
||||
G1RedirtyCardsLocalQueueSet::G1RedirtyCardsLocalQueueSet(G1RedirtyCardsQueueSet* shared_qset) :
|
||||
PtrQueueSet(shared_qset->allocator()),
|
||||
_shared_qset(shared_qset),
|
||||
_buffers(),
|
||||
_queue(this)
|
||||
{}
|
||||
|
||||
#ifdef ASSERT
|
||||
G1RedirtyCardsLocalQueueSet::~G1RedirtyCardsLocalQueueSet() {
|
||||
assert(_buffers._head == nullptr, "unflushed qset");
|
||||
assert(_buffers._tail == nullptr, "invariant");
|
||||
assert(_buffers._entry_count == 0, "invariant");
|
||||
}
|
||||
#endif // ASSERT
|
||||
|
||||
void G1RedirtyCardsLocalQueueSet::enqueue_completed_buffer(BufferNode* node) {
|
||||
_buffers._entry_count += node->size();
|
||||
node->set_next(_buffers._head);
|
||||
_buffers._head = node;
|
||||
if (_buffers._tail == nullptr) {
|
||||
_buffers._tail = node;
|
||||
}
|
||||
}
|
||||
|
||||
void G1RedirtyCardsLocalQueueSet::enqueue(void* value) {
|
||||
if (!try_enqueue(_queue, value)) {
|
||||
BufferNode* old_node = exchange_buffer_with_new(_queue);
|
||||
if (old_node != nullptr) {
|
||||
enqueue_completed_buffer(old_node);
|
||||
}
|
||||
retry_enqueue(_queue, value);
|
||||
}
|
||||
}
|
||||
|
||||
BufferNodeList G1RedirtyCardsLocalQueueSet::flush() {
|
||||
flush_queue(_queue);
|
||||
BufferNodeList cur_buffers = _buffers;
|
||||
_shared_qset->add_bufferlist(_buffers);
|
||||
_buffers = BufferNodeList();
|
||||
return cur_buffers;
|
||||
}
|
||||
|
||||
// G1RedirtyCardsLocalQueueSet::Queue
|
||||
|
||||
G1RedirtyCardsLocalQueueSet::Queue::Queue(G1RedirtyCardsLocalQueueSet* qset) :
|
||||
PtrQueue(qset)
|
||||
{}
|
||||
|
||||
#ifdef ASSERT
|
||||
G1RedirtyCardsLocalQueueSet::Queue::~Queue() {
|
||||
assert(buffer() == nullptr, "unflushed queue");
|
||||
}
|
||||
#endif // ASSERT
|
||||
|
||||
// G1RedirtyCardsQueueSet
|
||||
|
||||
G1RedirtyCardsQueueSet::G1RedirtyCardsQueueSet(BufferNode::Allocator* allocator) :
|
||||
PtrQueueSet(allocator),
|
||||
_list(),
|
||||
_entry_count(0),
|
||||
_tail(nullptr)
|
||||
DEBUG_ONLY(COMMA _collecting(true))
|
||||
{}
|
||||
|
||||
G1RedirtyCardsQueueSet::~G1RedirtyCardsQueueSet() {
|
||||
verify_empty();
|
||||
}
|
||||
|
||||
#ifdef ASSERT
|
||||
void G1RedirtyCardsQueueSet::verify_empty() const {
|
||||
assert(_list.empty(), "precondition");
|
||||
assert(_tail == nullptr, "invariant");
|
||||
assert(_entry_count == 0, "invariant");
|
||||
}
|
||||
#endif // ASSERT
|
||||
|
||||
BufferNode* G1RedirtyCardsQueueSet::all_completed_buffers() const {
|
||||
DEBUG_ONLY(_collecting = false;)
|
||||
return _list.top();
|
||||
}
|
||||
|
||||
BufferNodeList G1RedirtyCardsQueueSet::take_all_completed_buffers() {
|
||||
DEBUG_ONLY(_collecting = false;)
|
||||
BufferNodeList result(_list.pop_all(), _tail, _entry_count);
|
||||
_tail = nullptr;
|
||||
_entry_count = 0;
|
||||
DEBUG_ONLY(_collecting = true;)
|
||||
return result;
|
||||
}
|
||||
|
||||
void G1RedirtyCardsQueueSet::update_tail(BufferNode* node) {
|
||||
// Node is the tail of a (possibly single element) list just prepended to
|
||||
// _list. If, after that prepend, node's follower is null, then node is
|
||||
// also the tail of _list, so record it as such.
|
||||
if (node->next() == nullptr) {
|
||||
assert(_tail == nullptr, "invariant");
|
||||
_tail = node;
|
||||
}
|
||||
}
|
||||
|
||||
void G1RedirtyCardsQueueSet::enqueue_completed_buffer(BufferNode* node) {
|
||||
assert(_collecting, "precondition");
|
||||
AtomicAccess::add(&_entry_count, node->size());
|
||||
_list.push(*node);
|
||||
update_tail(node);
|
||||
}
|
||||
|
||||
void G1RedirtyCardsQueueSet::add_bufferlist(const BufferNodeList& buffers) {
|
||||
assert(_collecting, "precondition");
|
||||
if (buffers._head != nullptr) {
|
||||
assert(buffers._tail != nullptr, "invariant");
|
||||
AtomicAccess::add(&_entry_count, buffers._entry_count);
|
||||
_list.prepend(*buffers._head, *buffers._tail);
|
||||
update_tail(buffers._tail);
|
||||
}
|
||||
}
|
||||
@ -1,98 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2019, 2024, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef SHARE_GC_G1_G1REDIRTYCARDSQUEUE_HPP
|
||||
#define SHARE_GC_G1_G1REDIRTYCARDSQUEUE_HPP
|
||||
|
||||
#include "gc/shared/bufferNode.hpp"
|
||||
#include "gc/shared/bufferNodeList.hpp"
|
||||
#include "gc/shared/ptrQueue.hpp"
|
||||
#include "memory/padded.hpp"
|
||||
#include "utilities/macros.hpp"
|
||||
|
||||
class G1RedirtyCardsQueueSet;
|
||||
|
||||
// A thread-local qset and queue. It provides an uncontended staging
|
||||
// area for completed buffers, to be flushed to the shared qset en masse.
|
||||
class G1RedirtyCardsLocalQueueSet : private PtrQueueSet {
|
||||
class Queue : public PtrQueue {
|
||||
public:
|
||||
Queue(G1RedirtyCardsLocalQueueSet* qset);
|
||||
~Queue() NOT_DEBUG(= default);
|
||||
};
|
||||
|
||||
G1RedirtyCardsQueueSet* _shared_qset;
|
||||
BufferNodeList _buffers;
|
||||
Queue _queue;
|
||||
|
||||
// Add the buffer to the local list.
|
||||
virtual void enqueue_completed_buffer(BufferNode* node);
|
||||
|
||||
public:
|
||||
G1RedirtyCardsLocalQueueSet(G1RedirtyCardsQueueSet* shared_qset);
|
||||
~G1RedirtyCardsLocalQueueSet() NOT_DEBUG(= default);
|
||||
|
||||
void enqueue(void* value);
|
||||
|
||||
// Transfer all completed buffers to the shared qset.
|
||||
// Returns the flushed BufferNodeList which is later used
|
||||
// as a shortcut into the shared qset.
|
||||
BufferNodeList flush();
|
||||
};
|
||||
|
||||
// Card table entries to be redirtied and the cards reprocessed later.
|
||||
// Has two phases, collecting and processing. During the collecting
|
||||
// phase buffers are added to the set. Once collecting is complete and
|
||||
// processing starts, buffers can no longer be added. Taking all the
|
||||
// collected (and processed) buffers reverts back to collecting, allowing
|
||||
// the set to be reused for another round of redirtying.
|
||||
class G1RedirtyCardsQueueSet : public PtrQueueSet {
|
||||
DEFINE_PAD_MINUS_SIZE(1, DEFAULT_PADDING_SIZE, 0);
|
||||
BufferNode::Stack _list;
|
||||
DEFINE_PAD_MINUS_SIZE(2, DEFAULT_PADDING_SIZE, sizeof(size_t));
|
||||
volatile size_t _entry_count;
|
||||
DEFINE_PAD_MINUS_SIZE(3, DEFAULT_PADDING_SIZE, sizeof(BufferNode*));
|
||||
BufferNode* _tail;
|
||||
DEBUG_ONLY(mutable bool _collecting;)
|
||||
|
||||
void update_tail(BufferNode* node);
|
||||
|
||||
public:
|
||||
G1RedirtyCardsQueueSet(BufferNode::Allocator* allocator);
|
||||
~G1RedirtyCardsQueueSet();
|
||||
|
||||
void verify_empty() const NOT_DEBUG_RETURN;
|
||||
|
||||
// Collect buffers. These functions are thread-safe.
|
||||
// precondition: Must not be concurrent with buffer processing.
|
||||
virtual void enqueue_completed_buffer(BufferNode* node);
|
||||
void add_bufferlist(const BufferNodeList& buffers);
|
||||
|
||||
// Processing phase operations.
|
||||
// precondition: Must not be concurrent with buffer collection.
|
||||
BufferNode* all_completed_buffers() const;
|
||||
BufferNodeList take_all_completed_buffers();
|
||||
};
|
||||
|
||||
#endif // SHARE_GC_G1_G1REDIRTYCARDSQUEUE_HPP
|
||||
File diff suppressed because it is too large
@ -26,6 +26,7 @@
|
||||
#define SHARE_GC_G1_G1REMSET_HPP
|
||||
|
||||
#include "gc/g1/g1CardTable.hpp"
|
||||
#include "gc/g1/g1CardTableClaimTable.hpp"
|
||||
#include "gc/g1/g1GCPhaseTimes.hpp"
|
||||
#include "gc/g1/g1HeapRegion.hpp"
|
||||
#include "gc/g1/g1OopClosures.hpp"
|
||||
@ -65,20 +66,15 @@ private:
|
||||
|
||||
G1CollectedHeap* _g1h;
|
||||
|
||||
G1CardTable* _ct;
|
||||
G1Policy* _g1p;
|
||||
|
||||
void print_merge_heap_roots_stats();
|
||||
G1Policy* _g1p;
|
||||
|
||||
void assert_scan_top_is_null(uint hrm_index) NOT_DEBUG_RETURN;
|
||||
|
||||
void enqueue_for_reprocessing(CardValue* card_ptr);
|
||||
|
||||
public:
|
||||
// Initialize data that depends on the heap size being known.
|
||||
void initialize(uint max_num_regions);
|
||||
|
||||
G1RemSet(G1CollectedHeap* g1h, G1CardTable* ct);
|
||||
G1RemSet(G1CollectedHeap* g1h);
|
||||
~G1RemSet();
|
||||
|
||||
// Scan all cards in the non-collection set regions that potentially contain
|
||||
@ -101,7 +97,7 @@ public:
|
||||
|
||||
// Print coarsening stats.
|
||||
void print_coarsen_stats();
|
||||
// Creates a task for cleaining up temporary data structures and the
|
||||
// Creates a task for cleaning up temporary data structures and the
|
||||
// card table, removing temporary duplicate detection information.
|
||||
G1AbstractSubTask* create_cleanup_after_scan_heap_roots_task();
|
||||
// Excludes the given region from heap root scanning.
|
||||
@ -122,16 +118,19 @@ public:
|
||||
G1GCPhaseTimes::GCParPhases scan_phase,
|
||||
G1GCPhaseTimes::GCParPhases objcopy_phase);
|
||||
|
||||
// Two methods for concurrent refinement support, executed concurrently to
|
||||
// the mutator:
|
||||
// Cleans the card at "*card_ptr_addr" before refinement, returns true iff the
|
||||
// card needs later refinement.
|
||||
bool clean_card_before_refine(CardValue** const card_ptr_addr);
|
||||
enum RefineResult {
|
||||
HasRefToCSet, // The (dirty) card has a reference to the collection set.
|
||||
AlreadyToCSet, // The card is already marked as having a reference to the collection set.
HasRefToOld,   // The dirty card contains references to other old regions (not the collection set).
NoCrossRegion, // There is no interesting reference in the card any more. The mutator changed all
               // such references after dirtying the card.
CouldNotParse  // The card is unparsable; retry later.
|
||||
};
|
||||
// Refine the region corresponding to "card_ptr". Must be called after
|
||||
// being filtered by clean_card_before_refine(), and after proper
|
||||
// fence/synchronization.
|
||||
void refine_card_concurrently(CardValue* const card_ptr,
|
||||
const uint worker_id);
|
||||
RefineResult refine_card_concurrently(CardValue* const card_ptr,
|
||||
const uint worker_id);
|
||||
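A hypothetical caller-side dispatch on the new result enum; the actions in the comments are assumptions about what a refinement sweep might do with each outcome, not code from the patch:

switch (rem_set->refine_card_concurrently(card_ptr, worker_id)) {
  case G1RemSet::HasRefToCSet:  /* remember the card as to-collection-set        */ break;
  case G1RemSet::AlreadyToCSet: /* nothing more to do for this card              */ break;
  case G1RemSet::HasRefToOld:   /* keep the cross-region reference for later GCs */ break;
  case G1RemSet::NoCrossRegion: /* the card can stay clean                       */ break;
  case G1RemSet::CouldNotParse: /* leave the card to be retried later            */ break;
}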
|
||||
// Print accumulated summary info from the start of the VM.
|
||||
void print_summary_info();
|
||||
|
||||
@ -27,7 +27,6 @@
|
||||
#include "gc/g1/g1CollectedHeap.inline.hpp"
|
||||
#include "gc/g1/g1ConcurrentRefine.hpp"
|
||||
#include "gc/g1/g1ConcurrentRefineThread.hpp"
|
||||
#include "gc/g1/g1DirtyCardQueue.hpp"
|
||||
#include "gc/g1/g1HeapRegion.hpp"
|
||||
#include "gc/g1/g1HeapRegionRemSet.inline.hpp"
|
||||
#include "gc/g1/g1RemSet.hpp"
|
||||
@ -37,39 +36,61 @@
|
||||
#include "runtime/javaThread.hpp"
|
||||
|
||||
void G1RemSetSummary::update() {
|
||||
class CollectData : public ThreadClosure {
|
||||
G1ConcurrentRefine* refine = G1CollectedHeap::heap()->concurrent_refine();
|
||||
|
||||
class CollectWorkerData : public ThreadClosure {
|
||||
G1RemSetSummary* _summary;
|
||||
uint _counter;
|
||||
public:
|
||||
CollectData(G1RemSetSummary * summary) : _summary(summary), _counter(0) {}
|
||||
CollectWorkerData(G1RemSetSummary* summary) : _summary(summary), _counter(0) {}
|
||||
virtual void do_thread(Thread* t) {
|
||||
G1ConcurrentRefineThread* crt = static_cast<G1ConcurrentRefineThread*>(t);
|
||||
_summary->set_refine_thread_cpu_time(_counter, crt->cpu_time());
|
||||
_summary->set_worker_thread_cpu_time(_counter, crt->cpu_time());
|
||||
_counter++;
|
||||
}
|
||||
} collector(this);
|
||||
|
||||
G1CollectedHeap* g1h = G1CollectedHeap::heap();
|
||||
g1h->concurrent_refine()->threads_do(&collector);
|
||||
refine->worker_threads_do(&collector);
|
||||
|
||||
class CollectControlData : public ThreadClosure {
|
||||
G1RemSetSummary* _summary;
|
||||
public:
|
||||
CollectControlData(G1RemSetSummary* summary) : _summary(summary) {}
|
||||
virtual void do_thread(Thread* t) {
|
||||
G1ConcurrentRefineThread* crt = static_cast<G1ConcurrentRefineThread*>(t);
|
||||
_summary->set_control_thread_cpu_time(crt->cpu_time());
|
||||
}
|
||||
} control(this);
|
||||
|
||||
refine->control_thread_do(&control);
|
||||
}
|
||||
|
||||
void G1RemSetSummary::set_refine_thread_cpu_time(uint thread, jlong value) {
|
||||
assert(_refine_threads_cpu_times != nullptr, "just checking");
|
||||
assert(thread < _num_refine_threads, "just checking");
|
||||
_refine_threads_cpu_times[thread] = value;
|
||||
void G1RemSetSummary::set_worker_thread_cpu_time(uint thread, jlong value) {
|
||||
assert(_worker_threads_cpu_times != nullptr, "just checking");
|
||||
assert(thread < _num_worker_threads, "just checking");
|
||||
_worker_threads_cpu_times[thread] = value;
|
||||
}
|
||||
|
||||
jlong G1RemSetSummary::refine_thread_cpu_time(uint thread) const {
|
||||
assert(_refine_threads_cpu_times != nullptr, "just checking");
|
||||
assert(thread < _num_refine_threads, "just checking");
|
||||
return _refine_threads_cpu_times[thread];
|
||||
void G1RemSetSummary::set_control_thread_cpu_time(jlong value) {
|
||||
_control_thread_cpu_time = value;
|
||||
}
|
||||
|
||||
jlong G1RemSetSummary::worker_thread_cpu_time(uint thread) const {
|
||||
assert(_worker_threads_cpu_times != nullptr, "just checking");
|
||||
assert(thread < _num_worker_threads, "just checking");
|
||||
return _worker_threads_cpu_times[thread];
|
||||
}
|
||||
|
||||
jlong G1RemSetSummary::control_thread_cpu_time() const {
|
||||
return _control_thread_cpu_time;
|
||||
}
|
||||
|
||||
G1RemSetSummary::G1RemSetSummary(bool should_update) :
|
||||
_num_refine_threads(G1ConcRefinementThreads),
|
||||
_refine_threads_cpu_times(NEW_C_HEAP_ARRAY(jlong, _num_refine_threads, mtGC)) {
|
||||
_num_worker_threads(G1ConcRefinementThreads),
|
||||
_worker_threads_cpu_times(NEW_C_HEAP_ARRAY(jlong, _num_worker_threads, mtGC)),
|
||||
_control_thread_cpu_time(0) {
|
||||
|
||||
memset(_refine_threads_cpu_times, 0, sizeof(jlong) * _num_refine_threads);
|
||||
memset(_worker_threads_cpu_times, 0, sizeof(jlong) * _num_worker_threads);
|
||||
|
||||
if (should_update) {
|
||||
update();
|
||||
@ -77,23 +98,25 @@ G1RemSetSummary::G1RemSetSummary(bool should_update) :
|
||||
}
|
||||
|
||||
G1RemSetSummary::~G1RemSetSummary() {
|
||||
FREE_C_HEAP_ARRAY(jlong, _refine_threads_cpu_times);
|
||||
FREE_C_HEAP_ARRAY(jlong, _worker_threads_cpu_times);
|
||||
}
|
||||
|
||||
void G1RemSetSummary::set(G1RemSetSummary* other) {
|
||||
assert(other != nullptr, "just checking");
|
||||
assert(_num_refine_threads == other->_num_refine_threads, "just checking");
|
||||
assert(_num_worker_threads == other->_num_worker_threads, "just checking");
|
||||
|
||||
memcpy(_refine_threads_cpu_times, other->_refine_threads_cpu_times, sizeof(jlong) * _num_refine_threads);
|
||||
memcpy(_worker_threads_cpu_times, other->_worker_threads_cpu_times, sizeof(jlong) * _num_worker_threads);
|
||||
_control_thread_cpu_time = other->_control_thread_cpu_time;
|
||||
}
|
||||
|
||||
void G1RemSetSummary::subtract_from(G1RemSetSummary* other) {
|
||||
assert(other != nullptr, "just checking");
|
||||
assert(_num_refine_threads == other->_num_refine_threads, "just checking");
|
||||
assert(_num_worker_threads == other->_num_worker_threads, "just checking");
|
||||
|
||||
for (uint i = 0; i < _num_refine_threads; i++) {
|
||||
set_refine_thread_cpu_time(i, other->refine_thread_cpu_time(i) - refine_thread_cpu_time(i));
|
||||
for (uint i = 0; i < _num_worker_threads; i++) {
|
||||
set_worker_thread_cpu_time(i, other->worker_thread_cpu_time(i) - worker_thread_cpu_time(i));
|
||||
}
|
||||
_control_thread_cpu_time = other->_control_thread_cpu_time - _control_thread_cpu_time;
|
||||
}
|
||||
|
||||
class G1PerRegionTypeRemSetCounters {
|
||||
@ -376,9 +399,10 @@ public:
|
||||
void G1RemSetSummary::print_on(outputStream* out, bool show_thread_times) {
|
||||
if (show_thread_times) {
|
||||
out->print_cr(" Concurrent refinement threads times (s)");
|
||||
out->print_cr(" Control %5.2f Workers", (double)control_thread_cpu_time() / NANOSECS_PER_SEC);
|
||||
out->print(" ");
|
||||
for (uint i = 0; i < _num_refine_threads; i++) {
|
||||
out->print(" %5.2f", (double)refine_thread_cpu_time(i) / NANOSECS_PER_SEC);
|
||||
for (uint i = 0; i < _num_worker_threads; i++) {
|
||||
out->print(" %5.2f", (double)worker_thread_cpu_time(i) / NANOSECS_PER_SEC);
|
||||
}
|
||||
out->cr();
|
||||
}
|
||||
|
||||
@ -33,10 +33,12 @@ class G1RemSet;
|
||||
|
||||
// A G1RemSetSummary manages statistical information about the remembered set.
|
||||
class G1RemSetSummary {
|
||||
size_t _num_refine_threads;
|
||||
jlong* _refine_threads_cpu_times;
|
||||
size_t _num_worker_threads;
|
||||
jlong* _worker_threads_cpu_times;
|
||||
jlong _control_thread_cpu_time;
|
||||
|
||||
void set_refine_thread_cpu_time(uint thread, jlong value);
|
||||
void set_worker_thread_cpu_time(uint thread, jlong value);
|
||||
void set_control_thread_cpu_time(jlong value);
|
||||
|
||||
// Update this summary with current data from various places.
|
||||
void update();
|
||||
@ -53,7 +55,8 @@ public:
|
||||
|
||||
void print_on(outputStream* out, bool show_thread_times);
|
||||
|
||||
jlong refine_thread_cpu_time(uint thread) const;
|
||||
jlong worker_thread_cpu_time(uint thread) const;
|
||||
jlong control_thread_cpu_time() const;
|
||||
};
|
||||
|
||||
#endif // SHARE_GC_G1_G1REMSETSUMMARY_HPP
|
||||
|
||||
src/hotspot/share/gc/g1/g1ReviseYoungLengthTask.cpp (new file, 96 lines)
@ -0,0 +1,96 @@
|
||||
/*
|
||||
* Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "gc/g1/g1CollectedHeap.hpp"
|
||||
#include "gc/g1/g1Policy.hpp"
|
||||
#include "gc/g1/g1ReviseYoungLengthTask.hpp"
|
||||
#include "gc/g1/g1ServiceThread.hpp"
|
||||
#include "gc/shared/suspendibleThreadSet.hpp"
|
||||
|
||||
|
||||
jlong G1ReviseYoungLengthTask::reschedule_delay_ms() const {
|
||||
G1Policy* policy = G1CollectedHeap::heap()->policy();
|
||||
size_t available_bytes;
|
||||
if (policy->try_get_available_bytes_estimate(available_bytes)) {
|
||||
double predicted_time_to_next_gc_ms = policy->predict_time_to_next_gc_ms(available_bytes);
|
||||
|
||||
// Use a prime number close to 50ms as minimum time, different to other components
|
||||
// that derive their wait time from the try_get_available_bytes_estimate() call
|
||||
// to minimize interference.
|
||||
uint64_t const min_wait_time_ms = 47;
|
||||
|
||||
return policy->adjust_wait_time_ms(predicted_time_to_next_gc_ms, min_wait_time_ms);
|
||||
} else {
|
||||
// Failed to get estimate of available bytes. Try again asap.
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
class G1ReviseYoungLengthTask::RemSetSamplingClosure : public G1HeapRegionClosure {
|
||||
size_t _sampled_code_root_rs_length;
|
||||
|
||||
public:
|
||||
RemSetSamplingClosure() : _sampled_code_root_rs_length(0) { }
|
||||
|
||||
bool do_heap_region(G1HeapRegion* r) override {
|
||||
G1HeapRegionRemSet* rem_set = r->rem_set();
|
||||
_sampled_code_root_rs_length += rem_set->code_roots_list_length();
|
||||
return false;
|
||||
}
|
||||
|
||||
size_t sampled_code_root_rs_length() const { return _sampled_code_root_rs_length; }
|
||||
};
|
||||
|
||||
void G1ReviseYoungLengthTask::adjust_young_list_target_length() {
|
||||
G1CollectedHeap* g1h = G1CollectedHeap::heap();
|
||||
G1Policy* policy = g1h->policy();
|
||||
|
||||
assert(policy->use_adaptive_young_list_length(), "should not call otherwise");
|
||||
|
||||
size_t pending_cards;
|
||||
size_t current_to_collection_set_cards;
|
||||
{
|
||||
MutexLocker x(G1ReviseYoungLength_lock, Mutex::_no_safepoint_check_flag);
|
||||
pending_cards = policy->current_pending_cards();
|
||||
current_to_collection_set_cards = policy->current_to_collection_set_cards();
|
||||
}
|
||||
|
||||
RemSetSamplingClosure cl;
|
||||
g1h->collection_set()->iterate(&cl);
|
||||
|
||||
policy->revise_young_list_target_length(pending_cards,
|
||||
current_to_collection_set_cards,
|
||||
cl.sampled_code_root_rs_length());
|
||||
}
|
||||
|
||||
G1ReviseYoungLengthTask::G1ReviseYoungLengthTask(const char* name) :
|
||||
G1ServiceTask(name) { }
|
||||
|
||||
void G1ReviseYoungLengthTask::execute() {
|
||||
SuspendibleThreadSetJoiner sts;
|
||||
|
||||
adjust_young_list_target_length();
|
||||
|
||||
schedule(reschedule_delay_ms());
|
||||
}
|
||||
src/hotspot/share/gc/g1/g1ReviseYoungLengthTask.hpp (new file, 63 lines)
@ -0,0 +1,63 @@
|
||||
/*
|
||||
* Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef SHARE_GC_G1_G1REVISEYOUNGLENGTHTASK_HPP
|
||||
#define SHARE_GC_G1_G1REVISEYOUNGLENGTHTASK_HPP
|
||||
|
||||
#include "gc/g1/g1CardSetMemory.hpp"
|
||||
#include "gc/g1/g1HeapRegionRemSet.hpp"
|
||||
#include "gc/g1/g1MonotonicArenaFreePool.hpp"
|
||||
#include "gc/g1/g1ServiceThread.hpp"
|
||||
#include "utilities/growableArray.hpp"
|
||||
#include "utilities/ticks.hpp"
|
||||
|
||||
// ServiceTask to revise the young generation target length.
|
||||
class G1ReviseYoungLengthTask : public G1ServiceTask {
|
||||
|
||||
// The delay used to reschedule this task.
|
||||
jlong reschedule_delay_ms() const;
|
||||
|
||||
class RemSetSamplingClosure; // Helper class for calculating remembered set summary.
|
||||
|
||||
// Adjust the target length (in regions) of the young gen, based on the
|
||||
// current length of the remembered sets.
|
||||
//
|
||||
// At the end of a GC, G1 determines the length of the young gen based on
// how much time the next GC may take and when the next GC may occur
// according to the MMU.
//
// The assumption is that a significant part of the GC is spent on scanning
// the remembered sets (among other components), so this task constantly
// reevaluates the prediction for the remembered set scanning costs and potentially
// resizes the young gen. This may trigger a premature GC or even increase the young
// gen size to keep the pause time goal.
|
||||
void adjust_young_list_target_length();
|
||||
|
||||
public:
|
||||
explicit G1ReviseYoungLengthTask(const char* name);
|
||||
|
||||
void execute() override;
|
||||
};
|
||||
|
||||
#endif // SHARE_GC_G1_G1REVISEYOUNGLENGTHTASK_HPP
|
||||
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2018, 2024, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -25,7 +25,7 @@
|
||||
#define SHARE_GC_G1_G1THREADLOCALDATA_HPP
|
||||
|
||||
#include "gc/g1/g1BarrierSet.hpp"
|
||||
#include "gc/g1/g1DirtyCardQueue.hpp"
|
||||
#include "gc/g1/g1CardTable.hpp"
|
||||
#include "gc/g1/g1RegionPinCache.hpp"
|
||||
#include "gc/shared/gc_globals.hpp"
|
||||
#include "gc/shared/satbMarkQueue.hpp"
|
||||
@ -36,7 +36,7 @@
|
||||
class G1ThreadLocalData {
|
||||
private:
|
||||
SATBMarkQueue _satb_mark_queue;
|
||||
G1DirtyCardQueue _dirty_card_queue;
|
||||
G1CardTable::CardValue* _byte_map_base;
|
||||
|
||||
// Per-thread cache of pinned object count to reduce atomic operation traffic
|
||||
// due to region pinning. Holds the last region where the mutator pinned an
|
||||
@ -45,8 +45,8 @@ private:
|
||||
|
||||
G1ThreadLocalData() :
|
||||
_satb_mark_queue(&G1BarrierSet::satb_mark_queue_set()),
|
||||
_dirty_card_queue(&G1BarrierSet::dirty_card_queue_set()),
|
||||
_pin_cache() {}
|
||||
_byte_map_base(nullptr),
|
||||
_pin_cache() { }
|
||||
|
||||
static G1ThreadLocalData* data(Thread* thread) {
|
||||
assert(UseG1GC, "Sanity");
|
||||
@ -57,10 +57,6 @@ private:
|
||||
return Thread::gc_data_offset() + byte_offset_of(G1ThreadLocalData, _satb_mark_queue);
|
||||
}
|
||||
|
||||
static ByteSize dirty_card_queue_offset() {
|
||||
return Thread::gc_data_offset() + byte_offset_of(G1ThreadLocalData, _dirty_card_queue);
|
||||
}
|
||||
|
||||
public:
|
||||
static void create(Thread* thread) {
|
||||
new (data(thread)) G1ThreadLocalData();
|
||||
@ -74,10 +70,6 @@ public:
|
||||
return data(thread)->_satb_mark_queue;
|
||||
}
|
||||
|
||||
static G1DirtyCardQueue& dirty_card_queue(Thread* thread) {
|
||||
return data(thread)->_dirty_card_queue;
|
||||
}
|
||||
|
||||
static ByteSize satb_mark_queue_active_offset() {
|
||||
return satb_mark_queue_offset() + SATBMarkQueue::byte_offset_of_active();
|
||||
}
|
||||
@ -90,14 +82,20 @@ public:
|
||||
return satb_mark_queue_offset() + SATBMarkQueue::byte_offset_of_buf();
|
||||
}
|
||||
|
||||
static ByteSize dirty_card_queue_index_offset() {
|
||||
return dirty_card_queue_offset() + G1DirtyCardQueue::byte_offset_of_index();
|
||||
static ByteSize card_table_base_offset() {
|
||||
return Thread::gc_data_offset() + byte_offset_of(G1ThreadLocalData, _byte_map_base);
|
||||
}
|
||||
|
||||
static ByteSize dirty_card_queue_buffer_offset() {
|
||||
return dirty_card_queue_offset() + G1DirtyCardQueue::byte_offset_of_buf();
|
||||
static void set_byte_map_base(Thread* thread, G1CardTable::CardValue* new_byte_map_base) {
|
||||
data(thread)->_byte_map_base = new_byte_map_base;
|
||||
}
|
||||
|
||||
#ifndef PRODUCT
|
||||
static G1CardTable::CardValue* get_byte_map_base(Thread* thread) {
|
||||
return data(thread)->_byte_map_base;
|
||||
}
|
||||
#endif
|
||||
|
||||
static G1RegionPinCache& pin_count_cache(Thread* thread) {
|
||||
return data(thread)->_pin_cache;
|
||||
}
|
||||
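Storing the card table base directly in the thread-local data is what lets the post-barrier dirty cards without any dirty-card-queue bookkeeping. A minimal sketch of that address arithmetic, assuming a pre-biased base and a dirty value of zero (both assumptions here, not statements about the patch's generated barrier code):

#include <cstdint>

// Illustrative only: compute and conditionally dirty the card covering field_addr.
inline void mark_card(volatile uint8_t* byte_map_base, const void* field_addr, unsigned card_shift) {
  volatile uint8_t* card = byte_map_base + (reinterpret_cast<uintptr_t>(field_addr) >> card_shift);
  if (*card != 0) {  // assumed: 0 is the dirty value, so non-zero means not yet dirty
    *card = 0;       // dirty the card; no enqueue into a per-thread queue any more
  }
}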
|
||||
@ -39,7 +39,6 @@
|
||||
#include "gc/g1/g1MonitoringSupport.hpp"
|
||||
#include "gc/g1/g1ParScanThreadState.inline.hpp"
|
||||
#include "gc/g1/g1Policy.hpp"
|
||||
#include "gc/g1/g1RedirtyCardsQueue.hpp"
|
||||
#include "gc/g1/g1RegionPinCache.inline.hpp"
|
||||
#include "gc/g1/g1RemSet.hpp"
|
||||
#include "gc/g1/g1RootProcessor.hpp"
|
||||
@ -914,13 +913,8 @@ class G1STWRefProcProxyTask : public RefProcProxyTask {
|
||||
TaskTerminator _terminator;
|
||||
G1ScannerTasksQueueSet& _task_queues;
|
||||
|
||||
// Special closure for enqueuing discovered fields: during enqueue the card table
|
||||
// may not be in shape to properly handle normal barrier calls (e.g. card marks
|
||||
// in regions that failed evacuation, scribbling of various values by card table
|
||||
// scan code). Additionally the regular barrier enqueues into the "global"
|
||||
// DCQS, but during GC we need these to-be-refined entries in the GC local queue
|
||||
// so that after clearing the card table, the redirty cards phase will properly
|
||||
// mark all dirty cards to be picked up by refinement.
|
||||
// G1 specific closure for marking discovered fields. Need to mark the card in the
|
||||
// refinement table as the card table is in use by garbage collection.
|
||||
class G1EnqueueDiscoveredFieldClosure : public EnqueueDiscoveredFieldClosure {
|
||||
G1CollectedHeap* _g1h;
|
||||
G1ParScanThreadState* _pss;
|
||||
|
||||
@ -45,7 +45,6 @@ class G1MonotonicArenaMemoryStats;
|
||||
class G1NewTracer;
|
||||
class G1ParScanThreadStateSet;
|
||||
class G1Policy;
|
||||
class G1RedirtyCardsQueueSet;
|
||||
class G1RemSet;
|
||||
class G1SurvivorRegions;
|
||||
class G1YoungGCAllocationFailureInjector;
|
||||
|
||||
@ -287,7 +287,7 @@ public:
|
||||
_chunk_bitmap(mtGC) {
|
||||
|
||||
_num_evac_fail_regions = _evac_failure_regions->num_regions_evac_failed();
|
||||
_num_chunks_per_region = G1CollectedHeap::get_chunks_per_region();
|
||||
_num_chunks_per_region = G1CollectedHeap::get_chunks_per_region_for_scan();
|
||||
|
||||
_chunk_size = static_cast<uint>(G1HeapRegion::GrainWords / _num_chunks_per_region);
|
||||
|
||||
@ -300,7 +300,7 @@ public:
|
||||
double worker_cost() const override {
|
||||
assert(_evac_failure_regions->has_regions_evac_failed(), "Should not call this if there were no evacuation failures");
|
||||
|
||||
double workers_per_region = (double)G1CollectedHeap::get_chunks_per_region() / G1RestoreRetainedRegionChunksPerWorker;
|
||||
double workers_per_region = (double)G1CollectedHeap::get_chunks_per_region_for_scan() / G1RestoreRetainedRegionChunksPerWorker;
|
||||
return workers_per_region * _evac_failure_regions->num_regions_evac_failed();
|
||||
}
|
||||
|
||||
@ -480,43 +480,6 @@ public:
  }
};

class RedirtyLoggedCardTableEntryClosure : public G1CardTableEntryClosure {
  size_t _num_dirtied;
  G1CollectedHeap* _g1h;
  G1CardTable* _g1_ct;
  G1EvacFailureRegions* _evac_failure_regions;

  G1HeapRegion* region_for_card(CardValue* card_ptr) const {
    return _g1h->heap_region_containing(_g1_ct->addr_for(card_ptr));
  }

  bool will_become_free(G1HeapRegion* hr) const {
    // A region will be freed during the FreeCollectionSet phase if the region is in the
    // collection set and has not had an evacuation failure.
    return _g1h->is_in_cset(hr) && !_evac_failure_regions->contains(hr->hrm_index());
  }

public:
  RedirtyLoggedCardTableEntryClosure(G1CollectedHeap* g1h, G1EvacFailureRegions* evac_failure_regions) :
    G1CardTableEntryClosure(),
    _num_dirtied(0),
    _g1h(g1h),
    _g1_ct(g1h->card_table()),
    _evac_failure_regions(evac_failure_regions) { }

  void do_card_ptr(CardValue* card_ptr) override {
    G1HeapRegion* hr = region_for_card(card_ptr);

    // Should only dirty cards in regions that won't be freed.
    if (!will_become_free(hr)) {
      *card_ptr = G1CardTable::dirty_card_val();
      _num_dirtied++;
    }
  }

  size_t num_dirtied() const { return _num_dirtied; }
};

class G1PostEvacuateCollectionSetCleanupTask2::ProcessEvacuationFailedRegionsTask : public G1AbstractSubTask {
  G1EvacFailureRegions* _evac_failure_regions;
  G1HeapRegionClaimer _claimer;
@ -572,48 +535,6 @@ public:
  }
};

class G1PostEvacuateCollectionSetCleanupTask2::RedirtyLoggedCardsTask : public G1AbstractSubTask {
  BufferNodeList* _rdc_buffers;
  uint _num_buffer_lists;
  G1EvacFailureRegions* _evac_failure_regions;

public:
  RedirtyLoggedCardsTask(G1EvacFailureRegions* evac_failure_regions, BufferNodeList* rdc_buffers, uint num_buffer_lists) :
    G1AbstractSubTask(G1GCPhaseTimes::RedirtyCards),
    _rdc_buffers(rdc_buffers),
    _num_buffer_lists(num_buffer_lists),
    _evac_failure_regions(evac_failure_regions) { }

  double worker_cost() const override {
    // Needs more investigation.
    return G1CollectedHeap::heap()->workers()->active_workers();
  }

  void do_work(uint worker_id) override {
    RedirtyLoggedCardTableEntryClosure cl(G1CollectedHeap::heap(), _evac_failure_regions);

    uint start = worker_id;
    for (uint i = 0; i < _num_buffer_lists; i++) {
      uint index = (start + i) % _num_buffer_lists;

      BufferNode* next = AtomicAccess::load(&_rdc_buffers[index]._head);
      BufferNode* tail = AtomicAccess::load(&_rdc_buffers[index]._tail);

      while (next != nullptr) {
        BufferNode* node = next;
        next = AtomicAccess::cmpxchg(&_rdc_buffers[index]._head, node, (node != tail) ? node->next() : nullptr);
        if (next == node) {
          cl.apply_to_buffer(node, worker_id);
          next = (node != tail) ? node->next() : nullptr;
        } else {
          break; // If there is contention, move to the next BufferNodeList.
        }
      }
    }
    record_work_item(worker_id, 0, cl.num_dirtied());
  }
};

// Helper class to keep statistics for the collection set freeing
class FreeCSetStats {
  size_t _before_used_bytes; // Usage in regions successfully evacuated
@ -797,7 +718,6 @@ public:
    JFREventForRegion event(r, _worker_id);
    TimerForRegion timer(timer_for_region(r));


    if (r->is_young()) {
      assert_tracks_surviving_words(r);
      r->record_surv_words_in_group(_surviving_young_words[r->young_index_in_cset()]);
@ -908,24 +828,34 @@ public:
  }
};

class G1PostEvacuateCollectionSetCleanupTask2::ResizeTLABsTask : public G1AbstractSubTask {
class G1PostEvacuateCollectionSetCleanupTask2::ResizeTLABsAndSwapCardTableTask : public G1AbstractSubTask {
  G1JavaThreadsListClaimer _claimer;

  // There is not much work per thread so the number of threads per worker is high.
  static const uint ThreadsPerWorker = 250;

public:
  ResizeTLABsTask() : G1AbstractSubTask(G1GCPhaseTimes::ResizeThreadLABs), _claimer(ThreadsPerWorker) { }
  ResizeTLABsAndSwapCardTableTask()
    : G1AbstractSubTask(G1GCPhaseTimes::ResizeThreadLABs), _claimer(ThreadsPerWorker)
  {
    G1BarrierSet::g1_barrier_set()->swap_global_card_table();
  }

  void do_work(uint worker_id) override {
    class ResizeClosure : public ThreadClosure {

    class ResizeAndSwapCardTableClosure : public ThreadClosure {
    public:

      void do_thread(Thread* thread) {
        static_cast<JavaThread*>(thread)->tlab().resize();
        if (UseTLAB && ResizeTLAB) {
          static_cast<JavaThread*>(thread)->tlab().resize();
        }

        G1BarrierSet::g1_barrier_set()->update_card_table_base(thread);
      }
    } cl;
    _claimer.apply(&cl);
    } resize_and_swap_cl;

    _claimer.apply(&resize_and_swap_cl);
  }

  double worker_cost() const override {
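The task above folds the card-table swap into an existing per-thread pass: swap_global_card_table() exchanges which of the two tables mutators write to, and update_card_table_base(thread) re-publishes the new base to each Java thread. A minimal standalone sketch of the pointer-swap idea, with hypothetical field names, would be:

#include <cstdint>
#include <utility>

// Two equally sized byte maps; only the pointers change hands at the swap, so the
// cost is constant regardless of heap size. Field names are illustrative only.
struct ExampleTwoCardTables {
  uint8_t* _mutator_base;     // table the write barrier currently dirties
  uint8_t* _refinement_base;  // table refinement/GC currently scans

  void swap_global() {
    std::swap(_mutator_base, _refinement_base);
  }
};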
@ -968,13 +898,8 @@ G1PostEvacuateCollectionSetCleanupTask2::G1PostEvacuateCollectionSetCleanupTask2
  if (evac_failure_regions->has_regions_evac_failed()) {
    add_parallel_task(new ProcessEvacuationFailedRegionsTask(evac_failure_regions));
  }
  add_parallel_task(new RedirtyLoggedCardsTask(evac_failure_regions,
                                               per_thread_states->rdc_buffers(),
                                               per_thread_states->num_workers()));

  if (UseTLAB && ResizeTLAB) {
    add_parallel_task(new ResizeTLABsTask());
  }
  add_parallel_task(new ResizeTLABsAndSwapCardTableTask());
  add_parallel_task(new FreeCollectionSetTask(evacuation_info,
                                              per_thread_states->surviving_young_words(),
                                              evac_failure_regions));

@ -1,5 +1,5 @@
/*
 * Copyright (c) 2021, 2024, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2021, 2025, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -55,9 +55,8 @@ public:
// - Eagerly Reclaim Humongous Objects (s)
// - Update Derived Pointers (s)
// - Clear Retained Region Data (on evacuation failure)
// - Redirty Logged Cards
// - Free Collection Set
// - Resize TLABs
// - Resize TLABs and Swap Card Table
// - Reset the reusable PartialArrayStateManager.
class G1PostEvacuateCollectionSetCleanupTask2 : public G1BatchedTask {
  class EagerlyReclaimHumongousObjectsTask;
@ -66,9 +65,8 @@ class G1PostEvacuateCollectionSetCleanupTask2 : public G1BatchedTask {
#endif

  class ProcessEvacuationFailedRegionsTask;
  class RedirtyLoggedCardsTask;
  class FreeCollectionSetTask;
  class ResizeTLABsTask;
  class ResizeTLABsAndSwapCardTableTask;
  class ResetPartialArrayStateManagerTask;

public:

@ -24,7 +24,6 @@

#include "gc/g1/g1CollectedHeap.inline.hpp"
#include "gc/g1/g1ConcurrentRefineStats.hpp"
#include "gc/g1/g1DirtyCardQueue.hpp"
#include "gc/g1/g1RegionPinCache.inline.hpp"
#include "gc/g1/g1ThreadLocalData.hpp"
#include "gc/g1/g1YoungGCPreEvacuateTasks.hpp"
@ -35,23 +34,21 @@
#include "runtime/thread.inline.hpp"
#include "runtime/threads.hpp"

class G1PreEvacuateCollectionSetBatchTask::JavaThreadRetireTLABAndFlushLogs : public G1AbstractSubTask {
class G1PreEvacuateCollectionSetBatchTask::JavaThreadRetireTLABs : public G1AbstractSubTask {
  G1JavaThreadsListClaimer _claimer;

  // Per worker thread statistics.
  ThreadLocalAllocStats* _local_tlab_stats;
  G1ConcurrentRefineStats* _local_refinement_stats;

  uint _num_workers;

  // There is relatively little work to do per thread.
  static const uint ThreadsPerWorker = 250;

  struct RetireTLABAndFlushLogsClosure : public ThreadClosure {
  struct RetireTLABClosure : public ThreadClosure {
    ThreadLocalAllocStats _tlab_stats;
    G1ConcurrentRefineStats _refinement_stats;

    RetireTLABAndFlushLogsClosure() : _tlab_stats(), _refinement_stats() { }
    RetireTLABClosure() : _tlab_stats() { }

    void do_thread(Thread* thread) override {
      assert(thread->is_Java_thread(), "must be");
@ -61,37 +58,29 @@ class G1PreEvacuateCollectionSetBatchTask::JavaThreadRetireTLABAndFlushLogs : pu
      if (UseTLAB) {
        thread->retire_tlab(&_tlab_stats);
      }
      // Concatenate logs.
      G1DirtyCardQueueSet& qset = G1BarrierSet::dirty_card_queue_set();
      _refinement_stats += qset.concatenate_log_and_stats(thread);
      // Flush region pin count cache.
      G1ThreadLocalData::pin_count_cache(thread).flush();
    }
  };

public:
  JavaThreadRetireTLABAndFlushLogs() :
    G1AbstractSubTask(G1GCPhaseTimes::RetireTLABsAndFlushLogs),
  JavaThreadRetireTLABs() :
    G1AbstractSubTask(G1GCPhaseTimes::RetireTLABs),
    _claimer(ThreadsPerWorker),
    _local_tlab_stats(nullptr),
    _local_refinement_stats(nullptr),
    _num_workers(0) {
  }

  ~JavaThreadRetireTLABAndFlushLogs() {
    static_assert(std::is_trivially_destructible<G1ConcurrentRefineStats>::value, "must be");
    FREE_C_HEAP_ARRAY(G1ConcurrentRefineStats, _local_refinement_stats);

  ~JavaThreadRetireTLABs() {
    static_assert(std::is_trivially_destructible<ThreadLocalAllocStats>::value, "must be");
    FREE_C_HEAP_ARRAY(ThreadLocalAllocStats, _local_tlab_stats);
  }

  void do_work(uint worker_id) override {
    RetireTLABAndFlushLogsClosure tc;
    RetireTLABClosure tc;
    _claimer.apply(&tc);

    _local_tlab_stats[worker_id] = tc._tlab_stats;
    _local_refinement_stats[worker_id] = tc._refinement_stats;
  }

  double worker_cost() const override {
@ -101,11 +90,9 @@ public:
  void set_max_workers(uint max_workers) override {
    _num_workers = max_workers;
    _local_tlab_stats = NEW_C_HEAP_ARRAY(ThreadLocalAllocStats, _num_workers, mtGC);
    _local_refinement_stats = NEW_C_HEAP_ARRAY(G1ConcurrentRefineStats, _num_workers, mtGC);

    for (uint i = 0; i < _num_workers; i++) {
      ::new (&_local_tlab_stats[i]) ThreadLocalAllocStats();
      ::new (&_local_refinement_stats[i]) G1ConcurrentRefineStats();
    }
  }

@ -116,85 +103,15 @@ public:
    }
    return result;
  }

  G1ConcurrentRefineStats refinement_stats() const {
    G1ConcurrentRefineStats result;
    for (uint i = 0; i < _num_workers; i++) {
      result += _local_refinement_stats[i];
    }
    return result;
  }
};

class G1PreEvacuateCollectionSetBatchTask::NonJavaThreadFlushLogs : public G1AbstractSubTask {
  struct FlushLogsClosure : public ThreadClosure {
    G1ConcurrentRefineStats _refinement_stats;

    FlushLogsClosure() : _refinement_stats() { }

    void do_thread(Thread* thread) override {
      G1DirtyCardQueueSet& qset = G1BarrierSet::dirty_card_queue_set();
      _refinement_stats += qset.concatenate_log_and_stats(thread);

      assert(G1ThreadLocalData::pin_count_cache(thread).count() == 0, "NonJava thread has pinned Java objects");
    }
  } _tc;

public:
  NonJavaThreadFlushLogs() : G1AbstractSubTask(G1GCPhaseTimes::NonJavaThreadFlushLogs), _tc() { }

  void do_work(uint worker_id) override {
    Threads::non_java_threads_do(&_tc);
  }

  double worker_cost() const override {
    return 1.0;
  }

  G1ConcurrentRefineStats refinement_stats() const { return _tc._refinement_stats; }
};

G1PreEvacuateCollectionSetBatchTask::G1PreEvacuateCollectionSetBatchTask() :
  G1BatchedTask("Pre Evacuate Prepare", G1CollectedHeap::heap()->phase_times()),
  _old_pending_cards(G1BarrierSet::dirty_card_queue_set().num_cards()),
  _java_retire_task(new JavaThreadRetireTLABAndFlushLogs()),
  _non_java_retire_task(new NonJavaThreadFlushLogs()) {
  _java_retire_task(new JavaThreadRetireTLABs()) {

  // Disable mutator refinement until concurrent refinement decides otherwise.
  G1BarrierSet::dirty_card_queue_set().set_mutator_refinement_threshold(SIZE_MAX);

  add_serial_task(_non_java_retire_task);
  add_parallel_task(_java_retire_task);
}

static void verify_empty_dirty_card_logs() {
#ifdef ASSERT
  ResourceMark rm;

  struct Verifier : public ThreadClosure {
    Verifier() {}
    void do_thread(Thread* t) override {
      G1DirtyCardQueue& queue = G1ThreadLocalData::dirty_card_queue(t);
      assert(queue.is_empty(), "non-empty dirty card queue for thread %s", t->name());
    }
  } verifier;
  Threads::threads_do(&verifier);
#endif
}

G1PreEvacuateCollectionSetBatchTask::~G1PreEvacuateCollectionSetBatchTask() {
  _java_retire_task->tlab_stats().publish();

  G1DirtyCardQueueSet& qset = G1BarrierSet::dirty_card_queue_set();

  G1ConcurrentRefineStats total_refinement_stats;
  total_refinement_stats += _java_retire_task->refinement_stats();
  total_refinement_stats += _non_java_retire_task->refinement_stats();
  qset.update_refinement_stats(total_refinement_stats);

  verify_empty_dirty_card_logs();

  size_t pending_cards = qset.num_cards();
  size_t thread_buffer_cards = pending_cards - _old_pending_cards;
  G1CollectedHeap::heap()->policy()->record_concurrent_refinement_stats(pending_cards, thread_buffer_cards);
}

@ -1,5 +1,5 @@
/*
 * Copyright (c) 2023, 2024, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2023, 2025, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -28,18 +28,13 @@
#include "gc/g1/g1BatchedTask.hpp"

// Set of pre evacuate collection set tasks containing ("s" means serial):
// - Retire TLAB and Flush Logs (Java threads)
// - Retire TLABs (Java threads)
// - Flush pin count cache (Java threads)
// - Flush Logs (s) (Non-Java threads)
class G1PreEvacuateCollectionSetBatchTask : public G1BatchedTask {
  class JavaThreadRetireTLABAndFlushLogs;
  class NonJavaThreadFlushLogs;

  size_t _old_pending_cards;
  class JavaThreadRetireTLABs;

  // References to the tasks to retain access to statistics.
  JavaThreadRetireTLABAndFlushLogs* _java_retire_task;
  NonJavaThreadFlushLogs* _non_java_retire_task;
  JavaThreadRetireTLABs* _java_retire_task;

public:
  G1PreEvacuateCollectionSetBatchTask();

@ -162,6 +162,11 @@
          "a single expand attempt.") \
          range(0, 100) \
          \
  product(size_t, G1PerThreadPendingCardThreshold, 256, DIAGNOSTIC, \
          "Number of pending cards allowed on the card table per GC " \
          "worker thread before considering starting refinement.") \
          range(0, UINT_MAX) \
          \
  product(uint, G1ShrinkByPercentOfAvailable, 50, DIAGNOSTIC, \
          "When shrinking, maximum % of free space to free for a single " \
          "shrink attempt.") \
@ -188,10 +193,6 @@
          "bound of acceptable deviation range.") \
          constraint(G1CPUUsageShrinkConstraintFunc, AfterErgo) \
          \
  product(size_t, G1UpdateBufferSize, 256, \
          "Size of an update buffer") \
          constraint(G1UpdateBufferSizeConstraintFunc, AfterErgo) \
          \
  product(uint, G1RSetUpdatingPauseTimePercent, 10, \
          "A target percentage of time that is allowed to be spent on " \
          "processing remembered set update buffers during the collection " \

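The new G1PerThreadPendingCardThreshold flag above is defined per GC worker thread; one plausible reading is as a global pending-card budget scaled by the number of workers. The helper below is a hypothetical illustration of that reading, not code from this change; only the flag name and its default of 256 come from the diff.

#include <cstddef>

// Hypothetical policy check: refinement is considered once the number of pending
// cards exceeds the per-thread threshold multiplied by the active GC workers.
static bool should_consider_refinement(size_t pending_cards,
                                       unsigned active_gc_workers,
                                       size_t per_thread_threshold = 256) {
  return pending_cards > per_thread_threshold * active_gc_workers;
}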
@ -206,12 +206,6 @@ JVMFlag::Error G1SATBBufferSizeConstraintFunc(size_t value, bool verbose) {
                                       verbose);
}

JVMFlag::Error G1UpdateBufferSizeConstraintFunc(size_t value, bool verbose) {
  return buffer_size_constraint_helper(FLAG_MEMBER_ENUM(G1UpdateBufferSize),
                                       value,
                                       verbose);
}

JVMFlag::Error gc_cpu_usage_threshold_helper(JVMFlagsEnum flagid,
                                             uint value,
                                             bool verbose) {

@ -47,7 +47,6 @@
  \
  /* G1 PtrQueue buffer size constraints */ \
  f(size_t, G1SATBBufferSizeConstraintFunc) \
  f(size_t, G1UpdateBufferSizeConstraintFunc) \
  \
  /* G1 GC deviation counter threshold constraints */ \
  f(uint, G1CPUUsageExpandConstraintFunc) \

@ -82,8 +82,7 @@
  declare_constant(G1HeapRegionType::StartsHumongousTag) \
  declare_constant(G1HeapRegionType::ContinuesHumongousTag) \
  declare_constant(G1HeapRegionType::OldMask) \
  declare_constant(BarrierSet::G1BarrierSet) \
  declare_constant(G1CardTable::g1_young_gen)
  declare_constant(BarrierSet::G1BarrierSet)

#define VM_TYPES_G1GC(declare_type, \
                      declare_toplevel_type, \
@ -100,7 +99,6 @@
  declare_toplevel_type(PtrQueue) \
  declare_toplevel_type(G1HeapRegionType) \
  declare_toplevel_type(SATBMarkQueue) \
  declare_toplevel_type(G1DirtyCardQueue) \
  \
  declare_toplevel_type(G1CollectedHeap*) \
  declare_toplevel_type(G1HeapRegion*) \

@ -1,38 +0,0 @@
/*
 * Copyright (c) 2019, 2025, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "gc/shared/bufferNodeList.hpp"
#include "utilities/debug.hpp"

BufferNodeList::BufferNodeList() :
  _head(nullptr), _tail(nullptr), _entry_count(0) {}

BufferNodeList::BufferNodeList(BufferNode* head,
                               BufferNode* tail,
                               size_t entry_count) :
  _head(head), _tail(tail), _entry_count(entry_count)
{
  assert((_head == nullptr) == (_tail == nullptr), "invariant");
  assert((_head == nullptr) == (_entry_count == 0), "invariant");
}
@ -225,6 +225,9 @@ uintx CardTable::ct_max_alignment_constraint() {

#ifndef PRODUCT
void CardTable::verify_region(MemRegion mr, CardValue val, bool val_equals) {
  if (mr.is_empty()) {
    return;
  }
  CardValue* start = byte_for(mr.start());
  CardValue* end = byte_for(mr.last());
  bool failures = false;
@ -255,7 +258,8 @@ void CardTable::verify_dirty_region(MemRegion mr) {
}
#endif

void CardTable::print_on(outputStream* st) const {
  st->print_cr("Card table byte_map: [" PTR_FORMAT "," PTR_FORMAT "] _byte_map_base: " PTR_FORMAT,
void CardTable::print_on(outputStream* st, const char* description) const {
  st->print_cr("%s table byte_map: [" PTR_FORMAT "," PTR_FORMAT "] _byte_map_base: " PTR_FORMAT,
               description,
               p2i(_byte_map), p2i(_byte_map + _byte_map_size), p2i(_byte_map_base));
}

@ -1,5 +1,5 @@
/*
 * Copyright (c) 2000, 2024, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2000, 2025, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -203,12 +203,12 @@ public:

  virtual bool is_in_young(const void* p) const = 0;

  // Print a description of the memory for the card table
  virtual void print_on(outputStream* st) const;
  // Print card table information.
  void print_on(outputStream* st, const char* description = "Card") const;

  // val_equals -> it will check that all cards covered by mr equal val
  // !val_equals -> it will check that all cards covered by mr do not equal val
  void verify_region(MemRegion mr, CardValue val, bool val_equals) PRODUCT_RETURN;
  virtual void verify_region(MemRegion mr, CardValue val, bool val_equals) PRODUCT_RETURN;
  void verify_not_dirty_region(MemRegion mr) PRODUCT_RETURN;
  void verify_dirty_region(MemRegion mr) PRODUCT_RETURN;
};

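With the description parameter defaulting to "Card", existing callers keep their output unchanged while G1 can label its second table when printing. An illustrative use, with placeholder variable names, given the "%s table byte_map" format above:

  card_table->print_on(st);                      // prints "Card table byte_map: ..."
  refinement_table->print_on(st, "Refinement");  // prints "Refinement table byte_map: ..."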
@ -1,5 +1,5 @@
/*
 * Copyright (c) 2015, 2019, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2015, 2025, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -34,7 +34,7 @@ template <class T>
class WorkerDataArray : public CHeapObj<mtGC> {
  friend class WDAPrinter;
public:
  static const uint MaxThreadWorkItems = 9;
  static const uint MaxThreadWorkItems = 10;
private:
  T* _data;
  uint _length;

@ -589,10 +589,6 @@ void JVMCIRuntime::write_barrier_pre(JavaThread* thread, oopDesc* obj) {
  G1BarrierSetRuntime::write_ref_field_pre_entry(obj, thread);
}

void JVMCIRuntime::write_barrier_post(JavaThread* thread, volatile CardValue* card_addr) {
  G1BarrierSetRuntime::write_ref_field_post_entry(card_addr, thread);
}

#endif // INCLUDE_G1GC

JRT_LEAF(jboolean, JVMCIRuntime::validate_object(JavaThread* thread, oopDesc* parent, oopDesc* child))
