8342382: Implement JEP 522: G1 GC: Improve Throughput by Reducing Synchronization

Co-authored-by: Amit Kumar <amitkumar@openjdk.org>
Co-authored-by: Martin Doerr <mdoerr@openjdk.org>
Co-authored-by: Carlo Refice <carlo.refice@oracle.com>
Co-authored-by: Fei Yang <fyang@openjdk.org>
Reviewed-by: iwalulya, rcastanedalo, aph, ayang
Thomas Schatzl 2025-09-22 13:47:45 +00:00
parent ca182912a3
commit 8d5c005642
114 changed files with 3625 additions and 4681 deletions
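
Across the architecture ports shown below (aarch64, arm, ppc, riscv, s390) the diff applies the same transformation to the G1 post write barrier: the young-card filter, the StoreLoad fence, the card re-check, the dirty card queue enqueue and the fallback call to G1BarrierSetRuntime::write_ref_field_post_entry are removed, together with the C1 and C2 slow-path stubs and runtime stubs that carried them. What remains is a fence-free fast path that loads the card table base from G1ThreadLocalData::card_table_base_offset() instead of the global byte map base and dirties the card, optionally skipping cards that are already non-clean when UseCondCardMark is set. The following self-contained C++ sketch models that inline barrier on plain memory; the constants and names are illustrative assumptions, not HotSpot identifiers.

#include <cstddef>
#include <cstdint>

// Illustrative constants; the real values come from G1HeapRegion and G1CardTable.
const int     kLogHRGrainBytes = 22;   // assumed region grain size for this sketch
const int     kCardShift       = 9;    // assumed 512-byte cards
const uint8_t kDirtyCard       = 0;    // the diff asserts dirty_card_val() == 0
const uint8_t kCleanCard       = 0xff; // the diff asserts clean_card_val() == 0xff

// Model of the new inline post barrier: per-thread card table base, no fence,
// no dirty card queue, optional conditional card mark.
inline void g1_post_barrier_model(uint8_t* thread_card_table_base,
                                  uintptr_t store_addr,
                                  uintptr_t new_val,
                                  bool use_cond_card_mark) {
  // Does the store cross heap regions? If not, nothing to remember.
  if (((store_addr ^ new_val) >> kLogHRGrainBytes) == 0) {
    return;
  }
  // Crosses regions, but stores null? Elided when the compiler proves non-null.
  if (new_val == 0) {
    return;
  }
  uint8_t* card = thread_card_table_base + (store_addr >> kCardShift);
  // UseCondCardMark: skip the store if the card is already non-clean.
  if (use_cond_card_mark && *card != kCleanCard) {
    return;
  }
  *card = kDirtyCard;
}
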


@ -86,15 +86,48 @@ void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm
}
}
void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
Register start, Register count, Register scratch, RegSet saved_regs) {
__ push(saved_regs, sp);
assert_different_registers(start, count, scratch);
assert_different_registers(c_rarg0, count);
__ mov(c_rarg0, start);
__ mov(c_rarg1, count);
__ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_post_entry), 2);
__ pop(saved_regs, sp);
void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm,
DecoratorSet decorators,
Register start,
Register count,
Register scratch,
RegSet saved_regs) {
Label done;
Label loop;
Label next;
__ cbz(count, done);
// Calculate the number of card marks to set. Since the object might start and
// end within a card, we need to calculate this via the card table indexes of
// the actual start and last addresses covered by the object.
// Temporarily use the count register for the last element address.
__ lea(count, Address(start, count, Address::lsl(LogBytesPerHeapOop))); // end = start + count << LogBytesPerHeapOop
__ sub(count, count, BytesPerHeapOop); // Use last element address for end.
__ lsr(start, start, CardTable::card_shift());
__ lsr(count, count, CardTable::card_shift());
__ sub(count, count, start); // Number of bytes to mark - 1.
// Add card table base offset to start.
__ ldr(scratch, Address(rthread, in_bytes(G1ThreadLocalData::card_table_base_offset())));
__ add(start, start, scratch);
__ bind(loop);
if (UseCondCardMark) {
__ ldrb(scratch, Address(start, count));
// Instead of loading clean_card_val and comparing, we exploit the fact that
// the LSB of non-clean cards is always 0, and the LSB of clean cards 1.
__ tbz(scratch, 0, next);
}
static_assert(G1CardTable::dirty_card_val() == 0, "must be to use zr");
__ strb(zr, Address(start, count));
__ bind(next);
__ subs(count, count, 1);
__ br(Assembler::GE, loop);
__ bind(done);
}
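
The array post barrier above no longer calls G1BarrierSetRuntime::write_ref_array_post_entry; it dirties the covered cards inline. As the comment notes, the first and last elements need not be card aligned, so the number of cards is derived from the card table indexes of the first and last element addresses rather than from the element count alone. A hedged C++ model of that computation follows; kCardShift and kBytesPerHeapOop are assumed placeholder values.

#include <cstddef>
#include <cstdint>

// Model of the card-range computation in gen_write_ref_array_post_barrier.
const int    kCardShift       = 9;
const size_t kBytesPerHeapOop = 8;

inline void dirty_cards_for_array_model(uint8_t* card_table_base,
                                        uintptr_t start, size_t count,
                                        bool use_cond_card_mark) {
  if (count == 0) {
    return;
  }
  // Use the address of the last element, not the exclusive end, so that an
  // array ending exactly on a card boundary does not touch one card too many.
  uintptr_t last       = start + count * kBytesPerHeapOop - kBytesPerHeapOop;
  uintptr_t first_card = start >> kCardShift;
  uintptr_t last_card  = last  >> kCardShift;
  // Walk from the last card down to the first, mirroring the descending
  // count register in the assembly loop.
  for (intptr_t i = (intptr_t)(last_card - first_card); i >= 0; i--) {
    uint8_t* card = card_table_base + first_card + (uintptr_t)i;
    if (use_cond_card_mark && *card != 0xff) {  // already non-clean: skip
      continue;
    }
    *card = 0;  // dirty_card_val() == 0 per the static_assert above
  }
}
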
static void generate_queue_test_and_insertion(MacroAssembler* masm, ByteSize index_offset, ByteSize buffer_offset, Label& runtime,
@ -202,10 +235,14 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
static void generate_post_barrier_fast_path(MacroAssembler* masm,
const Register store_addr,
const Register new_val,
const Register thread,
const Register tmp1,
const Register tmp2,
Label& done,
bool new_val_may_be_null) {
assert(thread == rthread, "must be");
assert_different_registers(store_addr, new_val, thread, tmp1, tmp2, noreg, rscratch1);
// Does store cross heap regions?
__ eor(tmp1, store_addr, new_val); // tmp1 := store address ^ new value
__ lsr(tmp1, tmp1, G1HeapRegion::LogOfHRGrainBytes); // tmp1 := ((store address ^ new value) >> LogOfHRGrainBytes)
@ -214,33 +251,19 @@ static void generate_post_barrier_fast_path(MacroAssembler* masm,
if (new_val_may_be_null) {
__ cbz(new_val, done);
}
// Storing region crossing non-null, is card young?
// Storing region crossing non-null.
__ lsr(tmp1, store_addr, CardTable::card_shift()); // tmp1 := card address relative to card table base
__ load_byte_map_base(tmp2); // tmp2 := card table base address
__ add(tmp1, tmp1, tmp2); // tmp1 := card address
__ ldrb(tmp2, Address(tmp1)); // tmp2 := card
__ cmpw(tmp2, (int)G1CardTable::g1_young_card_val()); // tmp2 := card == young_card_val?
}
static void generate_post_barrier_slow_path(MacroAssembler* masm,
const Register thread,
const Register tmp1,
const Register tmp2,
Label& done,
Label& runtime) {
__ membar(Assembler::StoreLoad); // StoreLoad membar
__ ldrb(tmp2, Address(tmp1)); // tmp2 := card
__ cbzw(tmp2, done);
// Storing a region crossing, non-null oop, card is clean.
// Dirty card and log.
STATIC_ASSERT(CardTable::dirty_card_val() == 0);
__ strb(zr, Address(tmp1)); // *(card address) := dirty_card_val
generate_queue_test_and_insertion(masm,
G1ThreadLocalData::dirty_card_queue_index_offset(),
G1ThreadLocalData::dirty_card_queue_buffer_offset(),
runtime,
thread, tmp1, tmp2, rscratch1);
__ b(done);
Address card_table_addr(thread, in_bytes(G1ThreadLocalData::card_table_base_offset()));
__ ldr(tmp2, card_table_addr); // tmp2 := card table base address
if (UseCondCardMark) {
__ ldrb(rscratch1, Address(tmp1, tmp2)); // rscratch1 := card
// Instead of loading clean_card_val and comparing, we exploit the fact that
// the LSB of non-clean cards is always 0, and the LSB of clean cards 1.
__ tbz(rscratch1, 0, done);
}
static_assert(G1CardTable::dirty_card_val() == 0, "must be to use zr");
__ strb(zr, Address(tmp1, tmp2)); // *(card address) := dirty_card_val
}
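
Two details of the new fast path are easy to miss. First, the card table base now comes from G1ThreadLocalData::card_table_base_offset() on the current thread rather than from load_byte_map_base, presumably so the base a thread writes through can be switched without a fence; the diff only shows the barrier side of that. Second, the UseCondCardMark filter never materializes clean_card_val(): per the comment above, clean cards are the only ones with the lowest bit set (clean_card_val() is 0xff and dirty_card_val() is 0, as the nearby static_asserts require), so a single tbz on bit 0 distinguishes clean from non-clean. A minimal C++ check of that bit trick, under those assumed card values:

#include <cassert>
#include <cstdint>

int main() {
  // Assumed card values, taken from the static_asserts in this change:
  // clean_card_val() == 0xff (LSB 1), dirty_card_val() == 0 (LSB 0).
  const uint8_t clean_card = 0xff;
  const uint8_t dirty_card = 0x00;
  // tbz(card, 0, skip) branches when bit 0 is clear, i.e. the card is already
  // non-clean and the dirtying store can be skipped.
  assert((clean_card & 1) == 1);  // clean: fall through and dirty the card
  assert((dirty_card & 1) == 0);  // non-clean: branch past the store
  return 0;
}
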
void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm,
@ -249,27 +272,8 @@ void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm,
Register thread,
Register tmp1,
Register tmp2) {
assert(thread == rthread, "must be");
assert_different_registers(store_addr, new_val, thread, tmp1, tmp2,
rscratch1);
assert(store_addr != noreg && new_val != noreg && tmp1 != noreg
&& tmp2 != noreg, "expecting a register");
Label done;
Label runtime;
generate_post_barrier_fast_path(masm, store_addr, new_val, tmp1, tmp2, done, true /* new_val_may_be_null */);
// If card is young, jump to done
__ br(Assembler::EQ, done);
generate_post_barrier_slow_path(masm, thread, tmp1, tmp2, done, runtime);
__ bind(runtime);
// save the live input values
RegSet saved = RegSet::of(store_addr);
__ push(saved, sp);
__ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), tmp1, thread);
__ pop(saved, sp);
generate_post_barrier_fast_path(masm, store_addr, new_val, thread, tmp1, tmp2, done, false /* new_val_may_be_null */);
__ bind(done);
}
@ -329,38 +333,10 @@ void G1BarrierSetAssembler::g1_write_barrier_post_c2(MacroAssembler* masm,
Register thread,
Register tmp1,
Register tmp2,
G1PostBarrierStubC2* stub) {
assert(thread == rthread, "must be");
assert_different_registers(store_addr, new_val, thread, tmp1, tmp2,
rscratch1);
assert(store_addr != noreg && new_val != noreg && tmp1 != noreg
&& tmp2 != noreg, "expecting a register");
stub->initialize_registers(thread, tmp1, tmp2);
bool new_val_may_be_null = (stub->barrier_data() & G1C2BarrierPostNotNull) == 0;
generate_post_barrier_fast_path(masm, store_addr, new_val, tmp1, tmp2, *stub->continuation(), new_val_may_be_null);
// If card is not young, jump to stub (slow path)
__ br(Assembler::NE, *stub->entry());
__ bind(*stub->continuation());
}
void G1BarrierSetAssembler::generate_c2_post_barrier_stub(MacroAssembler* masm,
G1PostBarrierStubC2* stub) const {
Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
Label runtime;
Register thread = stub->thread();
Register tmp1 = stub->tmp1(); // tmp1 holds the card address.
Register tmp2 = stub->tmp2();
assert(stub->tmp3() == noreg, "not needed in this platform");
__ bind(*stub->entry());
generate_post_barrier_slow_path(masm, thread, tmp1, tmp2, *stub->continuation(), runtime);
__ bind(runtime);
generate_c2_barrier_runtime_call(masm, stub, tmp1, CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry));
__ b(*stub->continuation());
bool new_val_may_be_null) {
Label done;
generate_post_barrier_fast_path(masm, store_addr, new_val, thread, tmp1, tmp2, done, new_val_may_be_null);
__ bind(done);
}
#endif // COMPILER2
@ -456,20 +432,19 @@ void G1BarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrier
__ b(*stub->continuation());
}
void G1BarrierSetAssembler::gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub) {
G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
__ bind(*stub->entry());
assert(stub->addr()->is_register(), "Precondition.");
assert(stub->new_val()->is_register(), "Precondition.");
Register new_val_reg = stub->new_val()->as_register();
__ cbz(new_val_reg, *stub->continuation());
ce->store_parameter(stub->addr()->as_pointer_register(), 0);
__ far_call(RuntimeAddress(bs->post_barrier_c1_runtime_code_blob()->code_begin()));
__ b(*stub->continuation());
}
#undef __
void G1BarrierSetAssembler::g1_write_barrier_post_c1(MacroAssembler* masm,
Register store_addr,
Register new_val,
Register thread,
Register tmp1,
Register tmp2) {
Label done;
generate_post_barrier_fast_path(masm, store_addr, new_val, thread, tmp1, tmp2, done, true /* new_val_may_be_null */);
masm->bind(done);
}
#define __ sasm->
void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) {
@ -521,74 +496,6 @@ void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler*
__ epilogue();
}
void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler* sasm) {
__ prologue("g1_post_barrier", false);
// arg0: store_address
Address store_addr(rfp, 2*BytesPerWord);
BarrierSet* bs = BarrierSet::barrier_set();
CardTableBarrierSet* ctbs = barrier_set_cast<CardTableBarrierSet>(bs);
CardTable* ct = ctbs->card_table();
Label done;
Label runtime;
// At this point we know new_value is non-null and the new_value crosses regions.
// Must check to see if card is already dirty
const Register thread = rthread;
Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset()));
Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset()));
const Register card_offset = rscratch2;
// LR is free here, so we can use it to hold the byte_map_base.
const Register byte_map_base = lr;
assert_different_registers(card_offset, byte_map_base, rscratch1);
__ load_parameter(0, card_offset);
__ lsr(card_offset, card_offset, CardTable::card_shift());
__ load_byte_map_base(byte_map_base);
__ ldrb(rscratch1, Address(byte_map_base, card_offset));
__ cmpw(rscratch1, (int)G1CardTable::g1_young_card_val());
__ br(Assembler::EQ, done);
assert((int)CardTable::dirty_card_val() == 0, "must be 0");
__ membar(Assembler::StoreLoad);
__ ldrb(rscratch1, Address(byte_map_base, card_offset));
__ cbzw(rscratch1, done);
// storing region crossing non-null, card is clean.
// dirty card and log.
__ strb(zr, Address(byte_map_base, card_offset));
// Convert card offset into an address in card_addr
Register card_addr = card_offset;
__ add(card_addr, byte_map_base, card_addr);
__ ldr(rscratch1, queue_index);
__ cbz(rscratch1, runtime);
__ sub(rscratch1, rscratch1, wordSize);
__ str(rscratch1, queue_index);
// Reuse LR to hold buffer_addr
const Register buffer_addr = lr;
__ ldr(buffer_addr, buffer);
__ str(card_addr, Address(buffer_addr, rscratch1));
__ b(done);
__ bind(runtime);
__ push_call_clobbered_registers();
__ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, thread);
__ pop_call_clobbered_registers();
__ bind(done);
__ epilogue();
}
#undef __
#endif // COMPILER1


@ -1,5 +1,5 @@
/*
* Copyright (c) 2018, 2024, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -32,9 +32,7 @@
class LIR_Assembler;
class StubAssembler;
class G1PreBarrierStub;
class G1PostBarrierStub;
class G1PreBarrierStubC2;
class G1PostBarrierStubC2;
class G1BarrierSetAssembler: public ModRefBarrierSetAssembler {
protected:
@ -65,10 +63,15 @@ protected:
public:
#ifdef COMPILER1
void gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub);
void gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub);
void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm);
void generate_c1_post_barrier_runtime_stub(StubAssembler* sasm);
void g1_write_barrier_post_c1(MacroAssembler* masm,
Register store_addr,
Register new_val,
Register thread,
Register tmp1,
Register tmp2);
#endif
#ifdef COMPILER2
@ -87,9 +90,7 @@ public:
Register thread,
Register tmp1,
Register tmp2,
G1PostBarrierStubC2* c2_stub);
void generate_c2_post_barrier_stub(MacroAssembler* masm,
G1PostBarrierStubC2* stub) const;
bool new_val_may_be_null);
#endif
void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,


@ -1,5 +1,5 @@
//
// Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
// Copyright (c) 2024, 2025, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
@ -62,13 +62,13 @@ static void write_barrier_post(MacroAssembler* masm,
Register new_val,
Register tmp1,
Register tmp2) {
if (!G1PostBarrierStubC2::needs_barrier(node)) {
if (!G1BarrierStubC2::needs_post_barrier(node)) {
return;
}
Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
G1BarrierSetAssembler* g1_asm = static_cast<G1BarrierSetAssembler*>(BarrierSet::barrier_set()->barrier_set_assembler());
G1PostBarrierStubC2* const stub = G1PostBarrierStubC2::create(node);
g1_asm->g1_write_barrier_post_c2(masm, store_addr, new_val, rthread, tmp1, tmp2, stub);
bool new_val_may_be_null = G1BarrierStubC2::post_new_val_may_be_null(node);
g1_asm->g1_write_barrier_post_c2(masm, store_addr, new_val, rthread, tmp1, tmp2, new_val_may_be_null);
}
%}


@ -201,12 +201,15 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
static void generate_post_barrier_fast_path(MacroAssembler* masm,
const Register store_addr,
const Register new_val,
const Register thread,
const Register tmp1,
const Register tmp2,
Label& done,
bool new_val_may_be_null) {
// Does store cross heap regions?
assert(thread == Rthread, "must be");
assert_different_registers(store_addr, new_val, thread, tmp1, tmp2, noreg);
// Does store cross heap regions?
__ eor(tmp1, store_addr, new_val);
__ movs(tmp1, AsmOperand(tmp1, lsr, G1HeapRegion::LogOfHRGrainBytes));
__ b(done, eq);
@ -215,76 +218,34 @@ static void generate_post_barrier_fast_path(MacroAssembler* masm,
if (new_val_may_be_null) {
__ cbz(new_val, done);
}
// storing region crossing non-null, is card already dirty?
const Register card_addr = tmp1;
CardTableBarrierSet* ct = barrier_set_cast<CardTableBarrierSet>(BarrierSet::barrier_set());
__ mov_address(tmp2, (address)ct->card_table()->byte_map_base());
__ add(card_addr, tmp2, AsmOperand(store_addr, lsr, CardTable::card_shift()));
// storing region crossing non-null, is card already non-clean?
Address card_table_addr(thread, in_bytes(G1ThreadLocalData::card_table_base_offset()));
__ ldr(tmp2, card_table_addr);
__ add(tmp1, tmp2, AsmOperand(store_addr, lsr, CardTable::card_shift()));
__ ldrb(tmp2, Address(card_addr));
__ cmp(tmp2, (int)G1CardTable::g1_young_card_val());
if (UseCondCardMark) {
__ ldrb(tmp2, Address(tmp1));
// Instead of loading clean_card_val and comparing, we exploit the fact that
// the LSB of non-clean cards is always 0, and the LSB of clean cards 1.
__ tbz(tmp2, 0, done);
}
static_assert(G1CardTable::dirty_card_val() == 0, "must be to use zero_register()");
__ zero_register(tmp2);
__ strb(tmp2, Address(tmp1)); // *(card address) := dirty_card_val
}
static void generate_post_barrier_slow_path(MacroAssembler* masm,
const Register thread,
const Register tmp1,
const Register tmp2,
const Register tmp3,
Label& done,
Label& runtime) {
__ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreLoad), tmp2);
assert(CardTable::dirty_card_val() == 0, "adjust this code");
// card_addr is loaded by generate_post_barrier_fast_path
const Register card_addr = tmp1;
__ ldrb(tmp2, Address(card_addr));
__ cbz(tmp2, done);
// storing a region crossing, non-null oop, card is clean.
// dirty card and log.
__ strb(__ zero_register(tmp2), Address(card_addr));
generate_queue_test_and_insertion(masm,
G1ThreadLocalData::dirty_card_queue_index_offset(),
G1ThreadLocalData::dirty_card_queue_buffer_offset(),
runtime,
thread, card_addr, tmp2, tmp3);
__ b(done);
}
// G1 post-barrier.
// Blows all volatile registers (R0-R3, LR).
void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm,
Register store_addr,
Register new_val,
Register tmp1,
Register tmp2,
Register tmp3) {
Register store_addr,
Register new_val,
Register tmp1,
Register tmp2,
Register tmp3) {
Label done;
Label runtime;
generate_post_barrier_fast_path(masm, store_addr, new_val, tmp1, tmp2, done, true /* new_val_may_be_null */);
// If card is young, jump to done
// card_addr and card are loaded by generate_post_barrier_fast_path
const Register card = tmp2;
const Register card_addr = tmp1;
__ b(done, eq);
generate_post_barrier_slow_path(masm, Rthread, card_addr, tmp2, tmp3, done, runtime);
__ bind(runtime);
RegisterSet set = RegisterSet(store_addr) | RegisterSet(R0, R3) | RegisterSet(R12);
__ push(set);
if (card_addr != R0) {
__ mov(R0, card_addr);
}
__ mov(R1, Rthread);
__ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), R0, R1);
__ pop(set);
generate_post_barrier_fast_path(masm, store_addr, new_val, Rthread, tmp1, tmp2, done, true /* new_val_may_be_null */);
__ bind(done);
}
@ -344,35 +305,10 @@ void G1BarrierSetAssembler::g1_write_barrier_post_c2(MacroAssembler* masm,
Register tmp1,
Register tmp2,
Register tmp3,
G1PostBarrierStubC2* stub) {
assert(thread == Rthread, "must be");
assert_different_registers(store_addr, new_val, thread, tmp1, tmp2, noreg);
stub->initialize_registers(thread, tmp1, tmp2, tmp3);
bool new_val_may_be_null = (stub->barrier_data() & G1C2BarrierPostNotNull) == 0;
generate_post_barrier_fast_path(masm, store_addr, new_val, tmp1, tmp2, *stub->continuation(), new_val_may_be_null);
// If card is not young, jump to stub (slow path)
__ b(*stub->entry(), ne);
__ bind(*stub->continuation());
}
void G1BarrierSetAssembler::generate_c2_post_barrier_stub(MacroAssembler* masm,
G1PostBarrierStubC2* stub) const {
Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
Label runtime;
Register thread = stub->thread();
Register tmp1 = stub->tmp1(); // tmp1 holds the card address.
Register tmp2 = stub->tmp2();
Register tmp3 = stub->tmp3();
__ bind(*stub->entry());
generate_post_barrier_slow_path(masm, thread, tmp1, tmp2, tmp3, *stub->continuation(), runtime);
__ bind(runtime);
generate_c2_barrier_runtime_call(masm, stub, tmp1, CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), tmp2);
__ b(*stub->continuation());
bool new_val_may_be_null) {
Label done;
generate_post_barrier_fast_path(masm, store_addr, new_val, thread, tmp1, tmp2, done, new_val_may_be_null);
__ bind(done);
}
#endif // COMPILER2
@ -463,20 +399,19 @@ void G1BarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrier
__ b(*stub->continuation());
}
void G1BarrierSetAssembler::gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub) {
G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
__ bind(*stub->entry());
assert(stub->addr()->is_register(), "Precondition.");
assert(stub->new_val()->is_register(), "Precondition.");
Register new_val_reg = stub->new_val()->as_register();
__ cbz(new_val_reg, *stub->continuation());
ce->verify_reserved_argument_area_size(1);
__ str(stub->addr()->as_pointer_register(), Address(SP));
__ call(bs->post_barrier_c1_runtime_code_blob()->code_begin(), relocInfo::runtime_call_type);
__ b(*stub->continuation());
#undef __
void G1BarrierSetAssembler::g1_write_barrier_post_c1(MacroAssembler* masm,
Register store_addr,
Register new_val,
Register thread,
Register tmp1,
Register tmp2) {
Label done;
generate_post_barrier_fast_path(masm, store_addr, new_val, thread, tmp1, tmp2, done, true /* new_val_may_be_null */);
masm->bind(done);
}
#undef __
#define __ sasm->
void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) {
@ -536,102 +471,6 @@ void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler*
__ b(done);
}
void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler* sasm) {
// Input:
// - store_addr, pushed on the stack
__ set_info("g1_post_barrier_slow_id", false);
Label done;
Label recheck;
Label runtime;
Address queue_index(Rthread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset()));
Address buffer(Rthread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset()));
AddressLiteral cardtable(ci_card_table_address_as<address>(), relocInfo::none);
// save at least the registers that need saving if the runtime is called
const RegisterSet saved_regs = RegisterSet(R0,R3) | RegisterSet(R12) | RegisterSet(LR);
const int nb_saved_regs = 6;
assert(nb_saved_regs == saved_regs.size(), "fix nb_saved_regs");
__ push(saved_regs);
const Register r_card_addr_0 = R0; // must be R0 for the slow case
const Register r_obj_0 = R0;
const Register r_card_base_1 = R1;
const Register r_tmp2 = R2;
const Register r_index_2 = R2;
const Register r_buffer_3 = R3;
const Register tmp1 = Rtemp;
__ ldr(r_obj_0, Address(SP, nb_saved_regs*wordSize));
// Note: there is a comment in x86 code about not using
// ExternalAddress / lea, due to relocation not working
// properly for that address. Should be OK for arm, where we
// explicitly specify that 'cardtable' has a relocInfo::none
// type.
__ lea(r_card_base_1, cardtable);
__ add(r_card_addr_0, r_card_base_1, AsmOperand(r_obj_0, lsr, CardTable::card_shift()));
// first quick check without barrier
__ ldrb(r_tmp2, Address(r_card_addr_0));
__ cmp(r_tmp2, (int)G1CardTable::g1_young_card_val());
__ b(recheck, ne);
__ bind(done);
__ pop(saved_regs);
__ ret();
__ bind(recheck);
__ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreLoad), tmp1);
// reload card state after the barrier that ensures the stored oop was visible
__ ldrb(r_tmp2, Address(r_card_addr_0));
assert(CardTable::dirty_card_val() == 0, "adjust this code");
__ cbz(r_tmp2, done);
// storing region crossing non-null, card is clean.
// dirty card and log.
assert(0 == (int)CardTable::dirty_card_val(), "adjust this code");
if ((ci_card_table_address_as<intptr_t>() & 0xff) == 0) {
// Card table is aligned so the lowest byte of the table address base is zero.
__ strb(r_card_base_1, Address(r_card_addr_0));
} else {
__ strb(__ zero_register(r_tmp2), Address(r_card_addr_0));
}
__ ldr(r_index_2, queue_index);
__ ldr(r_buffer_3, buffer);
__ subs(r_index_2, r_index_2, wordSize);
__ b(runtime, lt); // go to runtime if now negative
__ str(r_index_2, queue_index);
__ str(r_card_addr_0, Address(r_buffer_3, r_index_2));
__ b(done);
__ bind(runtime);
__ save_live_registers();
assert(r_card_addr_0 == c_rarg0, "card_addr should be in R0");
__ mov(c_rarg1, Rthread);
__ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), c_rarg0, c_rarg1);
__ restore_live_registers_without_return();
__ b(done);
}
#undef __
#endif // COMPILER1


@ -1,5 +1,5 @@
/*
* Copyright (c) 2018, 2024, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -32,9 +32,7 @@
class LIR_Assembler;
class StubAssembler;
class G1PreBarrierStub;
class G1PostBarrierStub;
class G1PreBarrierStubC2;
class G1PostBarrierStubC2;
class G1BarrierSetAssembler: public ModRefBarrierSetAssembler {
protected:
@ -66,10 +64,15 @@ public:
#ifdef COMPILER1
public:
void gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub);
void gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub);
void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm);
void generate_c1_post_barrier_runtime_stub(StubAssembler* sasm);
void g1_write_barrier_post_c1(MacroAssembler* masm,
Register store_addr,
Register new_val,
Register thread,
Register tmp1,
Register tmp2);
#endif
#ifdef COMPILER2
@ -89,9 +92,7 @@ public:
Register tmp1,
Register tmp2,
Register tmp3,
G1PostBarrierStubC2* c2_stub);
void generate_c2_post_barrier_stub(MacroAssembler* masm,
G1PostBarrierStubC2* stub) const;
bool new_val_may_be_null);
#endif
};


@ -1,5 +1,5 @@
//
// Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
// Copyright (c) 2024, 2025, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
@ -63,13 +63,13 @@ static void write_barrier_post(MacroAssembler* masm,
Register tmp1,
Register tmp2,
Register tmp3) {
if (!G1PostBarrierStubC2::needs_barrier(node)) {
if (!G1BarrierStubC2::needs_post_barrier(node)) {
return;
}
Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
G1BarrierSetAssembler* g1_asm = static_cast<G1BarrierSetAssembler*>(BarrierSet::barrier_set()->barrier_set_assembler());
G1PostBarrierStubC2* const stub = G1PostBarrierStubC2::create(node);
g1_asm->g1_write_barrier_post_c2(masm, store_addr, new_val, Rthread, tmp1, tmp2, tmp3, stub);
bool new_val_may_be_null = G1BarrierStubC2::post_new_val_may_be_null(node);
g1_asm->g1_write_barrier_post_c2(masm, store_addr, new_val, Rthread, tmp1, tmp2, tmp3, new_val_may_be_null);
}
%}


@ -28,7 +28,6 @@
#include "gc/g1/g1BarrierSetAssembler.hpp"
#include "gc/g1/g1BarrierSetRuntime.hpp"
#include "gc/g1/g1CardTable.hpp"
#include "gc/g1/g1DirtyCardQueue.hpp"
#include "gc/g1/g1HeapRegion.hpp"
#include "gc/g1/g1SATBMarkQueueSet.hpp"
#include "gc/g1/g1ThreadLocalData.hpp"
@ -230,78 +229,52 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, Decorator
__ bind(filtered);
}
static void generate_region_crossing_test(MacroAssembler* masm, const Register store_addr, const Register new_val) {
__ xorr(R0, store_addr, new_val); // tmp1 := store address ^ new value
__ srdi_(R0, R0, G1HeapRegion::LogOfHRGrainBytes); // tmp1 := ((store address ^ new value) >> LogOfHRGrainBytes)
}
static void generate_post_barrier_fast_path(MacroAssembler* masm,
const Register store_addr,
const Register new_val,
const Register thread,
const Register tmp1,
const Register tmp2,
Label& done,
bool new_val_may_be_null) {
assert_different_registers(store_addr, new_val, tmp1, R0);
assert_different_registers(store_addr, tmp1, tmp2, R0);
static Address generate_card_young_test(MacroAssembler* masm, const Register store_addr, const Register tmp1, const Register tmp2) {
CardTableBarrierSet* ct = barrier_set_cast<CardTableBarrierSet>(BarrierSet::barrier_set());
__ load_const_optimized(tmp1, (address)(ct->card_table()->byte_map_base()), tmp2);
__ srdi(tmp2, store_addr, CardTable::card_shift()); // tmp1 := card address relative to card table base
__ lbzx(R0, tmp1, tmp2); // tmp1 := card address
__ cmpwi(CR0, R0, (int)G1CardTable::g1_young_card_val());
return Address(tmp1, tmp2); // return card address
}
__ xorr(R0, store_addr, new_val); // R0 := store address ^ new value
__ srdi_(R0, R0, G1HeapRegion::LogOfHRGrainBytes); // R0 := ((store address ^ new value) >> LogOfHRGrainBytes)
__ beq(CR0, done);
static void generate_card_dirty_test(MacroAssembler* masm, Address card_addr) {
__ membar(Assembler::StoreLoad); // Must reload after StoreLoad membar due to concurrent refinement
__ lbzx(R0, card_addr.base(), card_addr.index()); // tmp2 := card
__ cmpwi(CR0, R0, (int)G1CardTable::dirty_card_val()); // tmp2 := card == dirty_card_val?
// Crosses regions, storing null?
if (!new_val_may_be_null) {
#ifdef ASSERT
__ cmpdi(CR0, new_val, 0);
__ asm_assert_ne("null oop not allowed (G1 post)"); // Checked by caller.
#endif
} else {
__ cmpdi(CR0, new_val, 0);
__ beq(CR0, done);
}
__ ld(tmp1, G1ThreadLocalData::card_table_base_offset(), thread);
__ srdi(tmp2, store_addr, CardTable::card_shift()); // tmp2 := card address relative to card table base
if (UseCondCardMark) {
__ lbzx(R0, tmp1, tmp2);
__ cmpwi(CR0, R0, (int)G1CardTable::clean_card_val());
__ bne(CR0, done);
}
__ li(R0, G1CardTable::dirty_card_val());
__ stbx(R0, tmp1, tmp2);
}
void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, DecoratorSet decorators,
Register store_addr, Register new_val,
Register tmp1, Register tmp2, Register tmp3,
MacroAssembler::PreservationLevel preservation_level) {
Register tmp1, Register tmp2) {
bool not_null = (decorators & IS_NOT_NULL) != 0;
Label runtime, filtered;
assert_different_registers(store_addr, new_val, tmp1, tmp2);
CardTableBarrierSet* ct = barrier_set_cast<CardTableBarrierSet>(BarrierSet::barrier_set());
generate_region_crossing_test(masm, store_addr, new_val);
__ beq(CR0, filtered);
// Crosses regions, storing null?
if (not_null) {
#ifdef ASSERT
__ cmpdi(CR0, new_val, 0);
__ asm_assert_ne("null oop not allowed (G1 post)"); // Checked by caller.
#endif
} else {
__ cmpdi(CR0, new_val, 0);
__ beq(CR0, filtered);
}
Address card_addr = generate_card_young_test(masm, store_addr, tmp1, tmp2);
__ beq(CR0, filtered);
generate_card_dirty_test(masm, card_addr);
__ beq(CR0, filtered);
__ li(R0, (int)G1CardTable::dirty_card_val());
__ stbx(R0, card_addr.base(), card_addr.index()); // *(card address) := dirty_card_val
Register Rcard_addr = tmp3;
__ add(Rcard_addr, card_addr.base(), card_addr.index()); // This is the address which needs to get enqueued.
generate_queue_insertion(masm,
G1ThreadLocalData::dirty_card_queue_index_offset(),
G1ThreadLocalData::dirty_card_queue_buffer_offset(),
runtime, Rcard_addr, tmp1);
__ b(filtered);
__ bind(runtime);
assert(preservation_level == MacroAssembler::PRESERVATION_NONE,
"g1_write_barrier_post doesn't support preservation levels higher than PRESERVATION_NONE");
// Save the live input values.
__ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), Rcard_addr, R16_thread);
__ bind(filtered);
Label done;
generate_post_barrier_fast_path(masm, store_addr, new_val, R16_thread, tmp1, tmp2, done, !not_null);
__ bind(done);
}
void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
@ -333,8 +306,7 @@ void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet deco
}
g1_write_barrier_post(masm, decorators,
base, val,
tmp1, tmp2, tmp3,
preservation_level);
tmp1, tmp2);
}
}
@ -457,70 +429,29 @@ void G1BarrierSetAssembler::g1_write_barrier_post_c2(MacroAssembler* masm,
Register new_val,
Register tmp1,
Register tmp2,
G1PostBarrierStubC2* stub,
bool new_val_may_be_null,
bool decode_new_val) {
assert_different_registers(store_addr, new_val, tmp1, R0);
assert_different_registers(store_addr, tmp1, tmp2, R0);
stub->initialize_registers(R16_thread, tmp1, tmp2);
Label done;
bool null_check_required = (stub->barrier_data() & G1C2BarrierPostNotNull) == 0;
Register new_val_decoded = new_val;
if (decode_new_val) {
assert(UseCompressedOops, "or should not be here");
if (null_check_required && CompressedOops::base() != nullptr) {
if (new_val_may_be_null && CompressedOops::base() != nullptr) {
// We prefer doing the null check after the region crossing check.
// Only compressed oop modes with base != null require a null check here.
__ cmpwi(CR0, new_val, 0);
__ beq(CR0, *stub->continuation());
null_check_required = false;
__ beq(CR0, done);
new_val_may_be_null = false;
}
new_val_decoded = __ decode_heap_oop_not_null(tmp2, new_val);
}
generate_region_crossing_test(masm, store_addr, new_val_decoded);
__ beq(CR0, *stub->continuation());
// crosses regions, storing null?
if (null_check_required) {
__ cmpdi(CR0, new_val_decoded, 0);
__ beq(CR0, *stub->continuation());
}
Address card_addr = generate_card_young_test(masm, store_addr, tmp1, tmp2);
assert(card_addr.base() == tmp1 && card_addr.index() == tmp2, "needed by post barrier stub");
__ bc_far_optimized(Assembler::bcondCRbiIs0, __ bi0(CR0, Assembler::equal), *stub->entry());
__ bind(*stub->continuation());
}
void G1BarrierSetAssembler::generate_c2_post_barrier_stub(MacroAssembler* masm,
G1PostBarrierStubC2* stub) const {
Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
Label runtime;
Address card_addr(stub->tmp1(), stub->tmp2()); // See above.
__ bind(*stub->entry());
generate_card_dirty_test(masm, card_addr);
__ bc_far_optimized(Assembler::bcondCRbiIs1, __ bi0(CR0, Assembler::equal), *stub->continuation());
__ li(R0, (int)G1CardTable::dirty_card_val());
__ stbx(R0, card_addr.base(), card_addr.index()); // *(card address) := dirty_card_val
Register Rcard_addr = stub->tmp1();
__ add(Rcard_addr, card_addr.base(), card_addr.index()); // This is the address which needs to get enqueued.
generate_queue_insertion(masm,
G1ThreadLocalData::dirty_card_queue_index_offset(),
G1ThreadLocalData::dirty_card_queue_buffer_offset(),
runtime, Rcard_addr, stub->tmp2());
__ b(*stub->continuation());
__ bind(runtime);
generate_c2_barrier_runtime_call(masm, stub, Rcard_addr, CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry));
__ b(*stub->continuation());
generate_post_barrier_fast_path(masm, store_addr, new_val_decoded, R16_thread, tmp1, tmp2, done, new_val_may_be_null);
__ bind(done);
}
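
The PPC C2 barrier may be handed a still-compressed new value (decode_new_val). As the comment above says, the null filter is preferably done after the cheap region-crossing test, but with a non-null CompressedOops base a null narrow oop no longer decodes to null, so in that one configuration the null check is pulled in front of the decode and new_val_may_be_null is cleared for the fast path. A small hedged C++ model of that ordering decision; the helper name is hypothetical.

#include <cstdint>

// With a non-null CompressedOops base, a null narrow oop does not decode to 0,
// so the null filter has to look at the narrow value, before decoding.
inline bool skip_barrier_before_decode(uint32_t narrow_new_val,
                                       uintptr_t compressed_oops_base,
                                       bool& new_val_may_be_null) {
  if (new_val_may_be_null && compressed_oops_base != 0) {
    if (narrow_new_val == 0) {
      return true;                  // storing null: no card needs dirtying
    }
    new_val_may_be_null = false;    // fast path may drop its own null check
  }
  return false;                     // decode, then run the usual fast path
}
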
#endif // COMPILER2
@ -558,28 +489,19 @@ void G1BarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrier
__ b(*stub->continuation());
}
void G1BarrierSetAssembler::gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub) {
G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
__ bind(*stub->entry());
#undef __
assert(stub->addr()->is_register(), "Precondition.");
assert(stub->new_val()->is_register(), "Precondition.");
Register addr_reg = stub->addr()->as_pointer_register();
Register new_val_reg = stub->new_val()->as_register();
__ cmpdi(CR0, new_val_reg, 0);
__ bc_far_optimized(Assembler::bcondCRbiIs1, __ bi0(CR0, Assembler::equal), *stub->continuation());
address c_code = bs->post_barrier_c1_runtime_code_blob()->code_begin();
//__ load_const_optimized(R0, c_code);
__ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(c_code));
__ mtctr(R0);
__ mr(R0, addr_reg); // Pass addr in R0.
__ bctrl();
__ b(*stub->continuation());
void G1BarrierSetAssembler::g1_write_barrier_post_c1(MacroAssembler* masm,
Register store_addr,
Register new_val,
Register thread,
Register tmp1,
Register tmp2) {
Label done;
generate_post_barrier_fast_path(masm, store_addr, new_val, thread, tmp1, tmp2, done, true /* new_val_may_be_null */);
masm->bind(done);
}
#undef __
#define __ sasm->
void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) {
@ -642,86 +564,6 @@ void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler*
__ b(restart);
}
void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler* sasm) {
G1BarrierSet* bs = barrier_set_cast<G1BarrierSet>(BarrierSet::barrier_set());
__ set_info("g1_post_barrier_slow_id", false);
// Using stack slots: spill addr, spill tmp2
const int stack_slots = 2;
Register tmp = R0;
Register addr = R14;
Register tmp2 = R15;
CardTable::CardValue* byte_map_base = bs->card_table()->byte_map_base();
Label restart, refill, ret;
// Spill
__ std(addr, -8, R1_SP);
__ std(tmp2, -16, R1_SP);
__ srdi(addr, R0, CardTable::card_shift()); // Addr is passed in R0.
__ load_const_optimized(/*cardtable*/ tmp2, byte_map_base, tmp);
__ add(addr, tmp2, addr);
__ lbz(tmp, 0, addr); // tmp := [addr + cardtable]
// Return if young card.
__ cmpwi(CR0, tmp, G1CardTable::g1_young_card_val());
__ beq(CR0, ret);
// Return if sequential consistent value is already dirty.
__ membar(Assembler::StoreLoad);
__ lbz(tmp, 0, addr); // tmp := [addr + cardtable]
__ cmpwi(CR0, tmp, G1CardTable::dirty_card_val());
__ beq(CR0, ret);
// Not dirty.
// First, dirty it.
__ li(tmp, G1CardTable::dirty_card_val());
__ stb(tmp, 0, addr);
int dirty_card_q_index_byte_offset = in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset());
int dirty_card_q_buf_byte_offset = in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset());
__ bind(restart);
// Get the index into the update buffer. G1DirtyCardQueue::_index is
// a size_t so ld_ptr is appropriate here.
__ ld(tmp2, dirty_card_q_index_byte_offset, R16_thread);
// index == 0?
__ cmpdi(CR0, tmp2, 0);
__ beq(CR0, refill);
__ ld(tmp, dirty_card_q_buf_byte_offset, R16_thread);
__ addi(tmp2, tmp2, -oopSize);
__ std(tmp2, dirty_card_q_index_byte_offset, R16_thread);
__ add(tmp2, tmp, tmp2);
__ std(addr, 0, tmp2); // [_buf + index] := <address_of_card>
// Restore temp registers and return-from-leaf.
__ bind(ret);
__ ld(tmp2, -16, R1_SP);
__ ld(addr, -8, R1_SP);
__ blr();
__ bind(refill);
const int nbytes_save = (MacroAssembler::num_volatile_regs + stack_slots) * BytesPerWord;
__ save_volatile_gprs(R1_SP, -nbytes_save); // except R0
__ mflr(R0);
__ std(R0, _abi0(lr), R1_SP);
__ push_frame_reg_args(nbytes_save, R0); // dummy frame for C call
__ call_VM_leaf(CAST_FROM_FN_PTR(address, G1DirtyCardQueueSet::handle_zero_index_for_thread), R16_thread);
__ pop_frame();
__ ld(R0, _abi0(lr), R1_SP);
__ mtlr(R0);
__ restore_volatile_gprs(R1_SP, -nbytes_save); // except R0
__ b(restart);
}
#undef __
#endif // COMPILER1


@ -1,5 +1,5 @@
/*
* Copyright (c) 2018, 2024, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2018, 2021 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@ -37,9 +37,7 @@
class LIR_Assembler;
class StubAssembler;
class G1PreBarrierStub;
class G1PostBarrierStub;
class G1PreBarrierStubC2;
class G1PostBarrierStubC2;
class G1BarrierSetAssembler: public ModRefBarrierSetAssembler {
protected:
@ -56,8 +54,7 @@ protected:
MacroAssembler::PreservationLevel preservation_level);
void g1_write_barrier_post(MacroAssembler* masm, DecoratorSet decorators,
Register store_addr, Register new_val,
Register tmp1, Register tmp2, Register tmp3,
MacroAssembler::PreservationLevel preservation_level);
Register tmp1, Register tmp2);
virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
Register base, RegisterOrConstant ind_or_offs, Register val,
@ -79,17 +76,21 @@ public:
Register new_val,
Register tmp1,
Register tmp2,
G1PostBarrierStubC2* c2_stub,
bool new_val_may_be_null,
bool decode_new_val);
void generate_c2_post_barrier_stub(MacroAssembler* masm,
G1PostBarrierStubC2* stub) const;
#endif
#ifdef COMPILER1
void gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub);
void gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub);
void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm);
void generate_c1_post_barrier_runtime_stub(StubAssembler* sasm);
void g1_write_barrier_post_c1(MacroAssembler* masm,
Register store_addr,
Register new_val,
Register thread,
Register tmp1,
Register tmp2);
#endif
virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,


@ -1,5 +1,5 @@
//
// Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
// Copyright (c) 2024, 2025, Oracle and/or its affiliates. All rights reserved.
// Copyright (c) 2025 SAP SE. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
@ -64,13 +64,13 @@ static void post_write_barrier(MacroAssembler* masm,
Register tmp1,
Register tmp2,
bool decode_new_val = false) {
if (!G1PostBarrierStubC2::needs_barrier(node)) {
if (!G1BarrierStubC2::needs_post_barrier(node)) {
return;
}
Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
G1BarrierSetAssembler* g1_asm = static_cast<G1BarrierSetAssembler*>(BarrierSet::barrier_set()->barrier_set_assembler());
G1PostBarrierStubC2* const stub = G1PostBarrierStubC2::create(node);
g1_asm->g1_write_barrier_post_c2(masm, store_addr, new_val, tmp1, tmp2, stub, decode_new_val);
bool new_val_may_be_null = G1BarrierStubC2::post_new_val_may_be_null(node);
g1_asm->g1_write_barrier_post_c2(masm, store_addr, new_val, tmp1, tmp2, new_val_may_be_null, decode_new_val);
}
%}


@ -87,15 +87,54 @@ void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm
}
}
void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
Register start, Register count, Register tmp, RegSet saved_regs) {
__ push_reg(saved_regs, sp);
void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm,
DecoratorSet decorators,
Register start,
Register count,
Register tmp,
RegSet saved_regs) {
assert_different_registers(start, count, tmp);
assert_different_registers(c_rarg0, count);
__ mv(c_rarg0, start);
__ mv(c_rarg1, count);
__ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_post_entry), 2);
__ pop_reg(saved_regs, sp);
Label loop, next, done;
// Zero count? Nothing to do.
__ beqz(count, done);
// Calculate the number of card marks to set. Since the object might start and
// end within a card, we need to calculate this via the card table indexes of
// the actual start and last addresses covered by the object.
// Temporarily use the count register for the last element address.
__ shadd(count, count, start, tmp, LogBytesPerHeapOop); // end = start + count << LogBytesPerHeapOop
__ subi(count, count, BytesPerHeapOop); // Use last element address for end.
__ srli(start, start, CardTable::card_shift());
__ srli(count, count, CardTable::card_shift());
__ sub(count, count, start); // Number of bytes to mark - 1.
// Add card table base offset to start.
Address card_table_address(xthread, G1ThreadLocalData::card_table_base_offset());
__ ld(tmp, card_table_address);
__ add(start, start, tmp);
__ bind(loop);
if (UseCondCardMark) {
__ add(tmp, start, count);
__ lbu(tmp, Address(tmp, 0));
static_assert((uint)G1CardTable::clean_card_val() == 0xff, "must be");
__ subi(tmp, tmp, G1CardTable::clean_card_val()); // Convert the comparison against clean_card_val()
// into a comparison against zero to avoid use of an extra temp.
__ bnez(tmp, next);
}
__ add(tmp, start, count);
static_assert(G1CardTable::dirty_card_val() == 0, "must be to use zr");
__ sb(zr, Address(tmp, 0));
__ bind(next);
__ subi(count, count, 1);
__ bgez(count, loop);
__ bind(done);
}
static void generate_queue_test_and_insertion(MacroAssembler* masm, ByteSize index_offset, ByteSize buffer_offset, Label& runtime,
@ -192,44 +231,37 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
static void generate_post_barrier_fast_path(MacroAssembler* masm,
const Register store_addr,
const Register new_val,
const Register tmp1,
const Register tmp2,
Label& done,
bool new_val_may_be_null) {
// Does store cross heap regions?
__ xorr(tmp1, store_addr, new_val); // tmp1 := store address ^ new value
__ srli(tmp1, tmp1, G1HeapRegion::LogOfHRGrainBytes); // tmp1 := ((store address ^ new value) >> LogOfHRGrainBytes)
__ beqz(tmp1, done);
// Crosses regions, storing null?
if (new_val_may_be_null) {
__ beqz(new_val, done);
}
// Storing region crossing non-null, is card young?
__ srli(tmp1, store_addr, CardTable::card_shift()); // tmp1 := card address relative to card table base
__ load_byte_map_base(tmp2); // tmp2 := card table base address
__ add(tmp1, tmp1, tmp2); // tmp1 := card address
__ lbu(tmp2, Address(tmp1)); // tmp2 := card
}
static void generate_post_barrier_slow_path(MacroAssembler* masm,
const Register thread,
const Register tmp1,
const Register tmp2,
Label& done,
Label& runtime) {
__ membar(MacroAssembler::StoreLoad); // StoreLoad membar
__ lbu(tmp2, Address(tmp1)); // tmp2 := card
__ beqz(tmp2, done, true);
// Storing a region crossing, non-null oop, card is clean.
// Dirty card and log.
STATIC_ASSERT(CardTable::dirty_card_val() == 0);
__ sb(zr, Address(tmp1)); // *(card address) := dirty_card_val
generate_queue_test_and_insertion(masm,
G1ThreadLocalData::dirty_card_queue_index_offset(),
G1ThreadLocalData::dirty_card_queue_buffer_offset(),
runtime,
thread, tmp1, tmp2, t0);
__ j(done);
bool new_val_may_be_null) {
assert(thread == xthread, "must be");
assert_different_registers(store_addr, new_val, thread, tmp1, tmp2, noreg);
// Does store cross heap regions?
__ xorr(tmp1, store_addr, new_val); // tmp1 := store address ^ new value
__ srli(tmp1, tmp1, G1HeapRegion::LogOfHRGrainBytes); // tmp1 := ((store address ^ new value) >> LogOfHRGrainBytes)
__ beqz(tmp1, done);
// Crosses regions, storing null?
if (new_val_may_be_null) {
__ beqz(new_val, done);
}
// Storing region crossing non-null, is card clean?
__ srli(tmp1, store_addr, CardTable::card_shift()); // tmp1 := card address relative to card table base
Address card_table_address(xthread, G1ThreadLocalData::card_table_base_offset());
__ ld(tmp2, card_table_address); // tmp2 := card table base address
__ add(tmp1, tmp1, tmp2); // tmp1 := card address
if (UseCondCardMark) {
static_assert((uint)G1CardTable::clean_card_val() == 0xff, "must be");
__ lbu(tmp2, Address(tmp1, 0)); // tmp2 := card
__ subi(tmp2, tmp2, G1CardTable::clean_card_val()); // Convert the comparison against clean_card_val()
// into a comparison against zero to avoid use of an extra temp.
__ bnez(tmp2, done);
}
static_assert((uint)G1CardTable::dirty_card_val() == 0, "must be to use zr");
__ sb(zr, Address(tmp1, 0));
}
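
Unlike the AArch64 version, the RISC-V filter here (and in the array barrier above) cannot use a single bit-test branch; it subtracts clean_card_val() from the loaded card so that the clean-card test becomes a compare against zero that bnez can branch on, avoiding an extra temporary register as the comment notes. A tiny hedged C++ check of that equivalence, assuming clean_card_val() is 0xff as the static_assert states:

#include <cassert>
#include <cstdint>

int main() {
  const int clean_card = 0xff;  // assumed clean_card_val(), per the static_assert
  for (int card = 0; card <= 0xff; card++) {
    // subi(tmp, tmp, clean_card_val()) followed by bnez branches past the
    // dirtying store exactly when the loaded card is not clean.
    bool skips_store = ((uint8_t)(card - clean_card) != 0);
    assert(skips_store == (card != clean_card));
  }
  return 0;
}
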
void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm,
@ -238,27 +270,8 @@ void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm,
Register thread,
Register tmp1,
Register tmp2) {
assert(thread == xthread, "must be");
assert_different_registers(store_addr, new_val, thread, tmp1, tmp2, t0);
assert(store_addr != noreg && new_val != noreg && tmp1 != noreg && tmp2 != noreg,
"expecting a register");
Label done;
Label runtime;
generate_post_barrier_fast_path(masm, store_addr, new_val, tmp1, tmp2, done, true /* new_val_may_be_null */);
// If card is young, jump to done (tmp2 holds the card value)
__ mv(t0, (int)G1CardTable::g1_young_card_val());
__ beq(tmp2, t0, done); // card == young_card_val?
generate_post_barrier_slow_path(masm, thread, tmp1, tmp2, done, runtime);
__ bind(runtime);
// save the live input values
RegSet saved = RegSet::of(store_addr);
__ push_reg(saved, sp);
__ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), tmp1, thread);
__ pop_reg(saved, sp);
generate_post_barrier_fast_path(masm, store_addr, new_val, thread, tmp1, tmp2, done, true /* new_val_may_be_null */);
__ bind(done);
}
@ -318,37 +331,10 @@ void G1BarrierSetAssembler::g1_write_barrier_post_c2(MacroAssembler* masm,
Register thread,
Register tmp1,
Register tmp2,
G1PostBarrierStubC2* stub) {
assert(thread == xthread, "must be");
assert_different_registers(store_addr, new_val, thread, tmp1, tmp2, t0);
assert(store_addr != noreg && new_val != noreg && tmp1 != noreg && tmp2 != noreg,
"expecting a register");
stub->initialize_registers(thread, tmp1, tmp2);
bool new_val_may_be_null = (stub->barrier_data() & G1C2BarrierPostNotNull) == 0;
generate_post_barrier_fast_path(masm, store_addr, new_val, tmp1, tmp2, *stub->continuation(), new_val_may_be_null);
// If card is not young, jump to stub (slow path) (tmp2 holds the card value)
__ mv(t0, (int)G1CardTable::g1_young_card_val());
__ bne(tmp2, t0, *stub->entry(), true);
__ bind(*stub->continuation());
}
void G1BarrierSetAssembler::generate_c2_post_barrier_stub(MacroAssembler* masm,
G1PostBarrierStubC2* stub) const {
Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
Label runtime;
Register thread = stub->thread();
Register tmp1 = stub->tmp1(); // tmp1 holds the card address.
Register tmp2 = stub->tmp2();
__ bind(*stub->entry());
generate_post_barrier_slow_path(masm, thread, tmp1, tmp2, *stub->continuation(), runtime);
__ bind(runtime);
generate_c2_barrier_runtime_call(masm, stub, tmp1, CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry));
__ j(*stub->continuation());
bool new_val_may_be_null) {
Label done;
generate_post_barrier_fast_path(masm, store_addr, new_val, thread, tmp1, tmp2, done, new_val_may_be_null);
__ bind(done);
}
#endif // COMPILER2
@ -443,20 +429,19 @@ void G1BarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrier
__ j(*stub->continuation());
}
void G1BarrierSetAssembler::gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub) {
G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
__ bind(*stub->entry());
assert(stub->addr()->is_register(), "Precondition");
assert(stub->new_val()->is_register(), "Precondition");
Register new_val_reg = stub->new_val()->as_register();
__ beqz(new_val_reg, *stub->continuation(), /* is_far */ true);
ce->store_parameter(stub->addr()->as_pointer_register(), 0);
__ far_call(RuntimeAddress(bs->post_barrier_c1_runtime_code_blob()->code_begin()));
__ j(*stub->continuation());
}
#undef __
void G1BarrierSetAssembler::g1_write_barrier_post_c1(MacroAssembler* masm,
Register store_addr,
Register new_val,
Register thread,
Register tmp1,
Register tmp2) {
Label done;
generate_post_barrier_fast_path(masm, store_addr, new_val, thread, tmp1, tmp2, done, true /* new_val_may_be_null */);
masm->bind(done);
}
#define __ sasm->
void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) {
@ -507,74 +492,6 @@ void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler*
__ epilogue();
}
void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler* sasm) {
__ prologue("g1_post_barrier", false);
// arg0 : store_address
Address store_addr(fp, 2 * BytesPerWord); // 2 BytesPerWord from fp
BarrierSet* bs = BarrierSet::barrier_set();
CardTableBarrierSet* ctbs = barrier_set_cast<CardTableBarrierSet>(bs);
Label done;
Label runtime;
// At this point we know new_value is non-null and the new_value crosses regions.
// Must check to see if card is already dirty
const Register thread = xthread;
Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset()));
Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset()));
const Register card_offset = t1;
// RA is free here, so we can use it to hold the byte_map_base.
const Register byte_map_base = ra;
assert_different_registers(card_offset, byte_map_base, t0);
__ load_parameter(0, card_offset);
__ srli(card_offset, card_offset, CardTable::card_shift());
__ load_byte_map_base(byte_map_base);
// Convert card offset into an address in card_addr
Register card_addr = card_offset;
__ add(card_addr, byte_map_base, card_addr);
__ lbu(t0, Address(card_addr, 0));
__ sub(t0, t0, (int)G1CardTable::g1_young_card_val());
__ beqz(t0, done);
assert((int)CardTable::dirty_card_val() == 0, "must be 0");
__ membar(MacroAssembler::StoreLoad);
__ lbu(t0, Address(card_addr, 0));
__ beqz(t0, done);
// storing region crossing non-null, card is clean.
// dirty card and log.
__ sb(zr, Address(card_addr, 0));
__ ld(t0, queue_index);
__ beqz(t0, runtime);
__ subi(t0, t0, wordSize);
__ sd(t0, queue_index);
// Reuse RA to hold buffer_addr
const Register buffer_addr = ra;
__ ld(buffer_addr, buffer);
__ add(t0, buffer_addr, t0);
__ sd(card_addr, Address(t0, 0));
__ j(done);
__ bind(runtime);
__ push_call_clobbered_registers();
__ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, thread);
__ pop_call_clobbered_registers();
__ bind(done);
__ epilogue();
}
#undef __
#endif // COMPILER1


@ -1,5 +1,5 @@
/*
* Copyright (c) 2018, 2024, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2020, 2024, Huawei Technologies Co., Ltd. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@ -35,9 +35,7 @@ class LIR_Assembler;
#endif
class StubAssembler;
class G1PreBarrierStub;
class G1PostBarrierStub;
class G1PreBarrierStubC2;
class G1PostBarrierStubC2;
class G1BarrierSetAssembler: public ModRefBarrierSetAssembler {
protected:
@ -68,10 +66,16 @@ protected:
public:
#ifdef COMPILER1
void gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub);
void gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub);
void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm);
void generate_c1_post_barrier_runtime_stub(StubAssembler* sasm);
void g1_write_barrier_post_c1(MacroAssembler* masm,
Register store_addr,
Register new_val,
Register thread,
Register tmp1,
Register tmp2);
#endif
#ifdef COMPILER2
@ -90,9 +94,7 @@ public:
Register thread,
Register tmp1,
Register tmp2,
G1PostBarrierStubC2* c2_stub);
void generate_c2_post_barrier_stub(MacroAssembler* masm,
G1PostBarrierStubC2* stub) const;
bool new_val_may_be_null);
#endif
void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,


@ -1,5 +1,5 @@
//
// Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
// Copyright (c) 2024, 2025, Oracle and/or its affiliates. All rights reserved.
// Copyright (c) 2024, Huawei Technologies Co., Ltd. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
@ -63,13 +63,13 @@ static void write_barrier_post(MacroAssembler* masm,
Register new_val,
Register tmp1,
Register tmp2) {
if (!G1PostBarrierStubC2::needs_barrier(node)) {
if (!G1BarrierStubC2::needs_post_barrier(node)) {
return;
}
Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
G1BarrierSetAssembler* g1_asm = static_cast<G1BarrierSetAssembler*>(BarrierSet::barrier_set()->barrier_set_assembler());
G1PostBarrierStubC2* const stub = G1PostBarrierStubC2::create(node);
g1_asm->g1_write_barrier_post_c2(masm, store_addr, new_val, xthread, tmp1, tmp2, stub);
bool new_val_may_be_null = G1BarrierStubC2::post_new_val_may_be_null(node);
g1_asm->g1_write_barrier_post_c2(masm, store_addr, new_val, xthread, tmp1, tmp2, new_val_may_be_null);
}
%}


@ -28,7 +28,6 @@
#include "gc/g1/g1BarrierSetAssembler.hpp"
#include "gc/g1/g1BarrierSetRuntime.hpp"
#include "gc/g1/g1CardTable.hpp"
#include "gc/g1/g1DirtyCardQueue.hpp"
#include "gc/g1/g1HeapRegion.hpp"
#include "gc/g1/g1SATBMarkQueueSet.hpp"
#include "gc/g1/g1ThreadLocalData.hpp"
@ -205,104 +204,71 @@ void G1BarrierSetAssembler::generate_c2_pre_barrier_stub(MacroAssembler* masm,
BLOCK_COMMENT("} generate_c2_pre_barrier_stub");
}
static void generate_post_barrier_fast_path(MacroAssembler* masm,
const Register store_addr,
const Register new_val,
const Register thread,
const Register tmp1,
const Register tmp2,
Label& done,
bool new_val_may_be_null) {
__ block_comment("generate_post_barrier_fast_path {");
assert(thread == Z_thread, "must be");
assert_different_registers(store_addr, new_val, thread, tmp1, tmp2, noreg);
// Does store cross heap regions?
if (VM_Version::has_DistinctOpnds()) {
__ z_xgrk(tmp1, store_addr, new_val); // tmp1 := store address ^ new value
} else {
__ z_lgr(tmp1, store_addr);
__ z_xgr(tmp1, new_val);
}
__ z_srag(tmp1, tmp1, G1HeapRegion::LogOfHRGrainBytes); // tmp1 := ((store address ^ new value) >> LogOfHRGrainBytes)
__ branch_optimized(Assembler::bcondEqual, done);
// Crosses regions, storing null?
if (new_val_may_be_null) {
__ z_ltgr(new_val, new_val);
__ z_bre(done);
} else {
#ifdef ASSERT
__ z_ltgr(new_val, new_val);
__ asm_assert(Assembler::bcondNotZero, "null oop not allowed (G1 post)", 0x322); // Checked by caller.
#endif
}
__ z_srag(tmp1, store_addr, CardTable::card_shift());
Address card_table_addr(thread, in_bytes(G1ThreadLocalData::card_table_base_offset()));
__ z_alg(tmp1, card_table_addr); // tmp1 := card address
if (UseCondCardMark) {
__ z_cli(0, tmp1, G1CardTable::clean_card_val());
__ branch_optimized(Assembler::bcondNotEqual, done);
}
static_assert(G1CardTable::dirty_card_val() == 0, "must be to use z_mvi");
__ z_mvi(0, tmp1, G1CardTable::dirty_card_val()); // *(card address) := dirty_card_val
__ block_comment("} generate_post_barrier_fast_path");
}
void G1BarrierSetAssembler::g1_write_barrier_post_c2(MacroAssembler* masm,
Register store_addr,
Register new_val,
Register thread,
Register tmp1,
Register tmp2,
G1PostBarrierStubC2* stub) {
bool new_val_may_be_null) {
BLOCK_COMMENT("g1_write_barrier_post_c2 {");
assert(thread == Z_thread, "must be");
assert_different_registers(store_addr, new_val, thread, tmp1, tmp2, Z_R1_scratch);
assert(store_addr != noreg && new_val != noreg && tmp1 != noreg && tmp2 != noreg, "expecting a register");
stub->initialize_registers(thread, tmp1, tmp2);
BLOCK_COMMENT("generate_region_crossing_test {");
if (VM_Version::has_DistinctOpnds()) {
__ z_xgrk(tmp1, store_addr, new_val);
} else {
__ z_lgr(tmp1, store_addr);
__ z_xgr(tmp1, new_val);
}
__ z_srag(tmp1, tmp1, G1HeapRegion::LogOfHRGrainBytes);
__ branch_optimized(Assembler::bcondEqual, *stub->continuation());
BLOCK_COMMENT("} generate_region_crossing_test");
// crosses regions, storing null?
if ((stub->barrier_data() & G1C2BarrierPostNotNull) == 0) {
__ z_ltgr(new_val, new_val);
__ branch_optimized(Assembler::bcondEqual, *stub->continuation());
}
BLOCK_COMMENT("generate_card_young_test {");
CardTableBarrierSet* ct = barrier_set_cast<CardTableBarrierSet>(BarrierSet::barrier_set());
// calculate address of card
__ load_const_optimized(tmp2, (address)ct->card_table()->byte_map_base()); // Card table base.
__ z_srlg(tmp1, store_addr, CardTable::card_shift()); // Index into card table.
__ z_algr(tmp1, tmp2); // Explicit calculation needed for cli.
// Filter young.
__ z_cli(0, tmp1, G1CardTable::g1_young_card_val());
BLOCK_COMMENT("} generate_card_young_test");
// From here on, tmp1 holds the card address.
__ branch_optimized(Assembler::bcondNotEqual, *stub->entry());
__ bind(*stub->continuation());
Label done;
generate_post_barrier_fast_path(masm, store_addr, new_val, thread, tmp1, tmp2, done, new_val_may_be_null);
__ bind(done);
BLOCK_COMMENT("} g1_write_barrier_post_c2");
}
void G1BarrierSetAssembler::generate_c2_post_barrier_stub(MacroAssembler* masm,
G1PostBarrierStubC2* stub) const {
BLOCK_COMMENT("generate_c2_post_barrier_stub {");
Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
Label runtime;
Register thread = stub->thread();
Register tmp1 = stub->tmp1(); // tmp1 holds the card address.
Register tmp2 = stub->tmp2();
Register Rcard_addr = tmp1;
__ bind(*stub->entry());
BLOCK_COMMENT("generate_card_clean_test {");
__ z_sync(); // Required to support concurrent cleaning.
__ z_cli(0, Rcard_addr, 0); // Reload after membar.
__ branch_optimized(Assembler::bcondEqual, *stub->continuation());
BLOCK_COMMENT("} generate_card_clean_test");
BLOCK_COMMENT("generate_dirty_card {");
// Storing a region crossing, non-null oop, card is clean.
// Dirty card and log.
STATIC_ASSERT(CardTable::dirty_card_val() == 0);
__ z_mvi(0, Rcard_addr, CardTable::dirty_card_val());
BLOCK_COMMENT("} generate_dirty_card");
generate_queue_test_and_insertion(masm,
G1ThreadLocalData::dirty_card_queue_index_offset(),
G1ThreadLocalData::dirty_card_queue_buffer_offset(),
runtime,
Z_thread, tmp1, tmp2);
__ branch_optimized(Assembler::bcondAlways, *stub->continuation());
__ bind(runtime);
generate_c2_barrier_runtime_call(masm, stub, tmp1, CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry));
__ branch_optimized(Assembler::bcondAlways, *stub->continuation());
BLOCK_COMMENT("} generate_c2_post_barrier_stub");
}
#endif //COMPILER2
void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
@ -451,99 +417,9 @@ void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, Decorato
Register Rtmp1, Register Rtmp2, Register Rtmp3) {
bool not_null = (decorators & IS_NOT_NULL) != 0;
assert_different_registers(Rstore_addr, Rnew_val, Rtmp1, Rtmp2); // Most probably, Rnew_val == Rtmp3.
Label callRuntime, filtered;
CardTableBarrierSet* ct = barrier_set_cast<CardTableBarrierSet>(BarrierSet::barrier_set());
BLOCK_COMMENT("g1_write_barrier_post {");
// Does store cross heap regions?
// It does if the two addresses specify different grain addresses.
if (VM_Version::has_DistinctOpnds()) {
__ z_xgrk(Rtmp1, Rstore_addr, Rnew_val);
} else {
__ z_lgr(Rtmp1, Rstore_addr);
__ z_xgr(Rtmp1, Rnew_val);
}
__ z_srag(Rtmp1, Rtmp1, G1HeapRegion::LogOfHRGrainBytes);
__ z_bre(filtered);
// Crosses regions, storing null?
if (not_null) {
#ifdef ASSERT
__ z_ltgr(Rnew_val, Rnew_val);
__ asm_assert(Assembler::bcondNotZero, "null oop not allowed (G1 post)", 0x322); // Checked by caller.
#endif
} else {
__ z_ltgr(Rnew_val, Rnew_val);
__ z_bre(filtered);
}
Rnew_val = noreg; // end of lifetime
// Storing region crossing non-null, is card already dirty?
assert_different_registers(Rtmp1, Rtmp2, Rtmp3);
// Make sure not to use Z_R0 for any of these registers.
Register Rcard_addr = (Rtmp1 != Z_R0_scratch) ? Rtmp1 : Rtmp3;
Register Rbase = (Rtmp2 != Z_R0_scratch) ? Rtmp2 : Rtmp3;
// calculate address of card
__ load_const_optimized(Rbase, (address)ct->card_table()->byte_map_base()); // Card table base.
__ z_srlg(Rcard_addr, Rstore_addr, CardTable::card_shift()); // Index into card table.
__ z_algr(Rcard_addr, Rbase); // Explicit calculation needed for cli.
Rbase = noreg; // end of lifetime
// Filter young.
__ z_cli(0, Rcard_addr, G1CardTable::g1_young_card_val());
__ z_bre(filtered);
// Check the card value. If dirty, we're done.
// This also avoids false sharing of the (already dirty) card.
__ z_sync(); // Required to support concurrent cleaning.
__ z_cli(0, Rcard_addr, G1CardTable::dirty_card_val()); // Reload after membar.
__ z_bre(filtered);
// Storing a region crossing, non-null oop, card is clean.
// Dirty card and log.
__ z_mvi(0, Rcard_addr, G1CardTable::dirty_card_val());
Register Rcard_addr_x = Rcard_addr;
Register Rqueue_index = (Rtmp2 != Z_R0_scratch) ? Rtmp2 : Rtmp1;
if (Rcard_addr == Rqueue_index) {
Rcard_addr_x = Z_R0_scratch; // Register shortage. We have to use Z_R0.
}
__ lgr_if_needed(Rcard_addr_x, Rcard_addr);
generate_queue_test_and_insertion(masm,
G1ThreadLocalData::dirty_card_queue_index_offset(),
G1ThreadLocalData::dirty_card_queue_buffer_offset(),
callRuntime,
Z_thread, Rcard_addr_x, Rqueue_index);
__ z_bru(filtered);
__ bind(callRuntime);
// TODO: do we need a frame? Introduced to be on the safe side.
bool needs_frame = true;
__ lgr_if_needed(Rcard_addr, Rcard_addr_x); // copy back asap. push_frame will destroy Z_R0_scratch!
// VM call need frame to access(write) O register.
if (needs_frame) {
__ save_return_pc();
__ push_frame_abi160(0); // Will use Z_R0 as tmp on old CPUs.
}
// Save the live input values.
__ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), Rcard_addr, Z_thread);
if (needs_frame) {
__ pop_frame();
__ restore_return_pc();
}
__ bind(filtered);
Label done;
generate_post_barrier_fast_path(masm, Rstore_addr, Rnew_val, Z_thread, Rtmp1, Rtmp2, done, !not_null);
__ bind(done);
BLOCK_COMMENT("} g1_write_barrier_post");
}
@ -615,22 +491,19 @@ void G1BarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrier
__ branch_optimized(Assembler::bcondAlways, *stub->continuation());
}
void G1BarrierSetAssembler::gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub) {
G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
__ bind(*stub->entry());
ce->check_reserved_argument_area(16); // RT stub needs 2 spill slots.
assert(stub->addr()->is_register(), "Precondition.");
assert(stub->new_val()->is_register(), "Precondition.");
Register new_val_reg = stub->new_val()->as_register();
__ z_ltgr(new_val_reg, new_val_reg);
__ branch_optimized(Assembler::bcondZero, *stub->continuation());
__ z_lgr(Z_R1_scratch, stub->addr()->as_pointer_register());
ce->emit_call_c(bs->post_barrier_c1_runtime_code_blob()->code_begin());
__ branch_optimized(Assembler::bcondAlways, *stub->continuation());
}
#undef __
void G1BarrierSetAssembler::g1_write_barrier_post_c1(MacroAssembler* masm,
Register store_addr,
Register new_val,
Register thread,
Register tmp1,
Register tmp2) {
Label done;
generate_post_barrier_fast_path(masm, store_addr, new_val, thread, tmp1, tmp2, done, true /* new_val_may_be_null */);
masm->bind(done);
}
#define __ sasm->
static OopMap* save_volatile_registers(StubAssembler* sasm, Register return_pc = Z_R14) {
@ -705,92 +578,6 @@ void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler*
__ z_bru(restart);
}
void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler* sasm) {
// Z_R1_scratch: oop address, address of updated memory slot
BarrierSet* bs = BarrierSet::barrier_set();
__ set_info("g1_post_barrier_slow_id", false);
Register addr_oop = Z_R1_scratch;
Register addr_card = Z_R1_scratch;
Register r1 = Z_R6; // Must be saved/restored.
Register r2 = Z_R7; // Must be saved/restored.
Register cardtable = r1; // Must be non-volatile, because it is used to save addr_card.
CardTableBarrierSet* ctbs = barrier_set_cast<CardTableBarrierSet>(bs);
CardTable* ct = ctbs->card_table();
CardTable::CardValue* byte_map_base = ct->byte_map_base();
// Save registers used below (see assertion in G1PreBarrierStub::emit_code()).
__ z_stg(r1, 0*BytesPerWord + FrameMap::first_available_sp_in_frame, Z_SP);
Label not_already_dirty, restart, refill, young_card;
// Calculate address of card corresponding to the updated oop slot.
AddressLiteral rs(byte_map_base);
__ z_srlg(addr_card, addr_oop, CardTable::card_shift());
addr_oop = noreg; // dead now
__ load_const_optimized(cardtable, rs); // cardtable := <card table base>
__ z_agr(addr_card, cardtable); // addr_card := addr_oop>>card_shift + cardtable
__ z_cli(0, addr_card, (int)G1CardTable::g1_young_card_val());
__ z_bre(young_card);
__ z_sync(); // Required to support concurrent cleaning.
__ z_cli(0, addr_card, (int)CardTable::dirty_card_val());
__ z_brne(not_already_dirty);
__ bind(young_card);
// We didn't take the branch, so we're already dirty: restore
// used registers and return.
__ z_lg(r1, 0*BytesPerWord + FrameMap::first_available_sp_in_frame, Z_SP);
__ z_br(Z_R14);
// Not dirty.
__ bind(not_already_dirty);
// First, dirty it: [addr_card] := 0
__ z_mvi(0, addr_card, CardTable::dirty_card_val());
Register idx = cardtable; // Must be non-volatile, because it is used to save addr_card.
Register buf = r2;
cardtable = noreg; // now dead
// Save registers used below (see assertion in G1PreBarrierStub::emit_code()).
__ z_stg(r2, 1*BytesPerWord + FrameMap::first_available_sp_in_frame, Z_SP);
ByteSize dirty_card_q_index_byte_offset = G1ThreadLocalData::dirty_card_queue_index_offset();
ByteSize dirty_card_q_buf_byte_offset = G1ThreadLocalData::dirty_card_queue_buffer_offset();
__ bind(restart);
// Get the index into the update buffer. G1DirtyCardQueue::_index is
// a size_t so z_ltg is appropriate here.
__ z_ltg(idx, Address(Z_thread, dirty_card_q_index_byte_offset));
// index == 0?
__ z_brz(refill);
__ z_lg(buf, Address(Z_thread, dirty_card_q_buf_byte_offset));
__ add2reg(idx, -oopSize);
__ z_stg(addr_card, 0, idx, buf); // [_buf + index] := <address_of_card>
__ z_stg(idx, Address(Z_thread, dirty_card_q_index_byte_offset));
// Restore killed registers and return.
__ z_lg(r1, 0*BytesPerWord + FrameMap::first_available_sp_in_frame, Z_SP);
__ z_lg(r2, 1*BytesPerWord + FrameMap::first_available_sp_in_frame, Z_SP);
__ z_br(Z_R14);
__ bind(refill);
save_volatile_registers(sasm);
__ z_lgr(idx, addr_card); // Save addr_card, tmp3 must be non-volatile.
__ call_VM_leaf(CAST_FROM_FN_PTR(address, G1DirtyCardQueueSet::handle_zero_index_for_thread),
Z_thread);
__ z_lgr(addr_card, idx);
restore_volatile_registers(sasm); // Restore addr_card.
__ z_bru(restart);
}
#undef __
#endif // COMPILER1

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2018, 2024, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2018, 2024 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@ -33,9 +33,7 @@
class LIR_Assembler;
class StubAssembler;
class G1PreBarrierStub;
class G1PostBarrierStub;
class G1PreBarrierStubC2;
class G1PostBarrierStubC2;
class G1BarrierSetAssembler: public ModRefBarrierSetAssembler {
protected:
@ -60,10 +58,16 @@ class G1BarrierSetAssembler: public ModRefBarrierSetAssembler {
public:
#ifdef COMPILER1
void gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub);
void gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub);
void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm);
void generate_c1_post_barrier_runtime_stub(StubAssembler* sasm);
void g1_write_barrier_post_c1(MacroAssembler* masm,
Register store_addr,
Register new_val,
Register thread,
Register tmp1,
Register tmp2);
#endif // COMPILER1
#ifdef COMPILER2
@ -81,9 +85,7 @@ class G1BarrierSetAssembler: public ModRefBarrierSetAssembler {
Register thread,
Register tmp1,
Register tmp2,
G1PostBarrierStubC2* c2_stub);
void generate_c2_post_barrier_stub(MacroAssembler* masm,
G1PostBarrierStubC2* stub) const;
bool new_val_may_be_null);
#endif // COMPILER2
virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,

View File

@ -1,5 +1,5 @@
//
// Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
// Copyright (c) 2024, 2025, Oracle and/or its affiliates. All rights reserved.
// Copyright 2024 IBM Corporation. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
@ -62,13 +62,13 @@ static void write_barrier_post(MacroAssembler* masm,
Register new_val,
Register tmp1,
Register tmp2) {
if (!G1PostBarrierStubC2::needs_barrier(node)) {
if (!G1BarrierStubC2::needs_post_barrier(node)) {
return;
}
Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
G1BarrierSetAssembler* g1_asm = static_cast<G1BarrierSetAssembler*>(BarrierSet::barrier_set()->barrier_set_assembler());
G1PostBarrierStubC2* const stub = G1PostBarrierStubC2::create(node);
g1_asm->g1_write_barrier_post_c2(masm, store_addr, new_val, Z_thread, tmp1, tmp2, stub);
bool new_val_may_be_null = G1BarrierStubC2::post_new_val_may_be_null(node);
g1_asm->g1_write_barrier_post_c2(masm, store_addr, new_val, Z_thread, tmp1, tmp2, new_val_may_be_null);
}
%} // source

View File

@ -89,19 +89,53 @@ void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm
void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
Register addr, Register count, Register tmp) {
__ push_call_clobbered_registers(false /* save_fpu */);
if (c_rarg0 == count) { // On win64 c_rarg0 == rcx
assert_different_registers(c_rarg1, addr);
__ mov(c_rarg1, count);
__ mov(c_rarg0, addr);
} else {
assert_different_registers(c_rarg0, count);
__ mov(c_rarg0, addr);
__ mov(c_rarg1, count);
}
__ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_post_entry), 2);
__ pop_call_clobbered_registers(false /* save_fpu */);
Label done;
__ testptr(count, count);
__ jcc(Assembler::zero, done);
// Calculate end address in "count".
Address::ScaleFactor scale = UseCompressedOops ? Address::times_4 : Address::times_8;
__ leaq(count, Address(addr, count, scale));
// Calculate start card address in "addr".
__ shrptr(addr, CardTable::card_shift());
Register thread = r15_thread;
__ movptr(tmp, Address(thread, in_bytes(G1ThreadLocalData::card_table_base_offset())));
__ addptr(addr, tmp);
// Calculate the address of the card covering the last word of the array.
__ subptr(count, 1);
__ shrptr(count, CardTable::card_shift());
__ addptr(count, tmp);
Label loop;
// Iterate from start card to end card (inclusive).
__ bind(loop);
Label is_clean_card;
if (UseCondCardMark) {
__ cmpb(Address(addr, 0), G1CardTable::clean_card_val());
__ jcc(Assembler::equal, is_clean_card);
} else {
__ movb(Address(addr, 0), G1CardTable::dirty_card_val());
}
Label next_card;
__ bind(next_card);
__ addptr(addr, sizeof(CardTable::CardValue));
__ cmpptr(addr, count);
__ jcc(Assembler::belowEqual, loop);
__ jmp(done);
__ bind(is_clean_card);
// Card was clean. Dirty the card and go to the next one.
__ movb(Address(addr, 0), G1CardTable::dirty_card_val());
__ jmp(next_card);
__ bind(done);
}
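For readers who prefer C++ over assembly, the following is a minimal sketch of the card-range computation and marking loop emitted above. It is illustrative only: kCardShift and the card values are placeholder assumptions standing in for the G1CardTable constants, and the function itself is not part of this change (with compressed oops the element size would be 4 bytes; that is ignored here).
#include <cstddef>
#include <cstdint>
// Placeholder constants; the real values come from G1CardTable.
constexpr unsigned kCardShift = 9;      // assumed 512-byte cards
constexpr uint8_t  kCleanCard = 0xff;   // assumed clean card value
constexpr uint8_t  kDirtyCard = 0;      // assumed dirty card value
// Sketch of the array post barrier: mark every card covered by the oop array.
void mark_cards_for_array(uint8_t* card_table_base, void** start, size_t count,
                          bool use_cond_card_mark) {
  if (count == 0) {
    return;
  }
  // Card of the first element and card of the last element; the array may
  // start and end in the middle of a card.
  uint8_t* card      = card_table_base + (reinterpret_cast<uintptr_t>(start) >> kCardShift);
  uint8_t* last_card = card_table_base + (reinterpret_cast<uintptr_t>(start + count - 1) >> kCardShift);
  for (; card <= last_card; card++) {
    if (use_cond_card_mark && *card != kCleanCard) {
      continue;  // already marked; avoid rewriting the card
    }
    *card = kDirtyCard;
  }
}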
void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
@ -182,7 +216,6 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
// If expand_call is true then we expand the call_VM_leaf macro
// directly to skip generating the check by
// InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp.
const Register thread = r15_thread;
Label done;
@ -238,73 +271,46 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
static void generate_post_barrier_fast_path(MacroAssembler* masm,
const Register store_addr,
const Register new_val,
const Register tmp,
const Register tmp2,
const Register tmp1,
Label& done,
bool new_val_may_be_null) {
CardTableBarrierSet* ct = barrier_set_cast<CardTableBarrierSet>(BarrierSet::barrier_set());
assert_different_registers(store_addr, new_val, tmp1, noreg);
Register thread = r15_thread;
// Does store cross heap regions?
__ movptr(tmp, store_addr); // tmp := store address
__ xorptr(tmp, new_val); // tmp := store address ^ new value
__ shrptr(tmp, G1HeapRegion::LogOfHRGrainBytes); // ((store address ^ new value) >> LogOfHRGrainBytes) == 0?
__ movptr(tmp1, store_addr); // tmp1 := store address
__ xorptr(tmp1, new_val); // tmp1 := store address ^ new value
__ shrptr(tmp1, G1HeapRegion::LogOfHRGrainBytes); // ((store address ^ new value) >> LogOfHRGrainBytes) == 0?
__ jcc(Assembler::equal, done);
// Crosses regions, storing null?
if (new_val_may_be_null) {
__ cmpptr(new_val, NULL_WORD); // new value == null?
__ cmpptr(new_val, NULL_WORD); // new value == null?
__ jcc(Assembler::equal, done);
}
// Storing region crossing non-null, is card young?
__ movptr(tmp, store_addr); // tmp := store address
__ shrptr(tmp, CardTable::card_shift()); // tmp := card address relative to card table base
// Do not use ExternalAddress to load 'byte_map_base', since 'byte_map_base' is NOT
// a valid address and therefore is not properly handled by the relocation code.
__ movptr(tmp2, (intptr_t)ct->card_table()->byte_map_base()); // tmp2 := card table base address
__ addptr(tmp, tmp2); // tmp := card address
__ cmpb(Address(tmp, 0), G1CardTable::g1_young_card_val()); // *(card address) == young_card_val?
}
static void generate_post_barrier_slow_path(MacroAssembler* masm,
const Register thread,
const Register tmp,
const Register tmp2,
Label& done,
Label& runtime) {
__ membar(Assembler::Membar_mask_bits(Assembler::StoreLoad)); // StoreLoad membar
__ cmpb(Address(tmp, 0), G1CardTable::dirty_card_val()); // *(card address) == dirty_card_val?
__ jcc(Assembler::equal, done);
__ movptr(tmp1, store_addr); // tmp1 := store address
__ shrptr(tmp1, CardTable::card_shift()); // tmp1 := card address relative to card table base
Address card_table_addr(thread, in_bytes(G1ThreadLocalData::card_table_base_offset()));
__ addptr(tmp1, card_table_addr); // tmp1 := card address
if (UseCondCardMark) {
__ cmpb(Address(tmp1, 0), G1CardTable::clean_card_val()); // *(card address) == clean_card_val?
__ jcc(Assembler::notEqual, done);
}
// Storing a region crossing, non-null oop, card is clean.
// Dirty card and log.
__ movb(Address(tmp, 0), G1CardTable::dirty_card_val()); // *(card address) := dirty_card_val
generate_queue_insertion(masm,
G1ThreadLocalData::dirty_card_queue_index_offset(),
G1ThreadLocalData::dirty_card_queue_buffer_offset(),
runtime,
thread, tmp, tmp2);
__ jmp(done);
// Dirty card.
__ movb(Address(tmp1, 0), G1CardTable::dirty_card_val()); // *(card address) := dirty_card_val
}
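Taken together, the fast path above now is the whole barrier: region check, null check, thread-local card lookup, optional clean check, card write. As a hedged illustration of that shape (placeholder constants and a free function stand in for the real G1CardTable, G1HeapRegion and G1ThreadLocalData accessors; this is a sketch, not the HotSpot implementation):
#include <cstdint>
// Placeholder constants; the real values come from G1CardTable / G1HeapRegion.
constexpr unsigned kLogRegionSize = 21;   // assumed 2 MB regions
constexpr unsigned kCardShift     = 9;    // assumed 512-byte cards
constexpr uint8_t  kCleanCard     = 0xff; // assumed
constexpr uint8_t  kDirtyCard     = 0;    // assumed
// Sketch of the per-store G1 post barrier after this change.
inline void g1_post_barrier(uint8_t* thread_card_table_base,
                            void* store_addr, void* new_val,
                            bool use_cond_card_mark) {
  uintptr_t p = reinterpret_cast<uintptr_t>(store_addr);
  uintptr_t q = reinterpret_cast<uintptr_t>(new_val);
  // Stores within the same region never need a remembered set entry.
  if (((p ^ q) >> kLogRegionSize) == 0) {
    return;
  }
  // Storing null cannot create a cross-region reference.
  if (new_val == nullptr) {
    return;
  }
  // Mark the card through the thread-local card table base. Note that there is
  // no young-card filter, no StoreLoad fence and no dirty card queue any more.
  uint8_t* card = thread_card_table_base + (p >> kCardShift);
  if (use_cond_card_mark && *card != kCleanCard) {
    return;  // card already marked by an earlier store
  }
  *card = kDirtyCard;
}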
void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm,
Register store_addr,
Register new_val,
Register tmp,
Register tmp2) {
const Register thread = r15_thread;
Register tmp) {
Label done;
Label runtime;
generate_post_barrier_fast_path(masm, store_addr, new_val, tmp, tmp2, done, true /* new_val_may_be_null */);
// If card is young, jump to done
__ jcc(Assembler::equal, done);
generate_post_barrier_slow_path(masm, thread, tmp, tmp2, done, runtime);
__ bind(runtime);
// save the live input values
RegSet saved = RegSet::of(store_addr);
__ push_set(saved);
__ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), tmp, thread);
__ pop_set(saved);
generate_post_barrier_fast_path(masm, store_addr, new_val, tmp, done, true /* new_val_may_be_null */);
__ bind(done);
}
@ -367,34 +373,10 @@ void G1BarrierSetAssembler::g1_write_barrier_post_c2(MacroAssembler* masm,
Register store_addr,
Register new_val,
Register tmp,
Register tmp2,
G1PostBarrierStubC2* stub) {
const Register thread = r15_thread;
stub->initialize_registers(thread, tmp, tmp2);
bool new_val_may_be_null = (stub->barrier_data() & G1C2BarrierPostNotNull) == 0;
generate_post_barrier_fast_path(masm, store_addr, new_val, tmp, tmp2, *stub->continuation(), new_val_may_be_null);
// If card is not young, jump to stub (slow path)
__ jcc(Assembler::notEqual, *stub->entry());
__ bind(*stub->continuation());
}
void G1BarrierSetAssembler::generate_c2_post_barrier_stub(MacroAssembler* masm,
G1PostBarrierStubC2* stub) const {
Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
Label runtime;
Register thread = stub->thread();
Register tmp = stub->tmp1(); // tmp holds the card address.
Register tmp2 = stub->tmp2();
assert(stub->tmp3() == noreg, "not needed in this platform");
__ bind(*stub->entry());
generate_post_barrier_slow_path(masm, thread, tmp, tmp2, *stub->continuation(), runtime);
__ bind(runtime);
generate_c2_barrier_runtime_call(masm, stub, tmp, CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry));
__ jmp(*stub->continuation());
bool new_val_may_be_null) {
Label done;
generate_post_barrier_fast_path(masm, store_addr, new_val, tmp, done, new_val_may_be_null);
__ bind(done);
}
#endif // COMPILER2
@ -441,8 +423,7 @@ void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet deco
g1_write_barrier_post(masm /*masm*/,
tmp1 /* store_adr */,
new_val /* new_val */,
tmp3 /* tmp */,
tmp2 /* tmp2 */);
tmp3 /* tmp */);
}
}
}
@ -476,21 +457,19 @@ void G1BarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrier
}
void G1BarrierSetAssembler::gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub) {
G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
__ bind(*stub->entry());
assert(stub->addr()->is_register(), "Precondition.");
assert(stub->new_val()->is_register(), "Precondition.");
Register new_val_reg = stub->new_val()->as_register();
__ cmpptr(new_val_reg, NULL_WORD);
__ jcc(Assembler::equal, *stub->continuation());
ce->store_parameter(stub->addr()->as_pointer_register(), 0);
__ call(RuntimeAddress(bs->post_barrier_c1_runtime_code_blob()->code_begin()));
__ jmp(*stub->continuation());
}
#undef __
void G1BarrierSetAssembler::g1_write_barrier_post_c1(MacroAssembler* masm,
Register store_addr,
Register new_val,
Register thread,
Register tmp1,
Register tmp2 /* unused on x86 */) {
Label done;
generate_post_barrier_fast_path(masm, store_addr, new_val, tmp1, done, true /* new_val_may_be_null */);
masm->bind(done);
}
#define __ sasm->
void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) {
@ -555,78 +534,6 @@ void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler*
__ epilogue();
}
void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler* sasm) {
__ prologue("g1_post_barrier", false);
CardTableBarrierSet* ct =
barrier_set_cast<CardTableBarrierSet>(BarrierSet::barrier_set());
Label done;
Label enqueued;
Label runtime;
// At this point we know new_value is non-null and the new_value crosses regions.
// Must check to see if card is already dirty
const Register thread = r15_thread;
Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset()));
Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset()));
__ push_ppx(rax);
__ push_ppx(rcx);
const Register cardtable = rax;
const Register card_addr = rcx;
__ load_parameter(0, card_addr);
__ shrptr(card_addr, CardTable::card_shift());
// Do not use ExternalAddress to load 'byte_map_base', since 'byte_map_base' is NOT
// a valid address and therefore is not properly handled by the relocation code.
__ movptr(cardtable, (intptr_t)ct->card_table()->byte_map_base());
__ addptr(card_addr, cardtable);
__ cmpb(Address(card_addr, 0), G1CardTable::g1_young_card_val());
__ jcc(Assembler::equal, done);
__ membar(Assembler::Membar_mask_bits(Assembler::StoreLoad));
__ cmpb(Address(card_addr, 0), CardTable::dirty_card_val());
__ jcc(Assembler::equal, done);
// storing region crossing non-null, card is clean.
// dirty card and log.
__ movb(Address(card_addr, 0), CardTable::dirty_card_val());
const Register tmp = rdx;
__ push_ppx(rdx);
__ movptr(tmp, queue_index);
__ testptr(tmp, tmp);
__ jcc(Assembler::zero, runtime);
__ subptr(tmp, wordSize);
__ movptr(queue_index, tmp);
__ addptr(tmp, buffer);
__ movptr(Address(tmp, 0), card_addr);
__ jmp(enqueued);
__ bind(runtime);
__ push_call_clobbered_registers();
__ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, thread);
__ pop_call_clobbered_registers();
__ bind(enqueued);
__ pop_ppx(rdx);
__ bind(done);
__ pop_ppx(rcx);
__ pop_ppx(rax);
__ epilogue();
}
#undef __
#endif // COMPILER1

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2018, 2024, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -31,10 +31,8 @@
class LIR_Assembler;
class StubAssembler;
class G1PreBarrierStub;
class G1PostBarrierStub;
class G1BarrierStubC2;
class G1PreBarrierStubC2;
class G1PostBarrierStubC2;
class G1BarrierSetAssembler: public ModRefBarrierSetAssembler {
protected:
@ -51,22 +49,28 @@ class G1BarrierSetAssembler: public ModRefBarrierSetAssembler {
void g1_write_barrier_post(MacroAssembler* masm,
Register store_addr,
Register new_val,
Register tmp,
Register tmp2);
Register tmp);
virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
Address dst, Register val, Register tmp1, Register tmp2, Register tmp3);
public:
void gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub);
void gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub);
void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm);
void generate_c1_post_barrier_runtime_stub(StubAssembler* sasm);
virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
Register dst, Address src, Register tmp1);
#ifdef COMPILER1
void gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub);
void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm);
void g1_write_barrier_post_c1(MacroAssembler* masm,
Register store_addr,
Register new_val,
Register thread,
Register tmp1,
Register tmp2);
#endif
#ifdef COMPILER2
void g1_write_barrier_pre_c2(MacroAssembler* masm,
Register obj,
@ -79,10 +83,7 @@ class G1BarrierSetAssembler: public ModRefBarrierSetAssembler {
Register store_addr,
Register new_val,
Register tmp,
Register tmp2,
G1PostBarrierStubC2* c2_stub);
void generate_c2_post_barrier_stub(MacroAssembler* masm,
G1PostBarrierStubC2* stub) const;
bool new_val_may_be_null);
#endif // COMPILER2
};

View File

@ -1,5 +1,5 @@
//
// Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
// Copyright (c) 2024, 2025, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
@ -59,15 +59,14 @@ static void write_barrier_post(MacroAssembler* masm,
const MachNode* node,
Register store_addr,
Register new_val,
Register tmp1,
Register tmp2) {
if (!G1PostBarrierStubC2::needs_barrier(node)) {
Register tmp1) {
if (!G1BarrierStubC2::needs_post_barrier(node)) {
return;
}
Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
G1BarrierSetAssembler* g1_asm = static_cast<G1BarrierSetAssembler*>(BarrierSet::barrier_set()->barrier_set_assembler());
G1PostBarrierStubC2* const stub = G1PostBarrierStubC2::create(node);
g1_asm->g1_write_barrier_post_c2(masm, store_addr, new_val, tmp1, tmp2, stub);
bool new_val_may_be_null = G1BarrierStubC2::post_new_val_may_be_null(node);
g1_asm->g1_write_barrier_post_c2(masm, store_addr, new_val, tmp1, new_val_may_be_null);
}
%}
@ -95,8 +94,7 @@ instruct g1StoreP(memory mem, any_RegP src, rRegP tmp1, rRegP tmp2, rRegP tmp3,
write_barrier_post(masm, this,
$tmp1$$Register /* store_addr */,
$src$$Register /* new_val */,
$tmp3$$Register /* tmp1 */,
$tmp2$$Register /* tmp2 */);
$tmp3$$Register /* tmp1 */);
%}
ins_pipe(ialu_mem_reg);
%}
@ -127,8 +125,7 @@ instruct g1StoreN(memory mem, rRegN src, rRegP tmp1, rRegP tmp2, rRegP tmp3, rFl
write_barrier_post(masm, this,
$tmp1$$Register /* store_addr */,
$tmp2$$Register /* new_val */,
$tmp3$$Register /* tmp1 */,
$tmp2$$Register /* tmp2 */);
$tmp3$$Register /* tmp1 */);
%}
ins_pipe(ialu_mem_reg);
%}
@ -158,8 +155,7 @@ instruct g1EncodePAndStoreN(memory mem, any_RegP src, rRegP tmp1, rRegP tmp2, rR
write_barrier_post(masm, this,
$tmp1$$Register /* store_addr */,
$src$$Register /* new_val */,
$tmp3$$Register /* tmp1 */,
$tmp2$$Register /* tmp2 */);
$tmp3$$Register /* tmp1 */);
%}
ins_pipe(ialu_mem_reg);
%}
@ -187,8 +183,7 @@ instruct g1CompareAndExchangeP(indirect mem, rRegP newval, rRegP tmp1, rRegP tmp
write_barrier_post(masm, this,
$mem$$Register /* store_addr */,
$tmp1$$Register /* new_val */,
$tmp2$$Register /* tmp1 */,
$tmp3$$Register /* tmp2 */);
$tmp2$$Register /* tmp1 */);
%}
ins_pipe(pipe_cmpxchg);
%}
@ -214,8 +209,7 @@ instruct g1CompareAndExchangeN(indirect mem, rRegN newval, rRegP tmp1, rRegP tmp
write_barrier_post(masm, this,
$mem$$Register /* store_addr */,
$tmp1$$Register /* new_val */,
$tmp2$$Register /* tmp1 */,
$tmp3$$Register /* tmp2 */);
$tmp2$$Register /* tmp1 */);
%}
ins_pipe(pipe_cmpxchg);
%}
@ -246,8 +240,7 @@ instruct g1CompareAndSwapP(rRegI res, indirect mem, rRegP newval, rRegP tmp1, rR
write_barrier_post(masm, this,
$mem$$Register /* store_addr */,
$tmp1$$Register /* new_val */,
$tmp2$$Register /* tmp1 */,
$tmp3$$Register /* tmp2 */);
$tmp2$$Register /* tmp1 */);
%}
ins_pipe(pipe_cmpxchg);
%}
@ -279,8 +272,7 @@ instruct g1CompareAndSwapN(rRegI res, indirect mem, rRegN newval, rRegP tmp1, rR
write_barrier_post(masm, this,
$mem$$Register /* store_addr */,
$tmp1$$Register /* new_val */,
$tmp2$$Register /* tmp1 */,
$tmp3$$Register /* tmp2 */);
$tmp2$$Register /* tmp1 */);
%}
ins_pipe(pipe_cmpxchg);
%}
@ -303,8 +295,7 @@ instruct g1GetAndSetP(indirect mem, rRegP newval, rRegP tmp1, rRegP tmp2, rRegP
write_barrier_post(masm, this,
$mem$$Register /* store_addr */,
$tmp1$$Register /* new_val */,
$tmp2$$Register /* tmp1 */,
$tmp3$$Register /* tmp2 */);
$tmp2$$Register /* tmp1 */);
%}
ins_pipe(pipe_cmpxchg);
%}
@ -328,8 +319,7 @@ instruct g1GetAndSetN(indirect mem, rRegN newval, rRegP tmp1, rRegP tmp2, rRegP
write_barrier_post(masm, this,
$mem$$Register /* store_addr */,
$tmp1$$Register /* new_val */,
$tmp2$$Register /* tmp1 */,
$tmp3$$Register /* tmp2 */);
$tmp2$$Register /* tmp1 */);
%}
ins_pipe(pipe_cmpxchg);
%}

View File

@ -1365,7 +1365,6 @@ void AOTCodeAddressTable::init_extrs() {
#endif // COMPILER2
#if INCLUDE_G1GC
SET_ADDRESS(_extrs, G1BarrierSetRuntime::write_ref_field_post_entry);
SET_ADDRESS(_extrs, G1BarrierSetRuntime::write_ref_field_pre_entry);
#endif
#if INCLUDE_SHENANDOAHGC

View File

@ -23,12 +23,15 @@
*/
#include "c1/c1_CodeStubs.hpp"
#include "c1/c1_LIRAssembler.hpp"
#include "c1/c1_LIRGenerator.hpp"
#include "c1/c1_MacroAssembler.hpp"
#include "gc/g1/c1/g1BarrierSetC1.hpp"
#include "gc/g1/g1BarrierSet.hpp"
#include "gc/g1/g1BarrierSetAssembler.hpp"
#include "gc/g1/g1HeapRegion.hpp"
#include "gc/g1/g1ThreadLocalData.hpp"
#include "utilities/formatBuffer.hpp"
#include "utilities/macros.hpp"
#ifdef ASSERT
@ -42,11 +45,6 @@ void G1PreBarrierStub::emit_code(LIR_Assembler* ce) {
bs->gen_pre_barrier_stub(ce, this);
}
void G1PostBarrierStub::emit_code(LIR_Assembler* ce) {
G1BarrierSetAssembler* bs = (G1BarrierSetAssembler*)BarrierSet::barrier_set()->barrier_set_assembler();
bs->gen_post_barrier_stub(ce, this);
}
void G1BarrierSetC1::pre_barrier(LIRAccess& access, LIR_Opr addr_opr,
LIR_Opr pre_val, CodeEmitInfo* info) {
LIRGenerator* gen = access.gen();
@ -114,6 +112,87 @@ void G1BarrierSetC1::pre_barrier(LIRAccess& access, LIR_Opr addr_opr,
__ branch_destination(slow->continuation());
}
class LIR_OpG1PostBarrier : public LIR_Op {
friend class LIR_OpVisitState;
private:
LIR_Opr _addr;
LIR_Opr _new_val;
LIR_Opr _thread;
LIR_Opr _tmp1;
LIR_Opr _tmp2;
public:
LIR_OpG1PostBarrier(LIR_Opr addr,
LIR_Opr new_val,
LIR_Opr thread,
LIR_Opr tmp1,
LIR_Opr tmp2)
: LIR_Op(lir_none, lir_none, nullptr),
_addr(addr),
_new_val(new_val),
_thread(thread),
_tmp1(tmp1),
_tmp2(tmp2)
{}
virtual void visit(LIR_OpVisitState* state) {
state->do_input(_addr);
state->do_input(_new_val);
state->do_input(_thread);
// Use temps to enforce different registers.
state->do_temp(_addr);
state->do_temp(_new_val);
state->do_temp(_thread);
state->do_temp(_tmp1);
state->do_temp(_tmp2);
if (_info != nullptr) {
state->do_info(_info);
}
}
virtual void emit_code(LIR_Assembler* ce) {
if (_info != nullptr) {
ce->add_debug_info_for_null_check_here(_info);
}
Register addr = _addr->as_pointer_register();
Register new_val = _new_val->as_pointer_register();
Register thread = _thread->as_pointer_register();
Register tmp1 = _tmp1->as_pointer_register();
Register tmp2 = _tmp2->as_pointer_register();
// This may happen for a store of x.a = x - we do not need a post barrier for such
// stores as the cross-region test will always exit early anyway.
// The post barrier implementations can therefore assume that addr and new_val
// are different.
if (addr == new_val) {
ce->masm()->block_comment(err_msg("same addr/new_val due to self-referential store with imprecise card mark %s", addr->name()));
return;
}
G1BarrierSetAssembler* bs_asm = static_cast<G1BarrierSetAssembler*>(BarrierSet::barrier_set()->barrier_set_assembler());
bs_asm->g1_write_barrier_post_c1(ce->masm(), addr, new_val, thread, tmp1, tmp2);
}
virtual void print_instr(outputStream* out) const {
_addr->print(out); out->print(" ");
_new_val->print(out); out->print(" ");
_thread->print(out); out->print(" ");
_tmp1->print(out); out->print(" ");
_tmp2->print(out); out->print(" ");
out->cr();
}
#ifndef PRODUCT
virtual const char* name() const {
return "lir_g1_post_barrier";
}
#endif // PRODUCT
};
void G1BarrierSetC1::post_barrier(LIRAccess& access, LIR_Opr addr, LIR_Opr new_val) {
LIRGenerator* gen = access.gen();
DecoratorSet decorators = access.decorators();
@ -150,29 +229,11 @@ void G1BarrierSetC1::post_barrier(LIRAccess& access, LIR_Opr addr, LIR_Opr new_v
}
assert(addr->is_register(), "must be a register at this point");
LIR_Opr xor_res = gen->new_pointer_register();
LIR_Opr xor_shift_res = gen->new_pointer_register();
if (two_operand_lir_form) {
__ move(addr, xor_res);
__ logical_xor(xor_res, new_val, xor_res);
__ move(xor_res, xor_shift_res);
__ unsigned_shift_right(xor_shift_res,
LIR_OprFact::intConst(checked_cast<jint>(G1HeapRegion::LogOfHRGrainBytes)),
xor_shift_res,
LIR_Opr::illegalOpr());
} else {
__ logical_xor(addr, new_val, xor_res);
__ unsigned_shift_right(xor_res,
LIR_OprFact::intConst(checked_cast<jint>(G1HeapRegion::LogOfHRGrainBytes)),
xor_shift_res,
LIR_Opr::illegalOpr());
}
__ cmp(lir_cond_notEqual, xor_shift_res, LIR_OprFact::intptrConst(NULL_WORD));
CodeStub* slow = new G1PostBarrierStub(addr, new_val);
__ branch(lir_cond_notEqual, slow);
__ branch_destination(slow->continuation());
__ append(new LIR_OpG1PostBarrier(addr,
new_val,
gen->getThreadPointer() /* thread */,
gen->new_pointer_register() /* tmp1 */,
gen->new_pointer_register() /* tmp2 */));
}
void G1BarrierSetC1::load_at_resolved(LIRAccess& access, LIR_Opr result) {
@ -207,20 +268,9 @@ class C1G1PreBarrierCodeGenClosure : public StubAssemblerCodeGenClosure {
}
};
class C1G1PostBarrierCodeGenClosure : public StubAssemblerCodeGenClosure {
virtual OopMapSet* generate_code(StubAssembler* sasm) {
G1BarrierSetAssembler* bs = (G1BarrierSetAssembler*)BarrierSet::barrier_set()->barrier_set_assembler();
bs->generate_c1_post_barrier_runtime_stub(sasm);
return nullptr;
}
};
bool G1BarrierSetC1::generate_c1_runtime_stubs(BufferBlob* buffer_blob) {
C1G1PreBarrierCodeGenClosure pre_code_gen_cl;
C1G1PostBarrierCodeGenClosure post_code_gen_cl;
_pre_barrier_c1_runtime_code_blob = Runtime1::generate_blob(buffer_blob, StubId::NO_STUBID, "g1_pre_barrier_slow",
false, &pre_code_gen_cl);
_post_barrier_c1_runtime_code_blob = Runtime1::generate_blob(buffer_blob, StubId::NO_STUBID, "g1_post_barrier_slow",
false, &post_code_gen_cl);
return _pre_barrier_c1_runtime_code_blob != nullptr && _post_barrier_c1_runtime_code_blob != nullptr;
return _pre_barrier_c1_runtime_code_blob != nullptr;
}

View File

@ -91,40 +91,11 @@ class G1PreBarrierStub: public CodeStub {
#endif // PRODUCT
};
class G1PostBarrierStub: public CodeStub {
friend class G1BarrierSetC1;
private:
LIR_Opr _addr;
LIR_Opr _new_val;
public:
// addr (the address of the object head) and new_val must be registers.
G1PostBarrierStub(LIR_Opr addr, LIR_Opr new_val): _addr(addr), _new_val(new_val) {
FrameMap* f = Compilation::current()->frame_map();
f->update_reserved_argument_area_size(2 * BytesPerWord);
}
LIR_Opr addr() const { return _addr; }
LIR_Opr new_val() const { return _new_val; }
virtual void emit_code(LIR_Assembler* e);
virtual void visit(LIR_OpVisitState* visitor) {
// don't pass in the code emit info since it's processed in the fast path
visitor->do_slow_case();
visitor->do_input(_addr);
visitor->do_input(_new_val);
}
#ifndef PRODUCT
virtual void print_name(outputStream* out) const { out->print("G1PostBarrierStub"); }
#endif // PRODUCT
};
class CodeBlob;
class G1BarrierSetC1 : public ModRefBarrierSetC1 {
protected:
CodeBlob* _pre_barrier_c1_runtime_code_blob;
CodeBlob* _post_barrier_c1_runtime_code_blob;
virtual void pre_barrier(LIRAccess& access, LIR_Opr addr_opr,
LIR_Opr pre_val, CodeEmitInfo* info);
@ -134,11 +105,9 @@ class G1BarrierSetC1 : public ModRefBarrierSetC1 {
public:
G1BarrierSetC1()
: _pre_barrier_c1_runtime_code_blob(nullptr),
_post_barrier_c1_runtime_code_blob(nullptr) {}
: _pre_barrier_c1_runtime_code_blob(nullptr) {}
CodeBlob* pre_barrier_c1_runtime_code_blob() { return _pre_barrier_c1_runtime_code_blob; }
CodeBlob* post_barrier_c1_runtime_code_blob() { return _post_barrier_c1_runtime_code_blob; }
virtual bool generate_c1_runtime_stubs(BufferBlob* buffer_blob);
};

View File

@ -298,7 +298,13 @@ uint G1BarrierSetC2::estimated_barrier_size(const Node* node) const {
nodes += 6;
}
if ((barrier_data & G1C2BarrierPost) != 0) {
nodes += 60;
// Approximate the number of nodes needed; an if costs 4 nodes (Cmp, Bool,
// If, If projection), any other (Assembly) instruction is approximated with
// a cost of 1.
nodes += 4 // base cost for the card write: getting the card table base offset, address calculation and the card write itself
+ 6 // same region check: Uncompress (new_val) oop, xor, shr, (cmp), jmp
+ 4 // new_val is null check
+ (UseCondCardMark ? 4 : 0); // card not clean check.
}
return nodes;
}
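As a worked example of the new estimate: with UseCondCardMark enabled (which this change makes the ergonomic default for G1), a post barrier is costed at 4 + 6 + 4 + 4 = 18 nodes; with conditional card marking disabled it is 4 + 6 + 4 = 14 nodes, compared to the previous flat estimate of 60.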
@ -386,8 +392,9 @@ public:
}
bool needs_liveness_data(const MachNode* mach) const {
return G1PreBarrierStubC2::needs_barrier(mach) ||
G1PostBarrierStubC2::needs_barrier(mach);
// Liveness data is only required to compute registers that must be preserved
// across the runtime call in the pre-barrier stub.
return G1BarrierStubC2::needs_pre_barrier(mach);
}
bool needs_livein_data() const {
@ -401,10 +408,22 @@ static G1BarrierSetC2State* barrier_set_state() {
G1BarrierStubC2::G1BarrierStubC2(const MachNode* node) : BarrierStubC2(node) {}
bool G1BarrierStubC2::needs_pre_barrier(const MachNode* node) {
return (node->barrier_data() & G1C2BarrierPre) != 0;
}
bool G1BarrierStubC2::needs_post_barrier(const MachNode* node) {
return (node->barrier_data() & G1C2BarrierPost) != 0;
}
bool G1BarrierStubC2::post_new_val_may_be_null(const MachNode* node) {
return (node->barrier_data() & G1C2BarrierPostNotNull) == 0;
}
G1PreBarrierStubC2::G1PreBarrierStubC2(const MachNode* node) : G1BarrierStubC2(node) {}
bool G1PreBarrierStubC2::needs_barrier(const MachNode* node) {
return (node->barrier_data() & G1C2BarrierPre) != 0;
return needs_pre_barrier(node);
}
G1PreBarrierStubC2* G1PreBarrierStubC2::create(const MachNode* node) {
@ -448,48 +467,6 @@ void G1PreBarrierStubC2::emit_code(MacroAssembler& masm) {
bs->generate_c2_pre_barrier_stub(&masm, this);
}
G1PostBarrierStubC2::G1PostBarrierStubC2(const MachNode* node) : G1BarrierStubC2(node) {}
bool G1PostBarrierStubC2::needs_barrier(const MachNode* node) {
return (node->barrier_data() & G1C2BarrierPost) != 0;
}
G1PostBarrierStubC2* G1PostBarrierStubC2::create(const MachNode* node) {
G1PostBarrierStubC2* const stub = new (Compile::current()->comp_arena()) G1PostBarrierStubC2(node);
if (!Compile::current()->output()->in_scratch_emit_size()) {
barrier_set_state()->stubs()->append(stub);
}
return stub;
}
void G1PostBarrierStubC2::initialize_registers(Register thread, Register tmp1, Register tmp2, Register tmp3) {
_thread = thread;
_tmp1 = tmp1;
_tmp2 = tmp2;
_tmp3 = tmp3;
}
Register G1PostBarrierStubC2::thread() const {
return _thread;
}
Register G1PostBarrierStubC2::tmp1() const {
return _tmp1;
}
Register G1PostBarrierStubC2::tmp2() const {
return _tmp2;
}
Register G1PostBarrierStubC2::tmp3() const {
return _tmp3;
}
void G1PostBarrierStubC2::emit_code(MacroAssembler& masm) {
G1BarrierSetAssembler* bs = static_cast<G1BarrierSetAssembler*>(BarrierSet::barrier_set()->barrier_set_assembler());
bs->generate_c2_post_barrier_stub(&masm, this);
}
void* G1BarrierSetC2::create_barrier_state(Arena* comp_arena) const {
return new (comp_arena) G1BarrierSetC2State(comp_arena);
}

View File

@ -37,6 +37,10 @@ const int G1C2BarrierPostNotNull = 4;
class G1BarrierStubC2 : public BarrierStubC2 {
public:
static bool needs_pre_barrier(const MachNode* node);
static bool needs_post_barrier(const MachNode* node);
static bool post_new_val_may_be_null(const MachNode* node);
G1BarrierStubC2(const MachNode* node);
virtual void emit_code(MacroAssembler& masm) = 0;
};
@ -64,27 +68,6 @@ public:
virtual void emit_code(MacroAssembler& masm);
};
class G1PostBarrierStubC2 : public G1BarrierStubC2 {
private:
Register _thread;
Register _tmp1;
Register _tmp2;
Register _tmp3;
protected:
G1PostBarrierStubC2(const MachNode* node);
public:
static bool needs_barrier(const MachNode* node);
static G1PostBarrierStubC2* create(const MachNode* node);
void initialize_registers(Register thread, Register tmp1 = noreg, Register tmp2 = noreg, Register tmp3 = noreg);
Register thread() const;
Register tmp1() const;
Register tmp2() const;
Register tmp3() const;
virtual void emit_code(MacroAssembler& masm);
};
class G1BarrierSetC2: public CardTableBarrierSetC2 {
private:
void analyze_dominating_barriers() const;

View File

@ -262,9 +262,6 @@ HeapWord* G1Allocator::survivor_attempt_allocation(uint node_index,
}
}
}
if (result != nullptr) {
_g1h->dirty_young_block(result, *actual_word_size);
}
return result;
}

View File

@ -37,12 +37,10 @@
// They were chosen by running GCOld and SPECjbb on debris with different
// numbers of GC threads and choosing them based on the results
static double cost_per_logged_card_ms_defaults[] = {
0.01, 0.005, 0.005, 0.003, 0.003, 0.002, 0.002, 0.0015
};
static double cost_per_pending_card_ms_default = 0.01;
// all the same
static double young_card_scan_to_merge_ratio_defaults[] = {
static double young_card_merge_to_scan_ratio_defaults[] = {
1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0
};
@ -78,8 +76,7 @@ G1Analytics::G1Analytics(const G1Predictions* predictor) :
_concurrent_gc_cpu_time_ms(),
_concurrent_refine_rate_ms_seq(TruncatedSeqLength),
_dirtied_cards_rate_ms_seq(TruncatedSeqLength),
_dirtied_cards_in_thread_buffers_seq(TruncatedSeqLength),
_card_scan_to_merge_ratio_seq(TruncatedSeqLength),
_card_merge_to_scan_ratio_seq(TruncatedSeqLength),
_cost_per_card_scan_ms_seq(TruncatedSeqLength),
_cost_per_card_merge_ms_seq(TruncatedSeqLength),
_cost_per_code_root_ms_seq(TruncatedSeqLength),
@ -87,6 +84,7 @@ G1Analytics::G1Analytics(const G1Predictions* predictor) :
_pending_cards_seq(TruncatedSeqLength),
_card_rs_length_seq(TruncatedSeqLength),
_code_root_rs_length_seq(TruncatedSeqLength),
_merge_refinement_table_ms_seq(TruncatedSeqLength),
_constant_other_time_ms_seq(TruncatedSeqLength),
_young_other_cost_per_region_ms_seq(TruncatedSeqLength),
_non_young_other_cost_per_region_ms_seq(TruncatedSeqLength),
@ -100,17 +98,17 @@ G1Analytics::G1Analytics(const G1Predictions* predictor) :
uint index = MIN2(ParallelGCThreads - 1, 7u);
// Start with inverse of maximum STW cost.
_concurrent_refine_rate_ms_seq.add(1/cost_per_logged_card_ms_defaults[0]);
// Some applications have very low rates for logging cards.
_concurrent_refine_rate_ms_seq.add(1 / cost_per_pending_card_ms_default);
// Some applications have very low rates for dirtying cards.
_dirtied_cards_rate_ms_seq.add(0.0);
_card_scan_to_merge_ratio_seq.set_initial(young_card_scan_to_merge_ratio_defaults[index]);
_card_merge_to_scan_ratio_seq.set_initial(young_card_merge_to_scan_ratio_defaults[index]);
_cost_per_card_scan_ms_seq.set_initial(young_only_cost_per_card_scan_ms_defaults[index]);
_card_rs_length_seq.set_initial(0);
_code_root_rs_length_seq.set_initial(0);
_cost_per_byte_copied_ms_seq.set_initial(cost_per_byte_ms_defaults[index]);
_merge_refinement_table_ms_seq.add(0);
_constant_other_time_ms_seq.add(constant_other_time_ms_defaults[index]);
_young_other_cost_per_region_ms_seq.add(young_other_cost_per_region_ms_defaults[index]);
_non_young_other_cost_per_region_ms_seq.add(non_young_other_cost_per_region_ms_defaults[index]);
@ -196,10 +194,6 @@ void G1Analytics::report_dirtied_cards_rate_ms(double cards_per_ms) {
_dirtied_cards_rate_ms_seq.add(cards_per_ms);
}
void G1Analytics::report_dirtied_cards_in_thread_buffers(size_t cards) {
_dirtied_cards_in_thread_buffers_seq.add(double(cards));
}
void G1Analytics::report_cost_per_card_scan_ms(double cost_per_card_ms, bool for_young_only_phase) {
_cost_per_card_scan_ms_seq.add(cost_per_card_ms, for_young_only_phase);
}
@ -212,8 +206,8 @@ void G1Analytics::report_cost_per_code_root_scan_ms(double cost_per_code_root_ms
_cost_per_code_root_ms_seq.add(cost_per_code_root_ms, for_young_only_phase);
}
void G1Analytics::report_card_scan_to_merge_ratio(double merge_to_scan_ratio, bool for_young_only_phase) {
_card_scan_to_merge_ratio_seq.add(merge_to_scan_ratio, for_young_only_phase);
void G1Analytics::report_card_merge_to_scan_ratio(double merge_to_scan_ratio, bool for_young_only_phase) {
_card_merge_to_scan_ratio_seq.add(merge_to_scan_ratio, for_young_only_phase);
}
void G1Analytics::report_cost_per_byte_ms(double cost_per_byte_ms, bool for_young_only_phase) {
@ -228,6 +222,10 @@ void G1Analytics::report_non_young_other_cost_per_region_ms(double other_cost_pe
_non_young_other_cost_per_region_ms_seq.add(other_cost_per_region_ms);
}
void G1Analytics::report_merge_refinement_table_time_ms(double merge_refinement_table_time_ms) {
_merge_refinement_table_ms_seq.add(merge_refinement_table_time_ms);
}
void G1Analytics::report_constant_other_time_ms(double constant_other_time_ms) {
_constant_other_time_ms_seq.add(constant_other_time_ms);
}
@ -260,12 +258,8 @@ double G1Analytics::predict_dirtied_cards_rate_ms() const {
return predict_zero_bounded(&_dirtied_cards_rate_ms_seq);
}
size_t G1Analytics::predict_dirtied_cards_in_thread_buffers() const {
return predict_size(&_dirtied_cards_in_thread_buffers_seq);
}
size_t G1Analytics::predict_scan_card_num(size_t card_rs_length, bool for_young_only_phase) const {
return card_rs_length * predict_in_unit_interval(&_card_scan_to_merge_ratio_seq, for_young_only_phase);
return card_rs_length * predict_in_unit_interval(&_card_merge_to_scan_ratio_seq, for_young_only_phase);
}
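For example, assuming a predicted merge-to-scan ratio of 0.6 (predict_in_unit_interval clamps the value to the unit interval) and a card_rs_length of 50000, the predicted number of cards to scan is 50000 * 0.6 = 30000.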
double G1Analytics::predict_card_merge_time_ms(size_t card_num, bool for_young_only_phase) const {
@ -284,6 +278,10 @@ double G1Analytics::predict_object_copy_time_ms(size_t bytes_to_copy, bool for_y
return bytes_to_copy * predict_zero_bounded(&_cost_per_byte_copied_ms_seq, for_young_only_phase);
}
double G1Analytics::predict_merge_refinement_table_time_ms() const {
return predict_zero_bounded(&_merge_refinement_table_ms_seq);
}
double G1Analytics::predict_constant_other_time_ms() const {
return predict_zero_bounded(&_constant_other_time_ms_seq);
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2016, 2022, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -56,14 +56,13 @@ class G1Analytics: public CHeapObj<mtGC> {
TruncatedSeq _concurrent_refine_rate_ms_seq;
TruncatedSeq _dirtied_cards_rate_ms_seq;
TruncatedSeq _dirtied_cards_in_thread_buffers_seq;
// The ratio between the number of scanned cards and actually merged cards, for
// young-only and mixed gcs.
G1PhaseDependentSeq _card_scan_to_merge_ratio_seq;
// The ratio of the number of merged cards to actually scanned cards for
// card-based remembered sets, for young-only and mixed gcs.
G1PhaseDependentSeq _card_merge_to_scan_ratio_seq;
// The cost to scan a card during young-only and mixed gcs in ms.
G1PhaseDependentSeq _cost_per_card_scan_ms_seq;
// The cost to merge a card during young-only and mixed gcs in ms.
// The cost to merge a card from the remembered sets for non-young regions in ms.
G1PhaseDependentSeq _cost_per_card_merge_ms_seq;
// The cost to scan entries in the code root remembered set in ms.
G1PhaseDependentSeq _cost_per_code_root_ms_seq;
@ -74,6 +73,8 @@ class G1Analytics: public CHeapObj<mtGC> {
G1PhaseDependentSeq _card_rs_length_seq;
G1PhaseDependentSeq _code_root_rs_length_seq;
// Prediction for the time to merge the refinement table into the card table during GC.
TruncatedSeq _merge_refinement_table_ms_seq;
TruncatedSeq _constant_other_time_ms_seq;
TruncatedSeq _young_other_cost_per_region_ms_seq;
TruncatedSeq _non_young_other_cost_per_region_ms_seq;
@ -149,14 +150,14 @@ public:
void report_alloc_rate_ms(double alloc_rate);
void report_concurrent_refine_rate_ms(double cards_per_ms);
void report_dirtied_cards_rate_ms(double cards_per_ms);
void report_dirtied_cards_in_thread_buffers(size_t num_cards);
void report_cost_per_card_scan_ms(double cost_per_remset_card_ms, bool for_young_only_phase);
void report_cost_per_card_merge_ms(double cost_per_card_ms, bool for_young_only_phase);
void report_cost_per_code_root_scan_ms(double cost_per_code_root_ms, bool for_young_only_phase);
void report_card_scan_to_merge_ratio(double cards_per_entry_ratio, bool for_young_only_phase);
void report_card_merge_to_scan_ratio(double merge_to_scan_ratio, bool for_young_only_phase);
void report_cost_per_byte_ms(double cost_per_byte_ms, bool for_young_only_phase);
void report_young_other_cost_per_region_ms(double other_cost_per_region_ms);
void report_non_young_other_cost_per_region_ms(double other_cost_per_region_ms);
void report_merge_refinement_table_time_ms(double pending_card_merge_time_ms);
void report_constant_other_time_ms(double constant_other_time_ms);
void report_pending_cards(double pending_cards, bool for_young_only_phase);
void report_card_rs_length(double card_rs_length, bool for_young_only_phase);
@ -167,7 +168,6 @@ public:
double predict_concurrent_refine_rate_ms() const;
double predict_dirtied_cards_rate_ms() const;
size_t predict_dirtied_cards_in_thread_buffers() const;
// Predict how many of the given remembered set of length card_rs_length will add to
// the number of total cards scanned.
@ -180,6 +180,7 @@ public:
double predict_object_copy_time_ms(size_t bytes_to_copy, bool for_young_only_phase) const;
double predict_merge_refinement_table_time_ms() const;
double predict_constant_other_time_ms() const;
double predict_young_other_time_ms(size_t young_num) const;

View File

@ -68,6 +68,12 @@ void G1Arguments::initialize_alignments() {
if (FLAG_IS_DEFAULT(G1EagerReclaimRemSetThreshold)) {
FLAG_SET_ERGO(G1EagerReclaimRemSetThreshold, G1RemSetArrayOfCardsEntries);
}
// G1 prefers to use conditional card marking to avoid overwriting cards that
// have already been found to contain a to-collection-set reference. This reduces
// refinement effort.
if (FLAG_IS_DEFAULT(UseCondCardMark)) {
FLAG_SET_ERGO(UseCondCardMark, true);
}
}
size_t G1Arguments::conservative_max_heap_alignment() {
@ -241,9 +247,8 @@ void G1Arguments::initialize() {
// Verify that the maximum parallelism isn't too high to eventually overflow
// the refcount in G1CardSetContainer.
uint max_parallel_refinement_threads = G1ConcRefinementThreads + G1DirtyCardQueueSet::num_par_ids();
uint const divisor = 3; // Safe divisor; we increment by 2 for each claim, but there is a small initial value.
if (max_parallel_refinement_threads > UINT_MAX / divisor) {
if (G1ConcRefinementThreads > UINT_MAX / divisor) {
vm_exit_during_initialization("Too large parallelism for remembered sets.");
}

View File

@ -32,12 +32,14 @@
#include "gc/g1/g1ThreadLocalData.hpp"
#include "gc/shared/satbMarkQueue.hpp"
#include "logging/log.hpp"
#include "memory/iterator.hpp"
#include "oops/access.inline.hpp"
#include "oops/compressedOops.inline.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/interfaceSupport.inline.hpp"
#include "runtime/javaThread.hpp"
#include "runtime/orderAccess.hpp"
#include "runtime/threads.hpp"
#include "utilities/macros.hpp"
#ifdef COMPILER1
#include "gc/g1/c1/g1BarrierSetC1.hpp"
@ -49,18 +51,38 @@
class G1BarrierSetC1;
class G1BarrierSetC2;
G1BarrierSet::G1BarrierSet(G1CardTable* card_table) :
G1BarrierSet::G1BarrierSet(G1CardTable* card_table,
G1CardTable* refinement_table) :
CardTableBarrierSet(make_barrier_set_assembler<G1BarrierSetAssembler>(),
make_barrier_set_c1<G1BarrierSetC1>(),
make_barrier_set_c2<G1BarrierSetC2>(),
card_table,
BarrierSet::FakeRtti(BarrierSet::G1BarrierSet)),
_satb_mark_queue_buffer_allocator("SATB Buffer Allocator", G1SATBBufferSize),
_dirty_card_queue_buffer_allocator("DC Buffer Allocator", G1UpdateBufferSize),
_satb_mark_queue_set(&_satb_mark_queue_buffer_allocator),
_dirty_card_queue_set(&_dirty_card_queue_buffer_allocator)
_refinement_table(refinement_table)
{}
G1BarrierSet::~G1BarrierSet() {
delete _refinement_table;
}
void G1BarrierSet::swap_global_card_table() {
G1CardTable* temp = static_cast<G1CardTable*>(_card_table);
_card_table = _refinement_table;
_refinement_table = temp;
}
void G1BarrierSet::update_card_table_base(Thread* thread) {
#ifdef ASSERT
{
ResourceMark rm;
assert(thread->is_Java_thread(), "may only update card table base of JavaThreads, not %s", thread->name());
}
#endif
G1ThreadLocalData::set_byte_map_base(thread, _card_table->byte_map_base());
}
template <class T> void
G1BarrierSet::write_ref_array_pre_work(T* dst, size_t count) {
G1SATBMarkQueueSet& queue_set = G1BarrierSet::satb_mark_queue_set();
@ -89,28 +111,14 @@ void G1BarrierSet::write_ref_array_pre(narrowOop* dst, size_t count, bool dest_u
}
}
void G1BarrierSet::write_ref_field_post_slow(volatile CardValue* byte) {
// In the slow path, we know a card is not young
assert(*byte != G1CardTable::g1_young_card_val(), "slow path invoked without filtering");
OrderAccess::storeload();
if (*byte != G1CardTable::dirty_card_val()) {
*byte = G1CardTable::dirty_card_val();
Thread* thr = Thread::current();
G1DirtyCardQueue& queue = G1ThreadLocalData::dirty_card_queue(thr);
G1BarrierSet::dirty_card_queue_set().enqueue(queue, byte);
}
}
void G1BarrierSet::write_region(JavaThread* thread, MemRegion mr) {
if (mr.is_empty()) {
return;
}
volatile CardValue* byte = _card_table->byte_for(mr.start());
CardValue* last_byte = _card_table->byte_for(mr.last());
// skip young gen cards
if (*byte == G1CardTable::g1_young_card_val()) {
// MemRegion should not span multiple regions for the young gen.
// Skip writes to young gen.
if (G1CollectedHeap::heap()->heap_region_containing(mr.start())->is_young()) {
// MemRegion should not span multiple regions for arrays in young gen.
DEBUG_ONLY(G1HeapRegion* containing_hr = G1CollectedHeap::heap()->heap_region_containing(mr.start());)
assert(containing_hr->is_young(), "it should be young");
assert(containing_hr->is_in(mr.start()), "it should contain start");
@ -118,16 +126,25 @@ void G1BarrierSet::write_region(JavaThread* thread, MemRegion mr) {
return;
}
OrderAccess::storeload();
// Enqueue if necessary.
G1DirtyCardQueueSet& qset = G1BarrierSet::dirty_card_queue_set();
G1DirtyCardQueue& queue = G1ThreadLocalData::dirty_card_queue(thread);
// We need to make sure that we get the start/end byte information for the area
// to mark from the same card table to avoid getting confused in the mark loop
// further below - we might execute while the global card table is being switched.
//
// It does not matter which card table we write to: at worst we may write to the
// new card table (after the switch), which means that we will catch the
// marks next time.
// If we write to the old card table (which after the switch has become the
// refinement table), the upcoming handshake will do the memory synchronization.
CardTable* card_table = AtomicAccess::load(&_card_table);
volatile CardValue* byte = card_table->byte_for(mr.start());
CardValue* last_byte = card_table->byte_for(mr.last());
// Dirty cards only if necessary.
for (; byte <= last_byte; byte++) {
CardValue bv = *byte;
assert(bv != G1CardTable::g1_young_card_val(), "Invalid card");
if (bv != G1CardTable::dirty_card_val()) {
if (bv == G1CardTable::clean_card_val()) {
*byte = G1CardTable::dirty_card_val();
qset.enqueue(queue, byte);
}
}
}
@ -148,14 +165,15 @@ void G1BarrierSet::on_thread_attach(Thread* thread) {
assert(!satbq.is_active(), "SATB queue should not be active");
assert(satbq.buffer() == nullptr, "SATB queue should not have a buffer");
assert(satbq.index() == 0, "SATB queue index should be zero");
G1DirtyCardQueue& dirtyq = G1ThreadLocalData::dirty_card_queue(thread);
assert(dirtyq.buffer() == nullptr, "Dirty Card queue should not have a buffer");
assert(dirtyq.index() == 0, "Dirty Card queue index should be zero");
// If we are creating the thread during a marking cycle, we should
// set the active field of the SATB queue to true. That involves
// copying the global is_active value to this thread's queue.
satbq.set_active(_satb_mark_queue_set.is_active());
if (thread->is_Java_thread()) {
assert(Threads_lock->is_locked(), "must be, synchronization with refinement.");
update_card_table_base(thread);
}
}
void G1BarrierSet::on_thread_detach(Thread* thread) {
@ -165,14 +183,13 @@ void G1BarrierSet::on_thread_detach(Thread* thread) {
SATBMarkQueue& queue = G1ThreadLocalData::satb_mark_queue(thread);
G1BarrierSet::satb_mark_queue_set().flush_queue(queue);
}
{
G1DirtyCardQueue& queue = G1ThreadLocalData::dirty_card_queue(thread);
G1DirtyCardQueueSet& qset = G1BarrierSet::dirty_card_queue_set();
qset.flush_queue(queue);
qset.record_detached_refinement_stats(queue.refinement_stats());
}
{
G1RegionPinCache& cache = G1ThreadLocalData::pin_count_cache(thread);
cache.flush();
}
}
void G1BarrierSet::print_on(outputStream* st) const {
_card_table->print_on(st, "Card");
_refinement_table->print_on(st, "Refinement");
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2001, 2024, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2001, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -25,32 +25,65 @@
#ifndef SHARE_GC_G1_G1BARRIERSET_HPP
#define SHARE_GC_G1_G1BARRIERSET_HPP
#include "gc/g1/g1DirtyCardQueue.hpp"
#include "gc/g1/g1SATBMarkQueueSet.hpp"
#include "gc/shared/bufferNode.hpp"
#include "gc/shared/cardTable.hpp"
#include "gc/shared/cardTableBarrierSet.hpp"
class G1CardTable;
class Thread;
// This barrier is specialized to use a logging barrier to support
// snapshot-at-the-beginning marking.
// This barrier set is specialized to manage two card tables:
// * one the mutator is currently working on ("card table")
// * one the refinement threads or GC during pause are working on ("refinement table")
//
// The card table acts like a regular card table where the mutator dirties cards
// containing potentially interesting references.
//
// When the number of dirty cards on the card table exceeds a threshold, G1 swaps
// the card tables and has the refinement threads process the dirty cards by
// "refining" them.
// I.e. refinement looks at all dirty cards on the refinement table, and updates
// the remembered sets accordingly, clearing the cards on the refinement table.
//
// Meanwhile the mutator continues dirtying the now empty card table.
//
// This separation of data the mutator and refinement threads are working on
// removes the need for any fine-grained (per mutator write) synchronization between
// them, keeping the write barrier simple.
//
// The refinement threads mark cards in the current collection set specially on the
// card table - this is fine wrt synchronization with the mutator, because at
// most the mutator will overwrite it again if there is a race, as G1 will scan the
// entire card either way during the GC pause.
//
// During garbage collection, if the refinement table is known to be non-empty, G1
// merges it back into the card table (cleaning the refinement table in the process);
// the card table is then scanned for dirty cards.
//
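The practical consequence of the design above is that the mutator's post-write barrier shrinks to a plain "if the card is clean, make it dirty" store on the current card table, with no enqueueing and no StoreLoad fence. The following stand-alone sketch is illustrative only and not part of this patch; the example names, the 512-byte card size and the biased byte_map_base are assumptions, while the actual HotSpot code is the inline G1BarrierSet::write_ref_field_post shown further down in this change.
#include <cstdint>
namespace example {                       // illustrative model, not HotSpot code
  using CardValue = uint8_t;
  constexpr CardValue clean_card = 0xff;  // LSB == 1 -> clean
  constexpr CardValue dirty_card = 0x00;  // LSB == 0 -> non-clean
  constexpr int card_shift = 9;           // 512-byte cards (assumed)
  struct CardTableModel {
    CardValue* byte_map_base;             // biased base, indexed by (address >> card_shift)
    // Mutator post-write barrier under the two-table design: no queueing, no
    // StoreLoad fence, no young-gen filtering - just "if clean, make dirty".
    void write_ref_field_post(const void* field) {
      CardValue* byte = byte_map_base + (reinterpret_cast<uintptr_t>(field) >> card_shift);
      if (*byte == clean_card) {
        *byte = dirty_card;
      }
    }
  };
}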
class G1BarrierSet: public CardTableBarrierSet {
friend class VMStructs;
private:
BufferNode::Allocator _satb_mark_queue_buffer_allocator;
BufferNode::Allocator _dirty_card_queue_buffer_allocator;
G1SATBMarkQueueSet _satb_mark_queue_set;
G1DirtyCardQueueSet _dirty_card_queue_set;
G1CardTable* _refinement_table;
public:
G1BarrierSet(G1CardTable* card_table, G1CardTable* refinement_table);
virtual ~G1BarrierSet();
static G1BarrierSet* g1_barrier_set() {
return barrier_set_cast<G1BarrierSet>(BarrierSet::barrier_set());
}
public:
G1BarrierSet(G1CardTable* table);
~G1BarrierSet() { }
G1CardTable* refinement_table() const { return _refinement_table; }
// Swap the global card table references, without synchronization.
void swap_global_card_table();
// Update the given thread's card table (byte map) base to the current card table's.
void update_card_table_base(Thread* thread);
virtual bool card_mark_must_follow_store() const {
return true;
@ -74,9 +107,8 @@ class G1BarrierSet: public CardTableBarrierSet {
inline void write_region(MemRegion mr);
void write_region(JavaThread* thread, MemRegion mr);
template <DecoratorSet decorators, typename T>
template <DecoratorSet decorators = DECORATORS_NONE, typename T>
void write_ref_field_post(T* field);
void write_ref_field_post_slow(volatile CardValue* byte);
virtual void on_thread_create(Thread* thread);
virtual void on_thread_destroy(Thread* thread);
@ -87,9 +119,7 @@ class G1BarrierSet: public CardTableBarrierSet {
return g1_barrier_set()->_satb_mark_queue_set;
}
static G1DirtyCardQueueSet& dirty_card_queue_set() {
return g1_barrier_set()->_dirty_card_queue_set;
}
virtual void print_on(outputStream* st) const;
// Callbacks for runtime accesses.
template <DecoratorSet decorators, typename BarrierSetT = G1BarrierSet>

View File

@ -75,9 +75,8 @@ inline void G1BarrierSet::write_region(MemRegion mr) {
template <DecoratorSet decorators, typename T>
inline void G1BarrierSet::write_ref_field_post(T* field) {
volatile CardValue* byte = _card_table->byte_for(field);
if (*byte != G1CardTable::g1_young_card_val()) {
// Take a slow path for cards in old
write_ref_field_post_slow(byte);
if (*byte == G1CardTable::clean_card_val()) {
*byte = G1CardTable::dirty_card_val();
}
}
@ -127,7 +126,7 @@ inline void G1BarrierSet::AccessBarrier<decorators, BarrierSetT>::
oop_store_not_in_heap(T* addr, oop new_value) {
// Apply SATB barriers for all non-heap references, to allow
// concurrent scanning of such references.
G1BarrierSet *bs = barrier_set_cast<G1BarrierSet>(BarrierSet::barrier_set());
G1BarrierSet *bs = g1_barrier_set();
bs->write_ref_field_pre<decorators>(addr);
Raw::oop_store(addr, new_value);
}

View File

@ -29,17 +29,17 @@
#include "utilities/macros.hpp"
void G1BarrierSetRuntime::write_ref_array_pre_oop_entry(oop* dst, size_t length) {
G1BarrierSet *bs = barrier_set_cast<G1BarrierSet>(BarrierSet::barrier_set());
G1BarrierSet *bs = G1BarrierSet::g1_barrier_set();
bs->write_ref_array_pre(dst, length, false);
}
void G1BarrierSetRuntime::write_ref_array_pre_narrow_oop_entry(narrowOop* dst, size_t length) {
G1BarrierSet *bs = barrier_set_cast<G1BarrierSet>(BarrierSet::barrier_set());
G1BarrierSet *bs = G1BarrierSet::g1_barrier_set();
bs->write_ref_array_pre(dst, length, false);
}
void G1BarrierSetRuntime::write_ref_array_post_entry(HeapWord* dst, size_t length) {
G1BarrierSet *bs = barrier_set_cast<G1BarrierSet>(BarrierSet::barrier_set());
G1BarrierSet *bs = G1BarrierSet::g1_barrier_set();
bs->G1BarrierSet::write_ref_array(dst, length);
}
@ -53,14 +53,6 @@ JRT_LEAF(void, G1BarrierSetRuntime::write_ref_field_pre_entry(oopDesc* orig, Jav
G1BarrierSet::satb_mark_queue_set().enqueue_known_active(queue, orig);
JRT_END
// G1 post write barrier slowpath
JRT_LEAF(void, G1BarrierSetRuntime::write_ref_field_post_entry(volatile G1CardTable::CardValue* card_addr,
JavaThread* thread))
assert(thread == JavaThread::current(), "pre-condition");
G1DirtyCardQueue& queue = G1ThreadLocalData::dirty_card_queue(thread);
G1BarrierSet::dirty_card_queue_set().enqueue(queue, card_addr);
JRT_END
JRT_LEAF(void, G1BarrierSetRuntime::clone(oopDesc* src, oopDesc* dst, size_t size))
HeapAccess<>::clone(src, dst, size);
JRT_END

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2018, 2024, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -47,7 +47,6 @@ public:
// C2 slow-path runtime calls.
static void write_ref_field_pre_entry(oopDesc* orig, JavaThread *thread);
static void write_ref_field_post_entry(volatile CardValue* card_addr, JavaThread* thread);
static address clone_addr();
};

View File

@ -28,18 +28,37 @@
#include "logging/log.hpp"
#include "runtime/os.hpp"
void G1CardTable::g1_mark_as_young(const MemRegion& mr) {
CardValue *const first = byte_for(mr.start());
CardValue *const last = byte_after(mr.last());
void G1CardTable::verify_region(MemRegion mr, CardValue val, bool val_equals) {
if (mr.is_empty()) {
return;
}
CardValue* start = byte_for(mr.start());
CardValue* end = byte_for(mr.last());
memset_with_concurrent_readers(first, g1_young_gen, pointer_delta(last, first, sizeof(CardValue)));
}
G1CollectedHeap* g1h = G1CollectedHeap::heap();
G1HeapRegion* r = g1h->heap_region_containing(mr.start());
#ifndef PRODUCT
void G1CardTable::verify_g1_young_region(MemRegion mr) {
verify_region(mr, g1_young_gen, true);
assert(r == g1h->heap_region_containing(mr.last()), "MemRegion crosses region");
bool failures = false;
for (CardValue* curr = start; curr <= end; ++curr) {
CardValue curr_val = *curr;
bool failed = (val_equals) ? (curr_val != val) : (curr_val == val);
if (failed) {
if (!failures) {
log_error(gc, verify)("== CT verification failed: [" PTR_FORMAT "," PTR_FORMAT "] r: %d (%s) %sexpecting value: %d",
p2i(start), p2i(end), r->hrm_index(), r->get_short_type_str(),
(val_equals) ? "" : "not ", val);
failures = true;
}
log_error(gc, verify)("== card " PTR_FORMAT " [" PTR_FORMAT "," PTR_FORMAT "], val: %d",
p2i(curr), p2i(addr_for(curr)),
p2i((HeapWord*) (((size_t) addr_for(curr)) + _card_size)),
(int) curr_val);
}
}
guarantee(!failures, "there should not have been any failures");
}
#endif
void G1CardTableChangedListener::on_commit(uint start_idx, size_t num_regions, bool zero_filled) {
// Default value for a clean card on the card table is -1. So we cannot take advantage of the zero_filled parameter.
@ -74,6 +93,5 @@ void G1CardTable::initialize(G1RegionToSpaceMapper* mapper) {
}
bool G1CardTable::is_in_young(const void* p) const {
volatile CardValue* card = byte_for(p);
return *card == G1CardTable::g1_young_card_val();
return G1CollectedHeap::heap()->heap_region_containing(p)->is_young();
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2001, 2024, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2001, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -52,8 +52,6 @@ class G1CardTable : public CardTable {
public:
enum G1CardValues {
g1_young_gen = CT_MR_BS_last_reserved << 1,
// During evacuation we use the card table to consolidate the cards we need to
// scan for roots onto the card table from the various sources. Further it is
// used to record already completely scanned cards to avoid re-scanning them
@ -63,18 +61,43 @@ public:
// The merge at the start of each evacuation round simply sets cards to dirty
// that are clean; scanned cards are set to 0x1.
//
// This means that the LSB determines what to do with the card during evacuation
// given the following possible values:
// This means that the LSB determines whether the card is clean or non-clean
// (LSB is 1 -> clean, LSB is 0 -> non-clean) given the following possible values:
//
// 11111111 - clean, do not scan
// 00000001 - already scanned, do not scan
// xxxxxxx1 - clean, already scanned, do not scan again (during GC only).
// 00000100 - dirty, needs to be scanned, dirty from remembered set (during GC only)
// 00000010 - dirty, needs to be scanned, contains reference to collection set.
// 00000000 - dirty, needs to be scanned.
//
g1_card_already_scanned = 0x1
// g1_to_cset_card and g1_from_remset_card are both used for optimization and
// for more accurate prediction of the card generation rate.
//
// g1_to_cset_card allows separating the dirty card generation rate of the mutator
// (which just dirties cards) from cards that will be scanned during the next garbage
// collection anyway.
// Further, it enables the optimization of not refining such cards, assuming that
// their references to the young gen do not change, and of not adding them to any
// other remembered set.
// This color is sticky during mutator time: refinement threads encountering
// this card on the refinement table just copy it over to the regular card
// table without re-refining it. This saves refinement effort because, most of
// the time, already found interesting references stay interesting.
//
// g1_from_remset_card allows separation of cards generated by the mutator from
// cards in the remembered set, again to make mutator dirty card generation
// prediction more accurate.
//
// More accurate prediction allows better (less wasteful) refinement control.
g1_dirty_card = dirty_card,
g1_card_already_scanned = 0x1,
g1_to_cset_card = 0x2,
g1_from_remset_card = 0x4
};
static const size_t WordAllClean = SIZE_MAX;
static const size_t WordAllDirty = 0;
static const size_t WordAllFromRemset = (SIZE_MAX / 255) * g1_from_remset_card;
STATIC_ASSERT(BitsPerByte == 8);
static const size_t WordAlreadyScanned = (SIZE_MAX / 255) * g1_card_already_scanned;
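As a side note on the word-sized constants above: SIZE_MAX / 255 is the word with 0x01 in every byte, so multiplying it by a card value replicates that value into every card slot of a word, and masking a word of cards with WordAlreadyScanned isolates each card's LSB, which by the encoding above is set exactly for clean (and already-scanned) cards. A minimal stand-alone check, illustrative only, not part of this patch, and assuming a 64-bit size_t; it mirrors the word test used by G1ChunkScanner::find_first_dirty_card later in this change.
#include <cstddef>
#include <cstdint>
static_assert(SIZE_MAX / 255 == 0x0101010101010101ull, "assumes a 64-bit size_t");
static_assert((SIZE_MAX / 255) * 0x4 == 0x0404040404040404ull, "WordAllFromRemset bit pattern");
// True iff a word of eight cards contains at least one card whose LSB is 0,
// i.e. at least one non-clean card.
inline bool word_has_non_clean_card(size_t word_of_cards) {
  const size_t lsb_mask = SIZE_MAX / 255;  // same value as WordAlreadyScanned
  return (~word_of_cards & lsb_mask) != 0;
}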
@ -83,27 +106,27 @@ public:
_listener.set_card_table(this);
}
static CardValue g1_young_card_val() { return g1_young_gen; }
static CardValue g1_scanned_card_val() { return g1_card_already_scanned; }
void verify_g1_young_region(MemRegion mr) PRODUCT_RETURN;
void g1_mark_as_young(const MemRegion& mr);
void verify_region(MemRegion mr, CardValue val, bool val_equals) override;
size_t index_for_cardvalue(CardValue const* p) const {
return pointer_delta(p, _byte_map, sizeof(CardValue));
}
// Mark the given card as Dirty if it is Clean. Returns whether the card was
// Mark the given card as From Remset if it is Clean. Returns whether the card was
// Clean before this operation. This result may be inaccurate as it does not
// perform the dirtying atomically.
inline bool mark_clean_as_dirty(CardValue* card);
inline bool mark_clean_as_from_remset(CardValue* card);
// Change Clean cards in a (large) area on the card table as Dirty, preserving
// already scanned cards. Assumes that most cards in that area are Clean.
inline void mark_range_dirty(size_t start_card_index, size_t num_cards);
// Change Clean cards in a (large) area on the card table to From_Remset, preserving
// cards already marked otherwise. Assumes that most cards in that area are Clean.
// Not atomic.
inline size_t mark_clean_range_as_from_remset(size_t start_card_index, size_t num_cards);
// Change the given range of dirty cards to "which". All of these cards must be Dirty.
inline void change_dirty_cards_to(CardValue* start_card, CardValue* end_card, CardValue which);
// Change the given range of dirty cards to "which". All of these cards must be non-clean.
// Returns the number of pending cards found.
inline size_t change_dirty_cards_to(CardValue* start_card, CardValue* end_card, CardValue which);
inline uint region_idx_for(CardValue* p);

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2001, 2024, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2001, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -28,25 +28,39 @@
#include "gc/g1/g1CardTable.hpp"
#include "gc/g1/g1HeapRegion.hpp"
#include "utilities/population_count.hpp"
inline uint G1CardTable::region_idx_for(CardValue* p) {
size_t const card_idx = pointer_delta(p, _byte_map, sizeof(CardValue));
return (uint)(card_idx >> G1HeapRegion::LogCardsPerRegion);
}
inline bool G1CardTable::mark_clean_as_dirty(CardValue* card) {
inline bool G1CardTable::mark_clean_as_from_remset(CardValue* card) {
CardValue value = *card;
if (value == clean_card_val()) {
*card = dirty_card_val();
*card = g1_from_remset_card;
return true;
}
return false;
}
inline void G1CardTable::mark_range_dirty(size_t start_card_index, size_t num_cards) {
// Returns bits from a where mask is 0, and bits from b where mask is 1.
//
// Example:
// a = 0xAAAAAAAA
// b = 0xBBBBBBBB
// mask = 0xFF00FF00
// result = 0xBBAABBAA
inline size_t blend(size_t a, size_t b, size_t mask) {
return (a & ~mask) | (b & mask);
}
inline size_t G1CardTable::mark_clean_range_as_from_remset(size_t start_card_index, size_t num_cards) {
assert(is_aligned(start_card_index, sizeof(size_t)), "Start card index must be aligned.");
assert(is_aligned(num_cards, sizeof(size_t)), "Number of cards to change must be evenly divisible.");
size_t result = 0;
size_t const num_chunks = num_cards / sizeof(size_t);
size_t* cur_word = (size_t*)&_byte_map[start_card_index];
@ -54,31 +68,33 @@ inline void G1CardTable::mark_range_dirty(size_t start_card_index, size_t num_ca
while (cur_word < end_word_map) {
size_t value = *cur_word;
if (value == WordAllClean) {
*cur_word = WordAllDirty;
} else if (value == WordAllDirty) {
// do nothing.
*cur_word = WordAllFromRemset;
result += sizeof(size_t);
} else if ((value & WordAlreadyScanned) == 0) {
// Do nothing if there is no "Clean" card in it.
} else {
// There is a mix of cards in there. Tread slowly.
CardValue* cur = (CardValue*)cur_word;
for (size_t i = 0; i < sizeof(size_t); i++) {
CardValue value = *cur;
if (value == clean_card_val()) {
*cur = dirty_card_val();
}
cur++;
}
// There is a mix of cards in there. Tread "slowly".
size_t clean_card_mask = (value & WordAlreadyScanned) * 0xff; // All "Clean" cards have 0xff, all other places 0x00 now.
result += population_count(clean_card_mask) / BitsPerByte;
*cur_word = blend(value, WordAllFromRemset, clean_card_mask);
}
cur_word++;
}
return result;
}
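A worked example of the mixed-word branch above, illustrative only and assuming a 64-bit size_t and the card values from g1CardTable.hpp (Clean == 0xff, dirty == 0x00, g1_from_remset_card == 0x04): the (value & WordAlreadyScanned) * 0xff step expands each Clean card's LSB into an all-ones byte lane, population_count of that mask divided by BitsPerByte counts the converted cards, and blend() writes From Remset only into those lanes.
#include <cstddef>
namespace example {
  constexpr size_t WordAlreadyScanned = 0x0101010101010101ull;  // LSB of every card slot
  constexpr size_t WordAllFromRemset  = 0x0404040404040404ull;
  constexpr size_t blend(size_t a, size_t b, size_t mask) { return (a & ~mask) | (b & mask); }
  constexpr size_t value = 0x00000000000000ffull;               // one Clean card, seven dirty cards
  constexpr size_t clean_card_mask = (value & WordAlreadyScanned) * 0xff;
  static_assert(clean_card_mask == 0x00000000000000ffull,
                "only the Clean card's byte lane is selected");
  // population_count(clean_card_mask) / 8 == 1, i.e. exactly one card is counted as converted.
  static_assert(blend(value, WordAllFromRemset, clean_card_mask) == 0x0000000000000004ull,
                "the Clean card becomes From Remset; the dirty cards are left untouched");
}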
inline void G1CardTable::change_dirty_cards_to(CardValue* start_card, CardValue* end_card, CardValue which) {
inline size_t G1CardTable::change_dirty_cards_to(CardValue* start_card, CardValue* end_card, CardValue which) {
size_t result = 0;
for (CardValue* i_card = start_card; i_card < end_card; ++i_card) {
CardValue value = *i_card;
assert(value == dirty_card_val(),
assert((value & g1_card_already_scanned) == 0,
"Must have been dirty %d start " PTR_FORMAT " " PTR_FORMAT, value, p2i(start_card), p2i(end_card));
if (value == g1_dirty_card) {
result++;
}
*i_card = which;
}
return result;
}
#endif /* SHARE_GC_G1_G1CARDTABLE_INLINE_HPP */

View File

@ -0,0 +1,97 @@
/*
* Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "gc/g1/g1CardTableClaimTable.inline.hpp"
#include "gc/g1/g1CollectedHeap.inline.hpp"
#include "gc/g1/g1HeapRegion.inline.hpp"
#include "gc/shared/workerThread.hpp"
#include "memory/allocation.hpp"
#include "utilities/checkedCast.hpp"
#include "utilities/powerOfTwo.hpp"
G1CardTableClaimTable::G1CardTableClaimTable(uint chunks_per_region) :
_max_reserved_regions(0),
_card_claims(nullptr),
_cards_per_chunk(checked_cast<uint>(G1HeapRegion::CardsPerRegion / chunks_per_region))
{
guarantee(chunks_per_region > 0, "%u chunks per region", chunks_per_region);
}
G1CardTableClaimTable::~G1CardTableClaimTable() {
FREE_C_HEAP_ARRAY(uint, _card_claims);
}
void G1CardTableClaimTable::initialize(uint max_reserved_regions) {
assert(_card_claims == nullptr, "Must not be initialized twice");
_card_claims = NEW_C_HEAP_ARRAY(uint, max_reserved_regions, mtGC);
_max_reserved_regions = max_reserved_regions;
reset_all_to_unclaimed();
}
void G1CardTableClaimTable::reset_all_to_unclaimed() {
for (uint i = 0; i < _max_reserved_regions; i++) {
_card_claims[i] = 0;
}
}
void G1CardTableClaimTable::reset_all_to_claimed() {
for (uint i = 0; i < _max_reserved_regions; i++) {
_card_claims[i] = (uint)G1HeapRegion::CardsPerRegion;
}
}
void G1CardTableClaimTable::heap_region_iterate_from_worker_offset(G1HeapRegionClosure* cl, uint worker_id, uint max_workers) {
// Every worker will actually look at all regions, skipping over regions that
// are completed.
const size_t n_regions = _max_reserved_regions;
const uint start_index = (uint)(worker_id * n_regions / max_workers);
for (uint count = 0; count < n_regions; count++) {
const uint index = (start_index + count) % n_regions;
assert(index < n_regions, "sanity");
// Skip over fully processed regions
if (!has_unclaimed_cards(index)) {
continue;
}
G1HeapRegion* r = G1CollectedHeap::heap()->region_at(index);
bool res = cl->do_heap_region(r);
if (res) {
return;
}
}
}
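A quick illustration of the start-offset arithmetic above (not part of this patch, numbers chosen for the example):
// With max_workers == 8 and 2048 reserved regions, worker 3 starts its iteration at
// region 3 * 2048 / 8 == 768 and wraps around modulo 2048, so workers begin in
// different parts of the heap instead of all contending for the first regions.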
G1CardTableChunkClaimer::G1CardTableChunkClaimer(G1CardTableClaimTable* scan_state, uint region_idx) :
_claim_values(scan_state),
_region_idx(region_idx),
_cur_claim(0) {
guarantee(size() <= G1HeapRegion::CardsPerRegion, "Should not claim more space than possible.");
}
G1ChunkScanner::G1ChunkScanner(CardValue* const start_card, CardValue* const end_card) :
_start_card(start_card),
_end_card(end_card) {
assert(is_word_aligned(start_card), "precondition");
assert(is_word_aligned(end_card), "precondition");
}

View File

@ -0,0 +1,137 @@
/*
* Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#ifndef SHARE_GC_G1_G1CARDTABLECLAIMTABLE_HPP
#define SHARE_GC_G1_G1CARDTABLECLAIMTABLE_HPP
#include "gc/g1/g1CardTable.hpp"
#include "memory/allocation.hpp"
class G1HeapRegionClosure;
// Helper class representing claim values for the cards in the card table corresponding
// to a region.
// I.e. for every region this class stores an atomic counter that represents the
// number of cards from 0 to the number of cards per region already claimed for
// this region.
// If the claimed value is >= the number of cards of a region, the region can be
// considered fully claimed.
//
// Claiming works on a full region (all cards in the region) or on a range of
// contiguous cards (a chunk). The chunk size is given at construction time.
class G1CardTableClaimTable : public CHeapObj<mtGC> {
uint _max_reserved_regions;
// Card table iteration claim values for every heap region, from 0 (completely unclaimed)
// to (>=) G1HeapRegion::CardsPerRegion (completely claimed).
uint volatile* _card_claims;
uint _cards_per_chunk; // For conversion between card index and chunk index.
// Claim increment number of cards, returning the previous claim value.
inline uint claim_cards(uint region, uint increment);
public:
G1CardTableClaimTable(uint chunks_per_region);
~G1CardTableClaimTable();
// Allocates the data structure and initializes the claims to unclaimed.
void initialize(uint max_reserved_regions);
void reset_all_to_unclaimed();
void reset_all_to_claimed();
inline bool has_unclaimed_cards(uint region);
inline void reset_to_unclaimed(uint region);
// Claims all cards in that region, returning the previous claim value.
inline uint claim_all_cards(uint region);
// Claim a single chunk in that region, returning the previous claim value.
inline uint claim_chunk(uint region);
inline uint cards_per_chunk() const;
size_t max_reserved_regions() { return _max_reserved_regions; }
void heap_region_iterate_from_worker_offset(G1HeapRegionClosure* cl, uint worker_id, uint max_workers);
};
// Helper class to claim dirty chunks within the card table for a given region.
class G1CardTableChunkClaimer {
G1CardTableClaimTable* _claim_values;
uint _region_idx;
uint _cur_claim;
public:
G1CardTableChunkClaimer(G1CardTableClaimTable* claim_table, uint region_idx);
inline bool has_next();
inline uint value() const;
inline uint size() const;
};
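An illustrative usage sketch (not part of this patch) of the claiming scheme described above: a worker repeatedly claims chunks of a region until the claim counter reaches CardsPerRegion. The process callback is a placeholder for whatever the caller does with the claimed card range.
template <typename ProcessCards>  // e.g. [](uint first_card_in_region, uint num_cards) { ... }
void example_process_region(G1CardTableClaimTable* claim_table, uint region_idx, ProcessCards process) {
  G1CardTableChunkClaimer claim(claim_table, region_idx);
  while (claim.has_next()) {
    // Cards [claim.value(), claim.value() + claim.size()) of this region are now
    // exclusively claimed by this thread; other claimers will skip them.
    process(claim.value(), claim.size());
  }
}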
// Helper class to locate consecutive dirty cards inside a range of cards.
class G1ChunkScanner {
using Word = size_t;
using CardValue = G1CardTable::CardValue;
CardValue* const _start_card;
CardValue* const _end_card;
static const size_t ExpandedToScanMask = G1CardTable::WordAlreadyScanned;
static const size_t ToScanMask = G1CardTable::g1_card_already_scanned;
inline bool is_card_dirty(const CardValue* const card) const;
inline bool is_word_aligned(const void* const addr) const;
inline CardValue* find_first_dirty_card(CardValue* i_card) const;
inline CardValue* find_first_non_dirty_card(CardValue* i_card) const;
public:
G1ChunkScanner(CardValue* const start_card, CardValue* const end_card);
template<typename Func>
void on_dirty_cards(Func&& f) {
for (CardValue* cur_card = _start_card; cur_card < _end_card; /* empty */) {
CardValue* dirty_l = find_first_dirty_card(cur_card);
CardValue* dirty_r = find_first_non_dirty_card(dirty_l);
assert(dirty_l <= dirty_r, "inv");
if (dirty_l == dirty_r) {
assert(dirty_r == _end_card, "finished the entire chunk");
return;
}
f(dirty_l, dirty_r);
cur_card = dirty_r + 1;
}
}
};
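An illustrative usage sketch (not part of this patch) of on_dirty_cards(): counting the dirty cards of a claimed, word-aligned card range (the alignment is required by the constructor asserts). The callback receives each maximal run [dirty_l, dirty_r) of consecutive dirty cards.
inline size_t count_dirty_cards(G1CardTable::CardValue* start_card,
                                G1CardTable::CardValue* end_card) {
  size_t dirty = 0;
  G1ChunkScanner scanner(start_card, end_card);
  scanner.on_dirty_cards([&](G1CardTable::CardValue* dirty_l, G1CardTable::CardValue* dirty_r) {
    dirty += (size_t)(dirty_r - dirty_l);  // length of this run of dirty cards
  });
  return dirty;
}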
#endif // SHARE_GC_G1_G1CARDTABLECLAIMTABLE_HPP

View File

@ -0,0 +1,128 @@
/*
* Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#ifndef SHARE_GC_G1_G1CARDTABLECLAIMTABLE_INLINE_HPP
#define SHARE_GC_G1_G1CARDTABLECLAIMTABLE_INLINE_HPP
#include "gc/g1/g1CardTableClaimTable.hpp"
#include "gc/g1/g1CollectedHeap.inline.hpp"
#include "gc/g1/g1HeapRegion.inline.hpp"
#include "runtime/atomicAccess.hpp"
bool G1CardTableClaimTable::has_unclaimed_cards(uint region) {
assert(region < _max_reserved_regions, "Tried to access invalid region %u", region);
return AtomicAccess::load(&_card_claims[region]) < G1HeapRegion::CardsPerRegion;
}
void G1CardTableClaimTable::reset_to_unclaimed(uint region) {
assert(region < _max_reserved_regions, "Tried to access invalid region %u", region);
AtomicAccess::store(&_card_claims[region], 0u);
}
uint G1CardTableClaimTable::claim_cards(uint region, uint increment) {
assert(region < _max_reserved_regions, "Tried to access invalid region %u", region);
return AtomicAccess::fetch_then_add(&_card_claims[region], increment, memory_order_relaxed);
}
uint G1CardTableClaimTable::claim_chunk(uint region) {
assert(region < _max_reserved_regions, "Tried to access invalid region %u", region);
return AtomicAccess::fetch_then_add(&_card_claims[region], cards_per_chunk(), memory_order_relaxed);
}
uint G1CardTableClaimTable::claim_all_cards(uint region) {
return claim_cards(region, (uint)G1HeapRegion::CardsPerRegion);
}
uint G1CardTableClaimTable::cards_per_chunk() const { return _cards_per_chunk; }
bool G1CardTableChunkClaimer::has_next() {
_cur_claim = _claim_values->claim_chunk(_region_idx);
return (_cur_claim < G1HeapRegion::CardsPerRegion);
}
uint G1CardTableChunkClaimer::value() const { return _cur_claim; }
uint G1CardTableChunkClaimer::size() const { return _claim_values->cards_per_chunk(); }
bool G1ChunkScanner::is_card_dirty(const CardValue* const card) const {
return (*card & ToScanMask) == 0;
}
bool G1ChunkScanner::is_word_aligned(const void* const addr) const {
return ((uintptr_t)addr) % sizeof(Word) == 0;
}
G1CardTable::CardValue* G1ChunkScanner::find_first_dirty_card(CardValue* i_card) const {
while (!is_word_aligned(i_card)) {
if (is_card_dirty(i_card)) {
return i_card;
}
i_card++;
}
for (/* empty */; i_card < _end_card; i_card += sizeof(Word)) {
Word word_value = *reinterpret_cast<Word*>(i_card);
bool has_dirty_cards_in_word = (~word_value & ExpandedToScanMask) != 0;
if (has_dirty_cards_in_word) {
for (uint i = 0; i < sizeof(Word); ++i) {
if (is_card_dirty(i_card)) {
return i_card;
}
i_card++;
}
ShouldNotReachHere();
}
}
return _end_card;
}
G1CardTable::CardValue* G1ChunkScanner::find_first_non_dirty_card(CardValue* i_card) const {
while (!is_word_aligned(i_card)) {
if (!is_card_dirty(i_card)) {
return i_card;
}
i_card++;
}
for (/* empty */; i_card < _end_card; i_card += sizeof(Word)) {
Word word_value = *reinterpret_cast<Word*>(i_card);
bool all_cards_dirty = (word_value & ExpandedToScanMask) == 0;
if (!all_cards_dirty) {
for (uint i = 0; i < sizeof(Word); ++i) {
if (!is_card_dirty(i_card)) {
return i_card;
}
i_card++;
}
ShouldNotReachHere();
}
}
return _end_card;
}
#endif // SHARE_GC_G1_G1CARDTABLECLAIMTABLE_INLINE_HPP

View File

@ -38,7 +38,6 @@
#include "gc/g1/g1ConcurrentMarkThread.inline.hpp"
#include "gc/g1/g1ConcurrentRefine.hpp"
#include "gc/g1/g1ConcurrentRefineThread.hpp"
#include "gc/g1/g1DirtyCardQueue.hpp"
#include "gc/g1/g1EvacStats.inline.hpp"
#include "gc/g1/g1FullCollector.hpp"
#include "gc/g1/g1GCCounters.hpp"
@ -60,10 +59,10 @@
#include "gc/g1/g1ParScanThreadState.inline.hpp"
#include "gc/g1/g1PeriodicGCTask.hpp"
#include "gc/g1/g1Policy.hpp"
#include "gc/g1/g1RedirtyCardsQueue.hpp"
#include "gc/g1/g1RegionPinCache.inline.hpp"
#include "gc/g1/g1RegionToSpaceMapper.hpp"
#include "gc/g1/g1RemSet.hpp"
#include "gc/g1/g1ReviseYoungLengthTask.hpp"
#include "gc/g1/g1RootClosures.hpp"
#include "gc/g1/g1RootProcessor.hpp"
#include "gc/g1/g1SATBMarkQueueSet.hpp"
@ -111,6 +110,7 @@
#include "runtime/init.hpp"
#include "runtime/java.hpp"
#include "runtime/orderAccess.hpp"
#include "runtime/threads.hpp"
#include "runtime/threadSMR.hpp"
#include "runtime/vmThread.hpp"
#include "utilities/align.hpp"
@ -146,7 +146,7 @@ void G1CollectedHeap::run_batch_task(G1BatchedTask* cl) {
workers()->run_task(cl, num_workers);
}
uint G1CollectedHeap::get_chunks_per_region() {
uint G1CollectedHeap::get_chunks_per_region_for_scan() {
uint log_region_size = G1HeapRegion::LogOfHRGrainBytes;
// Limit the expected input values to current known possible values of the
// (log) region size. Adjust as necessary after testing if changing the permissible
@ -156,6 +156,18 @@ uint G1CollectedHeap::get_chunks_per_region() {
return 1u << (log_region_size / 2 - 4);
}
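A quick check of the formula above against the table in the header comment (illustrative only, not part of this patch):
// For 1 MB regions log_region_size == 20, so 1u << (20 / 2 - 4) == 64 chunks per region;
// for 4 MB regions (log_region_size == 22) it is 1u << (22 / 2 - 4) == 128;
// for 16 MB regions (log_region_size == 24) it is 1u << (24 / 2 - 4) == 256.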
uint G1CollectedHeap::get_chunks_per_region_for_merge() {
uint log_region_size = G1HeapRegion::LogOfHRGrainBytes;
// Limit the expected input values to current known possible values of the
// (log) region size. Adjust as necessary after testing if changing the permissible
// values for region size.
assert(log_region_size >= 20 && log_region_size <= 29,
"expected value in [20,29], but got %u", log_region_size);
uint half_log_region_size = (log_region_size + 1) / 2;
return 1 << (half_log_region_size - 9);
}
G1HeapRegion* G1CollectedHeap::new_heap_region(uint hrs_index,
MemRegion mr) {
return new G1HeapRegion(hrs_index, bot(), mr, &_card_set_config);
@ -614,7 +626,6 @@ inline HeapWord* G1CollectedHeap::attempt_allocation(size_t min_word_size,
assert_heap_not_locked();
if (result != nullptr) {
assert(*actual_word_size != 0, "Actual size must have been set here");
dirty_young_block(result, *actual_word_size);
} else {
*actual_word_size = 0;
}
@ -809,11 +820,27 @@ void G1CollectedHeap::prepare_for_mutator_after_full_collection(size_t allocatio
}
void G1CollectedHeap::abort_refinement() {
// Discard all remembered set updates and reset refinement statistics.
G1BarrierSet::dirty_card_queue_set().abandon_logs_and_stats();
assert(G1BarrierSet::dirty_card_queue_set().num_cards() == 0,
"DCQS should be empty");
concurrent_refine()->get_and_reset_refinement_stats();
G1ConcurrentRefineSweepState& sweep_state = concurrent_refine()->sweep_state();
if (sweep_state.is_in_progress()) {
if (!sweep_state.are_java_threads_synched()) {
// Synchronize Java threads with global card table that has already been swapped.
class SwapThreadCardTableClosure : public ThreadClosure {
public:
virtual void do_thread(Thread* t) {
G1BarrierSet* bs = G1BarrierSet::g1_barrier_set();
bs->update_card_table_base(t);
}
} cl;
Threads::java_threads_do(&cl);
}
// Record any available refinement statistics.
policy()->record_refinement_stats(sweep_state.stats());
sweep_state.complete_work(false /* concurrent */, false /* print_log */);
}
sweep_state.reset_stats();
}
void G1CollectedHeap::verify_after_full_collection() {
@ -825,6 +852,7 @@ void G1CollectedHeap::verify_after_full_collection() {
}
_hrm.verify_optional();
_verifier->verify_region_sets_optional();
_verifier->verify_card_tables_clean(true /* both_card_tables */);
_verifier->verify_after_gc();
_verifier->verify_bitmap_clear(false /* above_tams_only */);
@ -1168,8 +1196,13 @@ G1CollectedHeap::G1CollectedHeap() :
_service_thread(nullptr),
_periodic_gc_task(nullptr),
_free_arena_memory_task(nullptr),
_revise_young_length_task(nullptr),
_workers(nullptr),
_card_table(nullptr),
_refinement_epoch(0),
_last_synchronized_start(0),
_last_refinement_epoch_start(0),
_yield_duration_in_refinement_epoch(0),
_last_safepoint_refinement_epoch(0),
_collection_pause_end(Ticks::now()),
_old_set("Old Region Set", new OldRegionSetChecker()),
_humongous_set("Humongous Region Set", new HumongousRegionSetChecker()),
@ -1289,7 +1322,7 @@ G1RegionToSpaceMapper* G1CollectedHeap::create_aux_memory_mapper(const char* des
jint G1CollectedHeap::initialize_concurrent_refinement() {
jint ecode = JNI_OK;
_cr = G1ConcurrentRefine::create(policy(), &ecode);
_cr = G1ConcurrentRefine::create(this, &ecode);
return ecode;
}
@ -1345,18 +1378,12 @@ jint G1CollectedHeap::initialize() {
initialize_reserved_region(heap_rs);
// Create the barrier set for the entire reserved region.
G1CardTable* ct = new G1CardTable(_reserved);
G1BarrierSet* bs = new G1BarrierSet(ct);
G1CardTable* card_table = new G1CardTable(_reserved);
G1CardTable* refinement_table = new G1CardTable(_reserved);
G1BarrierSet* bs = new G1BarrierSet(card_table, refinement_table);
bs->initialize();
assert(bs->is_a(BarrierSet::G1BarrierSet), "sanity");
BarrierSet::set_barrier_set(bs);
_card_table = ct;
{
G1SATBMarkQueueSet& satbqs = bs->satb_mark_queue_set();
satbqs.set_process_completed_buffers_threshold(G1SATBProcessCompletedThreshold);
satbqs.set_buffer_enqueue_threshold_percentage(G1SATBBufferEnqueueingThresholdPercent);
}
// Create space mappers.
size_t page_size = heap_rs.page_size();
@ -1391,12 +1418,26 @@ jint G1CollectedHeap::initialize() {
G1CardTable::compute_size(heap_rs.size() / HeapWordSize),
G1CardTable::heap_map_factor());
G1RegionToSpaceMapper* refinement_cards_storage =
create_aux_memory_mapper("Refinement Card Table",
G1CardTable::compute_size(heap_rs.size() / HeapWordSize),
G1CardTable::heap_map_factor());
size_t bitmap_size = G1CMBitMap::compute_size(heap_rs.size());
G1RegionToSpaceMapper* bitmap_storage =
create_aux_memory_mapper("Mark Bitmap", bitmap_size, G1CMBitMap::heap_map_factor());
_hrm.initialize(heap_storage, bitmap_storage, bot_storage, cardtable_storage);
_card_table->initialize(cardtable_storage);
_hrm.initialize(heap_storage, bitmap_storage, bot_storage, cardtable_storage, refinement_cards_storage);
card_table->initialize(cardtable_storage);
refinement_table->initialize(refinement_cards_storage);
BarrierSet::set_barrier_set(bs);
{
G1SATBMarkQueueSet& satbqs = bs->satb_mark_queue_set();
satbqs.set_process_completed_buffers_threshold(G1SATBProcessCompletedThreshold);
satbqs.set_buffer_enqueue_threshold_percentage(G1SATBBufferEnqueueingThresholdPercent);
}
// 6843694 - ensure that the maximum region index can fit
// in the remembered set structures.
@ -1408,7 +1449,7 @@ jint G1CollectedHeap::initialize() {
guarantee((uintptr_t)(heap_rs.base()) >= G1CardTable::card_size(), "Java heap must not start within the first card.");
G1FromCardCache::initialize(max_num_regions());
// Also create a G1 rem set.
_rem_set = new G1RemSet(this, _card_table);
_rem_set = new G1RemSet(this);
_rem_set->initialize(max_num_regions());
size_t max_cards_per_region = ((size_t)1 << (sizeof(CardIdx_t)*BitsPerByte-1)) - 1;
@ -1467,6 +1508,11 @@ jint G1CollectedHeap::initialize() {
_free_arena_memory_task = new G1MonotonicArenaFreeMemoryTask("Card Set Free Memory Task");
_service_thread->register_task(_free_arena_memory_task);
if (policy()->use_adaptive_young_list_length()) {
_revise_young_length_task = new G1ReviseYoungLengthTask("Revise Young Length List Task");
_service_thread->register_task(_revise_young_length_task);
}
// Here we allocate the dummy G1HeapRegion that is required by the
// G1AllocRegion class.
G1HeapRegion* dummy_region = _hrm.get_dummy_region();
@ -1495,6 +1541,7 @@ jint G1CollectedHeap::initialize() {
CPUTimeCounters::create_counter(CPUTimeGroups::CPUTimeType::gc_parallel_workers);
CPUTimeCounters::create_counter(CPUTimeGroups::CPUTimeType::gc_conc_mark);
CPUTimeCounters::create_counter(CPUTimeGroups::CPUTimeType::gc_conc_refine);
CPUTimeCounters::create_counter(CPUTimeGroups::CPUTimeType::gc_conc_refine_control);
CPUTimeCounters::create_counter(CPUTimeGroups::CPUTimeType::gc_service);
G1InitLogger::print();
@ -1519,12 +1566,35 @@ void G1CollectedHeap::stop() {
void G1CollectedHeap::safepoint_synchronize_begin() {
SuspendibleThreadSet::synchronize();
_last_synchronized_start = os::elapsed_counter();
}
void G1CollectedHeap::safepoint_synchronize_end() {
jlong now = os::elapsed_counter();
jlong synchronize_duration = now - _last_synchronized_start;
if (_last_safepoint_refinement_epoch == _refinement_epoch) {
_yield_duration_in_refinement_epoch += synchronize_duration;
} else {
_last_refinement_epoch_start = now;
_last_safepoint_refinement_epoch = _refinement_epoch;
_yield_duration_in_refinement_epoch = 0;
}
SuspendibleThreadSet::desynchronize();
}
void G1CollectedHeap::set_last_refinement_epoch_start(jlong epoch_start, jlong last_yield_duration) {
_last_refinement_epoch_start = epoch_start;
guarantee(_yield_duration_in_refinement_epoch >= last_yield_duration, "should be");
_yield_duration_in_refinement_epoch -= last_yield_duration;
}
jlong G1CollectedHeap::yield_duration_in_refinement_epoch() {
return _yield_duration_in_refinement_epoch;
}
void G1CollectedHeap::post_initialize() {
CollectedHeap::post_initialize();
ref_processing_init();
@ -2336,6 +2406,7 @@ void G1CollectedHeap::gc_epilogue(bool full) {
&_collection_set_candidates_card_set_stats);
update_perf_counter_cpu_time();
_refinement_epoch++;
}
uint G1CollectedHeap::uncommit_regions(uint region_limit) {
@ -2468,7 +2539,6 @@ void G1CollectedHeap::verify_before_young_collection(G1HeapVerifier::G1VerifyTyp
Ticks start = Ticks::now();
_verifier->prepare_for_verify();
_verifier->verify_region_sets_optional();
_verifier->verify_dirty_young_regions();
_verifier->verify_before_gc();
verify_numa_regions("GC Start");
phase_times()->record_verify_before_time_ms((Ticks::now() - start).seconds() * MILLIUNITS);
@ -2734,6 +2804,11 @@ void G1CollectedHeap::free_region(G1HeapRegion* hr, G1FreeRegionList* free_list)
if (free_list != nullptr) {
free_list->add_ordered(hr);
}
if (VerifyDuringGC) {
// Card and refinement table must be clear for freed regions.
card_table()->verify_region(MemRegion(hr->bottom(), hr->end()), G1CardTable::clean_card_val(), true);
refinement_table()->verify_region(MemRegion(hr->bottom(), hr->end()), G1CardTable::clean_card_val(), true);
}
}
void G1CollectedHeap::retain_region(G1HeapRegion* hr) {

View File

@ -75,6 +75,7 @@ class G1GCPhaseTimes;
class G1HeapSizingPolicy;
class G1NewTracer;
class G1RemSet;
class G1ReviseYoungLengthTask;
class G1ServiceTask;
class G1ServiceThread;
class GCMemoryManager;
@ -171,9 +172,23 @@ private:
G1ServiceThread* _service_thread;
G1ServiceTask* _periodic_gc_task;
G1MonotonicArenaFreeMemoryTask* _free_arena_memory_task;
G1ReviseYoungLengthTask* _revise_young_length_task;
WorkerThreads* _workers;
G1CardTable* _card_table;
// The current epoch for refinement, i.e. the number of times the card tables
// have been swapped by a garbage collection.
// Used for detecting whether concurrent refinement has been interrupted by a
// garbage collection.
size_t _refinement_epoch;
// The following members are for tracking safepoint durations between garbage
// collections.
jlong _last_synchronized_start;
jlong _last_refinement_epoch_start;
jlong _yield_duration_in_refinement_epoch; // Time spent in safepoints since beginning of last refinement epoch.
size_t _last_safepoint_refinement_epoch; // Refinement epoch before last safepoint.
Ticks _collection_pause_end;
@ -541,12 +556,17 @@ public:
void run_batch_task(G1BatchedTask* cl);
// Return "optimal" number of chunks per region we want to use for claiming areas
// within a region to claim.
// within a region to claim during card table scanning.
// The returned value is a trade-off between granularity of work distribution and
// memory usage and maintenance costs of that table.
// Testing showed that 64 for 1M/2M region, 128 for 4M/8M regions, 256 for 16/32M regions,
// and so on seems to be such a good trade-off.
static uint get_chunks_per_region();
static uint get_chunks_per_region_for_scan();
// Return "optimal" number of chunks per region we want to use for claiming areas
// within a region to claim during card table merging.
// This is much smaller than for scanning as the merge work is much smaller.
// Currently 1 for 1M regions, 2 for 2/4M regions, 4 for 8/16M regions and so on.
static uint get_chunks_per_region_for_merge();
G1Allocator* allocator() {
return _allocator;
@ -687,11 +707,6 @@ public:
// Add the given region to the retained regions collection set candidates.
void retain_region(G1HeapRegion* hr);
// It dirties the cards that cover the block so that the post
// write barrier never queues anything when updating objects on this
// block. It is assumed (and in fact we assert) that the block
// belongs to a young region.
inline void dirty_young_block(HeapWord* start, size_t word_size);
// Frees a humongous region by collapsing it into individual regions
// and calling free_region() for each of them. The freed regions
@ -905,6 +920,10 @@ public:
void safepoint_synchronize_begin() override;
void safepoint_synchronize_end() override;
jlong last_refinement_epoch_start() const { return _last_refinement_epoch_start; }
void set_last_refinement_epoch_start(jlong epoch_start, jlong last_yield_duration);
jlong yield_duration_in_refinement_epoch();
// Does operations required after initialization has been done.
void post_initialize() override;
@ -1069,7 +1088,16 @@ public:
}
G1CardTable* card_table() const {
return _card_table;
return static_cast<G1CardTable*>(G1BarrierSet::g1_barrier_set()->card_table());
}
G1CardTable* refinement_table() const {
return G1BarrierSet::g1_barrier_set()->refinement_table();
}
G1CardTable::CardValue* card_table_base() const {
assert(card_table() != nullptr, "must be");
return card_table()->byte_map_base();
}
// Iteration functions.

View File

@ -149,30 +149,6 @@ inline void G1CollectedHeap::old_set_remove(G1HeapRegion* hr) {
_old_set.remove(hr);
}
// It dirties the cards that cover the block so that the post
// write barrier never queues anything when updating objects on this
// block. It is assumed (and in fact we assert) that the block
// belongs to a young region.
inline void
G1CollectedHeap::dirty_young_block(HeapWord* start, size_t word_size) {
assert_heap_not_locked();
// Assign the containing region to containing_hr so that we don't
// have to keep calling heap_region_containing() in the
// asserts below.
DEBUG_ONLY(G1HeapRegion* containing_hr = heap_region_containing(start);)
assert(word_size > 0, "pre-condition");
assert(containing_hr->is_in(start), "it should contain start");
assert(containing_hr->is_young(), "it should be young");
assert(!containing_hr->is_humongous(), "it should not be humongous");
HeapWord* end = start + word_size;
assert(containing_hr->is_in(end - 1), "it should also contain end - 1");
MemRegion mr(start, end);
card_table()->g1_mark_as_young(mr);
}
inline G1ScannerTasksQueueSet* G1CollectedHeap::task_queues() const {
return _task_queues;
}

View File

@ -308,7 +308,8 @@ double G1CollectionSet::finalize_young_part(double target_pause_time_ms, G1Survi
guarantee(target_pause_time_ms > 0.0,
"target_pause_time_ms = %1.6lf should be positive", target_pause_time_ms);
size_t pending_cards = _policy->pending_cards_at_gc_start();
bool in_young_only_phase = _policy->collector_state()->in_young_only_phase();
size_t pending_cards = _policy->analytics()->predict_pending_cards(in_young_only_phase);
log_trace(gc, ergo, cset)("Start choosing CSet. Pending cards: %zu target pause time: %1.2fms",
pending_cards, target_pause_time_ms);
@ -323,10 +324,8 @@ double G1CollectionSet::finalize_young_part(double target_pause_time_ms, G1Survi
verify_young_cset_indices();
size_t num_young_cards = _g1h->young_regions_cardset()->occupied();
_policy->record_card_rs_length(num_young_cards);
double predicted_base_time_ms = _policy->predict_base_time_ms(pending_cards, num_young_cards);
size_t card_rs_length = _policy->analytics()->predict_card_rs_length(in_young_only_phase);
double predicted_base_time_ms = _policy->predict_base_time_ms(pending_cards, card_rs_length);
// Base time already includes the whole remembered set related time, so do not add that here
// again.
double predicted_eden_time = _policy->predict_young_region_other_time_ms(eden_region_length) +

View File

@ -27,6 +27,7 @@
#include "gc/g1/g1BarrierSet.hpp"
#include "gc/g1/g1BatchedTask.hpp"
#include "gc/g1/g1CardSetMemory.hpp"
#include "gc/g1/g1CardTableClaimTable.inline.hpp"
#include "gc/g1/g1CollectedHeap.inline.hpp"
#include "gc/g1/g1CollectionSetChooser.hpp"
#include "gc/g1/g1CollectorState.hpp"
@ -34,7 +35,7 @@
#include "gc/g1/g1ConcurrentMarkRemarkTasks.hpp"
#include "gc/g1/g1ConcurrentMarkThread.inline.hpp"
#include "gc/g1/g1ConcurrentRebuildAndScrub.hpp"
#include "gc/g1/g1DirtyCardQueue.hpp"
#include "gc/g1/g1ConcurrentRefine.hpp"
#include "gc/g1/g1HeapRegion.inline.hpp"
#include "gc/g1/g1HeapRegionManager.hpp"
#include "gc/g1/g1HeapRegionPrinter.hpp"
@ -483,7 +484,7 @@ G1ConcurrentMark::G1ConcurrentMark(G1CollectedHeap* g1h,
// _finger set in set_non_marking_state
_worker_id_offset(G1DirtyCardQueueSet::num_par_ids() + G1ConcRefinementThreads),
_worker_id_offset(G1ConcRefinementThreads), // The refinement control thread does not refine cards, so it's just the worker threads.
_max_num_tasks(MAX2(ConcGCThreads, ParallelGCThreads)),
// _num_active_tasks set in set_non_marking_state()
// _tasks set inside the constructor
@ -1141,7 +1142,7 @@ void G1ConcurrentMark::mark_from_roots() {
// worker threads may currently exist and more may not be
// available.
active_workers = _concurrent_workers->set_active_workers(active_workers);
log_info(gc, task)("Using %u workers of %u for marking", active_workers, _concurrent_workers->max_workers());
log_info(gc, task)("Concurrent Mark Using %u of %u Workers", active_workers, _concurrent_workers->max_workers());
_num_concurrent_workers = active_workers;

View File

@ -580,6 +580,8 @@ public:
// TARS for the given region during remembered set rebuilding.
inline HeapWord* top_at_rebuild_start(G1HeapRegion* r) const;
uint worker_id_offset() const { return _worker_id_offset; }
// Clear statistics gathered during the concurrent cycle for the given region after
// it has been reclaimed.
void clear_statistics(G1HeapRegion* r);

View File

@ -25,6 +25,7 @@
#include "gc/g1/g1CollectedHeap.inline.hpp"
#include "gc/g1/g1ConcurrentMark.inline.hpp"
#include "gc/g1/g1ConcurrentMarkRemarkTasks.hpp"
#include "gc/g1/g1ConcurrentRefine.hpp"
#include "gc/g1/g1HeapRegion.inline.hpp"
#include "gc/g1/g1HeapRegionPrinter.hpp"
#include "gc/g1/g1RemSetTrackingPolicy.hpp"
@ -54,15 +55,16 @@ struct G1UpdateRegionLivenessAndSelectForRebuildTask::G1OnRegionClosure : public
_num_humongous_regions_removed(0),
_local_cleanup_list(local_cleanup_list) {}
void reclaim_empty_region(G1HeapRegion* hr) {
void reclaim_empty_region_common(G1HeapRegion* hr) {
assert(!hr->has_pinned_objects(), "precondition");
assert(hr->used() > 0, "precondition");
_freed_bytes += hr->used();
hr->set_containing_set(nullptr);
hr->clear_cardtable();
hr->clear_both_card_tables();
_cm->clear_statistics(hr);
G1HeapRegionPrinter::mark_reclaim(hr);
_g1h->concurrent_refine()->notify_region_reclaimed(hr);
}
void reclaim_empty_humongous_region(G1HeapRegion* hr) {
@ -71,8 +73,8 @@ struct G1UpdateRegionLivenessAndSelectForRebuildTask::G1OnRegionClosure : public
auto on_humongous_region = [&] (G1HeapRegion* hr) {
assert(hr->is_humongous(), "precondition");
reclaim_empty_region(hr);
_num_humongous_regions_removed++;
reclaim_empty_region_common(hr);
_g1h->free_humongous_region(hr, _local_cleanup_list);
};
@ -82,8 +84,8 @@ struct G1UpdateRegionLivenessAndSelectForRebuildTask::G1OnRegionClosure : public
void reclaim_empty_old_region(G1HeapRegion* hr) {
assert(hr->is_old(), "precondition");
reclaim_empty_region(hr);
_num_old_regions_removed++;
reclaim_empty_region_common(hr);
_g1h->free_region(hr, _local_cleanup_list);
}

View File

@ -245,7 +245,7 @@ class G1RebuildRSAndScrubTask : public WorkerTask {
G1RebuildRSAndScrubRegionClosure(G1ConcurrentMark* cm, bool should_rebuild_remset, uint worker_id) :
_cm(cm),
_bitmap(_cm->mark_bitmap()),
_rebuild_closure(G1CollectedHeap::heap(), worker_id),
_rebuild_closure(G1CollectedHeap::heap(), worker_id + cm->worker_id_offset()),
_should_rebuild_remset(should_rebuild_remset),
_processed_words(0) { }

View File

@ -22,15 +22,20 @@
*
*/
#include "gc/g1/g1Analytics.hpp"
#include "gc/g1/g1BarrierSet.hpp"
#include "gc/g1/g1CardTableClaimTable.inline.hpp"
#include "gc/g1/g1CollectedHeap.inline.hpp"
#include "gc/g1/g1CollectionSet.hpp"
#include "gc/g1/g1ConcurrentRefine.hpp"
#include "gc/g1/g1ConcurrentRefineSweepTask.hpp"
#include "gc/g1/g1ConcurrentRefineThread.hpp"
#include "gc/g1/g1DirtyCardQueue.hpp"
#include "gc/g1/g1HeapRegion.inline.hpp"
#include "gc/g1/g1HeapRegionRemSet.inline.hpp"
#include "gc/g1/g1Policy.hpp"
#include "gc/shared/gc_globals.hpp"
#include "gc/shared/gcTraceTime.inline.hpp"
#include "gc/shared/workerThread.hpp"
#include "logging/log.hpp"
#include "memory/allocation.inline.hpp"
#include "memory/iterator.hpp"
@ -38,17 +43,15 @@
#include "runtime/mutexLocker.hpp"
#include "utilities/debug.hpp"
#include "utilities/globalDefinitions.hpp"
#include "utilities/ticks.hpp"
#include <math.h>
G1ConcurrentRefineThread* G1ConcurrentRefineThreadControl::create_refinement_thread(uint worker_id, bool initializing) {
G1ConcurrentRefineThread* G1ConcurrentRefineThreadControl::create_refinement_thread() {
G1ConcurrentRefineThread* result = nullptr;
if (initializing || !InjectGCWorkerCreationFailure) {
result = G1ConcurrentRefineThread::create(_cr, worker_id);
}
result = G1ConcurrentRefineThread::create(_cr);
if (result == nullptr || result->osthread() == nullptr) {
log_warning(gc)("Failed to create refinement thread %u, no more %s",
worker_id,
log_warning(gc)("Failed to create refinement control thread, no more %s",
result == nullptr ? "memory" : "OS threads");
if (result != nullptr) {
delete result;
@ -60,106 +63,392 @@ G1ConcurrentRefineThread* G1ConcurrentRefineThreadControl::create_refinement_thr
G1ConcurrentRefineThreadControl::G1ConcurrentRefineThreadControl(uint max_num_threads) :
_cr(nullptr),
_threads(max_num_threads)
_control_thread(nullptr),
_workers(nullptr),
_max_num_threads(max_num_threads)
{}
G1ConcurrentRefineThreadControl::~G1ConcurrentRefineThreadControl() {
while (_threads.is_nonempty()) {
delete _threads.pop();
}
}
bool G1ConcurrentRefineThreadControl::ensure_threads_created(uint worker_id, bool initializing) {
assert(worker_id < max_num_threads(), "precondition");
while ((uint)_threads.length() <= worker_id) {
G1ConcurrentRefineThread* rt = create_refinement_thread(_threads.length(), initializing);
if (rt == nullptr) {
return false;
}
_threads.push(rt);
}
return true;
delete _control_thread;
delete _workers;
}
jint G1ConcurrentRefineThreadControl::initialize(G1ConcurrentRefine* cr) {
assert(cr != nullptr, "G1ConcurrentRefine must not be null");
_cr = cr;
if (max_num_threads() > 0) {
_threads.push(create_refinement_thread(0, true));
if (_threads.at(0) == nullptr) {
vm_shutdown_during_initialization("Could not allocate primary refinement thread");
if (is_refinement_enabled()) {
_control_thread = create_refinement_thread();
if (_control_thread == nullptr) {
vm_shutdown_during_initialization("Could not allocate refinement control thread");
return JNI_ENOMEM;
}
if (!UseDynamicNumberOfGCThreads) {
if (!ensure_threads_created(max_num_threads() - 1, true)) {
vm_shutdown_during_initialization("Could not allocate refinement threads");
return JNI_ENOMEM;
}
}
_workers = new WorkerThreads("G1 Refinement Workers", max_num_threads());
_workers->initialize_workers();
}
return JNI_OK;
}
#ifdef ASSERT
void G1ConcurrentRefineThreadControl::assert_current_thread_is_primary_refinement_thread() const {
assert(Thread::current() == _threads.at(0), "Not primary thread");
void G1ConcurrentRefineThreadControl::assert_current_thread_is_control_refinement_thread() const {
assert(Thread::current() == _control_thread, "Not refinement control thread");
}
#endif // ASSERT
bool G1ConcurrentRefineThreadControl::activate(uint worker_id) {
if (ensure_threads_created(worker_id, false)) {
_threads.at(worker_id)->activate();
return true;
}
void G1ConcurrentRefineThreadControl::activate() {
_control_thread->activate();
}
return false;
void G1ConcurrentRefineThreadControl::run_task(WorkerTask* task, uint num_workers) {
assert(num_workers >= 1, "must be");
WithActiveWorkers w(_workers, num_workers);
_workers->run_task(task);
}
void G1ConcurrentRefineThreadControl::control_thread_do(ThreadClosure* tc) {
if (is_refinement_enabled()) {
tc->do_thread(_control_thread);
}
}
void G1ConcurrentRefineThreadControl::worker_threads_do(ThreadClosure* tc) {
for (G1ConcurrentRefineThread* t : _threads) {
tc->do_thread(t);
if (is_refinement_enabled()) {
_workers->threads_do(tc);
}
}
void G1ConcurrentRefineThreadControl::stop() {
for (G1ConcurrentRefineThread* t : _threads) {
t->stop();
if (is_refinement_enabled()) {
_control_thread->stop();
}
}
G1ConcurrentRefineSweepState::G1ConcurrentRefineSweepState(uint max_reserved_regions) :
_state(State::Idle),
_sweep_table(new G1CardTableClaimTable(G1CollectedHeap::get_chunks_per_region_for_merge())),
_stats()
{
_sweep_table->initialize(max_reserved_regions);
}
G1ConcurrentRefineSweepState::~G1ConcurrentRefineSweepState() {
delete _sweep_table;
}
void G1ConcurrentRefineSweepState::set_state_start_time() {
_state_start[static_cast<uint>(_state)] = Ticks::now();
}
Tickspan G1ConcurrentRefineSweepState::get_duration(State start, State end) {
return _state_start[static_cast<uint>(end)] - _state_start[static_cast<uint>(start)];
}
void G1ConcurrentRefineSweepState::reset_stats() {
stats()->reset();
}
void G1ConcurrentRefineSweepState::add_yield_during_sweep_duration(jlong duration) {
stats()->inc_yield_during_sweep_duration(duration);
}
bool G1ConcurrentRefineSweepState::advance_state(State next_state) {
bool result = is_in_progress();
if (result) {
_state = next_state;
} else {
_state = State::Idle;
}
return result;
}
void G1ConcurrentRefineSweepState::assert_state(State expected) {
assert(_state == expected, "must be %s but is %s", state_name(expected), state_name(_state));
}
void G1ConcurrentRefineSweepState::start_work() {
assert_state(State::Idle);
set_state_start_time();
_stats.reset();
_state = State::SwapGlobalCT;
}
bool G1ConcurrentRefineSweepState::swap_global_card_table() {
assert_state(State::SwapGlobalCT);
GCTraceTime(Info, gc, refine) tm("Concurrent Refine Global Card Table Swap");
set_state_start_time();
{
// We can't have any new threads in the process of being created while we
// swap the card table because we read the current card table state during
// initialization.
// A safepoint may occur during that time, so leave the STS temporarily.
SuspendibleThreadSetLeaver sts_leave;
MutexLocker mu(Threads_lock);
// A GC that advanced the epoch might have happened, which already switched
// the global card table. In that case do nothing.
if (is_in_progress()) {
G1BarrierSet::g1_barrier_set()->swap_global_card_table();
}
}
return advance_state(State::SwapJavaThreadsCT);
}
bool G1ConcurrentRefineSweepState::swap_java_threads_ct() {
assert_state(State::SwapJavaThreadsCT);
GCTraceTime(Info, gc, refine) tm("Concurrent Refine Java Thread CT swap");
set_state_start_time();
{
// Need to leave the STS to avoid potential deadlock in the handshake.
SuspendibleThreadSetLeaver sts;
class G1SwapThreadCardTableClosure : public HandshakeClosure {
public:
G1SwapThreadCardTableClosure() : HandshakeClosure("G1 Java Thread CT swap") { }
virtual void do_thread(Thread* thread) {
G1BarrierSet* bs = G1BarrierSet::g1_barrier_set();
bs->update_card_table_base(thread);
}
} cl;
Handshake::execute(&cl);
}
return advance_state(State::SynchronizeGCThreads);
}
bool G1ConcurrentRefineSweepState::swap_gc_threads_ct() {
assert_state(State::SynchronizeGCThreads);
GCTraceTime(Info, gc, refine) tm("Concurrent Refine GC Thread CT swap");
set_state_start_time();
{
class RendezvousGCThreads: public VM_Operation {
public:
VMOp_Type type() const { return VMOp_G1RendezvousGCThreads; }
virtual bool evaluate_at_safepoint() const {
// We only care about synchronizing the GC threads.
// Leave the Java threads running.
return false;
}
virtual bool skip_thread_oop_barriers() const {
fatal("Concurrent VMOps should not call this");
return true;
}
void doit() {
// Lightweight "handshake" of the GC threads for memory synchronization;
// both the changes to the Java heap and the previous global card table
// reference change need to be synchronized, so that no GC thread
// accesses the wrong card table.
// For example, in the remembered set rebuild process the marking threads
// write marks into the card table, and that card table reference must be
// the correct one.
SuspendibleThreadSet::synchronize();
SuspendibleThreadSet::desynchronize();
};
} op;
SuspendibleThreadSetLeaver sts_leave;
VMThread::execute(&op);
}
return advance_state(State::SnapshotHeap);
}
void G1ConcurrentRefineSweepState::snapshot_heap(bool concurrent) {
if (concurrent) {
GCTraceTime(Info, gc, refine) tm("Concurrent Refine Snapshot Heap");
assert_state(State::SnapshotHeap);
set_state_start_time();
snapshot_heap_inner();
advance_state(State::SweepRT);
} else {
assert_state(State::Idle);
assert_at_safepoint();
snapshot_heap_inner();
}
}
void G1ConcurrentRefineSweepState::sweep_refinement_table_start() {
assert_state(State::SweepRT);
set_state_start_time();
}
bool G1ConcurrentRefineSweepState::sweep_refinement_table_step() {
assert_state(State::SweepRT);
GCTraceTime(Info, gc, refine) tm("Concurrent Refine Table Step");
G1ConcurrentRefine* cr = G1CollectedHeap::heap()->concurrent_refine();
G1ConcurrentRefineSweepTask task(_sweep_table, &_stats, cr->num_threads_wanted());
cr->run_with_refinement_workers(&task);
if (task.sweep_completed()) {
advance_state(State::CompleteRefineWork);
return true;
} else {
return false;
}
}
bool G1ConcurrentRefineSweepState::complete_work(bool concurrent, bool print_log) {
if (concurrent) {
assert_state(State::CompleteRefineWork);
} else {
// May have been forced to complete at any other time.
assert(is_in_progress() && _state != State::CompleteRefineWork, "must be but is %s", state_name(_state));
}
set_state_start_time();
if (print_log) {
G1ConcurrentRefineStats* s = &_stats;
log_debug(gc, refine)("Refinement took %.2fms (pre-sweep %.2fms card refine %.2f) "
"(scanned %zu clean %zu (%.2f%%) not_clean %zu (%.2f%%) not_parsable %zu "
"refers_to_cset %zu (%.2f%%) still_refers_to_cset %zu (%.2f%%) no_cross_region %zu pending %zu)",
get_duration(State::Idle, _state).seconds() * 1000.0,
get_duration(State::Idle, State::SweepRT).seconds() * 1000.0,
TimeHelper::counter_to_millis(s->refine_duration()),
s->cards_scanned(),
s->cards_clean(),
percent_of(s->cards_clean(), s->cards_scanned()),
s->cards_not_clean(),
percent_of(s->cards_not_clean(), s->cards_scanned()),
s->cards_not_parsable(),
s->cards_refer_to_cset(),
percent_of(s->cards_refer_to_cset(), s->cards_not_clean()),
s->cards_already_refer_to_cset(),
percent_of(s->cards_already_refer_to_cset(), s->cards_not_clean()),
s->cards_no_cross_region(),
s->cards_pending()
);
}
bool has_sweep_rt_work = _state == State::SweepRT;
advance_state(State::Idle);
return has_sweep_rt_work;
}
void G1ConcurrentRefineSweepState::snapshot_heap_inner() {
// G1CollectedHeap::heap_region_iterate() below will only visit currently committed
// regions. Initialize all entries in the state table here, and later in this method
// selectively enable the regions we are interested in. This way regions committed
// later will automatically be excluded from iteration.
// Their refinement table must be completely empty anyway.
_sweep_table->reset_all_to_claimed();
class SnapshotRegionsClosure : public G1HeapRegionClosure {
G1CardTableClaimTable* _sweep_table;
public:
SnapshotRegionsClosure(G1CardTableClaimTable* sweep_table) : G1HeapRegionClosure(), _sweep_table(sweep_table) { }
bool do_heap_region(G1HeapRegion* r) override {
if (!r->is_free()) {
// Need to scan all parts of non-free regions, so reset the claim.
// No need for synchronization: we are only interested in regions
// that were allocated before the handshake; the handshake makes such
// regions' metadata visible to all threads, and we do not care about
// humongous regions that were allocated afterwards.
_sweep_table->reset_to_unclaimed(r->hrm_index());
}
return false;
}
} cl(_sweep_table);
G1CollectedHeap::heap()->heap_region_iterate(&cl);
}
bool G1ConcurrentRefineSweepState::is_in_progress() const {
return _state != State::Idle;
}
bool G1ConcurrentRefineSweepState::are_java_threads_synched() const {
return _state > State::SwapJavaThreadsCT || !is_in_progress();
}
uint64_t G1ConcurrentRefine::adjust_threads_period_ms() const {
// Instead of a fixed value, this could be a command line option. But then
// we might also want to allow configuration of adjust_threads_wait_ms().
return 50;
// Use a prime number close to 50ms, different from other components that derive
// their wait time from the try_get_available_bytes_estimate() call, to minimize
// interference.
return 53;
}
static size_t minimum_pending_cards_target() {
// One buffer per thread.
return ParallelGCThreads * G1UpdateBufferSize;
return ParallelGCThreads * G1PerThreadPendingCardThreshold;
}
G1ConcurrentRefine::G1ConcurrentRefine(G1Policy* policy) :
_policy(policy),
_threads_wanted(0),
G1ConcurrentRefine::G1ConcurrentRefine(G1CollectedHeap* g1h) :
_policy(g1h->policy()),
_num_threads_wanted(0),
_pending_cards_target(PendingCardsTargetUninitialized),
_last_adjust(),
_needs_adjust(false),
_threads_needed(policy, adjust_threads_period_ms()),
_heap_was_locked(false),
_threads_needed(g1h->policy(), adjust_threads_period_ms()),
_thread_control(G1ConcRefinementThreads),
_dcqs(G1BarrierSet::dirty_card_queue_set())
{}
_sweep_state(g1h->max_num_regions())
{ }
jint G1ConcurrentRefine::initialize() {
return _thread_control.initialize(this);
}
G1ConcurrentRefine* G1ConcurrentRefine::create(G1Policy* policy, jint* ecode) {
G1ConcurrentRefine* cr = new G1ConcurrentRefine(policy);
G1ConcurrentRefineSweepState& G1ConcurrentRefine::sweep_state_for_merge() {
bool has_sweep_claims = sweep_state().complete_work(false /* concurrent */);
if (has_sweep_claims) {
log_debug(gc, refine)("Continue existing work");
} else {
// Refinement has been interrupted without having a snapshot. There may
// be a mix of already swapped and not-swapped card tables assigned to threads,
// so they might have already dirtied the swapped card tables.
// Conservatively scan all (non-free, committed) regions' card tables,
// creating the snapshot right now.
log_debug(gc, refine)("Create work from scratch");
sweep_state().snapshot_heap(false /* concurrent */);
}
return sweep_state();
}
void G1ConcurrentRefine::run_with_refinement_workers(WorkerTask* task) {
_thread_control.run_task(task, num_threads_wanted());
}
void G1ConcurrentRefine::notify_region_reclaimed(G1HeapRegion* r) {
assert_at_safepoint();
if (_sweep_state.is_in_progress()) {
_sweep_state.sweep_table()->claim_all_cards(r->hrm_index());
}
}
G1ConcurrentRefine* G1ConcurrentRefine::create(G1CollectedHeap* g1h, jint* ecode) {
G1ConcurrentRefine* cr = new G1ConcurrentRefine(g1h);
*ecode = cr->initialize();
if (*ecode != 0) {
delete cr;
@ -176,25 +465,31 @@ G1ConcurrentRefine::~G1ConcurrentRefine() {
}
void G1ConcurrentRefine::threads_do(ThreadClosure *tc) {
worker_threads_do(tc);
control_thread_do(tc);
}
void G1ConcurrentRefine::worker_threads_do(ThreadClosure *tc) {
_thread_control.worker_threads_do(tc);
}
void G1ConcurrentRefine::update_pending_cards_target(double logged_cards_time_ms,
size_t processed_logged_cards,
size_t predicted_thread_buffer_cards,
void G1ConcurrentRefine::control_thread_do(ThreadClosure *tc) {
_thread_control.control_thread_do(tc);
}
void G1ConcurrentRefine::update_pending_cards_target(double pending_cards_time_ms,
size_t processed_pending_cards,
double goal_ms) {
size_t minimum = minimum_pending_cards_target();
if ((processed_logged_cards < minimum) || (logged_cards_time_ms == 0.0)) {
log_debug(gc, ergo, refine)("Unchanged pending cards target: %zu",
_pending_cards_target);
if ((processed_pending_cards < minimum) || (pending_cards_time_ms == 0.0)) {
log_debug(gc, ergo, refine)("Unchanged pending cards target: %zu (processed %zu minimum %zu time %1.2f)",
_pending_cards_target, processed_pending_cards, minimum, pending_cards_time_ms);
return;
}
// Base the pending cards budget on the measured rate.
double rate = processed_logged_cards / logged_cards_time_ms;
size_t budget = static_cast<size_t>(goal_ms * rate);
// Deduct predicted cards in thread buffers to get target.
size_t new_target = budget - MIN2(budget, predicted_thread_buffer_cards);
double rate = processed_pending_cards / pending_cards_time_ms;
size_t new_target = static_cast<size_t>(goal_ms * rate);
// Add some hysteresis with previous values.
if (is_pending_cards_target_initialized()) {
new_target = (new_target + _pending_cards_target) / 2;
@ -205,46 +500,36 @@ void G1ConcurrentRefine::update_pending_cards_target(double logged_cards_time_ms
log_debug(gc, ergo, refine)("New pending cards target: %zu", new_target);
}
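For illustration, the target computation above boils down to: measured rate = processed cards / time, raw budget = goal_ms * rate, then averaging with the previous target for hysteresis. A standalone sketch of the same arithmetic (plain C++, not HotSpot code; the function name and example numbers are invented):

// Standalone sketch of the pending cards target calculation (not HotSpot code;
// the function name and the example numbers are made up).
#include <cstddef>
#include <cstdio>

static size_t updated_target(double pending_cards_time_ms,
                             size_t processed_pending_cards,
                             double goal_ms,
                             size_t previous_target,
                             bool target_initialized) {
  // Measured refinement rate in cards per millisecond.
  double rate = processed_pending_cards / pending_cards_time_ms;
  // Number of cards that fit into the time goal at that rate.
  size_t new_target = static_cast<size_t>(goal_ms * rate);
  // Hysteresis: average with the previous target to damp oscillation.
  if (target_initialized) {
    new_target = (new_target + previous_target) / 2;
  }
  return new_target;
}

int main() {
  // 20000 cards processed in 5 ms -> 4000 cards/ms; a 2 ms goal gives a raw
  // budget of 8000 cards, averaged with a previous target of 6000.
  std::printf("%zu\n", updated_target(5.0, 20000, 2.0, 6000, true)); // prints 7000
  return 0;
}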
void G1ConcurrentRefine::adjust_after_gc(double logged_cards_time_ms,
size_t processed_logged_cards,
size_t predicted_thread_buffer_cards,
void G1ConcurrentRefine::adjust_after_gc(double pending_cards_time_ms,
size_t processed_pending_cards,
double goal_ms) {
if (!G1UseConcRefinement) return;
if (!G1UseConcRefinement) {
return;
}
update_pending_cards_target(logged_cards_time_ms,
processed_logged_cards,
predicted_thread_buffer_cards,
update_pending_cards_target(pending_cards_time_ms,
processed_pending_cards,
goal_ms);
if (_thread_control.max_num_threads() == 0) {
// If no refinement threads then the mutator threshold is the target.
_dcqs.set_mutator_refinement_threshold(_pending_cards_target);
} else {
// Provisionally make the mutator threshold unlimited, to be updated by
// the next periodic adjustment. Because card state may have changed
// drastically, record that adjustment is needed and kick the primary
// thread, in case it is waiting.
_dcqs.set_mutator_refinement_threshold(SIZE_MAX);
if (_thread_control.is_refinement_enabled()) {
_needs_adjust = true;
if (is_pending_cards_target_initialized()) {
_thread_control.activate(0);
_thread_control.activate();
}
}
}
// Wake up the primary thread less frequently when the time available until
// the next GC is longer. But don't increase the wait time too rapidly.
// This reduces the number of primary thread wakeups that just immediately
// go back to waiting, while still being responsive to behavior changes.
static uint64_t compute_adjust_wait_time_ms(double available_ms) {
return static_cast<uint64_t>(sqrt(available_ms) * 4.0);
}
uint64_t G1ConcurrentRefine::adjust_threads_wait_ms() const {
assert_current_thread_is_primary_refinement_thread();
assert_current_thread_is_control_refinement_thread();
if (is_pending_cards_target_initialized()) {
double available_ms = _threads_needed.predicted_time_until_next_gc_ms();
uint64_t wait_time_ms = compute_adjust_wait_time_ms(available_ms);
return MAX2(wait_time_ms, adjust_threads_period_ms());
// Retry as soon as possible when the reason for not getting a prediction was that we
// temporarily did not get the heap lock. Otherwise we might wait too long until we get
// back here.
if (_heap_was_locked) {
return 1;
}
double available_time_ms = _threads_needed.predicted_time_until_next_gc_ms();
return _policy->adjust_wait_time_ms(available_time_ms, adjust_threads_period_ms());
} else {
// If target not yet initialized then wait forever (until explicitly
// activated). This happens during startup, when we don't bother with
@ -253,185 +538,74 @@ uint64_t G1ConcurrentRefine::adjust_threads_wait_ms() const {
}
}
class G1ConcurrentRefine::RemSetSamplingClosure : public G1HeapRegionClosure {
size_t _sampled_code_root_rs_length;
bool G1ConcurrentRefine::adjust_num_threads_periodically() {
assert_current_thread_is_control_refinement_thread();
public:
RemSetSamplingClosure() :
_sampled_code_root_rs_length(0) {}
bool do_heap_region(G1HeapRegion* r) override {
G1HeapRegionRemSet* rem_set = r->rem_set();
_sampled_code_root_rs_length += rem_set->code_roots_list_length();
return false;
}
size_t sampled_code_root_rs_length() const { return _sampled_code_root_rs_length; }
};
// Adjust the target length (in regions) of the young gen, based on the
// current length of the remembered sets.
//
// At the end of the GC G1 determines the length of the young gen based on
// how much time the next GC can take, and when the next GC may occur
// according to the MMU.
//
// The assumption is that a significant part of the GC is spent on scanning
// the remembered sets (and many other components), so this thread constantly
// reevaluates the prediction for the remembered set scanning costs, and potentially
// resizes the young gen. This may do a premature GC or even increase the young
// gen size to keep pause time length goal.
void G1ConcurrentRefine::adjust_young_list_target_length() {
if (_policy->use_adaptive_young_list_length()) {
G1CollectedHeap* g1h = G1CollectedHeap::heap();
G1CollectionSet* cset = g1h->collection_set();
RemSetSamplingClosure cl;
cset->iterate(&cl);
size_t card_rs_length = g1h->young_regions_cardset()->occupied();
size_t sampled_code_root_rs_length = cl.sampled_code_root_rs_length();
_policy->revise_young_list_target_length(card_rs_length, sampled_code_root_rs_length);
}
}
bool G1ConcurrentRefine::adjust_threads_periodically() {
assert_current_thread_is_primary_refinement_thread();
// Check whether it's time to do a periodic adjustment.
_heap_was_locked = false;
// Check whether it's time to do a periodic adjustment if there is no explicit
// request pending. We might have spuriously woken up.
if (!_needs_adjust) {
Tickspan since_adjust = Ticks::now() - _last_adjust;
if (since_adjust.milliseconds() >= adjust_threads_period_ms()) {
_needs_adjust = true;
if (since_adjust.milliseconds() < adjust_threads_period_ms()) {
_num_threads_wanted = 0;
return false;
}
}
// If needed, try to adjust threads wanted.
if (_needs_adjust) {
// Getting used young bytes requires holding Heap_lock. But we can't use
// normal lock and block until available. Blocking on the lock could
// deadlock with a GC VMOp that is holding the lock and requesting a
// safepoint. Instead try to lock, and if fail then skip adjustment for
// this iteration of the thread, do some refinement work, and retry the
// adjustment later.
if (Heap_lock->try_lock()) {
size_t used_bytes = _policy->estimate_used_young_bytes_locked();
Heap_lock->unlock();
adjust_young_list_target_length();
size_t young_bytes = _policy->young_list_target_length() * G1HeapRegion::GrainBytes;
size_t available_bytes = young_bytes - MIN2(young_bytes, used_bytes);
adjust_threads_wanted(available_bytes);
_needs_adjust = false;
_last_adjust = Ticks::now();
return true;
}
// Reset pending request.
_needs_adjust = false;
size_t available_bytes = 0;
if (_policy->try_get_available_bytes_estimate(available_bytes)) {
adjust_threads_wanted(available_bytes);
_last_adjust = Ticks::now();
} else {
_heap_was_locked = true;
// Defer adjustment to next time.
_needs_adjust = true;
}
return false;
}
bool G1ConcurrentRefine::is_in_last_adjustment_period() const {
return _threads_needed.predicted_time_until_next_gc_ms() <= adjust_threads_period_ms();
return (_num_threads_wanted > 0) && !heap_was_locked();
}
void G1ConcurrentRefine::adjust_threads_wanted(size_t available_bytes) {
assert_current_thread_is_primary_refinement_thread();
size_t num_cards = _dcqs.num_cards();
size_t mutator_threshold = SIZE_MAX;
uint old_wanted = AtomicAccess::load(&_threads_wanted);
assert_current_thread_is_control_refinement_thread();
_threads_needed.update(old_wanted,
G1Policy* policy = G1CollectedHeap::heap()->policy();
const G1Analytics* analytics = policy->analytics();
size_t num_cards = policy->current_pending_cards();
_threads_needed.update(_num_threads_wanted,
available_bytes,
num_cards,
_pending_cards_target);
uint new_wanted = _threads_needed.threads_needed();
if (new_wanted > _thread_control.max_num_threads()) {
// If running all the threads can't reach goal, turn on refinement by
// mutator threads. Using target as the threshold may be stronger
// than required, but will do the most to get us under goal, and we'll
// reevaluate with the next adjustment.
mutator_threshold = _pending_cards_target;
// Bound the wanted threads by maximum available.
new_wanted = _thread_control.max_num_threads();
} else if (is_in_last_adjustment_period()) {
// If very little time remains until GC, enable mutator refinement. If
// the target has been reached, this keeps the number of pending cards on
// target even if refinement threads deactivate in the meantime. And if
// the target hasn't been reached, this prevents things from getting
// worse.
mutator_threshold = _pending_cards_target;
}
AtomicAccess::store(&_threads_wanted, new_wanted);
_dcqs.set_mutator_refinement_threshold(mutator_threshold);
log_debug(gc, refine)("Concurrent refinement: wanted %u, cards: %zu, "
"predicted: %zu, time: %1.2fms",
_num_threads_wanted = new_wanted;
log_debug(gc, refine)("Concurrent refinement: wanted %u, pending cards: %zu (pending-from-gc %zu), "
"predicted: %zu, goal %zu, time-until-next-gc: %1.2fms pred-refine-rate %1.2fc/ms log-rate %1.2fc/ms",
new_wanted,
num_cards,
G1CollectedHeap::heap()->policy()->pending_cards_from_gc(),
_threads_needed.predicted_cards_at_next_gc(),
_threads_needed.predicted_time_until_next_gc_ms());
// Activate newly wanted threads. The current thread is the primary
// refinement thread, so is already active.
for (uint i = MAX2(old_wanted, 1u); i < new_wanted; ++i) {
if (!_thread_control.activate(i)) {
// Failed to allocate and activate thread. Stop trying to activate, and
// instead use mutator threads to make up the gap.
AtomicAccess::store(&_threads_wanted, i);
_dcqs.set_mutator_refinement_threshold(_pending_cards_target);
break;
}
}
}
void G1ConcurrentRefine::reduce_threads_wanted() {
assert_current_thread_is_primary_refinement_thread();
if (!_needs_adjust) { // Defer if adjustment request is active.
uint wanted = AtomicAccess::load(&_threads_wanted);
if (wanted > 0) {
AtomicAccess::store(&_threads_wanted, --wanted);
}
// If very little time remains until GC, enable mutator refinement. If
// the target has been reached, this keeps the number of pending cards on
// target even as refinement threads deactivate in the meantime.
if (is_in_last_adjustment_period()) {
_dcqs.set_mutator_refinement_threshold(_pending_cards_target);
}
}
}
bool G1ConcurrentRefine::is_thread_wanted(uint worker_id) const {
return worker_id < AtomicAccess::load(&_threads_wanted);
_pending_cards_target,
_threads_needed.predicted_time_until_next_gc_ms(),
analytics->predict_concurrent_refine_rate_ms(),
analytics->predict_dirtied_cards_rate_ms()
);
}
bool G1ConcurrentRefine::is_thread_adjustment_needed() const {
assert_current_thread_is_primary_refinement_thread();
assert_current_thread_is_control_refinement_thread();
return _needs_adjust;
}
void G1ConcurrentRefine::record_thread_adjustment_needed() {
assert_current_thread_is_primary_refinement_thread();
assert_current_thread_is_control_refinement_thread();
_needs_adjust = true;
}
G1ConcurrentRefineStats G1ConcurrentRefine::get_and_reset_refinement_stats() {
struct CollectStats : public ThreadClosure {
G1ConcurrentRefineStats _total_stats;
virtual void do_thread(Thread* t) {
G1ConcurrentRefineThread* crt = static_cast<G1ConcurrentRefineThread*>(t);
G1ConcurrentRefineStats& stats = *crt->refinement_stats();
_total_stats += stats;
stats.reset();
}
} collector;
threads_do(&collector);
return collector._total_stats;
}
uint G1ConcurrentRefine::worker_id_offset() {
return G1DirtyCardQueueSet::num_par_ids();
}
bool G1ConcurrentRefine::try_refinement_step(uint worker_id,
size_t stop_at,
G1ConcurrentRefineStats* stats) {
uint adjusted_id = worker_id + worker_id_offset();
return _dcqs.refine_completed_buffer_concurrently(adjusted_id, stop_at, stats);
}

@ -1,5 +1,5 @@
/*
* Copyright (c) 2001, 2023, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2001, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -34,23 +34,28 @@
#include "utilities/macros.hpp"
// Forward decl
class G1CardTableClaimTable;
class G1CollectedHeap;
class G1ConcurrentRefine;
class G1ConcurrentRefineThread;
class G1DirtyCardQueueSet;
class G1HeapRegion;
class G1Policy;
class ThreadClosure;
class WorkerTask;
class WorkerThreads;
// Helper class for refinement thread management. Used to start, stop and
// iterate over the refinement threads.
class G1ConcurrentRefineThreadControl {
G1ConcurrentRefine* _cr;
GrowableArrayCHeap<G1ConcurrentRefineThread*, mtGC> _threads;
G1ConcurrentRefineThread* _control_thread;
WorkerThreads* _workers;
uint _max_num_threads;
// Create the refinement thread for the given worker id.
// If initializing is true, ignore InjectGCWorkerCreationFailure.
G1ConcurrentRefineThread* create_refinement_thread(uint worker_id, bool initializing);
bool ensure_threads_created(uint worker_id, bool initializing);
G1ConcurrentRefineThread* create_refinement_thread();
NONCOPYABLE(G1ConcurrentRefineThreadControl);
@ -60,21 +65,119 @@ public:
jint initialize(G1ConcurrentRefine* cr);
void assert_current_thread_is_primary_refinement_thread() const NOT_DEBUG_RETURN;
void assert_current_thread_is_control_refinement_thread() const NOT_DEBUG_RETURN;
uint max_num_threads() const { return _threads.capacity(); }
uint max_num_threads() const { return _max_num_threads; }
bool is_refinement_enabled() const { return _max_num_threads > 0; }
// Activate the indicated thread. If the thread has not yet been allocated,
// allocate and then activate. If allocation is needed and fails, return
// false. Otherwise return true.
// precondition: worker_id < max_num_threads().
// precondition: current thread is not the designated worker.
bool activate(uint worker_id);
// Activate the control thread.
void activate();
void run_task(WorkerTask* task, uint num_workers);
void control_thread_do(ThreadClosure* tc);
void worker_threads_do(ThreadClosure* tc);
void stop();
};
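The interface above separates a single control thread from a pool of refinement workers that execute a task with a bounded degree of parallelism. A rough standalone model of that split using plain C++ threads (all names here are invented; this is not the HotSpot WorkerThreads API):

// Minimal standalone model of the control-thread / worker split (plain C++
// threads; MiniWorkerPool and its run_task bounding are invented).
#include <algorithm>
#include <atomic>
#include <cstdio>
#include <thread>
#include <vector>

struct MiniWorkerPool {
  unsigned _max_workers;
  explicit MiniWorkerPool(unsigned max_workers) : _max_workers(max_workers) {}

  // Run 'task' on 'num_workers' workers, bounded by the configured maximum,
  // mirroring run_task(WorkerTask*, uint num_workers) declared above.
  template <typename Task>
  void run_task(Task task, unsigned num_workers) {
    unsigned n = std::min(num_workers, _max_workers);
    std::vector<std::thread> workers;
    for (unsigned id = 0; id < n; ++id) {
      workers.emplace_back(task, id);
    }
    for (std::thread& t : workers) {
      t.join();
    }
  }
};

int main() {
  MiniWorkerPool pool(4);
  std::atomic<unsigned> chunks{0};
  // The "control thread" (main here) decides that two workers are enough and
  // dispatches a sweep-like task to them.
  pool.run_task([&](unsigned worker_id) {
    chunks.fetch_add(1 + worker_id, std::memory_order_relaxed);
  }, 2);
  std::printf("claimed %u chunks\n", chunks.load()); // prints: claimed 3 chunks
  return 0;
}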
// Tracks the current state of re-examining the dirty cards from idle to completion
// (and reset back to idle).
//
// The process steps are as follows:
//
// 1) Swap global card table pointers
//
// 2) Swap Java Thread's card table pointers
//
// 3) Synchronize GC Threads
// Ensures memory visibility
//
// After this point mutator threads should not mark the refinement table.
//
// 4) Snapshot the heap
// Determines which regions need to be swept.
//
// 5) Sweep Refinement table
// Examines non-Clean cards on the refinement table.
//
// 6) Completion Work
// Calculates statistics about the process to be used in various parts of
// the garbage collection.
//
// All but step 4 are interruptible by safepoints. In case of a garbage collection,
// the garbage collection interrupts this process, which then returns to the Idle state.
//
class G1ConcurrentRefineSweepState {
enum class State : uint {
Idle, // Refinement is doing nothing.
SwapGlobalCT, // Swap global card table.
SwapJavaThreadsCT, // Swap java thread's card tables.
SynchronizeGCThreads, // Synchronize GC thread's memory view.
SnapshotHeap, // Take a snapshot of the region's top() values.
SweepRT, // Sweep the refinement table for pending (dirty) cards.
CompleteRefineWork, // Cleanup of refinement work, reset to idle.
Last
} _state;
static const char* state_name(State state) {
static const char* _state_names[] = {
"Idle",
"Swap Global Card Table",
"Swap JavaThread Card Table",
"Synchronize GC Threads",
"Snapshot Heap",
"Sweep Refinement Table",
"Complete Sweep Work"
};
return _state_names[static_cast<uint>(state)];
}
// Current heap snapshot.
G1CardTableClaimTable* _sweep_table;
// Start times for all states.
Ticks _state_start[static_cast<uint>(State::Last)];
void set_state_start_time();
Tickspan get_duration(State start, State end);
G1ConcurrentRefineStats _stats;
// Advances the state to next_state if not interrupted by a changed epoch. Returns
// to Idle otherwise.
bool advance_state(State next_state);
void assert_state(State expected);
void snapshot_heap_inner();
public:
G1ConcurrentRefineSweepState(uint max_reserved_regions);
~G1ConcurrentRefineSweepState();
void start_work();
bool swap_global_card_table();
bool swap_java_threads_ct();
bool swap_gc_threads_ct();
void snapshot_heap(bool concurrent = true);
void sweep_refinement_table_start();
bool sweep_refinement_table_step();
bool complete_work(bool concurrent, bool print_log = true);
G1CardTableClaimTable* sweep_table() { return _sweep_table; }
G1ConcurrentRefineStats* stats() { return &_stats; }
void reset_stats();
void add_yield_during_sweep_duration(jlong duration);
bool is_in_progress() const;
bool are_java_threads_synched() const;
};
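The state progression described in the comment block above can be pictured as a simple driver loop; the following standalone sketch (plain C++, simplified, with a stubbed-out "interrupted by GC" check) only illustrates the order of the steps and the fall-back to Idle, not the HotSpot implementation:

// Standalone sketch of the sweep state machine (plain C++; enum, driver loop
// and interrupted_by_gc() stub are simplified for illustration).
#include <cstdio>

enum class SweepState {
  Idle, SwapGlobalCT, SwapJavaThreadsCT, SynchronizeGCThreads,
  SnapshotHeap, SweepRT, CompleteRefineWork
};

// Stand-in for "a safepoint/GC reset the state back to Idle meanwhile".
static bool interrupted_by_gc() { return false; }

static void run_one_sweep() {
  SweepState state = SweepState::SwapGlobalCT;             // start_work()
  while (state != SweepState::Idle) {
    if (interrupted_by_gc()) {                             // advance_state() fell back to Idle
      state = SweepState::Idle;
      break;
    }
    switch (state) {
      case SweepState::SwapGlobalCT:         state = SweepState::SwapJavaThreadsCT;    break;
      case SweepState::SwapJavaThreadsCT:    state = SweepState::SynchronizeGCThreads; break;
      case SweepState::SynchronizeGCThreads: state = SweepState::SnapshotHeap;         break;
      case SweepState::SnapshotHeap:         state = SweepState::SweepRT;              break;
      case SweepState::SweepRT:              state = SweepState::CompleteRefineWork;   break; // real sweep loops in steps
      default:                               state = SweepState::Idle;                 break; // CompleteRefineWork
    }
  }
  std::printf("sweep finished, back to Idle\n");
}

int main() {
  run_one_sweep();
  return 0;
}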
// Controls concurrent refinement.
//
// Mutator threads produce dirty cards, which need to be examined for updates
@ -84,49 +187,43 @@ public:
// pending dirty cards at the start of a GC can be processed within that time
// budget.
//
// Concurrent refinement is performed by a combination of dedicated threads
// and by mutator threads as they produce dirty cards. If configured to not
// have any dedicated threads (-XX:G1ConcRefinementThreads=0) then all
// concurrent refinement work is performed by mutator threads. When there are
// dedicated threads, they generally do most of the concurrent refinement
// work, to minimize throughput impact of refinement work on mutator threads.
// Concurrent refinement is performed by a set of dedicated threads. If configured
// to not have any dedicated threads (-XX:G1ConcRefinementThreads=0) then no
// refinement work is performed at all.
//
// This class determines the target number of dirty cards pending for the next
// GC. It also owns the dedicated refinement threads and controls their
// activation in order to achieve that target.
//
// There are two kinds of dedicated refinement threads, a single primary
// thread and some number of secondary threads. When active, all refinement
// threads take buffers of dirty cards from the dirty card queue and process
// them. Between buffers they query this owning object to find out whether
// they should continue running, deactivating themselves if not.
// There are two kinds of dedicated refinement threads, a single control
// thread and some number of refinement worker threads.
// The control thread determines whether there is need to do work, and then starts
// an appropriate number of refinement worker threads to get back to the target
// number of pending dirty cards.
//
// The control thread wakes up periodically to check whether there is a need to do
// refinement work, starting the refinement process as necessary.
//
// The primary thread drives the control system that determines how many
// refinement threads should be active. If inactive, it wakes up periodically
// to recalculate the number of active threads needed, and activates
// additional threads as necessary. While active it also periodically
// recalculates the number wanted and activates more threads if needed. It
// also reduces the number of wanted threads when the target has been reached,
// triggering deactivations.
class G1ConcurrentRefine : public CHeapObj<mtGC> {
G1Policy* _policy;
volatile uint _threads_wanted;
volatile uint _num_threads_wanted;
size_t _pending_cards_target;
Ticks _last_adjust;
Ticks _last_deactivate;
bool _needs_adjust;
bool _heap_was_locked; // The heap was locked the last time we tried to adjust the number of refinement threads.
G1ConcurrentRefineThreadsNeeded _threads_needed;
G1ConcurrentRefineThreadControl _thread_control;
G1DirtyCardQueueSet& _dcqs;
G1ConcurrentRefine(G1Policy* policy);
G1ConcurrentRefineSweepState _sweep_state;
static uint worker_id_offset();
G1ConcurrentRefine(G1CollectedHeap* g1h);
jint initialize();
void assert_current_thread_is_primary_refinement_thread() const {
_thread_control.assert_current_thread_is_primary_refinement_thread();
void assert_current_thread_is_control_refinement_thread() const {
_thread_control.assert_current_thread_is_control_refinement_thread();
}
// For the first few collection cycles we don't have a target (and so don't
@ -138,16 +235,11 @@ class G1ConcurrentRefine : public CHeapObj<mtGC> {
return _pending_cards_target != PendingCardsTargetUninitialized;
}
void update_pending_cards_target(double logged_cards_scan_time_ms,
size_t processed_logged_cards,
size_t predicted_thread_buffer_cards,
void update_pending_cards_target(double pending_cards_scan_time_ms,
size_t processed_pending_cards,
double goal_ms);
uint64_t adjust_threads_period_ms() const;
bool is_in_last_adjustment_period() const;
class RemSetSamplingClosure; // Helper class for adjusting young length.
void adjust_young_list_target_length();
void adjust_threads_wanted(size_t available_bytes);
@ -156,67 +248,66 @@ class G1ConcurrentRefine : public CHeapObj<mtGC> {
public:
~G1ConcurrentRefine();
G1ConcurrentRefineSweepState& sweep_state() { return _sweep_state; }
G1ConcurrentRefineSweepState& sweep_state_for_merge();
void run_with_refinement_workers(WorkerTask* task);
void notify_region_reclaimed(G1HeapRegion* r);
// Returns a G1ConcurrentRefine instance if succeeded to create/initialize the
// G1ConcurrentRefine instance. Otherwise, returns null with error code.
static G1ConcurrentRefine* create(G1Policy* policy, jint* ecode);
static G1ConcurrentRefine* create(G1CollectedHeap* g1h, jint* ecode);
// Stop all the refinement threads.
void stop();
// Called at the end of a GC to prepare for refinement during the next
// concurrent phase. Updates the target for the number of pending dirty
// cards. Updates the mutator refinement threshold. Ensures the primary
// refinement thread (if it exists) is active, so it will adjust the number
// cards. Updates the mutator refinement threshold. Ensures the refinement
// control thread (if it exists) is active, so it will adjust the number
// of running threads.
void adjust_after_gc(double logged_cards_scan_time_ms,
size_t processed_logged_cards,
size_t predicted_thread_buffer_cards,
void adjust_after_gc(double pending_cards_scan_time_ms,
size_t processed_pending_cards,
double goal_ms);
// Target number of pending dirty cards at the start of the next GC.
size_t pending_cards_target() const { return _pending_cards_target; }
// May recalculate the number of refinement threads that should be active in
// order to meet the pending cards target. Returns true if adjustment was
// performed, and clears any pending request. Returns false if the
// adjustment period has not expired, or because a timed or requested
// adjustment could not be performed immediately and so was deferred.
// precondition: current thread is the primary refinement thread.
bool adjust_threads_periodically();
// Recalculates the number of refinement threads that should be active in
// order to meet the pending cards target.
// Returns true if it could recalculate the number of threads and
// refinement threads should be started.
// Returns false if the adjustment period has not expired, or because a timed
// or requested adjustment could not be performed immediately and so was deferred.
bool adjust_num_threads_periodically();
// The amount of time (in ms) the primary refinement thread should sleep
// The amount of time (in ms) the refinement control thread should sleep
// when it is inactive. It requests adjustment whenever it is reactivated.
// precondition: current thread is the primary refinement thread.
// precondition: current thread is the refinement control thread.
uint64_t adjust_threads_wait_ms() const;
// Record a request for thread adjustment as soon as possible.
// precondition: current thread is the primary refinement thread.
// precondition: current thread is the refinement control thread.
void record_thread_adjustment_needed();
// Test whether there is a pending request for thread adjustment.
// precondition: current thread is the primary refinement thread.
// precondition: current thread is the refinement control thread.
bool is_thread_adjustment_needed() const;
// Reduce the number of active threads wanted.
// precondition: current thread is the primary refinement thread.
void reduce_threads_wanted();
// Indicates whether the last refinement adjustment was deferred because the
// heap lock could not be obtained.
bool heap_was_locked() const { return _heap_was_locked; }
// Test whether the thread designated by worker_id should be active.
bool is_thread_wanted(uint worker_id) const;
// Return total of concurrent refinement stats for the
// ConcurrentRefineThreads. Also reset the stats for the threads.
G1ConcurrentRefineStats get_and_reset_refinement_stats();
// Perform a single refinement step; called by the refinement
// threads. Returns true if there was refinement work available.
// Updates stats.
bool try_refinement_step(uint worker_id,
size_t stop_at,
G1ConcurrentRefineStats* stats);
uint num_threads_wanted() const { return _num_threads_wanted; }
uint max_num_threads() const { return _thread_control.max_num_threads(); }
// Iterate over all concurrent refinement threads applying the given closure.
void threads_do(ThreadClosure *tc);
// Iterate over specific refinement threads applying the given closure.
void worker_threads_do(ThreadClosure *tc);
void control_thread_do(ThreadClosure *tc);
};
#endif // SHARE_GC_G1_G1CONCURRENTREFINE_HPP

@ -23,41 +23,33 @@
*/
#include "gc/g1/g1ConcurrentRefineStats.hpp"
#include "runtime/atomicAccess.hpp"
#include "runtime/timer.hpp"
G1ConcurrentRefineStats::G1ConcurrentRefineStats() :
_refinement_time(),
_refined_cards(0),
_precleaned_cards(0),
_dirtied_cards(0)
_sweep_duration(0),
_yield_during_sweep_duration(0),
_cards_scanned(0),
_cards_clean(0),
_cards_not_parsable(0),
_cards_already_refer_to_cset(0),
_cards_refer_to_cset(0),
_cards_no_cross_region(0),
_refine_duration(0)
{}
double G1ConcurrentRefineStats::refinement_rate_ms() const {
// Report 0 when no time recorded because no refinement performed.
double secs = refinement_time().seconds();
return (secs > 0) ? (refined_cards() / (secs * MILLIUNITS)) : 0.0;
}
void G1ConcurrentRefineStats::add_atomic(G1ConcurrentRefineStats* other) {
AtomicAccess::add(&_sweep_duration, other->_sweep_duration, memory_order_relaxed);
AtomicAccess::add(&_yield_during_sweep_duration, other->_yield_during_sweep_duration, memory_order_relaxed);
G1ConcurrentRefineStats&
G1ConcurrentRefineStats::operator+=(const G1ConcurrentRefineStats& other) {
_refinement_time += other._refinement_time;
_refined_cards += other._refined_cards;
_precleaned_cards += other._precleaned_cards;
_dirtied_cards += other._dirtied_cards;
return *this;
}
AtomicAccess::add(&_cards_scanned, other->_cards_scanned, memory_order_relaxed);
AtomicAccess::add(&_cards_clean, other->_cards_clean, memory_order_relaxed);
AtomicAccess::add(&_cards_not_parsable, other->_cards_not_parsable, memory_order_relaxed);
AtomicAccess::add(&_cards_already_refer_to_cset, other->_cards_already_refer_to_cset, memory_order_relaxed);
AtomicAccess::add(&_cards_refer_to_cset, other->_cards_refer_to_cset, memory_order_relaxed);
AtomicAccess::add(&_cards_no_cross_region, other->_cards_no_cross_region, memory_order_relaxed);
template<typename T>
static T clipped_sub(T x, T y) {
return (x < y) ? T() : (x - y);
}
G1ConcurrentRefineStats&
G1ConcurrentRefineStats::operator-=(const G1ConcurrentRefineStats& other) {
_refinement_time = clipped_sub(_refinement_time, other._refinement_time);
_refined_cards = clipped_sub(_refined_cards, other._refined_cards);
_precleaned_cards = clipped_sub(_precleaned_cards, other._precleaned_cards);
_dirtied_cards = clipped_sub(_dirtied_cards, other._dirtied_cards);
return *this;
AtomicAccess::add(&_refine_duration, other->_refine_duration, memory_order_relaxed);
}
void G1ConcurrentRefineStats::reset() {

@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, 2021, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2020, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -33,47 +33,56 @@
// Used for collecting per-thread statistics and for summaries over a
// collection of threads.
class G1ConcurrentRefineStats : public CHeapObj<mtGC> {
Tickspan _refinement_time;
size_t _refined_cards;
size_t _precleaned_cards;
size_t _dirtied_cards;
jlong _sweep_duration; // Time spent sweeping the table finding non-clean cards
// and refining them.
jlong _yield_during_sweep_duration; // Time spent yielding during the sweep (not doing the sweep).
size_t _cards_scanned; // Total number of cards scanned.
size_t _cards_clean; // Number of cards found clean.
size_t _cards_not_parsable; // Number of cards we could not parse and left unrefined.
size_t _cards_already_refer_to_cset; // Number of cards found to be already marked as referring to the collection set.
size_t _cards_refer_to_cset; // Number of dirty cards that were recently found to contain a to-cset reference.
size_t _cards_no_cross_region; // Number of dirty cards found to contain no cross-region references.
jlong _refine_duration; // Time spent during actual refinement.
public:
G1ConcurrentRefineStats();
// Time spent performing concurrent refinement.
Tickspan refinement_time() const { return _refinement_time; }
// Time spent sweeping the refinement table (includes actual refinement,
// but not yield time).
jlong sweep_duration() const { return _sweep_duration - _yield_during_sweep_duration; }
jlong yield_during_sweep_duration() const { return _yield_during_sweep_duration; }
jlong refine_duration() const { return _refine_duration; }
// Number of refined cards.
size_t refined_cards() const { return _refined_cards; }
size_t refined_cards() const { return cards_not_clean(); }
// Refinement rate, in cards per ms.
double refinement_rate_ms() const;
size_t cards_scanned() const { return _cards_scanned; }
size_t cards_clean() const { return _cards_clean; }
size_t cards_not_clean() const { return _cards_scanned - _cards_clean; }
size_t cards_not_parsable() const { return _cards_not_parsable; }
size_t cards_already_refer_to_cset() const { return _cards_already_refer_to_cset; }
size_t cards_refer_to_cset() const { return _cards_refer_to_cset; }
size_t cards_no_cross_region() const { return _cards_no_cross_region; }
// Number of cards that were marked dirty and in need of refinement. This includes cards
// recently found to refer to the collection set, since they were originally dirty as well.
size_t cards_pending() const { return cards_not_clean() - _cards_already_refer_to_cset; }
// Number of cards for which refinement was skipped because some other
// thread had already refined them.
size_t precleaned_cards() const { return _precleaned_cards; }
size_t cards_to_cset() const { return _cards_already_refer_to_cset + _cards_refer_to_cset; }
// Number of cards marked dirty and in need of refinement.
size_t dirtied_cards() const { return _dirtied_cards; }
void inc_sweep_time(jlong t) { _sweep_duration += t; }
void inc_yield_during_sweep_duration(jlong t) { _yield_during_sweep_duration += t; }
void inc_refine_duration(jlong t) { _refine_duration += t; }
void inc_refinement_time(Tickspan t) { _refinement_time += t; }
void inc_refined_cards(size_t cards) { _refined_cards += cards; }
void inc_precleaned_cards(size_t cards) { _precleaned_cards += cards; }
void inc_dirtied_cards(size_t cards) { _dirtied_cards += cards; }
void inc_cards_scanned(size_t increment) { _cards_scanned += increment; }
void inc_cards_clean(size_t increment) { _cards_clean += increment; }
void inc_cards_not_parsable() { _cards_not_parsable++; }
void inc_cards_already_refer_to_cset() { _cards_already_refer_to_cset++; }
void inc_cards_refer_to_cset() { _cards_refer_to_cset++; }
void inc_cards_no_cross_region() { _cards_no_cross_region++; }
G1ConcurrentRefineStats& operator+=(const G1ConcurrentRefineStats& other);
G1ConcurrentRefineStats& operator-=(const G1ConcurrentRefineStats& other);
friend G1ConcurrentRefineStats operator+(G1ConcurrentRefineStats x,
const G1ConcurrentRefineStats& y) {
return x += y;
}
friend G1ConcurrentRefineStats operator-(G1ConcurrentRefineStats x,
const G1ConcurrentRefineStats& y) {
return x -= y;
}
void add_atomic(G1ConcurrentRefineStats* other);
void reset();
};
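A small worked example of the derived counters declared above, with invented numbers:

// Worked example of the derived counters (standalone, numbers made up).
#include <cstddef>
#include <cstdio>

int main() {
  size_t scanned = 1000, clean = 700;
  size_t already_to_cset = 50, refer_to_cset = 120;

  size_t not_clean = scanned - clean;                 // 300 cards were not clean
  size_t pending   = not_clean - already_to_cset;     // 250 were genuinely pending work
  size_t to_cset   = already_to_cset + refer_to_cset; // 170 end up as to-cset cards

  std::printf("not_clean=%zu pending=%zu to_cset=%zu\n", not_clean, pending, to_cset);
  return 0;
}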

@ -0,0 +1,191 @@
/*
* Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "gc/g1/g1CardTableClaimTable.inline.hpp"
#include "gc/g1/g1CollectedHeap.inline.hpp"
#include "gc/g1/g1ConcurrentRefineSweepTask.hpp"
class G1RefineRegionClosure : public G1HeapRegionClosure {
using CardValue = G1CardTable::CardValue;
G1RemSet* _rem_set;
G1CardTableClaimTable* _scan_state;
uint _worker_id;
size_t _num_collections_at_start;
bool has_work(G1HeapRegion* r) {
return _scan_state->has_unclaimed_cards(r->hrm_index());
}
void verify_card_pair_refers_to_same_card(CardValue* source_card, CardValue* dest_card) {
#ifdef ASSERT
G1CollectedHeap* g1h = G1CollectedHeap::heap();
G1HeapRegion* refinement_r = g1h->heap_region_containing(g1h->refinement_table()->addr_for(source_card));
G1HeapRegion* card_r = g1h->heap_region_containing(g1h->card_table()->addr_for(dest_card));
size_t refinement_i = g1h->refinement_table()->index_for_cardvalue(source_card);
size_t card_i = g1h->card_table()->index_for_cardvalue(dest_card);
assert(refinement_r == card_r, "not same region source %u (%zu) dest %u (%zu) ", refinement_r->hrm_index(), refinement_i, card_r->hrm_index(), card_i);
assert(refinement_i == card_i, "indexes are not same %zu %zu", refinement_i, card_i);
#endif
}
void do_dirty_card(CardValue* source_card, CardValue* dest_card) {
verify_card_pair_refers_to_same_card(source_card, dest_card);
G1RemSet::RefineResult res = _rem_set->refine_card_concurrently(source_card, _worker_id);
// Gather statistics based on the result.
switch (res) {
case G1RemSet::HasRefToCSet: {
*dest_card = G1CardTable::g1_to_cset_card;
_refine_stats.inc_cards_refer_to_cset();
break;
}
case G1RemSet::AlreadyToCSet: {
*dest_card = G1CardTable::g1_to_cset_card;
_refine_stats.inc_cards_already_refer_to_cset();
break;
}
case G1RemSet::NoCrossRegion: {
_refine_stats.inc_cards_no_cross_region();
break;
}
case G1RemSet::CouldNotParse: {
// Could not refine - redirty with the original value.
*dest_card = *source_card;
_refine_stats.inc_cards_not_parsable();
break;
}
case G1RemSet::HasRefToOld : break; // Nothing special to do.
}
// Clean card on source card table.
*source_card = G1CardTable::clean_card_val();
}
void do_claimed_block(CardValue* dirty_l, CardValue* dirty_r, CardValue* dest_card) {
for (CardValue* source = dirty_l; source < dirty_r; ++source, ++dest_card) {
do_dirty_card(source, dest_card);
}
}
public:
bool _completed;
G1ConcurrentRefineStats _refine_stats;
G1RefineRegionClosure(uint worker_id, G1CardTableClaimTable* scan_state) :
G1HeapRegionClosure(),
_rem_set(G1CollectedHeap::heap()->rem_set()),
_scan_state(scan_state),
_worker_id(worker_id),
_completed(true),
_refine_stats() { }
bool do_heap_region(G1HeapRegion* r) override {
if (!has_work(r)) {
return false;
}
G1CollectedHeap* g1h = G1CollectedHeap::heap();
if (r->is_young()) {
if (_scan_state->claim_all_cards(r->hrm_index()) == 0) {
// Clear the pre-dirtying information.
r->clear_refinement_table();
}
return false;
}
G1CardTable* card_table = g1h->card_table();
G1CardTable* refinement_table = g1h->refinement_table();
G1CardTableChunkClaimer claim(_scan_state, r->hrm_index());
size_t const region_card_base_idx = (size_t)r->hrm_index() << G1HeapRegion::LogCardsPerRegion;
while (claim.has_next()) {
size_t const start_idx = region_card_base_idx + claim.value();
CardValue* const start_card = refinement_table->byte_for_index(start_idx);
CardValue* const end_card = start_card + claim.size();
CardValue* dest_card = card_table->byte_for_index(start_idx);
G1ChunkScanner scanner{start_card, end_card};
size_t num_dirty_cards = 0;
scanner.on_dirty_cards([&] (CardValue* dirty_l, CardValue* dirty_r) {
jlong refine_start = os::elapsed_counter();
do_claimed_block(dirty_l, dirty_r, dest_card + pointer_delta(dirty_l, start_card, sizeof(CardValue)));
num_dirty_cards += pointer_delta(dirty_r, dirty_l, sizeof(CardValue));
_refine_stats.inc_refine_duration(os::elapsed_counter() - refine_start);
});
if (VerifyDuringGC) {
for (CardValue* i = start_card; i < end_card; ++i) {
guarantee(*i == G1CardTable::clean_card_val(), "must be");
}
}
_refine_stats.inc_cards_scanned(claim.size());
_refine_stats.inc_cards_clean(claim.size() - num_dirty_cards);
if (SuspendibleThreadSet::should_yield()) {
_completed = false;
break;
}
}
return !_completed;
}
};
G1ConcurrentRefineSweepTask::G1ConcurrentRefineSweepTask(G1CardTableClaimTable* scan_state,
G1ConcurrentRefineStats* stats,
uint max_workers) :
WorkerTask("G1 Refine Task"),
_scan_state(scan_state),
_stats(stats),
_max_workers(max_workers),
_sweep_completed(true)
{ }
void G1ConcurrentRefineSweepTask::work(uint worker_id) {
jlong start = os::elapsed_counter();
G1RefineRegionClosure sweep_cl(worker_id, _scan_state);
_scan_state->heap_region_iterate_from_worker_offset(&sweep_cl, worker_id, _max_workers);
if (!sweep_cl._completed) {
_sweep_completed = false;
}
sweep_cl._refine_stats.inc_sweep_time(os::elapsed_counter() - start);
_stats->add_atomic(&sweep_cl._refine_stats);
}
bool G1ConcurrentRefineSweepTask::sweep_completed() const { return _sweep_completed; }
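A self-contained toy model of the per-chunk sweep performed by G1RefineRegionClosure above: find dirty cards on the refinement table, "refine" them, mirror the result onto the main card table and clean the source. Card values, table size, and the assumption that every refined card stays pending are invented for illustration and omit the real refine_card_concurrently() result handling:

// Toy model of the refinement-table sweep (standalone; not G1's card encoding).
#include <array>
#include <cstddef>
#include <cstdio>

constexpr unsigned char kClean = 1; // placeholder values for the model only
constexpr unsigned char kDirty = 0;

int main() {
  std::array<unsigned char, 16> refinement_table;
  std::array<unsigned char, 16> card_table;
  refinement_table.fill(kClean);
  card_table.fill(kClean);
  // Pretend the mutator dirtied a few cards on the refinement table.
  for (size_t i : {3, 4, 5, 11}) {
    refinement_table[i] = kDirty;
  }

  size_t refined = 0;
  for (size_t i = 0; i < refinement_table.size(); ++i) {
    if (refinement_table[i] != kDirty) {
      continue;                     // clean card, nothing to do
    }
    // "Refine" the card; in this toy model every refined card is left for the
    // next GC to scan, so it stays dirty on the main card table.
    card_table[i] = kDirty;
    refinement_table[i] = kClean;   // clean the source (refinement) table
    ++refined;
  }
  std::printf("refined %zu cards\n", refined); // prints: refined 4 cards
  return 0;
}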

@ -1,5 +1,5 @@
/*
* Copyright (c) 2019, 2023, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -22,20 +22,27 @@
*
*/
#ifndef SHARE_GC_SHARED_BUFFERNODELIST_HPP
#define SHARE_GC_SHARED_BUFFERNODELIST_HPP
#ifndef SHARE_GC_G1_G1CONCURRENTREFINESWEEPTASK_HPP
#define SHARE_GC_G1_G1CONCURRENTREFINESWEEPTASK_HPP
#include "utilities/globalDefinitions.hpp"
#include "gc/g1/g1ConcurrentRefineStats.hpp"
#include "gc/shared/workerThread.hpp"
class BufferNode;
class G1CardTableClaimTable;
struct BufferNodeList {
BufferNode* _head; // First node in list or null if empty.
BufferNode* _tail; // Last node in list or null if empty.
size_t _entry_count; // Sum of entries in nodes in list.
class G1ConcurrentRefineSweepTask : public WorkerTask {
G1CardTableClaimTable* _scan_state;
G1ConcurrentRefineStats* _stats;
uint _max_workers;
bool _sweep_completed;
BufferNodeList();
BufferNodeList(BufferNode* head, BufferNode* tail, size_t entry_count);
public:
G1ConcurrentRefineSweepTask(G1CardTableClaimTable* scan_state, G1ConcurrentRefineStats* stats, uint max_workers);
void work(uint worker_id) override;
bool sweep_completed() const;
};
#endif // SHARE_GC_SHARED_BUFFERNODELIST_HPP
#endif /* SHARE_GC_G1_G1CONCURRENTREFINESWEEPTASK_HPP */

@ -23,10 +23,13 @@
*/
#include "gc/g1/g1BarrierSet.hpp"
#include "gc/g1/g1CardTableClaimTable.inline.hpp"
#include "gc/g1/g1CollectedHeap.inline.hpp"
#include "gc/g1/g1ConcurrentRefine.hpp"
#include "gc/g1/g1ConcurrentRefineStats.hpp"
#include "gc/g1/g1ConcurrentRefineSweepTask.hpp"
#include "gc/g1/g1ConcurrentRefineThread.hpp"
#include "gc/g1/g1DirtyCardQueue.hpp"
#include "gc/shared/gcTraceTime.inline.hpp"
#include "gc/shared/suspendibleThreadSet.hpp"
#include "logging/log.hpp"
#include "runtime/cpuTimeCounters.hpp"
@ -38,60 +41,61 @@
#include "utilities/globalDefinitions.hpp"
#include "utilities/ticks.hpp"
G1ConcurrentRefineThread::G1ConcurrentRefineThread(G1ConcurrentRefine* cr, uint worker_id) :
G1ConcurrentRefineThread::G1ConcurrentRefineThread(G1ConcurrentRefine* cr) :
ConcurrentGCThread(),
_notifier(Mutex::nosafepoint, FormatBuffer<>("G1 Refine#%d", worker_id), true),
_notifier(Mutex::nosafepoint, "G1 Refine Control", true),
_requested_active(false),
_refinement_stats(),
_worker_id(worker_id),
_cr(cr)
{
// set name
set_name("G1 Refine#%d", worker_id);
set_name("G1 Refine Control");
}
void G1ConcurrentRefineThread::run_service() {
while (wait_for_completed_buffers()) {
while (wait_for_work()) {
SuspendibleThreadSetJoiner sts_join;
G1ConcurrentRefineStats active_stats_start = _refinement_stats;
report_active("Activated");
while (!should_terminate()) {
if (sts_join.should_yield()) {
report_inactive("Paused", _refinement_stats - active_stats_start);
report_inactive("Paused");
sts_join.yield();
// Reset after yield rather than accumulating across yields, else a
// very long running thread could overflow.
active_stats_start = _refinement_stats;
report_active("Resumed");
} else if (maybe_deactivate()) {
break;
}
// Check whether we want to do refinement. If we don't, then do not do any refinement
// this round. This thread may have just woken up but no threads are currently
// needed, which is common. In this case we want to just go back to
// waiting, with a minimum of fuss; in particular, don't do any "premature"
// refinement. However, adjustment may be pending but temporarily
// blocked. In that case we wait for adjustment to succeed.
Ticks adjust_start = Ticks::now();
if (cr()->adjust_num_threads_periodically()) {
GCTraceTime(Info, gc, refine) tm("Concurrent Refine Cycle");
do_refinement();
} else {
do_refinement_step();
log_debug(gc, refine)("Concurrent Refine Adjust Only (#threads wanted: %u adjustment_needed: %s wait_for_heap_lock: %s) %.2fms",
cr()->num_threads_wanted(),
BOOL_TO_STR(cr()->is_thread_adjustment_needed()),
BOOL_TO_STR(cr()->heap_was_locked()),
(Ticks::now() - adjust_start).seconds() * MILLIUNITS);
deactivate();
break;
}
}
report_inactive("Deactivated", _refinement_stats - active_stats_start);
report_inactive("Deactivated");
update_perf_counter_cpu_time();
}
log_debug(gc, refine)("Stopping %d", _worker_id);
log_debug(gc, refine)("Stopping %s", name());
}
void G1ConcurrentRefineThread::report_active(const char* reason) const {
log_trace(gc, refine)("%s worker %u, current: %zu",
reason,
_worker_id,
G1BarrierSet::dirty_card_queue_set().num_cards());
log_trace(gc, refine)("%s active (%s)", name(), reason);
}
void G1ConcurrentRefineThread::report_inactive(const char* reason,
const G1ConcurrentRefineStats& stats) const {
log_trace(gc, refine)
("%s worker %u, cards: %zu, refined %zu, rate %1.2fc/ms",
reason,
_worker_id,
G1BarrierSet::dirty_card_queue_set().num_cards(),
stats.refined_cards(),
stats.refinement_rate_ms());
void G1ConcurrentRefineThread::report_inactive(const char* reason) const {
log_trace(gc, refine)("%s inactive (%s)", name(), reason);
}
void G1ConcurrentRefineThread::activate() {
@ -103,21 +107,12 @@ void G1ConcurrentRefineThread::activate() {
}
}
bool G1ConcurrentRefineThread::maybe_deactivate() {
bool G1ConcurrentRefineThread::deactivate() {
assert(this == Thread::current(), "precondition");
if (cr()->is_thread_wanted(_worker_id)) {
return false;
} else {
MutexLocker ml(&_notifier, Mutex::_no_safepoint_check_flag);
bool requested = _requested_active;
_requested_active = false;
return !requested; // Deactivate only if not recently requested active.
}
}
bool G1ConcurrentRefineThread::try_refinement_step(size_t stop_at) {
assert(this == Thread::current(), "precondition");
return _cr->try_refinement_step(_worker_id, stop_at, &_refinement_stats);
MutexLocker ml(&_notifier, Mutex::_no_safepoint_check_flag);
bool requested = _requested_active;
_requested_active = false;
return !requested; // Deactivate only if not recently requested active.
}
void G1ConcurrentRefineThread::stop_service() {
@ -128,23 +123,9 @@ jlong G1ConcurrentRefineThread::cpu_time() {
return os::thread_cpu_time(this);
}
// The (single) primary thread drives the controller for the refinement threads.
class G1PrimaryConcurrentRefineThread final : public G1ConcurrentRefineThread {
bool wait_for_completed_buffers() override;
bool maybe_deactivate() override;
void do_refinement_step() override;
// Updates jstat cpu usage for all refinement threads.
void update_perf_counter_cpu_time() override;
public:
G1PrimaryConcurrentRefineThread(G1ConcurrentRefine* cr) :
G1ConcurrentRefineThread(cr, 0)
{}
};
// When inactive, the primary thread periodically wakes up and requests
// adjustment of the number of active refinement threads.
bool G1PrimaryConcurrentRefineThread::wait_for_completed_buffers() {
// When inactive, the control thread periodically wakes up to check if there is
// refinement work pending.
bool G1ConcurrentRefineThread::wait_for_work() {
assert(this == Thread::current(), "precondition");
MonitorLocker ml(notifier(), Mutex::_no_safepoint_check_flag);
if (!requested_active() && !should_terminate()) {
@ -157,78 +138,115 @@ bool G1PrimaryConcurrentRefineThread::wait_for_completed_buffers() {
return !should_terminate();
}
bool G1PrimaryConcurrentRefineThread::maybe_deactivate() {
// Don't deactivate while needing to adjust the number of active threads.
return !cr()->is_thread_adjustment_needed() &&
G1ConcurrentRefineThread::maybe_deactivate();
void G1ConcurrentRefineThread::do_refinement() {
G1ConcurrentRefineSweepState& state = _cr->sweep_state();
state.start_work();
// Swap card tables.
// 1. Global card table
if (!state.swap_global_card_table()) {
log_debug(gc, refine)("GC pause after Global Card Table Swap");
return;
}
// 2. Java threads
if (!state.swap_java_threads_ct()) {
log_debug(gc, refine)("GC pause after Java Thread CT swap");
return;
}
// 3. GC threads
if (!state.swap_gc_threads_ct()) {
log_debug(gc, refine)("GC pause after GC Thread CT swap");
return;
}
G1CollectedHeap* g1h = G1CollectedHeap::heap();
jlong epoch_yield_duration = g1h->yield_duration_in_refinement_epoch();
jlong next_epoch_start = os::elapsed_counter();
jlong total_yield_during_sweep_duration = 0;
// 4. Snapshot heap.
state.snapshot_heap();
// 5. Sweep refinement table until done
bool interrupted_by_gc = false;
log_info(gc, task)("Concurrent Refine Sweep Using %u of %u Workers", _cr->num_threads_wanted(), _cr->max_num_threads());
state.sweep_refinement_table_start();
while (true) {
bool completed = state.sweep_refinement_table_step();
if (completed) {
break;
}
if (SuspendibleThreadSet::should_yield()) {
jlong yield_during_sweep_start = os::elapsed_counter();
SuspendibleThreadSet::yield();
// The yielding may have completed the task, check.
if (!state.is_in_progress()) {
log_debug(gc, refine)("GC completed sweeping, aborting concurrent operation");
interrupted_by_gc = true;
break;
} else {
jlong yield_during_sweep_duration = os::elapsed_counter() - yield_during_sweep_start;
log_debug(gc, refine)("Yielded from card table sweeping for %.2fms, no GC inbetween, continue",
TimeHelper::counter_to_millis(yield_during_sweep_duration));
total_yield_during_sweep_duration += yield_during_sweep_duration;
}
}
}
if (!interrupted_by_gc) {
GCTraceTime(Info, gc, refine) tm("Concurrent Refine Complete Work");
state.add_yield_during_sweep_duration(total_yield_during_sweep_duration);
state.complete_work(true);
G1CollectedHeap* g1h = G1CollectedHeap::heap();
G1Policy* policy = g1h->policy();
G1ConcurrentRefineStats* stats = state.stats();
policy->record_refinement_stats(stats);
{
// The young gen revising mechanism reads the predictor and the values set
// here. Avoid inconsistencies by locking.
MutexLocker x(G1ReviseYoungLength_lock, Mutex::_no_safepoint_check_flag);
policy->record_dirtying_stats(TimeHelper::counter_to_millis(G1CollectedHeap::heap()->last_refinement_epoch_start()),
TimeHelper::counter_to_millis(next_epoch_start),
stats->cards_pending(),
TimeHelper::counter_to_millis(epoch_yield_duration),
0 /* pending_cards_from_gc */,
stats->cards_to_cset());
G1CollectedHeap::heap()->set_last_refinement_epoch_start(next_epoch_start, epoch_yield_duration);
}
stats->reset();
}
}
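// Minimal sketch of the sweep loop shape above: perform bounded steps, yield
// when a safepoint is requested, and abort if the GC finished the sweep while
// this thread was yielded. The function-pointer parameters are stand-ins for
// the G1ConcurrentRefineSweepState and SuspendibleThreadSet calls used by the
// real code.
static bool sweep_until_done_or_aborted(bool (*do_step)(),          // true when the table is fully swept
                                        bool (*should_yield)(),
                                        void (*yield)(),
                                        bool (*still_in_progress)()) {
  while (true) {
    if (do_step()) {
      return true;                       // swept everything concurrently
    }
    if (should_yield()) {
      yield();                           // let the safepoint proceed
      if (!still_in_progress()) {
        return false;                    // a GC pause completed the work
      }
    }
  }
}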
void G1PrimaryConcurrentRefineThread::do_refinement_step() {
// Try adjustment first. If it succeeds then don't do any refinement this
// round. This thread may have just woken up but no threads are currently
// needed, which is common. In this case we want to just go back to
// waiting, with a minimum of fuss; in particular, don't do any "premature"
// refinement. However, adjustment may be pending but temporarily
// blocked. In that case we *do* try refinement, rather than possibly
// uselessly spinning while waiting for adjustment to succeed.
if (!cr()->adjust_threads_periodically()) {
// No adjustment, so try refinement, with the target as a cutoff.
if (!try_refinement_step(cr()->pending_cards_target())) {
// Refinement was cut off, so proceed with fewer threads.
cr()->reduce_threads_wanted();
void G1ConcurrentRefineThread::update_perf_counter_cpu_time() {
// The control thread is responsible for updating the CPU time for all workers.
if (UsePerfData) {
{
ThreadTotalCPUTimeClosure tttc(CPUTimeGroups::CPUTimeType::gc_conc_refine);
cr()->worker_threads_do(&tttc);
}
{
ThreadTotalCPUTimeClosure tttc(CPUTimeGroups::CPUTimeType::gc_conc_refine_control);
cr()->control_thread_do(&tttc);
}
}
}
void G1PrimaryConcurrentRefineThread::update_perf_counter_cpu_time() {
if (UsePerfData) {
ThreadTotalCPUTimeClosure tttc(CPUTimeGroups::CPUTimeType::gc_conc_refine);
cr()->threads_do(&tttc);
}
}
class G1SecondaryConcurrentRefineThread final : public G1ConcurrentRefineThread {
bool wait_for_completed_buffers() override;
void do_refinement_step() override;
void update_perf_counter_cpu_time() override { /* Nothing to do. The primary thread does all the work. */ }
public:
G1SecondaryConcurrentRefineThread(G1ConcurrentRefine* cr, uint worker_id) :
G1ConcurrentRefineThread(cr, worker_id)
{
assert(worker_id > 0, "precondition");
}
};
bool G1SecondaryConcurrentRefineThread::wait_for_completed_buffers() {
assert(this == Thread::current(), "precondition");
MonitorLocker ml(notifier(), Mutex::_no_safepoint_check_flag);
while (!requested_active() && !should_terminate()) {
ml.wait();
}
return !should_terminate();
}
void G1SecondaryConcurrentRefineThread::do_refinement_step() {
assert(this == Thread::current(), "precondition");
// Secondary threads ignore the target and just drive the number of pending
// dirty cards down. The primary thread is responsible for noticing the
// target has been reached and reducing the number of wanted threads. This
// makes the control of wanted threads all under the primary, while avoiding
// useless spinning by secondary threads until the primary thread notices.
// (Useless spinning is still possible if there are no pending cards, but
// that should rarely happen.)
try_refinement_step(0);
}
G1ConcurrentRefineThread*
G1ConcurrentRefineThread::create(G1ConcurrentRefine* cr, uint worker_id) {
G1ConcurrentRefineThread* crt;
if (worker_id == 0) {
crt = new (std::nothrow) G1PrimaryConcurrentRefineThread(cr);
} else {
crt = new (std::nothrow) G1SecondaryConcurrentRefineThread(cr, worker_id);
}
G1ConcurrentRefineThread* G1ConcurrentRefineThread::create(G1ConcurrentRefine* cr) {
G1ConcurrentRefineThread* crt = new (std::nothrow) G1ConcurrentRefineThread(cr);
if (crt != nullptr) {
crt->create_and_start();
}


@ -33,8 +33,8 @@
// Forward Decl.
class G1ConcurrentRefine;
// One or more G1 Concurrent Refinement Threads may be active if concurrent
// refinement is in progress.
// Concurrent refinement control thread watching card mark accrual on the card table
// and starting refinement work.
class G1ConcurrentRefineThread: public ConcurrentGCThread {
friend class VMStructs;
friend class G1CollectedHeap;
@ -42,43 +42,34 @@ class G1ConcurrentRefineThread: public ConcurrentGCThread {
Monitor _notifier;
bool _requested_active;
G1ConcurrentRefineStats _refinement_stats;
uint _worker_id;
G1ConcurrentRefine* _cr;
NONCOPYABLE(G1ConcurrentRefineThread);
protected:
G1ConcurrentRefineThread(G1ConcurrentRefine* cr, uint worker_id);
G1ConcurrentRefineThread(G1ConcurrentRefine* cr);
Monitor* notifier() { return &_notifier; }
bool requested_active() const { return _requested_active; }
// Returns !should_terminate().
// precondition: this is the current thread.
virtual bool wait_for_completed_buffers() = 0;
bool wait_for_work();
// Deactivate if appropriate. Returns true if deactivated.
// precondition: this is the current thread.
virtual bool maybe_deactivate();
bool deactivate();
// Attempt to do some refinement work.
// precondition: this is the current thread.
virtual void do_refinement_step() = 0;
// Swap card table and do a complete re-examination/refinement pass over the
// refinement table.
void do_refinement();
// Update concurrent refine threads cpu time stats.
virtual void update_perf_counter_cpu_time() = 0;
// Helper for do_refinement_step implementations. Try to perform some
// refinement work, limited by stop_at. Returns true if any refinement work
// was performed, false if no work available per stop_at.
// precondition: this is the current thread.
bool try_refinement_step(size_t stop_at);
void update_perf_counter_cpu_time();
void report_active(const char* reason) const;
void report_inactive(const char* reason, const G1ConcurrentRefineStats& stats) const;
void report_inactive(const char* reason) const;
G1ConcurrentRefine* cr() const { return _cr; }
@ -86,23 +77,12 @@ protected:
void stop_service() override;
public:
static G1ConcurrentRefineThread* create(G1ConcurrentRefine* cr, uint worker_id);
virtual ~G1ConcurrentRefineThread() = default;
uint worker_id() const { return _worker_id; }
static G1ConcurrentRefineThread* create(G1ConcurrentRefine* cr);
// Activate this thread.
// precondition: this is not the current thread.
void activate();
G1ConcurrentRefineStats* refinement_stats() {
return &_refinement_stats;
}
const G1ConcurrentRefineStats* refinement_stats() const {
return &_refinement_stats;
}
// Total cpu time spent in this thread so far.
jlong cpu_time();
};


@ -45,48 +45,22 @@ G1ConcurrentRefineThreadsNeeded::G1ConcurrentRefineThreadsNeeded(G1Policy* polic
//
// 1. Minimize the number of refinement threads running at once.
//
// 2. Minimize the number of activations and deactivations for the
// refinement threads that run.
//
// 3. Delay performing refinement work. Having more dirty cards waiting to
// 2. Delay performing refinement work. Having more dirty cards waiting to
// be refined can be beneficial, as further writes to the same card don't
// create more work.
void G1ConcurrentRefineThreadsNeeded::update(uint active_threads,
size_t available_bytes,
size_t num_cards,
size_t target_num_cards) {
_predicted_time_until_next_gc_ms = _policy->predict_time_to_next_gc_ms(available_bytes);
// Estimate number of cards that need to be processed before next GC.
const G1Analytics* analytics = _policy->analytics();
// Estimate time until next GC, based on remaining bytes available for
// allocation and the allocation rate.
double alloc_region_rate = analytics->predict_alloc_rate_ms();
double alloc_bytes_rate = alloc_region_rate * G1HeapRegion::GrainBytes;
if (alloc_bytes_rate == 0.0) {
// A zero rate indicates we don't yet have data to use for predictions.
// Since we don't have any idea how long until the next GC, use a time of
// zero.
_predicted_time_until_next_gc_ms = 0.0;
} else {
// If the heap size is large and the allocation rate is small, we can get
// a predicted time until next GC that is so large it can cause problems
// (such as overflow) in other calculations. Limit the prediction to one
// hour, which is still large in this context.
const double one_hour_ms = 60.0 * 60.0 * MILLIUNITS;
double raw_time_ms = available_bytes / alloc_bytes_rate;
_predicted_time_until_next_gc_ms = MIN2(raw_time_ms, one_hour_ms);
}
double incoming_rate = analytics->predict_dirtied_cards_rate_ms();
double raw_cards = incoming_rate * _predicted_time_until_next_gc_ms;
size_t incoming_cards = static_cast<size_t>(raw_cards);
// Estimate number of cards that need to be processed before next GC. There
// are no incoming cards when time is short, because in that case the
// controller activates refinement by mutator threads to stay on target even
// if threads deactivate in the meantime. This also covers the case of not
// having a real prediction of time until GC.
size_t incoming_cards = 0;
if (_predicted_time_until_next_gc_ms > _update_period_ms) {
double incoming_rate = analytics->predict_dirtied_cards_rate_ms();
double raw_cards = incoming_rate * _predicted_time_until_next_gc_ms;
incoming_cards = static_cast<size_t>(raw_cards);
}
size_t total_cards = num_cards + incoming_cards;
_predicted_cards_at_next_gc = total_cards;
@ -100,9 +74,8 @@ void G1ConcurrentRefineThreadsNeeded::update(uint active_threads,
// The calculation of the number of threads needed isn't very stable when
// time is short, and can lead to starting up lots of threads for not much
// profit. If we're in the last update period, don't change the number of
// threads running, other than to treat the current thread as running. That
// might not be sufficient, but hopefully we were already reasonably close.
// We won't accumulate more because mutator refinement will be activated.
// threads needed. That might not be sufficient, but hopefully we were
// already reasonably close.
if (_predicted_time_until_next_gc_ms <= _update_period_ms) {
_threads_needed = MAX2(active_threads, 1u);
return;
@ -133,11 +106,12 @@ void G1ConcurrentRefineThreadsNeeded::update(uint active_threads,
// close to the next GC we want to drive toward the target, so round up
// then. The rest of the time we round to nearest, trying to remain near
// the middle of the range.
double rthreads = nthreads;
if (_predicted_time_until_next_gc_ms <= _update_period_ms * 5.0) {
nthreads = ::ceil(nthreads);
rthreads = ::ceil(nthreads);
} else {
nthreads = ::round(nthreads);
rthreads = ::round(nthreads);
}
_threads_needed = static_cast<uint>(MIN2<size_t>(nthreads, UINT_MAX));
_threads_needed = static_cast<uint>(MIN2<size_t>(rthreads, UINT_MAX));
}
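// Standalone sketch of the prediction arithmetic above, with illustrative
// names and numbers rather than the HotSpot types or tuning values.
#include <algorithm>
#include <cstddef>

static double predict_time_to_next_gc_ms(double available_bytes,
                                         double alloc_bytes_per_ms) {
  if (alloc_bytes_per_ms == 0.0) {
    return 0.0;                                  // no allocation data yet
  }
  const double one_hour_ms = 60.0 * 60.0 * 1000.0;
  return std::min(available_bytes / alloc_bytes_per_ms, one_hour_ms);
}

static size_t predict_cards_at_next_gc(size_t pending_cards,
                                       double dirtied_cards_per_ms,
                                       double time_to_gc_ms,
                                       double update_period_ms) {
  size_t incoming = 0;
  // Project incoming cards only when there is meaningful time left; close to
  // the GC, mutator refinement keeps the count near the target.
  if (time_to_gc_ms > update_period_ms) {
    incoming = static_cast<size_t>(dirtied_cards_per_ms * time_to_gc_ms);
  }
  return pending_cards + incoming;
}
// Example: 512 MB free at 1 MB/ms gives ~512 ms to the next GC; at
// 100 dirtied cards/ms with a 200 ms update period and 40000 cards already
// pending, the prediction is 40000 + 51200 = 91200 cards at the next GC.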


@ -1,599 +0,0 @@
/*
* Copyright (c) 2001, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "gc/g1/g1BarrierSet.inline.hpp"
#include "gc/g1/g1CardTableEntryClosure.hpp"
#include "gc/g1/g1CollectedHeap.inline.hpp"
#include "gc/g1/g1ConcurrentRefineStats.hpp"
#include "gc/g1/g1ConcurrentRefineThread.hpp"
#include "gc/g1/g1DirtyCardQueue.hpp"
#include "gc/g1/g1FreeIdSet.hpp"
#include "gc/g1/g1HeapRegionRemSet.inline.hpp"
#include "gc/g1/g1RedirtyCardsQueue.hpp"
#include "gc/g1/g1RemSet.hpp"
#include "gc/g1/g1ThreadLocalData.hpp"
#include "gc/shared/bufferNode.hpp"
#include "gc/shared/bufferNodeList.hpp"
#include "gc/shared/suspendibleThreadSet.hpp"
#include "memory/iterator.hpp"
#include "runtime/atomicAccess.hpp"
#include "runtime/javaThread.hpp"
#include "runtime/mutex.hpp"
#include "runtime/mutexLocker.hpp"
#include "runtime/os.hpp"
#include "runtime/safepoint.hpp"
#include "runtime/threads.hpp"
#include "runtime/threadSMR.hpp"
#include "utilities/globalCounter.inline.hpp"
#include "utilities/macros.hpp"
#include "utilities/nonblockingQueue.inline.hpp"
#include "utilities/pair.hpp"
#include "utilities/quickSort.hpp"
#include "utilities/ticks.hpp"
G1DirtyCardQueue::G1DirtyCardQueue(G1DirtyCardQueueSet* qset) :
PtrQueue(qset),
_refinement_stats(new G1ConcurrentRefineStats())
{ }
G1DirtyCardQueue::~G1DirtyCardQueue() {
delete _refinement_stats;
}
// Assumed to be zero by concurrent threads.
static uint par_ids_start() { return 0; }
G1DirtyCardQueueSet::G1DirtyCardQueueSet(BufferNode::Allocator* allocator) :
PtrQueueSet(allocator),
_num_cards(0),
_mutator_refinement_threshold(SIZE_MAX),
_completed(),
_paused(),
_free_ids(par_ids_start(), num_par_ids()),
_detached_refinement_stats()
{}
G1DirtyCardQueueSet::~G1DirtyCardQueueSet() {
abandon_completed_buffers();
}
// Determines how many mutator threads can process the buffers in parallel.
uint G1DirtyCardQueueSet::num_par_ids() {
return (uint)os::initial_active_processor_count();
}
void G1DirtyCardQueueSet::flush_queue(G1DirtyCardQueue& queue) {
if (queue.buffer() != nullptr) {
G1ConcurrentRefineStats* stats = queue.refinement_stats();
stats->inc_dirtied_cards(queue.size());
}
PtrQueueSet::flush_queue(queue);
}
void G1DirtyCardQueueSet::enqueue(G1DirtyCardQueue& queue,
volatile CardValue* card_ptr) {
CardValue* value = const_cast<CardValue*>(card_ptr);
if (!try_enqueue(queue, value)) {
handle_zero_index(queue);
retry_enqueue(queue, value);
}
}
void G1DirtyCardQueueSet::handle_zero_index(G1DirtyCardQueue& queue) {
assert(queue.index() == 0, "precondition");
BufferNode* old_node = exchange_buffer_with_new(queue);
if (old_node != nullptr) {
assert(old_node->index() == 0, "invariant");
G1ConcurrentRefineStats* stats = queue.refinement_stats();
stats->inc_dirtied_cards(old_node->capacity());
handle_completed_buffer(old_node, stats);
}
}
void G1DirtyCardQueueSet::handle_zero_index_for_thread(Thread* t) {
G1DirtyCardQueue& queue = G1ThreadLocalData::dirty_card_queue(t);
G1BarrierSet::dirty_card_queue_set().handle_zero_index(queue);
}
size_t G1DirtyCardQueueSet::num_cards() const {
return AtomicAccess::load(&_num_cards);
}
void G1DirtyCardQueueSet::enqueue_completed_buffer(BufferNode* cbn) {
assert(cbn != nullptr, "precondition");
// Increment _num_cards before adding to queue, so queue removal doesn't
// need to deal with _num_cards possibly going negative.
AtomicAccess::add(&_num_cards, cbn->size());
// Perform push in CS. The old tail may be popped while the push is
// observing it (attaching it to the new buffer). We need to ensure it
// can't be reused until the push completes, to avoid ABA problems.
GlobalCounter::CriticalSection cs(Thread::current());
_completed.push(*cbn);
}
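// Standalone sketch of the accounting order above: the producer adds to the
// shared counter before publishing the buffer, and the consumer subtracts
// only after removing one, so the counter is never observed going negative.
#include <atomic>
#include <cstddef>

static std::atomic<size_t> pending_cards_sketch{0};

static void producer_publish(size_t cards_in_buffer /*, buffer */) {
  pending_cards_sketch.fetch_add(cards_in_buffer);  // 1. account first
  // 2. ...then push the buffer onto the shared queue.
}

static void consumer_take(size_t cards_in_buffer /*, buffer */) {
  // 1. ...pop the buffer from the shared queue first.
  pending_cards_sketch.fetch_sub(cards_in_buffer);  // 2. then un-account it
}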
// Thread-safe attempt to remove and return the first buffer from
// the _completed queue, using the NonblockingQueue::try_pop() underneath.
// It has a limitation that it may return null when there are objects
// in the queue if there is a concurrent push/append operation.
BufferNode* G1DirtyCardQueueSet::dequeue_completed_buffer() {
Thread* current_thread = Thread::current();
BufferNode* result = nullptr;
while (true) {
// Use GlobalCounter critical section to avoid ABA problem.
// The release of a buffer to its allocator's free list uses
// GlobalCounter::write_synchronize() to coordinate with this
// dequeuing operation.
// We use a CS per iteration, rather than over the whole loop,
// because we're not guaranteed to make progress. Lingering in
// one CS could defer releasing buffer to the free list for reuse,
// leading to excessive allocations.
GlobalCounter::CriticalSection cs(current_thread);
if (_completed.try_pop(&result)) return result;
}
}
BufferNode* G1DirtyCardQueueSet::get_completed_buffer() {
BufferNode* result = dequeue_completed_buffer();
if (result == nullptr) { // Unlikely if no paused buffers.
enqueue_previous_paused_buffers();
result = dequeue_completed_buffer();
if (result == nullptr) return nullptr;
}
AtomicAccess::sub(&_num_cards, result->size());
return result;
}
#ifdef ASSERT
void G1DirtyCardQueueSet::verify_num_cards() const {
size_t actual = 0;
for (BufferNode* cur = _completed.first();
!_completed.is_end(cur);
cur = cur->next()) {
actual += cur->size();
}
assert(actual == AtomicAccess::load(&_num_cards),
"Num entries in completed buffers should be %zu but are %zu",
AtomicAccess::load(&_num_cards), actual);
}
#endif // ASSERT
G1DirtyCardQueueSet::PausedBuffers::PausedList::PausedList() :
_head(nullptr), _tail(nullptr),
_safepoint_id(SafepointSynchronize::safepoint_id())
{}
#ifdef ASSERT
G1DirtyCardQueueSet::PausedBuffers::PausedList::~PausedList() {
assert(AtomicAccess::load(&_head) == nullptr, "precondition");
assert(_tail == nullptr, "precondition");
}
#endif // ASSERT
bool G1DirtyCardQueueSet::PausedBuffers::PausedList::is_next() const {
assert_not_at_safepoint();
return _safepoint_id == SafepointSynchronize::safepoint_id();
}
void G1DirtyCardQueueSet::PausedBuffers::PausedList::add(BufferNode* node) {
assert_not_at_safepoint();
assert(is_next(), "precondition");
BufferNode* old_head = AtomicAccess::xchg(&_head, node);
if (old_head == nullptr) {
assert(_tail == nullptr, "invariant");
_tail = node;
} else {
node->set_next(old_head);
}
}
G1DirtyCardQueueSet::HeadTail G1DirtyCardQueueSet::PausedBuffers::PausedList::take() {
BufferNode* head = AtomicAccess::load(&_head);
BufferNode* tail = _tail;
AtomicAccess::store(&_head, (BufferNode*)nullptr);
_tail = nullptr;
return HeadTail(head, tail);
}
G1DirtyCardQueueSet::PausedBuffers::PausedBuffers() : _plist(nullptr) {}
#ifdef ASSERT
G1DirtyCardQueueSet::PausedBuffers::~PausedBuffers() {
assert(AtomicAccess::load(&_plist) == nullptr, "invariant");
}
#endif // ASSERT
void G1DirtyCardQueueSet::PausedBuffers::add(BufferNode* node) {
assert_not_at_safepoint();
PausedList* plist = AtomicAccess::load_acquire(&_plist);
if (plist == nullptr) {
// Try to install a new next list.
plist = new PausedList();
PausedList* old_plist = AtomicAccess::cmpxchg(&_plist, (PausedList*)nullptr, plist);
if (old_plist != nullptr) {
// Some other thread installed a new next list. Use it instead.
delete plist;
plist = old_plist;
}
}
assert(plist->is_next(), "invariant");
plist->add(node);
}
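// Standalone sketch of the install-or-discard pattern above, using
// std::atomic instead of AtomicAccess; ListSketch is an illustrative type.
#include <atomic>

struct ListSketch { /* ... */ };

static ListSketch* get_or_install(std::atomic<ListSketch*>& slot) {
  ListSketch* list = slot.load(std::memory_order_acquire);
  if (list == nullptr) {
    ListSketch* fresh = new ListSketch();
    ListSketch* expected = nullptr;
    if (slot.compare_exchange_strong(expected, fresh)) {
      list = fresh;             // we won the race and installed our list
    } else {
      delete fresh;             // another thread installed one first...
      list = expected;          // ...use the winner's list instead
    }
  }
  return list;
}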
G1DirtyCardQueueSet::HeadTail G1DirtyCardQueueSet::PausedBuffers::take_previous() {
assert_not_at_safepoint();
PausedList* previous;
{
// Deal with plist in a critical section, to prevent it from being
// deleted out from under us by a concurrent take_previous().
GlobalCounter::CriticalSection cs(Thread::current());
previous = AtomicAccess::load_acquire(&_plist);
if ((previous == nullptr) || // Nothing to take.
previous->is_next() || // Not from a previous safepoint.
// Some other thread stole it.
(AtomicAccess::cmpxchg(&_plist, previous, (PausedList*)nullptr) != previous)) {
return HeadTail();
}
}
// We now own previous.
HeadTail result = previous->take();
// There might be other threads examining previous (in concurrent
// take_previous()). Synchronize to wait until any such threads are
// done with such examination before deleting.
GlobalCounter::write_synchronize();
delete previous;
return result;
}
G1DirtyCardQueueSet::HeadTail G1DirtyCardQueueSet::PausedBuffers::take_all() {
assert_at_safepoint();
HeadTail result;
PausedList* plist = AtomicAccess::load(&_plist);
if (plist != nullptr) {
AtomicAccess::store(&_plist, (PausedList*)nullptr);
result = plist->take();
delete plist;
}
return result;
}
void G1DirtyCardQueueSet::record_paused_buffer(BufferNode* node) {
assert_not_at_safepoint();
assert(node->next() == nullptr, "precondition");
// Ensure there aren't any paused buffers from a previous safepoint.
enqueue_previous_paused_buffers();
// Cards for paused buffers are included in count, to contribute to
// notification checking after the coming safepoint if it doesn't GC.
// Note that this means the queue's _num_cards differs from the number
// of cards in the queued buffers when there are paused buffers.
AtomicAccess::add(&_num_cards, node->size());
_paused.add(node);
}
void G1DirtyCardQueueSet::enqueue_paused_buffers_aux(const HeadTail& paused) {
if (paused._head != nullptr) {
assert(paused._tail != nullptr, "invariant");
// Cards from paused buffers are already recorded in the queue count.
_completed.append(*paused._head, *paused._tail);
}
}
void G1DirtyCardQueueSet::enqueue_previous_paused_buffers() {
assert_not_at_safepoint();
enqueue_paused_buffers_aux(_paused.take_previous());
}
void G1DirtyCardQueueSet::enqueue_all_paused_buffers() {
assert_at_safepoint();
enqueue_paused_buffers_aux(_paused.take_all());
}
void G1DirtyCardQueueSet::abandon_completed_buffers() {
BufferNodeList list = take_all_completed_buffers();
BufferNode* buffers_to_delete = list._head;
while (buffers_to_delete != nullptr) {
BufferNode* bn = buffers_to_delete;
buffers_to_delete = bn->next();
bn->set_next(nullptr);
deallocate_buffer(bn);
}
}
// Merge lists of buffers. The source queue set is emptied as a
// result. The queue sets must share the same allocator.
void G1DirtyCardQueueSet::merge_bufferlists(G1RedirtyCardsQueueSet* src) {
assert(allocator() == src->allocator(), "precondition");
const BufferNodeList from = src->take_all_completed_buffers();
if (from._head != nullptr) {
AtomicAccess::add(&_num_cards, from._entry_count);
_completed.append(*from._head, *from._tail);
}
}
BufferNodeList G1DirtyCardQueueSet::take_all_completed_buffers() {
enqueue_all_paused_buffers();
verify_num_cards();
Pair<BufferNode*, BufferNode*> pair = _completed.take_all();
size_t num_cards = AtomicAccess::load(&_num_cards);
AtomicAccess::store(&_num_cards, size_t(0));
return BufferNodeList(pair.first, pair.second, num_cards);
}
class G1RefineBufferedCards : public StackObj {
BufferNode* const _node;
CardTable::CardValue** const _node_buffer;
const size_t _node_buffer_capacity;
const uint _worker_id;
G1ConcurrentRefineStats* _stats;
G1RemSet* const _g1rs;
static inline ptrdiff_t compare_cards(const CardTable::CardValue* p1,
const CardTable::CardValue* p2) {
return p2 - p1;
}
// Sorts the cards from start_index to _node_buffer_capacity in *decreasing*
// address order. Tests showed that this order is preferable to not sorting
// or increasing address order.
void sort_cards(size_t start_index) {
QuickSort::sort(&_node_buffer[start_index],
_node_buffer_capacity - start_index,
compare_cards);
}
// Returns the index to the first clean card in the buffer.
size_t clean_cards() {
const size_t start = _node->index();
assert(start <= _node_buffer_capacity, "invariant");
// Two-fingered compaction algorithm similar to the filtering mechanism in
// SATBMarkQueue. The main difference is that clean_card_before_refine()
// could change the buffer element in-place.
// We don't check for SuspendibleThreadSet::should_yield(), because
// cleaning and redirtying the cards is fast.
CardTable::CardValue** src = &_node_buffer[start];
CardTable::CardValue** dst = &_node_buffer[_node_buffer_capacity];
assert(src <= dst, "invariant");
for ( ; src < dst; ++src) {
// Search low to high for a card to keep.
if (_g1rs->clean_card_before_refine(src)) {
// Found keeper. Search high to low for a card to discard.
while (src < --dst) {
if (!_g1rs->clean_card_before_refine(dst)) {
*dst = *src; // Replace discard with keeper.
break;
}
}
// If discard search failed (src == dst), the outer loop will also end.
}
}
// dst points to the first retained clean card, or the end of the buffer
// if all the cards were discarded.
const size_t first_clean = dst - _node_buffer;
assert(first_clean >= start && first_clean <= _node_buffer_capacity, "invariant");
// Discarded cards are considered as refined.
_stats->inc_refined_cards(first_clean - start);
_stats->inc_precleaned_cards(first_clean - start);
return first_clean;
}
bool refine_cleaned_cards(size_t start_index) {
bool result = true;
size_t i = start_index;
for ( ; i < _node_buffer_capacity; ++i) {
if (SuspendibleThreadSet::should_yield()) {
redirty_unrefined_cards(i);
result = false;
break;
}
_g1rs->refine_card_concurrently(_node_buffer[i], _worker_id);
}
_node->set_index(i);
_stats->inc_refined_cards(i - start_index);
return result;
}
void redirty_unrefined_cards(size_t start) {
for ( ; start < _node_buffer_capacity; ++start) {
*_node_buffer[start] = G1CardTable::dirty_card_val();
}
}
public:
G1RefineBufferedCards(BufferNode* node,
uint worker_id,
G1ConcurrentRefineStats* stats) :
_node(node),
_node_buffer(reinterpret_cast<CardTable::CardValue**>(BufferNode::make_buffer_from_node(node))),
_node_buffer_capacity(node->capacity()),
_worker_id(worker_id),
_stats(stats),
_g1rs(G1CollectedHeap::heap()->rem_set()) {}
bool refine() {
size_t first_clean_index = clean_cards();
if (first_clean_index == _node_buffer_capacity) {
_node->set_index(first_clean_index);
return true;
}
// This fence serves two purposes. First, the cards must be cleaned
// before processing the contents. Second, we can't proceed with
// processing a region until after the read of the region's top in
// collect_and_clean_cards(), for synchronization with possibly concurrent
// humongous object allocation (see comment at the StoreStore fence before
// setting the regions' tops in humongous allocation path).
// It's okay that reading the region's top and reading the region's type are
// racy with respect to each other. We need both set, in any order, to proceed.
OrderAccess::fence();
sort_cards(first_clean_index);
return refine_cleaned_cards(first_clean_index);
}
};
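// Standalone sketches of the two helpers above, on plain arrays rather than
// the HotSpot card buffer types. First, the sort order: compare_cards()
// returning p2 - p1 yields *decreasing* address order, equivalent to:
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <functional>

static void sort_cards_decreasing(uint8_t** cards, size_t n) {
  std::sort(cards, cards + n, std::greater<uint8_t*>());  // highest address first
}

// Second, the two-fingered compaction in clean_cards(): move every element
// that the predicate keeps into the tail of [start, capacity) and return the
// index of the first kept element; everything below that index is discarded.
template <typename Pred>
static size_t compact_keepers_to_tail(int* buf, size_t start, size_t capacity, Pred keep) {
  int* src = buf + start;
  int* dst = buf + capacity;
  for (; src < dst; ++src) {
    if (keep(*src)) {                // keeper found scanning low -> high
      while (src < --dst) {
        if (!keep(*dst)) {           // discard found scanning high -> low
          *dst = *src;               // replace the discard with the keeper
          break;
        }
      }
      // If no discard slot was found (src == dst), the outer loop ends too.
    }
  }
  return static_cast<size_t>(dst - buf);   // first element of the kept tail
}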
bool G1DirtyCardQueueSet::refine_buffer(BufferNode* node,
uint worker_id,
G1ConcurrentRefineStats* stats) {
Ticks start_time = Ticks::now();
G1RefineBufferedCards buffered_cards(node, worker_id, stats);
bool result = buffered_cards.refine();
stats->inc_refinement_time(Ticks::now() - start_time);
return result;
}
void G1DirtyCardQueueSet::handle_refined_buffer(BufferNode* node,
bool fully_processed) {
if (fully_processed) {
assert(node->is_empty(), "Buffer not fully consumed: index: %zu, size: %zu",
node->index(), node->capacity());
deallocate_buffer(node);
} else {
assert(!node->is_empty(), "Buffer fully consumed.");
// Buffer incompletely processed because there is a pending safepoint.
// Record partially processed buffer, to be finished later.
record_paused_buffer(node);
}
}
void G1DirtyCardQueueSet::handle_completed_buffer(BufferNode* new_node,
G1ConcurrentRefineStats* stats) {
enqueue_completed_buffer(new_node);
// No need for mutator refinement if number of cards is below limit.
if (AtomicAccess::load(&_num_cards) <= AtomicAccess::load(&_mutator_refinement_threshold)) {
return;
}
// Don't try to process a buffer that will just get immediately paused.
// When going into a safepoint it's just a waste of effort.
// When coming out of a safepoint, Java threads may be running before the
// yield request (for non-Java threads) has been cleared.
if (SuspendibleThreadSet::should_yield()) {
return;
}
// Only Java threads perform mutator refinement.
if (!Thread::current()->is_Java_thread()) {
return;
}
BufferNode* node = get_completed_buffer();
if (node == nullptr) return; // Didn't get a buffer to process.
// Refine cards in buffer.
uint worker_id = _free_ids.claim_par_id(); // temporarily claim an id
bool fully_processed = refine_buffer(node, worker_id, stats);
_free_ids.release_par_id(worker_id); // release the id
// Deal with buffer after releasing id, to let another thread use id.
handle_refined_buffer(node, fully_processed);
}
bool G1DirtyCardQueueSet::refine_completed_buffer_concurrently(uint worker_id,
size_t stop_at,
G1ConcurrentRefineStats* stats) {
// Not enough cards to trigger processing.
if (AtomicAccess::load(&_num_cards) <= stop_at) return false;
BufferNode* node = get_completed_buffer();
if (node == nullptr) return false; // Didn't get a buffer to process.
bool fully_processed = refine_buffer(node, worker_id, stats);
handle_refined_buffer(node, fully_processed);
return true;
}
void G1DirtyCardQueueSet::abandon_logs_and_stats() {
assert_at_safepoint();
// Disable mutator refinement until concurrent refinement decides otherwise.
set_mutator_refinement_threshold(SIZE_MAX);
// Iterate over all the threads, resetting per-thread queues and stats.
struct AbandonThreadLogClosure : public ThreadClosure {
G1DirtyCardQueueSet& _qset;
AbandonThreadLogClosure(G1DirtyCardQueueSet& qset) : _qset(qset) {}
virtual void do_thread(Thread* t) {
G1DirtyCardQueue& queue = G1ThreadLocalData::dirty_card_queue(t);
_qset.reset_queue(queue);
queue.refinement_stats()->reset();
}
} closure(*this);
Threads::threads_do(&closure);
enqueue_all_paused_buffers();
abandon_completed_buffers();
// Reset stats from detached threads.
MutexLocker ml(G1DetachedRefinementStats_lock, Mutex::_no_safepoint_check_flag);
_detached_refinement_stats.reset();
}
void G1DirtyCardQueueSet::update_refinement_stats(G1ConcurrentRefineStats& stats) {
assert_at_safepoint();
_concatenated_refinement_stats = stats;
enqueue_all_paused_buffers();
verify_num_cards();
// Collect and reset stats from detached threads.
MutexLocker ml(G1DetachedRefinementStats_lock, Mutex::_no_safepoint_check_flag);
_concatenated_refinement_stats += _detached_refinement_stats;
_detached_refinement_stats.reset();
}
G1ConcurrentRefineStats G1DirtyCardQueueSet::concatenate_log_and_stats(Thread* thread) {
assert_at_safepoint();
G1DirtyCardQueue& queue = G1ThreadLocalData::dirty_card_queue(thread);
// Flush the buffer if non-empty. Flush before accumulating and
// resetting stats, since flushing may modify the stats.
if (!queue.is_empty()) {
flush_queue(queue);
}
G1ConcurrentRefineStats result = *queue.refinement_stats();
queue.refinement_stats()->reset();
return result;
}
G1ConcurrentRefineStats G1DirtyCardQueueSet::concatenated_refinement_stats() const {
assert_at_safepoint();
return _concatenated_refinement_stats;
}
void G1DirtyCardQueueSet::record_detached_refinement_stats(G1ConcurrentRefineStats* stats) {
MutexLocker ml(G1DetachedRefinementStats_lock, Mutex::_no_safepoint_check_flag);
_detached_refinement_stats += *stats;
stats->reset();
}
size_t G1DirtyCardQueueSet::mutator_refinement_threshold() const {
return AtomicAccess::load(&_mutator_refinement_threshold);
}
void G1DirtyCardQueueSet::set_mutator_refinement_threshold(size_t value) {
AtomicAccess::store(&_mutator_refinement_threshold, value);
}


@ -1,302 +0,0 @@
/*
* Copyright (c) 2001, 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#ifndef SHARE_GC_G1_G1DIRTYCARDQUEUE_HPP
#define SHARE_GC_G1_G1DIRTYCARDQUEUE_HPP
#include "gc/g1/g1CardTable.hpp"
#include "gc/g1/g1ConcurrentRefineStats.hpp"
#include "gc/g1/g1FreeIdSet.hpp"
#include "gc/shared/bufferNode.hpp"
#include "gc/shared/bufferNodeList.hpp"
#include "gc/shared/ptrQueue.hpp"
#include "memory/allocation.hpp"
#include "memory/padded.hpp"
#include "utilities/nonblockingQueue.hpp"
class G1PrimaryConcurrentRefineThread;
class G1DirtyCardQueueSet;
class G1RedirtyCardsQueueSet;
class Thread;
// A PtrQueue whose elements are pointers to card table entries for dirty cards.
class G1DirtyCardQueue: public PtrQueue {
G1ConcurrentRefineStats* _refinement_stats;
public:
G1DirtyCardQueue(G1DirtyCardQueueSet* qset);
// Flush before destroying; queue may be used to capture pending work while
// doing something else, with auto-flush on completion.
~G1DirtyCardQueue();
G1ConcurrentRefineStats* refinement_stats() const {
return _refinement_stats;
}
// Compiler support.
static ByteSize byte_offset_of_index() {
return PtrQueue::byte_offset_of_index<G1DirtyCardQueue>();
}
using PtrQueue::byte_width_of_index;
static ByteSize byte_offset_of_buf() {
return PtrQueue::byte_offset_of_buf<G1DirtyCardQueue>();
}
using PtrQueue::byte_width_of_buf;
};
class G1DirtyCardQueueSet: public PtrQueueSet {
// Head and tail of a list of BufferNodes, linked through their next()
// fields. Similar to BufferNodeList, but without the _entry_count.
struct HeadTail {
BufferNode* _head;
BufferNode* _tail;
HeadTail() : _head(nullptr), _tail(nullptr) {}
HeadTail(BufferNode* head, BufferNode* tail) : _head(head), _tail(tail) {}
};
// Concurrent refinement may stop processing in the middle of a buffer if
// there is a pending safepoint, to avoid long delays to safepoint. A
// partially processed buffer needs to be recorded for processing by the
// safepoint if it's a GC safepoint; otherwise it needs to be recorded for
// further concurrent refinement work after the safepoint. But if the
// buffer was obtained from the completed buffer queue then it can't simply
// be added back to the queue, as that would introduce a new source of ABA
// for the queue.
//
// The PausedBuffer object is used to record such buffers for the upcoming
// safepoint, and provides access to the buffers recorded for previous
// safepoints. Before obtaining a buffer from the completed buffers queue,
// we first transfer any buffers from previous safepoints to the queue.
// This is ABA-safe because threads cannot be in the midst of a queue pop
// across a safepoint.
//
// The paused buffers are conceptually an extension of the completed buffers
// queue, and operations which need to deal with all of the queued buffers
// (such as concatenating or abandoning logs) also need to deal with any
// paused buffers. In general, if a safepoint performs a GC then the paused
// buffers will be processed as part of it, and there won't be any paused
// buffers after a GC safepoint.
class PausedBuffers {
class PausedList : public CHeapObj<mtGC> {
BufferNode* volatile _head;
BufferNode* _tail;
size_t _safepoint_id;
NONCOPYABLE(PausedList);
public:
PausedList();
DEBUG_ONLY(~PausedList();)
// Return true if this list was created to hold buffers for the
// next safepoint.
// precondition: not at safepoint.
bool is_next() const;
// Thread-safe add the buffer to the list.
// precondition: not at safepoint.
// precondition: is_next().
void add(BufferNode* node);
// Take all the buffers from the list. Not thread-safe.
HeadTail take();
};
// The most recently created list, which might be for either the next or
// a previous safepoint, or might be null if the next list hasn't been
// created yet. We only need one list because of the requirement that
// threads calling add() must first ensure there are no paused buffers
// from a previous safepoint. There might be many list instances existing
// at the same time though; there can be many threads competing to create
// and install the next list, and meanwhile there can be a thread dealing
// with the previous list.
PausedList* volatile _plist;
DEFINE_PAD_MINUS_SIZE(1, DEFAULT_PADDING_SIZE, sizeof(PausedList*));
NONCOPYABLE(PausedBuffers);
public:
PausedBuffers();
DEBUG_ONLY(~PausedBuffers();)
// Thread-safe add the buffer to paused list for next safepoint.
// precondition: not at safepoint.
// precondition: does not have paused buffers from a previous safepoint.
void add(BufferNode* node);
// Thread-safe take all paused buffers for previous safepoints.
// precondition: not at safepoint.
HeadTail take_previous();
// Take all the paused buffers.
// precondition: at safepoint.
HeadTail take_all();
};
DEFINE_PAD_MINUS_SIZE(0, DEFAULT_PADDING_SIZE, 0);
// Upper bound on the number of cards in the completed and paused buffers.
volatile size_t _num_cards;
DEFINE_PAD_MINUS_SIZE(1, DEFAULT_PADDING_SIZE, sizeof(size_t));
// If the queue contains more cards than configured here, the
// mutator must start doing some of the concurrent refinement work.
volatile size_t _mutator_refinement_threshold;
DEFINE_PAD_MINUS_SIZE(2, DEFAULT_PADDING_SIZE, sizeof(size_t));
// Buffers ready for refinement.
// NonblockingQueue has inner padding of one cache line.
NonblockingQueue<BufferNode, &BufferNode::next_ptr> _completed;
// Add a trailer padding after NonblockingQueue.
DEFINE_PAD_MINUS_SIZE(3, DEFAULT_PADDING_SIZE, sizeof(BufferNode*));
// Buffers for which refinement is temporarily paused.
// PausedBuffers has inner padding, including trailer.
PausedBuffers _paused;
G1FreeIdSet _free_ids;
G1ConcurrentRefineStats _concatenated_refinement_stats;
G1ConcurrentRefineStats _detached_refinement_stats;
// Verify _num_cards == sum of cards in the completed queue.
void verify_num_cards() const NOT_DEBUG_RETURN;
// Thread-safe add a buffer to paused list for next safepoint.
// precondition: not at safepoint.
void record_paused_buffer(BufferNode* node);
void enqueue_paused_buffers_aux(const HeadTail& paused);
// Thread-safe transfer paused buffers for previous safepoints to the queue.
// precondition: not at safepoint.
void enqueue_previous_paused_buffers();
// Transfer all paused buffers to the queue.
// precondition: at safepoint.
void enqueue_all_paused_buffers();
void abandon_completed_buffers();
// Refine the cards in "node" from its index to buffer_capacity.
// Stops processing if SuspendibleThreadSet::should_yield() is true.
// Returns true if the entire buffer was processed, false if there
// is a pending yield request. The node's index is updated to exclude
// the processed elements, e.g. up to the element before processing
// stopped, or one past the last element if the entire buffer was
// processed. Updates stats.
bool refine_buffer(BufferNode* node,
uint worker_id,
G1ConcurrentRefineStats* stats);
// Deal with buffer after a call to refine_buffer. If fully processed,
// deallocate the buffer. Otherwise, record it as paused.
void handle_refined_buffer(BufferNode* node, bool fully_processed);
// Thread-safe attempt to remove and return the first buffer from
// the _completed queue.
// Returns null if the queue is empty, or if a concurrent push/append
// interferes. It uses GlobalCounter critical section to avoid ABA problem.
BufferNode* dequeue_completed_buffer();
// Remove and return a completed buffer from the list, or return null
// if none available.
BufferNode* get_completed_buffer();
// Called when queue is full or has no buffer.
void handle_zero_index(G1DirtyCardQueue& queue);
// Enqueue the buffer, and optionally perform refinement by the mutator.
// Mutator refinement is only done by Java threads, and only if there
// are more than mutator_refinement_threshold cards in the completed buffers.
// Updates stats.
//
// Mutator refinement, if performed, stops processing a buffer if
// SuspendibleThreadSet::should_yield(), recording the incompletely
// processed buffer for later processing of the remainder.
void handle_completed_buffer(BufferNode* node, G1ConcurrentRefineStats* stats);
public:
G1DirtyCardQueueSet(BufferNode::Allocator* allocator);
~G1DirtyCardQueueSet();
// The number of parallel ids that can be claimed to allow collector or
// mutator threads to do card-processing work.
static uint num_par_ids();
static void handle_zero_index_for_thread(Thread* t);
virtual void enqueue_completed_buffer(BufferNode* node);
// Upper bound on the number of cards currently in this queue set.
// Read without synchronization. The value may be high because there
// is a concurrent modification of the set of buffers.
size_t num_cards() const;
void merge_bufferlists(G1RedirtyCardsQueueSet* src);
BufferNodeList take_all_completed_buffers();
void flush_queue(G1DirtyCardQueue& queue);
using CardValue = G1CardTable::CardValue;
void enqueue(G1DirtyCardQueue& queue, volatile CardValue* card_ptr);
// If there are more than stop_at cards in the completed buffers, pop
// a buffer, refine its contents, and return true. Otherwise return
// false. Updates stats.
//
// Stops processing a buffer if SuspendibleThreadSet::should_yield(),
// recording the incompletely processed buffer for later processing of
// the remainder.
bool refine_completed_buffer_concurrently(uint worker_id,
size_t stop_at,
G1ConcurrentRefineStats* stats);
// If a full collection is happening, reset per-thread refinement stats and
// partial logs, and release completed logs. The full collection will make
// them all irrelevant.
// precondition: at safepoint.
void abandon_logs_and_stats();
// Update global refinement statistics with the ones given and the ones from
// detached threads.
// precondition: at safepoint.
void update_refinement_stats(G1ConcurrentRefineStats& stats);
// Add the given thread's partial logs to the global list and return and reset
// its refinement stats.
// precondition: at safepoint.
G1ConcurrentRefineStats concatenate_log_and_stats(Thread* thread);
// Return the total of mutator refinement stats for all threads.
// precondition: at safepoint.
// precondition: only call after concatenate_log_and_stats.
G1ConcurrentRefineStats concatenated_refinement_stats() const;
// Accumulate refinement stats from threads that are detaching.
void record_detached_refinement_stats(G1ConcurrentRefineStats* stats);
// Number of cards above which mutator threads should do refinement.
size_t mutator_refinement_threshold() const;
// Set number of cards above which mutator threads should do refinement.
void set_mutator_refinement_threshold(size_t value);
};
#endif // SHARE_GC_G1_G1DIRTYCARDQUEUE_HPP


@ -22,8 +22,6 @@
*
*/
#include "gc/g1/g1ConcurrentRefine.hpp"
#include "gc/g1/g1DirtyCardQueue.hpp"
#include "gc/g1/g1FromCardCache.hpp"
#include "gc/shared/gc_globals.hpp"
#include "memory/padded.inline.hpp"
@ -80,7 +78,7 @@ void G1FromCardCache::print(outputStream* out) {
#endif
uint G1FromCardCache::num_par_rem_sets() {
return G1DirtyCardQueueSet::num_par_ids() + G1ConcRefinementThreads + MAX2(ConcGCThreads, ParallelGCThreads);
return G1ConcRefinementThreads + ConcGCThreads;
}
void G1FromCardCache::clear(uint region_idx) {


@ -147,6 +147,10 @@ void G1FullGCCompactTask::free_non_overlapping_regions(uint src_start_idx, uint
for (uint i = non_overlapping_start; i <= src_end_idx; ++i) {
G1HeapRegion* hr = _g1h->region_at(i);
if (VerifyDuringGC) {
// Satisfy some asserts in free_..._region
hr->clear_both_card_tables();
}
_g1h->free_humongous_region(hr, nullptr);
}
}


@ -35,6 +35,10 @@
#include "gc/shared/fullGCForwarding.inline.hpp"
void G1DetermineCompactionQueueClosure::free_empty_humongous_region(G1HeapRegion* hr) {
if (VerifyDuringGC) {
// Satisfy some asserts in free_..._region.
hr->clear_both_card_tables();
}
_g1h->free_humongous_region(hr, nullptr);
_collector->set_free(hr->hrm_index());
add_to_compaction_queue(hr);


@ -32,7 +32,7 @@ G1FullGCResetMetadataTask::G1ResetMetadataClosure::G1ResetMetadataClosure(G1Full
void G1FullGCResetMetadataTask::G1ResetMetadataClosure::reset_region_metadata(G1HeapRegion* hr) {
hr->rem_set()->clear();
hr->clear_cardtable();
hr->clear_both_card_tables();
}
bool G1FullGCResetMetadataTask::G1ResetMetadataClosure::do_heap_region(G1HeapRegion* hr) {


@ -50,8 +50,7 @@ G1GCPhaseTimes::G1GCPhaseTimes(STWGCTimer* gc_timer, uint max_gc_threads) :
{
assert(max_gc_threads > 0, "Must have some GC threads");
_gc_par_phases[RetireTLABsAndFlushLogs] = new WorkerDataArray<double>("RetireTLABsAndFlushLogs", "JT Retire TLABs And Flush Logs (ms):", max_gc_threads);
_gc_par_phases[NonJavaThreadFlushLogs] = new WorkerDataArray<double>("NonJavaThreadFlushLogs", "Non-JT Flush Logs (ms):", max_gc_threads);
_gc_par_phases[RetireTLABs] = new WorkerDataArray<double>("RetireTLABs", "JavaThread Retire TLABs (ms):", max_gc_threads);
_gc_par_phases[GCWorkerStart] = new WorkerDataArray<double>("GCWorkerStart", "GC Worker Start (ms):", max_gc_threads);
_gc_par_phases[ExtRootScan] = new WorkerDataArray<double>("ExtRootScan", "Ext Root Scanning (ms):", max_gc_threads);
@ -83,7 +82,7 @@ G1GCPhaseTimes::G1GCPhaseTimes(STWGCTimer* gc_timer, uint max_gc_threads) :
_gc_par_phases[OptMergeRS]->create_thread_work_items(GCMergeRSWorkItemsStrings[i], i);
}
_gc_par_phases[MergeLB] = new WorkerDataArray<double>("MergeLB", "Log Buffers (ms):", max_gc_threads);
_gc_par_phases[SweepRT] = new WorkerDataArray<double>("SweepRT", "Sweep (ms):", max_gc_threads);
_gc_par_phases[ScanHR] = new WorkerDataArray<double>("ScanHR", "Scan Heap Roots (ms):", max_gc_threads);
_gc_par_phases[OptScanHR] = new WorkerDataArray<double>("OptScanHR", "Optional Scan Heap Roots (ms):", max_gc_threads);
_gc_par_phases[CodeRoots] = new WorkerDataArray<double>("CodeRoots", "Code Root Scan (ms):", max_gc_threads);
@ -98,7 +97,7 @@ G1GCPhaseTimes::G1GCPhaseTimes(STWGCTimer* gc_timer, uint max_gc_threads) :
_gc_par_phases[MergePSS] = new WorkerDataArray<double>("MergePSS", "Merge Per-Thread State (ms):", max_gc_threads);
_gc_par_phases[RestoreEvacuationFailedRegions] = new WorkerDataArray<double>("RestoreEvacuationFailedRegions", "Restore Evacuation Failed Regions (ms):", max_gc_threads);
_gc_par_phases[RemoveSelfForwards] = new WorkerDataArray<double>("RemoveSelfForwards", "Remove Self Forwards (ms):", max_gc_threads);
_gc_par_phases[ClearCardTable] = new WorkerDataArray<double>("ClearLoggedCards", "Clear Logged Cards (ms):", max_gc_threads);
_gc_par_phases[ClearCardTable] = new WorkerDataArray<double>("ClearPendingCards", "Clear Pending Cards (ms):", max_gc_threads);
_gc_par_phases[RecalculateUsed] = new WorkerDataArray<double>("RecalculateUsed", "Recalculate Used Memory (ms):", max_gc_threads);
#if COMPILER2_OR_JVMCI
_gc_par_phases[UpdateDerivedPointers] = new WorkerDataArray<double>("UpdateDerivedPointers", "Update Derived Pointers (ms):", max_gc_threads);
@ -107,11 +106,15 @@ G1GCPhaseTimes::G1GCPhaseTimes(STWGCTimer* gc_timer, uint max_gc_threads) :
_gc_par_phases[ResetPartialArrayStateManager] = new WorkerDataArray<double>("ResetPartialArrayStateManager", "Reset Partial Array State Manager (ms):", max_gc_threads);
_gc_par_phases[ProcessEvacuationFailedRegions] = new WorkerDataArray<double>("ProcessEvacuationFailedRegions", "Process Evacuation Failed Regions (ms):", max_gc_threads);
_gc_par_phases[ScanHR]->create_thread_work_items("Pending Cards:", ScanHRPendingCards);
_gc_par_phases[ScanHR]->create_thread_work_items("Scanned Empty:", ScanHRScannedEmptyCards);
_gc_par_phases[ScanHR]->create_thread_work_items("Scanned Cards:", ScanHRScannedCards);
_gc_par_phases[ScanHR]->create_thread_work_items("Scanned Blocks:", ScanHRScannedBlocks);
_gc_par_phases[ScanHR]->create_thread_work_items("Claimed Chunks:", ScanHRClaimedChunks);
_gc_par_phases[ScanHR]->create_thread_work_items("Found Roots:", ScanHRFoundRoots);
_gc_par_phases[OptScanHR]->create_thread_work_items("Pending Cards:", ScanHRPendingCards);
_gc_par_phases[OptScanHR]->create_thread_work_items("Scanned Empty:", ScanHRScannedEmptyCards);
_gc_par_phases[OptScanHR]->create_thread_work_items("Scanned Cards:", ScanHRScannedCards);
_gc_par_phases[OptScanHR]->create_thread_work_items("Scanned Blocks:", ScanHRScannedBlocks);
_gc_par_phases[OptScanHR]->create_thread_work_items("Claimed Chunks:", ScanHRClaimedChunks);
@ -119,9 +122,6 @@ G1GCPhaseTimes::G1GCPhaseTimes(STWGCTimer* gc_timer, uint max_gc_threads) :
_gc_par_phases[OptScanHR]->create_thread_work_items("Scanned Refs:", ScanHRScannedOptRefs);
_gc_par_phases[OptScanHR]->create_thread_work_items("Used Memory:", ScanHRUsedMemory);
_gc_par_phases[MergeLB]->create_thread_work_items("Dirty Cards:", MergeLBDirtyCards);
_gc_par_phases[MergeLB]->create_thread_work_items("Skipped Cards:", MergeLBSkippedCards);
_gc_par_phases[CodeRoots]->create_thread_work_items("Scanned Nmethods:", CodeRootsScannedNMethods);
_gc_par_phases[OptCodeRoots]->create_thread_work_items("Scanned Nmethods:", CodeRootsScannedNMethods);
@ -129,7 +129,10 @@ G1GCPhaseTimes::G1GCPhaseTimes(STWGCTimer* gc_timer, uint max_gc_threads) :
_gc_par_phases[MergePSS]->create_thread_work_items("Copied Bytes:", MergePSSCopiedBytes);
_gc_par_phases[MergePSS]->create_thread_work_items("LAB Waste:", MergePSSLABWasteBytes);
_gc_par_phases[MergePSS]->create_thread_work_items("LAB Undo Waste:", MergePSSLABUndoWasteBytes);
_gc_par_phases[MergePSS]->create_thread_work_items("Evac Fail Extra Cards:", MergePSSEvacFailExtra);
_gc_par_phases[MergePSS]->create_thread_work_items("Pending Cards:", MergePSSPendingCards);
_gc_par_phases[MergePSS]->create_thread_work_items("To-Young-Gen Cards:", MergePSSToYoungGenCards);
_gc_par_phases[MergePSS]->create_thread_work_items("Evac-Fail Cards:", MergePSSEvacFail);
_gc_par_phases[MergePSS]->create_thread_work_items("Marked Cards:", MergePSSMarked);
_gc_par_phases[RestoreEvacuationFailedRegions]->create_thread_work_items("Evacuation Failed Regions:", RestoreEvacFailureRegionsEvacFailedNum);
_gc_par_phases[RestoreEvacuationFailedRegions]->create_thread_work_items("Pinned Regions:", RestoreEvacFailureRegionsPinnedNum);
@ -150,9 +153,6 @@ G1GCPhaseTimes::G1GCPhaseTimes(STWGCTimer* gc_timer, uint max_gc_threads) :
_gc_par_phases[OptTermination]->create_thread_work_items("Optional Termination Attempts:");
_gc_par_phases[RedirtyCards] = new WorkerDataArray<double>("RedirtyCards", "Redirty Logged Cards (ms):", max_gc_threads);
_gc_par_phases[RedirtyCards]->create_thread_work_items("Redirtied Cards:");
_gc_par_phases[ResizeThreadLABs] = new WorkerDataArray<double>("ResizeTLABs", "Resize TLABs (ms):", max_gc_threads);
_gc_par_phases[FreeCollectionSet] = new WorkerDataArray<double>("FreeCSet", "Free Collection Set (ms):", max_gc_threads);
@ -171,9 +171,9 @@ void G1GCPhaseTimes::reset() {
_cur_optional_evac_time_ms = 0.0;
_cur_collection_nmethod_list_cleanup_time_ms = 0.0;
_cur_merge_heap_roots_time_ms = 0.0;
_cur_merge_refinement_table_time_ms = 0.0;
_cur_optional_merge_heap_roots_time_ms = 0.0;
_cur_prepare_merge_heap_roots_time_ms = 0.0;
_cur_distribute_log_buffers_time_ms = 0.0;
_cur_optional_prepare_merge_heap_roots_time_ms = 0.0;
_cur_pre_evacuate_prepare_time_ms = 0.0;
_cur_post_evacuate_cleanup_1_time_ms = 0.0;
@ -249,7 +249,7 @@ void G1GCPhaseTimes::record_gc_pause_end() {
ASSERT_PHASE_UNINITIALIZED(MergeER);
ASSERT_PHASE_UNINITIALIZED(MergeRS);
ASSERT_PHASE_UNINITIALIZED(OptMergeRS);
ASSERT_PHASE_UNINITIALIZED(MergeLB);
ASSERT_PHASE_UNINITIALIZED(SweepRT);
ASSERT_PHASE_UNINITIALIZED(ScanHR);
ASSERT_PHASE_UNINITIALIZED(CodeRoots);
ASSERT_PHASE_UNINITIALIZED(OptCodeRoots);
@ -425,8 +425,7 @@ double G1GCPhaseTimes::print_pre_evacuate_collection_set() const {
}
debug_time("Pre Evacuate Prepare", _cur_pre_evacuate_prepare_time_ms);
debug_phase(_gc_par_phases[RetireTLABsAndFlushLogs], 1);
debug_phase(_gc_par_phases[NonJavaThreadFlushLogs], 1);
debug_phase(_gc_par_phases[RetireTLABs], 1);
debug_time("Choose Collection Set", (_recorded_young_cset_choice_time_ms + _recorded_non_young_cset_choice_time_ms));
debug_time("Region Register", _cur_region_register_time);
@ -458,8 +457,8 @@ double G1GCPhaseTimes::print_evacuate_initial_collection_set() const {
debug_time("Prepare Merge Heap Roots", _cur_prepare_merge_heap_roots_time_ms);
debug_phase_merge_remset();
debug_time("Distribute Log Buffers", _cur_distribute_log_buffers_time_ms);
debug_phase(_gc_par_phases[MergeLB]);
debug_time("Merge Refinement Table", _cur_merge_refinement_table_time_ms);
debug_phase(_gc_par_phases[SweepRT], 1);
info_time("Evacuate Collection Set", _cur_collection_initial_evac_time_ms);
@ -521,7 +520,6 @@ double G1GCPhaseTimes::print_post_evacuate_collection_set(bool evacuation_failed
if (G1CollectedHeap::heap()->should_sample_collection_set_candidates()) {
debug_phase(_gc_par_phases[SampleCollectionSetCandidates], 1);
}
debug_phase(_gc_par_phases[RedirtyCards], 1);
if (UseTLAB && ResizeTLAB) {
debug_phase(_gc_par_phases[ResizeThreadLABs], 1);
}


@ -46,8 +46,7 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> {
public:
enum GCParPhases {
RetireTLABsAndFlushLogs,
NonJavaThreadFlushLogs,
RetireTLABs,
GCWorkerStart,
ExtRootScan,
ThreadRoots,
@ -59,7 +58,7 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> {
MergeER = StrongOopStorageSetRoots + EnumRange<OopStorageSet::StrongId>().size(),
MergeRS,
OptMergeRS,
MergeLB,
SweepRT,
ScanHR,
OptScanHR,
CodeRoots,
@ -71,7 +70,6 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> {
Other,
GCWorkerTotal,
GCWorkerEnd,
RedirtyCards,
FreeCollectionSet,
YoungFreeCSet,
NonYoungFreeCSet,
@ -111,16 +109,19 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> {
MergeRSHowlArrayOfCards,
MergeRSHowlBitmap,
MergeRSHowlFull,
MergeRSCards,
MergeRSFromRemSetCards,
MergeRSTotalCards,
MergeRSContainersSentinel
};
static constexpr const char* GCMergeRSWorkItemsStrings[MergeRSContainersSentinel] =
{ "Merged Inline:", "Merged ArrayOfCards:", "Merged Howl:", "Merged Full:",
"Merged Howl Inline:", "Merged Howl ArrayOfCards:", "Merged Howl BitMap:", "Merged Howl Full:",
"Merged Cards:" };
"Merged From RS Cards:", "Total Cards:" };
enum GCScanHRWorkItems {
ScanHRPendingCards,
ScanHRScannedEmptyCards,
ScanHRScannedCards,
ScanHRScannedBlocks,
ScanHRClaimedChunks,
@ -129,11 +130,6 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> {
ScanHRUsedMemory
};
enum GCMergeLBWorkItems {
MergeLBDirtyCards,
MergeLBSkippedCards
};
enum GCCodeRootsWorkItems {
CodeRootsScannedNMethods
};
@ -143,7 +139,10 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> {
MergePSSLABSize,
MergePSSLABWasteBytes,
MergePSSLABUndoWasteBytes,
MergePSSEvacFailExtra
MergePSSPendingCards, // Cards still to be scanned, generated by the GC (from cross-references and evacuation failure).
MergePSSToYoungGenCards, // To-young-gen cards generated by the GC.
MergePSSEvacFail, // Dirty cards generated by evacuation failure during the GC.
MergePSSMarked, // Total newly marked cards.
};
enum RestoreEvacFailureRegionsWorkItems {
@ -176,9 +175,9 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> {
double _cur_collection_nmethod_list_cleanup_time_ms;
double _cur_merge_heap_roots_time_ms;
// Merge refinement table time. Note that this time is included in _cur_merge_heap_roots_time_ms.
double _cur_merge_refinement_table_time_ms;
double _cur_optional_merge_heap_roots_time_ms;
// Included in above merge and optional-merge time.
double _cur_distribute_log_buffers_time_ms;
double _cur_prepare_merge_heap_roots_time_ms;
double _cur_optional_prepare_merge_heap_roots_time_ms;
@ -302,6 +301,10 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> {
_cur_merge_heap_roots_time_ms += ms;
}
void record_merge_refinement_table_time(double ms) {
_cur_merge_refinement_table_time_ms = ms;
}
void record_or_add_optional_merge_heap_roots_time(double ms) {
_cur_optional_merge_heap_roots_time_ms += ms;
}
@ -310,10 +313,6 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> {
_cur_prepare_merge_heap_roots_time_ms += ms;
}
void record_distribute_log_buffers_time_ms(double ms) {
_cur_distribute_log_buffers_time_ms += ms;
}
void record_or_add_optional_prepare_merge_heap_roots_time(double ms) {
_cur_optional_prepare_merge_heap_roots_time_ms += ms;
}
@ -382,10 +381,6 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> {
_recorded_prepare_heap_roots_time_ms = recorded_prepare_heap_roots_time_ms;
}
double cur_distribute_log_buffers_time_ms() {
return _cur_distribute_log_buffers_time_ms;
}
double cur_collection_par_time_ms() {
return _cur_collection_initial_evac_time_ms +
_cur_optional_evac_time_ms +
@ -396,6 +391,10 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> {
_cur_collection_nmethod_list_cleanup_time_ms;
}
double cur_merge_refinement_table_time() const {
return _cur_merge_refinement_table_time_ms;
}
double cur_resize_heap_time_ms() {
return _cur_resize_heap_time_ms;
}


@ -39,6 +39,7 @@
#include "logging/log.hpp"
#include "logging/logStream.hpp"
#include "memory/iterator.inline.hpp"
#include "memory/memRegion.hpp"
#include "memory/resourceArea.hpp"
#include "oops/access.inline.hpp"
#include "oops/compressedOops.inline.hpp"
@ -137,11 +138,21 @@ void G1HeapRegion::hr_clear(bool clear_space) {
if (clear_space) clear(SpaceDecorator::Mangle);
}
void G1HeapRegion::clear_cardtable() {
void G1HeapRegion::clear_card_table() {
G1CardTable* ct = G1CollectedHeap::heap()->card_table();
ct->clear_MemRegion(MemRegion(bottom(), end()));
}
void G1HeapRegion::clear_refinement_table() {
G1CardTable* ct = G1CollectedHeap::heap()->refinement_table();
ct->clear_MemRegion(MemRegion(bottom(), end()));
}
void G1HeapRegion::clear_both_card_tables() {
clear_card_table();
clear_refinement_table();
}
void G1HeapRegion::set_free() {
if (!is_free()) {
report_region_type_change(G1HeapRegionTraceType::Free);
@ -591,8 +602,12 @@ class G1VerifyLiveAndRemSetClosure : public BasicOopIterateClosure {
G1HeapRegion* _from;
G1HeapRegion* _to;
CardValue _cv_obj;
CardValue _cv_field;
CardValue _cv_obj_ct; // In card table.
CardValue _cv_field_ct;
CardValue _cv_obj_rt; // In refinement table.
CardValue _cv_field_rt;
RemSetChecker(G1VerifyFailureCounter* failures, oop containing_obj, T* p, oop obj)
: Checker<T>(failures, containing_obj, p, obj) {
@ -600,19 +615,23 @@ class G1VerifyLiveAndRemSetClosure : public BasicOopIterateClosure {
_to = this->_g1h->heap_region_containing(obj);
CardTable* ct = this->_g1h->card_table();
_cv_obj = *ct->byte_for_const(this->_containing_obj);
_cv_field = *ct->byte_for_const(p);
_cv_obj_ct = *ct->byte_for_const(this->_containing_obj);
_cv_field_ct = *ct->byte_for_const(p);
ct = this->_g1h->refinement_table();
_cv_obj_rt = *ct->byte_for_const(this->_containing_obj);
_cv_field_rt = *ct->byte_for_const(p);
}
bool failed() const {
if (_from != _to && !_from->is_young() &&
_to->rem_set()->is_complete() &&
_from->rem_set()->cset_group() != _to->rem_set()->cset_group()) {
const CardValue dirty = G1CardTable::dirty_card_val();
const CardValue clean = G1CardTable::clean_card_val();
return !(_to->rem_set()->contains_reference(this->_p) ||
(this->_containing_obj->is_objArray() ?
_cv_field == dirty :
_cv_obj == dirty || _cv_field == dirty));
(_cv_field_ct != clean || _cv_field_rt != clean) :
(_cv_obj_ct != clean || _cv_field_ct != clean || _cv_obj_rt != clean || _cv_field_rt != clean)));
}
return false;
}
@ -630,7 +649,8 @@ class G1VerifyLiveAndRemSetClosure : public BasicOopIterateClosure {
log.error("Missing rem set entry:");
this->print_containing_obj(&ls, _from);
this->print_referenced_obj(&ls, _to, "");
log.error("Obj head CV = %d, field CV = %d.", _cv_obj, _cv_field);
log.error("CT obj head CV = %d, field CV = %d.", _cv_obj_ct, _cv_field_ct);
log.error("RT Obj head CV = %d, field CV = %d.", _cv_obj_rt, _cv_field_rt);
log.error("----------");
}
};


@ -42,7 +42,6 @@ class G1CollectedHeap;
class G1CMBitMap;
class G1CSetCandidateGroup;
class G1Predictions;
class G1HeapRegion;
class G1HeapRegionRemSet;
class G1HeapRegionSetBase;
class nmethod;
@ -478,7 +477,10 @@ public:
// Callers must ensure this is not called by multiple threads at the same time.
void hr_clear(bool clear_space);
// Clear the card table corresponding to this region.
void clear_cardtable();
void clear_card_table();
void clear_refinement_table();
void clear_both_card_tables();
// Notify the region that an evacuation failure occurred for an object within this
// region.


@ -63,7 +63,8 @@ public:
G1HeapRegionManager::G1HeapRegionManager() :
_bot_mapper(nullptr),
_cardtable_mapper(nullptr),
_card_table_mapper(nullptr),
_refinement_table_mapper(nullptr),
_committed_map(),
_next_highest_used_hrm_index(0),
_regions(), _heap_mapper(nullptr),
@ -74,7 +75,8 @@ G1HeapRegionManager::G1HeapRegionManager() :
void G1HeapRegionManager::initialize(G1RegionToSpaceMapper* heap_storage,
G1RegionToSpaceMapper* bitmap,
G1RegionToSpaceMapper* bot,
G1RegionToSpaceMapper* cardtable) {
G1RegionToSpaceMapper* card_table,
G1RegionToSpaceMapper* refinement_table) {
_next_highest_used_hrm_index = 0;
_heap_mapper = heap_storage;
@ -82,7 +84,8 @@ void G1HeapRegionManager::initialize(G1RegionToSpaceMapper* heap_storage,
_bitmap_mapper = bitmap;
_bot_mapper = bot;
_cardtable_mapper = cardtable;
_card_table_mapper = card_table;
_refinement_table_mapper = refinement_table;
_regions.initialize(heap_storage->reserved(), G1HeapRegion::GrainBytes);
@ -186,7 +189,8 @@ void G1HeapRegionManager::commit_regions(uint index, size_t num_regions, WorkerT
_bitmap_mapper->commit_regions(index, num_regions, pretouch_workers);
_bot_mapper->commit_regions(index, num_regions, pretouch_workers);
_cardtable_mapper->commit_regions(index, num_regions, pretouch_workers);
_card_table_mapper->commit_regions(index, num_regions, pretouch_workers);
_refinement_table_mapper->commit_regions(index, num_regions, pretouch_workers);
}
void G1HeapRegionManager::uncommit_regions(uint start, uint num_regions) {
@ -209,7 +213,8 @@ void G1HeapRegionManager::uncommit_regions(uint start, uint num_regions) {
_bitmap_mapper->uncommit_regions(start, num_regions);
_bot_mapper->uncommit_regions(start, num_regions);
_cardtable_mapper->uncommit_regions(start, num_regions);
_card_table_mapper->uncommit_regions(start, num_regions);
_refinement_table_mapper->uncommit_regions(start, num_regions);
_committed_map.uncommit(start, end);
}
@ -261,19 +266,23 @@ void G1HeapRegionManager::clear_auxiliary_data_structures(uint start, uint num_r
// Signal G1BlockOffsetTable to clear the given regions.
_bot_mapper->signal_mapping_changed(start, num_regions);
// Signal G1CardTable to clear the given regions.
_cardtable_mapper->signal_mapping_changed(start, num_regions);
_card_table_mapper->signal_mapping_changed(start, num_regions);
// Signal refinement table to clear the given regions.
_refinement_table_mapper->signal_mapping_changed(start, num_regions);
}
MemoryUsage G1HeapRegionManager::get_auxiliary_data_memory_usage() const {
size_t used_sz =
_bitmap_mapper->committed_size() +
_bot_mapper->committed_size() +
_cardtable_mapper->committed_size();
_card_table_mapper->committed_size() +
_refinement_table_mapper->committed_size();
size_t committed_sz =
_bitmap_mapper->reserved_size() +
_bot_mapper->reserved_size() +
_cardtable_mapper->reserved_size();
_card_table_mapper->reserved_size() +
_refinement_table_mapper->reserved_size();
return MemoryUsage(0, used_sz, committed_sz, committed_sz);
}


@ -74,7 +74,8 @@ class G1HeapRegionManager: public CHeapObj<mtGC> {
friend class G1HeapRegionClaimer;
G1RegionToSpaceMapper* _bot_mapper;
G1RegionToSpaceMapper* _cardtable_mapper;
G1RegionToSpaceMapper* _card_table_mapper;
G1RegionToSpaceMapper* _refinement_table_mapper;
// Keeps track of the currently committed regions in the heap. The committed regions
// can either be active (ready for use) or inactive (ready for uncommit).
@ -161,7 +162,8 @@ public:
void initialize(G1RegionToSpaceMapper* heap_storage,
G1RegionToSpaceMapper* bitmap,
G1RegionToSpaceMapper* bot,
G1RegionToSpaceMapper* cardtable);
G1RegionToSpaceMapper* card_table,
G1RegionToSpaceMapper* refinement_table);
// Return the "dummy" region used for G1AllocRegion. This is currently a hardwired
// new G1HeapRegion that owns G1HeapRegion at index 0. Since at the moment we commit


@ -42,6 +42,7 @@
#include "oops/compressedOops.inline.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/threads.hpp"
int G1HeapVerifier::_enabled_verification_types = G1HeapVerifier::G1VerifyAll;
@ -528,6 +529,7 @@ void G1HeapVerifier::verify_before_gc() {
void G1HeapVerifier::verify_after_gc() {
verify(VerifyOption::G1UseConcMarking, "After GC");
verify_card_tables_in_sync();
}
void G1HeapVerifier::verify_bitmap_clear(bool from_tams) {
@ -556,17 +558,17 @@ void G1HeapVerifier::verify_bitmap_clear(bool from_tams) {
G1CollectedHeap::heap()->heap_region_iterate(&cl);
}
#ifndef PRODUCT
class G1VerifyCardTableCleanup: public G1HeapRegionClosure {
G1HeapVerifier* _verifier;
public:
G1VerifyCardTableCleanup(G1HeapVerifier* verifier)
: _verifier(verifier) { }
virtual bool do_heap_region(G1HeapRegion* r) {
_verifier->verify_ct_clean_region(r);
if (r->is_survivor()) {
_verifier->verify_dirty_region(r);
_verifier->verify_rt_clean_region(r);
} else {
_verifier->verify_not_dirty_region(r);
_verifier->verify_rt_clean_from_top(r);
}
return false;
}
@ -579,14 +581,35 @@ void G1HeapVerifier::verify_card_table_cleanup() {
}
}
void G1HeapVerifier::verify_not_dirty_region(G1HeapRegion* hr) {
// All of the region should be clean.
G1CardTable* ct = _g1h->card_table();
MemRegion mr(hr->bottom(), hr->end());
ct->verify_not_dirty_region(mr);
class G1VerifyCardTablesClean: public G1HeapRegionClosure {
G1HeapVerifier* _verifier;
bool _both_card_tables;
public:
G1VerifyCardTablesClean(G1HeapVerifier* verifier, bool both_card_tables = true)
: _verifier(verifier), _both_card_tables(both_card_tables) { }
virtual bool do_heap_region(G1HeapRegion* r) {
_verifier->verify_rt_clean_region(r); // Must be all Clean from bottom -> end.
if (_both_card_tables) {
_verifier->verify_ct_clean_region(r);
}
return false;
}
};
void G1HeapVerifier::verify_card_tables_clean(bool both_card_tables) {
G1VerifyCardTablesClean cl(this, both_card_tables);
_g1h->heap_region_iterate(&cl);
}
void G1HeapVerifier::verify_dirty_region(G1HeapRegion* hr) {
void G1HeapVerifier::verify_rt_clean_from_top(G1HeapRegion* hr) {
G1CardTable* ct = _g1h->refinement_table();
MemRegion mr(align_up(hr->top(), G1CardTable::card_size()), hr->end());
ct->verify_region(mr, G1CardTable::clean_card_val(), true);
}
void G1HeapVerifier::verify_rt_dirty_to_dummy_top(G1HeapRegion* hr) {
// We cannot guarantee that [bottom(),end()] is dirty. Threads
// dirty allocated blocks as they allocate them. The thread that
// retires each region and replaces it with a new one will do a
@ -594,29 +617,56 @@ void G1HeapVerifier::verify_dirty_region(G1HeapRegion* hr) {
// not dirty that area (one less thing to have to do while holding
// a lock). So we can only verify that [bottom(),pre_dummy_top()]
// is dirty.
G1CardTable* ct = _g1h->card_table();
G1CardTable* ct = _g1h->refinement_table();
MemRegion mr(hr->bottom(), hr->pre_dummy_top());
if (hr->is_young()) {
ct->verify_g1_young_region(mr);
} else {
ct->verify_dirty_region(mr);
}
ct->verify_dirty_region(mr);
}
class G1VerifyDirtyYoungListClosure : public G1HeapRegionClosure {
private:
G1HeapVerifier* _verifier;
public:
G1VerifyDirtyYoungListClosure(G1HeapVerifier* verifier) : G1HeapRegionClosure(), _verifier(verifier) { }
virtual bool do_heap_region(G1HeapRegion* r) {
_verifier->verify_dirty_region(r);
return false;
}
};
void G1HeapVerifier::verify_ct_clean_region(G1HeapRegion* hr) {
G1CardTable* ct = _g1h->card_table();
MemRegion mr(hr->bottom(), hr->end());
ct->verify_region(mr, G1CardTable::clean_card_val(), true);
}
void G1HeapVerifier::verify_dirty_young_regions() {
G1VerifyDirtyYoungListClosure cl(this);
_g1h->collection_set()->iterate(&cl);
void G1HeapVerifier::verify_rt_clean_region(G1HeapRegion* hr) {
G1CardTable* ct = _g1h->refinement_table();
MemRegion mr(hr->bottom(), hr->end());
ct->verify_region(mr, G1CardTable::clean_card_val(), true);
}
#ifndef PRODUCT
void G1HeapVerifier::verify_card_tables_in_sync() {
// Non-Java thread card tables must be null.
class AssertCardTableBaseNull : public ThreadClosure {
public:
void do_thread(Thread* thread) {
ResourceMark rm;
assert(G1ThreadLocalData::get_byte_map_base(thread) == nullptr, "thread " PTR_FORMAT " (%s) has non-null card table base",
p2i(thread), thread->name());
}
} check_null_cl;
Threads::non_java_threads_do(&check_null_cl);
// Java thread card tables must be the same as the global card table.
class AssertSameCardTableClosure : public ThreadClosure {
public:
void do_thread(Thread* thread) {
G1CardTable::CardValue* global_ct_base = G1CollectedHeap::heap()->card_table_base();
G1CardTable::CardValue* cur_ct_base = G1ThreadLocalData::get_byte_map_base(thread);
ResourceMark rm;
assert(cur_ct_base == global_ct_base,
"thread " PTR_FORMAT " (%s) has wrong card table base, should be " PTR_FORMAT " is " PTR_FORMAT,
p2i(thread), thread->name(), p2i(global_ct_base), p2i(cur_ct_base));
}
} check_same_cl;
Threads::java_threads_do(&check_same_cl);
}
class G1CheckRegionAttrTableClosure : public G1HeapRegionClosure {


@ -1,5 +1,5 @@
/*
* Copyright (c) 2016, 2024, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -78,11 +78,16 @@ public:
// Do sanity check on the contents of the in-cset fast test table.
bool check_region_attr_table() PRODUCT_RETURN_( return true; );
void verify_card_table_cleanup() PRODUCT_RETURN;
void verify_card_table_cleanup();
void verify_card_tables_clean(bool both_card_tables);
void verify_not_dirty_region(G1HeapRegion* hr) PRODUCT_RETURN;
void verify_dirty_region(G1HeapRegion* hr) PRODUCT_RETURN;
void verify_dirty_young_regions() PRODUCT_RETURN;
void verify_ct_clean_region(G1HeapRegion* hr);
void verify_rt_dirty_to_dummy_top(G1HeapRegion* hr);
void verify_rt_clean_from_top(G1HeapRegion* hr);
void verify_rt_clean_region(G1HeapRegion* hr);
// Verify that the global card table and the per-thread card tables are in sync.
void verify_card_tables_in_sync() PRODUCT_RETURN;
};
#endif // SHARE_GC_G1_G1HEAPVERIFIER_HPP


@ -1,5 +1,5 @@
/*
* Copyright (c) 2001, 2024, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2001, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -86,19 +86,19 @@ public:
// This closure is applied to the fields of the objects that have just been copied during evacuation.
class G1ScanEvacuatedObjClosure : public G1ScanClosureBase {
friend class G1SkipCardEnqueueSetter;
friend class G1SkipCardMarkSetter;
enum SkipCardEnqueueTristate {
enum SkipCardMarkTristate {
False = 0,
True,
Uninitialized
};
SkipCardEnqueueTristate _skip_card_enqueue;
SkipCardMarkTristate _skip_card_mark;
public:
G1ScanEvacuatedObjClosure(G1CollectedHeap* g1h, G1ParScanThreadState* par_scan_state) :
G1ScanClosureBase(g1h, par_scan_state), _skip_card_enqueue(Uninitialized) { }
G1ScanClosureBase(g1h, par_scan_state), _skip_card_mark(Uninitialized) { }
template <class T> void do_oop_work(T* p);
virtual void do_oop(oop* p) { do_oop_work(p); }
@ -109,22 +109,22 @@ public:
}
#ifdef ASSERT
bool skip_card_enqueue_set() const { return _skip_card_enqueue != Uninitialized; }
bool skip_card_mark_set() const { return _skip_card_mark != Uninitialized; }
#endif
};
// RAII object to properly set the _skip_card_enqueue field in G1ScanEvacuatedObjClosure.
class G1SkipCardEnqueueSetter : public StackObj {
// RAII object to properly set the _skip_card_mark field in G1ScanEvacuatedObjClosure.
class G1SkipCardMarkSetter : public StackObj {
G1ScanEvacuatedObjClosure* _closure;
public:
G1SkipCardEnqueueSetter(G1ScanEvacuatedObjClosure* closure, bool skip_card_enqueue) : _closure(closure) {
assert(_closure->_skip_card_enqueue == G1ScanEvacuatedObjClosure::Uninitialized, "Must not be set");
_closure->_skip_card_enqueue = skip_card_enqueue ? G1ScanEvacuatedObjClosure::True : G1ScanEvacuatedObjClosure::False;
G1SkipCardMarkSetter(G1ScanEvacuatedObjClosure* closure, bool skip_card_mark) : _closure(closure) {
assert(_closure->_skip_card_mark == G1ScanEvacuatedObjClosure::Uninitialized, "Must not be set");
_closure->_skip_card_mark = skip_card_mark ? G1ScanEvacuatedObjClosure::True : G1ScanEvacuatedObjClosure::False;
}
~G1SkipCardEnqueueSetter() {
DEBUG_ONLY(_closure->_skip_card_enqueue = G1ScanEvacuatedObjClosure::Uninitialized;)
~G1SkipCardMarkSetter() {
DEBUG_ONLY(_closure->_skip_card_mark = G1ScanEvacuatedObjClosure::Uninitialized;)
}
};
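
For orientation, a usage sketch of this RAII helper, mirroring the call sites changed later in this commit (do_partial_array, do_copy_to_survivor_space and handle_evacuation_failure_par in g1ParScanThreadState.cpp); it is illustrative only, not additional new API:

    // Card marking is skipped exactly when the destination is a newly allocated
    // survivor region: such regions are scanned anyway during the next GC.
    {
      G1SkipCardMarkSetter x(&_scanner, dest_attr.is_new_survivor());
      obj->oop_iterate_backwards(&_scanner, klass);
    } // In debug builds the destructor resets _skip_card_mark to Uninitialized.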
@ -206,13 +206,20 @@ public:
class G1ConcurrentRefineOopClosure: public BasicOopIterateClosure {
G1CollectedHeap* _g1h;
uint _worker_id;
bool _has_ref_to_cset;
bool _has_ref_to_old;
public:
G1ConcurrentRefineOopClosure(G1CollectedHeap* g1h, uint worker_id) :
_g1h(g1h),
_worker_id(worker_id) {
_worker_id(worker_id),
_has_ref_to_cset(false),
_has_ref_to_old(false) {
}
bool has_ref_to_cset() const { return _has_ref_to_cset; }
bool has_ref_to_old() const { return _has_ref_to_old; }
virtual ReferenceIterationMode reference_iteration_mode() { return DO_FIELDS; }
template <class T> void do_oop_work(T* p);
@ -223,6 +230,7 @@ public:
class G1RebuildRemSetClosure : public BasicOopIterateClosure {
G1CollectedHeap* _g1h;
uint _worker_id;
public:
G1RebuildRemSetClosure(G1CollectedHeap* g1h, uint worker_id) : _g1h(g1h), _worker_id(worker_id) {
}


@ -90,11 +90,11 @@ inline void G1ScanEvacuatedObjClosure::do_oop_work(T* p) {
prefetch_and_push(p, obj);
} else if (!G1HeapRegion::is_in_same_region(p, obj)) {
handle_non_cset_obj_common(region_attr, p, obj);
assert(_skip_card_enqueue != Uninitialized, "Scan location has not been initialized.");
if (_skip_card_enqueue == True) {
assert(_skip_card_mark != Uninitialized, "Scan location has not been initialized.");
if (_skip_card_mark == True) {
return;
}
_par_scan_state->enqueue_card_if_tracked(region_attr, p, obj);
_par_scan_state->mark_card_if_tracked(region_attr, p, obj);
}
}
@ -127,6 +127,11 @@ inline static void check_obj_during_refinement(T* p, oop const obj) {
template <class T>
inline void G1ConcurrentRefineOopClosure::do_oop_work(T* p) {
// Early out if we already found a to-young reference.
if (_has_ref_to_cset) {
return;
}
T o = RawAccess<MO_RELAXED>::oop_load(p);
if (CompressedOops::is_null(o)) {
return;
@ -146,7 +151,12 @@ inline void G1ConcurrentRefineOopClosure::do_oop_work(T* p) {
return;
}
G1HeapRegionRemSet* to_rem_set = _g1h->heap_region_containing(obj)->rem_set();
G1HeapRegion* to_region = _g1h->heap_region_containing(obj);
if (to_region->is_young()) {
_has_ref_to_cset = true;
return;
}
G1HeapRegionRemSet* to_rem_set = to_region->rem_set();
assert(to_rem_set != nullptr, "Need per-region 'into' remsets.");
if (to_rem_set->is_tracked()) {
@ -154,6 +164,7 @@ inline void G1ConcurrentRefineOopClosure::do_oop_work(T* p) {
if (from->rem_set()->cset_group() != to_rem_set->cset_group()) {
to_rem_set->add_reference(p, _worker_id);
_has_ref_to_old = true;
}
}
}
@ -180,7 +191,7 @@ inline void G1ScanCardClosure::do_oop_work(T* p) {
_heap_roots_found++;
} else if (!G1HeapRegion::is_in_same_region(p, obj)) {
handle_non_cset_obj_common(region_attr, p, obj);
_par_scan_state->enqueue_card_if_tracked(region_attr, p, obj);
_par_scan_state->mark_card_if_tracked(region_attr, p, obj);
}
}
@ -272,10 +283,14 @@ template <class T> void G1RebuildRemSetClosure::do_oop_work(T* p) {
G1HeapRegion* to = _g1h->heap_region_containing(obj);
G1HeapRegionRemSet* rem_set = to->rem_set();
if (rem_set->is_tracked()) {
G1HeapRegion* from = _g1h->heap_region_containing(p);
if (to->is_young()) {
G1BarrierSet::g1_barrier_set()->write_ref_field_post(p);
} else {
G1HeapRegion* from = _g1h->heap_region_containing(p);
if (from->rem_set()->cset_group() != rem_set->cset_group()) {
rem_set->add_reference(p, _worker_id);
if (from->rem_set()->cset_group() != rem_set->cset_group()) {
rem_set->add_reference(p, _worker_id);
}
}
}
}


@ -57,22 +57,21 @@
#define MAYBE_INLINE_EVACUATION NOT_DEBUG(inline) DEBUG_ONLY(NOINLINE)
G1ParScanThreadState::G1ParScanThreadState(G1CollectedHeap* g1h,
G1RedirtyCardsQueueSet* rdcqs,
uint worker_id,
uint num_workers,
G1CollectionSet* collection_set,
G1EvacFailureRegions* evac_failure_regions)
: _g1h(g1h),
_task_queue(g1h->task_queue(worker_id)),
_rdc_local_qset(rdcqs),
_ct(g1h->card_table()),
_ct(g1h->refinement_table()),
_closures(nullptr),
_plab_allocator(nullptr),
_age_table(false),
_tenuring_threshold(g1h->policy()->tenuring_threshold()),
_scanner(g1h, this),
_worker_id(worker_id),
_last_enqueued_card(SIZE_MAX),
_num_cards_marked_dirty(0),
_num_cards_marked_to_cset(0),
_stack_trim_upper_threshold(GCDrainStackTargetSize * 2 + 1),
_stack_trim_lower_threshold(GCDrainStackTargetSize),
_trim_ticks(),
@ -88,7 +87,7 @@ G1ParScanThreadState::G1ParScanThreadState(G1CollectedHeap* g1h,
ALLOCATION_FAILURE_INJECTOR_ONLY(_allocation_failure_inject_counter(0) COMMA)
_evacuation_failed_info(),
_evac_failure_regions(evac_failure_regions),
_evac_failure_enqueued_cards(0)
_num_cards_from_evac_failure(0)
{
// We allocate one entry more than the number of young gen regions in the collection
// set, since entry 0 keeps track of surviving bytes for non-young regions.
@ -112,8 +111,7 @@ G1ParScanThreadState::G1ParScanThreadState(G1CollectedHeap* g1h,
initialize_numa_stats();
}
size_t G1ParScanThreadState::flush_stats(size_t* surviving_young_words, uint num_workers, BufferNodeList* rdc_buffers) {
*rdc_buffers = _rdc_local_qset.flush();
size_t G1ParScanThreadState::flush_stats(size_t* surviving_young_words, uint num_workers) {
flush_numa_stats();
// Update allocation statistics.
_plab_allocator->flush_and_retire_stats(num_workers);
@ -147,8 +145,16 @@ size_t G1ParScanThreadState::lab_undo_waste_words() const {
return _plab_allocator->undo_waste();
}
size_t G1ParScanThreadState::evac_failure_enqueued_cards() const {
return _evac_failure_enqueued_cards;
size_t G1ParScanThreadState::num_cards_pending() const {
return _num_cards_marked_dirty + _num_cards_from_evac_failure;
}
size_t G1ParScanThreadState::num_cards_marked() const {
return num_cards_pending() + _num_cards_marked_to_cset;
}
size_t G1ParScanThreadState::num_cards_from_evac_failure() const {
return _num_cards_from_evac_failure;
}
#ifdef ASSERT
@ -230,7 +236,7 @@ void G1ParScanThreadState::do_partial_array(PartialArrayState* state, bool stole
PartialArraySplitter::Claim claim =
_partial_array_splitter.claim(state, _task_queue, stolen);
G1HeapRegionAttr dest_attr = _g1h->region_attr(to_array);
G1SkipCardEnqueueSetter x(&_scanner, dest_attr.is_new_survivor());
G1SkipCardMarkSetter x(&_scanner, dest_attr.is_new_survivor());
// Process claimed task.
to_array->oop_iterate_range(&_scanner,
checked_cast<int>(claim._start),
@ -250,7 +256,7 @@ void G1ParScanThreadState::start_partial_objarray(oop from_obj,
// The source array is unused when processing states.
_partial_array_splitter.start(_task_queue, nullptr, to_array, array_length);
assert(_scanner.skip_card_enqueue_set(), "must be");
assert(_scanner.skip_card_mark_set(), "must be");
// Process the initial chunk. No need to process the type in the
// klass, as it will already be handled by processing the built-in
// module.
@ -451,7 +457,7 @@ void G1ParScanThreadState::do_iterate_object(oop const obj,
_string_dedup_requests.add(old);
}
assert(_scanner.skip_card_enqueue_set(), "must be");
assert(_scanner.skip_card_mark_set(), "must be");
obj->oop_iterate_backwards(&_scanner, klass);
}
@ -546,7 +552,7 @@ oop G1ParScanThreadState::do_copy_to_survivor_space(G1HeapRegionAttr const regio
// Instead, we use dest_attr.is_young() because the two values are always
// equal: successfully allocated young regions must be survivor regions.
assert(dest_attr.is_young() == _g1h->heap_region_containing(obj)->is_survivor(), "must be");
G1SkipCardEnqueueSetter x(&_scanner, dest_attr.is_young());
G1SkipCardMarkSetter x(&_scanner, dest_attr.is_young());
do_iterate_object(obj, old, klass, region_attr, dest_attr, age);
}
@ -569,7 +575,7 @@ G1ParScanThreadState* G1ParScanThreadStateSet::state_for_worker(uint worker_id)
assert(worker_id < _num_workers, "out of bounds access");
if (_states[worker_id] == nullptr) {
_states[worker_id] =
new G1ParScanThreadState(_g1h, rdcqs(),
new G1ParScanThreadState(_g1h,
worker_id,
_num_workers,
_collection_set,
@ -595,22 +601,24 @@ void G1ParScanThreadStateSet::flush_stats() {
// because it resets the PLAB allocator where we get this info from.
size_t lab_waste_bytes = pss->lab_waste_words() * HeapWordSize;
size_t lab_undo_waste_bytes = pss->lab_undo_waste_words() * HeapWordSize;
size_t copied_bytes = pss->flush_stats(_surviving_young_words_total, _num_workers, &_rdc_buffers[worker_id]) * HeapWordSize;
size_t evac_fail_enqueued_cards = pss->evac_failure_enqueued_cards();
size_t copied_bytes = pss->flush_stats(_surviving_young_words_total, _num_workers) * HeapWordSize;
size_t pending_cards = pss->num_cards_pending();
size_t to_young_gen_cards = pss->num_cards_marked() - pss->num_cards_pending();
size_t evac_failure_cards = pss->num_cards_from_evac_failure();
size_t marked_cards = pss->num_cards_marked();
p->record_or_add_thread_work_item(G1GCPhaseTimes::MergePSS, worker_id, copied_bytes, G1GCPhaseTimes::MergePSSCopiedBytes);
p->record_or_add_thread_work_item(G1GCPhaseTimes::MergePSS, worker_id, lab_waste_bytes, G1GCPhaseTimes::MergePSSLABWasteBytes);
p->record_or_add_thread_work_item(G1GCPhaseTimes::MergePSS, worker_id, lab_undo_waste_bytes, G1GCPhaseTimes::MergePSSLABUndoWasteBytes);
p->record_or_add_thread_work_item(G1GCPhaseTimes::MergePSS, worker_id, evac_fail_enqueued_cards, G1GCPhaseTimes::MergePSSEvacFailExtra);
p->record_or_add_thread_work_item(G1GCPhaseTimes::MergePSS, worker_id, pending_cards, G1GCPhaseTimes::MergePSSPendingCards);
p->record_or_add_thread_work_item(G1GCPhaseTimes::MergePSS, worker_id, to_young_gen_cards, G1GCPhaseTimes::MergePSSToYoungGenCards);
p->record_or_add_thread_work_item(G1GCPhaseTimes::MergePSS, worker_id, evac_failure_cards, G1GCPhaseTimes::MergePSSEvacFail);
p->record_or_add_thread_work_item(G1GCPhaseTimes::MergePSS, worker_id, marked_cards, G1GCPhaseTimes::MergePSSMarked);
delete pss;
_states[worker_id] = nullptr;
}
G1DirtyCardQueueSet& dcq = G1BarrierSet::dirty_card_queue_set();
dcq.merge_bufferlists(rdcqs());
rdcqs()->verify_empty();
_flushed = true;
}
@ -652,7 +660,7 @@ oop G1ParScanThreadState::handle_evacuation_failure_par(oop old, markWord m, Kla
// existing closure to scan evacuated objects; since we are iterating from a
// collection set region (i.e. never a Survivor region), we always need to
// gather cards for this case.
G1SkipCardEnqueueSetter x(&_scanner, false /* skip_card_enqueue */);
G1SkipCardMarkSetter x(&_scanner, false /* skip_card_mark */);
do_iterate_object(old, old, klass, attr, attr, m.age());
}
@ -709,9 +717,7 @@ G1ParScanThreadStateSet::G1ParScanThreadStateSet(G1CollectedHeap* g1h,
G1EvacFailureRegions* evac_failure_regions) :
_g1h(g1h),
_collection_set(collection_set),
_rdcqs(G1BarrierSet::dirty_card_queue_set().allocator()),
_states(NEW_C_HEAP_ARRAY(G1ParScanThreadState*, num_workers, mtGC)),
_rdc_buffers(NEW_C_HEAP_ARRAY(BufferNodeList, num_workers, mtGC)),
_surviving_young_words_total(NEW_C_HEAP_ARRAY(size_t, collection_set->young_region_length() + 1, mtGC)),
_num_workers(num_workers),
_flushed(false),
@ -719,7 +725,6 @@ G1ParScanThreadStateSet::G1ParScanThreadStateSet(G1CollectedHeap* g1h,
{
for (uint i = 0; i < num_workers; ++i) {
_states[i] = nullptr;
_rdc_buffers[i] = BufferNodeList();
}
memset(_surviving_young_words_total, 0, (collection_set->young_region_length() + 1) * sizeof(size_t));
}
@ -728,7 +733,6 @@ G1ParScanThreadStateSet::~G1ParScanThreadStateSet() {
assert(_flushed, "thread local state from the per thread states should have been flushed");
FREE_C_HEAP_ARRAY(G1ParScanThreadState*, _states);
FREE_C_HEAP_ARRAY(size_t, _surviving_young_words_total);
FREE_C_HEAP_ARRAY(BufferNodeList, _rdc_buffers);
}
#if TASKQUEUE_STATS


@ -1,5 +1,5 @@
/*
* Copyright (c) 2014, 2024, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -27,7 +27,6 @@
#include "gc/g1/g1CollectedHeap.hpp"
#include "gc/g1/g1OopClosures.hpp"
#include "gc/g1/g1RedirtyCardsQueue.hpp"
#include "gc/g1/g1YoungGCAllocationFailureInjector.hpp"
#include "gc/shared/ageTable.hpp"
#include "gc/shared/copyFailedInfo.hpp"
@ -52,7 +51,6 @@ class outputStream;
class G1ParScanThreadState : public CHeapObj<mtGC> {
G1CollectedHeap* _g1h;
G1ScannerTasksQueue* _task_queue;
G1RedirtyCardsLocalQueueSet _rdc_local_qset;
G1CardTable* _ct;
G1EvacuationRootClosures* _closures;
@ -65,9 +63,8 @@ class G1ParScanThreadState : public CHeapObj<mtGC> {
uint _worker_id;
// Remember the last enqueued card to avoid enqueuing the same card over and over;
// since we only ever scan a card once, this is sufficient.
size_t _last_enqueued_card;
size_t _num_cards_marked_dirty;
size_t _num_cards_marked_to_cset;
// Upper and lower threshold to start and end work queue draining.
uint const _stack_trim_upper_threshold;
@ -104,22 +101,19 @@ class G1ParScanThreadState : public CHeapObj<mtGC> {
EvacuationFailedInfo _evacuation_failed_info;
G1EvacFailureRegions* _evac_failure_regions;
// Number of additional cards into evacuation failed regions enqueued into
// the local DCQS. This is an approximation, as cards that would be added later
// outside of evacuation failure will not be subtracted again.
size_t _evac_failure_enqueued_cards;
// Number of additional cards into evacuation failed regions.
size_t _num_cards_from_evac_failure;
// Enqueue the card if not already in the set; this is a best-effort attempt on
// Mark the card if it is not already marked; this is a best-effort attempt at
// detecting duplicates.
template <class T> bool enqueue_if_new(T* p);
// Enqueue the card of p into the (evacuation failed) region.
template <class T> void enqueue_card_into_evac_fail_region(T* p, oop obj);
template <class T> bool mark_if_new(T* p, bool into_survivor);
// Mark the card of p into the (evacuation failed) region.
template <class T> void mark_card_into_evac_fail_region(T* p, oop obj);
bool inject_allocation_failure(uint region_idx) ALLOCATION_FAILURE_INJECTOR_RETURN_( return false; );
public:
G1ParScanThreadState(G1CollectedHeap* g1h,
G1RedirtyCardsQueueSet* rdcqs,
uint worker_id,
uint num_workers,
G1CollectionSet* collection_set,
@ -139,16 +133,16 @@ public:
void push_on_queue(ScannerTask task);
// Apply the post barrier to the given reference field. Enqueues the card of p
// Apply the post barrier to the given reference field. Marks the card of p
// if the barrier does not filter out the reference for some reason (e.g.
// p and obj are in the same region, p is in survivor, p is in collection set).
// To be called during GC if nothing particular about p and obj are known.
template <class T> void write_ref_field_post(T* p, oop obj);
// Enqueue the card if the reference's target region's remembered set is tracked.
// Mark the card if the reference's target region's remembered set is tracked.
// Assumes that a significant amount of pre-filtering (like done by
// write_ref_field_post() above) has already been performed.
template <class T> void enqueue_card_if_tracked(G1HeapRegionAttr region_attr, T* p, oop o);
template <class T> void mark_card_if_tracked(G1HeapRegionAttr region_attr, T* p, oop o);
G1EvacuationRootClosures* closures() { return _closures; }
uint worker_id() { return _worker_id; }
@ -156,11 +150,22 @@ public:
size_t lab_waste_words() const;
size_t lab_undo_waste_words() const;
size_t evac_failure_enqueued_cards() const;
// Newly marked cards during this garbage collection, to be refined concurrently
// later. Contains both cards marked because of new cross-region references and
// cards marked because of references into evacuation failed regions.
// Does not contain cards into the next collection set (e.g. survivors); they will not
// be refined concurrently. Calculation is done on a best-effort basis.
size_t num_cards_pending() const;
// Number of cards newly generated by references into evacuation failed regions.
// Calculation is done on a best-effort basis.
size_t num_cards_from_evac_failure() const;
// Sum of cards marked by evacuation. Contains both pending cards and cards into
// the next collection set (e.g. survivors).
size_t num_cards_marked() const;
// Pass locally gathered statistics to global state. Returns the total number of
// HeapWords copied.
size_t flush_stats(size_t* surviving_young_words, uint num_workers, BufferNodeList* buffer_log);
size_t flush_stats(size_t* surviving_young_words, uint num_workers);
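
A minimal, self-contained sketch of the accounting these accessors imply, and which flush_stats() relies on when it reports to-young-gen cards as marked minus pending; the names in the comments refer to the fields above, and the numbers are invented:

    #include <cassert>
    #include <cstddef>

    int main() {
      std::size_t marked_dirty   = 120; // _num_cards_marked_dirty
      std::size_t from_evac_fail = 30;  // _num_cards_from_evac_failure
      std::size_t marked_to_cset = 50;  // _num_cards_marked_to_cset

      std::size_t pending = marked_dirty + from_evac_fail; // num_cards_pending()
      std::size_t marked  = pending + marked_to_cset;      // num_cards_marked()

      // Consumers derive the number of to-young-gen cards as marked - pending.
      assert(marked - pending == marked_to_cset);
      return 0;
    }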
#if TASKQUEUE_STATS
PartialArrayTaskStats* partial_array_task_stats();
@ -249,9 +254,7 @@ public:
class G1ParScanThreadStateSet : public StackObj {
G1CollectedHeap* _g1h;
G1CollectionSet* _collection_set;
G1RedirtyCardsQueueSet _rdcqs;
G1ParScanThreadState** _states;
BufferNodeList* _rdc_buffers;
size_t* _surviving_young_words_total;
uint _num_workers;
bool _flushed;
@ -264,9 +267,6 @@ class G1ParScanThreadStateSet : public StackObj {
G1EvacFailureRegions* evac_failure_regions);
~G1ParScanThreadStateSet();
G1RedirtyCardsQueueSet* rdcqs() { return &_rdcqs; }
BufferNodeList* rdc_buffers() { return _rdc_buffers; }
void flush_stats();
void record_unused_optional_region(G1HeapRegion* hr);
#if TASKQUEUE_STATS


@ -96,25 +96,24 @@ G1OopStarChunkedList* G1ParScanThreadState::oops_into_optional_region(const G1He
return &_oops_into_optional_regions[hr->index_in_opt_cset()];
}
template <class T> bool G1ParScanThreadState::enqueue_if_new(T* p) {
size_t card_index = ct()->index_for(p);
// If the card hasn't been added to the buffer, do it.
if (_last_enqueued_card != card_index) {
_rdc_local_qset.enqueue(ct()->byte_for_index(card_index));
_last_enqueued_card = card_index;
template <class T> bool G1ParScanThreadState::mark_if_new(T* p, bool into_new_survivor) {
G1CardTable::CardValue* card = ct()->byte_for(p);
G1CardTable::CardValue value = *card;
if (value == G1CardTable::clean_card_val()) {
*card = into_new_survivor ? G1CardTable::g1_to_cset_card : G1CardTable::g1_dirty_card;
return true;
} else {
return false;
}
}
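
A standalone sketch of the conditional marking scheme above: a card is written only if it is still clean, and the first marking decides between the to-collection-set and the dirty value. The card values used here are stand-ins, not the actual G1CardTable constants:

    #include <cstdint>
    #include <cstdio>

    enum : std::uint8_t { kClean = 0xFF, kDirty = 0x00, kToCset = 0x02 }; // illustrative values

    static bool mark_if_new(std::uint8_t* card, bool into_new_survivor) {
      if (*card != kClean) {
        return false;                                // already marked, nothing to count
      }
      *card = into_new_survivor ? kToCset : kDirty;  // first marking wins
      return true;                                   // caller updates its statistics
    }

    int main() {
      std::uint8_t card = kClean;
      bool first  = mark_if_new(&card, false);
      bool second = mark_if_new(&card, false);
      std::printf("%d %d\n", first, second); // prints "1 0"
      return 0;
    }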
template <class T> void G1ParScanThreadState::enqueue_card_into_evac_fail_region(T* p, oop obj) {
template <class T> void G1ParScanThreadState::mark_card_into_evac_fail_region(T* p, oop obj) {
assert(!G1HeapRegion::is_in_same_region(p, obj), "Should have filtered out cross-region references already.");
assert(!_g1h->heap_region_containing(p)->is_survivor(), "Should have filtered out from-newly allocated survivor references already.");
assert(_g1h->heap_region_containing(obj)->in_collection_set(), "Only for marking references into collection set regions");
if (enqueue_if_new(p)) {
_evac_failure_enqueued_cards++;
if (mark_if_new(p, false /* into_new_survivor */)) { // The reference is never into survivor regions.
_num_cards_from_evac_failure++;
}
}
@ -137,18 +136,18 @@ template <class T> void G1ParScanThreadState::write_ref_field_post(T* p, oop obj
if (dest_attr.is_in_cset()) {
assert(obj->is_forwarded(), "evac-failed but not forwarded: " PTR_FORMAT, p2i(obj));
assert(obj->forwardee() == obj, "evac-failed but not self-forwarded: " PTR_FORMAT, p2i(obj));
enqueue_card_into_evac_fail_region(p, obj);
mark_card_into_evac_fail_region(p, obj);
return;
}
enqueue_card_if_tracked(dest_attr, p, obj);
mark_card_if_tracked(dest_attr, p, obj);
}
template <class T> void G1ParScanThreadState::enqueue_card_if_tracked(G1HeapRegionAttr region_attr, T* p, oop o) {
template <class T> void G1ParScanThreadState::mark_card_if_tracked(G1HeapRegionAttr region_attr, T* p, oop o) {
assert(!G1HeapRegion::is_in_same_region(p, o), "Should have filtered out cross-region references already.");
assert(!_g1h->heap_region_containing(p)->is_survivor(), "Should have filtered out from-newly allocated survivor references already.");
// We relabel all regions that failed evacuation as old gen without remembered sets,
// and so pre-filter them out in the caller.
assert(!_g1h->heap_region_containing(o)->in_collection_set(), "Should not try to enqueue reference into collection set region");
assert(!_g1h->heap_region_containing(o)->in_collection_set(), "Should not try to mark reference into collection set region");
#ifdef ASSERT
G1HeapRegion* const hr_obj = _g1h->heap_region_containing(o);
@ -161,7 +160,14 @@ template <class T> void G1ParScanThreadState::enqueue_card_if_tracked(G1HeapRegi
if (!region_attr.remset_is_tracked()) {
return;
}
enqueue_if_new(p);
bool into_survivor = region_attr.is_new_survivor();
if (mark_if_new(p, into_survivor)) {
if (into_survivor) {
_num_cards_marked_to_cset++;
} else {
_num_cards_marked_dirty++;
}
}
}
#endif // SHARE_GC_G1_G1PARSCANTHREADSTATE_INLINE_HPP


@ -67,8 +67,7 @@ G1Policy::G1Policy(STWGCTimer* gc_timer) :
_reserve_regions(0),
_young_gen_sizer(),
_free_regions_at_end_of_collection(0),
_card_rs_length(0),
_pending_cards_at_gc_start(0),
_pending_cards_from_gc(0),
_concurrent_start_to_mixed(),
_collection_set(nullptr),
_g1h(nullptr),
@ -553,12 +552,9 @@ G1GCPhaseTimes* G1Policy::phase_times() const {
return _phase_times;
}
void G1Policy::revise_young_list_target_length(size_t card_rs_length, size_t code_root_rs_length) {
void G1Policy::revise_young_list_target_length(size_t pending_cards, size_t card_rs_length, size_t code_root_rs_length) {
guarantee(use_adaptive_young_list_length(), "should not call this otherwise" );
size_t thread_buffer_cards = _analytics->predict_dirtied_cards_in_thread_buffers();
G1DirtyCardQueueSet& dcqs = G1BarrierSet::dirty_card_queue_set();
size_t pending_cards = dcqs.num_cards() + thread_buffer_cards;
update_young_length_bounds(pending_cards, card_rs_length, code_root_rs_length);
}
@ -567,7 +563,7 @@ void G1Policy::record_full_collection_start() {
// Release the future to-space so that it is available for compaction into.
collector_state()->set_in_young_only_phase(false);
collector_state()->set_in_full_gc(true);
_pending_cards_at_gc_start = 0;
_collection_set->abandon_all_candidates();
}
void G1Policy::record_full_collection_end() {
@ -600,59 +596,70 @@ void G1Policy::record_full_collection_end() {
record_pause(G1GCPauseType::FullGC, start_time_sec, end_sec);
}
static void log_refinement_stats(const char* kind, const G1ConcurrentRefineStats& stats) {
static void log_refinement_stats(const G1ConcurrentRefineStats& stats) {
log_debug(gc, refine, stats)
("%s refinement: %.2fms, refined: %zu"
", precleaned: %zu, dirtied: %zu",
kind,
stats.refinement_time().seconds() * MILLIUNITS,
("Refinement: sweep: %.2fms, yield: %.2fms refined: %zu, dirtied: %zu",
TimeHelper::counter_to_millis(stats.sweep_duration()),
TimeHelper::counter_to_millis(stats.yield_during_sweep_duration()),
stats.refined_cards(),
stats.precleaned_cards(),
stats.dirtied_cards());
stats.cards_pending());
}
void G1Policy::record_concurrent_refinement_stats(size_t pending_cards,
size_t thread_buffer_cards) {
_pending_cards_at_gc_start = pending_cards;
_analytics->report_dirtied_cards_in_thread_buffers(thread_buffer_cards);
// Collect per-thread stats, mostly from mutator activity.
G1DirtyCardQueueSet& dcqs = G1BarrierSet::dirty_card_queue_set();
G1ConcurrentRefineStats mut_stats = dcqs.concatenated_refinement_stats();
// Collect specialized concurrent refinement thread stats.
G1ConcurrentRefine* cr = _g1h->concurrent_refine();
G1ConcurrentRefineStats cr_stats = cr->get_and_reset_refinement_stats();
G1ConcurrentRefineStats total_stats = mut_stats + cr_stats;
log_refinement_stats("Mutator", mut_stats);
log_refinement_stats("Concurrent", cr_stats);
log_refinement_stats("Total", total_stats);
void G1Policy::record_refinement_stats(G1ConcurrentRefineStats* refine_stats) {
log_refinement_stats(*refine_stats);
// Record the rate at which cards were refined.
// Don't update the rate if the current sample is empty or time is zero.
Tickspan refinement_time = total_stats.refinement_time();
size_t refined_cards = total_stats.refined_cards();
if ((refined_cards > 0) && (refinement_time > Tickspan())) {
double rate = refined_cards / (refinement_time.seconds() * MILLIUNITS);
// Don't update the rate if the current sample is empty or time is zero (which is
// the case during GC).
double refinement_time = TimeHelper::counter_to_millis(refine_stats->sweep_duration());
size_t refined_cards = refine_stats->refined_cards();
if ((refined_cards > 0) && (refinement_time > 0)) {
double rate = refined_cards / refinement_time;
_analytics->report_concurrent_refine_rate_ms(rate);
log_debug(gc, refine, stats)("Concurrent refinement rate: %.2f cards/ms", rate);
log_debug(gc, refine, stats)("Concurrent refinement rate: %.2f cards/ms predicted: %.2f cards/ms", rate, _analytics->predict_concurrent_refine_rate_ms());
}
}
template<typename T>
static T saturated_sub(T x, T y) {
return (x < y) ? T() : (x - y);
}
void G1Policy::record_dirtying_stats(double last_mutator_start_dirty_ms,
double last_mutator_end_dirty_ms,
size_t pending_cards,
double yield_duration_ms,
size_t next_pending_cards_from_gc,
size_t next_to_collection_set_cards) {
assert(SafepointSynchronize::is_at_safepoint() || G1ReviseYoungLength_lock->is_locked(),
"must be (at safepoint %s locked %s)",
BOOL_TO_STR(SafepointSynchronize::is_at_safepoint()), BOOL_TO_STR(G1ReviseYoungLength_lock->is_locked()));
// Record mutator's card logging rate.
double mut_start_time = _analytics->prev_collection_pause_end_ms();
double mut_end_time = cur_pause_start_sec() * MILLIUNITS;
double mut_time = mut_end_time - mut_start_time;
// Unlike above for conc-refine rate, here we should not require a
// non-empty sample, since an application could go some time with only
// young-gen or filtered out writes. But we'll ignore unusually short
// sample periods, as they may just pollute the predictions.
if (mut_time > 1.0) { // Require > 1ms sample time.
double dirtied_rate = total_stats.dirtied_cards() / mut_time;
double const mutator_dirty_time_ms = (last_mutator_end_dirty_ms - last_mutator_start_dirty_ms) - yield_duration_ms;
assert(mutator_dirty_time_ms >= 0.0,
"must be (start: %.2f end: %.2f yield: %.2f)",
last_mutator_start_dirty_ms, last_mutator_end_dirty_ms, yield_duration_ms);
if (mutator_dirty_time_ms > 1.0) { // Require > 1ms sample time.
// The subtracted term, pending_cards_from_gc(), includes both dirtied and dirty-as-young cards,
// so it can be larger than what is actually considered "pending" (dirty cards only).
size_t dirtied_cards = saturated_sub(pending_cards, pending_cards_from_gc());
double dirtied_rate = dirtied_cards / mutator_dirty_time_ms;
_analytics->report_dirtied_cards_rate_ms(dirtied_rate);
log_debug(gc, refine, stats)("Generate dirty cards rate: %.2f cards/ms", dirtied_rate);
log_debug(gc, refine, stats)("Generate dirty cards rate: %.2f cards/ms dirtying time %.2f (start %.2f end %.2f yield %.2f) dirtied %zu (pending %zu during_gc %zu)",
dirtied_rate,
mutator_dirty_time_ms,
last_mutator_start_dirty_ms, last_mutator_end_dirty_ms, yield_duration_ms,
dirtied_cards, pending_cards, pending_cards_from_gc());
}
_pending_cards_from_gc = next_pending_cards_from_gc;
_to_collection_set_cards = next_to_collection_set_cards;
}
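
A worked example of the dirtying rate computed above, as a small standalone program; all numbers are invented and only illustrate the formula (mutator dirtying time excludes yields, and the GC-generated cards are subtracted from the pending cards):

    #include <cstddef>
    #include <cstdio>

    int main() {
      double start_ms = 1000.0, end_ms = 1105.0, yield_ms = 5.0;
      std::size_t pending_cards = 2200, pending_cards_from_gc = 200;

      double dirty_time_ms = (end_ms - start_ms) - yield_ms;        // 100 ms of mutator time
      std::size_t dirtied  = pending_cards - pending_cards_from_gc; // saturated_sub in the real code
      double rate = dirtied / dirty_time_ms;                        // 20 cards/ms

      std::printf("Generate dirty cards rate: %.2f cards/ms\n", rate);
      return 0;
    }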
bool G1Policy::should_retain_evac_failed_region(uint index) const {
@ -761,27 +768,27 @@ bool G1Policy::concurrent_operation_is_full_mark(const char* msg) {
((_g1h->gc_cause() != GCCause::_g1_humongous_allocation) || need_to_start_conc_mark(msg));
}
double G1Policy::logged_cards_processing_time() const {
double G1Policy::pending_cards_processing_time() const {
double all_cards_processing_time = average_time_ms(G1GCPhaseTimes::ScanHR) + average_time_ms(G1GCPhaseTimes::OptScanHR);
size_t logged_dirty_cards = phase_times()->sum_thread_work_items(G1GCPhaseTimes::MergeLB, G1GCPhaseTimes::MergeLBDirtyCards);
size_t pending_cards = phase_times()->sum_thread_work_items(G1GCPhaseTimes::ScanHR, G1GCPhaseTimes::ScanHRPendingCards) +
phase_times()->sum_thread_work_items(G1GCPhaseTimes::OptScanHR, G1GCPhaseTimes::ScanHRPendingCards);
size_t scan_heap_roots_cards = phase_times()->sum_thread_work_items(G1GCPhaseTimes::ScanHR, G1GCPhaseTimes::ScanHRScannedCards) +
phase_times()->sum_thread_work_items(G1GCPhaseTimes::OptScanHR, G1GCPhaseTimes::ScanHRScannedCards);
double merge_logged_cards_time = average_time_ms(G1GCPhaseTimes::MergeLB) +
phase_times()->cur_distribute_log_buffers_time_ms();
double merge_pending_cards_time = phase_times()->cur_merge_refinement_table_time();
// Approximate the time spent processing cards from log buffers by scaling
// the total processing time by the ratio of logged cards to total cards
// Approximate the time spent processing pending cards by scaling
// the total processing time by the ratio of pending cards to total cards
// processed. There might be duplicate cards in different log buffers,
// leading to an overestimate. That effect should be relatively small
// unless there are few cards to process, because cards in buffers are
// dirtied to limit duplication. Also need to avoid scaling when both
// counts are zero, which happens especially during early GCs. So ascribe
// all of the time to the logged cards unless there are more total cards.
if (logged_dirty_cards >= scan_heap_roots_cards) {
return all_cards_processing_time + merge_logged_cards_time;
// all of the time to the pending cards unless there are more total cards.
if (pending_cards >= scan_heap_roots_cards) {
return all_cards_processing_time + merge_pending_cards_time;
}
return (all_cards_processing_time * logged_dirty_cards / scan_heap_roots_cards) + merge_logged_cards_time;
return (all_cards_processing_time * pending_cards / scan_heap_roots_cards) + merge_pending_cards_time;
}
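
A worked example of the approximation above (invented numbers): the total scan time is attributed to pending cards in proportion to their share of all scanned cards, and the refinement table merge time is added on top; if there are at least as many pending cards as scanned cards, the whole scan time is ascribed to them.

    #include <cstddef>
    #include <cstdio>

    int main() {
      double all_cards_processing_time = 10.0; // Scan HR (+ Opt Scan HR) time, ms
      double merge_pending_cards_time  = 2.0;  // Merge Refinement Table time, ms
      std::size_t pending_cards        = 5000;
      std::size_t scanned_cards        = 50000;

      double result = (pending_cards >= scanned_cards)
        ? all_cards_processing_time + merge_pending_cards_time
        : all_cards_processing_time * pending_cards / scanned_cards + merge_pending_cards_time;

      std::printf("pending card processing time: %.2f ms\n", result); // 3.00 ms here
      return 0;
    }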
// Anything below that is considered to be zero
@ -815,6 +822,22 @@ void G1Policy::record_young_collection_end(bool concurrent_operation_is_full_mar
// We make the assumption that these are rare.
bool update_stats = !allocation_failure;
size_t const total_cards_scanned = p->sum_thread_work_items(G1GCPhaseTimes::ScanHR, G1GCPhaseTimes::ScanHRScannedCards) +
p->sum_thread_work_items(G1GCPhaseTimes::OptScanHR, G1GCPhaseTimes::ScanHRScannedCards);
// Number of scanned cards with "Dirty" value (and nothing else).
size_t const pending_cards_from_refinement_table = p->sum_thread_work_items(G1GCPhaseTimes::ScanHR, G1GCPhaseTimes::ScanHRPendingCards) +
p->sum_thread_work_items(G1GCPhaseTimes::OptScanHR, G1GCPhaseTimes::ScanHRPendingCards);
// Number of cards actually merged in the Merge RS phase. MergeRSFromRemSetCards below includes the cards from the Eager Reclaim phase.
size_t const merged_cards_from_card_rs = p->sum_thread_work_items(G1GCPhaseTimes::MergeRS, G1GCPhaseTimes::MergeRSFromRemSetCards) +
p->sum_thread_work_items(G1GCPhaseTimes::OptMergeRS, G1GCPhaseTimes::MergeRSFromRemSetCards);
// Number of cards attempted to merge in the Merge RS phase.
size_t const total_cards_from_rs = p->sum_thread_work_items(G1GCPhaseTimes::MergeRS, G1GCPhaseTimes::MergeRSTotalCards) +
p->sum_thread_work_items(G1GCPhaseTimes::OptMergeRS, G1GCPhaseTimes::MergeRSTotalCards);
// Cards marked as being to collection set. May be inaccurate due to races.
size_t const total_non_young_rs_cards = MIN2(pending_cards_from_refinement_table + merged_cards_from_card_rs, total_cards_scanned);
if (update_stats) {
// We maintain the invariant that all objects allocated by mutator
// threads will be allocated out of eden regions. So, we can use
@ -827,6 +850,98 @@ void G1Policy::record_young_collection_end(bool concurrent_operation_is_full_mar
uint regions_allocated = _collection_set->eden_region_length();
double alloc_rate_ms = (double) regions_allocated / app_time_ms;
_analytics->report_alloc_rate_ms(alloc_rate_ms);
double merge_refinement_table_time = p->cur_merge_refinement_table_time();
if (merge_refinement_table_time != 0.0) {
_analytics->report_merge_refinement_table_time_ms(merge_refinement_table_time);
}
if (merged_cards_from_card_rs >= G1NumCardsCostSampleThreshold) {
double avg_time_merge_cards = average_time_ms(G1GCPhaseTimes::MergeER) +
average_time_ms(G1GCPhaseTimes::MergeRS) +
average_time_ms(G1GCPhaseTimes::OptMergeRS);
_analytics->report_cost_per_card_merge_ms(avg_time_merge_cards / merged_cards_from_card_rs, is_young_only_pause);
log_debug(gc, ergo, cset)("cost per card merge (young %s): avg time %.2f merged cards %zu cost(1m) %.2f pred_cost(1m-yo) %.2f pred_cost(1m-old) %.2f",
BOOL_TO_STR(is_young_only_pause),
avg_time_merge_cards, merged_cards_from_card_rs, 1e6 * avg_time_merge_cards / merged_cards_from_card_rs, _analytics->predict_card_merge_time_ms(1e6, true), _analytics->predict_card_merge_time_ms(1e6, false));
} else {
log_debug(gc, ergo, cset)("cost per card merge (young: %s): skipped, total cards %zu", BOOL_TO_STR(is_young_only_pause), total_non_young_rs_cards);
}
// Update prediction for card scan
if (total_cards_scanned >= G1NumCardsCostSampleThreshold) {
double avg_card_scan_time = average_time_ms(G1GCPhaseTimes::ScanHR) +
average_time_ms(G1GCPhaseTimes::OptScanHR);
_analytics->report_cost_per_card_scan_ms(avg_card_scan_time / total_cards_scanned, is_young_only_pause);
log_debug(gc, ergo, cset)("cost per card scan (young: %s): avg time %.2f total cards %zu cost(1m) %.2f pred_cost(1m-yo) %.2f pred_cost(1m-old) %.2f",
BOOL_TO_STR(is_young_only_pause),
avg_card_scan_time, total_cards_scanned, 1e6 * avg_card_scan_time / total_cards_scanned, _analytics->predict_card_scan_time_ms(1e6, true), _analytics->predict_card_scan_time_ms(1e6, false));
} else {
log_debug(gc, ergo, cset)("cost per card scan (young: %s): skipped, total cards %zu", BOOL_TO_STR(is_young_only_pause), total_cards_scanned);
}
// Update prediction for the ratio between cards actually merged onto the card
// table from the remembered sets and the total number of cards attempted to
// merge.
double merge_to_scan_ratio = 1.0;
if (total_cards_from_rs > 0) {
merge_to_scan_ratio = (double)merged_cards_from_card_rs / total_cards_from_rs;
}
_analytics->report_card_merge_to_scan_ratio(merge_to_scan_ratio, is_young_only_pause);
// Update prediction for code root scan
size_t const total_code_roots_scanned = p->sum_thread_work_items(G1GCPhaseTimes::CodeRoots, G1GCPhaseTimes::CodeRootsScannedNMethods) +
p->sum_thread_work_items(G1GCPhaseTimes::OptCodeRoots, G1GCPhaseTimes::CodeRootsScannedNMethods);
if (total_code_roots_scanned >= G1NumCodeRootsCostSampleThreshold) {
double avg_time_code_root_scan = average_time_ms(G1GCPhaseTimes::CodeRoots) +
average_time_ms(G1GCPhaseTimes::OptCodeRoots);
_analytics->report_cost_per_code_root_scan_ms(avg_time_code_root_scan / total_code_roots_scanned, is_young_only_pause);
}
// Update prediction for copy cost per byte
size_t copied_bytes = p->sum_thread_work_items(G1GCPhaseTimes::MergePSS, G1GCPhaseTimes::MergePSSCopiedBytes);
if (copied_bytes > 0) {
double avg_copy_time = average_time_ms(G1GCPhaseTimes::ObjCopy) + average_time_ms(G1GCPhaseTimes::OptObjCopy);
double cost_per_byte_ms = avg_copy_time / copied_bytes;
_analytics->report_cost_per_byte_ms(cost_per_byte_ms, is_young_only_pause);
}
if (_collection_set->young_region_length() > 0) {
_analytics->report_young_other_cost_per_region_ms(young_other_time_ms() /
_collection_set->young_region_length());
}
if (_collection_set->initial_old_region_length() > 0) {
_analytics->report_non_young_other_cost_per_region_ms(non_young_other_time_ms() /
_collection_set->initial_old_region_length());
}
_analytics->report_constant_other_time_ms(constant_other_time_ms(pause_time_ms));
_analytics->report_pending_cards(pending_cards_from_refinement_table, is_young_only_pause);
_analytics->report_card_rs_length(total_cards_scanned - total_non_young_rs_cards, is_young_only_pause);
_analytics->report_code_root_rs_length((double)total_code_roots_scanned, is_young_only_pause);
}
{
double mutator_end_time = cur_pause_start_sec() * MILLIUNITS;
G1ConcurrentRefineStats* stats = _g1h->concurrent_refine()->sweep_state().stats();
// Record any available refinement statistics.
record_refinement_stats(stats);
double yield_duration_ms = TimeHelper::counter_to_millis(_g1h->yield_duration_in_refinement_epoch());
record_dirtying_stats(TimeHelper::counter_to_millis(_g1h->last_refinement_epoch_start()),
mutator_end_time,
pending_cards_from_refinement_table,
yield_duration_ms,
phase_times()->sum_thread_work_items(G1GCPhaseTimes::MergePSS, G1GCPhaseTimes::MergePSSPendingCards),
phase_times()->sum_thread_work_items(G1GCPhaseTimes::MergePSS, G1GCPhaseTimes::MergePSSToYoungGenCards));
}
record_pause(this_pause, start_time_sec, end_time_sec, allocation_failure);
@ -857,82 +972,6 @@ void G1Policy::record_young_collection_end(bool concurrent_operation_is_full_mar
_eden_surv_rate_group->start_adding_regions();
if (update_stats) {
// Update prediction for card merge.
size_t const merged_cards_from_log_buffers = p->sum_thread_work_items(G1GCPhaseTimes::MergeLB, G1GCPhaseTimes::MergeLBDirtyCards);
// MergeRSCards includes the cards from the Eager Reclaim phase.
size_t const merged_cards_from_rs = p->sum_thread_work_items(G1GCPhaseTimes::MergeRS, G1GCPhaseTimes::MergeRSCards) +
p->sum_thread_work_items(G1GCPhaseTimes::OptMergeRS, G1GCPhaseTimes::MergeRSCards);
size_t const total_cards_merged = merged_cards_from_rs +
merged_cards_from_log_buffers;
if (total_cards_merged >= G1NumCardsCostSampleThreshold) {
double avg_time_merge_cards = average_time_ms(G1GCPhaseTimes::MergeER) +
average_time_ms(G1GCPhaseTimes::MergeRS) +
average_time_ms(G1GCPhaseTimes::MergeLB) +
p->cur_distribute_log_buffers_time_ms() +
average_time_ms(G1GCPhaseTimes::OptMergeRS);
_analytics->report_cost_per_card_merge_ms(avg_time_merge_cards / total_cards_merged, is_young_only_pause);
}
// Update prediction for card scan
size_t const total_cards_scanned = p->sum_thread_work_items(G1GCPhaseTimes::ScanHR, G1GCPhaseTimes::ScanHRScannedCards) +
p->sum_thread_work_items(G1GCPhaseTimes::OptScanHR, G1GCPhaseTimes::ScanHRScannedCards);
if (total_cards_scanned >= G1NumCardsCostSampleThreshold) {
double avg_time_dirty_card_scan = average_time_ms(G1GCPhaseTimes::ScanHR) +
average_time_ms(G1GCPhaseTimes::OptScanHR);
_analytics->report_cost_per_card_scan_ms(avg_time_dirty_card_scan / total_cards_scanned, is_young_only_pause);
}
// Update prediction for the ratio between cards from the remembered
// sets and actually scanned cards from the remembered sets.
// Due to duplicates in the log buffers, the number of scanned cards
// can be smaller than the cards in the log buffers.
const size_t scanned_cards_from_rs = (total_cards_scanned > merged_cards_from_log_buffers) ? total_cards_scanned - merged_cards_from_log_buffers : 0;
double scan_to_merge_ratio = 0.0;
if (merged_cards_from_rs > 0) {
scan_to_merge_ratio = (double)scanned_cards_from_rs / merged_cards_from_rs;
}
_analytics->report_card_scan_to_merge_ratio(scan_to_merge_ratio, is_young_only_pause);
// Update prediction for code root scan
size_t const total_code_roots_scanned = p->sum_thread_work_items(G1GCPhaseTimes::CodeRoots, G1GCPhaseTimes::CodeRootsScannedNMethods) +
p->sum_thread_work_items(G1GCPhaseTimes::OptCodeRoots, G1GCPhaseTimes::CodeRootsScannedNMethods);
if (total_code_roots_scanned >= G1NumCodeRootsCostSampleThreshold) {
double avg_time_code_root_scan = average_time_ms(G1GCPhaseTimes::CodeRoots) +
average_time_ms(G1GCPhaseTimes::OptCodeRoots);
_analytics->report_cost_per_code_root_scan_ms(avg_time_code_root_scan / total_code_roots_scanned, is_young_only_pause);
}
// Update prediction for copy cost per byte
size_t copied_bytes = p->sum_thread_work_items(G1GCPhaseTimes::MergePSS, G1GCPhaseTimes::MergePSSCopiedBytes);
if (copied_bytes > 0) {
double cost_per_byte_ms = (average_time_ms(G1GCPhaseTimes::ObjCopy) + average_time_ms(G1GCPhaseTimes::OptObjCopy)) / copied_bytes;
_analytics->report_cost_per_byte_ms(cost_per_byte_ms, is_young_only_pause);
}
if (_collection_set->young_region_length() > 0) {
_analytics->report_young_other_cost_per_region_ms(young_other_time_ms() /
_collection_set->young_region_length());
}
if (_collection_set->initial_old_region_length() > 0) {
_analytics->report_non_young_other_cost_per_region_ms(non_young_other_time_ms() /
_collection_set->initial_old_region_length());
}
_analytics->report_constant_other_time_ms(constant_other_time_ms(pause_time_ms));
_analytics->report_pending_cards((double)pending_cards_at_gc_start(), is_young_only_pause);
_analytics->report_card_rs_length((double)_card_rs_length, is_young_only_pause);
_analytics->report_code_root_rs_length((double)total_code_roots_scanned, is_young_only_pause);
}
assert(!(G1GCPauseTypeHelper::is_concurrent_start_pause(this_pause) && collector_state()->mark_or_rebuild_in_progress()),
"If the last pause has been concurrent start, we should not have been in the marking window");
if (G1GCPauseTypeHelper::is_concurrent_start_pause(this_pause)) {
@ -963,29 +1002,26 @@ void G1Policy::record_young_collection_end(bool concurrent_operation_is_full_mar
}
// Note that _mmu_tracker->max_gc_time() returns the time in seconds.
double logged_cards_time_goal_ms = _mmu_tracker->max_gc_time() * MILLIUNITS * G1RSetUpdatingPauseTimePercent / 100.0;
double pending_cards_time_goal_ms = _mmu_tracker->max_gc_time() * MILLIUNITS * G1RSetUpdatingPauseTimePercent / 100.0;
double const logged_cards_time_ms = logged_cards_processing_time();
size_t logged_cards =
phase_times()->sum_thread_work_items(G1GCPhaseTimes::MergeLB,
G1GCPhaseTimes::MergeLBDirtyCards);
bool exceeded_goal = logged_cards_time_goal_ms < logged_cards_time_ms;
size_t predicted_thread_buffer_cards = _analytics->predict_dirtied_cards_in_thread_buffers();
double const pending_cards_time_ms = pending_cards_processing_time();
size_t pending_cards = phase_times()->sum_thread_work_items(G1GCPhaseTimes::ScanHR, G1GCPhaseTimes::ScanHRPendingCards) +
phase_times()->sum_thread_work_items(G1GCPhaseTimes::OptScanHR, G1GCPhaseTimes::ScanHRPendingCards);
bool exceeded_goal = pending_cards_time_goal_ms < pending_cards_time_ms;
G1ConcurrentRefine* cr = _g1h->concurrent_refine();
log_debug(gc, ergo, refine)
("GC refinement: goal: %zu + %zu / %1.2fms, actual: %zu / %1.2fms, %s",
("GC refinement: goal: %zu / %1.2fms, actual: %zu / %1.2fms, %s",
cr->pending_cards_target(),
predicted_thread_buffer_cards,
logged_cards_time_goal_ms,
logged_cards,
logged_cards_time_ms,
pending_cards_time_goal_ms,
pending_cards,
pending_cards_time_ms,
(exceeded_goal ? " (exceeded goal)" : ""));
cr->adjust_after_gc(logged_cards_time_ms,
logged_cards,
predicted_thread_buffer_cards,
logged_cards_time_goal_ms);
cr->adjust_after_gc(pending_cards_time_ms,
pending_cards,
pending_cards_time_goal_ms);
}
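For a rough feel of the goal computation above (numbers invented): with a 200 ms pause time goal and G1RSetUpdatingPauseTimePercent at its default of 10, the pending-card budget is 200 ms * 10 / 100 = 20 ms; a measured pending-card processing time of 25 ms would be logged as exceeding the goal, and adjust_after_gc() is then expected to steer towards fewer pending cards for the next pause.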
G1IHOPControl* G1Policy::create_ihop_control(const G1OldGenAllocationTracker* old_gen_alloc_tracker,
@ -1057,34 +1093,27 @@ double G1Policy::predict_base_time_ms(size_t pending_cards,
size_t code_root_rs_length) const {
bool in_young_only_phase = collector_state()->in_young_only_phase();
size_t unique_cards_from_rs = _analytics->predict_scan_card_num(card_rs_length, in_young_only_phase);
// Assume that all cards from the log buffers will be scanned, i.e. there are no
// duplicates in that set.
size_t effective_scanned_cards = unique_cards_from_rs + pending_cards;
// Cards from the refinement table and the cards from the young gen remset are
// unique to each other as they are located on the card table.
size_t effective_scanned_cards = card_rs_length + pending_cards;
double card_merge_time = _analytics->predict_card_merge_time_ms(pending_cards + card_rs_length, in_young_only_phase);
double refinement_table_merge_time = _analytics->predict_merge_refinement_table_time_ms();
double card_scan_time = _analytics->predict_card_scan_time_ms(effective_scanned_cards, in_young_only_phase);
double code_root_scan_time = _analytics->predict_code_root_scan_time_ms(code_root_rs_length, in_young_only_phase);
double constant_other_time = _analytics->predict_constant_other_time_ms();
double survivor_evac_time = predict_survivor_regions_evac_time();
double total_time = card_merge_time + card_scan_time + code_root_scan_time + constant_other_time + survivor_evac_time;
double total_time = refinement_table_merge_time + card_scan_time + code_root_scan_time + constant_other_time + survivor_evac_time;
log_trace(gc, ergo, heap)("Predicted base time: total %f lb_cards %zu card_rs_length %zu effective_scanned_cards %zu "
"card_merge_time %f card_scan_time %f code_root_rs_length %zu code_root_scan_time %f "
"refinement_table_merge_time %f card_scan_time %f code_root_rs_length %zu code_root_scan_time %f "
"constant_other_time %f survivor_evac_time %f",
total_time, pending_cards, card_rs_length, effective_scanned_cards,
card_merge_time, card_scan_time, code_root_rs_length, code_root_scan_time,
refinement_table_merge_time, card_scan_time, code_root_rs_length, code_root_scan_time,
constant_other_time, survivor_evac_time);
return total_time;
}
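To make the prediction above concrete (every number is invented): with card_rs_length = 15,000 and pending_cards = 5,000, effective_scanned_cards is 20,000; at an assumed 0.0002 ms per scanned card that is about 4 ms of card scan time, and adding, say, a 1.5 ms refinement table merge, 0.5 ms of code root scanning, 2 ms of constant other time and 6 ms of predicted survivor evacuation gives a predicted base time of roughly 14 ms.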
double G1Policy::predict_base_time_ms(size_t pending_cards) const {
bool for_young_only_phase = collector_state()->in_young_only_phase();
size_t card_rs_length = _analytics->predict_card_rs_length(for_young_only_phase);
return predict_base_time_ms(pending_cards, card_rs_length);
}
double G1Policy::predict_base_time_ms(size_t pending_cards, size_t card_rs_length) const {
bool for_young_only_phase = collector_state()->in_young_only_phase();
size_t code_root_rs_length = _analytics->predict_code_root_rs_length(for_young_only_phase);
@ -1428,6 +1457,64 @@ size_t G1Policy::allowed_waste_in_collection_set() const {
return G1HeapWastePercent * _g1h->capacity() / 100;
}
bool G1Policy::try_get_available_bytes_estimate(size_t& available_bytes) const {
// Getting used young bytes requires holding Heap_lock. But we cannot take the
// lock normally and block until it is available. Blocking on the lock could
// deadlock with a GC VMOp that is holding the lock and requesting a
// safepoint. Instead try to lock, and return the result of that attempt,
// and the estimate if successful.
if (Heap_lock->try_lock()) {
size_t used_bytes = estimate_used_young_bytes_locked();
Heap_lock->unlock();
size_t young_bytes = young_list_target_length() * G1HeapRegion::GrainBytes;
available_bytes = young_bytes - MIN2(young_bytes, used_bytes);
return true;
} else {
available_bytes = 0;
return false;
}
}
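Outside of HotSpot, the same low-priority pattern boils down to a try_lock whose failure is simply reported to the caller instead of blocking; a minimal standalone C++ sketch (the lock, the estimate function and all values are placeholders, not HotSpot APIs):

#include <algorithm>
#include <cstddef>
#include <mutex>

static std::mutex heap_lock;                 // stand-in for Heap_lock
static size_t estimate_used_young_bytes() {  // stand-in for the locked estimate
  return 8u * 1024 * 1024;                   // dummy value
}

bool try_get_available_bytes_estimate(size_t target_bytes, size_t& available_bytes) {
  // Never block here: a blocked caller could hold up (or deadlock with) a
  // higher-priority owner of the lock that is waiting for a safepoint.
  if (!heap_lock.try_lock()) {
    available_bytes = 0;
    return false;                            // caller should simply retry later
  }
  size_t used = estimate_used_young_bytes();
  heap_lock.unlock();
  available_bytes = target_bytes - std::min(target_bytes, used);
  return true;
}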
double G1Policy::predict_time_to_next_gc_ms(size_t available_bytes) const {
double alloc_region_rate = _analytics->predict_alloc_rate_ms();
double alloc_bytes_rate = alloc_region_rate * G1HeapRegion::GrainBytes;
if (alloc_bytes_rate == 0.0) {
// A zero rate indicates we don't yet have data to use for predictions.
// Since we don't have any idea how long until the next GC, use a time of
// zero.
return 0.0;
} else {
// If the heap size is large and the allocation rate is small, we can get
// a predicted time until next GC that is so large it can cause problems
// (such as overflow) in other calculations. Limit the prediction to one
// hour, which is still large in this context.
const double one_hour_ms = 60.0 * 60.0 * MILLIUNITS;
double raw_time_ms = available_bytes / alloc_bytes_rate;
return MIN2(raw_time_ms, one_hour_ms);
}
}
uint64_t G1Policy::adjust_wait_time_ms(double wait_time_ms, uint64_t min_time_ms) {
return MAX2(static_cast<uint64_t>(sqrt(wait_time_ms) * 4.0), min_time_ms);
}
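Putting the two helpers above together, the delay shaping can be reproduced in a few lines of standalone C++ (the constants mirror the ones used here, everything else is made up for illustration):

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

// Cap the raw prediction at one hour, as above.
static double predict_time_to_next_gc_ms(double available_bytes, double alloc_bytes_per_ms) {
  if (alloc_bytes_per_ms == 0.0) return 0.0;   // no data yet
  const double one_hour_ms = 60.0 * 60.0 * 1000.0;
  return std::min(available_bytes / alloc_bytes_per_ms, one_hour_ms);
}

// Grow the wait time with the square root of the predicted distance to the
// next GC, but never drop below min_time_ms.
static uint64_t adjust_wait_time_ms(double wait_time_ms, uint64_t min_time_ms) {
  return std::max(static_cast<uint64_t>(std::sqrt(wait_time_ms) * 4.0), min_time_ms);
}

int main() {
  double t = predict_time_to_next_gc_ms(512.0 * 1024 * 1024, 64.0 * 1024);        // ~8192 ms until the next GC
  std::printf("wait %llu ms\n", (unsigned long long)adjust_wait_time_ms(t, 47));  // prints "wait 362 ms"
}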
double G1Policy::last_mutator_dirty_start_time_ms() {
return TimeHelper::counter_to_millis(_g1h->last_refinement_epoch_start());
}
size_t G1Policy::current_pending_cards() {
double now = os::elapsedTime() * MILLIUNITS;
return _pending_cards_from_gc + _analytics->predict_dirtied_cards_rate_ms() * (now - last_mutator_dirty_start_time_ms());
}
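For instance (invented numbers): with 10,000 cards left dirty by the last GC, a predicted dirtying rate of 50 cards/ms and 200 ms elapsed since the refinement epoch started, the estimate comes out to 10,000 + 50 * 200 = 20,000 pending cards.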
size_t G1Policy::current_to_collection_set_cards() {
// The incremental part is covered by the dirtied_cards_rate, i.e. pending cards
// cover both to collection set cards and other interesting cards because we do not
// know which is which until we look.
return _to_collection_set_cards;
}
uint G1Policy::min_retained_old_cset_length() const {
// Guarantee some progress with retained regions regardless of available time by
// taking at least one region.

View File

@ -48,6 +48,7 @@ class G1HeapRegion;
class G1CollectionSet;
class G1CollectionSetCandidates;
class G1CollectionSetChooser;
class G1ConcurrentRefineStats;
class G1IHOPControl;
class G1Analytics;
class G1SurvivorRegions;
@ -101,9 +102,18 @@ class G1Policy: public CHeapObj<mtGC> {
uint _free_regions_at_end_of_collection;
size_t _card_rs_length;
size_t _pending_cards_at_gc_start;
// Tracks the number of cards marked as dirty (only) during garbage collection
// (evacuation) on the card table.
// This is needed to properly account for those cards in the heuristics that decide
// when to start refinement, which need to know approximately how many cards are
// currently on the card table.
// After the first completed refinement sweep of the refinement table between two
// garbage collections this value is reset to zero as that refinement processed all
// those cards.
size_t _pending_cards_from_gc;
// Tracks the approximate number of cards found as to-collection-set by either the
// garbage collection or the most recent refinement sweep.
size_t _to_collection_set_cards;
G1ConcurrentStartToMixedTimeTracker _concurrent_start_to_mixed;
@ -111,7 +121,7 @@ class G1Policy: public CHeapObj<mtGC> {
return collector_state()->in_young_only_phase() && !collector_state()->mark_or_rebuild_in_progress();
}
double logged_cards_processing_time() const;
double pending_cards_processing_time() const;
public:
const G1Predictions& predictor() const { return _predictor; }
const G1Analytics* analytics() const { return const_cast<const G1Analytics*>(_analytics); }
@ -129,16 +139,10 @@ public:
hr->install_surv_rate_group(_survivor_surv_rate_group);
}
void record_card_rs_length(size_t num_cards) {
_card_rs_length = num_cards;
}
double cur_pause_start_sec() const {
return _cur_pause_start_sec;
}
double predict_base_time_ms(size_t pending_cards) const;
double predict_base_time_ms(size_t pending_cards, size_t card_rs_length) const;
// Base time contains handling remembered sets and constant other time of the
@ -239,7 +243,13 @@ private:
public:
size_t predict_bytes_to_copy(G1HeapRegion* hr) const;
size_t pending_cards_at_gc_start() const { return _pending_cards_at_gc_start; }
double last_mutator_dirty_start_time_ms();
size_t pending_cards_from_gc() const { return _pending_cards_from_gc; }
size_t current_pending_cards();
size_t current_to_collection_set_cards();
// GC efficiency for collecting the region based on the time estimate for
// merging and scanning incoming references.
@ -286,7 +296,7 @@ public:
// Check the current value of the young list RSet length and
// compare it against the last prediction. If the current value is
// higher, recalculate the young list target length prediction.
void revise_young_list_target_length(size_t card_rs_length, size_t code_root_rs_length);
void revise_young_list_target_length(size_t pending_cards, size_t card_rs_length, size_t code_root_rs_length);
// This should be called after the heap is resized.
void record_new_heap_size(uint new_number_of_regions);
@ -325,7 +335,6 @@ public:
// Amount of allowed waste in bytes in the collection set.
size_t allowed_waste_in_collection_set() const;
private:
// Predict the number of bytes of surviving objects from survivor and old
@ -359,17 +368,39 @@ public:
bool use_adaptive_young_list_length() const;
// Try to get an estimate of the currently available bytes in the young gen. This
// operation considers itself low-priority: if other threads need the resources
// required to get the information, return false to indicate that the caller
// should retry "soon".
bool try_get_available_bytes_estimate(size_t& bytes) const;
// Estimate time until next GC, based on remaining bytes available for
// allocation and the allocation rate.
double predict_time_to_next_gc_ms(size_t available_bytes) const;
// Adjust wait times so that wakeups become less frequent the longer the next GC is away.
// But don't increase the wait time too rapidly, and bound it from below by min_time_ms.
// This reduces the number of thread wakeups that just immediately
// go back to waiting, while still being responsive to behavior changes.
uint64_t adjust_wait_time_ms(double wait_time_ms, uint64_t min_time_ms);
private:
// Return an estimate of the number of bytes used in young gen.
// precondition: holding Heap_lock
size_t estimate_used_young_bytes_locked() const;
public:
void transfer_survivors_to_cset(const G1SurvivorRegions* survivors);
// Record and log stats and pending cards before not-full collection.
// thread_buffer_cards is the number of cards that were in per-thread
// buffers. pending_cards includes thread_buffer_cards.
void record_concurrent_refinement_stats(size_t pending_cards,
size_t thread_buffer_cards);
// Record and log stats and pending cards to update predictors.
void record_refinement_stats(G1ConcurrentRefineStats* stats);
void record_dirtying_stats(double last_mutator_start_dirty_ms,
double last_mutator_end_dirty_ms,
size_t pending_cards,
double yield_duration,
size_t next_pending_cards_from_gc,
size_t next_to_collection_set_cards);
bool should_retain_evac_failed_region(G1HeapRegion* r) const {
return should_retain_evac_failed_region(r->hrm_index());

View File

@ -1,148 +0,0 @@
/*
* Copyright (c) 2019, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "gc/g1/g1RedirtyCardsQueue.hpp"
#include "gc/shared/bufferNode.hpp"
#include "runtime/atomicAccess.hpp"
#include "utilities/debug.hpp"
#include "utilities/macros.hpp"
// G1RedirtyCardsLocalQueueSet
G1RedirtyCardsLocalQueueSet::G1RedirtyCardsLocalQueueSet(G1RedirtyCardsQueueSet* shared_qset) :
PtrQueueSet(shared_qset->allocator()),
_shared_qset(shared_qset),
_buffers(),
_queue(this)
{}
#ifdef ASSERT
G1RedirtyCardsLocalQueueSet::~G1RedirtyCardsLocalQueueSet() {
assert(_buffers._head == nullptr, "unflushed qset");
assert(_buffers._tail == nullptr, "invariant");
assert(_buffers._entry_count == 0, "invariant");
}
#endif // ASSERT
void G1RedirtyCardsLocalQueueSet::enqueue_completed_buffer(BufferNode* node) {
_buffers._entry_count += node->size();
node->set_next(_buffers._head);
_buffers._head = node;
if (_buffers._tail == nullptr) {
_buffers._tail = node;
}
}
void G1RedirtyCardsLocalQueueSet::enqueue(void* value) {
if (!try_enqueue(_queue, value)) {
BufferNode* old_node = exchange_buffer_with_new(_queue);
if (old_node != nullptr) {
enqueue_completed_buffer(old_node);
}
retry_enqueue(_queue, value);
}
}
BufferNodeList G1RedirtyCardsLocalQueueSet::flush() {
flush_queue(_queue);
BufferNodeList cur_buffers = _buffers;
_shared_qset->add_bufferlist(_buffers);
_buffers = BufferNodeList();
return cur_buffers;
}
// G1RedirtyCardsLocalQueueSet::Queue
G1RedirtyCardsLocalQueueSet::Queue::Queue(G1RedirtyCardsLocalQueueSet* qset) :
PtrQueue(qset)
{}
#ifdef ASSERT
G1RedirtyCardsLocalQueueSet::Queue::~Queue() {
assert(buffer() == nullptr, "unflushed queue");
}
#endif // ASSERT
// G1RedirtyCardsQueueSet
G1RedirtyCardsQueueSet::G1RedirtyCardsQueueSet(BufferNode::Allocator* allocator) :
PtrQueueSet(allocator),
_list(),
_entry_count(0),
_tail(nullptr)
DEBUG_ONLY(COMMA _collecting(true))
{}
G1RedirtyCardsQueueSet::~G1RedirtyCardsQueueSet() {
verify_empty();
}
#ifdef ASSERT
void G1RedirtyCardsQueueSet::verify_empty() const {
assert(_list.empty(), "precondition");
assert(_tail == nullptr, "invariant");
assert(_entry_count == 0, "invariant");
}
#endif // ASSERT
BufferNode* G1RedirtyCardsQueueSet::all_completed_buffers() const {
DEBUG_ONLY(_collecting = false;)
return _list.top();
}
BufferNodeList G1RedirtyCardsQueueSet::take_all_completed_buffers() {
DEBUG_ONLY(_collecting = false;)
BufferNodeList result(_list.pop_all(), _tail, _entry_count);
_tail = nullptr;
_entry_count = 0;
DEBUG_ONLY(_collecting = true;)
return result;
}
void G1RedirtyCardsQueueSet::update_tail(BufferNode* node) {
// Node is the tail of a (possibly single element) list just prepended to
// _list. If, after that prepend, node's follower is null, then node is
// also the tail of _list, so record it as such.
if (node->next() == nullptr) {
assert(_tail == nullptr, "invariant");
_tail = node;
}
}
void G1RedirtyCardsQueueSet::enqueue_completed_buffer(BufferNode* node) {
assert(_collecting, "precondition");
AtomicAccess::add(&_entry_count, node->size());
_list.push(*node);
update_tail(node);
}
void G1RedirtyCardsQueueSet::add_bufferlist(const BufferNodeList& buffers) {
assert(_collecting, "precondition");
if (buffers._head != nullptr) {
assert(buffers._tail != nullptr, "invariant");
AtomicAccess::add(&_entry_count, buffers._entry_count);
_list.prepend(*buffers._head, *buffers._tail);
update_tail(buffers._tail);
}
}

View File

@ -1,98 +0,0 @@
/*
* Copyright (c) 2019, 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#ifndef SHARE_GC_G1_G1REDIRTYCARDSQUEUE_HPP
#define SHARE_GC_G1_G1REDIRTYCARDSQUEUE_HPP
#include "gc/shared/bufferNode.hpp"
#include "gc/shared/bufferNodeList.hpp"
#include "gc/shared/ptrQueue.hpp"
#include "memory/padded.hpp"
#include "utilities/macros.hpp"
class G1RedirtyCardsQueueSet;
// A thread-local qset and queue. It provides an uncontended staging
// area for completed buffers, to be flushed to the shared qset en masse.
class G1RedirtyCardsLocalQueueSet : private PtrQueueSet {
class Queue : public PtrQueue {
public:
Queue(G1RedirtyCardsLocalQueueSet* qset);
~Queue() NOT_DEBUG(= default);
};
G1RedirtyCardsQueueSet* _shared_qset;
BufferNodeList _buffers;
Queue _queue;
// Add the buffer to the local list.
virtual void enqueue_completed_buffer(BufferNode* node);
public:
G1RedirtyCardsLocalQueueSet(G1RedirtyCardsQueueSet* shared_qset);
~G1RedirtyCardsLocalQueueSet() NOT_DEBUG(= default);
void enqueue(void* value);
// Transfer all completed buffers to the shared qset.
// Returns the flushed BufferNodeList which is later used
// as a shortcut into the shared qset.
BufferNodeList flush();
};
// Card table entries to be redirtied and the cards reprocessed later.
// Has two phases, collecting and processing. During the collecting
// phase buffers are added to the set. Once collecting is complete and
// processing starts, buffers can no longer be added. Taking all the
// collected (and processed) buffers reverts back to collecting, allowing
// the set to be reused for another round of redirtying.
class G1RedirtyCardsQueueSet : public PtrQueueSet {
DEFINE_PAD_MINUS_SIZE(1, DEFAULT_PADDING_SIZE, 0);
BufferNode::Stack _list;
DEFINE_PAD_MINUS_SIZE(2, DEFAULT_PADDING_SIZE, sizeof(size_t));
volatile size_t _entry_count;
DEFINE_PAD_MINUS_SIZE(3, DEFAULT_PADDING_SIZE, sizeof(BufferNode*));
BufferNode* _tail;
DEBUG_ONLY(mutable bool _collecting;)
void update_tail(BufferNode* node);
public:
G1RedirtyCardsQueueSet(BufferNode::Allocator* allocator);
~G1RedirtyCardsQueueSet();
void verify_empty() const NOT_DEBUG_RETURN;
// Collect buffers. These functions are thread-safe.
// precondition: Must not be concurrent with buffer processing.
virtual void enqueue_completed_buffer(BufferNode* node);
void add_bufferlist(const BufferNodeList& buffers);
// Processing phase operations.
// precondition: Must not be concurrent with buffer collection.
BufferNode* all_completed_buffers() const;
BufferNodeList take_all_completed_buffers();
};
#endif // SHARE_GC_G1_G1REDIRTYCARDSQUEUE_HPP

File diff suppressed because it is too large

View File

@ -26,6 +26,7 @@
#define SHARE_GC_G1_G1REMSET_HPP
#include "gc/g1/g1CardTable.hpp"
#include "gc/g1/g1CardTableClaimTable.hpp"
#include "gc/g1/g1GCPhaseTimes.hpp"
#include "gc/g1/g1HeapRegion.hpp"
#include "gc/g1/g1OopClosures.hpp"
@ -65,20 +66,15 @@ private:
G1CollectedHeap* _g1h;
G1CardTable* _ct;
G1Policy* _g1p;
void print_merge_heap_roots_stats();
G1Policy* _g1p;
void assert_scan_top_is_null(uint hrm_index) NOT_DEBUG_RETURN;
void enqueue_for_reprocessing(CardValue* card_ptr);
public:
// Initialize data that depends on the heap size being known.
void initialize(uint max_num_regions);
G1RemSet(G1CollectedHeap* g1h, G1CardTable* ct);
G1RemSet(G1CollectedHeap* g1h);
~G1RemSet();
// Scan all cards in the non-collection set regions that potentially contain
@ -101,7 +97,7 @@ public:
// Print coarsening stats.
void print_coarsen_stats();
// Creates a task for cleaining up temporary data structures and the
// Creates a task for cleaning up temporary data structures and the
// card table, removing temporary duplicate detection information.
G1AbstractSubTask* create_cleanup_after_scan_heap_roots_task();
// Excludes the given region from heap root scanning.
@ -122,16 +118,19 @@ public:
G1GCPhaseTimes::GCParPhases scan_phase,
G1GCPhaseTimes::GCParPhases objcopy_phase);
// Two methods for concurrent refinement support, executed concurrently to
// the mutator:
// Cleans the card at "*card_ptr_addr" before refinement, returns true iff the
// card needs later refinement.
bool clean_card_before_refine(CardValue** const card_ptr_addr);
enum RefineResult {
HasRefToCSet, // The (dirty) card has a reference to the collection set.
AlreadyToCSet, // The card has already been marked as having a reference to the collection set.
HasRefToOld, // The dirty card contains references to other old regions (not the collection set).
NoCrossRegion, // There is no interesting reference in the card any more. The mutator changed
// all references to uninteresting ones after dirtying the card.
CouldNotParse // The card is unparsable; it needs to be retried later.
};
// Refine the region corresponding to "card_ptr". Must be called after
// being filtered by clean_card_before_refine(), and after proper
// fence/synchronization.
void refine_card_concurrently(CardValue* const card_ptr,
const uint worker_id);
RefineResult refine_card_concurrently(CardValue* const card_ptr,
const uint worker_id);
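A hypothetical caller could dispatch on the refinement result along the following lines; this is only a sketch that tallies outcomes and flags unparsable cards for a retry, not the actual refinement sweep (the enum below mirrors the one declared above):

#include <cstddef>

enum class RefineResult {                    // mirrors G1RemSet::RefineResult
  HasRefToCSet, AlreadyToCSet, HasRefToOld, NoCrossRegion, CouldNotParse
};

struct SweepStats {
  size_t to_cset = 0;
  size_t to_old = 0;
  size_t clean = 0;
  size_t retried = 0;
};

// Placeholder dispatch: count each outcome and remember whether the card has
// to be looked at again later.
inline void handle_refine_result(RefineResult r, SweepStats& stats, bool& retry_later) {
  retry_later = false;
  switch (r) {
    case RefineResult::HasRefToCSet:  stats.to_cset++;  break;
    case RefineResult::AlreadyToCSet: stats.to_cset++;  break;
    case RefineResult::HasRefToOld:   stats.to_old++;   break;
    case RefineResult::NoCrossRegion: stats.clean++;    break;
    case RefineResult::CouldNotParse: stats.retried++;  retry_later = true; break;
  }
}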
// Print accumulated summary info from the start of the VM.
void print_summary_info();

View File

@ -27,7 +27,6 @@
#include "gc/g1/g1CollectedHeap.inline.hpp"
#include "gc/g1/g1ConcurrentRefine.hpp"
#include "gc/g1/g1ConcurrentRefineThread.hpp"
#include "gc/g1/g1DirtyCardQueue.hpp"
#include "gc/g1/g1HeapRegion.hpp"
#include "gc/g1/g1HeapRegionRemSet.inline.hpp"
#include "gc/g1/g1RemSet.hpp"
@ -37,39 +36,61 @@
#include "runtime/javaThread.hpp"
void G1RemSetSummary::update() {
class CollectData : public ThreadClosure {
G1ConcurrentRefine* refine = G1CollectedHeap::heap()->concurrent_refine();
class CollectWorkerData : public ThreadClosure {
G1RemSetSummary* _summary;
uint _counter;
public:
CollectData(G1RemSetSummary * summary) : _summary(summary), _counter(0) {}
CollectWorkerData(G1RemSetSummary* summary) : _summary(summary), _counter(0) {}
virtual void do_thread(Thread* t) {
G1ConcurrentRefineThread* crt = static_cast<G1ConcurrentRefineThread*>(t);
_summary->set_refine_thread_cpu_time(_counter, crt->cpu_time());
_summary->set_worker_thread_cpu_time(_counter, crt->cpu_time());
_counter++;
}
} collector(this);
G1CollectedHeap* g1h = G1CollectedHeap::heap();
g1h->concurrent_refine()->threads_do(&collector);
refine->worker_threads_do(&collector);
class CollectControlData : public ThreadClosure {
G1RemSetSummary* _summary;
public:
CollectControlData(G1RemSetSummary* summary) : _summary(summary) {}
virtual void do_thread(Thread* t) {
G1ConcurrentRefineThread* crt = static_cast<G1ConcurrentRefineThread*>(t);
_summary->set_control_thread_cpu_time(crt->cpu_time());
}
} control(this);
refine->control_thread_do(&control);
}
void G1RemSetSummary::set_refine_thread_cpu_time(uint thread, jlong value) {
assert(_refine_threads_cpu_times != nullptr, "just checking");
assert(thread < _num_refine_threads, "just checking");
_refine_threads_cpu_times[thread] = value;
void G1RemSetSummary::set_worker_thread_cpu_time(uint thread, jlong value) {
assert(_worker_threads_cpu_times != nullptr, "just checking");
assert(thread < _num_worker_threads, "just checking");
_worker_threads_cpu_times[thread] = value;
}
jlong G1RemSetSummary::refine_thread_cpu_time(uint thread) const {
assert(_refine_threads_cpu_times != nullptr, "just checking");
assert(thread < _num_refine_threads, "just checking");
return _refine_threads_cpu_times[thread];
void G1RemSetSummary::set_control_thread_cpu_time(jlong value) {
_control_thread_cpu_time = value;
}
jlong G1RemSetSummary::worker_thread_cpu_time(uint thread) const {
assert(_worker_threads_cpu_times != nullptr, "just checking");
assert(thread < _num_worker_threads, "just checking");
return _worker_threads_cpu_times[thread];
}
jlong G1RemSetSummary::control_thread_cpu_time() const {
return _control_thread_cpu_time;
}
G1RemSetSummary::G1RemSetSummary(bool should_update) :
_num_refine_threads(G1ConcRefinementThreads),
_refine_threads_cpu_times(NEW_C_HEAP_ARRAY(jlong, _num_refine_threads, mtGC)) {
_num_worker_threads(G1ConcRefinementThreads),
_worker_threads_cpu_times(NEW_C_HEAP_ARRAY(jlong, _num_worker_threads, mtGC)),
_control_thread_cpu_time(0) {
memset(_refine_threads_cpu_times, 0, sizeof(jlong) * _num_refine_threads);
memset(_worker_threads_cpu_times, 0, sizeof(jlong) * _num_worker_threads);
if (should_update) {
update();
@ -77,23 +98,25 @@ G1RemSetSummary::G1RemSetSummary(bool should_update) :
}
G1RemSetSummary::~G1RemSetSummary() {
FREE_C_HEAP_ARRAY(jlong, _refine_threads_cpu_times);
FREE_C_HEAP_ARRAY(jlong, _worker_threads_cpu_times);
}
void G1RemSetSummary::set(G1RemSetSummary* other) {
assert(other != nullptr, "just checking");
assert(_num_refine_threads == other->_num_refine_threads, "just checking");
assert(_num_worker_threads == other->_num_worker_threads, "just checking");
memcpy(_refine_threads_cpu_times, other->_refine_threads_cpu_times, sizeof(jlong) * _num_refine_threads);
memcpy(_worker_threads_cpu_times, other->_worker_threads_cpu_times, sizeof(jlong) * _num_worker_threads);
_control_thread_cpu_time = other->_control_thread_cpu_time;
}
void G1RemSetSummary::subtract_from(G1RemSetSummary* other) {
assert(other != nullptr, "just checking");
assert(_num_refine_threads == other->_num_refine_threads, "just checking");
assert(_num_worker_threads == other->_num_worker_threads, "just checking");
for (uint i = 0; i < _num_refine_threads; i++) {
set_refine_thread_cpu_time(i, other->refine_thread_cpu_time(i) - refine_thread_cpu_time(i));
for (uint i = 0; i < _num_worker_threads; i++) {
set_worker_thread_cpu_time(i, other->worker_thread_cpu_time(i) - worker_thread_cpu_time(i));
}
_control_thread_cpu_time = other->_control_thread_cpu_time - _control_thread_cpu_time;
}
class G1PerRegionTypeRemSetCounters {
@ -376,9 +399,10 @@ public:
void G1RemSetSummary::print_on(outputStream* out, bool show_thread_times) {
if (show_thread_times) {
out->print_cr(" Concurrent refinement threads times (s)");
out->print_cr(" Control %5.2f Workers", (double)control_thread_cpu_time() / NANOSECS_PER_SEC);
out->print(" ");
for (uint i = 0; i < _num_refine_threads; i++) {
out->print(" %5.2f", (double)refine_thread_cpu_time(i) / NANOSECS_PER_SEC);
for (uint i = 0; i < _num_worker_threads; i++) {
out->print(" %5.2f", (double)worker_thread_cpu_time(i) / NANOSECS_PER_SEC);
}
out->cr();
}

View File

@ -33,10 +33,12 @@ class G1RemSet;
// A G1RemSetSummary manages statistical information about the remembered set.
class G1RemSetSummary {
size_t _num_refine_threads;
jlong* _refine_threads_cpu_times;
size_t _num_worker_threads;
jlong* _worker_threads_cpu_times;
jlong _control_thread_cpu_time;
void set_refine_thread_cpu_time(uint thread, jlong value);
void set_worker_thread_cpu_time(uint thread, jlong value);
void set_control_thread_cpu_time(jlong value);
// Update this summary with current data from various places.
void update();
@ -53,7 +55,8 @@ public:
void print_on(outputStream* out, bool show_thread_times);
jlong refine_thread_cpu_time(uint thread) const;
jlong worker_thread_cpu_time(uint thread) const;
jlong control_thread_cpu_time() const;
};
#endif // SHARE_GC_G1_G1REMSETSUMMARY_HPP

View File

@ -0,0 +1,96 @@
/*
* Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "gc/g1/g1CollectedHeap.hpp"
#include "gc/g1/g1Policy.hpp"
#include "gc/g1/g1ReviseYoungLengthTask.hpp"
#include "gc/g1/g1ServiceThread.hpp"
#include "gc/shared/suspendibleThreadSet.hpp"
jlong G1ReviseYoungLengthTask::reschedule_delay_ms() const {
G1Policy* policy = G1CollectedHeap::heap()->policy();
size_t available_bytes;
if (policy->try_get_available_bytes_estimate(available_bytes)) {
double predicted_time_to_next_gc_ms = policy->predict_time_to_next_gc_ms(available_bytes);
// Use a prime number close to 50ms as the minimum time, different from the values used
// by other components that derive their wait time from the try_get_available_bytes_estimate()
// call, to minimize interference.
uint64_t const min_wait_time_ms = 47;
return policy->adjust_wait_time_ms(predicted_time_to_next_gc_ms, min_wait_time_ms);
} else {
// Failed to get estimate of available bytes. Try again asap.
return 1;
}
}
class G1ReviseYoungLengthTask::RemSetSamplingClosure : public G1HeapRegionClosure {
size_t _sampled_code_root_rs_length;
public:
RemSetSamplingClosure() : _sampled_code_root_rs_length(0) { }
bool do_heap_region(G1HeapRegion* r) override {
G1HeapRegionRemSet* rem_set = r->rem_set();
_sampled_code_root_rs_length += rem_set->code_roots_list_length();
return false;
}
size_t sampled_code_root_rs_length() const { return _sampled_code_root_rs_length; }
};
void G1ReviseYoungLengthTask::adjust_young_list_target_length() {
G1CollectedHeap* g1h = G1CollectedHeap::heap();
G1Policy* policy = g1h->policy();
assert(policy->use_adaptive_young_list_length(), "should not call otherwise");
size_t pending_cards;
size_t current_to_collection_set_cards;
{
MutexLocker x(G1ReviseYoungLength_lock, Mutex::_no_safepoint_check_flag);
pending_cards = policy->current_pending_cards();
current_to_collection_set_cards = policy->current_to_collection_set_cards();
}
RemSetSamplingClosure cl;
g1h->collection_set()->iterate(&cl);
policy->revise_young_list_target_length(pending_cards,
current_to_collection_set_cards,
cl.sampled_code_root_rs_length());
}
G1ReviseYoungLengthTask::G1ReviseYoungLengthTask(const char* name) :
G1ServiceTask(name) { }
void G1ReviseYoungLengthTask::execute() {
SuspendibleThreadSetJoiner sts;
adjust_young_list_target_length();
schedule(reschedule_delay_ms());
}

View File

@ -0,0 +1,63 @@
/*
* Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#ifndef SHARE_GC_G1_G1REVISEYOUNGLENGTHTASK_HPP
#define SHARE_GC_G1_G1REVISEYOUNGLENGTHTASK_HPP
#include "gc/g1/g1CardSetMemory.hpp"
#include "gc/g1/g1HeapRegionRemSet.hpp"
#include "gc/g1/g1MonotonicArenaFreePool.hpp"
#include "gc/g1/g1ServiceThread.hpp"
#include "utilities/growableArray.hpp"
#include "utilities/ticks.hpp"
// ServiceTask to revise the young generation target length.
class G1ReviseYoungLengthTask : public G1ServiceTask {
// The delay used to reschedule this task.
jlong reschedule_delay_ms() const;
class RemSetSamplingClosure; // Helper class for calculating remembered set summary.
// Adjust the target length (in regions) of the young gen, based on the
// current length of the remembered sets.
//
// At the end of the GC G1 determines the length of the young gen based on
// how much time the next GC can take, and when the next GC may occur
// according to the MMU.
//
// The assumption is that a significant part of the GC is spent on scanning
// the remembered sets (and many other components), so this thread constantly
// reevaluates the prediction for the remembered set scanning costs, and potentially
// resizes the young gen. This may trigger a premature GC or even increase the young
// gen size to keep the pause time goal.
void adjust_young_list_target_length();
public:
explicit G1ReviseYoungLengthTask(const char* name);
void execute() override;
};
#endif // SHARE_GC_G1_G1REVISEYOUNGLENGTHTASK_HPP

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2018, 2024, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -25,7 +25,7 @@
#define SHARE_GC_G1_G1THREADLOCALDATA_HPP
#include "gc/g1/g1BarrierSet.hpp"
#include "gc/g1/g1DirtyCardQueue.hpp"
#include "gc/g1/g1CardTable.hpp"
#include "gc/g1/g1RegionPinCache.hpp"
#include "gc/shared/gc_globals.hpp"
#include "gc/shared/satbMarkQueue.hpp"
@ -36,7 +36,7 @@
class G1ThreadLocalData {
private:
SATBMarkQueue _satb_mark_queue;
G1DirtyCardQueue _dirty_card_queue;
G1CardTable::CardValue* _byte_map_base;
// Per-thread cache of pinned object count to reduce atomic operation traffic
// due to region pinning. Holds the last region where the mutator pinned an
@ -45,8 +45,8 @@ private:
G1ThreadLocalData() :
_satb_mark_queue(&G1BarrierSet::satb_mark_queue_set()),
_dirty_card_queue(&G1BarrierSet::dirty_card_queue_set()),
_pin_cache() {}
_byte_map_base(nullptr),
_pin_cache() { }
static G1ThreadLocalData* data(Thread* thread) {
assert(UseG1GC, "Sanity");
@ -57,10 +57,6 @@ private:
return Thread::gc_data_offset() + byte_offset_of(G1ThreadLocalData, _satb_mark_queue);
}
static ByteSize dirty_card_queue_offset() {
return Thread::gc_data_offset() + byte_offset_of(G1ThreadLocalData, _dirty_card_queue);
}
public:
static void create(Thread* thread) {
new (data(thread)) G1ThreadLocalData();
@ -74,10 +70,6 @@ public:
return data(thread)->_satb_mark_queue;
}
static G1DirtyCardQueue& dirty_card_queue(Thread* thread) {
return data(thread)->_dirty_card_queue;
}
static ByteSize satb_mark_queue_active_offset() {
return satb_mark_queue_offset() + SATBMarkQueue::byte_offset_of_active();
}
@ -90,14 +82,20 @@ public:
return satb_mark_queue_offset() + SATBMarkQueue::byte_offset_of_buf();
}
static ByteSize dirty_card_queue_index_offset() {
return dirty_card_queue_offset() + G1DirtyCardQueue::byte_offset_of_index();
static ByteSize card_table_base_offset() {
return Thread::gc_data_offset() + byte_offset_of(G1ThreadLocalData, _byte_map_base);
}
static ByteSize dirty_card_queue_buffer_offset() {
return dirty_card_queue_offset() + G1DirtyCardQueue::byte_offset_of_buf();
static void set_byte_map_base(Thread* thread, G1CardTable::CardValue* new_byte_map_base) {
data(thread)->_byte_map_base = new_byte_map_base;
}
#ifndef PRODUCT
static G1CardTable::CardValue* get_byte_map_base(Thread* thread) {
return data(thread)->_byte_map_base;
}
#endif
static G1RegionPinCache& pin_count_cache(Thread* thread) {
return data(thread)->_pin_cache;
}
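The _byte_map_base cached above is what barrier code reads through card_table_base_offset(); the address arithmetic it enables is the standard card table mapping, sketched standalone below (512-byte cards and a dirty value of zero match G1, everything else is a placeholder):

#include <cstdint>

typedef uint8_t CardValue;

static const int       kCardShift = 9;  // 512-byte cards
static const CardValue kDirty     = 0;  // dirty_card_val()

// Per-thread view of the current card table, as cached in G1ThreadLocalData.
struct ThreadLocalGCData {
  CardValue* byte_map_base;              // corresponds to _byte_map_base
};

// Post-barrier card mark, sketched: index the thread's byte_map_base with the
// store address shifted by the card size and write the dirty value.
inline void mark_card(ThreadLocalGCData* tld, void* store_addr) {
  CardValue* card = tld->byte_map_base + (reinterpret_cast<uintptr_t>(store_addr) >> kCardShift);
  if (*card != kDirty) {                 // conditional mark, as with UseCondCardMark
    *card = kDirty;
  }
}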

View File

@ -39,7 +39,6 @@
#include "gc/g1/g1MonitoringSupport.hpp"
#include "gc/g1/g1ParScanThreadState.inline.hpp"
#include "gc/g1/g1Policy.hpp"
#include "gc/g1/g1RedirtyCardsQueue.hpp"
#include "gc/g1/g1RegionPinCache.inline.hpp"
#include "gc/g1/g1RemSet.hpp"
#include "gc/g1/g1RootProcessor.hpp"
@ -914,13 +913,8 @@ class G1STWRefProcProxyTask : public RefProcProxyTask {
TaskTerminator _terminator;
G1ScannerTasksQueueSet& _task_queues;
// Special closure for enqueuing discovered fields: during enqueue the card table
// may not be in shape to properly handle normal barrier calls (e.g. card marks
// in regions that failed evacuation, scribbling of various values by card table
// scan code). Additionally the regular barrier enqueues into the "global"
// DCQS, but during GC we need these to-be-refined entries in the GC local queue
// so that after clearing the card table, the redirty cards phase will properly
// mark all dirty cards to be picked up by refinement.
// G1-specific closure for marking discovered fields. Need to mark the card in the
// refinement table as the card table is in use by garbage collection.
class G1EnqueueDiscoveredFieldClosure : public EnqueueDiscoveredFieldClosure {
G1CollectedHeap* _g1h;
G1ParScanThreadState* _pss;

View File

@ -45,7 +45,6 @@ class G1MonotonicArenaMemoryStats;
class G1NewTracer;
class G1ParScanThreadStateSet;
class G1Policy;
class G1RedirtyCardsQueueSet;
class G1RemSet;
class G1SurvivorRegions;
class G1YoungGCAllocationFailureInjector;

View File

@ -287,7 +287,7 @@ public:
_chunk_bitmap(mtGC) {
_num_evac_fail_regions = _evac_failure_regions->num_regions_evac_failed();
_num_chunks_per_region = G1CollectedHeap::get_chunks_per_region();
_num_chunks_per_region = G1CollectedHeap::get_chunks_per_region_for_scan();
_chunk_size = static_cast<uint>(G1HeapRegion::GrainWords / _num_chunks_per_region);
@ -300,7 +300,7 @@ public:
double worker_cost() const override {
assert(_evac_failure_regions->has_regions_evac_failed(), "Should not call this if there were no evacuation failures");
double workers_per_region = (double)G1CollectedHeap::get_chunks_per_region() / G1RestoreRetainedRegionChunksPerWorker;
double workers_per_region = (double)G1CollectedHeap::get_chunks_per_region_for_scan() / G1RestoreRetainedRegionChunksPerWorker;
return workers_per_region * _evac_failure_regions->num_regions_evac_failed();
}
@ -480,43 +480,6 @@ public:
}
};
class RedirtyLoggedCardTableEntryClosure : public G1CardTableEntryClosure {
size_t _num_dirtied;
G1CollectedHeap* _g1h;
G1CardTable* _g1_ct;
G1EvacFailureRegions* _evac_failure_regions;
G1HeapRegion* region_for_card(CardValue* card_ptr) const {
return _g1h->heap_region_containing(_g1_ct->addr_for(card_ptr));
}
bool will_become_free(G1HeapRegion* hr) const {
// A region will be freed during the FreeCollectionSet phase if the region is in the
// collection set and has not had an evacuation failure.
return _g1h->is_in_cset(hr) && !_evac_failure_regions->contains(hr->hrm_index());
}
public:
RedirtyLoggedCardTableEntryClosure(G1CollectedHeap* g1h, G1EvacFailureRegions* evac_failure_regions) :
G1CardTableEntryClosure(),
_num_dirtied(0),
_g1h(g1h),
_g1_ct(g1h->card_table()),
_evac_failure_regions(evac_failure_regions) { }
void do_card_ptr(CardValue* card_ptr) override {
G1HeapRegion* hr = region_for_card(card_ptr);
// Should only dirty cards in regions that won't be freed.
if (!will_become_free(hr)) {
*card_ptr = G1CardTable::dirty_card_val();
_num_dirtied++;
}
}
size_t num_dirtied() const { return _num_dirtied; }
};
class G1PostEvacuateCollectionSetCleanupTask2::ProcessEvacuationFailedRegionsTask : public G1AbstractSubTask {
G1EvacFailureRegions* _evac_failure_regions;
G1HeapRegionClaimer _claimer;
@ -572,48 +535,6 @@ public:
}
};
class G1PostEvacuateCollectionSetCleanupTask2::RedirtyLoggedCardsTask : public G1AbstractSubTask {
BufferNodeList* _rdc_buffers;
uint _num_buffer_lists;
G1EvacFailureRegions* _evac_failure_regions;
public:
RedirtyLoggedCardsTask(G1EvacFailureRegions* evac_failure_regions, BufferNodeList* rdc_buffers, uint num_buffer_lists) :
G1AbstractSubTask(G1GCPhaseTimes::RedirtyCards),
_rdc_buffers(rdc_buffers),
_num_buffer_lists(num_buffer_lists),
_evac_failure_regions(evac_failure_regions) { }
double worker_cost() const override {
// Needs more investigation.
return G1CollectedHeap::heap()->workers()->active_workers();
}
void do_work(uint worker_id) override {
RedirtyLoggedCardTableEntryClosure cl(G1CollectedHeap::heap(), _evac_failure_regions);
uint start = worker_id;
for (uint i = 0; i < _num_buffer_lists; i++) {
uint index = (start + i) % _num_buffer_lists;
BufferNode* next = AtomicAccess::load(&_rdc_buffers[index]._head);
BufferNode* tail = AtomicAccess::load(&_rdc_buffers[index]._tail);
while (next != nullptr) {
BufferNode* node = next;
next = AtomicAccess::cmpxchg(&_rdc_buffers[index]._head, node, (node != tail ) ? node->next() : nullptr);
if (next == node) {
cl.apply_to_buffer(node, worker_id);
next = (node != tail ) ? node->next() : nullptr;
} else {
break; // If there is contention, move to the next BufferNodeList
}
}
}
record_work_item(worker_id, 0, cl.num_dirtied());
}
};
// Helper class to keep statistics for the collection set freeing
class FreeCSetStats {
size_t _before_used_bytes; // Usage in regions successfully evacuated
@ -797,7 +718,6 @@ public:
JFREventForRegion event(r, _worker_id);
TimerForRegion timer(timer_for_region(r));
if (r->is_young()) {
assert_tracks_surviving_words(r);
r->record_surv_words_in_group(_surviving_young_words[r->young_index_in_cset()]);
@ -908,24 +828,34 @@ public:
}
};
class G1PostEvacuateCollectionSetCleanupTask2::ResizeTLABsTask : public G1AbstractSubTask {
class G1PostEvacuateCollectionSetCleanupTask2::ResizeTLABsAndSwapCardTableTask : public G1AbstractSubTask {
G1JavaThreadsListClaimer _claimer;
// There is not much work per thread so the number of threads per worker is high.
static const uint ThreadsPerWorker = 250;
public:
ResizeTLABsTask() : G1AbstractSubTask(G1GCPhaseTimes::ResizeThreadLABs), _claimer(ThreadsPerWorker) { }
ResizeTLABsAndSwapCardTableTask()
: G1AbstractSubTask(G1GCPhaseTimes::ResizeThreadLABs), _claimer(ThreadsPerWorker)
{
G1BarrierSet::g1_barrier_set()->swap_global_card_table();
}
void do_work(uint worker_id) override {
class ResizeClosure : public ThreadClosure {
class ResizeAndSwapCardTableClosure : public ThreadClosure {
public:
void do_thread(Thread* thread) {
static_cast<JavaThread*>(thread)->tlab().resize();
if (UseTLAB && ResizeTLAB) {
static_cast<JavaThread*>(thread)->tlab().resize();
}
G1BarrierSet::g1_barrier_set()->update_card_table_base(thread);
}
} cl;
_claimer.apply(&cl);
} resize_and_swap_cl;
_claimer.apply(&resize_and_swap_cl);
}
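The swap-then-republish structure of this task can be modeled standalone: one global pointer is flipped between two tables once (serial, in the constructor), and every thread's cached pointer is refreshed afterwards (parallel, in do_work()). The real swap happens during the pause, so there are no racing mutators; the sketch below only mirrors the shape, and all names are placeholders:

#include <atomic>

struct CardTableStorage { /* byte map etc. */ };

static CardTableStorage table_a, table_b;
static std::atomic<CardTableStorage*> current_table{&table_a};

struct PerThreadData {
  CardTableStorage* cached_table = nullptr;  // stands in for the per-thread base
};

// Serial step: flip the global pointer to the other table.
void swap_global_card_table() {
  CardTableStorage* cur = current_table.load(std::memory_order_relaxed);
  current_table.store(cur == &table_a ? &table_b : &table_a, std::memory_order_release);
}

// Per-thread step: make the thread's cached view point at the new table.
void update_card_table_base(PerThreadData& t) {
  t.cached_table = current_table.load(std::memory_order_acquire);
}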
double worker_cost() const override {
@ -968,13 +898,8 @@ G1PostEvacuateCollectionSetCleanupTask2::G1PostEvacuateCollectionSetCleanupTask2
if (evac_failure_regions->has_regions_evac_failed()) {
add_parallel_task(new ProcessEvacuationFailedRegionsTask(evac_failure_regions));
}
add_parallel_task(new RedirtyLoggedCardsTask(evac_failure_regions,
per_thread_states->rdc_buffers(),
per_thread_states->num_workers()));
if (UseTLAB && ResizeTLAB) {
add_parallel_task(new ResizeTLABsTask());
}
add_parallel_task(new ResizeTLABsAndSwapCardTableTask());
add_parallel_task(new FreeCollectionSetTask(evacuation_info,
per_thread_states->surviving_young_words(),
evac_failure_regions));

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2021, 2024, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2021, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -55,9 +55,8 @@ public:
// - Eagerly Reclaim Humongous Objects (s)
// - Update Derived Pointers (s)
// - Clear Retained Region Data (on evacuation failure)
// - Redirty Logged Cards
// - Free Collection Set
// - Resize TLABs
// - Resize TLABs and Swap Card Table
// - Reset the reusable PartialArrayStateManager.
class G1PostEvacuateCollectionSetCleanupTask2 : public G1BatchedTask {
class EagerlyReclaimHumongousObjectsTask;
@ -66,9 +65,8 @@ class G1PostEvacuateCollectionSetCleanupTask2 : public G1BatchedTask {
#endif
class ProcessEvacuationFailedRegionsTask;
class RedirtyLoggedCardsTask;
class FreeCollectionSetTask;
class ResizeTLABsTask;
class ResizeTLABsAndSwapCardTableTask;
class ResetPartialArrayStateManagerTask;
public:

View File

@ -24,7 +24,6 @@
#include "gc/g1/g1CollectedHeap.inline.hpp"
#include "gc/g1/g1ConcurrentRefineStats.hpp"
#include "gc/g1/g1DirtyCardQueue.hpp"
#include "gc/g1/g1RegionPinCache.inline.hpp"
#include "gc/g1/g1ThreadLocalData.hpp"
#include "gc/g1/g1YoungGCPreEvacuateTasks.hpp"
@ -35,23 +34,21 @@
#include "runtime/thread.inline.hpp"
#include "runtime/threads.hpp"
class G1PreEvacuateCollectionSetBatchTask::JavaThreadRetireTLABAndFlushLogs : public G1AbstractSubTask {
class G1PreEvacuateCollectionSetBatchTask::JavaThreadRetireTLABs : public G1AbstractSubTask {
G1JavaThreadsListClaimer _claimer;
// Per worker thread statistics.
ThreadLocalAllocStats* _local_tlab_stats;
G1ConcurrentRefineStats* _local_refinement_stats;
uint _num_workers;
// There is relatively little work to do per thread.
static const uint ThreadsPerWorker = 250;
struct RetireTLABAndFlushLogsClosure : public ThreadClosure {
struct RetireTLABClosure : public ThreadClosure {
ThreadLocalAllocStats _tlab_stats;
G1ConcurrentRefineStats _refinement_stats;
RetireTLABAndFlushLogsClosure() : _tlab_stats(), _refinement_stats() { }
RetireTLABClosure() : _tlab_stats() { }
void do_thread(Thread* thread) override {
assert(thread->is_Java_thread(), "must be");
@ -61,37 +58,29 @@ class G1PreEvacuateCollectionSetBatchTask::JavaThreadRetireTLABAndFlushLogs : pu
if (UseTLAB) {
thread->retire_tlab(&_tlab_stats);
}
// Concatenate logs.
G1DirtyCardQueueSet& qset = G1BarrierSet::dirty_card_queue_set();
_refinement_stats += qset.concatenate_log_and_stats(thread);
// Flush region pin count cache.
G1ThreadLocalData::pin_count_cache(thread).flush();
}
};
public:
JavaThreadRetireTLABAndFlushLogs() :
G1AbstractSubTask(G1GCPhaseTimes::RetireTLABsAndFlushLogs),
JavaThreadRetireTLABs() :
G1AbstractSubTask(G1GCPhaseTimes::RetireTLABs),
_claimer(ThreadsPerWorker),
_local_tlab_stats(nullptr),
_local_refinement_stats(nullptr),
_num_workers(0) {
}
~JavaThreadRetireTLABAndFlushLogs() {
static_assert(std::is_trivially_destructible<G1ConcurrentRefineStats>::value, "must be");
FREE_C_HEAP_ARRAY(G1ConcurrentRefineStats, _local_refinement_stats);
~JavaThreadRetireTLABs() {
static_assert(std::is_trivially_destructible<ThreadLocalAllocStats>::value, "must be");
FREE_C_HEAP_ARRAY(ThreadLocalAllocStats, _local_tlab_stats);
}
void do_work(uint worker_id) override {
RetireTLABAndFlushLogsClosure tc;
RetireTLABClosure tc;
_claimer.apply(&tc);
_local_tlab_stats[worker_id] = tc._tlab_stats;
_local_refinement_stats[worker_id] = tc._refinement_stats;
}
double worker_cost() const override {
@ -101,11 +90,9 @@ public:
void set_max_workers(uint max_workers) override {
_num_workers = max_workers;
_local_tlab_stats = NEW_C_HEAP_ARRAY(ThreadLocalAllocStats, _num_workers, mtGC);
_local_refinement_stats = NEW_C_HEAP_ARRAY(G1ConcurrentRefineStats, _num_workers, mtGC);
for (uint i = 0; i < _num_workers; i++) {
::new (&_local_tlab_stats[i]) ThreadLocalAllocStats();
::new (&_local_refinement_stats[i]) G1ConcurrentRefineStats();
}
}
@ -116,85 +103,15 @@ public:
}
return result;
}
G1ConcurrentRefineStats refinement_stats() const {
G1ConcurrentRefineStats result;
for (uint i = 0; i < _num_workers; i++) {
result += _local_refinement_stats[i];
}
return result;
}
};
class G1PreEvacuateCollectionSetBatchTask::NonJavaThreadFlushLogs : public G1AbstractSubTask {
struct FlushLogsClosure : public ThreadClosure {
G1ConcurrentRefineStats _refinement_stats;
FlushLogsClosure() : _refinement_stats() { }
void do_thread(Thread* thread) override {
G1DirtyCardQueueSet& qset = G1BarrierSet::dirty_card_queue_set();
_refinement_stats += qset.concatenate_log_and_stats(thread);
assert(G1ThreadLocalData::pin_count_cache(thread).count() == 0, "NonJava thread has pinned Java objects");
}
} _tc;
public:
NonJavaThreadFlushLogs() : G1AbstractSubTask(G1GCPhaseTimes::NonJavaThreadFlushLogs), _tc() { }
void do_work(uint worker_id) override {
Threads::non_java_threads_do(&_tc);
}
double worker_cost() const override {
return 1.0;
}
G1ConcurrentRefineStats refinement_stats() const { return _tc._refinement_stats; }
};
G1PreEvacuateCollectionSetBatchTask::G1PreEvacuateCollectionSetBatchTask() :
G1BatchedTask("Pre Evacuate Prepare", G1CollectedHeap::heap()->phase_times()),
_old_pending_cards(G1BarrierSet::dirty_card_queue_set().num_cards()),
_java_retire_task(new JavaThreadRetireTLABAndFlushLogs()),
_non_java_retire_task(new NonJavaThreadFlushLogs()) {
_java_retire_task(new JavaThreadRetireTLABs()) {
// Disable mutator refinement until concurrent refinement decides otherwise.
G1BarrierSet::dirty_card_queue_set().set_mutator_refinement_threshold(SIZE_MAX);
add_serial_task(_non_java_retire_task);
add_parallel_task(_java_retire_task);
}
static void verify_empty_dirty_card_logs() {
#ifdef ASSERT
ResourceMark rm;
struct Verifier : public ThreadClosure {
Verifier() {}
void do_thread(Thread* t) override {
G1DirtyCardQueue& queue = G1ThreadLocalData::dirty_card_queue(t);
assert(queue.is_empty(), "non-empty dirty card queue for thread %s", t->name());
}
} verifier;
Threads::threads_do(&verifier);
#endif
}
G1PreEvacuateCollectionSetBatchTask::~G1PreEvacuateCollectionSetBatchTask() {
_java_retire_task->tlab_stats().publish();
G1DirtyCardQueueSet& qset = G1BarrierSet::dirty_card_queue_set();
G1ConcurrentRefineStats total_refinement_stats;
total_refinement_stats += _java_retire_task->refinement_stats();
total_refinement_stats += _non_java_retire_task->refinement_stats();
qset.update_refinement_stats(total_refinement_stats);
verify_empty_dirty_card_logs();
size_t pending_cards = qset.num_cards();
size_t thread_buffer_cards = pending_cards - _old_pending_cards;
G1CollectedHeap::heap()->policy()->record_concurrent_refinement_stats(pending_cards, thread_buffer_cards);
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2023, 2024, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2023, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -28,18 +28,13 @@
#include "gc/g1/g1BatchedTask.hpp"
// Set of pre evacuate collection set tasks containing ("s" means serial):
// - Retire TLAB and Flush Logs (Java threads)
// - Retire TLABs (Java threads)
// - Flush pin count cache (Java threads)
// - Flush Logs (s) (Non-Java threads)
class G1PreEvacuateCollectionSetBatchTask : public G1BatchedTask {
class JavaThreadRetireTLABAndFlushLogs;
class NonJavaThreadFlushLogs;
size_t _old_pending_cards;
class JavaThreadRetireTLABs;
// References to the tasks to retain access to statistics.
JavaThreadRetireTLABAndFlushLogs* _java_retire_task;
NonJavaThreadFlushLogs* _non_java_retire_task;
JavaThreadRetireTLABs* _java_retire_task;
public:
G1PreEvacuateCollectionSetBatchTask();

View File

@ -162,6 +162,11 @@
"a single expand attempt.") \
range(0, 100) \
\
product(size_t, G1PerThreadPendingCardThreshold, 256, DIAGNOSTIC, \
"Number of pending cards allowed on the card table per GC " \
"worker thread before considering starting refinement.") \
range(0, UINT_MAX) \
\
product(uint, G1ShrinkByPercentOfAvailable, 50, DIAGNOSTIC, \
"When shrinking, maximum % of free space to free for a single " \
"shrink attempt.") \
@ -188,10 +193,6 @@
"bound of acceptable deviation range.") \
constraint(G1CPUUsageShrinkConstraintFunc, AfterErgo) \
\
product(size_t, G1UpdateBufferSize, 256, \
"Size of an update buffer") \
constraint(G1UpdateBufferSizeConstraintFunc, AfterErgo) \
\
product(uint, G1RSetUpdatingPauseTimePercent, 10, \
"A target percentage of time that is allowed to be spend on " \
"processing remembered set update buffers during the collection " \

View File

@ -206,12 +206,6 @@ JVMFlag::Error G1SATBBufferSizeConstraintFunc(size_t value, bool verbose) {
verbose);
}
JVMFlag::Error G1UpdateBufferSizeConstraintFunc(size_t value, bool verbose) {
return buffer_size_constraint_helper(FLAG_MEMBER_ENUM(G1UpdateBufferSize),
value,
verbose);
}
JVMFlag::Error gc_cpu_usage_threshold_helper(JVMFlagsEnum flagid,
uint value,
bool verbose) {

View File

@ -47,7 +47,6 @@
\
/* G1 PtrQueue buffer size constraints */ \
f(size_t, G1SATBBufferSizeConstraintFunc) \
f(size_t, G1UpdateBufferSizeConstraintFunc) \
\
/* G1 GC deviation counter threshold constraints */ \
f(uint, G1CPUUsageExpandConstraintFunc) \

View File

@ -82,8 +82,7 @@
declare_constant(G1HeapRegionType::StartsHumongousTag) \
declare_constant(G1HeapRegionType::ContinuesHumongousTag) \
declare_constant(G1HeapRegionType::OldMask) \
declare_constant(BarrierSet::G1BarrierSet) \
declare_constant(G1CardTable::g1_young_gen)
declare_constant(BarrierSet::G1BarrierSet)
#define VM_TYPES_G1GC(declare_type, \
declare_toplevel_type, \
@ -100,7 +99,6 @@
declare_toplevel_type(PtrQueue) \
declare_toplevel_type(G1HeapRegionType) \
declare_toplevel_type(SATBMarkQueue) \
declare_toplevel_type(G1DirtyCardQueue) \
\
declare_toplevel_type(G1CollectedHeap*) \
declare_toplevel_type(G1HeapRegion*) \

View File

@ -1,38 +0,0 @@
/*
* Copyright (c) 2019, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "gc/shared/bufferNodeList.hpp"
#include "utilities/debug.hpp"
BufferNodeList::BufferNodeList() :
_head(nullptr), _tail(nullptr), _entry_count(0) {}
BufferNodeList::BufferNodeList(BufferNode* head,
BufferNode* tail,
size_t entry_count) :
_head(head), _tail(tail), _entry_count(entry_count)
{
assert((_head == nullptr) == (_tail == nullptr), "invariant");
assert((_head == nullptr) == (_entry_count == 0), "invariant");
}

View File

@ -225,6 +225,9 @@ uintx CardTable::ct_max_alignment_constraint() {
#ifndef PRODUCT
void CardTable::verify_region(MemRegion mr, CardValue val, bool val_equals) {
if (mr.is_empty()) {
return;
}
CardValue* start = byte_for(mr.start());
CardValue* end = byte_for(mr.last());
bool failures = false;
@ -255,7 +258,8 @@ void CardTable::verify_dirty_region(MemRegion mr) {
}
#endif
void CardTable::print_on(outputStream* st) const {
st->print_cr("Card table byte_map: [" PTR_FORMAT "," PTR_FORMAT "] _byte_map_base: " PTR_FORMAT,
void CardTable::print_on(outputStream* st, const char* description) const {
st->print_cr("%s table byte_map: [" PTR_FORMAT "," PTR_FORMAT "] _byte_map_base: " PTR_FORMAT,
description,
p2i(_byte_map), p2i(_byte_map + _byte_map_size), p2i(_byte_map_base));
}
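The added description parameter makes it possible to label several card tables in the output while keeping the old wording as the default, presumably useful now that G1 maintains more than one table. A hypothetical usage sketch; the variable names and the "Refinement" label are assumptions, not taken from this hunk:

// ct and refinement_ct are hypothetical CardTable pointers, st an outputStream*.
ct->print_on(st);                          // "Card table byte_map: [...]"
refinement_ct->print_on(st, "Refinement"); // "Refinement table byte_map: [...]"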

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2000, 2024, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2000, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -203,12 +203,12 @@ public:
virtual bool is_in_young(const void* p) const = 0;
// Print a description of the memory for the card table
virtual void print_on(outputStream* st) const;
// Print card table information.
void print_on(outputStream* st, const char* description = "Card") const;
// val_equals -> it will check that all cards covered by mr equal val
// !val_equals -> it will check that all cards covered by mr do not equal val
void verify_region(MemRegion mr, CardValue val, bool val_equals) PRODUCT_RETURN;
virtual void verify_region(MemRegion mr, CardValue val, bool val_equals) PRODUCT_RETURN;
void verify_not_dirty_region(MemRegion mr) PRODUCT_RETURN;
void verify_dirty_region(MemRegion mr) PRODUCT_RETURN;
};
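Since the val_equals flag is easy to misread, a short usage sketch may help; the table pointer and MemRegion mr are assumptions for illustration, and the calls compile to nothing in product builds via PRODUCT_RETURN:

// All cards covering mr must be dirty:
table->verify_region(mr, CardTable::dirty_card_val(), /*val_equals=*/true);
// No card covering mr may be dirty:
table->verify_region(mr, CardTable::dirty_card_val(), /*val_equals=*/false);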

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, 2019, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2015, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -34,7 +34,7 @@ template <class T>
class WorkerDataArray : public CHeapObj<mtGC> {
friend class WDAPrinter;
public:
static const uint MaxThreadWorkItems = 9;
static const uint MaxThreadWorkItems = 10;
private:
T* _data;
uint _length;

View File

@ -589,10 +589,6 @@ void JVMCIRuntime::write_barrier_pre(JavaThread* thread, oopDesc* obj) {
G1BarrierSetRuntime::write_ref_field_pre_entry(obj, thread);
}
void JVMCIRuntime::write_barrier_post(JavaThread* thread, volatile CardValue* card_addr) {
G1BarrierSetRuntime::write_ref_field_post_entry(card_addr, thread);
}
#endif // INCLUDE_G1GC
JRT_LEAF(jboolean, JVMCIRuntime::validate_object(JavaThread* thread, oopDesc* parent, oopDesc* child))

Some files were not shown because too many files have changed in this diff.