8338658: New Object to ObjectMonitor mapping: s390x implementation

Reviewed-by: lucy, mdoerr
This commit is contained in:
Amit Kumar 2024-09-20 14:46:10 +00:00
parent ae63aaaa58
commit 9bcde4ffca
7 changed files with 151 additions and 82 deletions

View File

@ -79,7 +79,7 @@ void C1_MacroAssembler::lock_object(Register Rmark, Register Roop, Register Rbox
assert(LockingMode != LM_MONITOR, "LM_MONITOR is already handled, by emit_lock()");
if (LockingMode == LM_LIGHTWEIGHT) {
lightweight_lock(Roop, Rmark, tmp, slow_case);
lightweight_lock(Rbox, Roop, Rmark, tmp, slow_case);
} else if (LockingMode == LM_LEGACY) {
NearLabel done;

View File

@ -34,12 +34,12 @@
#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
void C2_MacroAssembler::fast_lock_lightweight(Register obj, Register box, Register temp1, Register temp2) {
compiler_fast_lock_lightweight_object(obj, temp1, temp2);
compiler_fast_lock_lightweight_object(obj, box, temp1, temp2);
}
void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register box, Register temp1, Register temp2) {
compiler_fast_unlock_lightweight_object(obj, temp1, temp2);
compiler_fast_unlock_lightweight_object(obj, box, temp1, temp2);
}
//------------------------------------------------------

View File

@ -1012,7 +1012,7 @@ void InterpreterMacroAssembler::lock_object(Register monitor, Register object) {
}
if (LockingMode == LM_LIGHTWEIGHT) {
lightweight_lock(object, header, tmp, slow_case);
lightweight_lock(monitor, object, header, tmp, slow_case);
} else if (LockingMode == LM_LEGACY) {
// Load markWord from object into header.

View File

@ -6007,10 +6007,10 @@ SkipIfEqual::~SkipIfEqual() {
// - obj: the object to be locked, contents preserved.
// - temp1, temp2: temporary registers, contents destroyed.
// Note: make sure Z_R1 is not manipulated here when C2 compiler is in play
void MacroAssembler::lightweight_lock(Register obj, Register temp1, Register temp2, Label& slow) {
void MacroAssembler::lightweight_lock(Register basic_lock, Register obj, Register temp1, Register temp2, Label& slow) {
assert(LockingMode == LM_LIGHTWEIGHT, "only used with new lightweight locking");
assert_different_registers(obj, temp1, temp2);
assert_different_registers(basic_lock, obj, temp1, temp2);
Label push;
const Register top = temp1;
@ -6022,6 +6022,11 @@ void MacroAssembler::lightweight_lock(Register obj, Register temp1, Register tem
// instruction emitted as it is part of C1's null check semantics.
z_lg(mark, Address(obj, mark_offset));
if (UseObjectMonitorTable) {
// Clear cache in case fast locking succeeds.
const Address om_cache_addr = Address(basic_lock, BasicObjectLock::lock_offset() + in_ByteSize((BasicLock::object_monitor_cache_offset_in_bytes())));
z_mvghi(om_cache_addr, 0);
}
// First we need to check if the lock-stack has room for pushing the object reference.
z_lgf(top, Address(Z_thread, ls_top_offset));
@ -6145,8 +6150,8 @@ void MacroAssembler::lightweight_unlock(Register obj, Register temp1, Register t
bind(unlocked);
}
void MacroAssembler::compiler_fast_lock_lightweight_object(Register obj, Register tmp1, Register tmp2) {
assert_different_registers(obj, tmp1, tmp2);
void MacroAssembler::compiler_fast_lock_lightweight_object(Register obj, Register box, Register tmp1, Register tmp2) {
assert_different_registers(obj, box, tmp1, tmp2);
// Handle inflated monitor.
NearLabel inflated;
@ -6155,6 +6160,11 @@ void MacroAssembler::compiler_fast_lock_lightweight_object(Register obj, Registe
// Finish fast lock unsuccessfully. MUST branch to with flag == NE
NearLabel slow_path;
if (UseObjectMonitorTable) {
// Clear cache in case fast locking succeeds.
z_mvghi(Address(box, BasicLock::object_monitor_cache_offset_in_bytes()), 0);
}
if (DiagnoseSyncOnValueBasedClasses != 0) {
load_klass(tmp1, obj);
z_tm(Address(tmp1, Klass::misc_flags_offset()), KlassFlags::_misc_is_value_based_class);
@ -6219,33 +6229,77 @@ void MacroAssembler::compiler_fast_lock_lightweight_object(Register obj, Registe
{ // Handle inflated monitor.
bind(inflated);
const Register tmp1_monitor = tmp1;
if (!UseObjectMonitorTable) {
// mark contains the tagged ObjectMonitor*.
const Register tagged_monitor = mark;
const Register zero = tmp2;
// Try to CAS m->owner from null to current thread.
// If m->owner is null, then csg succeeds and sets m->owner=THREAD and CR=EQ.
// Otherwise, register zero is filled with the current owner.
z_lghi(zero, 0);
z_csg(zero, Z_thread, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), tagged_monitor);
z_bre(locked);
// Check if recursive.
z_cgr(Z_thread, zero); // zero contains the owner from z_csg instruction
z_brne(slow_path);
// Recursive
z_agsi(Address(tagged_monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)), 1ll);
z_cgr(zero, zero);
// z_bru(locked);
// Uncomment above line in the future, for now jump address is right next to us.
assert(tmp1_monitor == mark, "should be the same here");
} else {
// OMCache lookup not supported yet. Take the slowpath.
// Set flag to NE
z_ltgr(obj, obj);
NearLabel monitor_found;
// load cache address
z_la(tmp1, Address(Z_thread, JavaThread::om_cache_oops_offset()));
const int num_unrolled = 2;
for (int i = 0; i < num_unrolled; i++) {
z_cg(obj, Address(tmp1));
z_bre(monitor_found);
add2reg(tmp1, in_bytes(OMCache::oop_to_oop_difference()));
}
NearLabel loop;
// Search for obj in cache
bind(loop);
// check for match.
z_cg(obj, Address(tmp1));
z_bre(monitor_found);
// search until null encountered, guaranteed _null_sentinel at end.
add2reg(tmp1, in_bytes(OMCache::oop_to_oop_difference()));
z_cghsi(0, tmp1, 0);
z_brne(loop); // if not EQ to 0, go for another loop
// we reached the end, cache miss
z_ltgr(obj, obj); // set CC to NE
z_bru(slow_path);
// cache hit
bind(monitor_found);
z_lg(tmp1_monitor, Address(tmp1, OMCache::oop_to_monitor_difference()));
}
NearLabel monitor_locked;
// lock the monitor
// mark contains the tagged ObjectMonitor*.
const Register tagged_monitor = mark;
const Register zero = tmp2;
const ByteSize monitor_tag = in_ByteSize(UseObjectMonitorTable ? 0 : checked_cast<int>(markWord::monitor_value));
const Address owner_address(tmp1_monitor, ObjectMonitor::owner_offset() - monitor_tag);
const Address recursions_address(tmp1_monitor, ObjectMonitor::recursions_offset() - monitor_tag);
// Try to CAS m->owner from null to current thread.
// If m->owner is null, then csg succeeds and sets m->owner=THREAD and CR=EQ.
// Otherwise, register zero is filled with the current owner.
z_lghi(zero, 0);
z_csg(zero, Z_thread, owner_address);
z_bre(monitor_locked);
// Check if recursive.
z_cgr(Z_thread, zero); // zero contains the owner from z_csg instruction
z_brne(slow_path);
// Recursive
z_agsi(recursions_address, 1ll);
bind(monitor_locked);
if (UseObjectMonitorTable) {
// Cache the monitor for unlock
z_stg(tmp1_monitor, Address(box, BasicLock::object_monitor_cache_offset_in_bytes()));
}
// set the CC now
z_cgr(obj, obj);
}
BLOCK_COMMENT("} handle_inflated_monitor_lightweight_locking");
@ -6270,11 +6324,11 @@ void MacroAssembler::compiler_fast_lock_lightweight_object(Register obj, Registe
// C2 uses the value of flag (NE vs EQ) to determine the continuation.
}
void MacroAssembler::compiler_fast_unlock_lightweight_object(Register obj, Register tmp1, Register tmp2) {
assert_different_registers(obj, tmp1, tmp2);
void MacroAssembler::compiler_fast_unlock_lightweight_object(Register obj, Register box, Register tmp1, Register tmp2) {
assert_different_registers(obj, box, tmp1, tmp2);
// Handle inflated monitor.
NearLabel inflated, inflated_load_monitor;
NearLabel inflated, inflated_load_mark;
// Finish fast unlock successfully. MUST reach to with flag == EQ.
NearLabel unlocked;
// Finish fast unlock unsuccessfully. MUST branch to with flag == NE.
@ -6294,7 +6348,7 @@ void MacroAssembler::compiler_fast_unlock_lightweight_object(Register obj, Regis
z_aghi(top, -oopSize);
z_cg(obj, Address(Z_thread, top));
branch_optimized(bcondNotEqual, inflated_load_monitor);
branch_optimized(bcondNotEqual, inflated_load_mark);
// Pop lock-stack.
#ifdef ASSERT
@ -6315,6 +6369,9 @@ void MacroAssembler::compiler_fast_unlock_lightweight_object(Register obj, Regis
// Not recursive
// Check for monitor (0b10).
// Because we got here by popping (meaning we pushed in locked)
// there will be no monitor in the box. So we need to push back the obj
// so that the runtime can fix any potential anonymous owner.
z_lg(mark, Address(obj, mark_offset));
z_tmll(mark, markWord::monitor_value);
if (!UseObjectMonitorTable) {
@ -6353,7 +6410,7 @@ void MacroAssembler::compiler_fast_unlock_lightweight_object(Register obj, Regis
{ // Handle inflated monitor.
bind(inflated_load_monitor);
bind(inflated_load_mark);
z_lg(mark, Address(obj, mark_offset));
@ -6378,49 +6435,61 @@ void MacroAssembler::compiler_fast_unlock_lightweight_object(Register obj, Regis
bind(check_done);
#endif // ASSERT
const Register tmp1_monitor = tmp1;
if (!UseObjectMonitorTable) {
// mark contains the tagged ObjectMonitor*.
const Register monitor = mark;
NearLabel not_recursive;
const Register recursions = tmp2;
// Check if recursive.
load_and_test_long(recursions, Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
z_bre(not_recursive); // if 0 then jump, it's not recursive locking
// Recursive unlock
z_agsi(Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)), -1ll);
z_cgr(monitor, monitor); // set the CC to EQUAL
z_bru(unlocked);
bind(not_recursive);
NearLabel not_ok;
// Check if the entry lists are empty.
load_and_test_long(tmp2, Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
z_brne(not_ok);
load_and_test_long(tmp2, Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
z_brne(not_ok);
z_release();
z_stg(tmp2 /*=0*/, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), monitor);
z_bru(unlocked); // CC = EQ here
bind(not_ok);
// The owner may be anonymous, and we removed the last obj entry in
// the lock-stack. This loses the information about the owner.
// Write the thread to the owner field so the runtime knows the owner.
z_stg(Z_thread, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), monitor);
z_bru(slow_path); // CC = NE here
assert(tmp1_monitor == mark, "should be the same here");
} else {
// OMCache lookup not supported yet. Take the slowpath.
// Set flag to NE
z_ltgr(obj, obj);
z_bru(slow_path);
// Uses ObjectMonitorTable. Look for the monitor in our BasicLock on the stack.
z_lg(tmp1_monitor, Address(box, BasicLock::object_monitor_cache_offset_in_bytes()));
// null check with ZF == 0, no valid pointer below alignof(ObjectMonitor*)
z_cghi(tmp1_monitor, alignof(ObjectMonitor*));
z_brl(slow_path);
}
// mark contains the tagged ObjectMonitor*.
const Register monitor = mark;
const ByteSize monitor_tag = in_ByteSize(UseObjectMonitorTable ? 0 : checked_cast<int>(markWord::monitor_value));
const Address recursions_address{monitor, ObjectMonitor::recursions_offset() - monitor_tag};
const Address cxq_address{monitor, ObjectMonitor::cxq_offset() - monitor_tag};
const Address EntryList_address{monitor, ObjectMonitor::EntryList_offset() - monitor_tag};
const Address owner_address{monitor, ObjectMonitor::owner_offset() - monitor_tag};
NearLabel not_recursive;
const Register recursions = tmp2;
// Check if recursive.
load_and_test_long(recursions, recursions_address);
z_bre(not_recursive); // if 0 then jump, it's not recursive locking
// Recursive unlock
z_agsi(recursions_address, -1ll);
z_cgr(monitor, monitor); // set the CC to EQUAL
z_bru(unlocked);
bind(not_recursive);
NearLabel not_ok;
// Check if the entry lists are empty.
load_and_test_long(tmp2, EntryList_address);
z_brne(not_ok);
load_and_test_long(tmp2, cxq_address);
z_brne(not_ok);
z_release();
z_stg(tmp2 /*=0*/, owner_address);
z_bru(unlocked); // CC = EQ here
bind(not_ok);
// The owner may be anonymous, and we removed the last obj entry in
// the lock-stack. This loses the information about the owner.
// Write the thread to the owner field so the runtime knows the owner.
z_stg(Z_thread, owner_address);
z_bru(slow_path); // CC = NE here
}
bind(unlocked);

View File

@ -752,10 +752,10 @@ class MacroAssembler: public Assembler {
void compiler_fast_lock_object(Register oop, Register box, Register temp1, Register temp2);
void compiler_fast_unlock_object(Register oop, Register box, Register temp1, Register temp2);
void lightweight_lock(Register obj, Register tmp1, Register tmp2, Label& slow);
void lightweight_lock(Register basic_lock, Register obj, Register tmp1, Register tmp2, Label& slow);
void lightweight_unlock(Register obj, Register tmp1, Register tmp2, Label& slow);
void compiler_fast_lock_lightweight_object(Register obj, Register tmp1, Register tmp2);
void compiler_fast_unlock_lightweight_object(Register obj, Register tmp1, Register tmp2);
void compiler_fast_lock_lightweight_object(Register obj, Register box, Register tmp1, Register tmp2);
void compiler_fast_unlock_lightweight_object(Register obj, Register box, Register tmp1, Register tmp2);
void resolve_jobject(Register value, Register tmp1, Register tmp2);
void resolve_global_jobject(Register value, Register tmp1, Register tmp2);

View File

@ -1713,7 +1713,7 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
// Try fastpath for locking.
if (LockingMode == LM_LIGHTWEIGHT) {
// Fast_lock kills r_temp_1, r_temp_2.
__ compiler_fast_lock_lightweight_object(r_oop, r_tmp1, r_tmp2);
__ compiler_fast_lock_lightweight_object(r_oop, r_box, r_tmp1, r_tmp2);
} else {
// Fast_lock kills r_temp_1, r_temp_2.
__ compiler_fast_lock_object(r_oop, r_box, r_tmp1, r_tmp2);
@ -1917,7 +1917,7 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
// Try fastpath for unlocking.
if (LockingMode == LM_LIGHTWEIGHT) {
// Fast_unlock kills r_tmp1, r_tmp2.
__ compiler_fast_unlock_lightweight_object(r_oop, r_tmp1, r_tmp2);
__ compiler_fast_unlock_lightweight_object(r_oop, r_box, r_tmp1, r_tmp2);
} else {
// Fast_unlock kills r_tmp1, r_tmp2.
__ compiler_fast_unlock_object(r_oop, r_box, r_tmp1, r_tmp2);

View File

@ -39,7 +39,7 @@ inline void BasicLock::set_displaced_header(markWord header) {
inline ObjectMonitor* BasicLock::object_monitor_cache() const {
assert(UseObjectMonitorTable, "must be");
#if defined(X86) || defined(AARCH64) || defined(RISCV64) || defined(PPC64)
#if defined(X86) || defined(AARCH64) || defined(RISCV64) || defined(PPC64) || defined(S390)
return reinterpret_cast<ObjectMonitor*>(get_metadata());
#else
// Other platforms do not make use of the cache yet,