diff --git a/src/hotspot/cpu/s390/c1_MacroAssembler_s390.cpp b/src/hotspot/cpu/s390/c1_MacroAssembler_s390.cpp
index f3fa19ddb31..f6dd20db3d6 100644
--- a/src/hotspot/cpu/s390/c1_MacroAssembler_s390.cpp
+++ b/src/hotspot/cpu/s390/c1_MacroAssembler_s390.cpp
@@ -79,7 +79,7 @@ void C1_MacroAssembler::lock_object(Register Rmark, Register Roop, Register Rbox
   assert(LockingMode != LM_MONITOR, "LM_MONITOR is already handled, by emit_lock()");
 
   if (LockingMode == LM_LIGHTWEIGHT) {
-    lightweight_lock(Roop, Rmark, tmp, slow_case);
+    lightweight_lock(Rbox, Roop, Rmark, tmp, slow_case);
   } else if (LockingMode == LM_LEGACY) {
     NearLabel done;
diff --git a/src/hotspot/cpu/s390/c2_MacroAssembler_s390.cpp b/src/hotspot/cpu/s390/c2_MacroAssembler_s390.cpp
index 3641d82dabe..025ef4c8915 100644
--- a/src/hotspot/cpu/s390/c2_MacroAssembler_s390.cpp
+++ b/src/hotspot/cpu/s390/c2_MacroAssembler_s390.cpp
@@ -34,12 +34,12 @@
 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
 
 void C2_MacroAssembler::fast_lock_lightweight(Register obj, Register box, Register temp1, Register temp2) {
-  compiler_fast_lock_lightweight_object(obj, temp1, temp2);
+  compiler_fast_lock_lightweight_object(obj, box, temp1, temp2);
 }
 
 void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register box, Register temp1, Register temp2) {
-  compiler_fast_unlock_lightweight_object(obj, temp1, temp2);
+  compiler_fast_unlock_lightweight_object(obj, box, temp1, temp2);
 }
 
 //------------------------------------------------------
diff --git a/src/hotspot/cpu/s390/interp_masm_s390.cpp b/src/hotspot/cpu/s390/interp_masm_s390.cpp
index e56beaa9f56..d00b6c3e2cc 100644
--- a/src/hotspot/cpu/s390/interp_masm_s390.cpp
+++ b/src/hotspot/cpu/s390/interp_masm_s390.cpp
@@ -1012,7 +1012,7 @@ void InterpreterMacroAssembler::lock_object(Register monitor, Register object) {
   }
 
   if (LockingMode == LM_LIGHTWEIGHT) {
-    lightweight_lock(object, header, tmp, slow_case);
+    lightweight_lock(monitor, object, header, tmp, slow_case);
   } else if (LockingMode == LM_LEGACY) {
 
     // Load markWord from object into header.
diff --git a/src/hotspot/cpu/s390/macroAssembler_s390.cpp b/src/hotspot/cpu/s390/macroAssembler_s390.cpp
index 65a7d3abe90..6c26e17d5ce 100644
--- a/src/hotspot/cpu/s390/macroAssembler_s390.cpp
+++ b/src/hotspot/cpu/s390/macroAssembler_s390.cpp
@@ -6007,10 +6007,10 @@ SkipIfEqual::~SkipIfEqual() {
 // - obj: the object to be locked, contents preserved.
 // - temp1, temp2: temporary registers, contents destroyed.
 // Note: make sure Z_R1 is not manipulated here when C2 compiler is in play
-void MacroAssembler::lightweight_lock(Register obj, Register temp1, Register temp2, Label& slow) {
+void MacroAssembler::lightweight_lock(Register basic_lock, Register obj, Register temp1, Register temp2, Label& slow) {
   assert(LockingMode == LM_LIGHTWEIGHT, "only used with new lightweight locking");
-  assert_different_registers(obj, temp1, temp2);
+  assert_different_registers(basic_lock, obj, temp1, temp2);
 
   Label push;
   const Register top = temp1;
@@ -6022,6 +6022,11 @@ void MacroAssembler::lightweight_lock(Register obj, Register temp1, Register tem
   // instruction emitted as it is part of C1's null check semantics.
   z_lg(mark, Address(obj, mark_offset));
 
+  if (UseObjectMonitorTable) {
+    // Clear cache in case fast locking succeeds.
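+    // A stale ObjectMonitor* left in the BasicLock by an earlier use of this
+    // stack slot must not be mistaken for a valid cache entry on unlock.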
+    const Address om_cache_addr = Address(basic_lock, BasicObjectLock::lock_offset() + in_ByteSize(BasicLock::object_monitor_cache_offset_in_bytes()));
+    z_mvghi(om_cache_addr, 0);
+  }
 
   // First we need to check if the lock-stack has room for pushing the object reference.
   z_lgf(top, Address(Z_thread, ls_top_offset));
@@ -6145,8 +6150,8 @@ void MacroAssembler::lightweight_unlock(Register obj, Register temp1, Register t
   bind(unlocked);
 }
 
-void MacroAssembler::compiler_fast_lock_lightweight_object(Register obj, Register tmp1, Register tmp2) {
-  assert_different_registers(obj, tmp1, tmp2);
+void MacroAssembler::compiler_fast_lock_lightweight_object(Register obj, Register box, Register tmp1, Register tmp2) {
+  assert_different_registers(obj, box, tmp1, tmp2);
 
   // Handle inflated monitor.
   NearLabel inflated;
@@ -6155,6 +6160,11 @@ void MacroAssembler::compiler_fast_lock_lightweight_object(Register obj, Registe
   // Finish fast lock unsuccessfully. MUST branch to with flag == NE
   NearLabel slow_path;
 
+  if (UseObjectMonitorTable) {
+    // Clear cache in case fast locking succeeds.
+    z_mvghi(Address(box, BasicLock::object_monitor_cache_offset_in_bytes()), 0);
+  }
+
   if (DiagnoseSyncOnValueBasedClasses != 0) {
     load_klass(tmp1, obj);
     z_tm(Address(tmp1, Klass::misc_flags_offset()), KlassFlags::_misc_is_value_based_class);
@@ -6219,33 +6229,77 @@ void MacroAssembler::compiler_fast_lock_lightweight_object(Register obj, Registe
   { // Handle inflated monitor.
     bind(inflated);
 
+    const Register tmp1_monitor = tmp1;
+
     if (!UseObjectMonitorTable) {
-      // mark contains the tagged ObjectMonitor*.
-      const Register tagged_monitor = mark;
-      const Register zero = tmp2;
-
-      // Try to CAS m->owner from null to current thread.
-      // If m->owner is null, then csg succeeds and sets m->owner=THREAD and CR=EQ.
-      // Otherwise, register zero is filled with the current owner.
-      z_lghi(zero, 0);
-      z_csg(zero, Z_thread, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), tagged_monitor);
-      z_bre(locked);
-
-      // Check if recursive.
-      z_cgr(Z_thread, zero); // zero contains the owner from z_csg instruction
-      z_brne(slow_path);
-
-      // Recursive
-      z_agsi(Address(tagged_monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)), 1ll);
-      z_cgr(zero, zero);
-      // z_bru(locked);
-      // Uncomment above line in the future, for now jump address is right next to us.
+      assert(tmp1_monitor == mark, "should be the same here");
     } else {
-      // OMCache lookup not supported yet. Take the slowpath.
-      // Set flag to NE
-      z_ltgr(obj, obj);
+      NearLabel monitor_found;
+
+      // load cache address
+      z_la(tmp1, Address(Z_thread, JavaThread::om_cache_oops_offset()));
+
+      const int num_unrolled = 2;
+      for (int i = 0; i < num_unrolled; i++) {
+        z_cg(obj, Address(tmp1));
+        z_bre(monitor_found);
+        add2reg(tmp1, in_bytes(OMCache::oop_to_oop_difference()));
+      }
+
+      // Search for obj in the rest of the cache.
+      NearLabel loop;
+      bind(loop);
+
+      // check for match
+      z_cg(obj, Address(tmp1));
+      z_bre(monitor_found);
+
+      // search until null encountered, a _null_sentinel is guaranteed at the end
+      add2reg(tmp1, in_bytes(OMCache::oop_to_oop_difference()));
+      z_cghsi(0, tmp1, 0);
+      z_brne(loop); // slot not null yet, keep searching
+
+      // we reached the end of the cache: miss
+      z_ltgr(obj, obj); // set CC to NE
       z_bru(slow_path);
+
+      // cache hit
+      bind(monitor_found);
+      z_lg(tmp1_monitor, Address(tmp1, OMCache::oop_to_monitor_difference()));
     }
+    NearLabel monitor_locked;
+    // Lock the monitor.
+
+    // mark contains the tagged ObjectMonitor*.
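+    // (The tag is only present when !UseObjectMonitorTable; with the table in
+    // use, tmp1_monitor holds the untagged pointer loaded from the cache,
+    // which is why monitor_tag below is 0 in that case.)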
+    const Register tagged_monitor = mark;
+    const Register zero = tmp2;
+
+    const ByteSize monitor_tag = in_ByteSize(UseObjectMonitorTable ? 0 : checked_cast<int>(markWord::monitor_value));
+    const Address owner_address(tmp1_monitor, ObjectMonitor::owner_offset() - monitor_tag);
+    const Address recursions_address(tmp1_monitor, ObjectMonitor::recursions_offset() - monitor_tag);
+
+    // Try to CAS m->owner from null to current thread.
+    // If m->owner is null, then csg succeeds and sets m->owner=THREAD and CC=EQ.
+    // Otherwise, register zero is filled with the current owner.
+    z_lghi(zero, 0);
+    z_csg(zero, Z_thread, owner_address);
+    z_bre(monitor_locked);
+
+    // Check if recursive.
+    z_cgr(Z_thread, zero); // zero contains the owner from z_csg instruction
+    z_brne(slow_path);
+
+    // Recursive
+    z_agsi(recursions_address, 1ll);
+
+    bind(monitor_locked);
+    if (UseObjectMonitorTable) {
+      // Cache the monitor for unlock
+      z_stg(tmp1_monitor, Address(box, BasicLock::object_monitor_cache_offset_in_bytes()));
+    }
+    // Set the CC to EQ now: the fast lock succeeded.
+    z_cgr(obj, obj);
   }
 
   BLOCK_COMMENT("} handle_inflated_monitor_lightweight_locking");
@@ -6270,11 +6324,11 @@ void MacroAssembler::compiler_fast_lock_lightweight_object(Register obj, Registe
   // C2 uses the value of flag (NE vs EQ) to determine the continuation.
 }
 
-void MacroAssembler::compiler_fast_unlock_lightweight_object(Register obj, Register tmp1, Register tmp2) {
-  assert_different_registers(obj, tmp1, tmp2);
+void MacroAssembler::compiler_fast_unlock_lightweight_object(Register obj, Register box, Register tmp1, Register tmp2) {
+  assert_different_registers(obj, box, tmp1, tmp2);
 
   // Handle inflated monitor.
-  NearLabel inflated, inflated_load_monitor;
+  NearLabel inflated, inflated_load_mark;
   // Finish fast unlock successfully. MUST reach with flag == EQ.
   NearLabel unlocked;
   // Finish fast unlock unsuccessfully. MUST branch to with flag == NE.
@@ -6294,7 +6348,7 @@ void MacroAssembler::compiler_fast_unlock_lightweight_object(Register obj, Regis
   z_aghi(top, -oopSize);
   z_cg(obj, Address(Z_thread, top));
-  branch_optimized(bcondNotEqual, inflated_load_monitor);
+  branch_optimized(bcondNotEqual, inflated_load_mark);
 
   // Pop lock-stack.
 #ifdef ASSERT
@@ -6315,6 +6369,9 @@ void MacroAssembler::compiler_fast_unlock_lightweight_object(Register obj, Regis
   // Not recursive
 
   // Check for monitor (0b10).
+  // Because we got here by popping (meaning we pushed the obj when locking),
+  // there will be no monitor in the box. So we need to push the obj back
+  // so that the runtime can fix up any potential anonymous owner.
   z_lg(mark, Address(obj, mark_offset));
   z_tmll(mark, markWord::monitor_value);
   if (!UseObjectMonitorTable) {
@@ -6353,7 +6410,7 @@ void MacroAssembler::compiler_fast_unlock_lightweight_object(Register obj, Regis
 
   { // Handle inflated monitor.
-    bind(inflated_load_monitor);
+    bind(inflated_load_mark);
 
     z_lg(mark, Address(obj, mark_offset));
@@ -6378,49 +6435,61 @@ void MacroAssembler::compiler_fast_unlock_lightweight_object(Register obj, Regis
     bind(check_done);
 #endif // ASSERT
 
+    const Register tmp1_monitor = tmp1;
+
     if (!UseObjectMonitorTable) {
-      // mark contains the tagged ObjectMonitor*.
-      const Register monitor = mark;
-
-      NearLabel not_recursive;
-      const Register recursions = tmp2;
-
-      // Check if recursive.
-      load_and_test_long(recursions, Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
-      z_bre(not_recursive); // if 0 then jump, it's not recursive locking
-
-      // Recursive unlock
-      z_agsi(Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)), -1ll);
-      z_cgr(monitor, monitor); // set the CC to EQUAL
-      z_bru(unlocked);
-
-      bind(not_recursive);
-
-      NearLabel not_ok;
-      // Check if the entry lists are empty.
-      load_and_test_long(tmp2, Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
-      z_brne(not_ok);
-      load_and_test_long(tmp2, Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
-      z_brne(not_ok);
-
-      z_release();
-      z_stg(tmp2 /*=0*/, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), monitor);
-
-      z_bru(unlocked); // CC = EQ here
-
-      bind(not_ok);
-
-      // The owner may be anonymous, and we removed the last obj entry in
-      // the lock-stack. This loses the information about the owner.
-      // Write the thread to the owner field so the runtime knows the owner.
-      z_stg(Z_thread, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), monitor);
-      z_bru(slow_path); // CC = NE here
+      assert(tmp1_monitor == mark, "should be the same here");
     } else {
-      // OMCache lookup not supported yet. Take the slowpath.
-      // Set flag to NE
-      z_ltgr(obj, obj);
-      z_bru(slow_path);
+      // Uses ObjectMonitorTable. Look for the monitor in our BasicLock on the stack.
+      z_lg(tmp1_monitor, Address(box, BasicLock::object_monitor_cache_offset_in_bytes()));
+      // Null check: no valid pointer lies below alignof(ObjectMonitor*),
+      // so branch low (including null) to the slow path.
+      z_cghi(tmp1_monitor, alignof(ObjectMonitor*));
+      z_brl(slow_path);
     }
+
+    // mark contains the tagged ObjectMonitor*.
+    const Register monitor = mark;
+
+    const ByteSize monitor_tag = in_ByteSize(UseObjectMonitorTable ? 0 : checked_cast<int>(markWord::monitor_value));
+    const Address recursions_address{monitor, ObjectMonitor::recursions_offset() - monitor_tag};
+    const Address cxq_address{monitor, ObjectMonitor::cxq_offset() - monitor_tag};
+    const Address EntryList_address{monitor, ObjectMonitor::EntryList_offset() - monitor_tag};
+    const Address owner_address{monitor, ObjectMonitor::owner_offset() - monitor_tag};
+
+    NearLabel not_recursive;
+    const Register recursions = tmp2;
+
+    // Check if recursive.
+    load_and_test_long(recursions, recursions_address);
+    z_bre(not_recursive); // if 0 then jump, it's not recursive locking
+
+    // Recursive unlock
+    z_agsi(recursions_address, -1ll);
+    z_cgr(monitor, monitor); // set the CC to EQUAL
+    z_bru(unlocked);
+
+    bind(not_recursive);
+
+    NearLabel not_ok;
+    // Check if the entry lists are empty.
+    load_and_test_long(tmp2, EntryList_address);
+    z_brne(not_ok);
+    load_and_test_long(tmp2, cxq_address);
+    z_brne(not_ok);
+
+    z_release();
+    z_stg(tmp2 /*=0*/, owner_address);
+
+    z_bru(unlocked); // CC = EQ here
+
+    bind(not_ok);
+
+    // The owner may be anonymous, and we removed the last obj entry in
+    // the lock-stack. This loses the information about the owner.
+    // Write the thread to the owner field so the runtime knows the owner.
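+    // (An anonymous owner means the monitor was inflated while the lock was
+    // held via the lock-stack, so no owning thread has been recorded yet.)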
+    z_stg(Z_thread, owner_address);
+    z_bru(slow_path); // CC = NE here
   }
 
   bind(unlocked);
diff --git a/src/hotspot/cpu/s390/macroAssembler_s390.hpp b/src/hotspot/cpu/s390/macroAssembler_s390.hpp
index c380f4dec10..5d3a4c29940 100644
--- a/src/hotspot/cpu/s390/macroAssembler_s390.hpp
+++ b/src/hotspot/cpu/s390/macroAssembler_s390.hpp
@@ -752,10 +752,10 @@ class MacroAssembler: public Assembler {
   void compiler_fast_lock_object(Register oop, Register box, Register temp1, Register temp2);
   void compiler_fast_unlock_object(Register oop, Register box, Register temp1, Register temp2);
 
-  void lightweight_lock(Register obj, Register tmp1, Register tmp2, Label& slow);
+  void lightweight_lock(Register basic_lock, Register obj, Register tmp1, Register tmp2, Label& slow);
   void lightweight_unlock(Register obj, Register tmp1, Register tmp2, Label& slow);
-  void compiler_fast_lock_lightweight_object(Register obj, Register tmp1, Register tmp2);
-  void compiler_fast_unlock_lightweight_object(Register obj, Register tmp1, Register tmp2);
+  void compiler_fast_lock_lightweight_object(Register obj, Register box, Register tmp1, Register tmp2);
+  void compiler_fast_unlock_lightweight_object(Register obj, Register box, Register tmp1, Register tmp2);
 
   void resolve_jobject(Register value, Register tmp1, Register tmp2);
   void resolve_global_jobject(Register value, Register tmp1, Register tmp2);
diff --git a/src/hotspot/cpu/s390/sharedRuntime_s390.cpp b/src/hotspot/cpu/s390/sharedRuntime_s390.cpp
index 9954c78ce1e..65c94db09dc 100644
--- a/src/hotspot/cpu/s390/sharedRuntime_s390.cpp
+++ b/src/hotspot/cpu/s390/sharedRuntime_s390.cpp
@@ -1713,7 +1713,7 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
     // Try fastpath for locking.
     if (LockingMode == LM_LIGHTWEIGHT) {
       // Fast_lock kills r_temp_1, r_temp_2.
-      __ compiler_fast_lock_lightweight_object(r_oop, r_tmp1, r_tmp2);
+      __ compiler_fast_lock_lightweight_object(r_oop, r_box, r_tmp1, r_tmp2);
     } else {
       // Fast_lock kills r_temp_1, r_temp_2.
       __ compiler_fast_lock_object(r_oop, r_box, r_tmp1, r_tmp2);
@@ -1917,7 +1917,7 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
     // Try fastpath for unlocking.
     if (LockingMode == LM_LIGHTWEIGHT) {
       // Fast_unlock kills r_tmp1, r_tmp2.
-      __ compiler_fast_unlock_lightweight_object(r_oop, r_tmp1, r_tmp2);
+      __ compiler_fast_unlock_lightweight_object(r_oop, r_box, r_tmp1, r_tmp2);
     } else {
       // Fast_unlock kills r_tmp1, r_tmp2.
       __ compiler_fast_unlock_object(r_oop, r_box, r_tmp1, r_tmp2);
diff --git a/src/hotspot/share/runtime/basicLock.inline.hpp b/src/hotspot/share/runtime/basicLock.inline.hpp
index 1f3bf15a876..30abd575da4 100644
--- a/src/hotspot/share/runtime/basicLock.inline.hpp
+++ b/src/hotspot/share/runtime/basicLock.inline.hpp
@@ -39,7 +39,7 @@ inline void BasicLock::set_displaced_header(markWord header) {
 
 inline ObjectMonitor* BasicLock::object_monitor_cache() const {
   assert(UseObjectMonitorTable, "must be");
-#if defined(X86) || defined(AARCH64) || defined(RISCV64) || defined(PPC64)
+#if defined(X86) || defined(AARCH64) || defined(RISCV64) || defined(PPC64) || defined(S390)
   return reinterpret_cast<ObjectMonitor*>(get_metadata());
 #else
   // Other platforms do not make use of the cache yet,
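The OMCache probe emitted in compiler_fast_lock_lightweight_object (two unrolled compares, then a sentinel-terminated loop) is easier to follow in plain code. The following is a minimal C++ sketch of that lookup, not HotSpot source: OMCacheEntry and om_cache_lookup are invented names, and the pair layout is a simplification of the real per-thread cache, which derives the monitor slot from the oop slot via OMCache::oop_to_monitor_difference().

struct ObjectMonitor; // opaque for this sketch

struct OMCacheEntry {
  void*          oop;     // key: object address; null acts as the end sentinel
  ObjectMonitor* monitor; // value: the object's inflated monitor
};

// Mirrors the emitted sequence: num_unrolled = 2 straight-line compares,
// then a loop that stops at the guaranteed null sentinel (cache miss).
ObjectMonitor* om_cache_lookup(OMCacheEntry* entry, void* obj) {
  for (int i = 0; i < 2; i++) {
    if (entry->oop == obj) return entry->monitor; // cache hit
    ++entry;                                      // oop_to_oop_difference()
  }
  while (entry->oop != nullptr) {
    if (entry->oop == obj) return entry->monitor; // cache hit
    ++entry;
  }
  return nullptr; // miss: the assembly sets CC to NE and takes the slow path
}

On a hit the assembly loads the untagged ObjectMonitor* into tmp1_monitor, and after a successful acquire stores it into the BasicLock so the unlock path can find it without another lookup.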
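The inflated-monitor fast paths themselves reduce to a compare-and-swap on the owner field, a recursion counter, and an emptiness check of the two entry queues. Below is a hedged C++ model of that logic under simplified assumptions: MonitorModel, fast_lock, and fast_unlock are invented names, the real ObjectMonitor layout differs, and the runtime slow paths do considerably more work.

#include <atomic>
#include <cstdint>

struct MonitorModel {
  std::atomic<void*> owner{nullptr};
  int64_t            recursions{0};
  void*              EntryList{nullptr}; // threads blocked on entry
  void*              cxq{nullptr};       // recently arrived waiters
};

// Lock: the z_lghi/z_csg pair is a CAS of owner from null to the current
// thread; if the CAS fails but the old owner is us, it is a recursive enter.
bool fast_lock(MonitorModel* m, void* self) {
  void* expected = nullptr;
  if (m->owner.compare_exchange_strong(expected, self)) return true; // CC == EQ
  if (expected == self) { m->recursions++; return true; }            // recursive
  return false;                                                      // slow path
}

// Unlock: drop one recursion if present; otherwise release only when both
// queues are empty (z_release + z_stg is a release-store of null), else
// record a real owner and defer to the runtime slow path.
bool fast_unlock(MonitorModel* m, void* self) {
  if (m->recursions > 0) { m->recursions--; return true; }
  if (m->EntryList == nullptr && m->cxq == nullptr) {
    m->owner.store(nullptr, std::memory_order_release);
    return true;  // CC == EQ
  }
  m->owner.store(self); // make the owner explicit for the slow path
  return false;         // CC == NE
}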