diff --git a/src/hotspot/cpu/ppc/c1_MacroAssembler_ppc.cpp b/src/hotspot/cpu/ppc/c1_MacroAssembler_ppc.cpp
index ba0187d0363..219aeaf316d 100644
--- a/src/hotspot/cpu/ppc/c1_MacroAssembler_ppc.cpp
+++ b/src/hotspot/cpu/ppc/c1_MacroAssembler_ppc.cpp
@@ -155,9 +155,6 @@ void C1_MacroAssembler::unlock_object(Register Rmark, Register Roop, Register Rb
   verify_oop(Roop, FILE_AND_LINE);
 
   if (LockingMode == LM_LIGHTWEIGHT) {
-    ld(Rmark, oopDesc::mark_offset_in_bytes(), Roop);
-    andi_(R0, Rmark, markWord::monitor_value);
-    bne(CCR0, slow_int);
     lightweight_unlock(Roop, Rmark, slow_int);
   } else if (LockingMode == LM_LEGACY) {
     // Check if it is still a light weight lock, this is is true if we see
diff --git a/src/hotspot/cpu/ppc/c2_MacroAssembler_ppc.cpp b/src/hotspot/cpu/ppc/c2_MacroAssembler_ppc.cpp
index d504c71e2b8..cc69c0abe36 100644
--- a/src/hotspot/cpu/ppc/c2_MacroAssembler_ppc.cpp
+++ b/src/hotspot/cpu/ppc/c2_MacroAssembler_ppc.cpp
@@ -36,6 +36,17 @@
 #endif
 
 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
+
+void C2_MacroAssembler::fast_lock_lightweight(ConditionRegister flag, Register obj, Register box,
+                                              Register tmp1, Register tmp2, Register tmp3) {
+  compiler_fast_lock_lightweight_object(flag, obj, tmp1, tmp2, tmp3);
+}
+
+void C2_MacroAssembler::fast_unlock_lightweight(ConditionRegister flag, Register obj, Register box,
+                                                Register tmp1, Register tmp2, Register tmp3) {
+  compiler_fast_unlock_lightweight_object(flag, obj, tmp1, tmp2, tmp3);
+}
+
 // Intrinsics for CompactStrings
 // Compress char[] to byte[] by compressing 16 bytes at once.
diff --git a/src/hotspot/cpu/ppc/c2_MacroAssembler_ppc.hpp b/src/hotspot/cpu/ppc/c2_MacroAssembler_ppc.hpp
index ef4840b08a2..5096810ef91 100644
--- a/src/hotspot/cpu/ppc/c2_MacroAssembler_ppc.hpp
+++ b/src/hotspot/cpu/ppc/c2_MacroAssembler_ppc.hpp
@@ -28,6 +28,12 @@
 // C2_MacroAssembler contains high-level macros for C2
 
  public:
+  // Code used by cmpFastLockLightweight and cmpFastUnlockLightweight mach instructions in .ad file.
+  void fast_lock_lightweight(ConditionRegister flag, Register obj, Register box,
+                             Register tmp1, Register tmp2, Register tmp3);
+  void fast_unlock_lightweight(ConditionRegister flag, Register obj, Register box,
+                               Register tmp1, Register tmp2, Register tmp3);
+
   // Intrinsics for CompactStrings
   // Compress char[] to byte[] by compressing 16 bytes at once.
   void string_compress_16(Register src, Register dst, Register cnt,
diff --git a/src/hotspot/cpu/ppc/interp_masm_ppc_64.cpp b/src/hotspot/cpu/ppc/interp_masm_ppc_64.cpp
index 54a31a16c8a..94ef1b3c9d2 100644
--- a/src/hotspot/cpu/ppc/interp_masm_ppc_64.cpp
+++ b/src/hotspot/cpu/ppc/interp_masm_ppc_64.cpp
@@ -970,9 +970,6 @@ void InterpreterMacroAssembler::lock_object(Register monitor, Register object) {
 
     // markWord displaced_header = obj->mark().set_unlocked();
 
-    // Load markWord from object into header.
-    ld(header, oopDesc::mark_offset_in_bytes(), object);
-
     if (DiagnoseSyncOnValueBasedClasses != 0) {
       load_klass(tmp, object);
       lwz(tmp, in_bytes(Klass::access_flags_offset()), tmp);
@@ -981,9 +978,11 @@ void InterpreterMacroAssembler::lock_object(Register monitor, Register object) {
     }
 
     if (LockingMode == LM_LIGHTWEIGHT) {
-      lightweight_lock(object, /* mark word */ header, tmp, slow_case);
+      lightweight_lock(object, header, tmp, slow_case);
       b(count_locking);
     } else if (LockingMode == LM_LEGACY) {
+      // Load markWord from object into header.
+      ld(header, oopDesc::mark_offset_in_bytes(), object);
       // Set displaced_header to be (markWord of object | UNLOCK_VALUE).
      ori(header, header, markWord::unlocked_value);
@@ -1115,22 +1114,6 @@ void InterpreterMacroAssembler::unlock_object(Register monitor) {
     ld(object, in_bytes(BasicObjectLock::obj_offset()), monitor);
 
     if (LockingMode == LM_LIGHTWEIGHT) {
-      // Check for non-symmetric locking. This is allowed by the spec and the interpreter
-      // must handle it.
-      Register tmp = current_header;
-      // First check for lock-stack underflow.
-      lwz(tmp, in_bytes(JavaThread::lock_stack_top_offset()), R16_thread);
-      cmplwi(CCR0, tmp, (unsigned)LockStack::start_offset());
-      ble(CCR0, slow_case);
-      // Then check if the top of the lock-stack matches the unlocked object.
-      addi(tmp, tmp, -oopSize);
-      ldx(tmp, tmp, R16_thread);
-      cmpd(CCR0, tmp, object);
-      bne(CCR0, slow_case);
-
-      ld(header, oopDesc::mark_offset_in_bytes(), object);
-      andi_(R0, header, markWord::monitor_value);
-      bne(CCR0, slow_case);
       lightweight_unlock(object, header, slow_case);
     } else {
       addi(object_mark_addr, object, oopDesc::mark_offset_in_bytes());
diff --git a/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp b/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp
index fe19cf03500..b7b5936a58d 100644
--- a/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp
+++ b/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp
@@ -38,6 +38,7 @@
 #include "oops/klass.inline.hpp"
 #include "oops/methodData.hpp"
 #include "prims/methodHandles.hpp"
+#include "register_ppc.hpp"
 #include "runtime/icache.hpp"
 #include "runtime/interfaceSupport.inline.hpp"
 #include "runtime/objectMonitor.hpp"
@@ -2259,8 +2260,8 @@ address MacroAssembler::emit_trampoline_stub(int destination_toc_offset,
 // "The box" is the space on the stack where we copy the object mark.
 void MacroAssembler::compiler_fast_lock_object(ConditionRegister flag, Register oop, Register box,
                                                Register temp, Register displaced_header, Register current_header) {
+  assert(LockingMode != LM_LIGHTWEIGHT, "uses fast_lock_lightweight");
   assert_different_registers(oop, box, temp, displaced_header, current_header);
-  assert(LockingMode != LM_LIGHTWEIGHT || flag == CCR0, "bad condition register");
   Label object_has_monitor;
   Label cas_failed;
   Label success, failure;
@@ -2284,7 +2285,8 @@ void MacroAssembler::compiler_fast_lock_object(ConditionRegister flag, Register
     // Set NE to indicate 'failure' -> take slow-path.
     crandc(flag, Assembler::equal, flag, Assembler::equal);
     b(failure);
-  } else if (LockingMode == LM_LEGACY) {
+  } else {
+    assert(LockingMode == LM_LEGACY, "must be");
     // Set displaced_header to be (markWord of object | UNLOCK_VALUE).
     ori(displaced_header, displaced_header, markWord::unlocked_value);
 
@@ -2328,10 +2330,6 @@ void MacroAssembler::compiler_fast_lock_object(ConditionRegister flag, Register
     }
     beq(CCR0, success);
     b(failure);
-  } else {
-    assert(LockingMode == LM_LIGHTWEIGHT, "must be");
-    lightweight_lock(oop, displaced_header, temp, failure);
-    b(success);
   }
 
   // Handle existing monitor.
@@ -2349,10 +2347,8 @@ void MacroAssembler::compiler_fast_lock_object(ConditionRegister flag, Register
            MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq,
            MacroAssembler::cmpxchgx_hint_acquire_lock());
 
-  if (LockingMode != LM_LIGHTWEIGHT) {
-    // Store a non-null value into the box.
-    std(box, BasicLock::displaced_header_offset_in_bytes(), box);
-  }
+  // Store a non-null value into the box.
+  std(box, BasicLock::displaced_header_offset_in_bytes(), box);
   beq(flag, success);
 
   // Check for recursive locking.
@@ -2374,8 +2370,8 @@ void MacroAssembler::compiler_fast_lock_object(ConditionRegister flag, Register
 void MacroAssembler::compiler_fast_unlock_object(ConditionRegister flag, Register oop, Register box,
                                                  Register temp, Register displaced_header, Register current_header) {
+  assert(LockingMode != LM_LIGHTWEIGHT, "uses fast_unlock_lightweight");
   assert_different_registers(oop, box, temp, displaced_header, current_header);
-  assert(LockingMode != LM_LIGHTWEIGHT || flag == CCR0, "bad condition register");
   Label success, failure, object_has_monitor, notRecursive;
 
   if (LockingMode == LM_LEGACY) {
@@ -2397,7 +2393,8 @@ void MacroAssembler::compiler_fast_unlock_object(ConditionRegister flag, Registe
     // Set NE to indicate 'failure' -> take slow-path.
     crandc(flag, Assembler::equal, flag, Assembler::equal);
     b(failure);
-  } else if (LockingMode == LM_LEGACY) {
+  } else {
+    assert(LockingMode == LM_LEGACY, "must be");
     // Check if it is still a light weight lock, this is is true if we see
     // the stack address of the basicLock in the markWord of the object.
     // Cmpxchg sets flag to cmpd(current_header, box).
@@ -2412,10 +2409,6 @@ void MacroAssembler::compiler_fast_unlock_object(ConditionRegister flag, Registe
              &failure);
     assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
     b(success);
-  } else {
-    assert(LockingMode == LM_LIGHTWEIGHT, "must be");
-    lightweight_unlock(oop, current_header, failure);
-    b(success);
   }
 
   // Handle existing monitor.
@@ -2455,6 +2448,276 @@ void MacroAssembler::compiler_fast_unlock_object(ConditionRegister flag, Registe
   bind(failure);
 }
 
+void MacroAssembler::compiler_fast_lock_lightweight_object(ConditionRegister flag, Register obj, Register tmp1,
+                                                           Register tmp2, Register tmp3) {
+  assert_different_registers(obj, tmp1, tmp2, tmp3);
+  assert(flag == CCR0, "bad condition register");
+
+  // Handle inflated monitor.
+  Label inflated;
+  // Finish fast lock successfully. MUST reach to with flag == EQ
+  Label locked;
+  // Finish fast lock unsuccessfully. MUST branch to with flag == NE
+  Label slow_path;
+
+  if (DiagnoseSyncOnValueBasedClasses != 0) {
+    load_klass(tmp1, obj);
+    lwz(tmp1, in_bytes(Klass::access_flags_offset()), tmp1);
+    testbitdi(flag, R0, tmp1, exact_log2(JVM_ACC_IS_VALUE_BASED_CLASS));
+    bne(flag, slow_path);
+  }
+
+  const Register mark = tmp1;
+  const Register t = tmp3; // Usage of R0 allowed!
+
+  { // Lightweight locking
+
+    // Push lock to the lock stack and finish successfully. MUST reach to with flag == EQ
+    Label push;
+
+    const Register top = tmp2;
+
+    // Check if lock-stack is full.
+    lwz(top, in_bytes(JavaThread::lock_stack_top_offset()), R16_thread);
+    cmplwi(flag, top, LockStack::end_offset() - 1);
+    bgt(flag, slow_path);
+
+    // The underflow check is elided. The recursive check will always fail
+    // when the lock stack is empty because of the _bad_oop_sentinel field.
+
+    // Check if recursive.
+    subi(t, top, oopSize);
+    ldx(t, R16_thread, t);
+    cmpd(flag, obj, t);
+    beq(flag, push);
+
+    // Check for monitor (0b10) or locked (0b00).
+    ld(mark, oopDesc::mark_offset_in_bytes(), obj);
+    andi_(t, mark, markWord::lock_mask_in_place);
+    cmpldi(flag, t, markWord::unlocked_value);
+    bgt(flag, inflated);
+    bne(flag, slow_path);
+
+    // Not inflated.
+
+    // Try to lock. Transition lock bits 0b01 => 0b00
+    assert(oopDesc::mark_offset_in_bytes() == 0, "required to avoid a lea");
+    atomically_flip_locked_state(/* is_unlock */ false, obj, mark, slow_path, MacroAssembler::MemBarAcq);
+
+    bind(push);
+    // After successful lock, push object on lock-stack.
+    stdx(obj, R16_thread, top);
+    addi(top, top, oopSize);
+    stw(top, in_bytes(JavaThread::lock_stack_top_offset()), R16_thread);
+    b(locked);
+  }
+
+  { // Handle inflated monitor.
+    bind(inflated);
+
+    // mark contains the tagged ObjectMonitor*.
+    const Register tagged_monitor = mark;
+    const uintptr_t monitor_tag = markWord::monitor_value;
+    const Register owner_addr = tmp2;
+
+    // Compute owner address.
+    addi(owner_addr, tagged_monitor, in_bytes(ObjectMonitor::owner_offset()) - monitor_tag);
+
+    // CAS owner (null => current thread).
+    cmpxchgd(/*flag=*/flag,
+             /*current_value=*/t,
+             /*compare_value=*/(intptr_t)0,
+             /*exchange_value=*/R16_thread,
+             /*where=*/owner_addr,
+             MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq,
+             MacroAssembler::cmpxchgx_hint_acquire_lock());
+    beq(flag, locked);
+
+    // Check if recursive.
+    cmpd(flag, t, R16_thread);
+    bne(flag, slow_path);
+
+    // Recursive.
+    ld(tmp1, in_bytes(ObjectMonitor::recursions_offset() - ObjectMonitor::owner_offset()), owner_addr);
+    addi(tmp1, tmp1, 1);
+    std(tmp1, in_bytes(ObjectMonitor::recursions_offset() - ObjectMonitor::owner_offset()), owner_addr);
+  }
+
+  bind(locked);
+  inc_held_monitor_count(tmp1);
+
+#ifdef ASSERT
+  // Check that locked label is reached with flag == EQ.
+  Label flag_correct;
+  beq(flag, flag_correct);
+  stop("Fast Lock Flag != EQ");
+#endif
+  bind(slow_path);
+#ifdef ASSERT
+  // Check that slow_path label is reached with flag == NE.
+  bne(flag, flag_correct);
+  stop("Fast Lock Flag != NE");
+  bind(flag_correct);
+#endif
+  // C2 uses the value of flag (NE vs EQ) to determine the continuation.
+}
+
+void MacroAssembler::compiler_fast_unlock_lightweight_object(ConditionRegister flag, Register obj, Register tmp1,
+                                                             Register tmp2, Register tmp3) {
+  assert_different_registers(obj, tmp1, tmp2, tmp3);
+  assert(flag == CCR0, "bad condition register");
+
+  // Handle inflated monitor.
+  Label inflated, inflated_load_monitor;
+  // Finish fast unlock successfully. MUST reach to with flag == EQ.
+  Label unlocked;
+  // Finish fast unlock unsuccessfully. MUST branch to with flag == NE.
+  Label slow_path;
+
+  const Register mark = tmp1;
+  const Register top = tmp2;
+  const Register t = tmp3;
+
+  { // Lightweight unlock
+    Label push_and_slow;
+
+    // Check if obj is top of lock-stack.
+    lwz(top, in_bytes(JavaThread::lock_stack_top_offset()), R16_thread);
+    subi(top, top, oopSize);
+    ldx(t, R16_thread, top);
+    cmpd(flag, obj, t);
+    // Top of lock stack was not obj. Must be monitor.
+    bne(flag, inflated_load_monitor);
+
+    // Pop lock-stack.
+    DEBUG_ONLY(li(t, 0);)
+    DEBUG_ONLY(stdx(t, R16_thread, top);)
+    stw(top, in_bytes(JavaThread::lock_stack_top_offset()), R16_thread);
+
+    // The underflow check is elided. The recursive check will always fail
+    // when the lock stack is empty because of the _bad_oop_sentinel field.
+
+    // Check if recursive.
+    subi(t, top, oopSize);
+    ldx(t, R16_thread, t);
+    cmpd(flag, obj, t);
+    beq(flag, unlocked);
+
+    // Not recursive.
+
+    // Check for monitor (0b10).
+    ld(mark, oopDesc::mark_offset_in_bytes(), obj);
+    andi_(t, mark, markWord::monitor_value);
+    bne(CCR0, inflated);
+
+#ifdef ASSERT
+    // Check header not unlocked (0b01).
+    Label not_unlocked;
+    andi_(t, mark, markWord::unlocked_value);
+    beq(CCR0, not_unlocked);
+    stop("lightweight_unlock already unlocked");
+    bind(not_unlocked);
+#endif
+
+    // Try to unlock. Transition lock bits 0b00 => 0b01
+    atomically_flip_locked_state(/* is_unlock */ true, obj, mark, push_and_slow, MacroAssembler::MemBarRel);
+    b(unlocked);
+
+    bind(push_and_slow);
+    // Restore lock-stack and handle the unlock in runtime.
+    DEBUG_ONLY(stdx(obj, R16_thread, top);)
+    addi(top, top, oopSize);
+    stw(top, in_bytes(JavaThread::lock_stack_top_offset()), R16_thread);
+    b(slow_path);
+  }
+
+  { // Handle inflated monitor.
+    bind(inflated_load_monitor);
+    ld(mark, oopDesc::mark_offset_in_bytes(), obj);
+#ifdef ASSERT
+    andi_(t, mark, markWord::monitor_value);
+    bne(CCR0, inflated);
+    stop("Fast Unlock not monitor");
+#endif
+
+    bind(inflated);
+
+#ifdef ASSERT
+    Label check_done;
+    subi(top, top, oopSize);
+    cmplwi(CCR0, top, in_bytes(JavaThread::lock_stack_base_offset()));
+    blt(CCR0, check_done);
+    ldx(t, R16_thread, top);
+    cmpd(flag, obj, t);
+    bne(flag, inflated);
+    stop("Fast Unlock lock on stack");
+    bind(check_done);
+#endif
+
+    // mark contains the tagged ObjectMonitor*.
+    const Register monitor = mark;
+    const uintptr_t monitor_tag = markWord::monitor_value;
+
+    // Untag the monitor.
+    subi(monitor, mark, monitor_tag);
+
+    const Register recursions = tmp2;
+    Label not_recursive;
+
+    // Check if recursive.
+    ld(recursions, in_bytes(ObjectMonitor::recursions_offset()), monitor);
+    addic_(recursions, recursions, -1);
+    blt(CCR0, not_recursive);
+
+    // Recursive unlock.
+    std(recursions, in_bytes(ObjectMonitor::recursions_offset()), monitor);
+    crorc(CCR0, Assembler::equal, CCR0, Assembler::equal);
+    b(unlocked);
+
+    bind(not_recursive);
+
+    Label release_;
+    const Register t2 = tmp2;
+
+    // Check if the entry lists are empty.
+    ld(t, in_bytes(ObjectMonitor::EntryList_offset()), monitor);
+    ld(t2, in_bytes(ObjectMonitor::cxq_offset()), monitor);
+    orr(t, t, t2);
+    cmpdi(flag, t, 0);
+    beq(flag, release_);
+
+    // The owner may be anonymous and we removed the last obj entry in
+    // the lock-stack. This loses the information about the owner.
+    // Write the thread to the owner field so the runtime knows the owner.
+    std(R16_thread, in_bytes(ObjectMonitor::owner_offset()), monitor);
+    b(slow_path);
+
+    bind(release_);
+    // Set owner to null.
+    release();
+    // t contains 0
+    std(t, in_bytes(ObjectMonitor::owner_offset()), monitor);
+  }
+
+  bind(unlocked);
+  dec_held_monitor_count(t);
+
+#ifdef ASSERT
+  // Check that unlocked label is reached with flag == EQ.
+  Label flag_correct;
+  beq(flag, flag_correct);
+  stop("Fast Unlock Flag != EQ");
+#endif
+  bind(slow_path);
+#ifdef ASSERT
+  // Check that slow_path label is reached with flag == NE.
+  bne(flag, flag_correct);
+  stop("Fast Unlock Flag != NE");
+  bind(flag_correct);
+#endif
+  // C2 uses the value of flag (NE vs EQ) to determine the continuation.
+}
+
 void MacroAssembler::safepoint_poll(Label& slow_path, Register temp, bool at_return, bool in_nmethod) {
   ld(temp, in_bytes(JavaThread::polling_word_offset()), R16_thread);
 
@@ -4074,58 +4337,57 @@ void MacroAssembler::atomically_flip_locked_state(bool is_unlock, Register obj,
 }
 
 // Implements lightweight-locking.
-// Branches to slow upon failure to lock the object, with CCR0 NE.
-// Falls through upon success with CCR0 EQ.
 //
 // - obj: the object to be locked
-// - hdr: the header, already loaded from obj, will be destroyed
-// - t1: temporary register
-void MacroAssembler::lightweight_lock(Register obj, Register hdr, Register t1, Label& slow) {
+// - t1, t2: temporary registers
+void MacroAssembler::lightweight_lock(Register obj, Register t1, Register t2, Label& slow) {
   assert(LockingMode == LM_LIGHTWEIGHT, "only used with new lightweight locking");
-  assert_different_registers(obj, hdr, t1);
+  assert_different_registers(obj, t1, t2);
 
-  // Check if we would have space on lock-stack for the object.
-  lwz(t1, in_bytes(JavaThread::lock_stack_top_offset()), R16_thread);
-  cmplwi(CCR0, t1, LockStack::end_offset() - 1);
-  bgt(CCR0, slow);
+  Label push;
+  const Register top  = t1;
+  const Register mark = t2;
+  const Register t    = R0;
 
-  // Quick check: Do not reserve cache line for atomic update if not unlocked.
-  // (Similar to contention_hint in cmpxchg solutions.)
-  xori(R0, hdr, markWord::unlocked_value); // flip unlocked bit
-  andi_(R0, R0, markWord::lock_mask_in_place);
-  bne(CCR0, slow); // failed if new header doesn't contain locked_value (which is 0)
+  // Check if the lock-stack is full.
+  lwz(top, in_bytes(JavaThread::lock_stack_top_offset()), R16_thread);
+  cmplwi(CCR0, top, LockStack::end_offset());
+  bge(CCR0, slow);
 
-  // Note: We're not publishing anything (like the displaced header in LM_LEGACY)
-  // to other threads at this point. Hence, no release barrier, here.
-  // (The obj has been written to the BasicObjectLock at obj_offset() within the own thread stack.)
-  atomically_flip_locked_state(/* is_unlock */ false, obj, hdr, slow, MacroAssembler::MemBarAcq);
+  // The underflow check is elided. The recursive check will always fail
+  // when the lock stack is empty because of the _bad_oop_sentinel field.
+
+  // Check for recursion.
+  subi(t, top, oopSize);
+  ldx(t, R16_thread, t);
+  cmpd(CCR0, obj, t);
+  beq(CCR0, push);
+
+  // Check header for monitor (0b10) or locked (0b00).
+  ld(mark, oopDesc::mark_offset_in_bytes(), obj);
+  xori(t, mark, markWord::unlocked_value);
+  andi_(t, t, markWord::lock_mask_in_place);
+  bne(CCR0, slow);
+
+  // Try to lock. Transition lock bits 0b01 => 0b00
+  atomically_flip_locked_state(/* is_unlock */ false, obj, mark, slow, MacroAssembler::MemBarAcq);
+
+  bind(push);
   // After successful lock, push object on lock-stack
-  stdx(obj, t1, R16_thread);
-  addi(t1, t1, oopSize);
-  stw(t1, in_bytes(JavaThread::lock_stack_top_offset()), R16_thread);
+  stdx(obj, R16_thread, top);
+  addi(top, top, oopSize);
+  stw(top, in_bytes(JavaThread::lock_stack_top_offset()), R16_thread);
 }
 
 // Implements lightweight-unlocking.
-// Branches to slow upon failure, with CCR0 NE.
-// Falls through upon success, with CCR0 EQ.
 //
 // - obj: the object to be unlocked
-// - hdr: the (pre-loaded) header of the object, will be destroyed
-void MacroAssembler::lightweight_unlock(Register obj, Register hdr, Label& slow) {
+// - t1: temporary register
+void MacroAssembler::lightweight_unlock(Register obj, Register t1, Label& slow) {
   assert(LockingMode == LM_LIGHTWEIGHT, "only used with new lightweight locking");
-  assert_different_registers(obj, hdr);
+  assert_different_registers(obj, t1);
 
 #ifdef ASSERT
-  {
-    // Check that hdr is fast-locked.
-    Label hdr_ok;
-    andi_(R0, hdr, markWord::lock_mask_in_place);
-    beq(CCR0, hdr_ok);
-    stop("Header is not fast-locked");
-    bind(hdr_ok);
-  }
-  Register t1 = hdr; // Reuse in debug build.
   {
     // The following checks rely on the fact that LockStack is only ever modified by
     // its owning thread, even if the lock got inflated concurrently; removal of LockStack
@@ -4135,32 +4397,67 @@ void MacroAssembler::lightweight_unlock(Register obj, Register hdr, Label& slow)
     Label stack_ok;
     lwz(t1, in_bytes(JavaThread::lock_stack_top_offset()), R16_thread);
     cmplwi(CCR0, t1, LockStack::start_offset());
-    bgt(CCR0, stack_ok);
+    bge(CCR0, stack_ok);
     stop("Lock-stack underflow");
     bind(stack_ok);
   }
-  {
-    // Check if the top of the lock-stack matches the unlocked object.
-    Label tos_ok;
-    addi(t1, t1, -oopSize);
-    ldx(t1, t1, R16_thread);
-    cmpd(CCR0, t1, obj);
-    beq(CCR0, tos_ok);
-    stop("Top of lock-stack does not match the unlocked object");
-    bind(tos_ok);
-  }
 #endif
 
-  // Release the lock.
-  atomically_flip_locked_state(/* is_unlock */ true, obj, hdr, slow, MacroAssembler::MemBarRel);
+  Label unlocked, push_and_slow;
+  const Register top  = t1;
+  const Register mark = R0;
+  Register t          = R0;
+
+  // Check if obj is top of lock-stack.
+  lwz(top, in_bytes(JavaThread::lock_stack_top_offset()), R16_thread);
+  subi(top, top, oopSize);
+  ldx(t, R16_thread, top);
+  cmpd(CCR0, obj, t);
+  bne(CCR0, slow);
+
+  // Pop lock-stack.
+  DEBUG_ONLY(li(t, 0);)
+  DEBUG_ONLY(stdx(t, R16_thread, top);)
+  stw(top, in_bytes(JavaThread::lock_stack_top_offset()), R16_thread);
+
+  // The underflow check is elided. The recursive check will always fail
+  // when the lock stack is empty because of the _bad_oop_sentinel field.
+
+  // Check if recursive.
+  subi(t, top, oopSize);
+  ldx(t, R16_thread, t);
+  cmpd(CCR0, obj, t);
+  beq(CCR0, unlocked);
+
+  // Use top as tmp
+  t = top;
+
+  // Not recursive. Check header for monitor (0b10).
+  ld(mark, oopDesc::mark_offset_in_bytes(), obj);
+  andi_(t, mark, markWord::monitor_value);
+  bne(CCR0, push_and_slow);
 
-  // After successful unlock, pop object from lock-stack
-  Register t2 = hdr;
-  lwz(t2, in_bytes(JavaThread::lock_stack_top_offset()), R16_thread);
-  addi(t2, t2, -oopSize);
 #ifdef ASSERT
-  li(R0, 0);
-  stdx(R0, t2, R16_thread);
+  // Check header not unlocked (0b01).
+  Label not_unlocked;
+  andi_(t, mark, markWord::unlocked_value);
+  beq(CCR0, not_unlocked);
+  stop("lightweight_unlock already unlocked");
+  bind(not_unlocked);
 #endif
-  stw(t2, in_bytes(JavaThread::lock_stack_top_offset()), R16_thread);
+
+  // Try to unlock. Transition lock bits 0b00 => 0b01
+  atomically_flip_locked_state(/* is_unlock */ true, obj, t, push_and_slow, MacroAssembler::MemBarRel);
+  b(unlocked);
+
+  bind(push_and_slow);
+
+  // Restore lock-stack and handle the unlock in runtime.
+  lwz(top, in_bytes(JavaThread::lock_stack_top_offset()), R16_thread);
+  DEBUG_ONLY(stdx(obj, R16_thread, top);)
+  addi(top, top, oopSize);
+  stw(top, in_bytes(JavaThread::lock_stack_top_offset()), R16_thread);
+  b(slow);
+
+  bind(unlocked);
 }
diff --git a/src/hotspot/cpu/ppc/macroAssembler_ppc.hpp b/src/hotspot/cpu/ppc/macroAssembler_ppc.hpp
index ec370a450ac..92db8a86b42 100644
--- a/src/hotspot/cpu/ppc/macroAssembler_ppc.hpp
+++ b/src/hotspot/cpu/ppc/macroAssembler_ppc.hpp
@@ -615,8 +615,8 @@ class MacroAssembler: public Assembler {
   void inc_held_monitor_count(Register tmp);
   void dec_held_monitor_count(Register tmp);
   void atomically_flip_locked_state(bool is_unlock, Register obj, Register tmp, Label& failed, int semantics);
-  void lightweight_lock(Register obj, Register hdr, Register t1, Label& slow);
-  void lightweight_unlock(Register obj, Register hdr, Label& slow);
+  void lightweight_lock(Register obj, Register t1, Register t2, Label& slow);
+  void lightweight_unlock(Register obj, Register t1, Label& slow);
 
   // allocation (for C1)
   void tlab_allocate(
@@ -637,6 +637,12 @@ class MacroAssembler: public Assembler {
   void compiler_fast_unlock_object(ConditionRegister flag, Register oop, Register box,
                                    Register tmp1, Register tmp2, Register tmp3);
 
+  void compiler_fast_lock_lightweight_object(ConditionRegister flag, Register oop, Register tmp1,
+                                             Register tmp2, Register tmp3);
+
+  void compiler_fast_unlock_lightweight_object(ConditionRegister flag, Register oop, Register tmp1,
+                                               Register tmp2, Register tmp3);
+
   // Check if safepoint requested and if so branch
   void safepoint_poll(Label& slow_path, Register temp, bool at_return, bool in_nmethod);
 
diff --git a/src/hotspot/cpu/ppc/ppc.ad b/src/hotspot/cpu/ppc/ppc.ad
index 110c6b9b668..1058ae35b76 100644
--- a/src/hotspot/cpu/ppc/ppc.ad
+++ b/src/hotspot/cpu/ppc/ppc.ad
@@ -12122,6 +12122,7 @@ instruct partialSubtypeCheck(iRegPdst result, iRegP_N2P subklass, iRegP_N2P supe
 // inlined locking and unlocking
 
 instruct cmpFastLock(flagsRegCR0 crx, iRegPdst oop, iRegPdst box, iRegPdst tmp1, iRegPdst tmp2) %{
+  predicate(LockingMode != LM_LIGHTWEIGHT);
   match(Set crx (FastLock oop box));
   effect(TEMP tmp1, TEMP tmp2);
 
@@ -12137,6 +12138,7 @@ instruct cmpFastLock(flagsRegCR0 crx, iRegPdst oop, iRegPdst box, iRegPdst tmp1,
 %}
 
 instruct cmpFastUnlock(flagsRegCR0 crx, iRegPdst oop, iRegPdst box, iRegPdst tmp1, iRegPdst tmp2, iRegPdst tmp3) %{
+  predicate(LockingMode != LM_LIGHTWEIGHT);
   match(Set crx (FastUnlock oop box));
   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3);
 
@@ -12151,6 +12153,38 @@ instruct cmpFastUnlock(flagsRegCR0 crx, iRegPdst oop, iRegPdst box, iRegPdst tmp
   ins_pipe(pipe_class_compare);
 %}
 
+instruct cmpFastLockLightweight(flagsRegCR0 crx, iRegPdst oop, iRegPdst box, iRegPdst tmp1, iRegPdst tmp2) %{
+  predicate(LockingMode == LM_LIGHTWEIGHT);
+  match(Set crx (FastLock oop box));
+  effect(TEMP tmp1, TEMP tmp2);
+
+  format %{ "FASTLOCK $oop, $box, $tmp1, $tmp2" %}
+  ins_encode %{
+    __ fast_lock_lightweight($crx$$CondRegister, $oop$$Register, $box$$Register,
+                             $tmp1$$Register, $tmp2$$Register, /*tmp3*/ R0);
+    // If locking was successful, crx should indicate 'EQ'.
+    // The compiler generates a branch to the runtime call to
+    // _complete_monitor_locking_Java for the case where crx is 'NE'.
+  %}
+  ins_pipe(pipe_class_compare);
+%}
+
+instruct cmpFastUnlockLightweight(flagsRegCR0 crx, iRegPdst oop, iRegPdst box, iRegPdst tmp1, iRegPdst tmp2, iRegPdst tmp3) %{
+  predicate(LockingMode == LM_LIGHTWEIGHT);
+  match(Set crx (FastUnlock oop box));
+  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3);
+
+  format %{ "FASTUNLOCK $oop, $box, $tmp1, $tmp2" %}
+  ins_encode %{
+    __ fast_unlock_lightweight($crx$$CondRegister, $oop$$Register, $box$$Register,
+                               $tmp1$$Register, $tmp2$$Register, $tmp3$$Register);
+    // If unlocking was successful, crx should indicate 'EQ'.
+    // The compiler generates a branch to the runtime call to
+    // _complete_monitor_unlocking_Java for the case where crx is 'NE'.
+  %}
+  ins_pipe(pipe_class_compare);
+%}
+
 // Align address.
 instruct align_addr(iRegPdst dst, iRegPsrc src, immLnegpow2 mask) %{
   match(Set dst (CastX2P (AndL (CastP2X src) mask)));
diff --git a/src/hotspot/cpu/ppc/sharedRuntime_ppc.cpp b/src/hotspot/cpu/ppc/sharedRuntime_ppc.cpp
index 5a080adc7a9..66b19794b05 100644
--- a/src/hotspot/cpu/ppc/sharedRuntime_ppc.cpp
+++ b/src/hotspot/cpu/ppc/sharedRuntime_ppc.cpp
@@ -2356,8 +2356,13 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
     __ addi(r_box, R1_SP, lock_offset);
 
     // Try fastpath for locking.
-    // fast_lock kills r_temp_1, r_temp_2, r_temp_3.
-    __ compiler_fast_lock_object(CCR0, r_oop, r_box, r_temp_1, r_temp_2, r_temp_3);
+    if (LockingMode == LM_LIGHTWEIGHT) {
+      // fast_lock kills r_temp_1, r_temp_2, r_temp_3.
+      __ compiler_fast_lock_lightweight_object(CCR0, r_oop, r_temp_1, r_temp_2, r_temp_3);
+    } else {
+      // fast_lock kills r_temp_1, r_temp_2, r_temp_3.
+      __ compiler_fast_lock_object(CCR0, r_oop, r_box, r_temp_1, r_temp_2, r_temp_3);
+    }
     __ beq(CCR0, locked);
 
     // None of the above fast optimizations worked so we have to get into the
@@ -2567,7 +2572,11 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
     __ addi(r_box, R1_SP, lock_offset);
 
     // Try fastpath for unlocking.
-    __ compiler_fast_unlock_object(CCR0, r_oop, r_box, r_temp_1, r_temp_2, r_temp_3);
+    if (LockingMode == LM_LIGHTWEIGHT) {
+      __ compiler_fast_unlock_lightweight_object(CCR0, r_oop, r_temp_1, r_temp_2, r_temp_3);
+    } else {
+      __ compiler_fast_unlock_object(CCR0, r_oop, r_box, r_temp_1, r_temp_2, r_temp_3);
+    }
     __ beq(CCR0, done);
 
     // Save and restore any potential method result value around the unlocking operation.
diff --git a/src/hotspot/cpu/ppc/vm_version_ppc.hpp b/src/hotspot/cpu/ppc/vm_version_ppc.hpp
index a5831ef1590..0efde131277 100644
--- a/src/hotspot/cpu/ppc/vm_version_ppc.hpp
+++ b/src/hotspot/cpu/ppc/vm_version_ppc.hpp
@@ -94,6 +94,7 @@ public:
   // PPC64 supports fast class initialization checks
   static bool supports_fast_class_init_checks() { return true; }
   constexpr static bool supports_stack_watermark_barrier() { return true; }
+  constexpr static bool supports_recursive_lightweight_locking() { return true; }
 
   static bool is_determine_features_test_running() { return _is_determine_features_test_running; }
   // CPU instruction support
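
Editor's note (not part of the patch): the C++ sketch below is only an illustration of the fast-path decisions the new PPC64 assembly encodes in compiler_fast_lock_lightweight_object / compiler_fast_unlock_lightweight_object: check lock-stack capacity, treat a matching top-of-stack entry as a recursive lock, otherwise flip the low mark-word bits (0b01 unlocked <=> 0b00 locked) and push/pop the lock stack. All types and names here are hypothetical stand-ins, the mark-word update is deliberately non-atomic, and the real code elides the underflow check via a _bad_oop_sentinel slot instead of testing the top index as done here.

// Hypothetical, simplified model of the recursive lightweight-locking fast path.
#include <cstdint>
#include <cstdio>

constexpr uintptr_t lock_mask      = 0b11;   // low mark-word lock bits
constexpr uintptr_t unlocked_value = 0b01;
constexpr int       lock_stack_cap = 8;      // stand-in for the LockStack capacity

struct Obj    { uintptr_t mark; };
struct Thread {
  Obj* lock_stack[lock_stack_cap];
  int  top = 0;                              // index of the next free slot
};

// Returns true on fast-path success, false when the runtime slow path is needed.
bool fast_lock(Thread& t, Obj& o) {
  if (t.top == lock_stack_cap) return false;            // lock-stack full -> slow path
  if (t.top > 0 && t.lock_stack[t.top - 1] == &o) {     // recursive: obj already on top
    t.lock_stack[t.top++] = &o;
    return true;
  }
  if ((o.mark & lock_mask) != unlocked_value) return false;  // inflated (0b10) or locked (0b00)
  o.mark &= ~lock_mask;                                  // transition 0b01 -> 0b00 (simplified, non-atomic)
  t.lock_stack[t.top++] = &o;
  return true;
}

bool fast_unlock(Thread& t, Obj& o) {
  if (t.top == 0 || t.lock_stack[t.top - 1] != &o) return false;  // not on top -> monitor/slow path
  --t.top;                                                        // pop lock-stack
  if (t.top > 0 && t.lock_stack[t.top - 1] == &o) return true;    // recursive unlock, mark untouched
  o.mark |= unlocked_value;                                       // transition 0b00 -> 0b01 (simplified)
  return true;
}

int main() {
  Thread t;
  Obj o{unlocked_value};
  bool l1 = fast_lock(t, o);    // plain lock: CAS-equivalent plus push
  bool l2 = fast_lock(t, o);    // recursive lock: push only
  bool u1 = fast_unlock(t, o);  // recursive unlock: pop only
  bool u2 = fast_unlock(t, o);  // final unlock: restores the 0b01 bits
  std::printf("%d %d %d %d mark=%#lx\n", l1, l2, u1, u2, (unsigned long)o.mark);
}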