From fd80329bfdbc023140376e396a02ad6dc0c5bf09 Mon Sep 17 00:00:00 2001 From: Quan Anh Mai Date: Thu, 12 Mar 2026 08:39:41 +0000 Subject: [PATCH] 8379260: C2: Separate volatile barrier and full barrier Reviewed-by: fyang, mdoerr, amitkumar, aph, dlong --- src/hotspot/cpu/aarch64/aarch64.ad | 29 +++++++++++ src/hotspot/cpu/arm/arm.ad | 24 ++++++++++ src/hotspot/cpu/ppc/ppc.ad | 24 ++++++++++ src/hotspot/cpu/riscv/riscv.ad | 64 +++++++++++++++++++++++++ src/hotspot/cpu/s390/s390.ad | 18 +++++++ src/hotspot/cpu/x86/x86.ad | 30 ++++++++++++ src/hotspot/share/adlc/formssel.cpp | 2 + src/hotspot/share/opto/classes.hpp | 2 + src/hotspot/share/opto/library_call.cpp | 4 +- src/hotspot/share/opto/memnode.cpp | 2 + src/hotspot/share/opto/memnode.hpp | 15 ++++++ 11 files changed, 212 insertions(+), 2 deletions(-) diff --git a/src/hotspot/cpu/aarch64/aarch64.ad b/src/hotspot/cpu/aarch64/aarch64.ad index 51bb15e0f59..3989c5a17f0 100644 --- a/src/hotspot/cpu/aarch64/aarch64.ad +++ b/src/hotspot/cpu/aarch64/aarch64.ad @@ -8024,6 +8024,21 @@ instruct membar_release_lock() %{ ins_pipe(pipe_serial); %} +instruct membar_storeload() %{ + match(MemBarStoreLoad); + ins_cost(VOLATILE_REF_COST*100); + + format %{ "MEMBAR-store-load\n\t" + "dmb ish" %} + + ins_encode %{ + __ block_comment("membar_storeload"); + __ membar(Assembler::StoreLoad); + %} + + ins_pipe(pipe_serial); +%} + instruct unnecessary_membar_volatile() %{ predicate(unnecessary_volatile(n)); match(MemBarVolatile); @@ -8053,6 +8068,20 @@ instruct membar_volatile() %{ ins_pipe(pipe_serial); %} +instruct membar_full() %{ + match(MemBarFull); + ins_cost(VOLATILE_REF_COST*100); + + format %{ "membar_full\n\t" + "dmb ish" %} + ins_encode %{ + __ block_comment("membar_full"); + __ membar(Assembler::AnyAny); + %} + + ins_pipe(pipe_serial); +%} + // ============================================================================ // Cast/Convert Instructions diff --git a/src/hotspot/cpu/arm/arm.ad b/src/hotspot/cpu/arm/arm.ad index 
87c609be5a7..7d0d31c1f79 100644 --- a/src/hotspot/cpu/arm/arm.ad +++ b/src/hotspot/cpu/arm/arm.ad @@ -4440,6 +4440,18 @@ instruct membar_release_lock() %{ ins_pipe(empty); %} +instruct membar_storeload() %{ + match(MemBarStoreLoad); + ins_cost(4*MEMORY_REF_COST); + + size(4); + format %{ "MEMBAR-storeload" %} + ins_encode %{ + __ membar(MacroAssembler::StoreLoad, noreg); + %} + ins_pipe(long_memory_op); +%} + instruct membar_volatile() %{ match(MemBarVolatile); ins_cost(4*MEMORY_REF_COST); @@ -4463,6 +4475,18 @@ instruct unnecessary_membar_volatile() %{ ins_pipe(empty); %} +instruct membar_full() %{ + match(MemBarFull); + ins_cost(4*MEMORY_REF_COST); + + size(4); + format %{ "MEMBAR-full" %} + ins_encode %{ + __ membar(MacroAssembler::StoreLoad, noreg); + %} + ins_pipe(long_memory_op); +%} + //----------Register Move Instructions----------------------------------------- // Cast Index to Pointer for unsafe natives diff --git a/src/hotspot/cpu/ppc/ppc.ad b/src/hotspot/cpu/ppc/ppc.ad index 7e3cd04171d..057015d3c39 100644 --- a/src/hotspot/cpu/ppc/ppc.ad +++ b/src/hotspot/cpu/ppc/ppc.ad @@ -7163,6 +7163,18 @@ instruct membar_release_lock() %{ ins_pipe(pipe_class_default); %} +instruct membar_storeload() %{ + match(MemBarStoreLoad); + ins_cost(4*MEMORY_REF_COST); + + format %{ "MEMBAR-store-load" %} + size(4); + ins_encode %{ + __ fence(); + %} + ins_pipe(pipe_class_default); +%} + instruct membar_volatile() %{ match(MemBarVolatile); ins_cost(4*MEMORY_REF_COST); @@ -7205,6 +7217,18 @@ instruct membar_volatile() %{ // ins_pipe(pipe_class_default); //%} +instruct membar_full() %{ + match(MemBarFull); + ins_cost(4*MEMORY_REF_COST); + + format %{ "MEMBAR-full" %} + size(4); + ins_encode %{ + __ fence(); + %} + ins_pipe(pipe_class_default); +%} + instruct membar_CPUOrder() %{ match(MemBarCPUOrder); ins_cost(0); diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad index 54ea81683fc..e140052d168 100644 --- a/src/hotspot/cpu/riscv/riscv.ad +++ 
b/src/hotspot/cpu/riscv/riscv.ad @@ -8156,6 +8156,22 @@ instruct unnecessary_membar_rvtso() %{ ins_pipe(real_empty); %} +instruct membar_storeload_rvtso() %{ + predicate(UseZtso); + match(MemBarStoreLoad); + ins_cost(VOLATILE_REF_COST); + + format %{ "#@membar_storeload_rvtso\n\t" + "fence w, r"%} + + ins_encode %{ + __ block_comment("membar_storeload_rvtso"); + __ membar(MacroAssembler::StoreLoad); + %} + + ins_pipe(pipe_slow); +%} + instruct membar_volatile_rvtso() %{ predicate(UseZtso); match(MemBarVolatile); @@ -8186,6 +8202,22 @@ instruct unnecessary_membar_volatile_rvtso() %{ ins_pipe(real_empty); %} +instruct membar_full_rvtso() %{ + predicate(UseZtso); + match(MemBarFull); + ins_cost(VOLATILE_REF_COST); + + format %{ "#@membar_full_rvtso\n\t" + "fence rw, rw" %} + + ins_encode %{ + __ block_comment("membar_full_rvtso"); + __ membar(MacroAssembler::AnyAny); + %} + + ins_pipe(pipe_slow); +%} + // RVWMO instruct membar_aqcuire_rvwmo() %{ @@ -8235,6 +8267,22 @@ instruct membar_storestore_rvwmo() %{ ins_pipe(pipe_serial); %} +instruct membar_storeload_rvwmo() %{ + predicate(!UseZtso); + match(MemBarStoreLoad); + ins_cost(VOLATILE_REF_COST); + + format %{ "#@membar_storeload_rvwmo\n\t" + "fence w, r"%} + + ins_encode %{ + __ block_comment("membar_storeload_rvwmo"); + __ membar(MacroAssembler::StoreLoad); + %} + + ins_pipe(pipe_serial); +%} + instruct membar_volatile_rvwmo() %{ predicate(!UseZtso); match(MemBarVolatile); @@ -8279,6 +8327,22 @@ instruct unnecessary_membar_volatile_rvwmo() %{ ins_pipe(real_empty); %} +instruct membar_full_rvwmo() %{ + predicate(!UseZtso); + match(MemBarFull); + ins_cost(VOLATILE_REF_COST); + + format %{ "#@membar_full_rvwmo\n\t" + "fence rw, rw" %} + + ins_encode %{ + __ block_comment("membar_full_rvwmo"); + __ membar(MacroAssembler::AnyAny); + %} + + ins_pipe(pipe_serial); +%} + instruct spin_wait() %{ predicate(UseZihintpause); match(OnSpinWait); diff --git a/src/hotspot/cpu/s390/s390.ad b/src/hotspot/cpu/s390/s390.ad index 
1521edde40c..b9982c795cd 100644 --- a/src/hotspot/cpu/s390/s390.ad +++ b/src/hotspot/cpu/s390/s390.ad @@ -5239,6 +5239,15 @@ instruct membar_release_lock() %{ ins_pipe(pipe_class_dummy); %} +instruct membar_storeload() %{ + match(MemBarStoreLoad); + ins_cost(4 * MEMORY_REF_COST); + size(2); + format %{ "MEMBAR-storeload" %} + ins_encode %{ __ z_fence(); %} + ins_pipe(pipe_class_dummy); +%} + instruct membar_volatile() %{ match(MemBarVolatile); ins_cost(4 * MEMORY_REF_COST); @@ -5258,6 +5267,15 @@ instruct unnecessary_membar_volatile() %{ ins_pipe(pipe_class_dummy); %} +instruct membar_full() %{ + match(MemBarFull); + ins_cost(4 * MEMORY_REF_COST); + size(2); + format %{ "MEMBAR-full" %} + ins_encode %{ __ z_fence(); %} + ins_pipe(pipe_class_dummy); +%} + instruct membar_CPUOrder() %{ match(MemBarCPUOrder); ins_cost(0); diff --git a/src/hotspot/cpu/x86/x86.ad b/src/hotspot/cpu/x86/x86.ad index ed380105565..8b90655c53c 100644 --- a/src/hotspot/cpu/x86/x86.ad +++ b/src/hotspot/cpu/x86/x86.ad @@ -8852,6 +8852,21 @@ instruct membar_release_lock() ins_pipe(empty); %} +instruct membar_storeload(rFlagsReg cr) %{ + match(MemBarStoreLoad); + effect(KILL cr); + ins_cost(400); + + format %{ + $$template + $$emit$$"lock addl [rsp + #0], 0\t! membar_storeload" + %} + ins_encode %{ + __ membar(Assembler::StoreLoad); + %} + ins_pipe(pipe_slow); +%} + instruct membar_volatile(rFlagsReg cr) %{ match(MemBarVolatile); effect(KILL cr); @@ -8879,6 +8894,21 @@ instruct unnecessary_membar_volatile() ins_pipe(empty); %} +instruct membar_full(rFlagsReg cr) %{ + match(MemBarFull); + effect(KILL cr); + ins_cost(400); + + format %{ + $$template + $$emit$$"lock addl [rsp + #0], 0\t! 
membar_full" + %} + ins_encode %{ + __ membar(Assembler::StoreLoad); + %} + ins_pipe(pipe_slow); +%} + instruct membar_storestore() %{ match(MemBarStoreStore); match(StoreStoreFence); diff --git a/src/hotspot/share/adlc/formssel.cpp b/src/hotspot/share/adlc/formssel.cpp index 182587d2f2f..4dd2bff7c89 100644 --- a/src/hotspot/share/adlc/formssel.cpp +++ b/src/hotspot/share/adlc/formssel.cpp @@ -4276,7 +4276,9 @@ bool MatchRule::is_ideal_membar() const { !strcmp(_opType,"LoadFence" ) || !strcmp(_opType,"StoreFence") || !strcmp(_opType,"StoreStoreFence") || + !strcmp(_opType,"MemBarStoreLoad") || !strcmp(_opType,"MemBarVolatile") || + !strcmp(_opType,"MemBarFull") || !strcmp(_opType,"MemBarCPUOrder") || !strcmp(_opType,"MemBarStoreStore") || !strcmp(_opType,"OnSpinWait"); diff --git a/src/hotspot/share/opto/classes.hpp b/src/hotspot/share/opto/classes.hpp index abd93fdd876..719b90ad6dd 100644 --- a/src/hotspot/share/opto/classes.hpp +++ b/src/hotspot/share/opto/classes.hpp @@ -239,8 +239,10 @@ macro(MemBarRelease) macro(StoreFence) macro(StoreStoreFence) macro(MemBarReleaseLock) +macro(MemBarStoreLoad) macro(MemBarVolatile) macro(MemBarStoreStore) +macro(MemBarFull) macro(MergeMem) macro(MinI) macro(MinL) diff --git a/src/hotspot/share/opto/library_call.cpp b/src/hotspot/share/opto/library_call.cpp index b3ee060d75f..8d3c5a98ea0 100644 --- a/src/hotspot/share/opto/library_call.cpp +++ b/src/hotspot/share/opto/library_call.cpp @@ -2900,7 +2900,7 @@ bool LibraryCallKit::inline_unsafe_fence(vmIntrinsics::ID id) { insert_mem_bar(Op_StoreStoreFence); return true; case vmIntrinsics::_fullFence: - insert_mem_bar(Op_MemBarVolatile); + insert_mem_bar(Op_MemBarFull); return true; default: fatal_unexpected_iid(id); @@ -3070,7 +3070,7 @@ bool LibraryCallKit::inline_native_vthread_start_transition(address funcAddr, co Node* vt_addr = basic_plus_adr(vt_oop, java_lang_Thread::is_in_vthread_transition_offset()); access_store_at(nullptr, jt_addr, _gvn.type(jt_addr)->is_ptr(), 
ideal.ConI(1), TypeInt::BOOL, T_BOOLEAN, IN_NATIVE | MO_UNORDERED); access_store_at(nullptr, vt_addr, _gvn.type(vt_addr)->is_ptr(), ideal.ConI(1), TypeInt::BOOL, T_BOOLEAN, IN_NATIVE | MO_UNORDERED); - insert_mem_bar(Op_MemBarVolatile); + insert_mem_bar(Op_MemBarStoreLoad); ideal.sync_kit(this); Node* global_disable_addr = makecon(TypeRawPtr::make((address)MountUnmountDisabler::global_vthread_transition_disable_count_address())); diff --git a/src/hotspot/share/opto/memnode.cpp b/src/hotspot/share/opto/memnode.cpp index 6cb14444f6b..85bc41e71b9 100644 --- a/src/hotspot/share/opto/memnode.cpp +++ b/src/hotspot/share/opto/memnode.cpp @@ -4349,7 +4349,9 @@ MemBarNode* MemBarNode::make(Compile* C, int opcode, int atp, Node* pn) { case Op_StoreStoreFence: return new StoreStoreFenceNode(C, atp, pn); case Op_MemBarAcquireLock: return new MemBarAcquireLockNode(C, atp, pn); case Op_MemBarReleaseLock: return new MemBarReleaseLockNode(C, atp, pn); + case Op_MemBarStoreLoad: return new MemBarStoreLoadNode(C, atp, pn); case Op_MemBarVolatile: return new MemBarVolatileNode(C, atp, pn); + case Op_MemBarFull: return new MemBarFullNode(C, atp, pn); case Op_MemBarCPUOrder: return new MemBarCPUOrderNode(C, atp, pn); case Op_OnSpinWait: return new OnSpinWaitNode(C, atp, pn); case Op_Initialize: return new InitializeNode(C, atp, pn); diff --git a/src/hotspot/share/opto/memnode.hpp b/src/hotspot/share/opto/memnode.hpp index 30d44e82016..7fa238f574d 100644 --- a/src/hotspot/share/opto/memnode.hpp +++ b/src/hotspot/share/opto/memnode.hpp @@ -1321,6 +1321,13 @@ public: virtual int Opcode() const; }; +class MemBarStoreLoadNode : public MemBarNode { +public: + MemBarStoreLoadNode(Compile* C, int alias_idx, Node* precedent) + : MemBarNode(C, alias_idx, precedent) {} + virtual int Opcode() const; +}; + // Ordering between a volatile store and a following volatile load. // Requires multi-CPU visibility? 
class MemBarVolatileNode: public MemBarNode { @@ -1330,6 +1337,14 @@ public: virtual int Opcode() const; }; +// A full barrier blocks all loads and stores from moving across it. Inserted by LibraryCallKit::inline_unsafe_fence for vmIntrinsics::_fullFence. +class MemBarFullNode : public MemBarNode { +public: + MemBarFullNode(Compile* C, int alias_idx, Node* precedent) + : MemBarNode(C, alias_idx, precedent) {} + virtual int Opcode() const; +}; + // Ordering within the same CPU. Used to order unsafe memory references // inside the compiler when we lack alias info. Not needed "outside" the // compiler because the CPU does all the ordering for us.