8261649: AArch64: Optimize LSE atomics in C++ code
Reviewed-by: adinn
parent 61820b74dd
commit 1b0c36b05b
@@ -42,5 +42,8 @@ extern aarch64_atomic_stub_t aarch64_atomic_xchg_8_impl;
extern aarch64_atomic_stub_t aarch64_atomic_cmpxchg_1_impl;
extern aarch64_atomic_stub_t aarch64_atomic_cmpxchg_4_impl;
extern aarch64_atomic_stub_t aarch64_atomic_cmpxchg_8_impl;
extern aarch64_atomic_stub_t aarch64_atomic_cmpxchg_1_relaxed_impl;
extern aarch64_atomic_stub_t aarch64_atomic_cmpxchg_4_relaxed_impl;
extern aarch64_atomic_stub_t aarch64_atomic_cmpxchg_8_relaxed_impl;

#endif // CPU_AARCH64_ATOMIC_AARCH64_HPP

@@ -5574,87 +5574,167 @@ class StubGenerator: public StubCodeGenerator {
}

#ifdef LINUX

// ARMv8.1 LSE versions of the atomic stubs used by Atomic::PlatformXX.
//
// If LSE is in use, generate LSE versions of all the stubs. The
// non-LSE versions are in atomic_aarch64.S.
void generate_atomic_entry_points() {

// class AtomicStubMark records the entry point of a stub and the
// stub pointer which will point to it. The stub pointer is set to
// the entry point when ~AtomicStubMark() is called, which must be
// after ICache::invalidate_range. This ensures safe publication of
// the generated code.
class AtomicStubMark {
address _entry_point;
aarch64_atomic_stub_t *_stub;
MacroAssembler *_masm;
public:
AtomicStubMark(MacroAssembler *masm, aarch64_atomic_stub_t *stub) {
_masm = masm;
__ align(32);
_entry_point = __ pc();
_stub = stub;
}
~AtomicStubMark() {
*_stub = (aarch64_atomic_stub_t)_entry_point;
}
};

// NB: For memory_order_conservative we need a trailing membar after
// LSE atomic operations but not a leading membar.
//
// We don't need a leading membar because a clause in the Arm ARM
// says:
//
// Barrier-ordered-before
//
// Barrier instructions order prior Memory effects before subsequent
// Memory effects generated by the same Observer. A read or a write
// RW1 is Barrier-ordered-before a read or a write RW2 from the same
// Observer if and only if RW1 appears in program order before RW2
// and [ ... ] at least one of RW1 and RW2 is generated by an atomic
// instruction with both Acquire and Release semantics.
//
// All the atomic instructions {ldaddal, swapal, casal} have Acquire
// and Release semantics, therefore we don't need a leading
// barrier. However, there is no corresponding Barrier-ordered-after
// relationship, therefore we need a trailing membar to prevent a
// later store or load from being reordered with the store in an
// atomic instruction.
//
// This was checked by using the herd7 consistency model simulator
// (http://diy.inria.fr/) with this test case:
//
// AArch64 LseCas
// { 0:X1=x; 0:X2=y; 1:X1=x; 1:X2=y; }
// P0 | P1;
// LDR W4, [X2] | MOV W3, #0;
// DMB LD | MOV W4, #1;
// LDR W3, [X1] | CASAL W3, W4, [X1];
// | DMB ISH;
// | STR W4, [X2];
// exists
// (0:X3=0 /\ 0:X4=1)
//
// If X3 == 0 && X4 == 1, the store to y in P1 has been reordered
// with the store to x in P1. Without the DMB in P1 this may happen.
//
// At the time of writing we don't know of any AArch64 hardware that
// reorders stores in this way, but the Reference Manual permits it.

void gen_cas_entry(Assembler::operand_size size,
atomic_memory_order order) {
Register prev = r3, ptr = c_rarg0, compare_val = c_rarg1,
exchange_val = c_rarg2;
bool acquire, release;
switch (order) {
case memory_order_relaxed:
acquire = false;
release = false;
break;
default:
acquire = true;
release = true;
break;
}
__ mov(prev, compare_val);
__ lse_cas(prev, exchange_val, ptr, size, acquire, release, /*not_pair*/true);
if (order == memory_order_conservative) {
__ membar(Assembler::StoreStore|Assembler::StoreLoad);
}
if (size == Assembler::xword) {
__ mov(r0, prev);
} else {
__ movw(r0, prev);
}
__ ret(lr);
}
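For orientation, the stubs emitted by gen_cas_entry follow the same C-callable contract as the default implementations declared later in this patch: the object address arrives in c_rarg0, the compare and exchange values in c_rarg1 and c_rarg2, and the previous value is returned in r0. A minimal sketch of that contract, assuming the aarch64_atomic_stub_t typedef in atomic_aarch64.hpp matches the extern "C" _default_impl signature shown further down:

// Sketch only (not part of the patch): the stub type as implied by the
// DEFAULT_ATOMIC_OP declarations later in this commit.
typedef uint64_t (*aarch64_atomic_stub_t)(volatile void *ptr,  // c_rarg0
                                          uint64_t arg1,       // c_rarg1: compare value for cmpxchg
                                          uint64_t arg2);      // c_rarg2: exchange value for cmpxchg
// The stub returns the value previously held at *ptr (in r0); for the
// conservative variant the trailing membar described above is emitted
// inside the stub itself.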

void gen_ldaddal_entry(Assembler::operand_size size) {
Register prev = r2, addr = c_rarg0, incr = c_rarg1;
__ ldaddal(size, incr, prev, addr);
__ membar(Assembler::StoreStore|Assembler::StoreLoad);
if (size == Assembler::xword) {
__ mov(r0, prev);
} else {
__ movw(r0, prev);
}
__ ret(lr);
}

void gen_swpal_entry(Assembler::operand_size size) {
Register prev = r2, addr = c_rarg0, incr = c_rarg1;
__ swpal(size, incr, prev, addr);
__ membar(Assembler::StoreStore|Assembler::StoreLoad);
if (size == Assembler::xword) {
__ mov(r0, prev);
} else {
__ movw(r0, prev);
}
__ ret(lr);
}

void generate_atomic_entry_points() {
if (! UseLSE) {
return;
}

__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", "atomic entry points");
address first_entry = __ pc();

__ align(32);
aarch64_atomic_fetch_add_8_impl = (aarch64_atomic_stub_t)__ pc();
{
Register prev = r2, addr = c_rarg0, incr = c_rarg1;
__ atomic_addal(prev, incr, addr);
__ mov(r0, prev);
__ ret(lr);
}
__ align(32);
aarch64_atomic_fetch_add_4_impl = (aarch64_atomic_stub_t)__ pc();
{
Register prev = r2, addr = c_rarg0, incr = c_rarg1;
__ atomic_addalw(prev, incr, addr);
__ movw(r0, prev);
__ ret(lr);
}
__ align(32);
aarch64_atomic_xchg_4_impl = (aarch64_atomic_stub_t)__ pc();
{
Register prev = r2, addr = c_rarg0, newv = c_rarg1;
__ atomic_xchglw(prev, newv, addr);
__ movw(r0, prev);
__ ret(lr);
}
__ align(32);
aarch64_atomic_xchg_8_impl = (aarch64_atomic_stub_t)__ pc();
{
Register prev = r2, addr = c_rarg0, newv = c_rarg1;
__ atomic_xchgl(prev, newv, addr);
__ mov(r0, prev);
__ ret(lr);
}
__ align(32);
aarch64_atomic_cmpxchg_1_impl = (aarch64_atomic_stub_t)__ pc();
{
Register prev = r3, ptr = c_rarg0, compare_val = c_rarg1,
exchange_val = c_rarg2;
__ cmpxchg(ptr, compare_val, exchange_val,
MacroAssembler::byte,
/*acquire*/false, /*release*/false, /*weak*/false,
prev);
__ movw(r0, prev);
__ ret(lr);
}
__ align(32);
aarch64_atomic_cmpxchg_4_impl = (aarch64_atomic_stub_t)__ pc();
{
Register prev = r3, ptr = c_rarg0, compare_val = c_rarg1,
exchange_val = c_rarg2;
__ cmpxchg(ptr, compare_val, exchange_val,
MacroAssembler::word,
/*acquire*/false, /*release*/false, /*weak*/false,
prev);
__ movw(r0, prev);
__ ret(lr);
}
__ align(32);
aarch64_atomic_cmpxchg_8_impl = (aarch64_atomic_stub_t)__ pc();
{
Register prev = r3, ptr = c_rarg0, compare_val = c_rarg1,
exchange_val = c_rarg2;
__ cmpxchg(ptr, compare_val, exchange_val,
MacroAssembler::xword,
/*acquire*/false, /*release*/false, /*weak*/false,
prev);
__ mov(r0, prev);
__ ret(lr);
}
// All memory_order_conservative
AtomicStubMark mark_fetch_add_4(_masm, &aarch64_atomic_fetch_add_4_impl);
gen_ldaddal_entry(Assembler::word);
AtomicStubMark mark_fetch_add_8(_masm, &aarch64_atomic_fetch_add_8_impl);
gen_ldaddal_entry(Assembler::xword);

AtomicStubMark mark_xchg_4(_masm, &aarch64_atomic_xchg_4_impl);
gen_swpal_entry(Assembler::word);
AtomicStubMark mark_xchg_8_impl(_masm, &aarch64_atomic_xchg_8_impl);
gen_swpal_entry(Assembler::xword);

// CAS, memory_order_conservative
AtomicStubMark mark_cmpxchg_1(_masm, &aarch64_atomic_cmpxchg_1_impl);
gen_cas_entry(MacroAssembler::byte, memory_order_conservative);
AtomicStubMark mark_cmpxchg_4(_masm, &aarch64_atomic_cmpxchg_4_impl);
gen_cas_entry(MacroAssembler::word, memory_order_conservative);
AtomicStubMark mark_cmpxchg_8(_masm, &aarch64_atomic_cmpxchg_8_impl);
gen_cas_entry(MacroAssembler::xword, memory_order_conservative);

// CAS, memory_order_relaxed
AtomicStubMark mark_cmpxchg_1_relaxed
(_masm, &aarch64_atomic_cmpxchg_1_relaxed_impl);
gen_cas_entry(MacroAssembler::byte, memory_order_relaxed);
AtomicStubMark mark_cmpxchg_4_relaxed
(_masm, &aarch64_atomic_cmpxchg_4_relaxed_impl);
gen_cas_entry(MacroAssembler::word, memory_order_relaxed);
AtomicStubMark mark_cmpxchg_8_relaxed
(_masm, &aarch64_atomic_cmpxchg_8_relaxed_impl);
gen_cas_entry(MacroAssembler::xword, memory_order_relaxed);

ICache::invalidate_range(first_entry, __ pc() - first_entry);
}
#endif // LINUX

@@ -6772,9 +6852,7 @@ class StubGenerator: public StubCodeGenerator {

#ifdef LINUX

#if 0 // JDK-8261660: disabled for now.
generate_atomic_entry_points();
#endif

#endif // LINUX

@@ -6805,19 +6883,22 @@ void StubGenerator_generate(CodeBuffer* code, bool all) {
// Define pointers to atomic stubs and initialize them to point to the
// code in atomic_aarch64.S.

#define DEFAULT_ATOMIC_OP(OPNAME, SIZE) \
extern "C" uint64_t aarch64_atomic_ ## OPNAME ## _ ## SIZE ## _default_impl \
#define DEFAULT_ATOMIC_OP(OPNAME, SIZE, RELAXED) \
extern "C" uint64_t aarch64_atomic_ ## OPNAME ## _ ## SIZE ## RELAXED ## _default_impl \
(volatile void *ptr, uint64_t arg1, uint64_t arg2); \
aarch64_atomic_stub_t aarch64_atomic_ ## OPNAME ## _ ## SIZE ## _impl \
= aarch64_atomic_ ## OPNAME ## _ ## SIZE ## _default_impl;
aarch64_atomic_stub_t aarch64_atomic_ ## OPNAME ## _ ## SIZE ## RELAXED ## _impl \
= aarch64_atomic_ ## OPNAME ## _ ## SIZE ## RELAXED ## _default_impl;

DEFAULT_ATOMIC_OP(fetch_add, 4)
DEFAULT_ATOMIC_OP(fetch_add, 8)
DEFAULT_ATOMIC_OP(xchg, 4)
DEFAULT_ATOMIC_OP(xchg, 8)
DEFAULT_ATOMIC_OP(cmpxchg, 1)
DEFAULT_ATOMIC_OP(cmpxchg, 4)
DEFAULT_ATOMIC_OP(cmpxchg, 8)
DEFAULT_ATOMIC_OP(fetch_add, 4, )
DEFAULT_ATOMIC_OP(fetch_add, 8, )
DEFAULT_ATOMIC_OP(xchg, 4, )
DEFAULT_ATOMIC_OP(xchg, 8, )
DEFAULT_ATOMIC_OP(cmpxchg, 1, )
DEFAULT_ATOMIC_OP(cmpxchg, 4, )
DEFAULT_ATOMIC_OP(cmpxchg, 8, )
DEFAULT_ATOMIC_OP(cmpxchg, 1, _relaxed)
DEFAULT_ATOMIC_OP(cmpxchg, 4, _relaxed)
DEFAULT_ATOMIC_OP(cmpxchg, 8, _relaxed)

#undef DEFAULT_ATOMIC_OP
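To make the token pasting easier to follow, here is the mechanical expansion of one of the new three-argument invocations (illustration only, not extra code in the patch):

// DEFAULT_ATOMIC_OP(cmpxchg, 4, _relaxed) expands to:
extern "C" uint64_t aarch64_atomic_cmpxchg_4_relaxed_default_impl
  (volatile void *ptr, uint64_t arg1, uint64_t arg2);
aarch64_atomic_stub_t aarch64_atomic_cmpxchg_4_relaxed_impl
  = aarch64_atomic_cmpxchg_4_relaxed_default_impl;
// With an empty third argument, e.g. DEFAULT_ATOMIC_OP(cmpxchg, 4, ), the
// pasting collapses and the names come out as before:
// aarch64_atomic_cmpxchg_4_impl initialized to aarch64_atomic_cmpxchg_4_default_impl.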

@@ -26,44 +26,96 @@
.globl aarch64_atomic_fetch_add_8_default_impl
.align 5
aarch64_atomic_fetch_add_8_default_impl:
prfm pstl1strm, [x0]
0: ldaxr x2, [x0]
add x8, x2, x1
stlxr w9, x8, [x0]
cbnz w9, 0b
dmb ish
mov x0, x2
ret

.globl aarch64_atomic_fetch_add_4_default_impl
.align 5
aarch64_atomic_fetch_add_4_default_impl:
prfm pstl1strm, [x0]
0: ldaxr w2, [x0]
add w8, w2, w1
stlxr w9, w8, [x0]
cbnz w9, 0b
dmb ish
mov w0, w2
ret

.globl aarch64_atomic_xchg_4_default_impl
.align 5
aarch64_atomic_xchg_4_default_impl:
prfm pstl1strm, [x0]
0: ldaxr w2, [x0]
stlxr w8, w1, [x0]
cbnz w8, 0b
dmb ish
mov w0, w2
ret

.globl aarch64_atomic_xchg_8_default_impl
.align 5
aarch64_atomic_xchg_8_default_impl:
prfm pstl1strm, [x0]
0: ldaxr x2, [x0]
stlxr w8, x1, [x0]
cbnz w8, 0b
dmb ish
mov x0, x2
ret

.globl aarch64_atomic_cmpxchg_1_default_impl
.align 5
aarch64_atomic_cmpxchg_1_default_impl:
dmb ish
prfm pstl1strm, [x0]
0: ldxrb w3, [x0]
eor w8, w3, w1
tst x8, #0xff
b.ne 1f
stxrb w8, w2, [x0]
cbnz w8, 0b
1: mov w0, w3
dmb ish
ret

.globl aarch64_atomic_cmpxchg_4_default_impl
.align 5
aarch64_atomic_cmpxchg_4_default_impl:
dmb ish
prfm pstl1strm, [x0]
0: ldxr w3, [x0]
cmp w3, w1
b.ne 1f
stxr w8, w2, [x0]
cbnz w8, 0b
1: mov w0, w3
dmb ish
ret

.globl aarch64_atomic_cmpxchg_8_default_impl
.align 5
aarch64_atomic_cmpxchg_8_default_impl:
dmb ish
prfm pstl1strm, [x0]
0: ldxr x3, [x0]
cmp x3, x1
b.ne 1f
stxr w8, x2, [x0]
cbnz w8, 0b
1: mov x0, x3
dmb ish
ret

.globl aarch64_atomic_cmpxchg_1_relaxed_default_impl
.align 5
aarch64_atomic_cmpxchg_1_relaxed_default_impl:
prfm pstl1strm, [x0]
0: ldxrb w3, [x0]
eor w8, w3, w1
tst x8, #0xff
@@ -73,9 +125,10 @@ aarch64_atomic_cmpxchg_1_default_impl:
1: mov w0, w3
ret

.globl aarch64_atomic_cmpxchg_4_default_impl
.globl aarch64_atomic_cmpxchg_4_relaxed_default_impl
.align 5
aarch64_atomic_cmpxchg_4_default_impl:
aarch64_atomic_cmpxchg_4_relaxed_default_impl:
prfm pstl1strm, [x0]
0: ldxr w3, [x0]
cmp w3, w1
b.ne 1f
@@ -84,9 +137,10 @@ aarch64_atomic_cmpxchg_4_default_impl:
1: mov w0, w3
ret

.globl aarch64_atomic_cmpxchg_8_default_impl
.globl aarch64_atomic_cmpxchg_8_relaxed_default_impl
.align 5
aarch64_atomic_cmpxchg_8_default_impl:
aarch64_atomic_cmpxchg_8_relaxed_default_impl:
prfm pstl1strm, [x0]
0: ldxr x3, [x0]
cmp x3, x1
b.ne 1f

@@ -89,7 +89,6 @@ inline D Atomic::PlatformAdd<4>::fetch_and_add(D volatile* dest, I add_value,
STATIC_ASSERT(4 == sizeof(D));
D old_value
= atomic_fastcall(aarch64_atomic_fetch_add_4_impl, dest, add_value);
FULL_MEM_BARRIER;
return old_value;
}

@@ -101,7 +100,6 @@ inline D Atomic::PlatformAdd<8>::fetch_and_add(D volatile* dest, I add_value,
STATIC_ASSERT(8 == sizeof(D));
D old_value
= atomic_fastcall(aarch64_atomic_fetch_add_8_impl, dest, add_value);
FULL_MEM_BARRIER;
return old_value;
}

@@ -112,7 +110,6 @@ inline T Atomic::PlatformXchg<4>::operator()(T volatile* dest,
atomic_memory_order order) const {
STATIC_ASSERT(4 == sizeof(T));
T old_value = atomic_fastcall(aarch64_atomic_xchg_4_impl, dest, exchange_value);
FULL_MEM_BARRIER;
return old_value;
}

@@ -122,7 +119,6 @@ inline T Atomic::PlatformXchg<8>::operator()(T volatile* dest, T exchange_value,
atomic_memory_order order) const {
STATIC_ASSERT(8 == sizeof(T));
T old_value = atomic_fastcall(aarch64_atomic_xchg_8_impl, dest, exchange_value);
FULL_MEM_BARRIER;
return old_value;
}

@@ -133,18 +129,15 @@ inline T Atomic::PlatformCmpxchg<1>::operator()(T volatile* dest,
T exchange_value,
atomic_memory_order order) const {
STATIC_ASSERT(1 == sizeof(T));
aarch64_atomic_stub_t stub = aarch64_atomic_cmpxchg_1_impl;
if (order == memory_order_relaxed) {
T old_value = atomic_fastcall(stub, dest,
compare_value, exchange_value);
return old_value;
} else {
FULL_MEM_BARRIER;
T old_value = atomic_fastcall(stub, dest,
compare_value, exchange_value);
FULL_MEM_BARRIER;
return old_value;
aarch64_atomic_stub_t stub;
switch (order) {
case memory_order_relaxed:
stub = aarch64_atomic_cmpxchg_1_relaxed_impl; break;
default:
stub = aarch64_atomic_cmpxchg_1_impl; break;
}

return atomic_fastcall(stub, dest, compare_value, exchange_value);
}

template<>
@@ -154,18 +147,15 @@ inline T Atomic::PlatformCmpxchg<4>::operator()(T volatile* dest,
T exchange_value,
atomic_memory_order order) const {
STATIC_ASSERT(4 == sizeof(T));
aarch64_atomic_stub_t stub = aarch64_atomic_cmpxchg_4_impl;
if (order == memory_order_relaxed) {
T old_value = atomic_fastcall(stub, dest,
compare_value, exchange_value);
return old_value;
} else {
FULL_MEM_BARRIER;
T old_value = atomic_fastcall(stub, dest,
compare_value, exchange_value);
FULL_MEM_BARRIER;
return old_value;
aarch64_atomic_stub_t stub;
switch (order) {
case memory_order_relaxed:
stub = aarch64_atomic_cmpxchg_4_relaxed_impl; break;
default:
stub = aarch64_atomic_cmpxchg_4_impl; break;
}

return atomic_fastcall(stub, dest, compare_value, exchange_value);
}

template<>
@@ -175,18 +165,15 @@ inline T Atomic::PlatformCmpxchg<8>::operator()(T volatile* dest,
T exchange_value,
atomic_memory_order order) const {
STATIC_ASSERT(8 == sizeof(T));
aarch64_atomic_stub_t stub = aarch64_atomic_cmpxchg_8_impl;
if (order == memory_order_relaxed) {
T old_value = atomic_fastcall(stub, dest,
compare_value, exchange_value);
return old_value;
} else {
FULL_MEM_BARRIER;
T old_value = atomic_fastcall(stub, dest,
compare_value, exchange_value);
FULL_MEM_BARRIER;
return old_value;
aarch64_atomic_stub_t stub;
switch (order) {
case memory_order_relaxed:
stub = aarch64_atomic_cmpxchg_8_relaxed_impl; break;
default:
stub = aarch64_atomic_cmpxchg_8_impl; break;
}

return atomic_fastcall(stub, dest, compare_value, exchange_value);
}
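Caller-side, the effect of the new switch statements is simply to pick one of two stub pointers declared in atomic_aarch64.hpp and call it through atomic_fastcall: the conservative stub performs CASAL plus the trailing membar, while the relaxed stub is a plain compare-and-swap with no barriers. A hedged sketch of that selection (hypothetical helper, direct call instead of atomic_fastcall, stub signature assumed from the _default_impl declarations above):

static inline uint64_t cmpxchg_8_example(volatile uint64_t* dest,
                                         uint64_t compare_value,
                                         uint64_t exchange_value,
                                         bool relaxed) {
  // Select the stub the same way PlatformCmpxchg<8>::operator() does above.
  aarch64_atomic_stub_t stub = relaxed ? aarch64_atomic_cmpxchg_8_relaxed_impl
                                       : aarch64_atomic_cmpxchg_8_impl;
  // The stub returns the value that was in *dest before the operation.
  return stub((volatile void*)dest, compare_value, exchange_value);
}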

template<size_t byte_size>