8366441: AArch64: Support WFET in OnSpinWait

Co-authored-by: Stuart Monteith <smonteith@openjdk.org>
Co-authored-by: Andrew Haley <aph@openjdk.org>
Reviewed-by: aph, eastigeevich
This commit is contained in:
Ruben Ayrapetyan 2026-03-31 10:14:26 +00:00 committed by Fei Gao
parent f45b48ba98
commit dfe438d057
15 changed files with 1299 additions and 1140 deletions

View File

@ -1095,6 +1095,10 @@ public:
#undef INSN
// Emit a WFET instruction with rt as its register operand. Only the register
// varies; every other encoding field passed to system() is fixed.
// MacroAssembler::spin_wait_wfet() passes the counter value it waits for in rt.
void wfet(Register rt) {
system(0b00, 0b011, 0b0001, 0b0000, 0b000, rt);
}
// we only provide mrs and msr for the special purpose system
// registers where op1 (instr[20:19]) == 11
// n.b msr has L (instr[21]) == 0 mrs has L == 1

View File

@ -115,14 +115,18 @@ define_pd_global(intx, InlineSmallCode, 1000);
"Value -1 means off.") \
range(-1, 4096) \
product(ccstr, OnSpinWaitInst, "yield", DIAGNOSTIC, \
"The instruction to use to implement " \
"java.lang.Thread.onSpinWait()." \
"Valid values are: none, nop, isb, yield, sb.") \
"The instruction to use for java.lang.Thread.onSpinWait(). " \
"Valid values are: none, nop, isb, yield, sb, wfet.") \
constraint(OnSpinWaitInstNameConstraintFunc, AtParse) \
product(uint, OnSpinWaitInstCount, 1, DIAGNOSTIC, \
"The number of OnSpinWaitInst instructions to generate." \
"It cannot be used with OnSpinWaitInst=none.") \
"The number of OnSpinWaitInst instructions to generate. " \
"It cannot be used with OnSpinWaitInst=none. " \
"For OnSpinWaitInst=wfet it must be 1.") \
range(1, 99) \
product(uint, OnSpinWaitDelay, 40, DIAGNOSTIC, \
"The minimum delay (in nanoseconds) of the OnSpinWait loop. " \
"It can only be used with -XX:OnSpinWaitInst=wfet.") \
range(1, 1000) \
product(ccstr, UseBranchProtection, "none", \
"Branch Protection to use: none, standard, pac-ret") \
product(bool, AlwaysMergeDMB, true, DIAGNOSTIC, \

View File

@ -6807,6 +6807,9 @@ void MacroAssembler::spin_wait() {
assert(VM_Version::supports_sb(), "current CPU does not support SB instruction");
sb();
break;
case SpinWait::WFET:
spin_wait_wfet(VM_Version::spin_wait_desc().delay());
break;
default:
ShouldNotReachHere();
}
@ -6814,6 +6817,28 @@ void MacroAssembler::spin_wait() {
block_comment("}");
}
void MacroAssembler::spin_wait_wfet(int delay_ns) {
// The sequence assumes CNTFRQ_EL0 is fixed to 1GHz. The assumption is valid
// starting from Armv8.6, according to the "D12.1.2 The system counter" of the
// Arm Architecture Reference Manual for A-profile architecture version M.a.a.
// This is sufficient because FEAT_WFXT is introduced from Armv8.6.
Register target = rscratch1;
Register current = rscratch2;
get_cntvctss_el0(current);
add(target, current, delay_ns);
Label L_wait_loop;
bind(L_wait_loop);
wfet(target);
get_cntvctss_el0(current);
cmp(current, target);
br(LT, L_wait_loop);
sb();
}
// Stack frame creation/removal
void MacroAssembler::enter(bool strip_ret_addr) {

View File

@ -660,6 +660,14 @@ public:
msr(0b011, 0b0100, 0b0010, 0b000, reg);
}
// Read the CNTVCTSS_EL0 system register into reg.
// NOTE(review): presumably this is the register that makes FEAT_ECV a
// requirement for -XX:OnSpinWaitInst=wfet - confirm against the Arm ARM.
// CNTVCTSS_EL0: op1 == 011
// CRn == 1110
// CRm == 0000
// op2 == 110
inline void get_cntvctss_el0(Register reg) {
mrs(0b011, 0b1110, 0b0000, 0b110, reg);
}
// idiv variant which deals with MINLONG as dividend and -1 as divisor
int corrected_idivl(Register result, Register ra, Register rb,
bool want_remainder, Register tmp = rscratch1);
@ -1724,6 +1732,7 @@ public:
// Code for java.lang.Thread::onSpinWait() intrinsic.
void spin_wait();
void spin_wait_wfet(int delay_ns);
void fast_lock(Register basic_lock, Register obj, Register t1, Register t2, Register t3, Label& slow);
void fast_unlock(Register obj, Register t1, Register t2, Register t3, Label& slow);

View File

@ -32,6 +32,7 @@ bool SpinWait::supports(const char *name) {
strcmp(name, "isb") == 0 ||
strcmp(name, "yield") == 0 ||
strcmp(name, "sb") == 0 ||
strcmp(name, "wfet") == 0 ||
strcmp(name, "none") == 0);
}
@ -46,6 +47,8 @@ SpinWait::Inst SpinWait::from_name(const char* name) {
return SpinWait::YIELD;
} else if (strcmp(name, "sb") == 0) {
return SpinWait::SB;
} else if (strcmp(name, "wfet") == 0) {
return SpinWait::WFET;
}
return SpinWait::NONE;

View File

@ -24,6 +24,8 @@
#ifndef CPU_AARCH64_SPIN_WAIT_AARCH64_HPP
#define CPU_AARCH64_SPIN_WAIT_AARCH64_HPP
#include "utilities/debug.hpp"
class SpinWait {
public:
enum Inst {
@ -31,21 +33,30 @@ public:
NOP,
ISB,
YIELD,
SB
SB,
WFET
};
private:
Inst _inst;
int _count;
int _delay;
Inst from_name(const char *name);
public:
SpinWait(Inst inst = NONE, int count = 0) : _inst(inst), _count(inst == NONE ? 0 : count) {}
SpinWait(const char *name, int count) : SpinWait(from_name(name), count) {}
// Construct a descriptor from an instruction kind, a repeat count and a delay.
// The count is forced to 0 when inst is NONE. The delay defaults to -1, which
// delay() rejects via its "must be positive" assert.
SpinWait(Inst inst = NONE, int count = 0, int delay = -1)
: _inst(inst), _count(inst == NONE ? 0 : count), _delay(delay) {}
// Construct a descriptor from an instruction name: the name is translated with
// from_name() and the result is forwarded, with count and delay, to the
// Inst-based constructor.
SpinWait(const char *name, int count, int delay)
: SpinWait(from_name(name), count, delay) {}
Inst inst() const { return _inst; }
int inst_count() const { return _count; }
// Return the configured delay. Asserts that the descriptor is for WFET and
// that the stored delay is positive (the constructor default of -1 is not).
int delay() const {
assert(_inst == WFET, "Specifying the delay value is only supported for WFET");
assert(_delay > 0, "The delay value must be positive");
return _delay;
}
static bool supports(const char *name);
};

View File

@ -1,7 +1,7 @@
/*
* Copyright (c) 1997, 2026, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2015, 2020, Red Hat Inc. All rights reserved.
* Copyright 2025 Arm Limited and/or its affiliates.
* Copyright 2025, 2026 Arm Limited and/or its affiliates.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -55,11 +55,33 @@ SpinWait VM_Version::_spin_wait;
const char* VM_Version::_features_names[MAX_CPU_FEATURES] = { nullptr };
// Build the SpinWait descriptor from the OnSpinWaitInst, OnSpinWaitInstCount
// and OnSpinWaitDelay flags. Exits VM initialization with an explanatory
// message when the selected instruction is not supported by the CPU or the
// flag combination is inconsistent; otherwise returns the descriptor.
static SpinWait get_spin_wait_desc() {
// NOTE(review): this is a diff rendering - the two-argument declaration below
// appears to be the pre-change line and the three-argument declaration after
// it its replacement; only one of them exists in the real file.
SpinWait spin_wait(OnSpinWaitInst, OnSpinWaitInstCount);
SpinWait spin_wait(OnSpinWaitInst, OnSpinWaitInstCount, OnSpinWaitDelay);
// 'sb' needs the SB instruction.
if (spin_wait.inst() == SpinWait::SB && !VM_Version::supports_sb()) {
vm_exit_during_initialization("OnSpinWaitInst is SB but current CPU does not support SB instruction");
}
if (spin_wait.inst() == SpinWait::WFET) {
// 'wfet' needs the WFXT, ECV and SB CPU features.
if (!VM_Version::supports_wfxt()) {
vm_exit_during_initialization("OnSpinWaitInst is WFET but the CPU does not support the WFET instruction");
}
if (!VM_Version::supports_ecv()) {
vm_exit_during_initialization("The CPU does not support the FEAT_ECV required by the -XX:OnSpinWaitInst=wfet implementation");
}
if (!VM_Version::supports_sb()) {
vm_exit_during_initialization("The CPU does not support the SB instruction required by the -XX:OnSpinWaitInst=wfet implementation");
}
// 'wfet' accepts only an instruction count of 1.
if (OnSpinWaitInstCount != 1) {
vm_exit_during_initialization("OnSpinWaitInstCount for OnSpinWaitInst 'wfet' must be 1");
}
} else {
// For every other instruction, OnSpinWaitDelay must be left at its default.
if (!FLAG_IS_DEFAULT(OnSpinWaitDelay)) {
vm_exit_during_initialization("OnSpinWaitDelay can only be used with -XX:OnSpinWaitInst=wfet");
}
}
return spin_wait;
}

View File

@ -159,7 +159,9 @@ public:
/* flags above must follow Linux HWCAP */ \
decl(SVEBITPERM, svebitperm, 27) \
decl(SVE2, sve2, 28) \
decl(A53MAC, a53mac, 31)
decl(A53MAC, a53mac, 31) \
decl(ECV, ecv, 32) \
decl(WFXT, wfxt, 33)
enum Feature_Flag {
#define DECLARE_CPU_FEATURE_FLAG(id, name, bit) CPU_##id = bit,

View File

@ -620,6 +620,8 @@ extern "C" {
assert(VM_Version::supports_sb(), "current CPU does not support SB instruction");
asm volatile(".inst 0xd50330ff" : : : "memory");
break;
case SpinWait::WFET:
ShouldNotReachHere();
#ifdef ASSERT
default:
ShouldNotReachHere();

View File

@ -95,6 +95,13 @@
#define HWCAP2_SVEBITPERM (1 << 4)
#endif
// Fallback definitions for toolchains whose headers do not define these
// HWCAP2 bits. get_os_cpu_info() tests them against auxv2 to set CPU_ECV and
// CPU_WFXT.
#ifndef HWCAP2_ECV
#define HWCAP2_ECV (1 << 19)
#endif
#ifndef HWCAP2_WFXT
// Unsigned literal: bit 31 does not fit in a positive signed 32-bit int.
#define HWCAP2_WFXT (1u << 31)
#endif
#ifndef PR_SVE_GET_VL
// For old toolchains which do not have SVE related macros defined.
#define PR_SVE_SET_VL 50
@ -158,6 +165,12 @@ void VM_Version::get_os_cpu_info() {
if (auxv2 & HWCAP2_SVEBITPERM) {
set_feature(CPU_SVEBITPERM);
}
if (auxv2 & HWCAP2_ECV) {
set_feature(CPU_ECV);
}
if (auxv2 & HWCAP2_WFXT) {
set_feature(CPU_WFXT);
}
uint64_t ctr_el0;
uint64_t dczid_el0;

View File

@ -153,6 +153,20 @@ JVMFlag::Error OnSpinWaitInstNameConstraintFunc(ccstr value, bool verbose) {
return JVMFlag::VIOLATES_CONSTRAINT;
}
#ifdef LINUX
if (strcmp(value, "wfet") == 0) {
if (UnlockExperimentalVMOptions) {
return JVMFlag::SUCCESS;
} else {
JVMFlag::printError(verbose,
"'wfet' value for OnSpinWaitInst is experimental and "
"must be enabled via -XX:+UnlockExperimentalVMOptions.\n"
"Error: The unlock option must precede 'OnSpinWaitInst'.\n");
return JVMFlag::VIOLATES_CONSTRAINT;
}
}
#endif
if (strcmp(value, "nop") != 0 &&
strcmp(value, "isb") != 0 &&
strcmp(value, "yield") != 0 &&
@ -160,7 +174,7 @@ JVMFlag::Error OnSpinWaitInstNameConstraintFunc(ccstr value, bool verbose) {
strcmp(value, "none") != 0) {
JVMFlag::printError(verbose,
"Unrecognized value %s for OnSpinWaitInst. Must be one of the following: "
"nop, isb, yield, sb, none\n",
"nop, isb, yield, sb," LINUX_ONLY(" wfet,") " none\n",
value);
return JVMFlag::VIOLATES_CONSTRAINT;
}

View File

@ -184,6 +184,8 @@ public class AArch64 extends Architecture {
SVEBITPERM,
SVE2,
A53MAC,
ECV,
WFXT,
FPHP,
ASIMDHP,
}

View File

@ -391,6 +391,11 @@ class SystemRegOp(Instruction):
self.CRn = 0b0100
self.CRm = 0b0010
self.op2 = 0b000
elif self.system_reg == 'cntvctss_el0':
self.op1 = 0b011
self.CRn = 0b1110
self.CRm = 0b0000
self.op2 = 0b110
def generate(self):
self.reg = [GeneralRegister().generate()]
@ -1607,6 +1612,8 @@ generate (Op, ["nop", "yield", "wfe", "sev", "sevl",
"pacia1716", "paciasp", "paciaz", "pacib1716", "pacibsp", "pacibz",
"eret", "drps", "isb", "sb",])
generate (OneRegOp, ["wfet"])
# Ensure the "i" is not stripped off the end of the instruction
generate (PostfixExceptionOp, ["wfi", "xpaclri"])
@ -1623,7 +1630,7 @@ generate (OneRegOp, ["br", "blr",
for system_reg in ["fpsr", "nzcv"]:
generate (SystemOneRegOp, [ ["msr", system_reg] ])
for system_reg in ["fpsr", "nzcv", "dczid_el0", "ctr_el0"]:
for system_reg in ["fpsr", "nzcv", "dczid_el0", "ctr_el0", "cntvctss_el0"]:
generate (OneRegSystemOp, [ ["mrs", system_reg] ])
# Ensure the "i" is not stripped off the end of the instruction
@ -2275,9 +2282,9 @@ outfile.write("forth:\n")
outfile.close()
# compile for sve with armv9-a+sha3+sve2-bitperm because of SHA3 crypto extension and SVE2 bitperm instructions.
# compile for sve with armv9.2-a+sha3+sve2-bitperm because of SHA3 crypto extension and SVE2 bitperm instructions.
# armv9-a enables sve and sve2 by default.
subprocess.check_call([AARCH64_AS, "-march=armv9-a+sha3+sve2-bitperm", "aarch64ops.s", "-o", "aarch64ops.o"])
subprocess.check_call([AARCH64_AS, "-march=armv9.2-a+sha3+sve2-bitperm", "aarch64ops.s", "-o", "aarch64ops.o"])
print
print "/*"

File diff suppressed because it is too large Load Diff

View File

@ -22,7 +22,7 @@
*/
/**
* @test TestOnSpinWaitAArch64
* @test id=TestOnSpinWaitAArch64
* @summary Checks that java.lang.Thread.onSpinWait is intrinsified with instructions specified with '-XX:OnSpinWaitInst' and '-XX:OnSpinWaitInstCount'
* @bug 8186670
* @library /test/lib
@ -41,6 +41,22 @@
* @run driver compiler.onSpinWait.TestOnSpinWaitAArch64 c1 sb 1
*/
/**
* @test id=TestOnSpinWaitAArch64-wfet
* @summary Checks that java.lang.Thread.onSpinWait is intrinsified when -XX:OnSpinWaitInst=wfet is used
* @bug 8366441
* @library /test/lib
*
* @requires vm.flagless
* @requires (os.arch=="aarch64" & os.family=="linux")
* @requires vm.debug
*
* @run driver compiler.onSpinWait.TestOnSpinWaitAArch64 c2 wfet 1 1
* @run driver compiler.onSpinWait.TestOnSpinWaitAArch64 c2 wfet 1 1000
* @run driver compiler.onSpinWait.TestOnSpinWaitAArch64 c1 wfet 1 1
* @run driver compiler.onSpinWait.TestOnSpinWaitAArch64 c1 wfet 1 1000
*/
package compiler.onSpinWait;
import java.util.Arrays;
@ -56,6 +72,7 @@ public class TestOnSpinWaitAArch64 {
String compiler = args[0];
String spinWaitInst = args[1];
String spinWaitInstCount = args[2];
String spinWaitDelay = (args.length >= 4 ? args[3] : "");
ArrayList<String> command = new ArrayList<String>();
command.add("-XX:+IgnoreUnrecognizedVMOptions");
command.add("-showversion");
@ -70,8 +87,14 @@ public class TestOnSpinWaitAArch64 {
throw new RuntimeException("Unknown compiler: " + compiler);
}
command.add("-Xbatch");
if ("wfet".equals(spinWaitInst)) {
command.add("-XX:+UnlockExperimentalVMOptions");
}
command.add("-XX:OnSpinWaitInst=" + spinWaitInst);
command.add("-XX:OnSpinWaitInstCount=" + spinWaitInstCount);
if (!spinWaitDelay.isEmpty()) {
command.add("-XX:OnSpinWaitDelay=" + spinWaitDelay);
}
command.add("-XX:CompileCommand=compileonly," + Launcher.class.getName() + "::" + "test");
command.add("-XX:CompileCommand=print," + Launcher.class.getName() + "::" + "test");
command.add(Launcher.class.getName());
@ -85,6 +108,14 @@ public class TestOnSpinWaitAArch64 {
return;
}
if ("wfet".equals(spinWaitInst) &&
(analyzer.contains("CPU does not support the SB instruction") ||
analyzer.contains("CPU does not support the FEAT_ECV") ||
analyzer.contains("CPU does not support the WFET instruction"))) {
System.out.println("Skipping the test. The CPU does not support SB or WFET instruction, or FEAT_ECV.");
return;
}
analyzer.shouldHaveExitValue(0);
System.out.println(analyzer.getOutput());
@ -101,6 +132,9 @@ public class TestOnSpinWaitAArch64 {
return "3f2003d5";
} else if ("sb".equals(spinWaitInst)) {
return "ff3003d5";
} else if ("wfet".equals(spinWaitInst)) {
// This assumes rscratch1 is r8.
return "081003d5";
} else {
throw new RuntimeException("Unknown spin wait instruction: " + spinWaitInst);
}
@ -166,7 +200,7 @@ public class TestOnSpinWaitAArch64 {
// When code is disassembled, we have one instruction per line.
// Otherwise, there can be multiple hex instructions separated by '|'.
foundCount += (int)Arrays.stream(line.split("\\|"))
.takeWhile(i -> i.startsWith(expectedInst))
.filter(i -> i.startsWith(expectedInst))
.count();
}