mirror of
https://github.com/openjdk/jdk.git
synced 2026-04-03 19:48:46 +00:00
8366441: AArch64: Support WFET in OnSpinWait
Co-authored-by: Stuart Monteith <smonteith@openjdk.org> Co-authored-by: Andrew Haley <aph@openjdk.org> Reviewed-by: aph, eastigeevich
This commit is contained in:
parent
f45b48ba98
commit
dfe438d057
@ -1095,6 +1095,10 @@ public:
|
||||
|
||||
#undef INSN
|
||||
|
||||
// Emit WFET <Xt> (Wait For Event with Timeout, FEAT_WFXT). rt is the register
// operand of the instruction; the caller in this change passes the counter
// value to wait for. The first five arguments are the fixed encoding fields
// handed to system() -- NOTE(review): presumably op0, op1, CRn, CRm, op2 in
// that order; confirm against the declaration of system().
void wfet(Register rt) {
|
||||
system(0b00, 0b011, 0b0001, 0b0000, 0b000, rt);
|
||||
}
|
||||
|
||||
// we only provide mrs and msr for the special purpose system
|
||||
// registers where op1 (instr[20:19]) == 11
|
||||
// n.b msr has L (instr[21]) == 0 mrs has L == 1
|
||||
|
||||
@ -115,14 +115,18 @@ define_pd_global(intx, InlineSmallCode, 1000);
|
||||
"Value -1 means off.") \
|
||||
range(-1, 4096) \
|
||||
product(ccstr, OnSpinWaitInst, "yield", DIAGNOSTIC, \
|
||||
"The instruction to use to implement " \
|
||||
"java.lang.Thread.onSpinWait()." \
|
||||
"Valid values are: none, nop, isb, yield, sb.") \
|
||||
"The instruction to use for java.lang.Thread.onSpinWait(). " \
|
||||
"Valid values are: none, nop, isb, yield, sb, wfet.") \
|
||||
constraint(OnSpinWaitInstNameConstraintFunc, AtParse) \
|
||||
product(uint, OnSpinWaitInstCount, 1, DIAGNOSTIC, \
|
||||
"The number of OnSpinWaitInst instructions to generate." \
|
||||
"It cannot be used with OnSpinWaitInst=none.") \
|
||||
"The number of OnSpinWaitInst instructions to generate. " \
|
||||
"It cannot be used with OnSpinWaitInst=none. " \
|
||||
"For OnSpinWaitInst=wfet it must be 1.") \
|
||||
range(1, 99) \
|
||||
product(uint, OnSpinWaitDelay, 40, DIAGNOSTIC, \
|
||||
"The minimum delay (in nanoseconds) of the OnSpinWait loop. " \
|
||||
"It can only be used with -XX:OnSpinWaitInst=wfet.") \
|
||||
range(1, 1000) \
|
||||
product(ccstr, UseBranchProtection, "none", \
|
||||
"Branch Protection to use: none, standard, pac-ret") \
|
||||
product(bool, AlwaysMergeDMB, true, DIAGNOSTIC, \
|
||||
|
||||
@ -6807,6 +6807,9 @@ void MacroAssembler::spin_wait() {
|
||||
assert(VM_Version::supports_sb(), "current CPU does not support SB instruction");
|
||||
sb();
|
||||
break;
|
||||
case SpinWait::WFET:
|
||||
spin_wait_wfet(VM_Version::spin_wait_desc().delay());
|
||||
break;
|
||||
default:
|
||||
ShouldNotReachHere();
|
||||
}
|
||||
@ -6814,6 +6817,28 @@ void MacroAssembler::spin_wait() {
|
||||
block_comment("}");
|
||||
}
|
||||
|
||||
void MacroAssembler::spin_wait_wfet(int delay_ns) {
|
||||
// The sequence assumes CNTFRQ_EL0 is fixed to 1GHz. The assumption is valid
|
||||
// starting from Armv8.6, according to the "D12.1.2 The system counter" of the
|
||||
// Arm Architecture Reference Manual for A-profile architecture version M.a.a.
|
||||
// This is sufficient because FEAT_WFXT is introduced from Armv8.6.
|
||||
Register target = rscratch1;
|
||||
Register current = rscratch2;
|
||||
get_cntvctss_el0(current);
|
||||
add(target, current, delay_ns);
|
||||
|
||||
Label L_wait_loop;
|
||||
bind(L_wait_loop);
|
||||
|
||||
wfet(target);
|
||||
get_cntvctss_el0(current);
|
||||
|
||||
cmp(current, target);
|
||||
br(LT, L_wait_loop);
|
||||
|
||||
sb();
|
||||
}
|
||||
|
||||
// Stack frame creation/removal
|
||||
|
||||
void MacroAssembler::enter(bool strip_ret_addr) {
|
||||
|
||||
@ -660,6 +660,14 @@ public:
|
||||
msr(0b011, 0b0100, 0b0010, 0b000, reg);
|
||||
}
|
||||
|
||||
// CNTVCTSS_EL0: op1 == 011
|
||||
// CRn == 1110
|
||||
// CRm == 0000
|
||||
// op2 == 110
|
||||
inline void get_cntvctss_el0(Register reg) {
|
||||
mrs(0b011, 0b1110, 0b0000, 0b110, reg);
|
||||
}
|
||||
|
||||
// idiv variant which deals with MINLONG as dividend and -1 as divisor
|
||||
int corrected_idivl(Register result, Register ra, Register rb,
|
||||
bool want_remainder, Register tmp = rscratch1);
|
||||
@ -1724,6 +1732,7 @@ public:
|
||||
|
||||
// Code for java.lang.Thread::onSpinWait() intrinsic.
|
||||
void spin_wait();
|
||||
void spin_wait_wfet(int delay_ns);
|
||||
|
||||
void fast_lock(Register basic_lock, Register obj, Register t1, Register t2, Register t3, Label& slow);
|
||||
void fast_unlock(Register obj, Register t1, Register t2, Register t3, Label& slow);
|
||||
|
||||
@ -32,6 +32,7 @@ bool SpinWait::supports(const char *name) {
|
||||
strcmp(name, "isb") == 0 ||
|
||||
strcmp(name, "yield") == 0 ||
|
||||
strcmp(name, "sb") == 0 ||
|
||||
strcmp(name, "wfet") == 0 ||
|
||||
strcmp(name, "none") == 0);
|
||||
}
|
||||
|
||||
@ -46,6 +47,8 @@ SpinWait::Inst SpinWait::from_name(const char* name) {
|
||||
return SpinWait::YIELD;
|
||||
} else if (strcmp(name, "sb") == 0) {
|
||||
return SpinWait::SB;
|
||||
} else if (strcmp(name, "wfet") == 0) {
|
||||
return SpinWait::WFET;
|
||||
}
|
||||
|
||||
return SpinWait::NONE;
|
||||
|
||||
@ -24,6 +24,8 @@
|
||||
#ifndef CPU_AARCH64_SPIN_WAIT_AARCH64_HPP
|
||||
#define CPU_AARCH64_SPIN_WAIT_AARCH64_HPP
|
||||
|
||||
#include "utilities/debug.hpp"
|
||||
|
||||
class SpinWait {
|
||||
public:
|
||||
enum Inst {
|
||||
@ -31,21 +33,30 @@ public:
|
||||
NOP,
|
||||
ISB,
|
||||
YIELD,
|
||||
SB
|
||||
SB,
|
||||
WFET
|
||||
};
|
||||
|
||||
private:
|
||||
Inst _inst;
|
||||
int _count;
|
||||
int _delay;
|
||||
|
||||
Inst from_name(const char *name);
|
||||
|
||||
public:
|
||||
SpinWait(Inst inst = NONE, int count = 0) : _inst(inst), _count(inst == NONE ? 0 : count) {}
|
||||
SpinWait(const char *name, int count) : SpinWait(from_name(name), count) {}
|
||||
SpinWait(Inst inst = NONE, int count = 0, int delay = -1)
|
||||
: _inst(inst), _count(inst == NONE ? 0 : count), _delay(delay) {}
|
||||
SpinWait(const char *name, int count, int delay)
|
||||
: SpinWait(from_name(name), count, delay) {}
|
||||
|
||||
Inst inst() const { return _inst; }
|
||||
int inst_count() const { return _count; }
|
||||
int delay() const {
|
||||
assert(_inst == WFET, "Specifying the delay value is only supported for WFET");
|
||||
assert(_delay > 0, "The delay value must be positive");
|
||||
return _delay;
|
||||
}
|
||||
|
||||
static bool supports(const char *name);
|
||||
};
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
/*
|
||||
* Copyright (c) 1997, 2026, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2015, 2020, Red Hat Inc. All rights reserved.
|
||||
* Copyright 2025 Arm Limited and/or its affiliates.
|
||||
* Copyright 2025, 2026 Arm Limited and/or its affiliates.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -55,11 +55,33 @@ SpinWait VM_Version::_spin_wait;
|
||||
const char* VM_Version::_features_names[MAX_CPU_FEATURES] = { nullptr };
|
||||
|
||||
// Builds the SpinWait descriptor used for java.lang.Thread.onSpinWait() from
// the OnSpinWaitInst, OnSpinWaitInstCount and OnSpinWaitDelay flags, and
// checks that the requested combination is usable on this CPU.
//
// Every rejected combination is reported through
// vm_exit_during_initialization() with a message naming the problem:
//   - sb   : the CPU must support the SB instruction.
//   - wfet : the CPU must support WFET, FEAT_ECV and the SB instruction, and
//            OnSpinWaitInstCount must be 1.
//   - any other instruction: OnSpinWaitDelay must be left at its default,
//     because the delay is only meaningful for wfet.
//
// Returns the descriptor built from the three flags.
static SpinWait get_spin_wait_desc() {
  // The descriptor is constructed exactly once, from all three flags, so that
  // the delay is carried along for the wfet case.
  SpinWait spin_wait(OnSpinWaitInst, OnSpinWaitInstCount, OnSpinWaitDelay);

  if (spin_wait.inst() == SpinWait::SB && !VM_Version::supports_sb()) {
    vm_exit_during_initialization("OnSpinWaitInst is SB but current CPU does not support SB instruction");
  }

  if (spin_wait.inst() == SpinWait::WFET) {
    if (!VM_Version::supports_wfxt()) {
      vm_exit_during_initialization("OnSpinWaitInst is WFET but the CPU does not support the WFET instruction");
    }

    if (!VM_Version::supports_ecv()) {
      vm_exit_during_initialization("The CPU does not support the FEAT_ECV required by the -XX:OnSpinWaitInst=wfet implementation");
    }

    if (!VM_Version::supports_sb()) {
      vm_exit_during_initialization("The CPU does not support the SB instruction required by the -XX:OnSpinWaitInst=wfet implementation");
    }

    // The wfet sequence is a timed wait loop, not a repeated instruction, so
    // only a count of 1 is accepted.
    if (OnSpinWaitInstCount != 1) {
      vm_exit_during_initialization("OnSpinWaitInstCount for OnSpinWaitInst 'wfet' must be 1");
    }
  } else {
    // An explicitly set delay without wfet would silently have no effect;
    // reject it instead.
    if (!FLAG_IS_DEFAULT(OnSpinWaitDelay)) {
      vm_exit_during_initialization("OnSpinWaitDelay can only be used with -XX:OnSpinWaitInst=wfet");
    }
  }

  return spin_wait;
}
|
||||
|
||||
|
||||
@ -159,7 +159,9 @@ public:
|
||||
/* flags above must follow Linux HWCAP */ \
|
||||
decl(SVEBITPERM, svebitperm, 27) \
|
||||
decl(SVE2, sve2, 28) \
|
||||
decl(A53MAC, a53mac, 31)
|
||||
decl(A53MAC, a53mac, 31) \
|
||||
decl(ECV, ecv, 32) \
|
||||
decl(WFXT, wfxt, 33)
|
||||
|
||||
enum Feature_Flag {
|
||||
#define DECLARE_CPU_FEATURE_FLAG(id, name, bit) CPU_##id = bit,
|
||||
|
||||
@ -620,6 +620,8 @@ extern "C" {
|
||||
assert(VM_Version::supports_sb(), "current CPU does not support SB instruction");
|
||||
asm volatile(".inst 0xd50330ff" : : : "memory");
|
||||
break;
|
||||
case SpinWait::WFET:
|
||||
ShouldNotReachHere();
|
||||
#ifdef ASSERT
|
||||
default:
|
||||
ShouldNotReachHere();
|
||||
|
||||
@ -95,6 +95,13 @@
|
||||
#define HWCAP2_SVEBITPERM (1 << 4)
|
||||
#endif
|
||||
|
||||
// Fallback definitions of the HWCAP2 feature bits for ECV (bit 19) and WFXT
// (bit 31). Each is defined only when not already provided (hence the #ifndef
// guards) -- presumably for toolchains whose headers predate these bits.
// HWCAP2_WFXT uses an unsigned literal because shifting a signed 1 into
// bit 31 would overflow int.
#ifndef HWCAP2_ECV
|
||||
#define HWCAP2_ECV (1 << 19)
|
||||
#endif
|
||||
|
||||
#ifndef HWCAP2_WFXT
|
||||
#define HWCAP2_WFXT (1u << 31)
|
||||
#endif
|
||||
#ifndef PR_SVE_GET_VL
|
||||
// For old toolchains which do not have SVE related macros defined.
|
||||
#define PR_SVE_SET_VL 50
|
||||
@ -158,6 +165,12 @@ void VM_Version::get_os_cpu_info() {
|
||||
if (auxv2 & HWCAP2_SVEBITPERM) {
|
||||
set_feature(CPU_SVEBITPERM);
|
||||
}
|
||||
if (auxv2 & HWCAP2_ECV) {
|
||||
set_feature(CPU_ECV);
|
||||
}
|
||||
if (auxv2 & HWCAP2_WFXT) {
|
||||
set_feature(CPU_WFXT);
|
||||
}
|
||||
|
||||
uint64_t ctr_el0;
|
||||
uint64_t dczid_el0;
|
||||
|
||||
@ -153,6 +153,20 @@ JVMFlag::Error OnSpinWaitInstNameConstraintFunc(ccstr value, bool verbose) {
|
||||
return JVMFlag::VIOLATES_CONSTRAINT;
|
||||
}
|
||||
|
||||
#ifdef LINUX
|
||||
if (strcmp(value, "wfet") == 0) {
|
||||
if (UnlockExperimentalVMOptions) {
|
||||
return JVMFlag::SUCCESS;
|
||||
} else {
|
||||
JVMFlag::printError(verbose,
|
||||
"'wfet' value for OnSpinWaitInst is experimental and "
|
||||
"must be enabled via -XX:+UnlockExperimentalVMOptions.\n"
|
||||
"Error: The unlock option must precede 'OnSpinWaitInst'.\n");
|
||||
return JVMFlag::VIOLATES_CONSTRAINT;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
if (strcmp(value, "nop") != 0 &&
|
||||
strcmp(value, "isb") != 0 &&
|
||||
strcmp(value, "yield") != 0 &&
|
||||
@ -160,7 +174,7 @@ JVMFlag::Error OnSpinWaitInstNameConstraintFunc(ccstr value, bool verbose) {
|
||||
strcmp(value, "none") != 0) {
|
||||
JVMFlag::printError(verbose,
|
||||
"Unrecognized value %s for OnSpinWaitInst. Must be one of the following: "
|
||||
"nop, isb, yield, sb, none\n",
|
||||
"nop, isb, yield, sb," LINUX_ONLY(" wfet,") " none\n",
|
||||
value);
|
||||
return JVMFlag::VIOLATES_CONSTRAINT;
|
||||
}
|
||||
|
||||
@ -184,6 +184,8 @@ public class AArch64 extends Architecture {
|
||||
SVEBITPERM,
|
||||
SVE2,
|
||||
A53MAC,
|
||||
ECV,
|
||||
WFXT,
|
||||
FPHP,
|
||||
ASIMDHP,
|
||||
}
|
||||
|
||||
@ -391,6 +391,11 @@ class SystemRegOp(Instruction):
|
||||
self.CRn = 0b0100
|
||||
self.CRm = 0b0010
|
||||
self.op2 = 0b000
|
||||
elif self.system_reg == 'cntvctss_el0':
|
||||
self.op1 = 0b011
|
||||
self.CRn = 0b1110
|
||||
self.CRm = 0b0000
|
||||
self.op2 = 0b110
|
||||
|
||||
def generate(self):
|
||||
self.reg = [GeneralRegister().generate()]
|
||||
@ -1607,6 +1612,8 @@ generate (Op, ["nop", "yield", "wfe", "sev", "sevl",
|
||||
"pacia1716", "paciasp", "paciaz", "pacib1716", "pacibsp", "pacibz",
|
||||
"eret", "drps", "isb", "sb",])
|
||||
|
||||
generate (OneRegOp, ["wfet"])
|
||||
|
||||
# Ensure the "i" is not stripped off the end of the instruction
|
||||
generate (PostfixExceptionOp, ["wfi", "xpaclri"])
|
||||
|
||||
@ -1623,7 +1630,7 @@ generate (OneRegOp, ["br", "blr",
|
||||
for system_reg in ["fpsr", "nzcv"]:
|
||||
generate (SystemOneRegOp, [ ["msr", system_reg] ])
|
||||
|
||||
for system_reg in ["fpsr", "nzcv", "dczid_el0", "ctr_el0"]:
|
||||
for system_reg in ["fpsr", "nzcv", "dczid_el0", "ctr_el0", "cntvctss_el0"]:
|
||||
generate (OneRegSystemOp, [ ["mrs", system_reg] ])
|
||||
|
||||
# Ensure the "i" is not stripped off the end of the instruction
|
||||
@ -2275,9 +2282,9 @@ outfile.write("forth:\n")
|
||||
|
||||
outfile.close()
|
||||
|
||||
# compile for sve with armv9-a+sha3+sve2-bitperm because of SHA3 crypto extension and SVE2 bitperm instructions.
|
||||
# compile for sve with armv9.2-a+sha3+sve2-bitperm because of SHA3 crypto extension and SVE2 bitperm instructions.
|
||||
# armv9-a enables sve and sve2 by default.
|
||||
subprocess.check_call([AARCH64_AS, "-march=armv9-a+sha3+sve2-bitperm", "aarch64ops.s", "-o", "aarch64ops.o"])
|
||||
subprocess.check_call([AARCH64_AS, "-march=armv9.2-a+sha3+sve2-bitperm", "aarch64ops.s", "-o", "aarch64ops.o"])
|
||||
|
||||
print
|
||||
print "/*"
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -22,7 +22,7 @@
|
||||
*/
|
||||
|
||||
/**
|
||||
* @test TestOnSpinWaitAArch64
|
||||
* @test id=TestOnSpinWaitAArch64
|
||||
* @summary Checks that java.lang.Thread.onSpinWait is intrinsified with instructions specified with '-XX:OnSpinWaitInst' and '-XX:OnSpinWaitInstCount'
|
||||
* @bug 8186670
|
||||
* @library /test/lib
|
||||
@ -41,6 +41,22 @@
|
||||
* @run driver compiler.onSpinWait.TestOnSpinWaitAArch64 c1 sb 1
|
||||
*/
|
||||
|
||||
/**
|
||||
* @test id=TestOnSpinWaitAArch64-wfet
|
||||
* @summary Checks that java.lang.Thread.onSpinWait is intrinsified when -XX:OnSpinWaitInst=wfet is used
|
||||
* @bug 8366441
|
||||
* @library /test/lib
|
||||
*
|
||||
* @requires vm.flagless
|
||||
* @requires (os.arch=="aarch64" & os.family=="linux")
|
||||
* @requires vm.debug
|
||||
*
|
||||
* @run driver compiler.onSpinWait.TestOnSpinWaitAArch64 c2 wfet 1 1
|
||||
* @run driver compiler.onSpinWait.TestOnSpinWaitAArch64 c2 wfet 1 1000
|
||||
* @run driver compiler.onSpinWait.TestOnSpinWaitAArch64 c1 wfet 1 1
|
||||
* @run driver compiler.onSpinWait.TestOnSpinWaitAArch64 c1 wfet 1 1000
|
||||
*/
|
||||
|
||||
package compiler.onSpinWait;
|
||||
|
||||
import java.util.Arrays;
|
||||
@ -56,6 +72,7 @@ public class TestOnSpinWaitAArch64 {
|
||||
String compiler = args[0];
|
||||
String spinWaitInst = args[1];
|
||||
String spinWaitInstCount = args[2];
|
||||
String spinWaitDelay = (args.length >= 4 ? args[3] : "");
|
||||
ArrayList<String> command = new ArrayList<String>();
|
||||
command.add("-XX:+IgnoreUnrecognizedVMOptions");
|
||||
command.add("-showversion");
|
||||
@ -70,8 +87,14 @@ public class TestOnSpinWaitAArch64 {
|
||||
throw new RuntimeException("Unknown compiler: " + compiler);
|
||||
}
|
||||
command.add("-Xbatch");
|
||||
if ("wfet".equals(spinWaitInst)) {
|
||||
command.add("-XX:+UnlockExperimentalVMOptions");
|
||||
}
|
||||
command.add("-XX:OnSpinWaitInst=" + spinWaitInst);
|
||||
command.add("-XX:OnSpinWaitInstCount=" + spinWaitInstCount);
|
||||
if (!spinWaitDelay.isEmpty()) {
|
||||
command.add("-XX:OnSpinWaitDelay=" + spinWaitDelay);
|
||||
}
|
||||
command.add("-XX:CompileCommand=compileonly," + Launcher.class.getName() + "::" + "test");
|
||||
command.add("-XX:CompileCommand=print," + Launcher.class.getName() + "::" + "test");
|
||||
command.add(Launcher.class.getName());
|
||||
@ -85,6 +108,14 @@ public class TestOnSpinWaitAArch64 {
|
||||
return;
|
||||
}
|
||||
|
||||
if ("wfet".equals(spinWaitInst) &&
|
||||
(analyzer.contains("CPU does not support the SB instruction") ||
|
||||
analyzer.contains("CPU does not support the FEAT_ECV") ||
|
||||
analyzer.contains("CPU does not support the WFET instruction"))) {
|
||||
System.out.println("Skipping the test. The CPU does not support SB or WFET instruction, or FEAT_ECV.");
|
||||
return;
|
||||
}
|
||||
|
||||
analyzer.shouldHaveExitValue(0);
|
||||
|
||||
System.out.println(analyzer.getOutput());
|
||||
@ -101,6 +132,9 @@ public class TestOnSpinWaitAArch64 {
|
||||
return "3f2003d5";
|
||||
} else if ("sb".equals(spinWaitInst)) {
|
||||
return "ff3003d5";
|
||||
} else if ("wfet".equals(spinWaitInst)) {
|
||||
// This assumes rscratch1 is r8.
|
||||
return "081003d5";
|
||||
} else {
|
||||
throw new RuntimeException("Unknown spin wait instruction: " + spinWaitInst);
|
||||
}
|
||||
@ -166,7 +200,7 @@ public class TestOnSpinWaitAArch64 {
|
||||
// When code is disassembled, we have one instruction per line.
|
||||
// Otherwise, there can be multiple hex instructions separated by '|'.
|
||||
foundCount += (int)Arrays.stream(line.split("\\|"))
|
||||
.takeWhile(i -> i.startsWith(expectedInst))
|
||||
.filter(i -> i.startsWith(expectedInst))
|
||||
.count();
|
||||
}
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user