8345298: RISC-V: Add riscv backend for Float16 operations - scalar

Reviewed-by: rehn, fyang
This commit is contained in:
Hamlin Li 2025-03-13 08:16:53 +00:00
parent 6241d09657
commit a33b1f7f64
13 changed files with 444 additions and 50 deletions

View File

@ -1316,6 +1316,7 @@ enum operand_size { int8, int16, int32, uint32, int64 };
public:
// Floating-point loads of Rd from memory at Rs + offset. Each wrapper forwards to
// fp_load with its own 3-bit template selector: 0b001 (flh), 0b010 (flw), 0b011 (_fld).
// NOTE(review): the selector is presumably the instruction's width field — confirm against fp_load.
void flh(FloatRegister Rd, Register Rs, const int32_t offset) { fp_load<0b001>(Rd, Rs, offset); }
void flw(FloatRegister Rd, Register Rs, const int32_t offset) { fp_load<0b010>(Rd, Rs, offset); }
void _fld(FloatRegister Rd, Register Rs, const int32_t offset) { fp_load<0b011>(Rd, Rs, offset); }
@ -1397,6 +1398,46 @@ enum operand_size { int8, int16, int32, uint32, int64 };
fp_base<H_16_hp, 0b11100>(Rd, Rs1, 0b00000, 0b000);
}
// fadd.h: half-precision add of Rs1 and Rs2 into Rd, using rounding mode rm (rne by default).
// Asserts that UseZfh is set before emitting.
void fadd_h(FloatRegister Rd, FloatRegister Rs1, FloatRegister Rs2, RoundingMode rm = rne) {
assert_cond(UseZfh);
fp_base<H_16_hp, 0b00000>(Rd, Rs1, Rs2, rm);
}
// fsub.h: half-precision subtract (Rs1, Rs2 -> Rd), using rounding mode rm (rne by default).
// Asserts that UseZfh is set before emitting.
void fsub_h(FloatRegister Rd, FloatRegister Rs1, FloatRegister Rs2, RoundingMode rm = rne) {
assert_cond(UseZfh);
fp_base<H_16_hp, 0b00001>(Rd, Rs1, Rs2, rm);
}
// fmul.h: half-precision multiply of Rs1 and Rs2 into Rd, using rounding mode rm (rne by default).
// Asserts that UseZfh is set before emitting.
void fmul_h(FloatRegister Rd, FloatRegister Rs1, FloatRegister Rs2, RoundingMode rm = rne) {
assert_cond(UseZfh);
fp_base<H_16_hp, 0b00010>(Rd, Rs1, Rs2, rm);
}
// fdiv.h: half-precision divide (Rs1, Rs2 -> Rd), using rounding mode rm (rne by default).
// Asserts that UseZfh is set before emitting.
void fdiv_h(FloatRegister Rd, FloatRegister Rs1, FloatRegister Rs2, RoundingMode rm = rne) {
assert_cond(UseZfh);
fp_base<H_16_hp, 0b00011>(Rd, Rs1, Rs2, rm);
}
// fsqrt.h: half-precision square root of Rs1 into Rd, using rounding mode rm (rne by default).
// There is only one source register; the second-source slot of fp_base is passed as
// the constant 0b00000. Asserts that UseZfh is set before emitting.
void fsqrt_h(FloatRegister Rd, FloatRegister Rs1, RoundingMode rm = rne) {
assert_cond(UseZfh);
fp_base<H_16_hp, 0b01011>(Rd, Rs1, 0b00000, rm);
}
// fmin.h: half-precision minimum of Rs1 and Rs2 into Rd.
// Takes no rounding mode: the last fp_base argument is the fixed value 0b000, which is
// what distinguishes this from fmax_h (same 0b00101 selector, last argument 0b001).
// Asserts that UseZfh is set before emitting.
void fmin_h(FloatRegister Rd, FloatRegister Rs1, FloatRegister Rs2) {
assert_cond(UseZfh);
fp_base<H_16_hp, 0b00101>(Rd, Rs1, Rs2, 0b000);
}
// fmax.h: half-precision maximum of Rs1 and Rs2 into Rd.
// Takes no rounding mode: the last fp_base argument is the fixed value 0b001, which is
// what distinguishes this from fmin_h (same 0b00101 selector, last argument 0b000).
// Asserts that UseZfh is set before emitting.
void fmax_h(FloatRegister Rd, FloatRegister Rs1, FloatRegister Rs2) {
assert_cond(UseZfh);
fp_base<H_16_hp, 0b00101>(Rd, Rs1, Rs2, 0b001);
}
// fmadd.h: half-precision fused multiply-add, Rd = Rs1 * Rs2 + Rs3, using rounding
// mode rm (rne by default). Emitted through fp_fm rather than fp_base because it
// carries a third source register. Asserts that UseZfh is set before emitting.
void fmadd_h(FloatRegister Rd, FloatRegister Rs1, FloatRegister Rs2, FloatRegister Rs3, RoundingMode rm = rne) {
assert_cond(UseZfh);
fp_fm<H_16_hp, 0b1000011>(Rd, Rs1, Rs2, Rs3, rm);
}
// -------------- ZFA Instruction Definitions --------------
// Zfa Extension for Additional Floating-Point Instructions
void _fli_s(FloatRegister Rd, uint8_t Rs1) {

View File

@ -2159,27 +2159,68 @@ void C2_MacroAssembler::enc_cmove(int cmpFlag, Register op1, Register op2, Regis
// Set dst to NaN if any NaN input.
void C2_MacroAssembler::minmax_fp(FloatRegister dst, FloatRegister src1, FloatRegister src2,
bool is_double, bool is_min) {
FLOAT_TYPE ft, bool is_min) {
assert_cond((ft != FLOAT_TYPE::half_precision) || UseZfh);
Label Done, Compare;
is_double ? fclass_d(t0, src1)
: fclass_s(t0, src1);
is_double ? fclass_d(t1, src2)
: fclass_s(t1, src2);
orr(t0, t0, t1);
andi(t0, t0, FClassBits::nan); // if src1 or src2 is quiet or signaling NaN then return NaN
beqz(t0, Compare);
is_double ? fadd_d(dst, src1, src2)
: fadd_s(dst, src1, src2);
j(Done);
switch (ft) {
case FLOAT_TYPE::half_precision:
fclass_h(t0, src1);
fclass_h(t1, src2);
bind(Compare);
if (is_double) {
is_min ? fmin_d(dst, src1, src2)
: fmax_d(dst, src1, src2);
} else {
is_min ? fmin_s(dst, src1, src2)
: fmax_s(dst, src1, src2);
orr(t0, t0, t1);
andi(t0, t0, FClassBits::nan); // if src1 or src2 is quiet or signaling NaN then return NaN
beqz(t0, Compare);
fadd_h(dst, src1, src2);
j(Done);
bind(Compare);
if (is_min) {
fmin_h(dst, src1, src2);
} else {
fmax_h(dst, src1, src2);
}
break;
case FLOAT_TYPE::single_precision:
fclass_s(t0, src1);
fclass_s(t1, src2);
orr(t0, t0, t1);
andi(t0, t0, FClassBits::nan); // if src1 or src2 is quiet or signaling NaN then return NaN
beqz(t0, Compare);
fadd_s(dst, src1, src2);
j(Done);
bind(Compare);
if (is_min) {
fmin_s(dst, src1, src2);
} else {
fmax_s(dst, src1, src2);
}
break;
case FLOAT_TYPE::double_precision:
fclass_d(t0, src1);
fclass_d(t1, src2);
orr(t0, t0, t1);
andi(t0, t0, FClassBits::nan); // if src1 or src2 is quiet or signaling NaN then return NaN
beqz(t0, Compare);
fadd_d(dst, src1, src2);
j(Done);
bind(Compare);
if (is_min) {
fmin_d(dst, src1, src2);
} else {
fmax_d(dst, src1, src2);
}
break;
default:
ShouldNotReachHere();
}
bind(Done);

View File

@ -172,9 +172,15 @@
}
}
// Operand width selector for minmax_fp: picks the half (.h), single (.s) or
// double (.d) flavour of the instructions it emits.
enum class FLOAT_TYPE { half_precision, single_precision, double_precision };
void minmax_fp(FloatRegister dst,
FloatRegister src1, FloatRegister src2,
bool is_double, bool is_min);
FLOAT_TYPE ft, bool is_min);
void round_double_mode(FloatRegister dst, FloatRegister src, int round_mode,
Register tmp1, Register tmp2, Register tmp3);

View File

@ -1080,6 +1080,7 @@ public:
} \
}
INSN(flh);
INSN(flw);
INSN(fld);

View File

@ -1916,7 +1916,19 @@ bool Matcher::match_rule_supported(int opcode) {
case Op_ConvHF2F:
case Op_ConvF2HF:
return VM_Version::supports_float16_float_conversion();
case Op_ReinterpretS2HF:
case Op_ReinterpretHF2S:
return UseZfh || UseZfhmin;
case Op_AddHF:
case Op_DivHF:
case Op_FmaHF:
case Op_MaxHF:
case Op_MinHF:
case Op_MulHF:
case Op_SubHF:
case Op_SqrtHF:
return UseZfh;
}
return true; // Per default match rules are supported.
@ -3056,6 +3068,27 @@ operand immF0()
interface(CONST_INTER);
%}
// Half Float Immediate
// Matches any ConH node (no predicate) at zero operand cost; exposed through the
// constant interface so rules such as loadConH can take it as their constant input.
operand immH()
%{
match(ConH);
op_cost(0);
format %{ %}
interface(CONST_INTER);
%}
// Half Float Immediate: +0.0f.
// Same as immH but restricted by predicate to constants for which
// jint_cast(n->geth()) is 0, so loadConH0 can materialize it without a memory load.
operand immH0()
%{
predicate(jint_cast(n->geth()) == 0);
match(ConH);
op_cost(0);
format %{ %}
interface(CONST_INTER);
%}
operand immIOffset()
%{
predicate(Assembler::is_simm12(n->get_int()));
@ -4907,6 +4940,39 @@ instruct loadConNKlass(iRegNNoSp dst, immNKlass con)
ins_pipe(ialu_imm);
%}
// Load Half Float Constant
// Generic case: the constant lives in the constant table and is fetched with flh
// (cost LOAD_COST). The encoding asserts that UseZfh or UseZfhmin is enabled.
instruct loadConH(fRegF dst, immH con) %{
match(Set dst con);
ins_cost(LOAD_COST);
format %{
"flh $dst, [$constantaddress]\t# load from constant table: float=$con, #@loadConH"
%}
ins_encode %{
assert(UseZfh || UseZfhmin, "must");
__ flh(as_FloatRegister($dst$$reg), $constantaddress($con));
// TODO: add zfa instructions for half float, and optimize here.
%}
ins_pipe(fp_load_constant_s);
%}
// Load Half Float Constant matched by immH0: instead of a constant-table load,
// move the zero register into $dst with fmv.h.x (cost XFER_COST rather than LOAD_COST).
// The encoding asserts that UseZfh or UseZfhmin is enabled.
instruct loadConH0(fRegF dst, immH0 con) %{
match(Set dst con);
ins_cost(XFER_COST);
format %{ "fmv.h.x $dst, zr\t# float, #@loadConH0" %}
ins_encode %{
assert(UseZfh || UseZfhmin, "must");
__ fmv_h_x(as_FloatRegister($dst$$reg), zr);
%}
ins_pipe(fp_load_constant_s);
%}
// Load Float Constant
instruct loadConF(fRegF dst, immF con) %{
match(Set dst con);
@ -7291,7 +7357,7 @@ instruct maxF_reg_reg(fRegF dst, fRegF src1, fRegF src2, rFlagsReg cr) %{
ins_encode %{
__ minmax_fp(as_FloatRegister($dst$$reg),
as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg),
false /* is_double */, false /* is_min */);
__ FLOAT_TYPE::single_precision, false /* is_min */);
%}
ins_pipe(pipe_class_default);
@ -7307,7 +7373,7 @@ instruct minF_reg_reg(fRegF dst, fRegF src1, fRegF src2, rFlagsReg cr) %{
ins_encode %{
__ minmax_fp(as_FloatRegister($dst$$reg),
as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg),
false /* is_double */, true /* is_min */);
__ FLOAT_TYPE::single_precision, true /* is_min */);
%}
ins_pipe(pipe_class_default);
@ -7323,7 +7389,7 @@ instruct maxD_reg_reg(fRegD dst, fRegD src1, fRegD src2, rFlagsReg cr) %{
ins_encode %{
__ minmax_fp(as_FloatRegister($dst$$reg),
as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg),
true /* is_double */, false /* is_min */);
__ FLOAT_TYPE::double_precision, false /* is_min */);
%}
ins_pipe(pipe_class_default);
@ -7339,7 +7405,7 @@ instruct minD_reg_reg(fRegD dst, fRegD src1, fRegD src2, rFlagsReg cr) %{
ins_encode %{
__ minmax_fp(as_FloatRegister($dst$$reg),
as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg),
true /* is_double */, true /* is_min */);
__ FLOAT_TYPE::double_precision, true /* is_min */);
%}
ins_pipe(pipe_class_default);
@ -8009,6 +8075,17 @@ instruct checkCastPP(iRegPNoSp dst)
ins_pipe(pipe_class_empty);
%}
// CastHH emits no code: the rule has size(0), an empty encoding and zero cost,
// and the value stays in the same register ($dst is both input and output).
instruct castHH(fRegF dst)
%{
match(Set dst (CastHH dst));
size(0);
format %{ "# castHH of $dst" %}
ins_encode(/* empty encoding */);
ins_cost(0);
ins_pipe(pipe_class_empty);
%}
instruct castFF(fRegF dst)
%{
match(Set dst (CastFF dst));
@ -8171,6 +8248,108 @@ instruct convF2HF_reg_reg(iRegINoSp dst, fRegF src, fRegF ftmp, iRegINoSp xtmp)
ins_pipe(pipe_slow);
%}
// half precision operations
// ReinterpretS2HF: transfer a half-float bit pattern held in an integer register
// into a floating-point register with fmv.h.x (a register move, not a value conversion).
instruct reinterpretS2HF(fRegF dst, iRegI src)
%{
match(Set dst (ReinterpretS2HF src));
format %{ "fmv.h.x $dst, $src" %}
ins_encode %{
__ fmv_h_x($dst$$FloatRegister, $src$$Register);
%}
ins_pipe(fp_i2f);
%}
// Fused form of ReinterpretS2HF(ConvF2HF src): a single fcvt.h.s converts the float
// to half precision while staying in floating-point registers, so the intermediate
// integer-register value of the two-node chain is never materialized.
instruct convF2HFAndS2HF(fRegF dst, fRegF src)
%{
match(Set dst (ReinterpretS2HF (ConvF2HF src)));
format %{ "convF2HFAndS2HF $dst, $src" %}
ins_encode %{
__ fcvt_h_s($dst$$FloatRegister, $src$$FloatRegister);
%}
ins_pipe(fp_uop_s);
%}
// ReinterpretHF2S: transfer a half float held in a floating-point register into an
// integer register with fmv.x.h (a register move, not a value conversion).
instruct reinterpretHF2S(iRegINoSp dst, fRegF src)
%{
match(Set dst (ReinterpretHF2S src));
format %{ "fmv.x.h $dst, $src" %}
ins_encode %{
__ fmv_x_h($dst$$Register, $src$$FloatRegister);
%}
ins_pipe(fp_f2i);
%}
// Fused form of ConvHF2F(ReinterpretHF2S src): a single fcvt.s.h widens the half
// float to float while staying in floating-point registers, so the intermediate
// integer-register value of the two-node chain is never materialized.
instruct convHF2SAndHF2F(fRegF dst, fRegF src)
%{
match(Set dst (ConvHF2F (ReinterpretHF2S src)));
format %{ "convHF2SAndHF2F $dst, $src" %}
ins_encode %{
__ fcvt_s_h($dst$$FloatRegister, $src$$FloatRegister);
%}
ins_pipe(fp_uop_s);
%}
// SqrtHF: half-precision square root via fsqrt.h. No rounding mode is passed, so the
// assembler's default argument applies.
instruct sqrt_HF_reg(fRegF dst, fRegF src)
%{
match(Set dst (SqrtHF src));
format %{ "fsqrt.h $dst, $src" %}
ins_encode %{
__ fsqrt_h($dst$$FloatRegister, $src$$FloatRegister);
%}
ins_pipe(fp_sqrt_s);
%}
// One rule for the four half-precision binary arithmetic nodes (AddHF, SubHF, MulHF,
// DivHF). The encoding looks at ideal_Opcode() to choose between fadd_h, fsub_h,
// fmul_h and fdiv_h; any other opcode trips the assert in the default arm.
// No rounding mode is passed, so the assembler's default argument applies.
instruct binOps_HF_reg(fRegF dst, fRegF src1, fRegF src2)
%{
match(Set dst (AddHF src1 src2));
match(Set dst (SubHF src1 src2));
match(Set dst (MulHF src1 src2));
match(Set dst (DivHF src1 src2));
format %{ "binop_hf $dst, $src1, $src2" %}
ins_encode %{
int opcode = this->ideal_Opcode();
switch(opcode) {
case Op_AddHF: __ fadd_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); break;
case Op_SubHF: __ fsub_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); break;
case Op_MulHF: __ fmul_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); break;
case Op_DivHF: __ fdiv_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); break;
default: assert(false, "%s is not supported here", NodeClassNames[opcode]); break;
}
%}
ins_pipe(fp_dop_reg_reg_s);
%}
// One rule for MinHF and MaxHF. The encoding looks at ideal_Opcode() and calls
// minmax_fp with FLOAT_TYPE::half_precision and is_min = true / false respectively;
// any other opcode trips the assert in the default arm.
//
// minmax_fp writes the scratch registers t0 and t1 (fclass results and the NaN test),
// so the flags register is declared as killed here, in the same way the single- and
// double-precision min/max rules pass an rFlagsReg operand for this helper. Without
// the KILL effect the register allocator is free to keep a live flags value across
// this instruction.
instruct min_max_HF_reg(fRegF dst, fRegF src1, fRegF src2, rFlagsReg cr)
%{
  match(Set dst (MinHF src1 src2));
  match(Set dst (MaxHF src1 src2));
  effect(KILL cr);
  format %{ "min_max_hf $dst, $src1, $src2" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    switch(opcode) {
      case Op_MinHF: __ minmax_fp($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister,
                                  __ FLOAT_TYPE::half_precision, true /* is_min */);
                     break;
      case Op_MaxHF: __ minmax_fp($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister,
                                  __ FLOAT_TYPE::half_precision, false /* is_min */);
                     break;
      default: assert(false, "%s is not supported here", NodeClassNames[opcode]); break;
    }
  %}
  ins_pipe(pipe_class_default);
%}
// FmaHF: half-precision fused multiply-add, $dst = $src1 * $src2 + $src3.
// Note the operand order: the ideal node carries the addend first (src3) followed by
// Binary(src1 src2), while fmadd_h takes the two factors first and the addend last.
instruct fma_HF_reg(fRegF dst, fRegF src1, fRegF src2, fRegF src3)
%{
match(Set dst (FmaHF src3 (Binary src1 src2)));
format %{ "fmadd.h $dst, $src1, $src2, $src3\t# $dst = $src1 * $src2 + $src3 fma packedH" %}
ins_encode %{
__ fmadd_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister);
%}
ins_pipe(pipe_class_default);
%}
// float <-> int
instruct convF2I_reg_reg(iRegINoSp dst, fRegF src) %{

View File

@ -6156,6 +6156,104 @@ class StubGenerator: public StubCodeGenerator {
return start;
}
// Generates the stub that converts a float16 bit pattern to a float.
// Returns the entry address of the generated code.
// x10 = input (float16)
// f10 = result (float)
// t1 = temporary register
// t0 is used as a second scratch register by the code below.
address generate_float16ToFloat() {
__ align(CodeEntryAlignment);
StubGenStubId stub_id = StubGenStubId::hf2f_id;
StubCodeMark mark(this, stub_id);
address entry = __ pc();
BLOCK_COMMENT("float16ToFloat:");
FloatRegister dst = f10;
Register src = x10;
Label NaN_SLOW;
assert(VM_Version::supports_float16_float_conversion(), "must");
// On riscv, NaN needs special handling because fcvt does not give the wanted result
// for it. Inf does not need special handling (fcvt converts it correctly), but it is
// routed to the slow path together with NaN anyway: both are rare, and a check that
// singles out NaN alone would sacrifice performance for the normal cases,
// i.e. the non-NaN and non-Inf cases.
// check whether it's a NaN or +/- Inf:
// 0x7c00 selects the exponent bits of the float16; all of them set means NaN or Inf.
__ mv(t0, 0x7c00);
__ andr(t1, src, t0);
// jump to stub processing NaN and Inf cases.
__ beq(t0, t1, NaN_SLOW);
// non-NaN or non-Inf cases, just use built-in instructions.
__ fmv_h_x(dst, src);
__ fcvt_s_h(dst, dst);
__ ret();
__ bind(NaN_SLOW);
// following instructions mainly focus on NaN, as riscv does not handle
// NaN well with fcvt, but the code also works for Inf at the same time.
// construct a NaN in 32 bits from the NaN in 16 bits,
// we need the payloads of non-canonical NaNs to be preserved.
// 0x7f800000 is the all-ones exponent field of a 32-bit float.
__ mv(t1, 0x7f800000);
// sign-bit was already set via sign-extension if necessary.
// shifting left by 13 (= 23 - 10) lines the float16 fraction up with the top of the
// float fraction field.
__ slli(t0, src, 13);
__ orr(t1, t0, t1);
__ fmv_w_x(dst, t1);
__ ret();
return entry;
}
// Generates the stub that converts a float to a float16 bit pattern.
// Returns the entry address of the generated code.
// f10 = input (float)
// x10 = result (float16)
// f11 = temporary float register
// t1 = temporary register
// t0 is used as a second scratch register by the code below.
address generate_floatToFloat16() {
__ align(CodeEntryAlignment);
StubGenStubId stub_id = StubGenStubId::f2hf_id;
StubCodeMark mark(this, stub_id);
address entry = __ pc();
BLOCK_COMMENT("floatToFloat16:");
Register dst = x10;
FloatRegister src = f10, ftmp = f11;
Label NaN_SLOW;
assert(VM_Version::supports_float16_float_conversion(), "must");
// On riscv, NaN needs a special process as fcvt does not work in that case.
// check whether it's a NaN.
// replace fclass with feq as performance optimization:
// src == src is false only for NaN, so t0 is 0 exactly in the NaN case.
__ feq_s(t0, src, src);
// jump to stub processing NaN cases.
__ beqz(t0, NaN_SLOW);
// non-NaN cases, just use built-in instructions.
__ fcvt_h_s(ftmp, src);
__ fmv_x_h(dst, ftmp);
__ ret();
__ bind(NaN_SLOW);
__ fmv_x_w(dst, src);
// preserve the payloads of non-canonical NaNs:
// after shifting right by 13 the top 10 fraction bits of the float sit in bits 9..0.
// NOTE(review): the low 13 fraction bits are shifted out here; a NaN whose only set
// fraction bits are among those would end up with an all-zero float16 fraction —
// confirm that this input is either impossible or acceptable.
__ srai(dst, dst, 13);
// preserve the sign bit:
// build a mask in t1 whose low 15 bits are all set (exponent ones shifted up to
// bits 14..10, OR-ed with 0x3ff) and whose upper bits are copies of the sign bit,
// relying on the arithmetic shifts to replicate the sign.
__ srai(t1, dst, 13);
__ slli(t1, t1, 10);
__ mv(t0, 0x3ff);
__ orr(t1, t1, t0);
// get the result by merging sign bit and payloads of preserved non-canonical NaNs.
__ andr(dst, dst, t1);
__ ret();
return entry;
}
#endif // COMPILER2_OR_JVMCI
#ifdef COMPILER2
@ -6525,6 +6623,12 @@ static const int64_t right_3_bits = right_n_bits(3);
StubRoutines::_crc_table_adr = (address)StubRoutines::riscv::_crc_table;
StubRoutines::_updateBytesCRC32 = generate_updateBytesCRC32();
}
if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_float16ToFloat) &&
vmIntrinsics::is_intrinsic_available(vmIntrinsics::_floatToFloat16)) {
StubRoutines::_hf2f = generate_float16ToFloat();
StubRoutines::_f2hf = generate_floatToFloat16();
}
}
void generate_continuation_stubs() {

View File

@ -24,6 +24,7 @@
*
*/
#include "classfile/vmIntrinsics.hpp"
#include "runtime/java.hpp"
#include "runtime/os.inline.hpp"
#include "runtime/vm_version.hpp"
@ -464,3 +465,18 @@ void VM_Version::initialize_cpu_information(void) {
snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "RISCV64 %s", features_string());
_initialized = true;
}
// Reports whether the current CPU configuration can back the given VM intrinsic.
// Only the float <-> float16 conversion intrinsics are conditional here: they need
// supports_float16_float_conversion(). Every other id is reported as supported.
// id must not be vmIntrinsics::_none (asserted).
bool VM_Version::is_intrinsic_supported(vmIntrinsicID id) {
  assert(id != vmIntrinsics::_none, "must be a VM intrinsic");
  switch (id) {
    case vmIntrinsics::_float16ToFloat:
    case vmIntrinsics::_floatToFloat16:
      return supports_float16_float_conversion();
    default:
      return true;
  }
}

View File

@ -297,6 +297,13 @@ class VM_Version : public Abstract_VM_Version {
// RISCV64 supports fast class initialization checks
static bool supports_fast_class_init_checks() { return true; }
static bool supports_fencei_barrier() { return ext_Zifencei.enabled(); }
// float <-> float16 conversion is available when either the UseZfhmin or the
// UseZfh flag is enabled.
static bool supports_float16_float_conversion() { return UseZfhmin || UseZfh; }
// Check intrinsic support
static bool is_intrinsic_supported(vmIntrinsicID id);
};
#endif // CPU_RISCV_VM_VERSION_RISCV_HPP

View File

@ -54,7 +54,7 @@ public class ConvF2HFIdealizationTests {
@Test
@IR(counts = {IRNode.REINTERPRET_S2HF, ">=1", IRNode.REINTERPRET_HF2S, ">=1", IRNode.ADD_HF, ">=1" },
failOn = {IRNode.ADD_F, IRNode.CONV_HF2F, IRNode.CONV_F2HF},
applyIfCPUFeature = {"avx512_fp16", "true"})
applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"})
// Test pattern - ConvHF2F -> AddF -> ConvF2HF is optimized to ReinterpretS2HF -> AddHF -> ReinterpretHF2S
public void test1() {
for (int i = 0; i < SIZE; i++) {

View File

@ -54,7 +54,7 @@ public class MulHFNodeIdealizationTests {
@Test
@IR(counts = {IRNode.ADD_HF, "1"},
applyIfCPUFeature = {"avx512_fp16", "true"},
applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"},
failOn = {IRNode.MUL_HF})
public void test1() {
dst = multiply(src, valueOf(2.0f));

View File

@ -101,7 +101,7 @@ public class TestFloat16ScalarOperations {
@Test
@IR(counts = {"convHF2SAndHF2F", " >0 "}, phase = {CompilePhase.FINAL_CODE},
applyIfCPUFeature = {"avx512_fp16", "true"})
applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"})
public void testEliminateIntermediateHF2S() {
Float16 res = shortBitsToFloat16((short)0);
for (int i = 0; i < count; i++) {
@ -114,7 +114,7 @@ public class TestFloat16ScalarOperations {
@Test
@IR(counts = {IRNode.ADD_HF, " >0 ", IRNode.REINTERPRET_S2HF, " >0 ", IRNode.REINTERPRET_HF2S, " >0 "},
applyIfCPUFeature = {"avx512_fp16", "true"})
applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"})
public void testAdd1() {
Float16 res = shortBitsToFloat16((short)0);
for (int i = 0; i < count; i++) {
@ -125,7 +125,7 @@ public class TestFloat16ScalarOperations {
@Test
@IR(failOn = {IRNode.ADD_HF, IRNode.REINTERPRET_S2HF, IRNode.REINTERPRET_HF2S},
applyIfCPUFeature = {"avx512_fp16", "true"})
applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"})
public void testAdd2() {
Float16 hf0 = shortBitsToFloat16((short)0);
Float16 hf1 = shortBitsToFloat16((short)15360);
@ -137,7 +137,7 @@ public class TestFloat16ScalarOperations {
@Test
@IR(counts = {IRNode.SUB_HF, " >0 ", IRNode.REINTERPRET_S2HF, " >0 ", IRNode.REINTERPRET_HF2S, " >0 "},
applyIfCPUFeature = {"avx512_fp16", "true"})
applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"})
public void testSub() {
Float16 res = shortBitsToFloat16((short)0);
for (int i = 0; i < count; i++) {
@ -148,7 +148,7 @@ public class TestFloat16ScalarOperations {
@Test
@IR(counts = {IRNode.MUL_HF, " >0 ", IRNode.REINTERPRET_S2HF, " >0 ", IRNode.REINTERPRET_HF2S, " >0 "},
applyIfCPUFeature = {"avx512_fp16", "true"})
applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"})
public void testMul() {
Float16 res = shortBitsToFloat16((short)0);
for (int i = 0; i < count; i++) {
@ -159,7 +159,7 @@ public class TestFloat16ScalarOperations {
@Test
@IR(counts = {IRNode.DIV_HF, " >0 ", IRNode.REINTERPRET_S2HF, " >0 ", IRNode.REINTERPRET_HF2S, " >0 "},
applyIfCPUFeature = {"avx512_fp16", "true"})
applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"})
public void testDiv() {
Float16 res = shortBitsToFloat16((short)0);
for (int i = 0; i < count; i++) {
@ -170,7 +170,7 @@ public class TestFloat16ScalarOperations {
@Test
@IR(counts = {IRNode.DIV_HF, " 0 ", IRNode.REINTERPRET_S2HF, " 0 ", IRNode.REINTERPRET_HF2S, " 0 "},
applyIfCPUFeature = {"avx512_fp16", "true"})
applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"})
public void testDivByOne() {
Float16 res = shortBitsToFloat16((short)0);
for (int i = 0; i < count; i++) {
@ -181,7 +181,7 @@ public class TestFloat16ScalarOperations {
@Test
@IR(counts = {IRNode.MAX_HF, " >0 ", IRNode.REINTERPRET_S2HF, " >0 ", IRNode.REINTERPRET_HF2S, " >0 "},
applyIfCPUFeature = {"avx512_fp16", "true"})
applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"})
public void testMax() {
Float16 res = shortBitsToFloat16((short)0);
for (int i = 0; i < count; i++) {
@ -192,7 +192,7 @@ public class TestFloat16ScalarOperations {
@Test
@IR(counts = {IRNode.MIN_HF, " >0 ", IRNode.REINTERPRET_S2HF, " >0 ", IRNode.REINTERPRET_HF2S, " >0 "},
applyIfCPUFeature = {"avx512_fp16", "true"})
applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"})
public void testMin() {
Float16 res = shortBitsToFloat16((short)0);
for (int i = 0; i < count; i++) {
@ -203,7 +203,7 @@ public class TestFloat16ScalarOperations {
@Test
@IR(counts = {IRNode.SQRT_HF, " >0 ", IRNode.REINTERPRET_S2HF, " >0 ", IRNode.REINTERPRET_HF2S, " >0 "},
applyIfCPUFeature = {"avx512_fp16", "true"})
applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"})
public void testSqrt() {
Float16 res = shortBitsToFloat16((short)0);
for (int i = 0; i < count; i++) {
@ -214,7 +214,7 @@ public class TestFloat16ScalarOperations {
@Test
@IR(counts = {IRNode.FMA_HF, " >0 ", IRNode.REINTERPRET_S2HF, " >0 ", IRNode.REINTERPRET_HF2S, " >0 "},
applyIfCPUFeature = {"avx512_fp16", "true"})
applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"})
public void testFma() {
Float16 res = shortBitsToFloat16((short)0);
for (int i = 0; i < count; i++) {
@ -226,7 +226,7 @@ public class TestFloat16ScalarOperations {
@Test
@IR(counts = {IRNode.MUL_HF, " >0 ", IRNode.REINTERPRET_S2HF, " >0 ", IRNode.REINTERPRET_HF2S, " >0 "},
applyIfCPUFeature = {"avx512_fp16", "true"})
applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"})
public void testDivByPOT() {
Float16 res = valueOf(0.0f);
for (int i = 0; i < 50; i++) {
@ -243,7 +243,7 @@ public class TestFloat16ScalarOperations {
@Test
@IR(counts = {IRNode.MUL_HF, " 0 ", IRNode.ADD_HF, " >0 ", IRNode.REINTERPRET_S2HF, " >0 ", IRNode.REINTERPRET_HF2S, " >0 "},
applyIfCPUFeature = {"avx512_fp16", "true"})
applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"})
public void testMulByTWO() {
Float16 res = valueOf(0.0f);
Float16 multiplier = valueOf(2.0f);
@ -280,7 +280,7 @@ public class TestFloat16ScalarOperations {
@Test
@IR(counts = {IRNode.ADD_HF, " 0 ", IRNode.REINTERPRET_S2HF, " 0 ", IRNode.REINTERPRET_HF2S, " 0 "},
applyIfCPUFeature = {"avx512_fp16", "true"})
applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"})
public void testAddConstantFolding() {
// If either value is NaN, then the result is NaN.
assertResult(add(Float16.NaN, valueOf(2.0f)).floatValue(), Float.NaN, "testAddConstantFolding");
@ -323,7 +323,7 @@ public class TestFloat16ScalarOperations {
@Test
@IR(counts = {IRNode.SUB_HF, " 0 ", IRNode.REINTERPRET_S2HF, " 0 ", IRNode.REINTERPRET_HF2S, " 0 "},
applyIfCPUFeature = {"avx512_fp16", "true"})
applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"})
public void testSubConstantFolding() {
// If either value is NaN, then the result is NaN.
assertResult(subtract(Float16.NaN, valueOf(2.0f)).floatValue(), Float.NaN, "testAddConstantFolding");
@ -356,7 +356,7 @@ public class TestFloat16ScalarOperations {
@Test
@Warmup(value = 10000)
@IR(counts = {IRNode.MAX_HF, " 0 ", IRNode.REINTERPRET_S2HF, " 0 ", IRNode.REINTERPRET_HF2S, " 0 "},
applyIfCPUFeature = {"avx512_fp16", "true"})
applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"})
public void testMaxConstantFolding() {
// If either value is NaN, then the result is NaN.
assertResult(max(valueOf(2.0f), Float16.NaN).floatValue(), Float.NaN, "testMaxConstantFolding");
@ -374,7 +374,7 @@ public class TestFloat16ScalarOperations {
@Test
@IR(counts = {IRNode.MIN_HF, " 0 ", IRNode.REINTERPRET_S2HF, " 0 ", IRNode.REINTERPRET_HF2S, " 0 "},
applyIfCPUFeature = {"avx512_fp16", "true"})
applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"})
public void testMinConstantFolding() {
// If either value is NaN, then the result is NaN.
assertResult(min(valueOf(2.0f), Float16.NaN).floatValue(), Float.NaN, "testMinConstantFolding");
@ -391,7 +391,7 @@ public class TestFloat16ScalarOperations {
@Test
@IR(counts = {IRNode.DIV_HF, " 0 ", IRNode.REINTERPRET_S2HF, " 0 ", IRNode.REINTERPRET_HF2S, " 0 "},
applyIfCPUFeature = {"avx512_fp16", "true"})
applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"})
public void testDivConstantFolding() {
// If either value is NaN, then the result is NaN.
assertResult(divide(Float16.NaN, POSITIVE_ZERO).floatValue(), Float.NaN, "testDivConstantFolding");
@ -431,7 +431,7 @@ public class TestFloat16ScalarOperations {
@Test
@IR(counts = {IRNode.MUL_HF, " 0 ", IRNode.REINTERPRET_S2HF, " 0 ", IRNode.REINTERPRET_HF2S, " 0 "},
applyIfCPUFeature = {"avx512_fp16", "true"})
applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"})
public void testMulConstantFolding() {
// If any operand is NaN, the result is NaN.
assertResult(multiply(Float16.NaN, valueOf(4.0f)).floatValue(), Float.NaN, "testMulConstantFolding");
@ -454,7 +454,7 @@ public class TestFloat16ScalarOperations {
@Test
@IR(counts = {IRNode.SQRT_HF, " 0 ", IRNode.REINTERPRET_S2HF, " 0 ", IRNode.REINTERPRET_HF2S, " 0 "},
applyIfCPUFeature = {"avx512_fp16", "true"})
applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"})
public void testSqrtConstantFolding() {
// If the argument is NaN or less than zero, then the result is NaN.
assertResult(sqrt(Float16.NaN).floatValue(), Float.NaN, "testSqrtConstantFolding");
@ -473,7 +473,7 @@ public class TestFloat16ScalarOperations {
@Test
@IR(counts = {IRNode.FMA_HF, " 0 ", IRNode.REINTERPRET_S2HF, " 0 ", IRNode.REINTERPRET_HF2S, " 0 "},
applyIfCPUFeature = {"avx512_fp16", "true"})
applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"})
public void testFMAConstantFolding() {
// If any argument is NaN, the result is NaN.
assertResult(fma(Float16.NaN, valueOf(2.0f), valueOf(3.0f)).floatValue(), Float.NaN, "testFMAConstantFolding");
@ -508,7 +508,7 @@ public class TestFloat16ScalarOperations {
@Test
@IR(failOn = {IRNode.ADD_HF, IRNode.SUB_HF, IRNode.MUL_HF, IRNode.DIV_HF, IRNode.SQRT_HF, IRNode.FMA_HF},
applyIfCPUFeature = {"avx512_fp16", "true"})
applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"})
public void testRounding1() {
dst[0] = float16ToRawShortBits(add(RANDOM1, RANDOM2));
dst[1] = float16ToRawShortBits(subtract(RANDOM2, RANDOM3));
@ -547,7 +547,7 @@ public class TestFloat16ScalarOperations {
@Test
@IR(counts = {IRNode.ADD_HF, " >0 ", IRNode.SUB_HF, " >0 ", IRNode.MUL_HF, " >0 ",
IRNode.DIV_HF, " >0 ", IRNode.SQRT_HF, " >0 ", IRNode.FMA_HF, " >0 "},
applyIfCPUFeature = {"avx512_fp16", "true"})
applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"})
public void testRounding2() {
dst[0] = float16ToRawShortBits(add(RANDOM1_VAR, RANDOM2_VAR));
dst[1] = float16ToRawShortBits(subtract(RANDOM2_VAR, RANDOM3_VAR));

View File

@ -110,6 +110,7 @@ public class IREncodingPrinter {
"sve",
// Riscv64
"rvv",
"zfh",
"zvbb",
"zvfh"
));

View File

@ -45,8 +45,6 @@ public class TestFloat16VectorConvChain {
counts = {IRNode.VECTOR_CAST_HF2F, IRNode.VECTOR_SIZE_ANY, ">= 1", IRNode.VECTOR_CAST_F2HF, IRNode.VECTOR_SIZE_ANY, " >= 1"})
@IR(applyIfCPUFeatureAnd = {"avx512_fp16", "false", "f16c", "true"},
counts = {IRNode.VECTOR_CAST_HF2F, IRNode.VECTOR_SIZE_ANY, ">= 1", IRNode.VECTOR_CAST_F2HF, IRNode.VECTOR_SIZE_ANY, " >= 1"})
@IR(applyIfCPUFeature = {"zvfh", "true"},
counts = {IRNode.VECTOR_CAST_HF2F, IRNode.VECTOR_SIZE_ANY, ">= 1", IRNode.VECTOR_CAST_F2HF, IRNode.VECTOR_SIZE_ANY, " >= 1"})
public static void test(short [] res, short [] src1, short [] src2) {
for (int i = 0; i < res.length; i++) {
res[i] = (short)Float.float16ToFloat(Float.floatToFloat16(Float.float16ToFloat(src1[i]) + Float.float16ToFloat(src2[i])));