diff --git a/src/hotspot/cpu/riscv/assembler_riscv.hpp b/src/hotspot/cpu/riscv/assembler_riscv.hpp index 3a638357f0b..a08550b7137 100644 --- a/src/hotspot/cpu/riscv/assembler_riscv.hpp +++ b/src/hotspot/cpu/riscv/assembler_riscv.hpp @@ -1316,6 +1316,7 @@ enum operand_size { int8, int16, int32, uint32, int64 }; public: + void flh(FloatRegister Rd, Register Rs, const int32_t offset) { fp_load<0b001>(Rd, Rs, offset); } void flw(FloatRegister Rd, Register Rs, const int32_t offset) { fp_load<0b010>(Rd, Rs, offset); } void _fld(FloatRegister Rd, Register Rs, const int32_t offset) { fp_load<0b011>(Rd, Rs, offset); } @@ -1397,6 +1398,46 @@ enum operand_size { int8, int16, int32, uint32, int64 }; fp_base(Rd, Rs1, 0b00000, 0b000); } + void fadd_h(FloatRegister Rd, FloatRegister Rs1, FloatRegister Rs2, RoundingMode rm = rne) { + assert_cond(UseZfh); + fp_base(Rd, Rs1, Rs2, rm); + } + + void fsub_h(FloatRegister Rd, FloatRegister Rs1, FloatRegister Rs2, RoundingMode rm = rne) { + assert_cond(UseZfh); + fp_base(Rd, Rs1, Rs2, rm); + } + + void fmul_h(FloatRegister Rd, FloatRegister Rs1, FloatRegister Rs2, RoundingMode rm = rne) { + assert_cond(UseZfh); + fp_base(Rd, Rs1, Rs2, rm); + } + + void fdiv_h(FloatRegister Rd, FloatRegister Rs1, FloatRegister Rs2, RoundingMode rm = rne) { + assert_cond(UseZfh); + fp_base(Rd, Rs1, Rs2, rm); + } + + void fsqrt_h(FloatRegister Rd, FloatRegister Rs1, RoundingMode rm = rne) { + assert_cond(UseZfh); + fp_base(Rd, Rs1, 0b00000, rm); + } + + void fmin_h(FloatRegister Rd, FloatRegister Rs1, FloatRegister Rs2) { + assert_cond(UseZfh); + fp_base(Rd, Rs1, Rs2, 0b000); + } + + void fmax_h(FloatRegister Rd, FloatRegister Rs1, FloatRegister Rs2) { + assert_cond(UseZfh); + fp_base(Rd, Rs1, Rs2, 0b001); + } + + void fmadd_h(FloatRegister Rd, FloatRegister Rs1, FloatRegister Rs2, FloatRegister Rs3, RoundingMode rm = rne) { + assert_cond(UseZfh); + fp_fm(Rd, Rs1, Rs2, Rs3, rm); + } + // -------------- ZFA Instruction Definitions -------------- // Zfa 
Extension for Additional Floating-Point Instructions void _fli_s(FloatRegister Rd, uint8_t Rs1) { diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp index d203ec3dfdb..4eabe966672 100644 --- a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp +++ b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp @@ -2159,27 +2159,68 @@ void C2_MacroAssembler::enc_cmove(int cmpFlag, Register op1, Register op2, Regis // Set dst to NaN if any NaN input. void C2_MacroAssembler::minmax_fp(FloatRegister dst, FloatRegister src1, FloatRegister src2, - bool is_double, bool is_min) { + FLOAT_TYPE ft, bool is_min) { + assert_cond((ft != FLOAT_TYPE::half_precision) || UseZfh); + Label Done, Compare; - is_double ? fclass_d(t0, src1) - : fclass_s(t0, src1); - is_double ? fclass_d(t1, src2) - : fclass_s(t1, src2); - orr(t0, t0, t1); - andi(t0, t0, FClassBits::nan); // if src1 or src2 is quiet or signaling NaN then return NaN - beqz(t0, Compare); - is_double ? fadd_d(dst, src1, src2) - : fadd_s(dst, src1, src2); - j(Done); + switch (ft) { + case FLOAT_TYPE::half_precision: + fclass_h(t0, src1); + fclass_h(t1, src2); - bind(Compare); - if (is_double) { - is_min ? fmin_d(dst, src1, src2) - : fmax_d(dst, src1, src2); - } else { - is_min ? 
fmin_s(dst, src1, src2) - : fmax_s(dst, src1, src2); + orr(t0, t0, t1); + andi(t0, t0, FClassBits::nan); // if src1 or src2 is quiet or signaling NaN then return NaN + beqz(t0, Compare); + + fadd_h(dst, src1, src2); + j(Done); + + bind(Compare); + if (is_min) { + fmin_h(dst, src1, src2); + } else { + fmax_h(dst, src1, src2); + } + break; + case FLOAT_TYPE::single_precision: + fclass_s(t0, src1); + fclass_s(t1, src2); + + orr(t0, t0, t1); + andi(t0, t0, FClassBits::nan); // if src1 or src2 is quiet or signaling NaN then return NaN + beqz(t0, Compare); + + fadd_s(dst, src1, src2); + j(Done); + + bind(Compare); + if (is_min) { + fmin_s(dst, src1, src2); + } else { + fmax_s(dst, src1, src2); + } + break; + case FLOAT_TYPE::double_precision: + fclass_d(t0, src1); + fclass_d(t1, src2); + + orr(t0, t0, t1); + andi(t0, t0, FClassBits::nan); // if src1 or src2 is quiet or signaling NaN then return NaN + beqz(t0, Compare); + + fadd_d(dst, src1, src2); + j(Done); + + bind(Compare); + if (is_min) { + fmin_d(dst, src1, src2); + } else { + fmax_d(dst, src1, src2); + } + break; + default: + ShouldNotReachHere(); } bind(Done); diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp index 72f3c1460f3..a650174d90f 100644 --- a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp +++ b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp @@ -172,9 +172,15 @@ } } + enum class FLOAT_TYPE { + half_precision, + single_precision, + double_precision + }; + void minmax_fp(FloatRegister dst, FloatRegister src1, FloatRegister src2, - bool is_double, bool is_min); + FLOAT_TYPE ft, bool is_min); void round_double_mode(FloatRegister dst, FloatRegister src, int round_mode, Register tmp1, Register tmp2, Register tmp3); diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp index 41ccd0c4b2f..f5dd4025185 100644 --- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +++ 
b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp @@ -1080,6 +1080,7 @@ public: } \ } + INSN(flh); INSN(flw); INSN(fld); diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad index 6ee1e284f2c..cecca0a5402 100644 --- a/src/hotspot/cpu/riscv/riscv.ad +++ b/src/hotspot/cpu/riscv/riscv.ad @@ -1916,7 +1916,19 @@ bool Matcher::match_rule_supported(int opcode) { case Op_ConvHF2F: case Op_ConvF2HF: + return VM_Version::supports_float16_float_conversion(); + case Op_ReinterpretS2HF: + case Op_ReinterpretHF2S: return UseZfh || UseZfhmin; + case Op_AddHF: + case Op_DivHF: + case Op_FmaHF: + case Op_MaxHF: + case Op_MinHF: + case Op_MulHF: + case Op_SubHF: + case Op_SqrtHF: + return UseZfh; } return true; // Per default match rules are supported. @@ -3056,6 +3068,27 @@ operand immF0() interface(CONST_INTER); %} +// Half Float Immediate +operand immH() +%{ + match(ConH); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Half Float Immediate: +0.0f. +operand immH0() +%{ + predicate(jint_cast(n->geth()) == 0); + match(ConH); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + operand immIOffset() %{ predicate(Assembler::is_simm12(n->get_int())); @@ -4907,6 +4940,39 @@ instruct loadConNKlass(iRegNNoSp dst, immNKlass con) ins_pipe(ialu_imm); %} +// Load Half Float Constant +instruct loadConH(fRegF dst, immH con) %{ + match(Set dst con); + + ins_cost(LOAD_COST); + format %{ + "flh $dst, [$constantaddress]\t# load from constant table: float=$con, #@loadConH" + %} + + ins_encode %{ + assert(UseZfh || UseZfhmin, "must"); + __ flh(as_FloatRegister($dst$$reg), $constantaddress($con)); + // TODO: add zfa instructions for half float, and optimize here. 
+ %} + + ins_pipe(fp_load_constant_s); +%} + +instruct loadConH0(fRegF dst, immH0 con) %{ + match(Set dst con); + + ins_cost(XFER_COST); + + format %{ "fmv.h.x $dst, zr\t# float, #@loadConH0" %} + + ins_encode %{ + assert(UseZfh || UseZfhmin, "must"); + __ fmv_h_x(as_FloatRegister($dst$$reg), zr); + %} + + ins_pipe(fp_load_constant_s); +%} + // Load Float Constant instruct loadConF(fRegF dst, immF con) %{ match(Set dst con); @@ -7291,7 +7357,7 @@ instruct maxF_reg_reg(fRegF dst, fRegF src1, fRegF src2, rFlagsReg cr) %{ ins_encode %{ __ minmax_fp(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg), - false /* is_double */, false /* is_min */); + __ FLOAT_TYPE::single_precision, false /* is_min */); %} ins_pipe(pipe_class_default); @@ -7307,7 +7373,7 @@ instruct minF_reg_reg(fRegF dst, fRegF src1, fRegF src2, rFlagsReg cr) %{ ins_encode %{ __ minmax_fp(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg), - false /* is_double */, true /* is_min */); + __ FLOAT_TYPE::single_precision, true /* is_min */); %} ins_pipe(pipe_class_default); @@ -7323,7 +7389,7 @@ instruct maxD_reg_reg(fRegD dst, fRegD src1, fRegD src2, rFlagsReg cr) %{ ins_encode %{ __ minmax_fp(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg), - true /* is_double */, false /* is_min */); + __ FLOAT_TYPE::double_precision, false /* is_min */); %} ins_pipe(pipe_class_default); @@ -7339,7 +7405,7 @@ instruct minD_reg_reg(fRegD dst, fRegD src1, fRegD src2, rFlagsReg cr) %{ ins_encode %{ __ minmax_fp(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg), - true /* is_double */, true /* is_min */); + __ FLOAT_TYPE::double_precision, true /* is_min */); %} ins_pipe(pipe_class_default); @@ -8009,6 +8075,17 @@ instruct checkCastPP(iRegPNoSp dst) ins_pipe(pipe_class_empty); %} +instruct castHH(fRegF dst) +%{ + match(Set dst (CastHH dst)); + + size(0); + format %{ "# castHH 
of $dst" %} + ins_encode(/* empty encoding */); + ins_cost(0); + ins_pipe(pipe_class_empty); +%} + instruct castFF(fRegF dst) %{ match(Set dst (CastFF dst)); @@ -8171,6 +8248,108 @@ instruct convF2HF_reg_reg(iRegINoSp dst, fRegF src, fRegF ftmp, iRegINoSp xtmp) ins_pipe(pipe_slow); %} +// half precision operations + +instruct reinterpretS2HF(fRegF dst, iRegI src) +%{ + match(Set dst (ReinterpretS2HF src)); + format %{ "fmv.h.x $dst, $src" %} + ins_encode %{ + __ fmv_h_x($dst$$FloatRegister, $src$$Register); + %} + ins_pipe(fp_i2f); +%} + +instruct convF2HFAndS2HF(fRegF dst, fRegF src) +%{ + match(Set dst (ReinterpretS2HF (ConvF2HF src))); + format %{ "convF2HFAndS2HF $dst, $src" %} + ins_encode %{ + __ fcvt_h_s($dst$$FloatRegister, $src$$FloatRegister); + %} + ins_pipe(fp_uop_s); +%} + +instruct reinterpretHF2S(iRegINoSp dst, fRegF src) +%{ + match(Set dst (ReinterpretHF2S src)); + format %{ "fmv.x.h $dst, $src" %} + ins_encode %{ + __ fmv_x_h($dst$$Register, $src$$FloatRegister); + %} + ins_pipe(fp_f2i); +%} + +instruct convHF2SAndHF2F(fRegF dst, fRegF src) +%{ + match(Set dst (ConvHF2F (ReinterpretHF2S src))); + format %{ "convHF2SAndHF2F $dst, $src" %} + ins_encode %{ + __ fcvt_s_h($dst$$FloatRegister, $src$$FloatRegister); + %} + ins_pipe(fp_uop_s); +%} + +instruct sqrt_HF_reg(fRegF dst, fRegF src) +%{ + match(Set dst (SqrtHF src)); + format %{ "fsqrt.h $dst, $src" %} + ins_encode %{ + __ fsqrt_h($dst$$FloatRegister, $src$$FloatRegister); + %} + ins_pipe(fp_sqrt_s); +%} + +instruct binOps_HF_reg(fRegF dst, fRegF src1, fRegF src2) +%{ + match(Set dst (AddHF src1 src2)); + match(Set dst (SubHF src1 src2)); + match(Set dst (MulHF src1 src2)); + match(Set dst (DivHF src1 src2)); + format %{ "binop_hf $dst, $src1, $src2" %} + ins_encode %{ + int opcode = this->ideal_Opcode(); + switch(opcode) { + case Op_AddHF: __ fadd_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); break; + case Op_SubHF: __ fsub_h($dst$$FloatRegister, $src1$$FloatRegister, 
$src2$$FloatRegister); break; + case Op_MulHF: __ fmul_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); break; + case Op_DivHF: __ fdiv_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); break; + default: assert(false, "%s is not supported here", NodeClassNames[opcode]); break; + } + %} + ins_pipe(fp_dop_reg_reg_s); +%} + +instruct min_max_HF_reg(fRegF dst, fRegF src1, fRegF src2) +%{ + match(Set dst (MinHF src1 src2)); + match(Set dst (MaxHF src1 src2)); + format %{ "min_max_hf $dst, $src1, $src2" %} + ins_encode %{ + int opcode = this->ideal_Opcode(); + switch(opcode) { + case Op_MinHF: __ minmax_fp($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, + __ FLOAT_TYPE::half_precision, true); + break; + case Op_MaxHF: __ minmax_fp($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, + __ FLOAT_TYPE::half_precision, false); + break; + default: assert(false, "%s is not supported here", NodeClassNames[opcode]); break; + } + %} + ins_pipe(pipe_class_default); +%} + +instruct fma_HF_reg(fRegF dst, fRegF src1, fRegF src2, fRegF src3) +%{ + match(Set dst (FmaHF src3 (Binary src1 src2))); + format %{ "fmadd.h $dst, $src1, $src2, $src3\t# $dst = $src1 * $src2 + $src3 fma packedH" %} + ins_encode %{ + __ fmadd_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister); + %} + ins_pipe(pipe_class_default); +%} + // float <-> int instruct convF2I_reg_reg(iRegINoSp dst, fRegF src) %{ diff --git a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp index e9130590ae0..235dd8f13d8 100644 --- a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp +++ b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp @@ -6156,6 +6156,104 @@ class StubGenerator: public StubCodeGenerator { return start; } + // x10 = input (float16) + // f10 = result (float) + // t1 = temporary register + address generate_float16ToFloat() { + __ align(CodeEntryAlignment); + StubGenStubId stub_id = 
StubGenStubId::hf2f_id; + StubCodeMark mark(this, stub_id); + address entry = __ pc(); + BLOCK_COMMENT("float16ToFloat:"); + + FloatRegister dst = f10; + Register src = x10; + Label NaN_SLOW; + + assert(VM_Version::supports_float16_float_conversion(), "must"); + + // On riscv, NaN needs special handling as fcvt does not work in that case. + // On riscv, Inf does not need special handling as fcvt can handle it correctly, + // but we let the slow path process NaN and Inf at the same time, + // as both of them are rare cases, and making the slow path handle + // only the NaN case would sacrifice the performance of normal cases, + // i.e. non-NaN and non-Inf cases. + + // check whether it's a NaN or +/- Inf. + __ mv(t0, 0x7c00); + __ andr(t1, src, t0); + // jump to stub processing NaN and Inf cases. + __ beq(t0, t1, NaN_SLOW); + + // non-NaN and non-Inf cases, just use built-in instructions. + __ fmv_h_x(dst, src); + __ fcvt_s_h(dst, dst); + __ ret(); + + __ bind(NaN_SLOW); + // following instructions mainly focus on NaN, as riscv does not handle + // NaN well with fcvt, but the code also works for Inf at the same time. + + // construct a NaN in 32 bits from the NaN in 16 bits, + // we need the payloads of non-canonical NaNs to be preserved. + __ mv(t1, 0x7f800000); + // sign-bit was already set via sign-extension if necessary. 
+ __ slli(t0, src, 13); + __ orr(t1, t0, t1); + __ fmv_w_x(dst, t1); + + __ ret(); + return entry; + } + + // f10 = input (float) + // x10 = result (float16) + // f11 = temporary float register + // t1 = temporary register + address generate_floatToFloat16() { + __ align(CodeEntryAlignment); + StubGenStubId stub_id = StubGenStubId::f2hf_id; + StubCodeMark mark(this, stub_id); + address entry = __ pc(); + BLOCK_COMMENT("floatToFloat16:"); + + Register dst = x10; + FloatRegister src = f10, ftmp = f11; + Label NaN_SLOW; + + assert(VM_Version::supports_float16_float_conversion(), "must"); + + // On riscv, NaN needs a special process as fcvt does not work in that case. + + // check whether it's a NaN. + // replace fclass with feq as performance optimization. + __ feq_s(t0, src, src); + // jump to stub processing NaN cases. + __ beqz(t0, NaN_SLOW); + + // non-NaN cases, just use built-in instructions. + __ fcvt_h_s(ftmp, src); + __ fmv_x_h(dst, ftmp); + __ ret(); + + __ bind(NaN_SLOW); + __ fmv_x_w(dst, src); + + // preserve the payloads of non-canonical NaNs. + __ srai(dst, dst, 13); + // preserve the sign bit. + __ srai(t1, dst, 13); + __ slli(t1, t1, 10); + __ mv(t0, 0x3ff); + __ orr(t1, t1, t0); + + // get the result by merging sign bit and payloads of preserved non-canonical NaNs. 
+ __ andr(dst, dst, t1); + + __ ret(); + return entry; + } + #endif // COMPILER2_OR_JVMCI #ifdef COMPILER2 @@ -6525,6 +6623,12 @@ static const int64_t right_3_bits = right_n_bits(3); StubRoutines::_crc_table_adr = (address)StubRoutines::riscv::_crc_table; StubRoutines::_updateBytesCRC32 = generate_updateBytesCRC32(); } + + if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_float16ToFloat) && + vmIntrinsics::is_intrinsic_available(vmIntrinsics::_floatToFloat16)) { + StubRoutines::_hf2f = generate_float16ToFloat(); + StubRoutines::_f2hf = generate_floatToFloat16(); + } } void generate_continuation_stubs() { diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.cpp b/src/hotspot/cpu/riscv/vm_version_riscv.cpp index 389af3a06e3..7b17d30447b 100644 --- a/src/hotspot/cpu/riscv/vm_version_riscv.cpp +++ b/src/hotspot/cpu/riscv/vm_version_riscv.cpp @@ -24,6 +24,7 @@ * */ +#include "classfile/vmIntrinsics.hpp" #include "runtime/java.hpp" #include "runtime/os.inline.hpp" #include "runtime/vm_version.hpp" @@ -464,3 +465,18 @@ void VM_Version::initialize_cpu_information(void) { snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "RISCV64 %s", features_string()); _initialized = true; } + +bool VM_Version::is_intrinsic_supported(vmIntrinsicID id) { + assert(id != vmIntrinsics::_none, "must be a VM intrinsic"); + switch (id) { + case vmIntrinsics::_floatToFloat16: + case vmIntrinsics::_float16ToFloat: + if (!supports_float16_float_conversion()) { + return false; + } + break; + default: + break; + } + return true; +} diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.hpp b/src/hotspot/cpu/riscv/vm_version_riscv.hpp index 68665d12378..dc9d143cea9 100644 --- a/src/hotspot/cpu/riscv/vm_version_riscv.hpp +++ b/src/hotspot/cpu/riscv/vm_version_riscv.hpp @@ -297,6 +297,13 @@ class VM_Version : public Abstract_VM_Version { // RISCV64 supports fast class initialization checks static bool supports_fast_class_init_checks() { return true; } static bool supports_fencei_barrier() { return 
ext_Zifencei.enabled(); } + + static bool supports_float16_float_conversion() { + return UseZfh || UseZfhmin; + } + + // Check intrinsic support + static bool is_intrinsic_supported(vmIntrinsicID id); }; #endif // CPU_RISCV_VM_VERSION_RISCV_HPP diff --git a/test/hotspot/jtreg/compiler/c2/irTests/ConvF2HFIdealizationTests.java b/test/hotspot/jtreg/compiler/c2/irTests/ConvF2HFIdealizationTests.java index f24fca1d87e..d7b927778df 100644 --- a/test/hotspot/jtreg/compiler/c2/irTests/ConvF2HFIdealizationTests.java +++ b/test/hotspot/jtreg/compiler/c2/irTests/ConvF2HFIdealizationTests.java @@ -54,7 +54,7 @@ public class ConvF2HFIdealizationTests { @Test @IR(counts = {IRNode.REINTERPRET_S2HF, ">=1", IRNode.REINTERPRET_HF2S, ">=1", IRNode.ADD_HF, ">=1" }, failOn = {IRNode.ADD_F, IRNode.CONV_HF2F, IRNode.CONV_F2HF}, - applyIfCPUFeature = {"avx512_fp16", "true"}) + applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"}) // Test pattern - ConvHF2F -> AddF -> ConvF2HF is optimized to ReinterpretS2HF -> AddHF -> ReinterpretHF2S public void test1() { for (int i = 0; i < SIZE; i++) { diff --git a/test/hotspot/jtreg/compiler/c2/irTests/MulHFNodeIdealizationTests.java b/test/hotspot/jtreg/compiler/c2/irTests/MulHFNodeIdealizationTests.java index dd98c80d629..eab220dc196 100644 --- a/test/hotspot/jtreg/compiler/c2/irTests/MulHFNodeIdealizationTests.java +++ b/test/hotspot/jtreg/compiler/c2/irTests/MulHFNodeIdealizationTests.java @@ -54,7 +54,7 @@ public class MulHFNodeIdealizationTests { @Test @IR(counts = {IRNode.ADD_HF, "1"}, - applyIfCPUFeature = {"avx512_fp16", "true"}, + applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"}, failOn = {IRNode.MUL_HF}) public void test1() { dst = multiply(src, valueOf(2.0f)); diff --git a/test/hotspot/jtreg/compiler/c2/irTests/TestFloat16ScalarOperations.java b/test/hotspot/jtreg/compiler/c2/irTests/TestFloat16ScalarOperations.java index 17a3e4b4c56..014c660dfb2 100644 --- 
a/test/hotspot/jtreg/compiler/c2/irTests/TestFloat16ScalarOperations.java +++ b/test/hotspot/jtreg/compiler/c2/irTests/TestFloat16ScalarOperations.java @@ -101,7 +101,7 @@ public class TestFloat16ScalarOperations { @Test @IR(counts = {"convHF2SAndHF2F", " >0 "}, phase = {CompilePhase.FINAL_CODE}, - applyIfCPUFeature = {"avx512_fp16", "true"}) + applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"}) public void testEliminateIntermediateHF2S() { Float16 res = shortBitsToFloat16((short)0); for (int i = 0; i < count; i++) { @@ -114,7 +114,7 @@ public class TestFloat16ScalarOperations { @Test @IR(counts = {IRNode.ADD_HF, " >0 ", IRNode.REINTERPRET_S2HF, " >0 ", IRNode.REINTERPRET_HF2S, " >0 "}, - applyIfCPUFeature = {"avx512_fp16", "true"}) + applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"}) public void testAdd1() { Float16 res = shortBitsToFloat16((short)0); for (int i = 0; i < count; i++) { @@ -125,7 +125,7 @@ public class TestFloat16ScalarOperations { @Test @IR(failOn = {IRNode.ADD_HF, IRNode.REINTERPRET_S2HF, IRNode.REINTERPRET_HF2S}, - applyIfCPUFeature = {"avx512_fp16", "true"}) + applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"}) public void testAdd2() { Float16 hf0 = shortBitsToFloat16((short)0); Float16 hf1 = shortBitsToFloat16((short)15360); @@ -137,7 +137,7 @@ public class TestFloat16ScalarOperations { @Test @IR(counts = {IRNode.SUB_HF, " >0 ", IRNode.REINTERPRET_S2HF, " >0 ", IRNode.REINTERPRET_HF2S, " >0 "}, - applyIfCPUFeature = {"avx512_fp16", "true"}) + applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"}) public void testSub() { Float16 res = shortBitsToFloat16((short)0); for (int i = 0; i < count; i++) { @@ -148,7 +148,7 @@ public class TestFloat16ScalarOperations { @Test @IR(counts = {IRNode.MUL_HF, " >0 ", IRNode.REINTERPRET_S2HF, " >0 ", IRNode.REINTERPRET_HF2S, " >0 "}, - applyIfCPUFeature = {"avx512_fp16", "true"}) + applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"}) public void testMul() { Float16 
res = shortBitsToFloat16((short)0); for (int i = 0; i < count; i++) { @@ -159,7 +159,7 @@ public class TestFloat16ScalarOperations { @Test @IR(counts = {IRNode.DIV_HF, " >0 ", IRNode.REINTERPRET_S2HF, " >0 ", IRNode.REINTERPRET_HF2S, " >0 "}, - applyIfCPUFeature = {"avx512_fp16", "true"}) + applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"}) public void testDiv() { Float16 res = shortBitsToFloat16((short)0); for (int i = 0; i < count; i++) { @@ -170,7 +170,7 @@ public class TestFloat16ScalarOperations { @Test @IR(counts = {IRNode.DIV_HF, " 0 ", IRNode.REINTERPRET_S2HF, " 0 ", IRNode.REINTERPRET_HF2S, " 0 "}, - applyIfCPUFeature = {"avx512_fp16", "true"}) + applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"}) public void testDivByOne() { Float16 res = shortBitsToFloat16((short)0); for (int i = 0; i < count; i++) { @@ -181,7 +181,7 @@ public class TestFloat16ScalarOperations { @Test @IR(counts = {IRNode.MAX_HF, " >0 ", IRNode.REINTERPRET_S2HF, " >0 ", IRNode.REINTERPRET_HF2S, " >0 "}, - applyIfCPUFeature = {"avx512_fp16", "true"}) + applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"}) public void testMax() { Float16 res = shortBitsToFloat16((short)0); for (int i = 0; i < count; i++) { @@ -192,7 +192,7 @@ public class TestFloat16ScalarOperations { @Test @IR(counts = {IRNode.MIN_HF, " >0 ", IRNode.REINTERPRET_S2HF, " >0 ", IRNode.REINTERPRET_HF2S, " >0 "}, - applyIfCPUFeature = {"avx512_fp16", "true"}) + applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"}) public void testMin() { Float16 res = shortBitsToFloat16((short)0); for (int i = 0; i < count; i++) { @@ -203,7 +203,7 @@ public class TestFloat16ScalarOperations { @Test @IR(counts = {IRNode.SQRT_HF, " >0 ", IRNode.REINTERPRET_S2HF, " >0 ", IRNode.REINTERPRET_HF2S, " >0 "}, - applyIfCPUFeature = {"avx512_fp16", "true"}) + applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"}) public void testSqrt() { Float16 res = shortBitsToFloat16((short)0); for (int i = 0; i < count; 
i++) { @@ -214,7 +214,7 @@ public class TestFloat16ScalarOperations { @Test @IR(counts = {IRNode.FMA_HF, " >0 ", IRNode.REINTERPRET_S2HF, " >0 ", IRNode.REINTERPRET_HF2S, " >0 "}, - applyIfCPUFeature = {"avx512_fp16", "true"}) + applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"}) public void testFma() { Float16 res = shortBitsToFloat16((short)0); for (int i = 0; i < count; i++) { @@ -226,7 +226,7 @@ public class TestFloat16ScalarOperations { @Test @IR(counts = {IRNode.MUL_HF, " >0 ", IRNode.REINTERPRET_S2HF, " >0 ", IRNode.REINTERPRET_HF2S, " >0 "}, - applyIfCPUFeature = {"avx512_fp16", "true"}) + applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"}) public void testDivByPOT() { Float16 res = valueOf(0.0f); for (int i = 0; i < 50; i++) { @@ -243,7 +243,7 @@ public class TestFloat16ScalarOperations { @Test @IR(counts = {IRNode.MUL_HF, " 0 ", IRNode.ADD_HF, " >0 ", IRNode.REINTERPRET_S2HF, " >0 ", IRNode.REINTERPRET_HF2S, " >0 "}, - applyIfCPUFeature = {"avx512_fp16", "true"}) + applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"}) public void testMulByTWO() { Float16 res = valueOf(0.0f); Float16 multiplier = valueOf(2.0f); @@ -280,7 +280,7 @@ public class TestFloat16ScalarOperations { @Test @IR(counts = {IRNode.ADD_HF, " 0 ", IRNode.REINTERPRET_S2HF, " 0 ", IRNode.REINTERPRET_HF2S, " 0 "}, - applyIfCPUFeature = {"avx512_fp16", "true"}) + applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"}) public void testAddConstantFolding() { // If either value is NaN, then the result is NaN. 
assertResult(add(Float16.NaN, valueOf(2.0f)).floatValue(), Float.NaN, "testAddConstantFolding"); @@ -323,7 +323,7 @@ public class TestFloat16ScalarOperations { @Test @IR(counts = {IRNode.SUB_HF, " 0 ", IRNode.REINTERPRET_S2HF, " 0 ", IRNode.REINTERPRET_HF2S, " 0 "}, - applyIfCPUFeature = {"avx512_fp16", "true"}) + applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"}) public void testSubConstantFolding() { // If either value is NaN, then the result is NaN. assertResult(subtract(Float16.NaN, valueOf(2.0f)).floatValue(), Float.NaN, "testAddConstantFolding"); @@ -356,7 +356,7 @@ public class TestFloat16ScalarOperations { @Test @Warmup(value = 10000) @IR(counts = {IRNode.MAX_HF, " 0 ", IRNode.REINTERPRET_S2HF, " 0 ", IRNode.REINTERPRET_HF2S, " 0 "}, - applyIfCPUFeature = {"avx512_fp16", "true"}) + applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"}) public void testMaxConstantFolding() { // If either value is NaN, then the result is NaN. assertResult(max(valueOf(2.0f), Float16.NaN).floatValue(), Float.NaN, "testMaxConstantFolding"); @@ -374,7 +374,7 @@ public class TestFloat16ScalarOperations { @Test @IR(counts = {IRNode.MIN_HF, " 0 ", IRNode.REINTERPRET_S2HF, " 0 ", IRNode.REINTERPRET_HF2S, " 0 "}, - applyIfCPUFeature = {"avx512_fp16", "true"}) + applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"}) public void testMinConstantFolding() { // If either value is NaN, then the result is NaN. assertResult(min(valueOf(2.0f), Float16.NaN).floatValue(), Float.NaN, "testMinConstantFolding"); @@ -391,7 +391,7 @@ public class TestFloat16ScalarOperations { @Test @IR(counts = {IRNode.DIV_HF, " 0 ", IRNode.REINTERPRET_S2HF, " 0 ", IRNode.REINTERPRET_HF2S, " 0 "}, - applyIfCPUFeature = {"avx512_fp16", "true"}) + applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"}) public void testDivConstantFolding() { // If either value is NaN, then the result is NaN. 
assertResult(divide(Float16.NaN, POSITIVE_ZERO).floatValue(), Float.NaN, "testDivConstantFolding"); @@ -431,7 +431,7 @@ public class TestFloat16ScalarOperations { @Test @IR(counts = {IRNode.MUL_HF, " 0 ", IRNode.REINTERPRET_S2HF, " 0 ", IRNode.REINTERPRET_HF2S, " 0 "}, - applyIfCPUFeature = {"avx512_fp16", "true"}) + applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"}) public void testMulConstantFolding() { // If any operand is NaN, the result is NaN. assertResult(multiply(Float16.NaN, valueOf(4.0f)).floatValue(), Float.NaN, "testMulConstantFolding"); @@ -454,7 +454,7 @@ public class TestFloat16ScalarOperations { @Test @IR(counts = {IRNode.SQRT_HF, " 0 ", IRNode.REINTERPRET_S2HF, " 0 ", IRNode.REINTERPRET_HF2S, " 0 "}, - applyIfCPUFeature = {"avx512_fp16", "true"}) + applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"}) public void testSqrtConstantFolding() { // If the argument is NaN or less than zero, then the result is NaN. assertResult(sqrt(Float16.NaN).floatValue(), Float.NaN, "testSqrtConstantFolding"); @@ -473,7 +473,7 @@ public class TestFloat16ScalarOperations { @Test @IR(counts = {IRNode.FMA_HF, " 0 ", IRNode.REINTERPRET_S2HF, " 0 ", IRNode.REINTERPRET_HF2S, " 0 "}, - applyIfCPUFeature = {"avx512_fp16", "true"}) + applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"}) public void testFMAConstantFolding() { // If any argument is NaN, the result is NaN. 
assertResult(fma(Float16.NaN, valueOf(2.0f), valueOf(3.0f)).floatValue(), Float.NaN, "testFMAConstantFolding"); @@ -508,7 +508,7 @@ public class TestFloat16ScalarOperations { @Test @IR(failOn = {IRNode.ADD_HF, IRNode.SUB_HF, IRNode.MUL_HF, IRNode.DIV_HF, IRNode.SQRT_HF, IRNode.FMA_HF}, - applyIfCPUFeature = {"avx512_fp16", "true"}) + applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"}) public void testRounding1() { dst[0] = float16ToRawShortBits(add(RANDOM1, RANDOM2)); dst[1] = float16ToRawShortBits(subtract(RANDOM2, RANDOM3)); @@ -547,7 +547,7 @@ public class TestFloat16ScalarOperations { @Test @IR(counts = {IRNode.ADD_HF, " >0 ", IRNode.SUB_HF, " >0 ", IRNode.MUL_HF, " >0 ", IRNode.DIV_HF, " >0 ", IRNode.SQRT_HF, " >0 ", IRNode.FMA_HF, " >0 "}, - applyIfCPUFeature = {"avx512_fp16", "true"}) + applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"}) public void testRounding2() { dst[0] = float16ToRawShortBits(add(RANDOM1_VAR, RANDOM2_VAR)); dst[1] = float16ToRawShortBits(subtract(RANDOM2_VAR, RANDOM3_VAR)); diff --git a/test/hotspot/jtreg/compiler/lib/ir_framework/test/IREncodingPrinter.java b/test/hotspot/jtreg/compiler/lib/ir_framework/test/IREncodingPrinter.java index 0b7dcbae9d9..023bd7b815d 100644 --- a/test/hotspot/jtreg/compiler/lib/ir_framework/test/IREncodingPrinter.java +++ b/test/hotspot/jtreg/compiler/lib/ir_framework/test/IREncodingPrinter.java @@ -110,6 +110,7 @@ public class IREncodingPrinter { "sve", // Riscv64 "rvv", + "zfh", "zvbb", "zvfh" )); diff --git a/test/hotspot/jtreg/compiler/vectorization/TestFloat16VectorConvChain.java b/test/hotspot/jtreg/compiler/vectorization/TestFloat16VectorConvChain.java index 4cf656620bc..2cb04c3889b 100644 --- a/test/hotspot/jtreg/compiler/vectorization/TestFloat16VectorConvChain.java +++ b/test/hotspot/jtreg/compiler/vectorization/TestFloat16VectorConvChain.java @@ -45,8 +45,6 @@ public class TestFloat16VectorConvChain { counts = {IRNode.VECTOR_CAST_HF2F, IRNode.VECTOR_SIZE_ANY, ">= 1", 
IRNode.VECTOR_CAST_F2HF, IRNode.VECTOR_SIZE_ANY, " >= 1"}) @IR(applyIfCPUFeatureAnd = {"avx512_fp16", "false", "f16c", "true"}, counts = {IRNode.VECTOR_CAST_HF2F, IRNode.VECTOR_SIZE_ANY, ">= 1", IRNode.VECTOR_CAST_F2HF, IRNode.VECTOR_SIZE_ANY, " >= 1"}) - @IR(applyIfCPUFeature = {"zvfh", "true"}, - counts = {IRNode.VECTOR_CAST_HF2F, IRNode.VECTOR_SIZE_ANY, ">= 1", IRNode.VECTOR_CAST_F2HF, IRNode.VECTOR_SIZE_ANY, " >= 1"}) public static void test(short [] res, short [] src1, short [] src2) { for (int i = 0; i < res.length; i++) { res[i] = (short)Float.float16ToFloat(Float.floatToFloat16(Float.float16ToFloat(src1[i]) + Float.float16ToFloat(src2[i])));