8345298: RISC-V: Add riscv backend for Float16 operations - scalar

Reviewed-by: rehn, fyang
2026-06-27 12:52:31 +00:00 · 2025-03-13 08:16:53 +00:00 · 2025-03-13 08:16:53 +00:00 · a33b1f7f64
commit a33b1f7f64
parent 6241d09657
13 changed files with 444 additions and 50 deletions
--- a/src/hotspot/cpu/riscv/assembler_riscv.hpp
+++ b/src/hotspot/cpu/riscv/assembler_riscv.hpp
@ -1316,6 +1316,7 @@ enum operand_size { int8, int16, int32, uint32, int64 };

 public:

+  void  flh(FloatRegister Rd, Register Rs, const int32_t offset) { fp_load<0b001>(Rd, Rs, offset); }  // half-precision FP load; flw uses selector 0b010 and _fld uses 0b011
  void  flw(FloatRegister Rd, Register Rs, const int32_t offset) { fp_load<0b010>(Rd, Rs, offset); }
  void _fld(FloatRegister Rd, Register Rs, const int32_t offset) { fp_load<0b011>(Rd, Rs, offset); }

@ -1397,6 +1398,46 @@ enum operand_size { int8, int16, int32, uint32, int64 };
    fp_base<H_16_hp, 0b11100>(Rd, Rs1, 0b00000, 0b000);
  }

+  void fadd_h(FloatRegister Rd, FloatRegister Rs1, FloatRegister Rs2, RoundingMode rm = rne) {
+    assert_cond(UseZfh);  // only UseZfh is accepted here; UseZfhmin is not checked
+    fp_base<H_16_hp, 0b00000>(Rd, Rs1, Rs2, rm);  // half-precision add (selector 0b00000), rounding mode taken from rm
+  }
+
+  void fsub_h(FloatRegister Rd, FloatRegister Rs1, FloatRegister Rs2, RoundingMode rm = rne) {
+    assert_cond(UseZfh);  // only UseZfh is accepted here; UseZfhmin is not checked
+    fp_base<H_16_hp, 0b00001>(Rd, Rs1, Rs2, rm);  // half-precision subtract (selector 0b00001), rounding mode taken from rm
+  }
+
+  void fmul_h(FloatRegister Rd, FloatRegister Rs1, FloatRegister Rs2, RoundingMode rm = rne) {
+    assert_cond(UseZfh);  // only UseZfh is accepted here; UseZfhmin is not checked
+    fp_base<H_16_hp, 0b00010>(Rd, Rs1, Rs2, rm);  // half-precision multiply (selector 0b00010), rounding mode taken from rm
+  }
+
+  void fdiv_h(FloatRegister Rd, FloatRegister Rs1, FloatRegister Rs2, RoundingMode rm = rne) {
+    assert_cond(UseZfh);  // only UseZfh is accepted here; UseZfhmin is not checked
+    fp_base<H_16_hp, 0b00011>(Rd, Rs1, Rs2, rm);  // half-precision divide (selector 0b00011), rounding mode taken from rm
+  }
+
+  void fsqrt_h(FloatRegister Rd, FloatRegister Rs1, RoundingMode rm = rne) {
+    assert_cond(UseZfh);  // only UseZfh is accepted here; UseZfhmin is not checked
+    fp_base<H_16_hp, 0b01011>(Rd, Rs1, 0b00000, rm);  // half-precision square root; single source, so the second-source slot is passed as 0
+  }
+
+  void fmin_h(FloatRegister Rd, FloatRegister Rs1, FloatRegister Rs2) {
+    assert_cond(UseZfh);  // only UseZfh is accepted here; UseZfhmin is not checked
+    fp_base<H_16_hp, 0b00101>(Rd, Rs1, Rs2, 0b000);  // shares selector 0b00101 with fmax_h; the last argument 0b000 picks min
+  }
+
+  void fmax_h(FloatRegister Rd, FloatRegister Rs1, FloatRegister Rs2) {
+    assert_cond(UseZfh);  // only UseZfh is accepted here; UseZfhmin is not checked
+    fp_base<H_16_hp, 0b00101>(Rd, Rs1, Rs2, 0b001);  // shares selector 0b00101 with fmin_h; the last argument 0b001 picks max
+  }
+
+  void fmadd_h(FloatRegister Rd, FloatRegister Rs1, FloatRegister Rs2, FloatRegister Rs3, RoundingMode rm = rne)  {
+    assert_cond(UseZfh);  // only UseZfh is accepted here; UseZfhmin is not checked
+    fp_fm<H_16_hp, 0b1000011>(Rd, Rs1, Rs2, Rs3, rm);  // three-source form; the fma_HF_reg format string in riscv.ad describes it as Rs1 * Rs2 + Rs3
+  }
+
 // --------------  ZFA Instruction Definitions  --------------
 // Zfa Extension for Additional Floating-Point Instructions
  void _fli_s(FloatRegister Rd, uint8_t Rs1) {
--- a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp
+++ b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp
@ -2159,27 +2159,68 @@ void C2_MacroAssembler::enc_cmove(int cmpFlag, Register op1, Register op2, Regis

 // Set dst to NaN if any NaN input.
 void C2_MacroAssembler::minmax_fp(FloatRegister dst, FloatRegister src1, FloatRegister src2,
-                                  bool is_double, bool is_min) {
+                                  FLOAT_TYPE ft, bool is_min) {
+  assert_cond((ft != FLOAT_TYPE::half_precision) || UseZfh);
+
  Label Done, Compare;

-  is_double ? fclass_d(t0, src1)
-            : fclass_s(t0, src1);
-  is_double ? fclass_d(t1, src2)
-            : fclass_s(t1, src2);
-  orr(t0, t0, t1);
-  andi(t0, t0, FClassBits::nan); // if src1 or src2 is quiet or signaling NaN then return NaN
-  beqz(t0, Compare);
-  is_double ? fadd_d(dst, src1, src2)
-            : fadd_s(dst, src1, src2);
-  j(Done);
+  switch (ft) {
+    case FLOAT_TYPE::half_precision:
+      fclass_h(t0, src1);
+      fclass_h(t1, src2);

-  bind(Compare);
-  if (is_double) {
-    is_min ? fmin_d(dst, src1, src2)
-           : fmax_d(dst, src1, src2);
-  } else {
-    is_min ? fmin_s(dst, src1, src2)
-           : fmax_s(dst, src1, src2);
+      orr(t0, t0, t1);
+      andi(t0, t0, FClassBits::nan); // if src1 or src2 is quiet or signaling NaN then return NaN
+      beqz(t0, Compare);
+
+      fadd_h(dst, src1, src2);
+      j(Done);
+
+      bind(Compare);
+      if (is_min) {
+        fmin_h(dst, src1, src2);
+      } else {
+        fmax_h(dst, src1, src2);
+      }
+      break;
+    case FLOAT_TYPE::single_precision:
+      fclass_s(t0, src1);
+      fclass_s(t1, src2);
+
+      orr(t0, t0, t1);
+      andi(t0, t0, FClassBits::nan); // if src1 or src2 is quiet or signaling NaN then return NaN
+      beqz(t0, Compare);
+
+      fadd_s(dst, src1, src2);
+      j(Done);
+
+      bind(Compare);
+      if (is_min) {
+        fmin_s(dst, src1, src2);
+      } else {
+        fmax_s(dst, src1, src2);
+      }
+      break;
+    case FLOAT_TYPE::double_precision:
+      fclass_d(t0, src1);
+      fclass_d(t1, src2);
+
+      orr(t0, t0, t1);
+      andi(t0, t0, FClassBits::nan); // if src1 or src2 is quiet or signaling NaN then return NaN
+      beqz(t0, Compare);
+
+      fadd_d(dst, src1, src2);
+      j(Done);
+
+      bind(Compare);
+      if (is_min) {
+        fmin_d(dst, src1, src2);
+      } else {
+        fmax_d(dst, src1, src2);
+      }
+      break;
+    default:
+      ShouldNotReachHere();
  }

  bind(Done);
--- a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp
+++ b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp
@ -172,9 +172,15 @@
    }
  }

+  enum class FLOAT_TYPE {  // operand-width selector passed to minmax_fp
+    half_precision,    // minmax_fp uses the *_h instructions and asserts UseZfh for this value
+    single_precision,  // minmax_fp uses the *_s instructions
+    double_precision   // minmax_fp uses the *_d instructions
+  };
+
  void minmax_fp(FloatRegister dst,
                 FloatRegister src1, FloatRegister src2,
-                 bool is_double, bool is_min);
+                 FLOAT_TYPE ft, bool is_min);

  void round_double_mode(FloatRegister dst, FloatRegister src, int round_mode,
                         Register tmp1, Register tmp2, Register tmp3);
--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp
+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp
@ -1080,6 +1080,7 @@ public:
    }                                                                                              \
  }

+  INSN(flh);
  INSN(flw);
  INSN(fld);

--- a/src/hotspot/cpu/riscv/riscv.ad
+++ b/src/hotspot/cpu/riscv/riscv.ad
@ -1916,7 +1916,19 @@ bool Matcher::match_rule_supported(int opcode) {

    case Op_ConvHF2F:
    case Op_ConvF2HF:
+      return VM_Version::supports_float16_float_conversion();
+    case Op_ReinterpretS2HF:
+    case Op_ReinterpretHF2S:
      return UseZfh || UseZfhmin;
+    case Op_AddHF:
+    case Op_DivHF:
+    case Op_FmaHF:
+    case Op_MaxHF:
+    case Op_MinHF:
+    case Op_MulHF:
+    case Op_SubHF:
+    case Op_SqrtHF:
+      return UseZfh;
  }

  return true; // Per default match rules are supported.
@ -3056,6 +3068,27 @@ operand immF0()
  interface(CONST_INTER);
 %}

+// Half Float Immediate: matches every ConH node (no predicate).
+operand immH()
+%{
+  match(ConH);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Half Float Immediate: +0.0f. Matches a ConH only when jint_cast(n->geth()) is 0.
+operand immH0()
+%{
+  predicate(jint_cast(n->geth()) == 0);
+  match(ConH);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
 operand immIOffset()
 %{
  predicate(Assembler::is_simm12(n->get_int()));
@ -4907,6 +4940,39 @@ instruct loadConNKlass(iRegNNoSp dst, immNKlass con)
  ins_pipe(ialu_imm);
 %}

+// Load Half Float Constant: reads any immH constant from the constant table with flh.
+instruct loadConH(fRegF dst, immH con) %{
+  match(Set dst con);
+
+  ins_cost(LOAD_COST);
+  format %{
+    "flh $dst, [$constantaddress]\t# load from constant table: float=$con, #@loadConH"
+  %}
+
+  ins_encode %{
+    assert(UseZfh || UseZfhmin, "must");
+    __ flh(as_FloatRegister($dst$$reg), $constantaddress($con));
+    // TODO: add Zfa instructions for half float and use them here to avoid the load where possible.
+  %}
+
+  ins_pipe(fp_load_constant_s);
+%}
+
+instruct loadConH0(fRegF dst, immH0 con) %{
+  match(Set dst con);
+
+  ins_cost(XFER_COST);
+
+  format %{ "fmv.h.x $dst, zr\t# float, #@loadConH0" %}
+
+  ins_encode %{
+    assert(UseZfh || UseZfhmin, "must");
+    __ fmv_h_x(as_FloatRegister($dst$$reg), zr);  // the immH0 constant is produced by moving the zero register; no constant-table load
+  %}
+
+  ins_pipe(fp_load_constant_s);
+%}
+
 // Load Float Constant
 instruct loadConF(fRegF dst, immF con) %{
  match(Set dst con);
@ -7291,7 +7357,7 @@ instruct maxF_reg_reg(fRegF dst, fRegF src1, fRegF src2, rFlagsReg cr) %{
  ins_encode %{
    __ minmax_fp(as_FloatRegister($dst$$reg),
                 as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg),
-                 false /* is_double */, false /* is_min */);
+                 __ FLOAT_TYPE::single_precision, false /* is_min */);
  %}

  ins_pipe(pipe_class_default);
@ -7307,7 +7373,7 @@ instruct minF_reg_reg(fRegF dst, fRegF src1, fRegF src2, rFlagsReg cr) %{
  ins_encode %{
    __ minmax_fp(as_FloatRegister($dst$$reg),
                 as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg),
-                 false /* is_double */, true /* is_min */);
+                 __ FLOAT_TYPE::single_precision, true /* is_min */);
  %}

  ins_pipe(pipe_class_default);
@ -7323,7 +7389,7 @@ instruct maxD_reg_reg(fRegD dst, fRegD src1, fRegD src2, rFlagsReg cr) %{
  ins_encode %{
    __ minmax_fp(as_FloatRegister($dst$$reg),
                 as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg),
-                 true /* is_double */, false /* is_min */);
+                 __ FLOAT_TYPE::double_precision, false /* is_min */);
  %}

  ins_pipe(pipe_class_default);
@ -7339,7 +7405,7 @@ instruct minD_reg_reg(fRegD dst, fRegD src1, fRegD src2, rFlagsReg cr) %{
  ins_encode %{
    __ minmax_fp(as_FloatRegister($dst$$reg),
                 as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg),
-                 true /* is_double */, true /* is_min */);
+                 __ FLOAT_TYPE::double_precision, true /* is_min */);
  %}

  ins_pipe(pipe_class_default);
@ -8009,6 +8075,17 @@ instruct checkCastPP(iRegPNoSp dst)
  ins_pipe(pipe_class_empty);
 %}

+instruct castHH(fRegF dst)
+%{
+  match(Set dst (CastHH dst));
+
+  size(0);
+  format %{ "# castHH of $dst" %}
+  ins_encode(/* empty encoding: nothing is emitted, consistent with size(0) */);
+  ins_cost(0);
+  ins_pipe(pipe_class_empty);
+%}
+
 instruct castFF(fRegF dst)
 %{
  match(Set dst (CastFF dst));
@ -8171,6 +8248,108 @@ instruct convF2HF_reg_reg(iRegINoSp dst, fRegF src, fRegF ftmp, iRegINoSp xtmp)
  ins_pipe(pipe_slow);
 %}

+// half precision operations
+
+instruct reinterpretS2HF(fRegF dst, iRegI src)
+%{
+  match(Set dst (ReinterpretS2HF src));
+  format %{ "fmv.h.x $dst, $src" %}
+  ins_encode %{
+    __ fmv_h_x($dst$$FloatRegister, $src$$Register);  // integer register -> FP register move; no fcvt is emitted
+  %}
+  ins_pipe(fp_i2f);
+%}
+
+instruct convF2HFAndS2HF(fRegF dst, fRegF src)
+%{
+  match(Set dst (ReinterpretS2HF (ConvF2HF src)));
+  format %{ "convF2HFAndS2HF $dst, $src" %}
+  ins_encode %{
+    __ fcvt_h_s($dst$$FloatRegister, $src$$FloatRegister);  // one FP-to-FP fcvt covers the matched ConvF2HF + ReinterpretS2HF pair
+  %}
+  ins_pipe(fp_uop_s);
+%}
+
+instruct reinterpretHF2S(iRegINoSp dst, fRegF src)
+%{
+  match(Set dst (ReinterpretHF2S src));
+  format %{ "fmv.x.h $dst, $src" %}
+  ins_encode %{
+    __ fmv_x_h($dst$$Register, $src$$FloatRegister);  // FP register -> integer register move; no fcvt is emitted
+  %}
+  ins_pipe(fp_f2i);
+%}
+
+instruct convHF2SAndHF2F(fRegF dst, fRegF src)
+%{
+  match(Set dst (ConvHF2F (ReinterpretHF2S src)));
+  format %{ "convHF2SAndHF2F $dst, $src" %}
+  ins_encode %{
+    __ fcvt_s_h($dst$$FloatRegister, $src$$FloatRegister);  // one FP-to-FP fcvt covers the matched ReinterpretHF2S + ConvHF2F pair
+  %}
+  ins_pipe(fp_uop_s);
+%}
+
+instruct sqrt_HF_reg(fRegF dst, fRegF src)
+%{
+  match(Set dst (SqrtHF src));
+  format %{ "fsqrt.h $dst, $src" %}
+  ins_encode %{
+    __ fsqrt_h($dst$$FloatRegister, $src$$FloatRegister);  // rounding mode is left at the fsqrt_h default (rne)
+  %}
+  ins_pipe(fp_sqrt_s);
+%}
+
+instruct binOps_HF_reg(fRegF dst, fRegF src1, fRegF src2)
+%{
+  match(Set dst (AddHF src1 src2));
+  match(Set dst (SubHF src1 src2));
+  match(Set dst (MulHF src1 src2));
+  match(Set dst (DivHF src1 src2));
+  format %{ "binop_hf $dst, $src1, $src2" %}
+  ins_encode %{
+    int opcode = this->ideal_Opcode();  // this instruct matches four ideal nodes, so the instruction is chosen at emit time
+    switch(opcode) {
+      case Op_AddHF: __ fadd_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); break;
+      case Op_SubHF: __ fsub_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); break;
+      case Op_MulHF: __ fmul_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); break;
+      case Op_DivHF: __ fdiv_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); break;
+      default: assert(false, "%s is not supported here", NodeClassNames[opcode]); break;  // any opcode other than the four matched above trips this assert
+    }
+  %}
+  ins_pipe(fp_dop_reg_reg_s);
+%}
+
+instruct min_max_HF_reg(fRegF dst, fRegF src1, fRegF src2)
+%{
+  match(Set dst (MinHF src1 src2));
+  match(Set dst (MaxHF src1 src2));
+  format %{ "min_max_hf $dst, $src1, $src2" %}
+  ins_encode %{
+    int opcode = this->ideal_Opcode();  // this instruct matches both MinHF and MaxHF, so the direction is chosen at emit time
+    switch(opcode) {
+      case Op_MinHF: __ minmax_fp($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister,
+                                  __ FLOAT_TYPE::half_precision, true /* is_min */);
+                     break;
+      case Op_MaxHF: __ minmax_fp($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister,
+                                  __ FLOAT_TYPE::half_precision, false /* is_min */);
+                     break;
+      default: assert(false, "%s is not supported here", NodeClassNames[opcode]); break;  // any opcode other than MinHF/MaxHF trips this assert
+    }
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct fma_HF_reg(fRegF dst, fRegF src1, fRegF src2, fRegF src3)
+%{
+  match(Set dst (FmaHF src3 (Binary src1 src2)));
+  format %{ "fmadd.h $dst, $src1, $src2, $src3\t# $dst = $src1 * $src2 + $src3 fma packedH" %}
+  ins_encode %{
+    __ fmadd_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister, $src3$$FloatRegister);  // the match rule lists the addend (src3) first; fmadd_h takes it last
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
 // float <-> int

 instruct convF2I_reg_reg(iRegINoSp dst, fRegF src) %{
--- a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp
+++ b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp
@ -6156,6 +6156,104 @@ class StubGenerator: public StubCodeGenerator {
    return start;
  }

+  // x10 = input (float16)
+  // f10 = result (float)
+  // t0, t1 = temporary registers
+  address generate_float16ToFloat() {
+    __ align(CodeEntryAlignment);
+    StubGenStubId stub_id = StubGenStubId::hf2f_id;
+    StubCodeMark mark(this, stub_id);
+    address entry = __ pc();
+    BLOCK_COMMENT("float16ToFloat:");
+
+    FloatRegister dst = f10;
+    Register src = x10;
+    Label NaN_SLOW;
+
+    assert(VM_Version::supports_float16_float_conversion(), "must");
+
+    // On riscv, NaN needs special handling because fcvt does not work in that case.
+    // Inf does not need special handling, as fcvt converts it correctly,
+    // but both NaN and Inf are sent to the slow path anyway:
+    // both are rare, and making the slow path handle only NaN
+    // would sacrifice performance for the normal cases,
+    // i.e. the non-NaN and non-Inf cases.
+
+    // check whether it's a NaN or +/- Inf.
+    __ mv(t0, 0x7c00);  // mask with bits 14..10 set
+    __ andr(t1, src, t0);
+    // all masked bits set: jump to the code handling the NaN and Inf cases.
+    __ beq(t0, t1, NaN_SLOW);
+
+    // neither NaN nor Inf: just use the built-in instructions.
+    __ fmv_h_x(dst, src);
+    __ fcvt_s_h(dst, dst);
+    __ ret();
+
+    __ bind(NaN_SLOW);
+    // the following instructions mainly target NaN, as riscv does not handle
+    // NaN well with fcvt, but the same code also produces the right result for Inf.
+
+    // construct a 32-bit NaN from the 16-bit NaN;
+    // the payloads of non-canonical NaNs must be preserved.
+    __ mv(t1, 0x7f800000);  // bits 30..23 set
+    // the sign bit is already in place via sign-extension, if it was set (assumes src arrives sign-extended).
+    __ slli(t0, src, 13);  // moves input bits 9..0 up to bits 22..13
+    __ orr(t1, t0, t1);
+    __ fmv_w_x(dst, t1);
+
+    __ ret();
+    return entry;
+  }
+
+  // f10 = input (float)
+  // x10 = result (float16)
+  // f11 = temporary float register
+  // t0, t1 = temporary registers
+  address generate_floatToFloat16() {
+    __ align(CodeEntryAlignment);
+    StubGenStubId stub_id = StubGenStubId::f2hf_id;
+    StubCodeMark mark(this, stub_id);
+    address entry = __ pc();
+    BLOCK_COMMENT("floatToFloat16:");
+
+    Register dst = x10;
+    FloatRegister src = f10, ftmp = f11;
+    Label NaN_SLOW;
+
+    assert(VM_Version::supports_float16_float_conversion(), "must");
+
+    // On riscv, NaN needs special handling because fcvt does not work in that case.
+
+    // check whether it's a NaN.
+    // feq is used instead of fclass as a performance optimization.
+    __ feq_s(t0, src, src);
+    // t0 == 0 means src compared unequal to itself: jump to the code handling the NaN case.
+    __ beqz(t0, NaN_SLOW);
+
+    // non-NaN cases: just use the built-in instructions.
+    __ fcvt_h_s(ftmp, src);
+    __ fmv_x_h(dst, ftmp);
+    __ ret();
+
+    __ bind(NaN_SLOW);
+    __ fmv_x_w(dst, src);
+
+    // preserve the payloads of non-canonical NaNs.
+    __ srai(dst, dst, 13);  // arithmetic shift: input bits 22..13 land in bits 9..0
+    // preserve the sign bit.
+    __ srai(t1, dst, 13);  // t1 = raw input bits shifted right by 26 in total
+    __ slli(t1, t1, 10);
+    __ mv(t0, 0x3ff);  // low 10 bits set
+    __ orr(t1, t1, t0);  // t1 is now the mask applied to dst below
+
+    // get the result by merging the sign bit and the preserved payload of the non-canonical NaN.
+    __ andr(dst, dst, t1);
+
+    __ ret();
+    return entry;
+  }
+
 #endif // COMPILER2_OR_JVMCI

 #ifdef COMPILER2
@ -6525,6 +6623,12 @@ static const int64_t right_3_bits = right_n_bits(3);
      StubRoutines::_crc_table_adr = (address)StubRoutines::riscv::_crc_table;
      StubRoutines::_updateBytesCRC32 = generate_updateBytesCRC32();
    }
+
+    if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_float16ToFloat) &&
+        vmIntrinsics::is_intrinsic_available(vmIntrinsics::_floatToFloat16)) {
+      StubRoutines::_hf2f = generate_float16ToFloat();
+      StubRoutines::_f2hf = generate_floatToFloat16();
+    }
  }

  void generate_continuation_stubs() {
--- a/src/hotspot/cpu/riscv/vm_version_riscv.cpp
+++ b/src/hotspot/cpu/riscv/vm_version_riscv.cpp
@ -24,6 +24,7 @@
 *
 */

+#include "classfile/vmIntrinsics.hpp"
 #include "runtime/java.hpp"
 #include "runtime/os.inline.hpp"
 #include "runtime/vm_version.hpp"
@ -464,3 +465,18 @@ void VM_Version::initialize_cpu_information(void) {
  snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "RISCV64 %s", features_string());
  _initialized = true;
 }
+
+bool VM_Version::is_intrinsic_supported(vmIntrinsicID id) {
+  assert(id != vmIntrinsics::_none, "must be a VM intrinsic");
+  switch (id) {
+  case vmIntrinsics::_floatToFloat16:
+  case vmIntrinsics::_float16ToFloat:
+    if (!supports_float16_float_conversion()) {  // the two Float16 conversion intrinsics are the only ones rejected here
+      return false;
+    }
+    break;
+  default:
+    break;  // every other intrinsic id falls through to the "supported" answer below
+  }
+  return true;
+}
--- a/src/hotspot/cpu/riscv/vm_version_riscv.hpp
+++ b/src/hotspot/cpu/riscv/vm_version_riscv.hpp
@ -297,6 +297,13 @@ class VM_Version : public Abstract_VM_Version {
  // RISCV64 supports fast class initialization checks
  static bool supports_fast_class_init_checks() { return true; }
  static bool supports_fencei_barrier() { return ext_Zifencei.enabled(); }
+
+  static bool supports_float16_float_conversion() {
+    return UseZfh || UseZfhmin;  // either flag is sufficient for the float <-> float16 conversions
+  }
+
+  // Check intrinsic support
+  static bool is_intrinsic_supported(vmIntrinsicID id);
 };

 #endif // CPU_RISCV_VM_VERSION_RISCV_HPP
--- a/test/hotspot/jtreg/compiler/c2/irTests/ConvF2HFIdealizationTests.java
+++ b/test/hotspot/jtreg/compiler/c2/irTests/ConvF2HFIdealizationTests.java
@ -54,7 +54,7 @@ public class ConvF2HFIdealizationTests {
    @Test
    @IR(counts = {IRNode.REINTERPRET_S2HF, ">=1", IRNode.REINTERPRET_HF2S, ">=1", IRNode.ADD_HF, ">=1" },
        failOn = {IRNode.ADD_F, IRNode.CONV_HF2F, IRNode.CONV_F2HF},
-        applyIfCPUFeature = {"avx512_fp16", "true"})
+        applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"})
    // Test pattern - ConvHF2F -> AddF -> ConvF2HF is optimized to ReinterpretS2HF -> AddHF -> ReinterpretHF2S
    public void test1() {
        for (int i = 0; i < SIZE; i++) {
--- a/test/hotspot/jtreg/compiler/c2/irTests/MulHFNodeIdealizationTests.java
+++ b/test/hotspot/jtreg/compiler/c2/irTests/MulHFNodeIdealizationTests.java
@ -54,7 +54,7 @@ public class MulHFNodeIdealizationTests {

    @Test
    @IR(counts = {IRNode.ADD_HF, "1"},
-        applyIfCPUFeature = {"avx512_fp16", "true"},
+        applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"},
        failOn = {IRNode.MUL_HF})
    public void test1() {
        dst = multiply(src, valueOf(2.0f));
--- a/test/hotspot/jtreg/compiler/c2/irTests/TestFloat16ScalarOperations.java
+++ b/test/hotspot/jtreg/compiler/c2/irTests/TestFloat16ScalarOperations.java
@ -101,7 +101,7 @@ public class TestFloat16ScalarOperations {

    @Test
    @IR(counts = {"convHF2SAndHF2F", " >0 "}, phase = {CompilePhase.FINAL_CODE},
-        applyIfCPUFeature = {"avx512_fp16", "true"})
+        applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"})
    public void testEliminateIntermediateHF2S() {
        Float16 res = shortBitsToFloat16((short)0);
        for (int i = 0; i < count; i++) {
@ -114,7 +114,7 @@ public class TestFloat16ScalarOperations {

    @Test
    @IR(counts = {IRNode.ADD_HF, " >0 ", IRNode.REINTERPRET_S2HF, " >0 ", IRNode.REINTERPRET_HF2S, " >0 "},
-        applyIfCPUFeature = {"avx512_fp16", "true"})
+        applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"})
    public void testAdd1() {
        Float16 res = shortBitsToFloat16((short)0);
        for (int i = 0; i < count; i++) {
@ -125,7 +125,7 @@ public class TestFloat16ScalarOperations {

    @Test
    @IR(failOn = {IRNode.ADD_HF, IRNode.REINTERPRET_S2HF, IRNode.REINTERPRET_HF2S},
-        applyIfCPUFeature = {"avx512_fp16", "true"})
+        applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"})
    public void testAdd2() {
        Float16 hf0 = shortBitsToFloat16((short)0);
        Float16 hf1 = shortBitsToFloat16((short)15360);
@ -137,7 +137,7 @@ public class TestFloat16ScalarOperations {

    @Test
    @IR(counts = {IRNode.SUB_HF, " >0 ", IRNode.REINTERPRET_S2HF, " >0 ", IRNode.REINTERPRET_HF2S, " >0 "},
-        applyIfCPUFeature = {"avx512_fp16", "true"})
+        applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"})
    public void testSub() {
        Float16 res = shortBitsToFloat16((short)0);
        for (int i = 0; i < count; i++) {
@ -148,7 +148,7 @@ public class TestFloat16ScalarOperations {

    @Test
    @IR(counts = {IRNode.MUL_HF, " >0 ", IRNode.REINTERPRET_S2HF, " >0 ", IRNode.REINTERPRET_HF2S, " >0 "},
-        applyIfCPUFeature = {"avx512_fp16", "true"})
+        applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"})
    public void testMul() {
        Float16 res = shortBitsToFloat16((short)0);
        for (int i = 0; i < count; i++) {
@ -159,7 +159,7 @@ public class TestFloat16ScalarOperations {

    @Test
    @IR(counts = {IRNode.DIV_HF, " >0 ", IRNode.REINTERPRET_S2HF, " >0 ", IRNode.REINTERPRET_HF2S, " >0 "},
-        applyIfCPUFeature = {"avx512_fp16", "true"})
+        applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"})
    public void testDiv() {
        Float16 res = shortBitsToFloat16((short)0);
        for (int i = 0; i < count; i++) {
@ -170,7 +170,7 @@ public class TestFloat16ScalarOperations {

    @Test
    @IR(counts = {IRNode.DIV_HF, " 0 ", IRNode.REINTERPRET_S2HF, " 0 ", IRNode.REINTERPRET_HF2S, " 0 "},
-        applyIfCPUFeature = {"avx512_fp16", "true"})
+        applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"})
    public void testDivByOne() {
        Float16 res = shortBitsToFloat16((short)0);
        for (int i = 0; i < count; i++) {
@ -181,7 +181,7 @@ public class TestFloat16ScalarOperations {

    @Test
    @IR(counts = {IRNode.MAX_HF, " >0 ", IRNode.REINTERPRET_S2HF, " >0 ", IRNode.REINTERPRET_HF2S, " >0 "},
-        applyIfCPUFeature = {"avx512_fp16", "true"})
+        applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"})
    public void testMax() {
        Float16 res = shortBitsToFloat16((short)0);
        for (int i = 0; i < count; i++) {
@ -192,7 +192,7 @@ public class TestFloat16ScalarOperations {

    @Test
    @IR(counts = {IRNode.MIN_HF, " >0 ", IRNode.REINTERPRET_S2HF, " >0 ", IRNode.REINTERPRET_HF2S, " >0 "},
-        applyIfCPUFeature = {"avx512_fp16", "true"})
+        applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"})
    public void testMin() {
        Float16 res = shortBitsToFloat16((short)0);
        for (int i = 0; i < count; i++) {
@ -203,7 +203,7 @@ public class TestFloat16ScalarOperations {

    @Test
    @IR(counts = {IRNode.SQRT_HF, " >0 ", IRNode.REINTERPRET_S2HF, " >0 ", IRNode.REINTERPRET_HF2S, " >0 "},
-        applyIfCPUFeature = {"avx512_fp16", "true"})
+        applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"})
    public void testSqrt() {
        Float16 res = shortBitsToFloat16((short)0);
        for (int i = 0; i < count; i++) {
@ -214,7 +214,7 @@ public class TestFloat16ScalarOperations {

    @Test
    @IR(counts = {IRNode.FMA_HF, " >0 ", IRNode.REINTERPRET_S2HF, " >0 ", IRNode.REINTERPRET_HF2S, " >0 "},
-        applyIfCPUFeature = {"avx512_fp16", "true"})
+        applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"})
    public void testFma() {
        Float16 res = shortBitsToFloat16((short)0);
        for (int i = 0; i < count; i++) {
@ -226,7 +226,7 @@ public class TestFloat16ScalarOperations {

    @Test
    @IR(counts = {IRNode.MUL_HF, " >0 ", IRNode.REINTERPRET_S2HF, " >0 ", IRNode.REINTERPRET_HF2S, " >0 "},
-        applyIfCPUFeature = {"avx512_fp16", "true"})
+        applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"})
    public void testDivByPOT() {
        Float16 res = valueOf(0.0f);
        for (int i = 0; i < 50; i++) {
@ -243,7 +243,7 @@ public class TestFloat16ScalarOperations {

    @Test
    @IR(counts = {IRNode.MUL_HF, " 0 ", IRNode.ADD_HF, " >0 ", IRNode.REINTERPRET_S2HF, " >0 ", IRNode.REINTERPRET_HF2S, " >0 "},
-        applyIfCPUFeature = {"avx512_fp16", "true"})
+        applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"})
    public void testMulByTWO() {
        Float16 res = valueOf(0.0f);
        Float16 multiplier = valueOf(2.0f);
@ -280,7 +280,7 @@ public class TestFloat16ScalarOperations {

    @Test
    @IR(counts = {IRNode.ADD_HF, " 0 ", IRNode.REINTERPRET_S2HF, " 0 ", IRNode.REINTERPRET_HF2S, " 0 "},
-        applyIfCPUFeature = {"avx512_fp16", "true"})
+        applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"})
    public void testAddConstantFolding() {
        // If either value is NaN, then the result is NaN.
        assertResult(add(Float16.NaN, valueOf(2.0f)).floatValue(), Float.NaN, "testAddConstantFolding");
@ -323,7 +323,7 @@ public class TestFloat16ScalarOperations {

    @Test
    @IR(counts = {IRNode.SUB_HF, " 0 ", IRNode.REINTERPRET_S2HF, " 0 ", IRNode.REINTERPRET_HF2S, " 0 "},
-        applyIfCPUFeature = {"avx512_fp16", "true"})
+        applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"})
    public void testSubConstantFolding() {
        // If either value is NaN, then the result is NaN.
        assertResult(subtract(Float16.NaN, valueOf(2.0f)).floatValue(), Float.NaN, "testAddConstantFolding");
@ -356,7 +356,7 @@ public class TestFloat16ScalarOperations {
    @Test
    @Warmup(value = 10000)
    @IR(counts = {IRNode.MAX_HF, " 0 ", IRNode.REINTERPRET_S2HF, " 0 ", IRNode.REINTERPRET_HF2S, " 0 "},
-        applyIfCPUFeature = {"avx512_fp16", "true"})
+        applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"})
    public void testMaxConstantFolding() {
        // If either value is NaN, then the result is NaN.
        assertResult(max(valueOf(2.0f), Float16.NaN).floatValue(), Float.NaN, "testMaxConstantFolding");
@ -374,7 +374,7 @@ public class TestFloat16ScalarOperations {

    @Test
    @IR(counts = {IRNode.MIN_HF, " 0 ", IRNode.REINTERPRET_S2HF, " 0 ", IRNode.REINTERPRET_HF2S, " 0 "},
-        applyIfCPUFeature = {"avx512_fp16", "true"})
+        applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"})
    public void testMinConstantFolding() {
        // If either value is NaN, then the result is NaN.
        assertResult(min(valueOf(2.0f), Float16.NaN).floatValue(), Float.NaN, "testMinConstantFolding");
@ -391,7 +391,7 @@ public class TestFloat16ScalarOperations {

    @Test
    @IR(counts = {IRNode.DIV_HF, " 0 ", IRNode.REINTERPRET_S2HF, " 0 ", IRNode.REINTERPRET_HF2S, " 0 "},
-        applyIfCPUFeature = {"avx512_fp16", "true"})
+        applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"})
    public void testDivConstantFolding() {
        // If either value is NaN, then the result is NaN.
        assertResult(divide(Float16.NaN, POSITIVE_ZERO).floatValue(), Float.NaN, "testDivConstantFolding");
@ -431,7 +431,7 @@ public class TestFloat16ScalarOperations {

    @Test
    @IR(counts = {IRNode.MUL_HF, " 0 ", IRNode.REINTERPRET_S2HF, " 0 ", IRNode.REINTERPRET_HF2S, " 0 "},
-        applyIfCPUFeature = {"avx512_fp16", "true"})
+        applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"})
    public void testMulConstantFolding() {
        // If any operand is NaN, the result is NaN.
        assertResult(multiply(Float16.NaN, valueOf(4.0f)).floatValue(), Float.NaN, "testMulConstantFolding");
@ -454,7 +454,7 @@ public class TestFloat16ScalarOperations {

    @Test
    @IR(counts = {IRNode.SQRT_HF, " 0 ", IRNode.REINTERPRET_S2HF, " 0 ", IRNode.REINTERPRET_HF2S, " 0 "},
-        applyIfCPUFeature = {"avx512_fp16", "true"})
+        applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"})
    public void testSqrtConstantFolding() {
        // If the argument is NaN or less than zero, then the result is NaN.
        assertResult(sqrt(Float16.NaN).floatValue(), Float.NaN, "testSqrtConstantFolding");
@ -473,7 +473,7 @@ public class TestFloat16ScalarOperations {

    @Test
    @IR(counts = {IRNode.FMA_HF, " 0 ", IRNode.REINTERPRET_S2HF, " 0 ", IRNode.REINTERPRET_HF2S, " 0 "},
-        applyIfCPUFeature = {"avx512_fp16", "true"})
+        applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"})
    public void testFMAConstantFolding() {
        // If any argument is NaN, the result is NaN.
        assertResult(fma(Float16.NaN, valueOf(2.0f), valueOf(3.0f)).floatValue(), Float.NaN, "testFMAConstantFolding");
@ -508,7 +508,7 @@ public class TestFloat16ScalarOperations {

    @Test
    @IR(failOn = {IRNode.ADD_HF, IRNode.SUB_HF, IRNode.MUL_HF, IRNode.DIV_HF, IRNode.SQRT_HF, IRNode.FMA_HF},
-        applyIfCPUFeature = {"avx512_fp16", "true"})
+        applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"})
    public void testRounding1() {
        dst[0] = float16ToRawShortBits(add(RANDOM1, RANDOM2));
        dst[1] = float16ToRawShortBits(subtract(RANDOM2, RANDOM3));
@ -547,7 +547,7 @@ public class TestFloat16ScalarOperations {
    @Test
    @IR(counts = {IRNode.ADD_HF, " >0 ", IRNode.SUB_HF, " >0 ", IRNode.MUL_HF, " >0 ",
                  IRNode.DIV_HF, " >0 ", IRNode.SQRT_HF, " >0 ", IRNode.FMA_HF, " >0 "},
-        applyIfCPUFeature = {"avx512_fp16", "true"})
+        applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"})
    public void testRounding2() {
        dst[0] = float16ToRawShortBits(add(RANDOM1_VAR, RANDOM2_VAR));
        dst[1] = float16ToRawShortBits(subtract(RANDOM2_VAR, RANDOM3_VAR));
--- a/test/hotspot/jtreg/compiler/lib/ir_framework/test/IREncodingPrinter.java
+++ b/test/hotspot/jtreg/compiler/lib/ir_framework/test/IREncodingPrinter.java
@ -110,6 +110,7 @@ public class IREncodingPrinter {
        "sve",
        // Riscv64
        "rvv",
+        "zfh",
        "zvbb",
        "zvfh"
    ));
--- a/test/hotspot/jtreg/compiler/vectorization/TestFloat16VectorConvChain.java
+++ b/test/hotspot/jtreg/compiler/vectorization/TestFloat16VectorConvChain.java
@ -45,8 +45,6 @@ public class TestFloat16VectorConvChain {
        counts = {IRNode.VECTOR_CAST_HF2F, IRNode.VECTOR_SIZE_ANY, ">= 1", IRNode.VECTOR_CAST_F2HF, IRNode.VECTOR_SIZE_ANY, " >= 1"})
    @IR(applyIfCPUFeatureAnd = {"avx512_fp16", "false", "f16c", "true"},
        counts = {IRNode.VECTOR_CAST_HF2F, IRNode.VECTOR_SIZE_ANY, ">= 1", IRNode.VECTOR_CAST_F2HF, IRNode.VECTOR_SIZE_ANY, " >= 1"})
-    @IR(applyIfCPUFeature = {"zvfh", "true"},
-        counts = {IRNode.VECTOR_CAST_HF2F, IRNode.VECTOR_SIZE_ANY, ">= 1", IRNode.VECTOR_CAST_F2HF, IRNode.VECTOR_SIZE_ANY, " >= 1"})
    public static void test(short [] res, short [] src1, short [] src2) {
        for (int i = 0; i < res.length; i++) {
            res[i] = (short)Float.float16ToFloat(Float.floatToFloat16(Float.float16ToFloat(src1[i]) + Float.float16ToFloat(src2[i])));