From 1a9965555477ee2a6cb65e91ef54ed608e4bee66 Mon Sep 17 00:00:00 2001 From: Mohamed Issa Date: Fri, 27 Mar 2026 04:56:30 +0000 Subject: [PATCH] 8378295: Update scalar AVX10 floating point min/max definitions Reviewed-by: sviswanathan, mhaessig, jbhateja, sparasa --- src/hotspot/cpu/x86/assembler_x86.cpp | 160 ++++---- src/hotspot/cpu/x86/assembler_x86.hpp | 34 +- src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp | 77 +++- src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp | 26 +- src/hotspot/cpu/x86/macroAssembler_x86.cpp | 46 ++- src/hotspot/cpu/x86/macroAssembler_x86.hpp | 22 +- src/hotspot/cpu/x86/x86.ad | 353 +++++++++--------- .../math/TestFpMinMaxReductions.java | 32 +- .../compiler/lib/ir_framework/IRNode.java | 36 +- .../vector/Float16OperationsBenchmark.java | 38 +- .../bench/vm/compiler/FpMinMaxIntrinsics.java | 171 ++++++++- 11 files changed, 650 insertions(+), 345 deletions(-) diff --git a/src/hotspot/cpu/x86/assembler_x86.cpp b/src/hotspot/cpu/x86/assembler_x86.cpp index 38a28a6ec49..a4f2968f0d1 100644 --- a/src/hotspot/cpu/x86/assembler_x86.cpp +++ b/src/hotspot/cpu/x86/assembler_x86.cpp @@ -3472,7 +3472,7 @@ void Assembler::vmovdqu(XMMRegister dst, XMMRegister src) { emit_int16(0x6F, (0xC0 | encode)); } -void Assembler::vmovw(XMMRegister dst, Register src) { +void Assembler::evmovw(XMMRegister dst, Register src) { assert(VM_Version::supports_avx512_fp16(), "requires AVX512-FP16"); InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_is_evex_instruction(); @@ -3480,7 +3480,7 @@ void Assembler::vmovw(XMMRegister dst, Register src) { emit_int16(0x6E, (0xC0 | encode)); } -void Assembler::vmovw(Register dst, XMMRegister src) { +void Assembler::evmovw(Register dst, XMMRegister src) { assert(VM_Version::supports_avx512_fp16(), "requires AVX512-FP16"); InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ 
false); attributes.set_is_evex_instruction(); @@ -3488,6 +3488,36 @@ void Assembler::vmovw(Register dst, XMMRegister src) { emit_int16(0x7E, (0xC0 | encode)); } +void Assembler::evmovw(XMMRegister dst, Address src) { + assert(VM_Version::supports_avx10_2(), ""); + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_16bit); + attributes.set_is_evex_instruction(); + vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_MAP5, &attributes); + emit_int8(0x6E); + emit_operand(dst, src, 0); +} + +void Assembler::evmovw(Address dst, XMMRegister src) { + assert(VM_Version::supports_avx10_2(), ""); + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_16bit); + attributes.set_is_evex_instruction(); + vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_MAP5, &attributes); + emit_int8(0x7E); + emit_operand(src, dst, 0); +} + +void Assembler::evmovw(XMMRegister dst, XMMRegister src) { + assert(VM_Version::supports_avx10_2(), ""); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + attributes.set_is_evex_instruction(); + int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_MAP5, &attributes); + emit_int16(0x6E, (0xC0 | encode)); +} + void Assembler::vmovdqu(XMMRegister dst, Address src) { assert(UseAVX > 0, ""); InstructionMark im(this); @@ -7310,6 +7340,42 @@ void Assembler::etzcntq(Register dst, Address src, bool no_flags) { emit_operand(dst, src, 0); } +void Assembler::evucomish(XMMRegister dst, Address src) { + InstructionMark im(this); + 
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_16bit); + attributes.set_is_evex_instruction(); + vex_prefix(src, 0, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_MAP5, &attributes); + emit_int8(0x2E); + emit_operand(dst, src, 0); +} + +void Assembler::evucomish(XMMRegister dst, XMMRegister src) { + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + attributes.set_is_evex_instruction(); + int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_MAP5, &attributes); + emit_int16(0x2E, (0xC0 | encode)); +} + +void Assembler::evucomxsh(XMMRegister dst, Address src) { + assert(VM_Version::supports_avx10_2(), ""); + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_16bit); + attributes.set_is_evex_instruction(); + vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_MAP5, &attributes); + emit_int8(0x2E); + emit_operand(dst, src, 0); +} + +void Assembler::evucomxsh(XMMRegister dst, XMMRegister src) { + assert(VM_Version::supports_avx10_2(), ""); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + attributes.set_is_evex_instruction(); + int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_MAP5, &attributes); + emit_int16(0x2E, (0xC0 | encode)); +} + void Assembler::ucomisd(XMMRegister dst, Address src) { InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* 
uses_vl */ false); @@ -7327,7 +7393,7 @@ void Assembler::ucomisd(XMMRegister dst, XMMRegister src) { emit_int16(0x2E, (0xC0 | encode)); } -void Assembler::vucomxsd(XMMRegister dst, Address src) { +void Assembler::evucomxsd(XMMRegister dst, Address src) { assert(VM_Version::supports_avx10_2(), ""); InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); @@ -7338,7 +7404,7 @@ void Assembler::vucomxsd(XMMRegister dst, Address src) { emit_operand(dst, src, 0); } -void Assembler::vucomxsd(XMMRegister dst, XMMRegister src) { +void Assembler::evucomxsd(XMMRegister dst, XMMRegister src) { assert(VM_Version::supports_avx10_2(), ""); InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_is_evex_instruction(); @@ -7361,7 +7427,7 @@ void Assembler::ucomiss(XMMRegister dst, XMMRegister src) { emit_int16(0x2E, (0xC0 | encode)); } -void Assembler::vucomxss(XMMRegister dst, Address src) { +void Assembler::evucomxss(XMMRegister dst, Address src) { assert(VM_Version::supports_avx10_2(), ""); InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); @@ -7372,7 +7438,7 @@ void Assembler::vucomxss(XMMRegister dst, Address src) { emit_operand(dst, src, 0); } -void Assembler::vucomxss(XMMRegister dst, XMMRegister src) { +void Assembler::evucomxss(XMMRegister dst, XMMRegister src) { assert(VM_Version::supports_avx10_2(), ""); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_is_evex_instruction(); @@ -8411,30 +8477,6 @@ void Assembler::vmulsh(XMMRegister dst, XMMRegister nds, XMMRegister src) { emit_int16(0x59, (0xC0 | encode)); } -void Assembler::vmaxsh(XMMRegister dst, XMMRegister nds, XMMRegister src) { - 
assert(VM_Version::supports_avx512_fp16(), "requires AVX512-FP16"); - InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - attributes.set_is_evex_instruction(); - int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F3, VEX_OPCODE_MAP5, &attributes); - emit_int16(0x5F, (0xC0 | encode)); -} - -void Assembler::eminmaxsh(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8) { - assert(VM_Version::supports_avx10_2(), ""); - InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - attributes.set_is_evex_instruction(); - int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3A, &attributes); - emit_int24(0x53, (0xC0 | encode), imm8); -} - -void Assembler::vminsh(XMMRegister dst, XMMRegister nds, XMMRegister src) { - assert(VM_Version::supports_avx512_fp16(), "requires AVX512-FP16"); - InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - attributes.set_is_evex_instruction(); - int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F3, VEX_OPCODE_MAP5, &attributes); - emit_int16(0x5D, (0xC0 | encode)); -} - void Assembler::vsqrtsh(XMMRegister dst, XMMRegister src) { assert(VM_Version::supports_avx512_fp16(), "requires AVX512-FP16"); InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); @@ -13369,48 +13411,38 @@ bool Assembler::is_demotable(bool no_flags, int dst_enc, int nds_enc) { return (!no_flags && dst_enc == nds_enc); } -void Assembler::vmaxss(XMMRegister dst, XMMRegister nds, XMMRegister src) { - assert(VM_Version::supports_avx(), ""); - InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, 
/* no_mask_reg */ true, /* uses_vl */ false); - int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); - emit_int16(0x5F, (0xC0 | encode)); -} - -void Assembler::vmaxsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { - assert(VM_Version::supports_avx(), ""); - InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - attributes.set_rex_vex_w_reverted(); - int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); - emit_int16(0x5F, (0xC0 | encode)); -} - -void Assembler::vminss(XMMRegister dst, XMMRegister nds, XMMRegister src) { - assert(VM_Version::supports_avx(), ""); - InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); - emit_int16(0x5D, (0xC0 | encode)); -} - -void Assembler::eminmaxss(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8) { +void Assembler::evminmaxsh(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int imm8) { assert(VM_Version::supports_avx10_2(), ""); - InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); attributes.set_is_evex_instruction(); + attributes.set_embedded_opmask_register_specifier(mask); + if (merge) { + attributes.reset_is_clear_context(); + } + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3A, &attributes); + emit_int24(0x53, (0xC0 | encode), imm8); +} + +void 
Assembler::evminmaxss(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int imm8) { + assert(VM_Version::supports_avx10_2(), ""); + InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); + attributes.set_is_evex_instruction(); + attributes.set_embedded_opmask_register_specifier(mask); + if (merge) { + attributes.reset_is_clear_context(); + } int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int24(0x53, (0xC0 | encode), imm8); } -void Assembler::vminsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { - assert(VM_Version::supports_avx(), ""); - InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - attributes.set_rex_vex_w_reverted(); - int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); - emit_int16(0x5D, (0xC0 | encode)); -} - -void Assembler::eminmaxsd(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8) { +void Assembler::evminmaxsd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int imm8) { assert(VM_Version::supports_avx10_2(), ""); - InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); attributes.set_is_evex_instruction(); + attributes.set_embedded_opmask_register_specifier(mask); + if (merge) { + attributes.reset_is_clear_context(); + } int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int24(0x53, (0xC0 | encode), imm8); } diff --git a/src/hotspot/cpu/x86/assembler_x86.hpp 
b/src/hotspot/cpu/x86/assembler_x86.hpp index 57a5e25d7a6..98684752b0c 100644 --- a/src/hotspot/cpu/x86/assembler_x86.hpp +++ b/src/hotspot/cpu/x86/assembler_x86.hpp @@ -1694,8 +1694,11 @@ private: void movsbl(Register dst, Address src); void movsbl(Register dst, Register src); - void vmovw(XMMRegister dst, Register src); - void vmovw(Register dst, XMMRegister src); + void evmovw(XMMRegister dst, Register src); + void evmovw(Register dst, XMMRegister src); + void evmovw(XMMRegister dst, Address src); + void evmovw(Address dst, XMMRegister src); + void evmovw(XMMRegister dst, XMMRegister src); void movsbq(Register dst, Address src); void movsbq(Register dst, Register src); @@ -2329,17 +2332,23 @@ private: void tzcntq(Register dst, Address src); void etzcntq(Register dst, Address src, bool no_flags); + // Unordered Compare Scalar Half-Precision Floating-Point Values and set EFLAGS + void evucomish(XMMRegister dst, Address src); + void evucomish(XMMRegister dst, XMMRegister src); + void evucomxsh(XMMRegister dst, Address src); + void evucomxsh(XMMRegister dst, XMMRegister src); + // Unordered Compare Scalar Double-Precision Floating-Point Values and set EFLAGS void ucomisd(XMMRegister dst, Address src); void ucomisd(XMMRegister dst, XMMRegister src); - void vucomxsd(XMMRegister dst, Address src); - void vucomxsd(XMMRegister dst, XMMRegister src); + void evucomxsd(XMMRegister dst, Address src); + void evucomxsd(XMMRegister dst, XMMRegister src); // Unordered Compare Scalar Single-Precision Floating-Point Values and set EFLAGS void ucomiss(XMMRegister dst, Address src); void ucomiss(XMMRegister dst, XMMRegister src); - void vucomxss(XMMRegister dst, Address src); - void vucomxss(XMMRegister dst, XMMRegister src); + void evucomxss(XMMRegister dst, Address src); + void evucomxss(XMMRegister dst, XMMRegister src); void xabort(int8_t imm8); @@ -2417,11 +2426,6 @@ private: void vsubss(XMMRegister dst, XMMRegister nds, Address src); void vsubss(XMMRegister dst, XMMRegister 
nds, XMMRegister src); - void vmaxss(XMMRegister dst, XMMRegister nds, XMMRegister src); - void vmaxsd(XMMRegister dst, XMMRegister nds, XMMRegister src); - void vminss(XMMRegister dst, XMMRegister nds, XMMRegister src); - void vminsd(XMMRegister dst, XMMRegister nds, XMMRegister src); - void sarxl(Register dst, Register src1, Register src2); void sarxl(Register dst, Address src1, Register src2); void sarxq(Register dst, Register src1, Register src2); @@ -2552,8 +2556,6 @@ private: void vsubsh(XMMRegister dst, XMMRegister nds, XMMRegister src); void vmulsh(XMMRegister dst, XMMRegister nds, XMMRegister src); void vdivsh(XMMRegister dst, XMMRegister nds, XMMRegister src); - void vmaxsh(XMMRegister dst, XMMRegister nds, XMMRegister src); - void vminsh(XMMRegister dst, XMMRegister nds, XMMRegister src); void vsqrtsh(XMMRegister dst, XMMRegister src); void vfmadd132sh(XMMRegister dst, XMMRegister src1, XMMRegister src2); @@ -2790,9 +2792,9 @@ private: void vminpd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len); // AVX10.2 floating point minmax instructions - void eminmaxsh(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8); - void eminmaxss(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8); - void eminmaxsd(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8); + void evminmaxsh(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int imm8); + void evminmaxss(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int imm8); + void evminmaxsd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int imm8); void evminmaxph(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int imm8, int vector_len); void evminmaxph(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int imm8, int vector_len); void evminmaxps(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int imm8, int 
vector_len); diff --git a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp index 5b5fb02967c..c1df726b5ba 100644 --- a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp @@ -1037,8 +1037,8 @@ void C2_MacroAssembler::evminmax_fp(int opcode, BasicType elem_bt, } } -void C2_MacroAssembler::vminmax_fp(int opc, BasicType elem_bt, XMMRegister dst, KRegister mask, - XMMRegister src1, XMMRegister src2, int vlen_enc) { +void C2_MacroAssembler::vminmax_fp_avx10_2(int opc, BasicType elem_bt, XMMRegister dst, KRegister mask, + XMMRegister src1, XMMRegister src2, int vlen_enc) { assert(opc == Op_MinV || opc == Op_MinReductionV || opc == Op_MaxV || opc == Op_MaxReductionV, "sanity"); @@ -1052,6 +1052,21 @@ void C2_MacroAssembler::vminmax_fp(int opc, BasicType elem_bt, XMMRegister dst, } } +void C2_MacroAssembler::sminmax_fp_avx10_2(int opc, BasicType elem_bt, XMMRegister dst, KRegister mask, + XMMRegister src1, XMMRegister src2) { + assert(opc == Op_MinF || opc == Op_MaxF || + opc == Op_MinD || opc == Op_MaxD, "sanity"); + + int imm8 = (opc == Op_MinF || opc == Op_MinD) ? 
AVX10_2_MINMAX_MIN_COMPARE_SIGN + : AVX10_2_MINMAX_MAX_COMPARE_SIGN; + if (elem_bt == T_FLOAT) { + evminmaxss(dst, mask, src1, src2, true, imm8); + } else { + assert(elem_bt == T_DOUBLE, ""); + evminmaxsd(dst, mask, src1, src2, true, imm8); + } +} + // Float/Double signum void C2_MacroAssembler::signum_fp(int opcode, XMMRegister dst, XMMRegister zero, XMMRegister one) { assert(opcode == Op_SignumF || opcode == Op_SignumD, "sanity"); @@ -1063,7 +1078,7 @@ void C2_MacroAssembler::signum_fp(int opcode, XMMRegister dst, XMMRegister zero, // If other floating point comparison instructions used, ZF=1 for equal and unordered cases if (opcode == Op_SignumF) { if (VM_Version::supports_avx10_2()) { - vucomxss(dst, zero); + evucomxss(dst, zero); jcc(Assembler::negative, DONE_LABEL); } else { ucomiss(dst, zero); @@ -1074,7 +1089,7 @@ void C2_MacroAssembler::signum_fp(int opcode, XMMRegister dst, XMMRegister zero, xorps(dst, ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), noreg); } else if (opcode == Op_SignumD) { if (VM_Version::supports_avx10_2()) { - vucomxsd(dst, zero); + evucomxsd(dst, zero); jcc(Assembler::negative, DONE_LABEL); } else { ucomisd(dst, zero); @@ -2400,7 +2415,7 @@ void C2_MacroAssembler::reduceFloatMinMax(int opcode, int vlen, bool is_dst_vali } if (VM_Version::supports_avx10_2()) { - vminmax_fp(opcode, T_FLOAT, wdst, k0, wtmp, wsrc, vlen_enc); + vminmax_fp_avx10_2(opcode, T_FLOAT, wdst, k0, wtmp, wsrc, vlen_enc); } else { vminmax_fp(opcode, T_FLOAT, wdst, wtmp, wsrc, tmp, atmp, btmp, vlen_enc); } @@ -2409,7 +2424,7 @@ void C2_MacroAssembler::reduceFloatMinMax(int opcode, int vlen, bool is_dst_vali } if (is_dst_valid) { if (VM_Version::supports_avx10_2()) { - vminmax_fp(opcode, T_FLOAT, dst, k0, wdst, dst, Assembler::AVX_128bit); + vminmax_fp_avx10_2(opcode, T_FLOAT, dst, k0, wdst, dst, Assembler::AVX_128bit); } else { vminmax_fp(opcode, T_FLOAT, dst, wdst, dst, tmp, atmp, btmp, Assembler::AVX_128bit); } @@ -2440,7 +2455,7 @@ void 
C2_MacroAssembler::reduceDoubleMinMax(int opcode, int vlen, bool is_dst_val } if (VM_Version::supports_avx10_2()) { - vminmax_fp(opcode, T_DOUBLE, wdst, k0, wtmp, wsrc, vlen_enc); + vminmax_fp_avx10_2(opcode, T_DOUBLE, wdst, k0, wtmp, wsrc, vlen_enc); } else { vminmax_fp(opcode, T_DOUBLE, wdst, wtmp, wsrc, tmp, atmp, btmp, vlen_enc); } @@ -2451,7 +2466,7 @@ void C2_MacroAssembler::reduceDoubleMinMax(int opcode, int vlen, bool is_dst_val if (is_dst_valid) { if (VM_Version::supports_avx10_2()) { - vminmax_fp(opcode, T_DOUBLE, dst, k0, wdst, dst, Assembler::AVX_128bit); + vminmax_fp_avx10_2(opcode, T_DOUBLE, dst, k0, wdst, dst, Assembler::AVX_128bit); } else { vminmax_fp(opcode, T_DOUBLE, dst, wdst, dst, tmp, atmp, btmp, Assembler::AVX_128bit); } @@ -7061,13 +7076,25 @@ void C2_MacroAssembler::evfp16ph(int opcode, XMMRegister dst, XMMRegister src1, } } -void C2_MacroAssembler::scalar_max_min_fp16(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2, - KRegister ktmp, XMMRegister xtmp1, XMMRegister xtmp2) { - vector_max_min_fp16(opcode, dst, src1, src2, ktmp, xtmp1, xtmp2, Assembler::AVX_128bit); +void C2_MacroAssembler::sminmax_fp16(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2, + KRegister ktmp, XMMRegister xtmp1, XMMRegister xtmp2) { + vminmax_fp16(opcode, dst, src1, src2, ktmp, xtmp1, xtmp2, Assembler::AVX_128bit); } -void C2_MacroAssembler::vector_max_min_fp16(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2, - KRegister ktmp, XMMRegister xtmp1, XMMRegister xtmp2, int vlen_enc) { +void C2_MacroAssembler::sminmax_fp16_avx10_2(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2, + KRegister ktmp) { + if (opcode == Op_MaxHF) { + // dst = max(src1, src2) + evminmaxsh(dst, ktmp, src1, src2, true, AVX10_2_MINMAX_MAX_COMPARE_SIGN); + } else { + assert(opcode == Op_MinHF, ""); + // dst = min(src1, src2) + evminmaxsh(dst, ktmp, src1, src2, true, AVX10_2_MINMAX_MIN_COMPARE_SIGN); + } +} + +void 
C2_MacroAssembler::vminmax_fp16(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2, + KRegister ktmp, XMMRegister xtmp1, XMMRegister xtmp2, int vlen_enc) { if (opcode == Op_MaxVHF || opcode == Op_MaxHF) { // Move sign bits of src2 to mask register. evpmovw2m(ktmp, src2, vlen_enc); @@ -7110,3 +7137,27 @@ void C2_MacroAssembler::vector_max_min_fp16(int opcode, XMMRegister dst, XMMRegi Assembler::evmovdquw(dst, ktmp, xtmp1, true, vlen_enc); } } + +void C2_MacroAssembler::vminmax_fp16_avx10_2(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2, + KRegister ktmp, int vlen_enc) { + if (opcode == Op_MaxVHF) { + // dst = max(src1, src2) + evminmaxph(dst, ktmp, src1, src2, true, AVX10_2_MINMAX_MAX_COMPARE_SIGN, vlen_enc); + } else { + assert(opcode == Op_MinVHF, ""); + // dst = min(src1, src2) + evminmaxph(dst, ktmp, src1, src2, true, AVX10_2_MINMAX_MIN_COMPARE_SIGN, vlen_enc); + } +} + +void C2_MacroAssembler::vminmax_fp16_avx10_2(int opcode, XMMRegister dst, XMMRegister src1, Address src2, + KRegister ktmp, int vlen_enc) { + if (opcode == Op_MaxVHF) { + // dst = max(src1, src2) + evminmaxph(dst, ktmp, src1, src2, true, AVX10_2_MINMAX_MAX_COMPARE_SIGN, vlen_enc); + } else { + assert(opcode == Op_MinVHF, ""); + // dst = min(src1, src2) + evminmaxph(dst, ktmp, src1, src2, true, AVX10_2_MINMAX_MIN_COMPARE_SIGN, vlen_enc); + } +} diff --git a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp index 6d8b0ceaebe..4e77f8a5f6f 100644 --- a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp +++ b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -67,8 +67,11 @@ public: XMMRegister tmp, XMMRegister atmp, XMMRegister btmp, int vlen_enc); - void vminmax_fp(int opc, BasicType elem_bt, XMMRegister dst, KRegister mask, - XMMRegister src1, XMMRegister src2, int vlen_enc); + void vminmax_fp_avx10_2(int opc, BasicType elem_bt, XMMRegister dst, KRegister mask, + XMMRegister src1, XMMRegister src2, int vlen_enc); + + void sminmax_fp_avx10_2(int opc, BasicType elem_bt, XMMRegister dst, KRegister mask, + XMMRegister src1, XMMRegister src2); void vpuminmaxq(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2, XMMRegister xtmp1, XMMRegister xtmp2, int vlen_enc); @@ -576,11 +579,20 @@ public: void evfp16ph(int opcode, XMMRegister dst, XMMRegister src1, Address src2, int vlen_enc); - void vector_max_min_fp16(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2, - KRegister ktmp, XMMRegister xtmp1, XMMRegister xtmp2, int vlen_enc); + void vminmax_fp16(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2, + KRegister ktmp, XMMRegister xtmp1, XMMRegister xtmp2, int vlen_enc); - void scalar_max_min_fp16(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2, - KRegister ktmp, XMMRegister xtmp1, XMMRegister xtmp2); + void vminmax_fp16_avx10_2(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2, + KRegister ktmp, int vlen_enc); + + void vminmax_fp16_avx10_2(int opcode, XMMRegister dst, XMMRegister src1, Address src2, + KRegister ktmp, int vlen_enc); + + void sminmax_fp16(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2, + KRegister ktmp, XMMRegister xtmp1, XMMRegister xtmp2); + + void sminmax_fp16_avx10_2(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2, + KRegister ktmp); void reconstruct_frame_pointer(Register rtmp); diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.cpp b/src/hotspot/cpu/x86/macroAssembler_x86.cpp index a0f08145d55..356bf8af5c0 100644 --- 
a/src/hotspot/cpu/x86/macroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/macroAssembler_x86.cpp @@ -1958,6 +1958,16 @@ void MacroAssembler::movflt(XMMRegister dst, AddressLiteral src, Register rscrat } } +void MacroAssembler::movhlf(XMMRegister dst, XMMRegister src, Register rscratch) { + if (VM_Version::supports_avx10_2()) { + evmovw(dst, src); + } else { + assert(rscratch != noreg, "missing"); + evmovw(rscratch, src); + evmovw(dst, rscratch); + } +} + void MacroAssembler::mov64(Register dst, int64_t imm64) { if (is_uimm32(imm64)) { movl(dst, checked_cast(imm64)); @@ -2661,14 +2671,14 @@ void MacroAssembler::ucomisd(XMMRegister dst, AddressLiteral src, Register rscra } } -void MacroAssembler::vucomxsd(XMMRegister dst, AddressLiteral src, Register rscratch) { +void MacroAssembler::evucomxsd(XMMRegister dst, AddressLiteral src, Register rscratch) { assert(rscratch != noreg || always_reachable(src), "missing"); if (reachable(src)) { - Assembler::vucomxsd(dst, as_Address(src)); + Assembler::evucomxsd(dst, as_Address(src)); } else { lea(rscratch, src); - Assembler::vucomxsd(dst, Address(rscratch, 0)); + Assembler::evucomxsd(dst, Address(rscratch, 0)); } } @@ -2683,14 +2693,36 @@ void MacroAssembler::ucomiss(XMMRegister dst, AddressLiteral src, Register rscra } } -void MacroAssembler::vucomxss(XMMRegister dst, AddressLiteral src, Register rscratch) { +void MacroAssembler::evucomxss(XMMRegister dst, AddressLiteral src, Register rscratch) { assert(rscratch != noreg || always_reachable(src), "missing"); if (reachable(src)) { - Assembler::vucomxss(dst, as_Address(src)); + Assembler::evucomxss(dst, as_Address(src)); } else { lea(rscratch, src); - Assembler::vucomxss(dst, Address(rscratch, 0)); + Assembler::evucomxss(dst, Address(rscratch, 0)); + } +} + +void MacroAssembler::evucomish(XMMRegister dst, AddressLiteral src, Register rscratch) { + assert(rscratch != noreg || always_reachable(src), "missing"); + + if (reachable(src)) { + Assembler::evucomish(dst, as_Address(src)); + 
} else { + lea(rscratch, src); + Assembler::evucomish(dst, Address(rscratch, 0)); + } +} + +void MacroAssembler::evucomxsh(XMMRegister dst, AddressLiteral src, Register rscratch) { + assert(rscratch != noreg || always_reachable(src), "missing"); + + if (reachable(src)) { + Assembler::evucomxsh(dst, as_Address(src)); + } else { + lea(rscratch, src); + Assembler::evucomxsh(dst, Address(rscratch, 0)); } } @@ -9163,7 +9195,7 @@ void MacroAssembler::evpmaxs(BasicType type, XMMRegister dst, KRegister mask, XM case T_FLOAT: evminmaxps(dst, mask, nds, src, merge, AVX10_2_MINMAX_MAX_COMPARE_SIGN, vector_len); break; case T_DOUBLE: - evminmaxps(dst, mask, nds, src, merge, AVX10_2_MINMAX_MAX_COMPARE_SIGN, vector_len); break; + evminmaxpd(dst, mask, nds, src, merge, AVX10_2_MINMAX_MAX_COMPARE_SIGN, vector_len); break; default: fatal("Unexpected type argument %s", type2name(type)); break; } diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.hpp b/src/hotspot/cpu/x86/macroAssembler_x86.hpp index 3bdd1e4477a..021d2943ee8 100644 --- a/src/hotspot/cpu/x86/macroAssembler_x86.hpp +++ b/src/hotspot/cpu/x86/macroAssembler_x86.hpp @@ -162,6 +162,8 @@ class MacroAssembler: public Assembler { void incrementq(AddressLiteral dst, Register rscratch = noreg); + void movhlf(XMMRegister dst, XMMRegister src, Register rscratch = noreg); + // Support optimal SSE move instructions. 
void movflt(XMMRegister dst, XMMRegister src) { if (dst-> encoding() == src->encoding()) return; @@ -1308,21 +1310,29 @@ public: void subss(XMMRegister dst, Address src) { Assembler::subss(dst, src); } void subss(XMMRegister dst, AddressLiteral src, Register rscratch = noreg); + void evucomish(XMMRegister dst, XMMRegister src) { Assembler::evucomish(dst, src); } + void evucomish(XMMRegister dst, Address src) { Assembler::evucomish(dst, src); } + void evucomish(XMMRegister dst, AddressLiteral src, Register rscratch = noreg); + + void evucomxsh(XMMRegister dst, XMMRegister src) { Assembler::evucomxsh(dst, src); } + void evucomxsh(XMMRegister dst, Address src) { Assembler::evucomxsh(dst, src); } + void evucomxsh(XMMRegister dst, AddressLiteral src, Register rscratch = noreg); + void ucomiss(XMMRegister dst, XMMRegister src) { Assembler::ucomiss(dst, src); } void ucomiss(XMMRegister dst, Address src) { Assembler::ucomiss(dst, src); } void ucomiss(XMMRegister dst, AddressLiteral src, Register rscratch = noreg); - void vucomxss(XMMRegister dst, XMMRegister src) { Assembler::vucomxss(dst, src); } - void vucomxss(XMMRegister dst, Address src) { Assembler::vucomxss(dst, src); } - void vucomxss(XMMRegister dst, AddressLiteral src, Register rscratch = noreg); + void evucomxss(XMMRegister dst, XMMRegister src) { Assembler::evucomxss(dst, src); } + void evucomxss(XMMRegister dst, Address src) { Assembler::evucomxss(dst, src); } + void evucomxss(XMMRegister dst, AddressLiteral src, Register rscratch = noreg); void ucomisd(XMMRegister dst, XMMRegister src) { Assembler::ucomisd(dst, src); } void ucomisd(XMMRegister dst, Address src) { Assembler::ucomisd(dst, src); } void ucomisd(XMMRegister dst, AddressLiteral src, Register rscratch = noreg); - void vucomxsd(XMMRegister dst, XMMRegister src) { Assembler::vucomxsd(dst, src); } - void vucomxsd(XMMRegister dst, Address src) { Assembler::vucomxsd(dst, src); } - void vucomxsd(XMMRegister dst, AddressLiteral src, Register rscratch = 
noreg); + void evucomxsd(XMMRegister dst, XMMRegister src) { Assembler::evucomxsd(dst, src); } + void evucomxsd(XMMRegister dst, Address src) { Assembler::evucomxsd(dst, src); } + void evucomxsd(XMMRegister dst, AddressLiteral src, Register rscratch = noreg); // Bitwise Logical XOR of Packed Double-Precision Floating-Point Values void xorpd(XMMRegister dst, XMMRegister src); diff --git a/src/hotspot/cpu/x86/x86.ad b/src/hotspot/cpu/x86/x86.ad index f31d64f3d7e..d7014141234 100644 --- a/src/hotspot/cpu/x86/x86.ad +++ b/src/hotspot/cpu/x86/x86.ad @@ -1708,84 +1708,99 @@ static void emit_cmpfp3(MacroAssembler* masm, Register dst) { __ bind(done); } -// Math.min() # Math.max() -// -------------------------- -// ucomis[s/d] # -// ja -> b # a -// jp -> NaN # NaN -// jb -> a # b -// je # -// |-jz -> a | b # a & b -// | -> a # +enum FP_PREC { + fp_prec_hlf, + fp_prec_flt, + fp_prec_dbl +}; + +static inline void emit_fp_ucom(MacroAssembler* masm, enum FP_PREC pt, + XMMRegister p, XMMRegister q) { + if (pt == fp_prec_hlf) { + __ evucomish(p, q); + } else if (pt == fp_prec_flt) { + __ ucomiss(p, q); + } else { + __ ucomisd(p, q); + } +} + +static inline void movfp(MacroAssembler* masm, enum FP_PREC pt, + XMMRegister dst, XMMRegister src, Register scratch) { + if (pt == fp_prec_hlf) { + __ movhlf(dst, src, scratch); + } else if (pt == fp_prec_flt) { + __ movflt(dst, src); + } else { + __ movdbl(dst, src); + } +} + +// Math.min() # Math.max() +// ----------------------------- +// (v)ucomis[h/s/d] # +// ja -> b # a +// jp -> NaN # NaN +// jb -> a # b +// je # +// |-jz -> a | b # a & b +// | -> a # static void emit_fp_min_max(MacroAssembler* masm, XMMRegister dst, XMMRegister a, XMMRegister b, XMMRegister xmmt, Register rt, - bool min, bool single) { + bool min, enum FP_PREC pt) { Label nan, zero, below, above, done; - if (single) - __ ucomiss(a, b); - else - __ ucomisd(a, b); + emit_fp_ucom(masm, pt, a, b); - if (dst->encoding() != (min ? 
b : a)->encoding()) + if (dst->encoding() != (min ? b : a)->encoding()) { __ jccb(Assembler::above, above); // CF=0 & ZF=0 - else + } else { __ jccb(Assembler::above, done); + } __ jccb(Assembler::parity, nan); // PF=1 __ jccb(Assembler::below, below); // CF=1 // equal __ vpxor(xmmt, xmmt, xmmt, Assembler::AVX_128bit); - if (single) { - __ ucomiss(a, xmmt); - __ jccb(Assembler::equal, zero); + emit_fp_ucom(masm, pt, a, xmmt); - __ movflt(dst, a); - __ jmp(done); - } - else { - __ ucomisd(a, xmmt); - __ jccb(Assembler::equal, zero); + __ jccb(Assembler::equal, zero); + movfp(masm, pt, dst, a, rt); - __ movdbl(dst, a); - __ jmp(done); - } + __ jmp(done); __ bind(zero); - if (min) + if (min) { __ vpor(dst, a, b, Assembler::AVX_128bit); - else + } else { __ vpand(dst, a, b, Assembler::AVX_128bit); + } __ jmp(done); __ bind(above); - if (single) - __ movflt(dst, min ? b : a); - else - __ movdbl(dst, min ? b : a); + movfp(masm, pt, dst, min ? b : a, rt); __ jmp(done); __ bind(nan); - if (single) { + if (pt == fp_prec_hlf) { + __ movl(rt, 0x00007e00); // Float16.NaN + __ evmovw(dst, rt); + } else if (pt == fp_prec_flt) { __ movl(rt, 0x7fc00000); // Float.NaN __ movdl(dst, rt); - } - else { + } else { __ mov64(rt, 0x7ff8000000000000L); // Double.NaN __ movdq(dst, rt); } __ jmp(done); __ bind(below); - if (single) - __ movflt(dst, min ? a : b); - else - __ movdbl(dst, min ? a : b); + movfp(masm, pt, dst, min ? 
a : b, rt); __ bind(done); } @@ -7345,146 +7360,140 @@ instruct loadAOTRCAddress(rRegP dst, immAOTRuntimeConstantsAddress con) ins_pipe(ialu_reg_fat); %} +// min = java.lang.Math.min(float a, float b) // max = java.lang.Math.max(float a, float b) -instruct maxF_reg_avx10_2(regF dst, regF a, regF b) %{ - predicate(VM_Version::supports_avx10_2()); +instruct minmaxF_reg_avx10_2(regF dst, regF a, regF b) +%{ + predicate(VM_Version::supports_avx10_2() && !VLoopReductions::is_reduction(n)); match(Set dst (MaxF a b)); - format %{ "maxF $dst, $a, $b" %} - ins_encode %{ - __ eminmaxss($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MAX_COMPARE_SIGN); - %} - ins_pipe( pipe_slow ); -%} - -// max = java.lang.Math.max(float a, float b) -instruct maxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{ - predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n)); - match(Set dst (MaxF a b)); - effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp); - format %{ "maxF $dst, $a, $b \t! 
using $tmp, $atmp and $btmp as TEMP" %} - ins_encode %{ - __ vminmax_fp(Op_MaxV, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit); - %} - ins_pipe( pipe_slow ); -%} - -instruct maxF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr) %{ - predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n)); - match(Set dst (MaxF a b)); - effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr); - - format %{ "maxF_reduction $dst, $a, $b \t!using $xtmp and $rtmp as TEMP" %} - ins_encode %{ - emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register, - false /*min*/, true /*single*/); - %} - ins_pipe( pipe_slow ); -%} - -// max = java.lang.Math.max(double a, double b) -instruct maxD_reg_avx10_2(regD dst, regD a, regD b) %{ - predicate(VM_Version::supports_avx10_2()); - match(Set dst (MaxD a b)); - format %{ "maxD $dst, $a, $b" %} - ins_encode %{ - __ eminmaxsd($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MAX_COMPARE_SIGN); - %} - ins_pipe( pipe_slow ); -%} - -// max = java.lang.Math.max(double a, double b) -instruct maxD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{ - predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n)); - match(Set dst (MaxD a b)); - effect(USE a, USE b, TEMP atmp, TEMP btmp, TEMP tmp); - format %{ "maxD $dst, $a, $b \t! 
using $tmp, $atmp and $btmp as TEMP" %} - ins_encode %{ - __ vminmax_fp(Op_MaxV, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit); - %} - ins_pipe( pipe_slow ); -%} - -instruct maxD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr) %{ - predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n)); - match(Set dst (MaxD a b)); - effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr); - - format %{ "maxD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %} - ins_encode %{ - emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register, - false /*min*/, false /*single*/); - %} - ins_pipe( pipe_slow ); -%} - -// max = java.lang.Math.min(float a, float b) -instruct minF_reg_avx10_2(regF dst, regF a, regF b) %{ - predicate(VM_Version::supports_avx10_2()); match(Set dst (MinF a b)); - format %{ "minF $dst, $a, $b" %} + + format %{ "minmaxF $dst, $a, $b" %} ins_encode %{ - __ eminmaxss($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MIN_COMPARE_SIGN); + int opcode = this->ideal_Opcode(); + __ sminmax_fp_avx10_2(opcode, T_FLOAT, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct minmaxF_reduction_reg_avx10_2(regF dst, regF a, regF b, regF xtmp, rRegI rtmp, rFlagsReg cr) +%{ + predicate(VM_Version::supports_avx10_2() && VLoopReductions::is_reduction(n)); + match(Set dst (MaxF a b)); + match(Set dst (MinF a b)); + effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr); + + format %{ "minmaxF_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %} + ins_encode %{ + int opcode = this->ideal_Opcode(); + bool min = (opcode == Op_MinF) ? 
true : false; + emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register, + min, fp_prec_flt /*pt*/); %} ins_pipe( pipe_slow ); %} // min = java.lang.Math.min(float a, float b) -instruct minF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{ +// max = java.lang.Math.max(float a, float b) +instruct minmaxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) +%{ predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n)); + match(Set dst (MaxF a b)); match(Set dst (MinF a b)); effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp); - format %{ "minF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %} + + format %{ "minmaxF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %} ins_encode %{ - __ vminmax_fp(Op_MinV, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit); + int opcode = this->ideal_Opcode(); + int param_opcode = (opcode == Op_MinF) ? Op_MinV : Op_MaxV; + __ vminmax_fp(param_opcode, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, + $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit); %} ins_pipe( pipe_slow ); %} -instruct minF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr) %{ +instruct minmaxF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr) +%{ predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n)); + match(Set dst (MaxF a b)); match(Set dst (MinF a b)); effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr); - format %{ "minF_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %} + format %{ "minmaxF_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %} ins_encode %{ + int opcode = this->ideal_Opcode(); + bool min = (opcode == Op_MinF) ? 
true : false; emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register, - true /*min*/, true /*single*/); - %} - ins_pipe( pipe_slow ); -%} - -// max = java.lang.Math.min(double a, double b) -instruct minD_reg_avx10_2(regD dst, regD a, regD b) %{ - predicate(VM_Version::supports_avx10_2()); - match(Set dst (MinD a b)); - format %{ "minD $dst, $a, $b" %} - ins_encode %{ - __ eminmaxsd($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MIN_COMPARE_SIGN); + min, fp_prec_flt /*pt*/); %} ins_pipe( pipe_slow ); %} // min = java.lang.Math.min(double a, double b) -instruct minD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{ - predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n)); +// max = java.lang.Math.max(double a, double b) +instruct minmaxD_reg_avx10_2(regD dst, regD a, regD b) +%{ + predicate(VM_Version::supports_avx10_2() && !VLoopReductions::is_reduction(n)); + match(Set dst (MaxD a b)); match(Set dst (MinD a b)); - effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp); - format %{ "minD $dst, $a, $b \t! 
using $tmp, $atmp and $btmp as TEMP" %} + + format %{ "minmaxD $dst, $a, $b" %} ins_encode %{ - __ vminmax_fp(Op_MinV, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit); + int opcode = this->ideal_Opcode(); + __ sminmax_fp_avx10_2(opcode, T_DOUBLE, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister); %} ins_pipe( pipe_slow ); %} -instruct minD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr) %{ - predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n)); +instruct minmaxD_reduction_reg_avx10_2(regD dst, regD a, regD b, regD xtmp, rRegL rtmp, rFlagsReg cr) +%{ + predicate(VM_Version::supports_avx10_2() && VLoopReductions::is_reduction(n)); + match(Set dst (MaxD a b)); match(Set dst (MinD a b)); effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr); - format %{ "maxD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %} + format %{ "minmaxD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %} ins_encode %{ + int opcode = this->ideal_Opcode(); + bool min = (opcode == Op_MinD) ? true : false; emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register, - true /*min*/, false /*single*/); + min, fp_prec_dbl /*pt*/); + %} + ins_pipe( pipe_slow ); +%} + +// min = java.lang.Math.min(double a, double b) +// max = java.lang.Math.max(double a, double b) +instruct minmaxD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) +%{ + predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n)); + match(Set dst (MaxD a b)); + match(Set dst (MinD a b)); + effect(USE a, USE b, TEMP atmp, TEMP btmp, TEMP tmp); + + format %{ "minmaxD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %} ins_encode %{ + int opcode = this->ideal_Opcode(); + int param_opcode = (opcode == Op_MinD) ? 
Op_MinV : Op_MaxV; + __ vminmax_fp(param_opcode, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, + $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit); + %} + ins_pipe( pipe_slow ); +%} + +instruct minmaxD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr) +%{ + predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n)); + match(Set dst (MaxD a b)); + match(Set dst (MinD a b)); + effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr); + + format %{ "minmaxD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %} + ins_encode %{ + int opcode = this->ideal_Opcode(); + bool min = (opcode == Op_MinD) ? true : false; + emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register, + min, fp_prec_dbl /*pt*/); %} ins_pipe( pipe_slow ); %} @@ -14394,9 +14403,9 @@ instruct cmpF_cc_regCFE(rFlagsRegUCFE cr, regF src1, regF src2) %{ match(Set cr (CmpF src1 src2)); ins_cost(100); - format %{ "vucomxss $src1, $src2" %} + format %{ "evucomxss $src1, $src2" %} ins_encode %{ - __ vucomxss($src1$$XMMRegister, $src2$$XMMRegister); + __ evucomxss($src1$$XMMRegister, $src2$$XMMRegister); %} ins_pipe(pipe_slow); %} @@ -14416,9 +14425,9 @@ instruct cmpF_cc_memCFE(rFlagsRegUCFE cr, regF src1, memory src2) %{ match(Set cr (CmpF src1 (LoadF src2))); ins_cost(100); - format %{ "vucomxss $src1, $src2" %} + format %{ "evucomxss $src1, $src2" %} ins_encode %{ - __ vucomxss($src1$$XMMRegister, $src2$$Address); + __ evucomxss($src1$$XMMRegister, $src2$$Address); %} ins_pipe(pipe_slow); %} @@ -14438,9 +14447,9 @@ instruct cmpF_cc_immCFE(rFlagsRegUCFE cr, regF src, immF con) %{ match(Set cr (CmpF src con)); ins_cost(100); - format %{ "vucomxss $src, [$constantaddress]\t# load from constant table: float=$con" %} + format %{ "evucomxss $src, [$constantaddress]\t# load from constant table: float=$con" %} ins_encode %{ - __ 
vucomxss($src$$XMMRegister, $constantaddress($con)); + __ evucomxss($src$$XMMRegister, $constantaddress($con)); %} ins_pipe(pipe_slow); %} @@ -14479,9 +14488,9 @@ instruct cmpD_cc_regCFE(rFlagsRegUCFE cr, regD src1, regD src2) %{ match(Set cr (CmpD src1 src2)); ins_cost(100); - format %{ "vucomxsd $src1, $src2 test" %} + format %{ "evucomxsd $src1, $src2 test" %} ins_encode %{ - __ vucomxsd($src1$$XMMRegister, $src2$$XMMRegister); + __ evucomxsd($src1$$XMMRegister, $src2$$XMMRegister); %} ins_pipe(pipe_slow); %} @@ -14501,9 +14510,9 @@ instruct cmpD_cc_memCFE(rFlagsRegUCFE cr, regD src1, memory src2) %{ match(Set cr (CmpD src1 (LoadD src2))); ins_cost(100); - format %{ "vucomxsd $src1, $src2" %} + format %{ "evucomxsd $src1, $src2" %} ins_encode %{ - __ vucomxsd($src1$$XMMRegister, $src2$$Address); + __ evucomxsd($src1$$XMMRegister, $src2$$Address); %} ins_pipe(pipe_slow); %} @@ -14522,9 +14531,9 @@ instruct cmpD_cc_immCFE(rFlagsRegUCFE cr, regD src, immD con) %{ match(Set cr (CmpD src con)); ins_cost(100); - format %{ "vucomxsd $src, [$constantaddress]\t# load from constant table: double=$con" %} + format %{ "evucomxsd $src, [$constantaddress]\t# load from constant table: double=$con" %} ins_encode %{ - __ vucomxsd($src$$XMMRegister, $constantaddress($con)); + __ evucomxsd($src$$XMMRegister, $constantaddress($con)); %} ins_pipe(pipe_slow); %} @@ -18832,7 +18841,7 @@ instruct ReplHF_reg(vec dst, regF src, rRegI rtmp) %{ format %{ "replicateHF $dst, $src \t! 
using $rtmp as TEMP" %} ins_encode %{ int vlen_enc = vector_length_encoding(this); - __ vmovw($rtmp$$Register, $src$$XMMRegister); + __ evmovw($rtmp$$Register, $src$$XMMRegister); __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc); %} ins_pipe( pipe_slow ); @@ -20947,7 +20956,7 @@ instruct minmaxFP_reg_avx10_2(vec dst, vec a, vec b) %{ int vlen_enc = vector_length_encoding(this); int opcode = this->ideal_Opcode(); BasicType elem_bt = Matcher::vector_element_basic_type(this); - __ vminmax_fp(opcode, elem_bt, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister, vlen_enc); + __ vminmax_fp_avx10_2(opcode, elem_bt, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister, vlen_enc); %} ins_pipe( pipe_slow ); %} @@ -25291,9 +25300,9 @@ instruct vector_selectfrom_twovectors_reg_evex(vec index, vec src1, vec src2) instruct reinterpretS2HF(regF dst, rRegI src) %{ match(Set dst (ReinterpretS2HF src)); - format %{ "vmovw $dst, $src" %} + format %{ "evmovw $dst, $src" %} ins_encode %{ - __ vmovw($dst$$XMMRegister, $src$$Register); + __ evmovw($dst$$XMMRegister, $src$$Register); %} ins_pipe(pipe_slow); %} @@ -25301,9 +25310,9 @@ instruct reinterpretS2HF(regF dst, rRegI src) instruct reinterpretHF2S(rRegI dst, regF src) %{ match(Set dst (ReinterpretHF2S src)); - format %{ "vmovw $dst, $src" %} + format %{ "evmovw $dst, $src" %} ins_encode %{ - __ vmovw($dst$$Register, $src$$XMMRegister); + __ evmovw($dst$$Register, $src$$XMMRegister); %} ins_pipe(pipe_slow); %} @@ -25357,10 +25366,11 @@ instruct scalar_minmax_HF_reg_avx10_2(regF dst, regF src1, regF src2) predicate(VM_Version::supports_avx10_2()); match(Set dst (MaxHF src1 src2)); match(Set dst (MinHF src1 src2)); + format %{ "scalar_min_max_fp16 $dst, $src1, $src2" %} ins_encode %{ - int function = this->ideal_Opcode() == Op_MinHF ? 
AVX10_2_MINMAX_MIN_COMPARE_SIGN : AVX10_2_MINMAX_MAX_COMPARE_SIGN; - __ eminmaxsh($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, function); + int opcode = this->ideal_Opcode(); + __ sminmax_fp16_avx10_2(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, k0); %} ins_pipe( pipe_slow ); %} @@ -25371,11 +25381,12 @@ instruct scalar_minmax_HF_reg(regF dst, regF src1, regF src2, kReg ktmp, regF xt match(Set dst (MaxHF src1 src2)); match(Set dst (MinHF src1 src2)); effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2); + format %{ "scalar_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %} ins_encode %{ int opcode = this->ideal_Opcode(); - __ scalar_max_min_fp16(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $ktmp$$KRegister, - $xtmp1$$XMMRegister, $xtmp2$$XMMRegister); + __ sminmax_fp16(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $ktmp$$KRegister, + $xtmp1$$XMMRegister, $xtmp2$$XMMRegister); %} ins_pipe( pipe_slow ); %} @@ -25475,8 +25486,9 @@ instruct vector_minmax_HF_mem_avx10_2(vec dst, vec src1, memory src2) format %{ "vector_min_max_fp16_mem $dst, $src1, $src2" %} ins_encode %{ int vlen_enc = vector_length_encoding(this); - int function = this->ideal_Opcode() == Op_MinVHF ? AVX10_2_MINMAX_MIN_COMPARE_SIGN : AVX10_2_MINMAX_MAX_COMPARE_SIGN; - __ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$Address, true, function, vlen_enc); + int opcode = this->ideal_Opcode(); + __ vminmax_fp16_avx10_2(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address, + k0, vlen_enc); %} ins_pipe( pipe_slow ); %} @@ -25489,8 +25501,9 @@ instruct vector_minmax_HF_reg_avx10_2(vec dst, vec src1, vec src2) format %{ "vector_min_max_fp16 $dst, $src1, $src2" %} ins_encode %{ int vlen_enc = vector_length_encoding(this); - int function = this->ideal_Opcode() == Op_MinVHF ? 
AVX10_2_MINMAX_MIN_COMPARE_SIGN : AVX10_2_MINMAX_MAX_COMPARE_SIGN; - __ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, true, function, vlen_enc); + int opcode = this->ideal_Opcode(); + __ vminmax_fp16_avx10_2(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, + k0, vlen_enc); %} ins_pipe( pipe_slow ); %} @@ -25505,8 +25518,8 @@ instruct vector_minmax_HF_reg(vec dst, vec src1, vec src2, kReg ktmp, vec xtmp1, ins_encode %{ int vlen_enc = vector_length_encoding(this); int opcode = this->ideal_Opcode(); - __ vector_max_min_fp16(opcode, $dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, $ktmp$$KRegister, - $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc); + __ vminmax_fp16(opcode, $dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, $ktmp$$KRegister, + $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc); %} ins_pipe( pipe_slow ); %} diff --git a/test/hotspot/jtreg/compiler/intrinsics/math/TestFpMinMaxReductions.java b/test/hotspot/jtreg/compiler/intrinsics/math/TestFpMinMaxReductions.java index ff0277b33f7..5f516890dbe 100644 --- a/test/hotspot/jtreg/compiler/intrinsics/math/TestFpMinMaxReductions.java +++ b/test/hotspot/jtreg/compiler/intrinsics/math/TestFpMinMaxReductions.java @@ -55,21 +55,21 @@ public class TestFpMinMaxReductions { } @Test - @IR(counts = {IRNode.MIN_F_REG, "1"}, - failOn = {IRNode.MIN_F_REDUCTION_REG}) + @IR(counts = {IRNode.MINMAX_F_REG, "1"}, + failOn = {IRNode.MINMAX_F_REDUCTION_REG}) private static float testFloatMin() { return Math.min(floatInput1, floatInput2); } @Test - @IR(counts = {IRNode.MAX_F_REG, "1"}, - failOn = {IRNode.MAX_F_REDUCTION_REG}) + @IR(counts = {IRNode.MINMAX_F_REG, "1"}, + failOn = {IRNode.MINMAX_F_REDUCTION_REG}) private static float testFloatMax() { return Math.max(floatInput1, floatInput2); } @Test - @IR(counts = {IRNode.MIN_F_REDUCTION_REG, ">= 1"}) + @IR(counts = {IRNode.MINMAX_F_REDUCTION_REG, ">= 1"}) private static float testFloatMinReduction() { float fmin 
= Float.POSITIVE_INFINITY; for (int i = 0; i < floatArray.length; i++) { @@ -79,7 +79,7 @@ public class TestFpMinMaxReductions { } @Test - @IR(counts = {IRNode.MIN_F_REDUCTION_REG, ">= 1"}) + @IR(counts = {IRNode.MINMAX_F_REDUCTION_REG, ">= 1"}) private static float testFloatMinReductionPartiallyUnrolled() { float fmin = Float.POSITIVE_INFINITY; for (int i = 0; i < floatArray.length / 2; i++) { @@ -90,7 +90,7 @@ public class TestFpMinMaxReductions { } @Test - @IR(counts = {IRNode.MIN_F_REDUCTION_REG, ">= 1"}) + @IR(counts = {IRNode.MINMAX_F_REDUCTION_REG, ">= 1"}) private static float testFloatMinReductionNonCounted() { float fmin = Float.POSITIVE_INFINITY; for (int i = 0; i < floatArray.length; i += stride) { @@ -100,7 +100,7 @@ public class TestFpMinMaxReductions { } @Test - @IR(counts = {IRNode.MIN_F_REDUCTION_REG, ">= 1"}) + @IR(counts = {IRNode.MINMAX_F_REDUCTION_REG, ">= 1"}) private static float testFloatMinReductionGlobalAccumulator() { acc = Float.POSITIVE_INFINITY; for (int i = 0; i < floatArray.length; i++) { @@ -110,7 +110,7 @@ public class TestFpMinMaxReductions { } @Test - @IR(counts = {IRNode.MIN_F_REDUCTION_REG, ">= 1"}) + @IR(counts = {IRNode.MINMAX_F_REDUCTION_REG, ">= 1"}) private static float testFloatMinReductionInOuterLoop() { float fmin = Float.POSITIVE_INFINITY; int count = 0; @@ -124,7 +124,7 @@ public class TestFpMinMaxReductions { } @Test - @IR(counts = {IRNode.MAX_F_REDUCTION_REG, ">= 1"}) + @IR(counts = {IRNode.MINMAX_F_REDUCTION_REG, ">= 1"}) private static float testFloatMaxReduction() { float fmax = Float.NEGATIVE_INFINITY; for (int i = 0; i < floatArray.length; i++) { @@ -134,21 +134,21 @@ public class TestFpMinMaxReductions { } @Test - @IR(counts = {IRNode.MIN_D_REG, "1"}, - failOn = {IRNode.MIN_D_REDUCTION_REG}) + @IR(counts = {IRNode.MINMAX_D_REG, "1"}, + failOn = {IRNode.MINMAX_D_REDUCTION_REG}) private static double testDoubleMin() { return Math.min(doubleInput1, doubleInput2); } @Test - @IR(counts = {IRNode.MAX_D_REG, "1"}, - 
failOn = {IRNode.MAX_D_REDUCTION_REG}) + @IR(counts = {IRNode.MINMAX_D_REG, "1"}, + failOn = {IRNode.MINMAX_D_REDUCTION_REG}) private static double testDoubleMax() { return Math.max(doubleInput1, doubleInput2); } @Test - @IR(counts = {IRNode.MIN_D_REDUCTION_REG, ">= 1"}) + @IR(counts = {IRNode.MINMAX_D_REDUCTION_REG, ">= 1"}) private static double testDoubleMinReduction() { double fmin = Double.POSITIVE_INFINITY; for (int i = 0; i < doubleArray.length; i++) { @@ -158,7 +158,7 @@ public class TestFpMinMaxReductions { } @Test - @IR(counts = {IRNode.MAX_D_REDUCTION_REG, ">= 1"}) + @IR(counts = {IRNode.MINMAX_D_REDUCTION_REG, ">= 1"}) private static double testDoubleMaxReduction() { double fmax = Double.NEGATIVE_INFINITY; for (int i = 0; i < doubleArray.length; i++) { diff --git a/test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java b/test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java index 8885d1283df..0753a0b04bc 100644 --- a/test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java +++ b/test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java @@ -1203,31 +1203,11 @@ public class IRNode { beforeMatchingNameRegex(MAX_D, "MaxD"); } - public static final String MAX_D_REDUCTION_REG = PREFIX + "MAX_D_REDUCTION_REG" + POSTFIX; - static { - machOnlyNameRegex(MAX_D_REDUCTION_REG, "maxD_reduction_reg"); - } - - public static final String MAX_D_REG = PREFIX + "MAX_D_REG" + POSTFIX; - static { - machOnlyNameRegex(MAX_D_REG, "maxD_reg"); - } - public static final String MAX_F = PREFIX + "MAX_F" + POSTFIX; static { beforeMatchingNameRegex(MAX_F, "MaxF"); } - public static final String MAX_F_REDUCTION_REG = PREFIX + "MAX_F_REDUCTION_REG" + POSTFIX; - static { - machOnlyNameRegex(MAX_F_REDUCTION_REG, "maxF_reduction_reg"); - } - - public static final String MAX_F_REG = PREFIX + "MAX_F_REG" + POSTFIX; - static { - machOnlyNameRegex(MAX_F_REG, "maxF_reg"); - } - public static final String MAX_I = PREFIX + "MAX_I" + POSTFIX; static { beforeMatchingNameRegex(MAX_I, "MaxI"); 
@@ -1309,14 +1289,14 @@ public class IRNode { beforeMatchingNameRegex(MIN_D, "MinD"); } - public static final String MIN_D_REDUCTION_REG = PREFIX + "MIN_D_REDUCTION_REG" + POSTFIX; + public static final String MINMAX_D_REDUCTION_REG = PREFIX + "MINMAX_D_REDUCTION_REG" + POSTFIX; static { - machOnlyNameRegex(MIN_D_REDUCTION_REG, "minD_reduction_reg"); + machOnlyNameRegex(MINMAX_D_REDUCTION_REG, "minmaxD_reduction_reg"); } - public static final String MIN_D_REG = PREFIX + "MIN_D_REG" + POSTFIX; + public static final String MINMAX_D_REG = PREFIX + "MINMAX_D_REG" + POSTFIX; static { - machOnlyNameRegex(MIN_D_REG, "minD_reg"); + machOnlyNameRegex(MINMAX_D_REG, "minmaxD_reg"); } public static final String MIN_F = PREFIX + "MIN_F" + POSTFIX; @@ -1324,14 +1304,14 @@ public class IRNode { beforeMatchingNameRegex(MIN_F, "MinF"); } - public static final String MIN_F_REDUCTION_REG = PREFIX + "MIN_F_REDUCTION_REG" + POSTFIX; + public static final String MINMAX_F_REDUCTION_REG = PREFIX + "MINMAX_F_REDUCTION_REG" + POSTFIX; static { - machOnlyNameRegex(MIN_F_REDUCTION_REG, "minF_reduction_reg"); + machOnlyNameRegex(MINMAX_F_REDUCTION_REG, "minmaxF_reduction_reg"); } - public static final String MIN_F_REG = PREFIX + "MIN_F_REG" + POSTFIX; + public static final String MINMAX_F_REG = PREFIX + "MINMAX_F_REG" + POSTFIX; static { - machOnlyNameRegex(MIN_F_REG, "minF_reg"); + machOnlyNameRegex(MINMAX_F_REG, "minmaxF_reg"); } public static final String MIN_I = PREFIX + "MIN_I" + POSTFIX; diff --git a/test/micro/org/openjdk/bench/jdk/incubator/vector/Float16OperationsBenchmark.java b/test/micro/org/openjdk/bench/jdk/incubator/vector/Float16OperationsBenchmark.java index cbfe9958924..92c0b58005f 100644 --- a/test/micro/org/openjdk/bench/jdk/incubator/vector/Float16OperationsBenchmark.java +++ b/test/micro/org/openjdk/bench/jdk/incubator/vector/Float16OperationsBenchmark.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved. 
+ * Copyright (c) 2025, 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -28,6 +28,7 @@ import jdk.incubator.vector.*; import org.openjdk.jmh.annotations.*; import static jdk.incubator.vector.Float16.*; import static java.lang.Float.*; +import java.util.Random; @OutputTimeUnit(TimeUnit.MILLISECONDS) @State(Scope.Thread) @@ -45,11 +46,20 @@ public class Float16OperationsBenchmark { short [] vector5; boolean [] vectorPredicate; + private int c0, c1, c2, s1, s2; + + Random r; + static final short f16_one = Float.floatToFloat16(1.0f); static final short f16_two = Float.floatToFloat16(2.0f); @Setup(Level.Trial) public void BmSetup() { + r = new Random(); + + c1 = s1 = step(); + c2 = vectorDim - (s2 = step()); + rexp = new int[vectorDim]; vectorRes = new short[vectorDim]; vector1 = new short[vectorDim]; @@ -84,6 +94,16 @@ public class Float16OperationsBenchmark { ); } + private int step() { + return (r.nextInt() & 0xf) + 1; + } + + private void inc() { + c1 = c1 + s1 < vectorDim ? c1 + s1 : (s1 = step()); + c2 = c2 - s2 > 0 ? 
c2 - s2 : vectorDim - (s2 = step()); + c0 = Math.abs(c2 - c1); + } + @Benchmark public void addBenchmark() { for (int i = 0; i < vectorDim; i++) { @@ -200,6 +220,14 @@ public class Float16OperationsBenchmark { } } + @Benchmark + public void maxScalarBenchmark() { + for (int i = 0; i < vectorDim; i++) { + inc(); // Ensures no auto-vectorization + vectorRes[c0] = float16ToRawShortBits(max(shortBitsToFloat16(vector1[c1]), shortBitsToFloat16(vector2[c2]))); + } + } + @Benchmark public void minBenchmark() { for (int i = 0; i < vectorDim; i++) { @@ -207,6 +235,14 @@ public class Float16OperationsBenchmark { } } + @Benchmark + public void minScalarBenchmark() { + for (int i = 0; i < vectorDim; i++) { + inc(); // Ensures no auto-vectorization + vectorRes[c0] = float16ToRawShortBits(min(shortBitsToFloat16(vector1[c1]), shortBitsToFloat16(vector2[c2]))); + } + } + @Benchmark public void sqrtBenchmark() { for (int i = 0; i < vectorDim; i++) { diff --git a/test/micro/org/openjdk/bench/vm/compiler/FpMinMaxIntrinsics.java b/test/micro/org/openjdk/bench/vm/compiler/FpMinMaxIntrinsics.java index 27ae2214157..62c33f5fafe 100644 --- a/test/micro/org/openjdk/bench/vm/compiler/FpMinMaxIntrinsics.java +++ b/test/micro/org/openjdk/bench/vm/compiler/FpMinMaxIntrinsics.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2019, 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -45,14 +45,15 @@ public class FpMinMaxIntrinsics { private Random r = new Random(); private static int stride = 1; - private static float acc; + private static float f_acc; + private static double d_acc; @Setup public void init() { c1 = s1 = step(); c2 = COUNT - (s2 = step()); - for (int i=0; i