From 5e30bf68353d989aadc2d8176181226b2debd283 Mon Sep 17 00:00:00 2001 From: Jatin Bhateja Date: Wed, 2 Jul 2025 17:47:20 +0000 Subject: [PATCH] 8360116: Add support for AVX10 floating point minmax instruction Reviewed-by: mhaessig, sviswanathan --- src/hotspot/cpu/x86/assembler_x86.cpp | 108 ++++++++ src/hotspot/cpu/x86/assembler_x86.hpp | 22 ++ src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp | 43 ++- src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp | 3 + src/hotspot/cpu/x86/macroAssembler_x86.cpp | 16 ++ src/hotspot/cpu/x86/x86.ad | 251 +++++++++++++++--- src/hotspot/cpu/x86/x86_64.ad | 60 ++++- 7 files changed, 461 insertions(+), 42 deletions(-) diff --git a/src/hotspot/cpu/x86/assembler_x86.cpp b/src/hotspot/cpu/x86/assembler_x86.cpp index 897b06e94df..f116125767a 100644 --- a/src/hotspot/cpu/x86/assembler_x86.cpp +++ b/src/hotspot/cpu/x86/assembler_x86.cpp @@ -8257,6 +8257,14 @@ void Assembler::vmaxsh(XMMRegister dst, XMMRegister nds, XMMRegister src) { emit_int16(0x5F, (0xC0 | encode)); } +void Assembler::eminmaxsh(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8) { + assert(VM_Version::supports_avx10_2(), ""); + InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + attributes.set_is_evex_instruction(); + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3A, &attributes); + emit_int24(0x53, (0xC0 | encode), imm8); +} + void Assembler::vminsh(XMMRegister dst, XMMRegister nds, XMMRegister src) { assert(VM_Version::supports_avx512_fp16(), "requires AVX512-FP16"); InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); @@ -8771,12 +8779,68 @@ void Assembler::vmaxps(XMMRegister dst, XMMRegister nds, XMMRegister src, int ve emit_int16(0x5F, (0xC0 | encode)); } +void Assembler::evminmaxps(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister 
src, bool merge, int imm8, int vector_len) { + assert(VM_Version::supports_avx10_2(), ""); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_is_evex_instruction(); + attributes.set_embedded_opmask_register_specifier(mask); + if (merge) { + attributes.reset_is_clear_context(); + } + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); + emit_int24(0x52, (0xC0 | encode), imm8); +} + +void Assembler::evminmaxps(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int imm8, int vector_len) { + assert(VM_Version::supports_avx10_2(), ""); + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_is_evex_instruction(); + attributes.set_embedded_opmask_register_specifier(mask); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_NObit); + if (merge) { + attributes.reset_is_clear_context(); + } + vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); + emit_int8(0x52); + emit_operand(dst, src, 0); + emit_int8(imm8); +} + void Assembler::maxpd(XMMRegister dst, XMMRegister src) { InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int16(0x5F, (0xC0 | encode)); } +void Assembler::evminmaxpd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int imm8, int vector_len) { + assert(VM_Version::supports_avx10_2(), ""); + InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false,/* uses_vl */ true); + attributes.set_is_evex_instruction(); + 
attributes.set_embedded_opmask_register_specifier(mask); + if (merge) { + attributes.reset_is_clear_context(); + } + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); + emit_int24(0x52, (0xC0 | encode), imm8); +} + +void Assembler::evminmaxpd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int imm8, int vector_len) { + assert(VM_Version::supports_avx10_2(), ""); + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_is_evex_instruction(); + attributes.set_embedded_opmask_register_specifier(mask); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_NObit); + if (merge) { + attributes.reset_is_clear_context(); + } + vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); + emit_int8(0x52); + emit_operand(dst, src, 0); + emit_int8(imm8); +} + void Assembler::vmaxpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { assert(vector_len >= AVX_512bit ? 
VM_Version::supports_evex() : VM_Version::supports_avx(), ""); InstructionAttr attributes(vector_len, /* vex_w */true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); @@ -13119,6 +13183,14 @@ void Assembler::vminss(XMMRegister dst, XMMRegister nds, XMMRegister src) { emit_int16(0x5D, (0xC0 | encode)); } +void Assembler::eminmaxss(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8) { + assert(VM_Version::supports_avx10_2(), ""); + InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + attributes.set_is_evex_instruction(); + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); + emit_int24(0x53, (0xC0 | encode), imm8); +} + void Assembler::vminsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { assert(VM_Version::supports_avx(), ""); InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); @@ -13127,6 +13199,14 @@ void Assembler::vminsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { emit_int16(0x5D, (0xC0 | encode)); } +void Assembler::eminmaxsd(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8) { + assert(VM_Version::supports_avx10_2(), ""); + InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + attributes.set_is_evex_instruction(); + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); + emit_int24(0x53, (0xC0 | encode), imm8); +} + void Assembler::vcmppd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len) { assert(VM_Version::supports_avx(), ""); assert(vector_len <= AVX_256bit, ""); @@ -16526,6 +16606,34 @@ void Assembler::evminph(XMMRegister dst, XMMRegister nds, Address src, int vecto 
emit_operand(dst, src, 0); } +void Assembler::evminmaxph(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int imm8, int vector_len) { + assert(VM_Version::supports_avx10_2(), ""); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false,/* uses_vl */ true); + attributes.set_is_evex_instruction(); + attributes.set_embedded_opmask_register_specifier(mask); + if (merge) { + attributes.reset_is_clear_context(); + } + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3A, &attributes); + emit_int24(0x52, (0xC0 | encode), imm8); +} + +void Assembler::evminmaxph(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int imm8, int vector_len) { + assert(VM_Version::supports_avx10_2(), ""); + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_is_evex_instruction(); + attributes.set_embedded_opmask_register_specifier(mask); + if (merge) { + attributes.reset_is_clear_context(); + } + attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_NObit); + vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3A, &attributes); + emit_int8(0x52); + emit_operand(dst, src, 0); + emit_int8(imm8); +} + void Assembler::evmaxph(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { assert(VM_Version::supports_avx512_fp16(), "requires AVX512-FP16"); assert(vector_len == Assembler::AVX_512bit || VM_Version::supports_avx512vl(), ""); diff --git a/src/hotspot/cpu/x86/assembler_x86.hpp b/src/hotspot/cpu/x86/assembler_x86.hpp index b1959e23722..45c24f8c832 100644 --- a/src/hotspot/cpu/x86/assembler_x86.hpp +++ b/src/hotspot/cpu/x86/assembler_x86.hpp @@ -441,6 +441,17 @@ class InstructionAttr; // See fxsave and xsave(EVEX enabled) 
documentation for layout const int FPUStateSizeInWords = 2688 / wordSize; + +// AVX10 new minmax instruction control mask encoding. +// +// imm8[4] = 0 (please refer to Table 11.1 of section 11.2 of AVX10 manual[1] for details) +// imm8[3:2] (sign control) = 01 (select sign, please refer to Table 11.5 of section 11.2 of AVX10 manual[1] for details) +// imm8[1:0] = 00 (min) / 01 (max) +// +// [1] https://www.intel.com/content/www/us/en/content-details/856721/intel-advanced-vector-extensions-10-2-intel-avx10-2-architecture-specification.html?wapkw=AVX10 +const int AVX10_MINMAX_MAX_COMPARE_SIGN = 0x5; +const int AVX10_MINMAX_MIN_COMPARE_SIGN = 0x4; + // The Intel x86/Amd64 Assembler: Pure assembler doing NO optimizations on the instruction // level (e.g. mov rax, 0 is not translated into xor rax, rax!); i.e., what you write // is what you get. The Assembler is generating code into a CodeBuffer. @@ -2745,6 +2756,17 @@ private: void minpd(XMMRegister dst, XMMRegister src); void vminpd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len); + // AVX10.2 floating point minmax instructions + void eminmaxsh(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8); + void eminmaxss(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8); + void eminmaxsd(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8); + void evminmaxph(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int imm8, int vector_len); + void evminmaxph(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int imm8, int vector_len); + void evminmaxps(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int imm8, int vector_len); + void evminmaxps(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int imm8, int vector_len); + void evminmaxpd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int imm8, int vector_len); + void evminmaxpd(XMMRegister dst, 
KRegister mask, XMMRegister nds, Address src, bool merge, int imm8, int vector_len); + // Maximum of packed integers void pmaxsb(XMMRegister dst, XMMRegister src); void vpmaxsb(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len); diff --git a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp index d9a9ef0de3b..6d24c145a50 100644 --- a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp @@ -1230,6 +1230,21 @@ void C2_MacroAssembler::evminmax_fp(int opcode, BasicType elem_bt, } } +void C2_MacroAssembler::vminmax_fp(int opc, BasicType elem_bt, XMMRegister dst, KRegister mask, + XMMRegister src1, XMMRegister src2, int vlen_enc) { + assert(opc == Op_MinV || opc == Op_MinReductionV || + opc == Op_MaxV || opc == Op_MaxReductionV, "sanity"); + + int imm8 = (opc == Op_MinV || opc == Op_MinReductionV) ? AVX10_MINMAX_MIN_COMPARE_SIGN + : AVX10_MINMAX_MAX_COMPARE_SIGN; + if (elem_bt == T_FLOAT) { + evminmaxps(dst, mask, src1, src2, true, imm8, vlen_enc); + } else { + assert(elem_bt == T_DOUBLE, ""); + evminmaxpd(dst, mask, src1, src2, true, imm8, vlen_enc); + } +} + // Float/Double signum void C2_MacroAssembler::signum_fp(int opcode, XMMRegister dst, XMMRegister zero, XMMRegister one) { assert(opcode == Op_SignumF || opcode == Op_SignumD, "sanity"); @@ -2537,12 +2552,21 @@ void C2_MacroAssembler::reduceFloatMinMax(int opcode, int vlen, bool is_dst_vali } else { // i = [0,1] vpermilps(wtmp, wsrc, permconst[i], vlen_enc); } - vminmax_fp(opcode, T_FLOAT, wdst, wtmp, wsrc, tmp, atmp, btmp, vlen_enc); + + if (VM_Version::supports_avx10_2()) { + vminmax_fp(opcode, T_FLOAT, wdst, k0, wtmp, wsrc, vlen_enc); + } else { + vminmax_fp(opcode, T_FLOAT, wdst, wtmp, wsrc, tmp, atmp, btmp, vlen_enc); + } wsrc = wdst; vlen_enc = Assembler::AVX_128bit; } if (is_dst_valid) { - vminmax_fp(opcode, T_FLOAT, dst, wdst, dst, tmp, atmp, btmp, Assembler::AVX_128bit); + if 
(VM_Version::supports_avx10_2()) { + vminmax_fp(opcode, T_FLOAT, dst, k0, wdst, dst, Assembler::AVX_128bit); + } else { + vminmax_fp(opcode, T_FLOAT, dst, wdst, dst, tmp, atmp, btmp, Assembler::AVX_128bit); + } } } @@ -2568,12 +2592,23 @@ void C2_MacroAssembler::reduceDoubleMinMax(int opcode, int vlen, bool is_dst_val assert(i == 0, "%d", i); vpermilpd(wtmp, wsrc, 1, vlen_enc); } - vminmax_fp(opcode, T_DOUBLE, wdst, wtmp, wsrc, tmp, atmp, btmp, vlen_enc); + + if (VM_Version::supports_avx10_2()) { + vminmax_fp(opcode, T_DOUBLE, wdst, k0, wtmp, wsrc, vlen_enc); + } else { + vminmax_fp(opcode, T_DOUBLE, wdst, wtmp, wsrc, tmp, atmp, btmp, vlen_enc); + } + wsrc = wdst; vlen_enc = Assembler::AVX_128bit; } + if (is_dst_valid) { - vminmax_fp(opcode, T_DOUBLE, dst, wdst, dst, tmp, atmp, btmp, Assembler::AVX_128bit); + if (VM_Version::supports_avx10_2()) { + vminmax_fp(opcode, T_DOUBLE, dst, k0, wdst, dst, Assembler::AVX_128bit); + } else { + vminmax_fp(opcode, T_DOUBLE, dst, wdst, dst, tmp, atmp, btmp, Assembler::AVX_128bit); + } } } diff --git a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp index 713eb73d68f..ee6fecb9f88 100644 --- a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp +++ b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp @@ -72,6 +72,9 @@ public: XMMRegister tmp, XMMRegister atmp, XMMRegister btmp, int vlen_enc); + void vminmax_fp(int opc, BasicType elem_bt, XMMRegister dst, KRegister mask, + XMMRegister src1, XMMRegister src2, int vlen_enc); + void vpuminmaxq(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2, XMMRegister xtmp1, XMMRegister xtmp2, int vlen_enc); void evminmax_fp(int opcode, BasicType elem_bt, diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.cpp b/src/hotspot/cpu/x86/macroAssembler_x86.cpp index c401863d7cd..c8bf289e9d4 100644 --- a/src/hotspot/cpu/x86/macroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/macroAssembler_x86.cpp @@ -8841,6 +8841,10 @@ void MacroAssembler::evpmins(BasicType 
type, XMMRegister dst, KRegister mask, XM evpminsd(dst, mask, nds, src, merge, vector_len); break; case T_LONG: evpminsq(dst, mask, nds, src, merge, vector_len); break; + case T_FLOAT: + evminmaxps(dst, mask, nds, src, merge, AVX10_MINMAX_MIN_COMPARE_SIGN, vector_len); break; + case T_DOUBLE: + evminmaxpd(dst, mask, nds, src, merge, AVX10_MINMAX_MIN_COMPARE_SIGN, vector_len); break; default: fatal("Unexpected type argument %s", type2name(type)); break; } @@ -8856,6 +8860,10 @@ void MacroAssembler::evpmaxs(BasicType type, XMMRegister dst, KRegister mask, XM evpmaxsd(dst, mask, nds, src, merge, vector_len); break; case T_LONG: evpmaxsq(dst, mask, nds, src, merge, vector_len); break; + case T_FLOAT: + evminmaxps(dst, mask, nds, src, merge, AVX10_MINMAX_MAX_COMPARE_SIGN, vector_len); break; + case T_DOUBLE: + evminmaxpd(dst, mask, nds, src, merge, AVX10_MINMAX_MAX_COMPARE_SIGN, vector_len); break; default: fatal("Unexpected type argument %s", type2name(type)); break; } @@ -8871,6 +8879,10 @@ void MacroAssembler::evpmins(BasicType type, XMMRegister dst, KRegister mask, XM evpminsd(dst, mask, nds, src, merge, vector_len); break; case T_LONG: evpminsq(dst, mask, nds, src, merge, vector_len); break; + case T_FLOAT: + evminmaxps(dst, mask, nds, src, merge, AVX10_MINMAX_MIN_COMPARE_SIGN, vector_len); break; + case T_DOUBLE: + evminmaxpd(dst, mask, nds, src, merge, AVX10_MINMAX_MIN_COMPARE_SIGN, vector_len); break; default: fatal("Unexpected type argument %s", type2name(type)); break; } @@ -8886,6 +8898,10 @@ void MacroAssembler::evpmaxs(BasicType type, XMMRegister dst, KRegister mask, XM evpmaxsd(dst, mask, nds, src, merge, vector_len); break; case T_LONG: evpmaxsq(dst, mask, nds, src, merge, vector_len); break; + case T_FLOAT: + evminmaxps(dst, mask, nds, src, merge, AVX10_MINMAX_MAX_COMPARE_SIGN, vector_len); break; + case T_DOUBLE: + evminmaxpd(dst, mask, nds, src, merge, AVX10_MINMAX_MAX_COMPARE_SIGN, vector_len); break; default: fatal("Unexpected type argument %s", 
type2name(type)); break; } diff --git a/src/hotspot/cpu/x86/x86.ad b/src/hotspot/cpu/x86/x86.ad index a281331cb29..c0a55917a94 100644 --- a/src/hotspot/cpu/x86/x86.ad +++ b/src/hotspot/cpu/x86/x86.ad @@ -2024,7 +2024,7 @@ bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) { return false; // Implementation limitation } - if (is_floating_point_type(bt)) { + if (is_floating_point_type(bt) && !VM_Version::supports_avx10_2()) { return false; // Implementation limitation } return true; @@ -5293,9 +5293,9 @@ instruct mul_reduction64B(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legV //--------------------Min/Max Float Reduction -------------------- // Float Min Reduction -instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp, - legVec atmp, legVec btmp, legVec xmm_1, rFlagsReg cr) %{ - predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && +instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp, + legVec btmp, legVec xmm_1, rFlagsReg cr) %{ + predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) || (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) && Matcher::vector_length(n->in(2)) == 2); @@ -5316,7 +5316,7 @@ instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp, instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp, legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{ - predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && + predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) || (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == 
TypeF::NEG_INF)) && Matcher::vector_length(n->in(2)) >= 4); @@ -5335,9 +5335,9 @@ instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legV ins_pipe( pipe_slow ); %} -instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp, - legVec atmp, legVec btmp, legVec xmm_1, rFlagsReg cr) %{ - predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && +instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp, legVec atmp, + legVec btmp, legVec xmm_1, rFlagsReg cr) %{ + predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && Matcher::vector_length(n->in(2)) == 2); match(Set dst (MinReductionV dst src)); match(Set dst (MaxReductionV dst src)); @@ -5355,9 +5355,9 @@ instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp, %} -instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp, - legVec atmp, legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{ - predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && +instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp, legVec atmp, legVec btmp, + legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{ + predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && Matcher::vector_length(n->in(2)) >= 4); match(Set dst (MinReductionV dst src)); match(Set dst (MaxReductionV dst src)); @@ -5374,12 +5374,78 @@ instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp, ins_pipe( pipe_slow ); %} +instruct minmax_reduction2F_avx10(regF dst, immF src1, vec src2, vec xtmp1) %{ + predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && + ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) || + (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) && + Matcher::vector_length(n->in(2)) == 2); + match(Set dst (MinReductionV src1 src2)); + match(Set dst 
(MaxReductionV src1 src2)); + effect(TEMP dst, TEMP xtmp1); + format %{ "vector_minmax_reduction $dst, $src1, $src2 \t; using $xtmp1 as TEMP" %} + ins_encode %{ + int opcode = this->ideal_Opcode(); + int vlen = Matcher::vector_length(this, $src2); + __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, + xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct minmax_reductionF_avx10(regF dst, immF src1, vec src2, vec xtmp1, vec xtmp2) %{ + predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && + ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) || + (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) && + Matcher::vector_length(n->in(2)) >= 4); + match(Set dst (MinReductionV src1 src2)); + match(Set dst (MaxReductionV src1 src2)); + effect(TEMP dst, TEMP xtmp1, TEMP xtmp2); + format %{ "vector_minmax_reduction $dst, $src1, $src2 \t; using $xtmp1 and $xtmp2 as TEMP" %} + ins_encode %{ + int opcode = this->ideal_Opcode(); + int vlen = Matcher::vector_length(this, $src2); + __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg, + xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct minmax_reduction2F_avx10_av(regF dst, vec src, vec xtmp1) %{ + predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && + Matcher::vector_length(n->in(2)) == 2); + match(Set dst (MinReductionV dst src)); + match(Set dst (MaxReductionV dst src)); + effect(TEMP dst, TEMP xtmp1); + format %{ "vector_minmax2F_reduction $dst, $src \t; using $xtmp1 as TEMP" %} + ins_encode %{ + int opcode = this->ideal_Opcode(); + int vlen = Matcher::vector_length(this, $src); + __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg, + $xtmp1$$XMMRegister); + %} + 
ins_pipe( pipe_slow ); +%} + +instruct minmax_reductionF_avx10_av(regF dst, vec src, vec xtmp1, vec xtmp2) %{ + predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && + Matcher::vector_length(n->in(2)) >= 4); + match(Set dst (MinReductionV dst src)); + match(Set dst (MaxReductionV dst src)); + effect(TEMP dst, TEMP xtmp1, TEMP xtmp2); + format %{ "vector_minmax2F_reduction $dst, $src \t; using $xtmp1 and $xtmp2 as TEMP" %} + ins_encode %{ + int opcode = this->ideal_Opcode(); + int vlen = Matcher::vector_length(this, $src); + __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg, + $xtmp1$$XMMRegister, $xtmp2$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} //--------------------Min Double Reduction -------------------- -instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2, - legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, // TEMPs - rFlagsReg cr) %{ - predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && +instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2, + legVec tmp3, legVec tmp4, rFlagsReg cr) %{ + predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) || (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) && Matcher::vector_length(n->in(2)) == 2); @@ -5398,10 +5464,9 @@ instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2, ins_pipe( pipe_slow ); %} -instruct minmax_reductionD(legRegD dst, immD src1, legVec src2, - legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, legVec tmp5, // TEMPs - rFlagsReg cr) %{ - predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && +instruct minmax_reductionD(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2, + legVec tmp3, legVec tmp4, legVec tmp5, rFlagsReg cr) %{ + 
predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) || (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) && Matcher::vector_length(n->in(2)) >= 4); @@ -5421,10 +5486,9 @@ instruct minmax_reductionD(legRegD dst, immD src1, legVec src2, %} -instruct minmax_reduction2D_av(legRegD dst, legVec src, - legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, // TEMPs - rFlagsReg cr) %{ - predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && +instruct minmax_reduction2D_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2, + legVec tmp3, legVec tmp4, rFlagsReg cr) %{ + predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && Matcher::vector_length(n->in(2)) == 2); match(Set dst (MinReductionV dst src)); match(Set dst (MaxReductionV dst src)); @@ -5441,10 +5505,9 @@ instruct minmax_reduction2D_av(legRegD dst, legVec src, ins_pipe( pipe_slow ); %} -instruct minmax_reductionD_av(legRegD dst, legVec src, - legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, legVec tmp5, // TEMPs - rFlagsReg cr) %{ - predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && +instruct minmax_reductionD_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2, legVec tmp3, + legVec tmp4, legVec tmp5, rFlagsReg cr) %{ + predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && Matcher::vector_length(n->in(2)) >= 4); match(Set dst (MinReductionV dst src)); match(Set dst (MaxReductionV dst src)); @@ -5461,6 +5524,75 @@ instruct minmax_reductionD_av(legRegD dst, legVec src, ins_pipe( pipe_slow ); %} +instruct minmax_reduction2D_avx10(regD dst, immD src1, vec src2, vec xtmp1) %{ + predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && + ((n->Opcode() == Op_MinReductionV && 
n->in(1)->bottom_type() == TypeD::POS_INF) || + (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) && + Matcher::vector_length(n->in(2)) == 2); + match(Set dst (MinReductionV src1 src2)); + match(Set dst (MaxReductionV src1 src2)); + effect(TEMP dst, TEMP xtmp1); + format %{ "vector_minmax2D_reduction $dst, $src1, $src2 ; using $xtmp1 as TEMP" %} + ins_encode %{ + int opcode = this->ideal_Opcode(); + int vlen = Matcher::vector_length(this, $src2); + __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, + xnoreg, xnoreg, $xtmp1$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct minmax_reductionD_avx10(regD dst, immD src1, vec src2, vec xtmp1, vec xtmp2) %{ + predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && + ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) || + (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) && + Matcher::vector_length(n->in(2)) >= 4); + match(Set dst (MinReductionV src1 src2)); + match(Set dst (MaxReductionV src1 src2)); + effect(TEMP dst, TEMP xtmp1, TEMP xtmp2); + format %{ "vector_minmaxD_reduction $dst, $src1, $src2 ; using $xtmp1 and $xtmp2 as TEMP" %} + ins_encode %{ + int opcode = this->ideal_Opcode(); + int vlen = Matcher::vector_length(this, $src2); + __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg, + xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + + +instruct minmax_reduction2D_av_avx10(regD dst, vec src, vec xtmp1) %{ + predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && + Matcher::vector_length(n->in(2)) == 2); + match(Set dst (MinReductionV dst src)); + match(Set dst (MaxReductionV dst src)); + effect(TEMP dst, TEMP xtmp1); + format %{ "vector_minmax2D_reduction $dst, $src ; using $xtmp1 as TEMP" %} + 
ins_encode %{ + int opcode = this->ideal_Opcode(); + int vlen = Matcher::vector_length(this, $src); + __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, + xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct minmax_reductionD_av_avx10(regD dst, vec src, vec xtmp1, vec xtmp2) %{ + predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && + Matcher::vector_length(n->in(2)) >= 4); + match(Set dst (MinReductionV dst src)); + match(Set dst (MaxReductionV dst src)); + effect(TEMP dst, TEMP xtmp1, TEMP xtmp2); + format %{ "vector_minmaxD_reduction $dst, $src ; using $xtmp1 and $xtmp2 as TEMP" %} + ins_encode %{ + int opcode = this->ideal_Opcode(); + int vlen = Matcher::vector_length(this, $src); + __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, + xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + // ====================VECTOR ARITHMETIC======================================= // --------------------------------- ADD -------------------------------------- @@ -6347,9 +6479,25 @@ instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{ ins_pipe( pipe_slow ); %} +// Float/Double vector Min/Max +instruct minmaxFP_avx10_reg(vec dst, vec a, vec b) %{ + predicate(VM_Version::supports_avx10_2() && + is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE + match(Set dst (MinV a b)); + match(Set dst (MaxV a b)); + format %{ "vector_minmaxFP $dst, $a, $b" %} + ins_encode %{ + int vlen_enc = vector_length_encoding(this); + int opcode = this->ideal_Opcode(); + BasicType elem_bt = Matcher::vector_element_basic_type(this); + __ vminmax_fp(opcode, elem_bt, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister, vlen_enc); + %} + ins_pipe( pipe_slow ); +%} + // Float/Double vector Min/Max instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec 
atmp, legVec btmp) %{ - predicate(Matcher::vector_length_in_bytes(n) <= 32 && + predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) <= 32 && is_floating_point_type(Matcher::vector_element_basic_type(n)) && // T_FLOAT, T_DOUBLE UseAVX > 0); match(Set dst (MinV a b)); @@ -6370,8 +6518,8 @@ instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, l ins_pipe( pipe_slow ); %} -instruct evminmaxFP_reg_eavx(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktmp) %{ - predicate(Matcher::vector_length_in_bytes(n) == 64 && +instruct evminmaxFP_reg_evex(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktmp) %{ + predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) == 64 && is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE match(Set dst (MinV a b)); match(Set dst (MaxV a b)); @@ -10686,8 +10834,22 @@ instruct scalar_binOps_HF_reg(regF dst, regF src1, regF src2) ins_pipe(pipe_slow); %} +instruct scalar_minmax_HF_avx10_reg(regF dst, regF src1, regF src2) +%{ + predicate(VM_Version::supports_avx10_2()); + match(Set dst (MaxHF src1 src2)); + match(Set dst (MinHF src1 src2)); + format %{ "scalar_min_max_fp16 $dst, $src1, $src2" %} + ins_encode %{ + int function = this->ideal_Opcode() == Op_MinHF ? 
AVX10_MINMAX_MIN_COMPARE_SIGN : AVX10_MINMAX_MAX_COMPARE_SIGN; + __ eminmaxsh($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, function); + %} + ins_pipe( pipe_slow ); +%} + instruct scalar_minmax_HF_reg(regF dst, regF src1, regF src2, kReg ktmp, regF xtmp1, regF xtmp2) %{ + predicate(!VM_Version::supports_avx10_2()); match(Set dst (MaxHF src1 src2)); match(Set dst (MinHF src1 src2)); effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2); @@ -10787,8 +10949,37 @@ instruct vector_fma_HF_mem(vec dst, memory src1, vec src2) ins_pipe( pipe_slow ); %} +instruct vector_minmax_HF_avx10_mem(vec dst, vec src1, memory src2) +%{ + predicate(VM_Version::supports_avx10_2()); + match(Set dst (MinVHF src1 (VectorReinterpret (LoadVector src2)))); + match(Set dst (MaxVHF src1 (VectorReinterpret (LoadVector src2)))); + format %{ "vector_min_max_fp16_mem $dst, $src1, $src2" %} + ins_encode %{ + int vlen_enc = vector_length_encoding(this); + int function = this->ideal_Opcode() == Op_MinVHF ? AVX10_MINMAX_MIN_COMPARE_SIGN : AVX10_MINMAX_MAX_COMPARE_SIGN; + __ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$Address, true, function, vlen_enc); + %} + ins_pipe( pipe_slow ); +%} + +instruct vector_minmax_HF_avx10_reg(vec dst, vec src1, vec src2) +%{ + predicate(VM_Version::supports_avx10_2()); + match(Set dst (MinVHF src1 src2)); + match(Set dst (MaxVHF src1 src2)); + format %{ "vector_min_max_fp16 $dst, $src1, $src2" %} + ins_encode %{ + int vlen_enc = vector_length_encoding(this); + int function = this->ideal_Opcode() == Op_MinVHF ? 
AVX10_MINMAX_MIN_COMPARE_SIGN : AVX10_MINMAX_MAX_COMPARE_SIGN; + __ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, true, function, vlen_enc); + %} + ins_pipe( pipe_slow ); +%} + instruct vector_minmax_HF_reg(vec dst, vec src1, vec src2, kReg ktmp, vec xtmp1, vec xtmp2) %{ + predicate(!VM_Version::supports_avx10_2()); match(Set dst (MinVHF src1 src2)); match(Set dst (MaxVHF src1 src2)); effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2); diff --git a/src/hotspot/cpu/x86/x86_64.ad b/src/hotspot/cpu/x86/x86_64.ad index 11585eabc79..5b5292fbde2 100644 --- a/src/hotspot/cpu/x86/x86_64.ad +++ b/src/hotspot/cpu/x86/x86_64.ad @@ -4450,9 +4450,20 @@ instruct loadD(regD dst, memory mem) ins_pipe(pipe_slow); // XXX %} +// max = java.lang.Math.max(float a, float b) +instruct maxF_avx10_reg(regF dst, regF a, regF b) %{ + predicate(VM_Version::supports_avx10_2()); + match(Set dst (MaxF a b)); + format %{ "maxF $dst, $a, $b" %} + ins_encode %{ + __ eminmaxss($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_MINMAX_MAX_COMPARE_SIGN); + %} + ins_pipe( pipe_slow ); +%} + // max = java.lang.Math.max(float a, float b) instruct maxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{ - predicate(UseAVX > 0 && !VLoopReductions::is_reduction(n)); + predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n)); match(Set dst (MaxF a b)); effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp); format %{ "maxF $dst, $a, $b \t! 
using $tmp, $atmp and $btmp as TEMP" %} @@ -4463,7 +4474,7 @@ instruct maxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, %} instruct maxF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr) %{ - predicate(UseAVX > 0 && VLoopReductions::is_reduction(n)); + predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n)); match(Set dst (MaxF a b)); effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr); @@ -4475,9 +4486,20 @@ instruct maxF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRe ins_pipe( pipe_slow ); %} +// max = java.lang.Math.max(double a, double b) +instruct maxD_avx10_reg(regD dst, regD a, regD b) %{ + predicate(VM_Version::supports_avx10_2()); + match(Set dst (MaxD a b)); + format %{ "maxD $dst, $a, $b" %} + ins_encode %{ + __ eminmaxsd($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_MINMAX_MAX_COMPARE_SIGN); + %} + ins_pipe( pipe_slow ); +%} + // max = java.lang.Math.max(double a, double b) instruct maxD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{ - predicate(UseAVX > 0 && !VLoopReductions::is_reduction(n)); + predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n)); match(Set dst (MaxD a b)); effect(USE a, USE b, TEMP atmp, TEMP btmp, TEMP tmp); format %{ "maxD $dst, $a, $b \t! 
using $tmp, $atmp and $btmp as TEMP" %} @@ -4488,7 +4510,7 @@ instruct maxD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, %} instruct maxD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr) %{ - predicate(UseAVX > 0 && VLoopReductions::is_reduction(n)); + predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n)); match(Set dst (MaxD a b)); effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr); @@ -4500,9 +4522,20 @@ instruct maxD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRe ins_pipe( pipe_slow ); %} +// min = java.lang.Math.min(float a, float b) +instruct minF_avx10_reg(regF dst, regF a, regF b) %{ + predicate(VM_Version::supports_avx10_2()); + match(Set dst (MinF a b)); + format %{ "minF $dst, $a, $b" %} + ins_encode %{ + __ eminmaxss($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_MINMAX_MIN_COMPARE_SIGN); + %} + ins_pipe( pipe_slow ); +%} + // min = java.lang.Math.min(float a, float b) instruct minF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{ - predicate(UseAVX > 0 && !VLoopReductions::is_reduction(n)); + predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n)); match(Set dst (MinF a b)); effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp); format %{ "minF $dst, $a, $b \t! 
using $tmp, $atmp and $btmp as TEMP" %} @@ -4513,7 +4546,7 @@ instruct minF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, %} instruct minF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr) %{ - predicate(UseAVX > 0 && VLoopReductions::is_reduction(n)); + predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n)); match(Set dst (MinF a b)); effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr); @@ -4525,9 +4558,20 @@ instruct minF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRe ins_pipe( pipe_slow ); %} +// min = java.lang.Math.min(double a, double b) +instruct minD_avx10_reg(regD dst, regD a, regD b) %{ + predicate(VM_Version::supports_avx10_2()); + match(Set dst (MinD a b)); + format %{ "minD $dst, $a, $b" %} + ins_encode %{ + __ eminmaxsd($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_MINMAX_MIN_COMPARE_SIGN); + %} + ins_pipe( pipe_slow ); +%} + // min = java.lang.Math.min(double a, double b) instruct minD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{ - predicate(UseAVX > 0 && !VLoopReductions::is_reduction(n)); + predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n)); match(Set dst (MinD a b)); effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp); format %{ "minD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %} @@ -4538,7 +4582,7 @@ instruct minD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, %} instruct minD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr) %{ - predicate(UseAVX > 0 && VLoopReductions::is_reduction(n)); + predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n)); match(Set dst (MinD a b)); effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);