8378295: Update scalar AVX10 floating point min/max definitions

Reviewed-by: sviswanathan, mhaessig, jbhateja, sparasa
This commit is contained in:
Mohamed Issa 2026-03-27 04:56:30 +00:00 committed by Srinivas Vamsi Parasa
parent 5164fbc9f8
commit 1a99655554
11 changed files with 650 additions and 345 deletions

View File

@ -3472,7 +3472,7 @@ void Assembler::vmovdqu(XMMRegister dst, XMMRegister src) {
emit_int16(0x6F, (0xC0 | encode));
}
void Assembler::vmovw(XMMRegister dst, Register src) {
void Assembler::evmovw(XMMRegister dst, Register src) {
assert(VM_Version::supports_avx512_fp16(), "requires AVX512-FP16");
InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
attributes.set_is_evex_instruction();
@ -3480,7 +3480,7 @@ void Assembler::vmovw(XMMRegister dst, Register src) {
emit_int16(0x6E, (0xC0 | encode));
}
void Assembler::vmovw(Register dst, XMMRegister src) {
void Assembler::evmovw(Register dst, XMMRegister src) {
assert(VM_Version::supports_avx512_fp16(), "requires AVX512-FP16");
InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
attributes.set_is_evex_instruction();
@ -3488,6 +3488,36 @@ void Assembler::vmovw(Register dst, XMMRegister src) {
emit_int16(0x7E, (0xC0 | encode));
}
// Load a single 16-bit (half-precision) element from memory into an XMM
// register: EVEX-encoded VMOVW (F3-prefixed, opcode map 5, opcode 0x6E),
// scalar tuple (T1S) with a 16-bit memory operand.
// NOTE(review): this form is gated on AVX10.2 while the register forms check
// AVX512-FP16 — confirm the intended feature gate.
void Assembler::evmovw(XMMRegister dst, Address src) {
assert(VM_Version::supports_avx10_2(), "");
InstructionMark im(this);
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
// Scalar tuple type with a 16-bit element so EVEX disp8 compression scales correctly.
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_16bit);
attributes.set_is_evex_instruction();
vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_MAP5, &attributes);
emit_int8(0x6E);
emit_operand(dst, src, 0);
}
// Store the low 16-bit (half-precision) element of an XMM register to memory:
// EVEX-encoded VMOVW store form (F3-prefixed, opcode map 5, opcode 0x7E),
// scalar tuple (T1S) with a 16-bit memory operand.
void Assembler::evmovw(Address dst, XMMRegister src) {
assert(VM_Version::supports_avx10_2(), "");
InstructionMark im(this);
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
// Scalar tuple type with a 16-bit element so EVEX disp8 compression scales correctly.
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_16bit);
attributes.set_is_evex_instruction();
vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_MAP5, &attributes);
emit_int8(0x7E);
// For stores the XMM source takes the reg field of the ModRM byte.
emit_operand(src, dst, 0);
}
// Register-to-register move of a 16-bit (half-precision) element:
// EVEX-encoded VMOVW (F3-prefixed, opcode map 5, opcode 0x6E) with a
// ModRM register-direct operand (0xC0 | encode).
void Assembler::evmovw(XMMRegister dst, XMMRegister src) {
assert(VM_Version::supports_avx10_2(), "");
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
attributes.set_is_evex_instruction();
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_MAP5, &attributes);
emit_int16(0x6E, (0xC0 | encode));
}
void Assembler::vmovdqu(XMMRegister dst, Address src) {
assert(UseAVX > 0, "");
InstructionMark im(this);
@ -7310,6 +7340,42 @@ void Assembler::etzcntq(Register dst, Address src, bool no_flags) {
emit_operand(dst, src, 0);
}
// Unordered compare of the low half-precision element of dst against a
// 16-bit memory operand, setting EFLAGS (VUCOMISH: EVEX, no SIMD prefix,
// opcode map 5, opcode 0x2E). Scalar tuple (T1S), 16-bit memory element.
// Fix: add the missing CPU-feature assert — VUCOMISH is an AVX512-FP16
// instruction and every sibling EVEX MAP5 emitter in this file guards its
// feature requirement.
void Assembler::evucomish(XMMRegister dst, Address src) {
  assert(VM_Version::supports_avx512_fp16(), "requires AVX512-FP16");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  // Scalar tuple type with a 16-bit element so EVEX disp8 compression scales correctly.
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_16bit);
  attributes.set_is_evex_instruction();
  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_MAP5, &attributes);
  emit_int8(0x2E);
  emit_operand(dst, src, 0);
}
// Unordered compare of the low half-precision elements of two XMM registers,
// setting EFLAGS (VUCOMISH: EVEX, no SIMD prefix, opcode map 5, opcode 0x2E).
// Fix: add the missing CPU-feature assert — VUCOMISH is an AVX512-FP16
// instruction and every sibling EVEX MAP5 emitter in this file guards its
// feature requirement.
void Assembler::evucomish(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_avx512_fp16(), "requires AVX512-FP16");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_is_evex_instruction();
  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_MAP5, &attributes);
  emit_int16(0x2E, (0xC0 | encode));
}
// Extended unordered compare (AVX10.2 VUCOMXSH) of the low half-precision
// element of dst against a 16-bit memory operand, setting EFLAGS.
// Encoding: EVEX, F3 prefix, opcode map 5, opcode 0x2E; scalar tuple (T1S).
void Assembler::evucomxsh(XMMRegister dst, Address src) {
assert(VM_Version::supports_avx10_2(), "");
InstructionMark im(this);
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
// Scalar tuple type with a 16-bit element so EVEX disp8 compression scales correctly.
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_16bit);
attributes.set_is_evex_instruction();
vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_MAP5, &attributes);
emit_int8(0x2E);
emit_operand(dst, src, 0);
}
// Extended unordered compare (AVX10.2 VUCOMXSH) of the low half-precision
// elements of two XMM registers, setting EFLAGS.
// Encoding: EVEX, F3 prefix, opcode map 5, opcode 0x2E, register-direct ModRM.
void Assembler::evucomxsh(XMMRegister dst, XMMRegister src) {
assert(VM_Version::supports_avx10_2(), "");
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
attributes.set_is_evex_instruction();
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_MAP5, &attributes);
emit_int16(0x2E, (0xC0 | encode));
}
void Assembler::ucomisd(XMMRegister dst, Address src) {
InstructionMark im(this);
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
@ -7327,7 +7393,7 @@ void Assembler::ucomisd(XMMRegister dst, XMMRegister src) {
emit_int16(0x2E, (0xC0 | encode));
}
void Assembler::vucomxsd(XMMRegister dst, Address src) {
void Assembler::evucomxsd(XMMRegister dst, Address src) {
assert(VM_Version::supports_avx10_2(), "");
InstructionMark im(this);
InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
@ -7338,7 +7404,7 @@ void Assembler::vucomxsd(XMMRegister dst, Address src) {
emit_operand(dst, src, 0);
}
void Assembler::vucomxsd(XMMRegister dst, XMMRegister src) {
void Assembler::evucomxsd(XMMRegister dst, XMMRegister src) {
assert(VM_Version::supports_avx10_2(), "");
InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
attributes.set_is_evex_instruction();
@ -7361,7 +7427,7 @@ void Assembler::ucomiss(XMMRegister dst, XMMRegister src) {
emit_int16(0x2E, (0xC0 | encode));
}
void Assembler::vucomxss(XMMRegister dst, Address src) {
void Assembler::evucomxss(XMMRegister dst, Address src) {
assert(VM_Version::supports_avx10_2(), "");
InstructionMark im(this);
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
@ -7372,7 +7438,7 @@ void Assembler::vucomxss(XMMRegister dst, Address src) {
emit_operand(dst, src, 0);
}
void Assembler::vucomxss(XMMRegister dst, XMMRegister src) {
void Assembler::evucomxss(XMMRegister dst, XMMRegister src) {
assert(VM_Version::supports_avx10_2(), "");
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
attributes.set_is_evex_instruction();
@ -8411,30 +8477,6 @@ void Assembler::vmulsh(XMMRegister dst, XMMRegister nds, XMMRegister src) {
emit_int16(0x59, (0xC0 | encode));
}
// Scalar half-precision maximum: dst = max(nds, src) using AVX512-FP16
// VMAXSH (EVEX, F3 prefix, opcode map 5, opcode 0x5F).
void Assembler::vmaxsh(XMMRegister dst, XMMRegister nds, XMMRegister src) {
assert(VM_Version::supports_avx512_fp16(), "requires AVX512-FP16");
InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
attributes.set_is_evex_instruction();
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F3, VEX_OPCODE_MAP5, &attributes);
emit_int16(0x5F, (0xC0 | encode));
}
// Scalar half-precision min/max (AVX10.2 VMINMAXSH): EVEX, no SIMD prefix,
// opcode map 0F3A, opcode 0x53. imm8 selects the min/max operation variant
// (compare-sign control).
void Assembler::eminmaxsh(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8) {
assert(VM_Version::supports_avx10_2(), "");
InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
attributes.set_is_evex_instruction();
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3A, &attributes);
emit_int24(0x53, (0xC0 | encode), imm8);
}
// Scalar half-precision minimum: dst = min(nds, src) using AVX512-FP16
// VMINSH (EVEX, F3 prefix, opcode map 5, opcode 0x5D).
void Assembler::vminsh(XMMRegister dst, XMMRegister nds, XMMRegister src) {
assert(VM_Version::supports_avx512_fp16(), "requires AVX512-FP16");
InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
attributes.set_is_evex_instruction();
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F3, VEX_OPCODE_MAP5, &attributes);
emit_int16(0x5D, (0xC0 | encode));
}
void Assembler::vsqrtsh(XMMRegister dst, XMMRegister src) {
assert(VM_Version::supports_avx512_fp16(), "requires AVX512-FP16");
InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
@ -13369,48 +13411,38 @@ bool Assembler::is_demotable(bool no_flags, int dst_enc, int nds_enc) {
return (!no_flags && dst_enc == nds_enc);
}
// Scalar single-precision maximum: dst = max(nds, src) via VMAXSS
// (VEX, F3 prefix, opcode map 0F, opcode 0x5F).
void Assembler::vmaxss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
assert(VM_Version::supports_avx(), "");
InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
emit_int16(0x5F, (0xC0 | encode));
}
// Scalar double-precision maximum: dst = max(nds, src) via VMAXSD
// (VEX, F2 prefix, opcode map 0F, opcode 0x5F). W bit is set only under
// EVEX and marked revertible for demotion to the VEX form.
void Assembler::vmaxsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
assert(VM_Version::supports_avx(), "");
InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
attributes.set_rex_vex_w_reverted();
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int16(0x5F, (0xC0 | encode));
}
// Scalar single-precision minimum: dst = min(nds, src) via VMINSS
// (VEX, F3 prefix, opcode map 0F, opcode 0x5D).
void Assembler::vminss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
assert(VM_Version::supports_avx(), "");
InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
emit_int16(0x5D, (0xC0 | encode));
}
void Assembler::eminmaxss(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8) {
void Assembler::evminmaxsh(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int imm8) {
assert(VM_Version::supports_avx10_2(), "");
InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
attributes.set_is_evex_instruction();
attributes.set_embedded_opmask_register_specifier(mask);
if (merge) {
attributes.reset_is_clear_context();
}
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3A, &attributes);
emit_int24(0x53, (0xC0 | encode), imm8);
}
// Scalar single-precision min/max (AVX10.2 VMINMAXSS): EVEX, 66 prefix,
// opcode map 0F3A, opcode 0x53. imm8 selects the min/max operation variant.
// 'mask' is the embedded opmask register; with merge=true masked-off
// elements keep their previous dst contents (clear-context is reset),
// otherwise they are zeroed.
void Assembler::evminmaxss(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int imm8) {
assert(VM_Version::supports_avx10_2(), "");
InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
attributes.set_is_evex_instruction();
attributes.set_embedded_opmask_register_specifier(mask);
if (merge) {
attributes.reset_is_clear_context();
}
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int24(0x53, (0xC0 | encode), imm8);
}
// Scalar double-precision minimum: dst = min(nds, src) via VMINSD
// (VEX, F2 prefix, opcode map 0F, opcode 0x5D). W bit is set only under
// EVEX and marked revertible for demotion to the VEX form.
void Assembler::vminsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
assert(VM_Version::supports_avx(), "");
InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
attributes.set_rex_vex_w_reverted();
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int16(0x5D, (0xC0 | encode));
}
void Assembler::eminmaxsd(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8) {
void Assembler::evminmaxsd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int imm8) {
assert(VM_Version::supports_avx10_2(), "");
InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
attributes.set_is_evex_instruction();
attributes.set_embedded_opmask_register_specifier(mask);
if (merge) {
attributes.reset_is_clear_context();
}
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int24(0x53, (0xC0 | encode), imm8);
}

View File

@ -1694,8 +1694,11 @@ private:
void movsbl(Register dst, Address src);
void movsbl(Register dst, Register src);
void vmovw(XMMRegister dst, Register src);
void vmovw(Register dst, XMMRegister src);
void evmovw(XMMRegister dst, Register src);
void evmovw(Register dst, XMMRegister src);
void evmovw(XMMRegister dst, Address src);
void evmovw(Address dst, XMMRegister src);
void evmovw(XMMRegister dst, XMMRegister src);
void movsbq(Register dst, Address src);
void movsbq(Register dst, Register src);
@ -2329,17 +2332,23 @@ private:
void tzcntq(Register dst, Address src);
void etzcntq(Register dst, Address src, bool no_flags);
// Unordered Compare Scalar Half-Precision Floating-Point Values and set EFLAGS
void evucomish(XMMRegister dst, Address src);
void evucomish(XMMRegister dst, XMMRegister src);
void evucomxsh(XMMRegister dst, Address src);
void evucomxsh(XMMRegister dst, XMMRegister src);
// Unordered Compare Scalar Double-Precision Floating-Point Values and set EFLAGS
void ucomisd(XMMRegister dst, Address src);
void ucomisd(XMMRegister dst, XMMRegister src);
void vucomxsd(XMMRegister dst, Address src);
void vucomxsd(XMMRegister dst, XMMRegister src);
void evucomxsd(XMMRegister dst, Address src);
void evucomxsd(XMMRegister dst, XMMRegister src);
// Unordered Compare Scalar Single-Precision Floating-Point Values and set EFLAGS
void ucomiss(XMMRegister dst, Address src);
void ucomiss(XMMRegister dst, XMMRegister src);
void vucomxss(XMMRegister dst, Address src);
void vucomxss(XMMRegister dst, XMMRegister src);
void evucomxss(XMMRegister dst, Address src);
void evucomxss(XMMRegister dst, XMMRegister src);
void xabort(int8_t imm8);
@ -2417,11 +2426,6 @@ private:
void vsubss(XMMRegister dst, XMMRegister nds, Address src);
void vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src);
void vmaxss(XMMRegister dst, XMMRegister nds, XMMRegister src);
void vmaxsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
void vminss(XMMRegister dst, XMMRegister nds, XMMRegister src);
void vminsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
void sarxl(Register dst, Register src1, Register src2);
void sarxl(Register dst, Address src1, Register src2);
void sarxq(Register dst, Register src1, Register src2);
@ -2552,8 +2556,6 @@ private:
void vsubsh(XMMRegister dst, XMMRegister nds, XMMRegister src);
void vmulsh(XMMRegister dst, XMMRegister nds, XMMRegister src);
void vdivsh(XMMRegister dst, XMMRegister nds, XMMRegister src);
void vmaxsh(XMMRegister dst, XMMRegister nds, XMMRegister src);
void vminsh(XMMRegister dst, XMMRegister nds, XMMRegister src);
void vsqrtsh(XMMRegister dst, XMMRegister src);
void vfmadd132sh(XMMRegister dst, XMMRegister src1, XMMRegister src2);
@ -2790,9 +2792,9 @@ private:
void vminpd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
// AVX10.2 floating point minmax instructions
void eminmaxsh(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8);
void eminmaxss(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8);
void eminmaxsd(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8);
void evminmaxsh(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int imm8);
void evminmaxss(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int imm8);
void evminmaxsd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int imm8);
void evminmaxph(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int imm8, int vector_len);
void evminmaxph(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int imm8, int vector_len);
void evminmaxps(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int imm8, int vector_len);

View File

@ -1037,8 +1037,8 @@ void C2_MacroAssembler::evminmax_fp(int opcode, BasicType elem_bt,
}
}
void C2_MacroAssembler::vminmax_fp(int opc, BasicType elem_bt, XMMRegister dst, KRegister mask,
XMMRegister src1, XMMRegister src2, int vlen_enc) {
void C2_MacroAssembler::vminmax_fp_avx10_2(int opc, BasicType elem_bt, XMMRegister dst, KRegister mask,
XMMRegister src1, XMMRegister src2, int vlen_enc) {
assert(opc == Op_MinV || opc == Op_MinReductionV ||
opc == Op_MaxV || opc == Op_MaxReductionV, "sanity");
@ -1052,6 +1052,21 @@ void C2_MacroAssembler::vminmax_fp(int opc, BasicType elem_bt, XMMRegister dst,
}
}
// Scalar float/double min/max using the AVX10.2 VMINMAXS[S/D] instructions.
// The compare-sign immediate is derived from the opcode; the result is
// merge-masked under 'mask'.
void C2_MacroAssembler::sminmax_fp_avx10_2(int opc, BasicType elem_bt, XMMRegister dst, KRegister mask,
                                           XMMRegister src1, XMMRegister src2) {
  assert(opc == Op_MinF || opc == Op_MaxF ||
         opc == Op_MinD || opc == Op_MaxD, "sanity");
  // Min opcodes select the MIN compare-sign variant, max opcodes the MAX one.
  const bool is_min = (opc == Op_MinF || opc == Op_MinD);
  const int imm8 = is_min ? AVX10_2_MINMAX_MIN_COMPARE_SIGN
                          : AVX10_2_MINMAX_MAX_COMPARE_SIGN;
  if (elem_bt != T_FLOAT) {
    assert(elem_bt == T_DOUBLE, "");
    evminmaxsd(dst, mask, src1, src2, true, imm8);
  } else {
    evminmaxss(dst, mask, src1, src2, true, imm8);
  }
}
// Float/Double signum
void C2_MacroAssembler::signum_fp(int opcode, XMMRegister dst, XMMRegister zero, XMMRegister one) {
assert(opcode == Op_SignumF || opcode == Op_SignumD, "sanity");
@ -1063,7 +1078,7 @@ void C2_MacroAssembler::signum_fp(int opcode, XMMRegister dst, XMMRegister zero,
// If other floating point comparison instructions used, ZF=1 for equal and unordered cases
if (opcode == Op_SignumF) {
if (VM_Version::supports_avx10_2()) {
vucomxss(dst, zero);
evucomxss(dst, zero);
jcc(Assembler::negative, DONE_LABEL);
} else {
ucomiss(dst, zero);
@ -1074,7 +1089,7 @@ void C2_MacroAssembler::signum_fp(int opcode, XMMRegister dst, XMMRegister zero,
xorps(dst, ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), noreg);
} else if (opcode == Op_SignumD) {
if (VM_Version::supports_avx10_2()) {
vucomxsd(dst, zero);
evucomxsd(dst, zero);
jcc(Assembler::negative, DONE_LABEL);
} else {
ucomisd(dst, zero);
@ -2400,7 +2415,7 @@ void C2_MacroAssembler::reduceFloatMinMax(int opcode, int vlen, bool is_dst_vali
}
if (VM_Version::supports_avx10_2()) {
vminmax_fp(opcode, T_FLOAT, wdst, k0, wtmp, wsrc, vlen_enc);
vminmax_fp_avx10_2(opcode, T_FLOAT, wdst, k0, wtmp, wsrc, vlen_enc);
} else {
vminmax_fp(opcode, T_FLOAT, wdst, wtmp, wsrc, tmp, atmp, btmp, vlen_enc);
}
@ -2409,7 +2424,7 @@ void C2_MacroAssembler::reduceFloatMinMax(int opcode, int vlen, bool is_dst_vali
}
if (is_dst_valid) {
if (VM_Version::supports_avx10_2()) {
vminmax_fp(opcode, T_FLOAT, dst, k0, wdst, dst, Assembler::AVX_128bit);
vminmax_fp_avx10_2(opcode, T_FLOAT, dst, k0, wdst, dst, Assembler::AVX_128bit);
} else {
vminmax_fp(opcode, T_FLOAT, dst, wdst, dst, tmp, atmp, btmp, Assembler::AVX_128bit);
}
@ -2440,7 +2455,7 @@ void C2_MacroAssembler::reduceDoubleMinMax(int opcode, int vlen, bool is_dst_val
}
if (VM_Version::supports_avx10_2()) {
vminmax_fp(opcode, T_DOUBLE, wdst, k0, wtmp, wsrc, vlen_enc);
vminmax_fp_avx10_2(opcode, T_DOUBLE, wdst, k0, wtmp, wsrc, vlen_enc);
} else {
vminmax_fp(opcode, T_DOUBLE, wdst, wtmp, wsrc, tmp, atmp, btmp, vlen_enc);
}
@ -2451,7 +2466,7 @@ void C2_MacroAssembler::reduceDoubleMinMax(int opcode, int vlen, bool is_dst_val
if (is_dst_valid) {
if (VM_Version::supports_avx10_2()) {
vminmax_fp(opcode, T_DOUBLE, dst, k0, wdst, dst, Assembler::AVX_128bit);
vminmax_fp_avx10_2(opcode, T_DOUBLE, dst, k0, wdst, dst, Assembler::AVX_128bit);
} else {
vminmax_fp(opcode, T_DOUBLE, dst, wdst, dst, tmp, atmp, btmp, Assembler::AVX_128bit);
}
@ -7061,13 +7076,25 @@ void C2_MacroAssembler::evfp16ph(int opcode, XMMRegister dst, XMMRegister src1,
}
}
void C2_MacroAssembler::scalar_max_min_fp16(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2,
KRegister ktmp, XMMRegister xtmp1, XMMRegister xtmp2) {
vector_max_min_fp16(opcode, dst, src1, src2, ktmp, xtmp1, xtmp2, Assembler::AVX_128bit);
// Scalar Float16 min/max: delegates to the 128-bit vector helper, which
// handles the scalar Op_MinHF/Op_MaxHF opcodes as well.
void C2_MacroAssembler::sminmax_fp16(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2,
KRegister ktmp, XMMRegister xtmp1, XMMRegister xtmp2) {
vminmax_fp16(opcode, dst, src1, src2, ktmp, xtmp1, xtmp2, Assembler::AVX_128bit);
}
void C2_MacroAssembler::vector_max_min_fp16(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2,
KRegister ktmp, XMMRegister xtmp1, XMMRegister xtmp2, int vlen_enc) {
// Scalar Float16 min/max using the AVX10.2 VMINMAXSH instruction.
// dst = min/max(src1, src2), merge-masked under ktmp; the operation is
// selected through the compare-sign immediate.
void C2_MacroAssembler::sminmax_fp16_avx10_2(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2,
                                             KRegister ktmp) {
  int imm8;
  if (opcode == Op_MaxHF) {
    // dst = max(src1, src2)
    imm8 = AVX10_2_MINMAX_MAX_COMPARE_SIGN;
  } else {
    assert(opcode == Op_MinHF, "");
    // dst = min(src1, src2)
    imm8 = AVX10_2_MINMAX_MIN_COMPARE_SIGN;
  }
  evminmaxsh(dst, ktmp, src1, src2, true, imm8);
}
void C2_MacroAssembler::vminmax_fp16(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2,
KRegister ktmp, XMMRegister xtmp1, XMMRegister xtmp2, int vlen_enc) {
if (opcode == Op_MaxVHF || opcode == Op_MaxHF) {
// Move sign bits of src2 to mask register.
evpmovw2m(ktmp, src2, vlen_enc);
@ -7110,3 +7137,27 @@ void C2_MacroAssembler::vector_max_min_fp16(int opcode, XMMRegister dst, XMMRegi
Assembler::evmovdquw(dst, ktmp, xtmp1, true, vlen_enc);
}
}
// Vector Float16 min/max using the AVX10.2 VMINMAXPH instruction
// (register-register form). dst = min/max(src1, src2) per lane,
// merge-masked under ktmp.
void C2_MacroAssembler::vminmax_fp16_avx10_2(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2,
                                             KRegister ktmp, int vlen_enc) {
  int imm8;
  if (opcode == Op_MaxVHF) {
    // dst = max(src1, src2)
    imm8 = AVX10_2_MINMAX_MAX_COMPARE_SIGN;
  } else {
    assert(opcode == Op_MinVHF, "");
    // dst = min(src1, src2)
    imm8 = AVX10_2_MINMAX_MIN_COMPARE_SIGN;
  }
  evminmaxph(dst, ktmp, src1, src2, true, imm8, vlen_enc);
}
// Vector Float16 min/max using the AVX10.2 VMINMAXPH instruction
// (register-memory form). dst = min/max(src1, [src2]) per lane,
// merge-masked under ktmp.
void C2_MacroAssembler::vminmax_fp16_avx10_2(int opcode, XMMRegister dst, XMMRegister src1, Address src2,
                                             KRegister ktmp, int vlen_enc) {
  int imm8;
  if (opcode == Op_MaxVHF) {
    // dst = max(src1, src2)
    imm8 = AVX10_2_MINMAX_MAX_COMPARE_SIGN;
  } else {
    assert(opcode == Op_MinVHF, "");
    // dst = min(src1, src2)
    imm8 = AVX10_2_MINMAX_MIN_COMPARE_SIGN;
  }
  evminmaxph(dst, ktmp, src1, src2, true, imm8, vlen_enc);
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, 2025, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2020, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -67,8 +67,11 @@ public:
XMMRegister tmp, XMMRegister atmp, XMMRegister btmp,
int vlen_enc);
void vminmax_fp(int opc, BasicType elem_bt, XMMRegister dst, KRegister mask,
XMMRegister src1, XMMRegister src2, int vlen_enc);
void vminmax_fp_avx10_2(int opc, BasicType elem_bt, XMMRegister dst, KRegister mask,
XMMRegister src1, XMMRegister src2, int vlen_enc);
void sminmax_fp_avx10_2(int opc, BasicType elem_bt, XMMRegister dst, KRegister mask,
XMMRegister src1, XMMRegister src2);
void vpuminmaxq(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2, XMMRegister xtmp1, XMMRegister xtmp2, int vlen_enc);
@ -576,11 +579,20 @@ public:
void evfp16ph(int opcode, XMMRegister dst, XMMRegister src1, Address src2, int vlen_enc);
void vector_max_min_fp16(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2,
KRegister ktmp, XMMRegister xtmp1, XMMRegister xtmp2, int vlen_enc);
void vminmax_fp16(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2,
KRegister ktmp, XMMRegister xtmp1, XMMRegister xtmp2, int vlen_enc);
void scalar_max_min_fp16(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2,
KRegister ktmp, XMMRegister xtmp1, XMMRegister xtmp2);
void vminmax_fp16_avx10_2(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2,
KRegister ktmp, int vlen_enc);
void vminmax_fp16_avx10_2(int opcode, XMMRegister dst, XMMRegister src1, Address src2,
KRegister ktmp, int vlen_enc);
void sminmax_fp16(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2,
KRegister ktmp, XMMRegister xtmp1, XMMRegister xtmp2);
void sminmax_fp16_avx10_2(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2,
KRegister ktmp);
void reconstruct_frame_pointer(Register rtmp);

View File

@ -1958,6 +1958,16 @@ void MacroAssembler::movflt(XMMRegister dst, AddressLiteral src, Register rscrat
}
}
// Move a half-precision (16-bit) float between XMM registers. With AVX10.2
// the direct XMM-to-XMM evmovw form is used; otherwise the value is bounced
// through a GPR scratch register via the evmovw GPR forms, so 'rscratch'
// must be supplied on pre-AVX10.2 hardware.
void MacroAssembler::movhlf(XMMRegister dst, XMMRegister src, Register rscratch) {
if (VM_Version::supports_avx10_2()) {
evmovw(dst, src);
} else {
assert(rscratch != noreg, "missing");
// XMM -> GPR, then GPR -> XMM (16-bit element each way).
evmovw(rscratch, src);
evmovw(dst, rscratch);
}
}
void MacroAssembler::mov64(Register dst, int64_t imm64) {
if (is_uimm32(imm64)) {
movl(dst, checked_cast<uint32_t>(imm64));
@ -2661,14 +2671,14 @@ void MacroAssembler::ucomisd(XMMRegister dst, AddressLiteral src, Register rscra
}
}
void MacroAssembler::vucomxsd(XMMRegister dst, AddressLiteral src, Register rscratch) {
// VUCOMXSD against a possibly far AddressLiteral. If the literal is not
// RIP-reachable, its address is first materialized in 'rscratch'.
void MacroAssembler::evucomxsd(XMMRegister dst, AddressLiteral src, Register rscratch) {
  assert(rscratch != noreg || always_reachable(src), "missing");

  if (!reachable(src)) {
    // Far literal: load the address into the scratch register first.
    lea(rscratch, src);
    Assembler::evucomxsd(dst, Address(rscratch, 0));
  } else {
    Assembler::evucomxsd(dst, as_Address(src));
  }
}
@ -2683,14 +2693,36 @@ void MacroAssembler::ucomiss(XMMRegister dst, AddressLiteral src, Register rscra
}
}
void MacroAssembler::vucomxss(XMMRegister dst, AddressLiteral src, Register rscratch) {
// VUCOMXSS against a possibly far AddressLiteral. If the literal is not
// RIP-reachable, its address is first materialized in 'rscratch'.
void MacroAssembler::evucomxss(XMMRegister dst, AddressLiteral src, Register rscratch) {
  assert(rscratch != noreg || always_reachable(src), "missing");

  if (!reachable(src)) {
    // Far literal: load the address into the scratch register first.
    lea(rscratch, src);
    Assembler::evucomxss(dst, Address(rscratch, 0));
  } else {
    Assembler::evucomxss(dst, as_Address(src));
  }
}
// VUCOMISH against a possibly far AddressLiteral. If the literal is not
// RIP-reachable, its address is first materialized in 'rscratch'.
void MacroAssembler::evucomish(XMMRegister dst, AddressLiteral src, Register rscratch) {
  assert(rscratch != noreg || always_reachable(src), "missing");

  if (!reachable(src)) {
    // Far literal: load the address into the scratch register first.
    lea(rscratch, src);
    Assembler::evucomish(dst, Address(rscratch, 0));
  } else {
    Assembler::evucomish(dst, as_Address(src));
  }
}
// VUCOMXSH against a possibly far AddressLiteral. If the literal is not
// RIP-reachable, its address is first materialized in 'rscratch'.
void MacroAssembler::evucomxsh(XMMRegister dst, AddressLiteral src, Register rscratch) {
  assert(rscratch != noreg || always_reachable(src), "missing");

  if (!reachable(src)) {
    // Far literal: load the address into the scratch register first.
    lea(rscratch, src);
    Assembler::evucomxsh(dst, Address(rscratch, 0));
  } else {
    Assembler::evucomxsh(dst, as_Address(src));
  }
}
@ -9163,7 +9195,7 @@ void MacroAssembler::evpmaxs(BasicType type, XMMRegister dst, KRegister mask, XM
case T_FLOAT:
evminmaxps(dst, mask, nds, src, merge, AVX10_2_MINMAX_MAX_COMPARE_SIGN, vector_len); break;
case T_DOUBLE:
evminmaxps(dst, mask, nds, src, merge, AVX10_2_MINMAX_MAX_COMPARE_SIGN, vector_len); break;
evminmaxpd(dst, mask, nds, src, merge, AVX10_2_MINMAX_MAX_COMPARE_SIGN, vector_len); break;
default:
fatal("Unexpected type argument %s", type2name(type)); break;
}

View File

@ -162,6 +162,8 @@ class MacroAssembler: public Assembler {
void incrementq(AddressLiteral dst, Register rscratch = noreg);
void movhlf(XMMRegister dst, XMMRegister src, Register rscratch = noreg);
// Support optimal SSE move instructions.
void movflt(XMMRegister dst, XMMRegister src) {
if (dst-> encoding() == src->encoding()) return;
@ -1308,21 +1310,29 @@ public:
void subss(XMMRegister dst, Address src) { Assembler::subss(dst, src); }
void subss(XMMRegister dst, AddressLiteral src, Register rscratch = noreg);
void evucomish(XMMRegister dst, XMMRegister src) { Assembler::evucomish(dst, src); }
void evucomish(XMMRegister dst, Address src) { Assembler::evucomish(dst, src); }
void evucomish(XMMRegister dst, AddressLiteral src, Register rscratch = noreg);
void evucomxsh(XMMRegister dst, XMMRegister src) { Assembler::evucomxsh(dst, src); }
void evucomxsh(XMMRegister dst, Address src) { Assembler::evucomxsh(dst, src); }
void evucomxsh(XMMRegister dst, AddressLiteral src, Register rscratch = noreg);
void ucomiss(XMMRegister dst, XMMRegister src) { Assembler::ucomiss(dst, src); }
void ucomiss(XMMRegister dst, Address src) { Assembler::ucomiss(dst, src); }
void ucomiss(XMMRegister dst, AddressLiteral src, Register rscratch = noreg);
void vucomxss(XMMRegister dst, XMMRegister src) { Assembler::vucomxss(dst, src); }
void vucomxss(XMMRegister dst, Address src) { Assembler::vucomxss(dst, src); }
void vucomxss(XMMRegister dst, AddressLiteral src, Register rscratch = noreg);
void evucomxss(XMMRegister dst, XMMRegister src) { Assembler::evucomxss(dst, src); }
void evucomxss(XMMRegister dst, Address src) { Assembler::evucomxss(dst, src); }
void evucomxss(XMMRegister dst, AddressLiteral src, Register rscratch = noreg);
void ucomisd(XMMRegister dst, XMMRegister src) { Assembler::ucomisd(dst, src); }
void ucomisd(XMMRegister dst, Address src) { Assembler::ucomisd(dst, src); }
void ucomisd(XMMRegister dst, AddressLiteral src, Register rscratch = noreg);
void vucomxsd(XMMRegister dst, XMMRegister src) { Assembler::vucomxsd(dst, src); }
void vucomxsd(XMMRegister dst, Address src) { Assembler::vucomxsd(dst, src); }
void vucomxsd(XMMRegister dst, AddressLiteral src, Register rscratch = noreg);
void evucomxsd(XMMRegister dst, XMMRegister src) { Assembler::evucomxsd(dst, src); }
void evucomxsd(XMMRegister dst, Address src) { Assembler::evucomxsd(dst, src); }
void evucomxsd(XMMRegister dst, AddressLiteral src, Register rscratch = noreg);
// Bitwise Logical XOR of Packed Double-Precision Floating-Point Values
void xorpd(XMMRegister dst, XMMRegister src);

View File

@ -1708,84 +1708,99 @@ static void emit_cmpfp3(MacroAssembler* masm, Register dst) {
__ bind(done);
}
// Math.min() # Math.max()
// --------------------------
// ucomis[s/d] #
// ja -> b # a
// jp -> NaN # NaN
// jb -> a # b
// je #
// |-jz -> a | b # a & b
// | -> a #
// Scalar floating-point precision selector shared by the min/max stub
// emitters below (emit_fp_ucom, movfp, emit_fp_min_max).
enum FP_PREC {
fp_prec_hlf, // half precision (16-bit)
fp_prec_flt, // single precision (32-bit)
fp_prec_dbl // double precision (64-bit)
};
// Emit the unordered scalar FP compare (p vs q, EFLAGS-setting) matching the
// requested precision: VUCOMISH for half, UCOMISS for float, UCOMISD for double.
static inline void emit_fp_ucom(MacroAssembler* masm, enum FP_PREC pt,
                                XMMRegister p, XMMRegister q) {
  switch (pt) {
  case fp_prec_hlf:
    __ evucomish(p, q);
    break;
  case fp_prec_flt:
    __ ucomiss(p, q);
    break;
  default:
    __ ucomisd(p, q);
    break;
  }
}
// Emit a scalar FP register move of the requested precision. The half-
// precision path may bounce through 'scratch' on pre-AVX10.2 hardware
// (see MacroAssembler::movhlf); float/double moves ignore it.
static inline void movfp(MacroAssembler* masm, enum FP_PREC pt,
                         XMMRegister dst, XMMRegister src, Register scratch) {
  switch (pt) {
  case fp_prec_hlf:
    __ movhlf(dst, src, scratch);
    break;
  case fp_prec_flt:
    __ movflt(dst, src);
    break;
  default:
    __ movdbl(dst, src);
    break;
  }
}
// Math.min() # Math.max()
// -----------------------------
// (v)ucomis[h/s/d] #
// ja -> b # a
// jp -> NaN # NaN
// jb -> a # b
// je #
// |-jz -> a | b # a & b
// | -> a #
static void emit_fp_min_max(MacroAssembler* masm, XMMRegister dst,
XMMRegister a, XMMRegister b,
XMMRegister xmmt, Register rt,
bool min, bool single) {
bool min, enum FP_PREC pt) {
Label nan, zero, below, above, done;
if (single)
__ ucomiss(a, b);
else
__ ucomisd(a, b);
emit_fp_ucom(masm, pt, a, b);
if (dst->encoding() != (min ? b : a)->encoding())
if (dst->encoding() != (min ? b : a)->encoding()) {
__ jccb(Assembler::above, above); // CF=0 & ZF=0
else
} else {
__ jccb(Assembler::above, done);
}
__ jccb(Assembler::parity, nan); // PF=1
__ jccb(Assembler::below, below); // CF=1
// equal
__ vpxor(xmmt, xmmt, xmmt, Assembler::AVX_128bit);
if (single) {
__ ucomiss(a, xmmt);
__ jccb(Assembler::equal, zero);
emit_fp_ucom(masm, pt, a, xmmt);
__ movflt(dst, a);
__ jmp(done);
}
else {
__ ucomisd(a, xmmt);
__ jccb(Assembler::equal, zero);
__ jccb(Assembler::equal, zero);
movfp(masm, pt, dst, a, rt);
__ movdbl(dst, a);
__ jmp(done);
}
__ jmp(done);
__ bind(zero);
if (min)
if (min) {
__ vpor(dst, a, b, Assembler::AVX_128bit);
else
} else {
__ vpand(dst, a, b, Assembler::AVX_128bit);
}
__ jmp(done);
__ bind(above);
if (single)
__ movflt(dst, min ? b : a);
else
__ movdbl(dst, min ? b : a);
movfp(masm, pt, dst, min ? b : a, rt);
__ jmp(done);
__ bind(nan);
if (single) {
if (pt == fp_prec_hlf) {
__ movl(rt, 0x00007e00); // Float16.NaN
__ evmovw(dst, rt);
} else if (pt == fp_prec_flt) {
__ movl(rt, 0x7fc00000); // Float.NaN
__ movdl(dst, rt);
}
else {
} else {
__ mov64(rt, 0x7ff8000000000000L); // Double.NaN
__ movdq(dst, rt);
}
__ jmp(done);
__ bind(below);
if (single)
__ movflt(dst, min ? a : b);
else
__ movdbl(dst, min ? a : b);
movfp(masm, pt, dst, min ? a : b, rt);
__ bind(done);
}
@ -7345,146 +7360,140 @@ instruct loadAOTRCAddress(rRegP dst, immAOTRuntimeConstantsAddress con)
ins_pipe(ialu_reg_fat);
%}
// min = java.lang.Math.min(float a, float b)
// max = java.lang.Math.max(float a, float b)
instruct maxF_reg_avx10_2(regF dst, regF a, regF b) %{
predicate(VM_Version::supports_avx10_2());
instruct minmaxF_reg_avx10_2(regF dst, regF a, regF b)
%{
predicate(VM_Version::supports_avx10_2() && !VLoopReductions::is_reduction(n));
match(Set dst (MaxF a b));
format %{ "maxF $dst, $a, $b" %}
ins_encode %{
__ eminmaxss($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MAX_COMPARE_SIGN);
%}
ins_pipe( pipe_slow );
%}
// max = java.lang.Math.max(float a, float b)
instruct maxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
match(Set dst (MaxF a b));
effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
format %{ "maxF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
ins_encode %{
__ vminmax_fp(Op_MaxV, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
%}
ins_pipe( pipe_slow );
%}
instruct maxF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr) %{
predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
match(Set dst (MaxF a b));
effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
format %{ "maxF_reduction $dst, $a, $b \t!using $xtmp and $rtmp as TEMP" %}
ins_encode %{
emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
false /*min*/, true /*single*/);
%}
ins_pipe( pipe_slow );
%}
// max = java.lang.Math.max(double a, double b)
instruct maxD_reg_avx10_2(regD dst, regD a, regD b) %{
predicate(VM_Version::supports_avx10_2());
match(Set dst (MaxD a b));
format %{ "maxD $dst, $a, $b" %}
ins_encode %{
__ eminmaxsd($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MAX_COMPARE_SIGN);
%}
ins_pipe( pipe_slow );
%}
// max = java.lang.Math.max(double a, double b)
instruct maxD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
match(Set dst (MaxD a b));
effect(USE a, USE b, TEMP atmp, TEMP btmp, TEMP tmp);
format %{ "maxD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
ins_encode %{
__ vminmax_fp(Op_MaxV, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
%}
ins_pipe( pipe_slow );
%}
instruct maxD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr) %{
predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
match(Set dst (MaxD a b));
effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
format %{ "maxD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
ins_encode %{
emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
false /*min*/, false /*single*/);
%}
ins_pipe( pipe_slow );
%}
// max = java.lang.Math.min(float a, float b)
instruct minF_reg_avx10_2(regF dst, regF a, regF b) %{
predicate(VM_Version::supports_avx10_2());
match(Set dst (MinF a b));
format %{ "minF $dst, $a, $b" %}
format %{ "minmaxF $dst, $a, $b" %}
ins_encode %{
__ eminmaxss($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MIN_COMPARE_SIGN);
int opcode = this->ideal_Opcode();
__ sminmax_fp_avx10_2(opcode, T_FLOAT, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
instruct minmaxF_reduction_reg_avx10_2(regF dst, regF a, regF b, regF xtmp, rRegI rtmp, rFlagsReg cr)
%{
predicate(VM_Version::supports_avx10_2() && VLoopReductions::is_reduction(n));
match(Set dst (MaxF a b));
match(Set dst (MinF a b));
effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
format %{ "minmaxF_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
ins_encode %{
int opcode = this->ideal_Opcode();
bool min = (opcode == Op_MinF) ? true : false;
emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
min, fp_prec_flt /*pt*/);
%}
ins_pipe( pipe_slow );
%}
// min = java.lang.Math.min(float a, float b)
instruct minF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
// max = java.lang.Math.max(float a, float b)
instruct minmaxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp)
%{
predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
match(Set dst (MaxF a b));
match(Set dst (MinF a b));
effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
format %{ "minF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
format %{ "minmaxF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
ins_encode %{
__ vminmax_fp(Op_MinV, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
int opcode = this->ideal_Opcode();
int param_opcode = (opcode == Op_MinF) ? Op_MinV : Op_MaxV;
__ vminmax_fp(param_opcode, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister,
$atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
%}
ins_pipe( pipe_slow );
%}
instruct minF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr) %{
instruct minmaxF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr)
%{
predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
match(Set dst (MaxF a b));
match(Set dst (MinF a b));
effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
format %{ "minF_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
format %{ "minmaxF_reduction $dst, $a, $b \t!using $xtmp and $rtmp as TEMP" %}
ins_encode %{
int opcode = this->ideal_Opcode();
bool min = (opcode == Op_MinF) ? true : false;
emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
true /*min*/, true /*single*/);
%}
ins_pipe( pipe_slow );
%}
// max = java.lang.Math.min(double a, double b)
instruct minD_reg_avx10_2(regD dst, regD a, regD b) %{
predicate(VM_Version::supports_avx10_2());
match(Set dst (MinD a b));
format %{ "minD $dst, $a, $b" %}
ins_encode %{
__ eminmaxsd($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MIN_COMPARE_SIGN);
min, fp_prec_flt /*pt*/);
%}
ins_pipe( pipe_slow );
%}
// min = java.lang.Math.min(double a, double b)
instruct minD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
// max = java.lang.Math.max(double a, double b)
instruct minmaxD_reg_avx10_2(regD dst, regD a, regD b)
%{
predicate(VM_Version::supports_avx10_2() && !VLoopReductions::is_reduction(n));
match(Set dst (MaxD a b));
match(Set dst (MinD a b));
effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
format %{ "minD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
format %{ "minmaxD $dst, $a, $b" %}
ins_encode %{
__ vminmax_fp(Op_MinV, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
int opcode = this->ideal_Opcode();
__ sminmax_fp_avx10_2(opcode, T_DOUBLE, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
instruct minD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr) %{
predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
instruct minmaxD_reduction_reg_avx10_2(regD dst, regD a, regD b, regD xtmp, rRegI rtmp, rFlagsReg cr)
%{
predicate(VM_Version::supports_avx10_2() && VLoopReductions::is_reduction(n));
match(Set dst (MaxD a b));
match(Set dst (MinD a b));
effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
format %{ "maxD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
format %{ "minmaxD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
ins_encode %{
int opcode = this->ideal_Opcode();
bool min = (opcode == Op_MinD) ? true : false;
emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
true /*min*/, false /*single*/);
min, fp_prec_dbl /*pt*/);
%}
ins_pipe( pipe_slow );
%}
// min = java.lang.Math.min(double a, double b)
// max = java.lang.Math.max(double a, double b)
instruct minmaxD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp)
%{
predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
match(Set dst (MaxD a b));
match(Set dst (MinD a b));
effect(USE a, USE b, TEMP atmp, TEMP btmp, TEMP tmp);
format %{ "minmaxD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
ins_encode %{
int opcode = this->ideal_Opcode();
int param_opcode = (opcode == Op_MinD) ? Op_MinV : Op_MaxV;
__ vminmax_fp(param_opcode, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister,
$atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
%}
ins_pipe( pipe_slow );
%}
instruct minmaxD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr)
%{
predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
match(Set dst (MaxD a b));
match(Set dst (MinD a b));
effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
format %{ "minmaxD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
ins_encode %{
int opcode = this->ideal_Opcode();
bool min = (opcode == Op_MinD) ? true : false;
emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
min, fp_prec_dbl /*pt*/);
%}
ins_pipe( pipe_slow );
%}
@ -14394,9 +14403,9 @@ instruct cmpF_cc_regCFE(rFlagsRegUCFE cr, regF src1, regF src2) %{
match(Set cr (CmpF src1 src2));
ins_cost(100);
format %{ "vucomxss $src1, $src2" %}
format %{ "evucomxss $src1, $src2" %}
ins_encode %{
__ vucomxss($src1$$XMMRegister, $src2$$XMMRegister);
__ evucomxss($src1$$XMMRegister, $src2$$XMMRegister);
%}
ins_pipe(pipe_slow);
%}
@ -14416,9 +14425,9 @@ instruct cmpF_cc_memCFE(rFlagsRegUCFE cr, regF src1, memory src2) %{
match(Set cr (CmpF src1 (LoadF src2)));
ins_cost(100);
format %{ "vucomxss $src1, $src2" %}
format %{ "evucomxss $src1, $src2" %}
ins_encode %{
__ vucomxss($src1$$XMMRegister, $src2$$Address);
__ evucomxss($src1$$XMMRegister, $src2$$Address);
%}
ins_pipe(pipe_slow);
%}
@ -14438,9 +14447,9 @@ instruct cmpF_cc_immCFE(rFlagsRegUCFE cr, regF src, immF con) %{
match(Set cr (CmpF src con));
ins_cost(100);
format %{ "vucomxss $src, [$constantaddress]\t# load from constant table: float=$con" %}
format %{ "evucomxss $src, [$constantaddress]\t# load from constant table: float=$con" %}
ins_encode %{
__ vucomxss($src$$XMMRegister, $constantaddress($con));
__ evucomxss($src$$XMMRegister, $constantaddress($con));
%}
ins_pipe(pipe_slow);
%}
@ -14479,9 +14488,9 @@ instruct cmpD_cc_regCFE(rFlagsRegUCFE cr, regD src1, regD src2) %{
match(Set cr (CmpD src1 src2));
ins_cost(100);
format %{ "vucomxsd $src1, $src2 test" %}
format %{ "evucomxsd $src1, $src2 test" %}
ins_encode %{
__ vucomxsd($src1$$XMMRegister, $src2$$XMMRegister);
__ evucomxsd($src1$$XMMRegister, $src2$$XMMRegister);
%}
ins_pipe(pipe_slow);
%}
@ -14501,9 +14510,9 @@ instruct cmpD_cc_memCFE(rFlagsRegUCFE cr, regD src1, memory src2) %{
match(Set cr (CmpD src1 (LoadD src2)));
ins_cost(100);
format %{ "vucomxsd $src1, $src2" %}
format %{ "evucomxsd $src1, $src2" %}
ins_encode %{
__ vucomxsd($src1$$XMMRegister, $src2$$Address);
__ evucomxsd($src1$$XMMRegister, $src2$$Address);
%}
ins_pipe(pipe_slow);
%}
@ -14522,9 +14531,9 @@ instruct cmpD_cc_immCFE(rFlagsRegUCFE cr, regD src, immD con) %{
match(Set cr (CmpD src con));
ins_cost(100);
format %{ "vucomxsd $src, [$constantaddress]\t# load from constant table: double=$con" %}
format %{ "evucomxsd $src, [$constantaddress]\t# load from constant table: double=$con" %}
ins_encode %{
__ vucomxsd($src$$XMMRegister, $constantaddress($con));
__ evucomxsd($src$$XMMRegister, $constantaddress($con));
%}
ins_pipe(pipe_slow);
%}
@ -18832,7 +18841,7 @@ instruct ReplHF_reg(vec dst, regF src, rRegI rtmp) %{
format %{ "replicateHF $dst, $src \t! using $rtmp as TEMP" %}
ins_encode %{
int vlen_enc = vector_length_encoding(this);
__ vmovw($rtmp$$Register, $src$$XMMRegister);
__ evmovw($rtmp$$Register, $src$$XMMRegister);
__ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
%}
ins_pipe( pipe_slow );
@ -20947,7 +20956,7 @@ instruct minmaxFP_reg_avx10_2(vec dst, vec a, vec b) %{
int vlen_enc = vector_length_encoding(this);
int opcode = this->ideal_Opcode();
BasicType elem_bt = Matcher::vector_element_basic_type(this);
__ vminmax_fp(opcode, elem_bt, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
__ vminmax_fp_avx10_2(opcode, elem_bt, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
@ -25291,9 +25300,9 @@ instruct vector_selectfrom_twovectors_reg_evex(vec index, vec src1, vec src2)
instruct reinterpretS2HF(regF dst, rRegI src)
%{
match(Set dst (ReinterpretS2HF src));
format %{ "vmovw $dst, $src" %}
format %{ "evmovw $dst, $src" %}
ins_encode %{
__ vmovw($dst$$XMMRegister, $src$$Register);
__ evmovw($dst$$XMMRegister, $src$$Register);
%}
ins_pipe(pipe_slow);
%}
@ -25301,9 +25310,9 @@ instruct reinterpretS2HF(regF dst, rRegI src)
instruct reinterpretHF2S(rRegI dst, regF src)
%{
match(Set dst (ReinterpretHF2S src));
format %{ "vmovw $dst, $src" %}
format %{ "evmovw $dst, $src" %}
ins_encode %{
__ vmovw($dst$$Register, $src$$XMMRegister);
__ evmovw($dst$$Register, $src$$XMMRegister);
%}
ins_pipe(pipe_slow);
%}
@ -25357,10 +25366,11 @@ instruct scalar_minmax_HF_reg_avx10_2(regF dst, regF src1, regF src2)
predicate(VM_Version::supports_avx10_2());
match(Set dst (MaxHF src1 src2));
match(Set dst (MinHF src1 src2));
format %{ "scalar_min_max_fp16 $dst, $src1, $src2" %}
ins_encode %{
int function = this->ideal_Opcode() == Op_MinHF ? AVX10_2_MINMAX_MIN_COMPARE_SIGN : AVX10_2_MINMAX_MAX_COMPARE_SIGN;
__ eminmaxsh($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, function);
int opcode = this->ideal_Opcode();
__ sminmax_fp16_avx10_2(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, k0);
%}
ins_pipe( pipe_slow );
%}
@ -25371,11 +25381,12 @@ instruct scalar_minmax_HF_reg(regF dst, regF src1, regF src2, kReg ktmp, regF xt
match(Set dst (MaxHF src1 src2));
match(Set dst (MinHF src1 src2));
effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
format %{ "scalar_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
ins_encode %{
int opcode = this->ideal_Opcode();
__ scalar_max_min_fp16(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $ktmp$$KRegister,
$xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
__ sminmax_fp16(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $ktmp$$KRegister,
$xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
@ -25475,8 +25486,9 @@ instruct vector_minmax_HF_mem_avx10_2(vec dst, vec src1, memory src2)
format %{ "vector_min_max_fp16_mem $dst, $src1, $src2" %}
ins_encode %{
int vlen_enc = vector_length_encoding(this);
int function = this->ideal_Opcode() == Op_MinVHF ? AVX10_2_MINMAX_MIN_COMPARE_SIGN : AVX10_2_MINMAX_MAX_COMPARE_SIGN;
__ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$Address, true, function, vlen_enc);
int opcode = this->ideal_Opcode();
__ vminmax_fp16_avx10_2(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address,
k0, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
@ -25489,8 +25501,9 @@ instruct vector_minmax_HF_reg_avx10_2(vec dst, vec src1, vec src2)
format %{ "vector_min_max_fp16 $dst, $src1, $src2" %}
ins_encode %{
int vlen_enc = vector_length_encoding(this);
int function = this->ideal_Opcode() == Op_MinVHF ? AVX10_2_MINMAX_MIN_COMPARE_SIGN : AVX10_2_MINMAX_MAX_COMPARE_SIGN;
__ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, true, function, vlen_enc);
int opcode = this->ideal_Opcode();
__ vminmax_fp16_avx10_2(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
k0, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
@ -25505,8 +25518,8 @@ instruct vector_minmax_HF_reg(vec dst, vec src1, vec src2, kReg ktmp, vec xtmp1,
ins_encode %{
int vlen_enc = vector_length_encoding(this);
int opcode = this->ideal_Opcode();
__ vector_max_min_fp16(opcode, $dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, $ktmp$$KRegister,
$xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
__ vminmax_fp16(opcode, $dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, $ktmp$$KRegister,
$xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}

View File

@ -55,21 +55,21 @@ public class TestFpMinMaxReductions {
}
@Test
@IR(counts = {IRNode.MIN_F_REG, "1"},
failOn = {IRNode.MIN_F_REDUCTION_REG})
@IR(counts = {IRNode.MINMAX_F_REG, "1"},
failOn = {IRNode.MINMAX_F_REDUCTION_REG})
private static float testFloatMin() {
return Math.min(floatInput1, floatInput2);
}
@Test
@IR(counts = {IRNode.MAX_F_REG, "1"},
failOn = {IRNode.MAX_F_REDUCTION_REG})
@IR(counts = {IRNode.MINMAX_F_REG, "1"},
failOn = {IRNode.MINMAX_F_REDUCTION_REG})
private static float testFloatMax() {
return Math.max(floatInput1, floatInput2);
}
@Test
@IR(counts = {IRNode.MIN_F_REDUCTION_REG, ">= 1"})
@IR(counts = {IRNode.MINMAX_F_REDUCTION_REG, ">= 1"})
private static float testFloatMinReduction() {
float fmin = Float.POSITIVE_INFINITY;
for (int i = 0; i < floatArray.length; i++) {
@ -79,7 +79,7 @@ public class TestFpMinMaxReductions {
}
@Test
@IR(counts = {IRNode.MIN_F_REDUCTION_REG, ">= 1"})
@IR(counts = {IRNode.MINMAX_F_REDUCTION_REG, ">= 1"})
private static float testFloatMinReductionPartiallyUnrolled() {
float fmin = Float.POSITIVE_INFINITY;
for (int i = 0; i < floatArray.length / 2; i++) {
@ -90,7 +90,7 @@ public class TestFpMinMaxReductions {
}
@Test
@IR(counts = {IRNode.MIN_F_REDUCTION_REG, ">= 1"})
@IR(counts = {IRNode.MINMAX_F_REDUCTION_REG, ">= 1"})
private static float testFloatMinReductionNonCounted() {
float fmin = Float.POSITIVE_INFINITY;
for (int i = 0; i < floatArray.length; i += stride) {
@ -100,7 +100,7 @@ public class TestFpMinMaxReductions {
}
@Test
@IR(counts = {IRNode.MIN_F_REDUCTION_REG, ">= 1"})
@IR(counts = {IRNode.MINMAX_F_REDUCTION_REG, ">= 1"})
private static float testFloatMinReductionGlobalAccumulator() {
acc = Float.POSITIVE_INFINITY;
for (int i = 0; i < floatArray.length; i++) {
@ -110,7 +110,7 @@ public class TestFpMinMaxReductions {
}
@Test
@IR(counts = {IRNode.MIN_F_REDUCTION_REG, ">= 1"})
@IR(counts = {IRNode.MINMAX_F_REDUCTION_REG, ">= 1"})
private static float testFloatMinReductionInOuterLoop() {
float fmin = Float.POSITIVE_INFINITY;
int count = 0;
@ -124,7 +124,7 @@ public class TestFpMinMaxReductions {
}
@Test
@IR(counts = {IRNode.MAX_F_REDUCTION_REG, ">= 1"})
@IR(counts = {IRNode.MINMAX_F_REDUCTION_REG, ">= 1"})
private static float testFloatMaxReduction() {
float fmax = Float.NEGATIVE_INFINITY;
for (int i = 0; i < floatArray.length; i++) {
@ -134,21 +134,21 @@ public class TestFpMinMaxReductions {
}
@Test
@IR(counts = {IRNode.MIN_D_REG, "1"},
failOn = {IRNode.MIN_D_REDUCTION_REG})
@IR(counts = {IRNode.MINMAX_D_REG, "1"},
failOn = {IRNode.MINMAX_D_REDUCTION_REG})
private static double testDoubleMin() {
return Math.min(doubleInput1, doubleInput2);
}
@Test
@IR(counts = {IRNode.MAX_D_REG, "1"},
failOn = {IRNode.MAX_D_REDUCTION_REG})
@IR(counts = {IRNode.MINMAX_D_REG, "1"},
failOn = {IRNode.MINMAX_D_REDUCTION_REG})
private static double testDoubleMax() {
return Math.max(doubleInput1, doubleInput2);
}
@Test
@IR(counts = {IRNode.MIN_D_REDUCTION_REG, ">= 1"})
@IR(counts = {IRNode.MINMAX_D_REDUCTION_REG, ">= 1"})
private static double testDoubleMinReduction() {
double fmin = Double.POSITIVE_INFINITY;
for (int i = 0; i < doubleArray.length; i++) {
@ -158,7 +158,7 @@ public class TestFpMinMaxReductions {
}
@Test
@IR(counts = {IRNode.MAX_D_REDUCTION_REG, ">= 1"})
@IR(counts = {IRNode.MINMAX_D_REDUCTION_REG, ">= 1"})
private static double testDoubleMaxReduction() {
double fmax = Double.NEGATIVE_INFINITY;
for (int i = 0; i < doubleArray.length; i++) {

View File

@ -1203,31 +1203,11 @@ public class IRNode {
beforeMatchingNameRegex(MAX_D, "MaxD");
}
public static final String MAX_D_REDUCTION_REG = PREFIX + "MAX_D_REDUCTION_REG" + POSTFIX;
static {
machOnlyNameRegex(MAX_D_REDUCTION_REG, "maxD_reduction_reg");
}
public static final String MAX_D_REG = PREFIX + "MAX_D_REG" + POSTFIX;
static {
machOnlyNameRegex(MAX_D_REG, "maxD_reg");
}
public static final String MAX_F = PREFIX + "MAX_F" + POSTFIX;
static {
beforeMatchingNameRegex(MAX_F, "MaxF");
}
public static final String MAX_F_REDUCTION_REG = PREFIX + "MAX_F_REDUCTION_REG" + POSTFIX;
static {
machOnlyNameRegex(MAX_F_REDUCTION_REG, "maxF_reduction_reg");
}
public static final String MAX_F_REG = PREFIX + "MAX_F_REG" + POSTFIX;
static {
machOnlyNameRegex(MAX_F_REG, "maxF_reg");
}
public static final String MAX_I = PREFIX + "MAX_I" + POSTFIX;
static {
beforeMatchingNameRegex(MAX_I, "MaxI");
@ -1309,14 +1289,14 @@ public class IRNode {
beforeMatchingNameRegex(MIN_D, "MinD");
}
public static final String MIN_D_REDUCTION_REG = PREFIX + "MIN_D_REDUCTION_REG" + POSTFIX;
public static final String MINMAX_D_REDUCTION_REG = PREFIX + "MINMAX_D_REDUCTION_REG" + POSTFIX;
static {
machOnlyNameRegex(MIN_D_REDUCTION_REG, "minD_reduction_reg");
machOnlyNameRegex(MINMAX_D_REDUCTION_REG, "minmaxD_reduction_reg");
}
public static final String MIN_D_REG = PREFIX + "MIN_D_REG" + POSTFIX;
public static final String MINMAX_D_REG = PREFIX + "MINMAX_D_REG" + POSTFIX;
static {
machOnlyNameRegex(MIN_D_REG, "minD_reg");
machOnlyNameRegex(MINMAX_D_REG, "minmaxD_reg");
}
public static final String MIN_F = PREFIX + "MIN_F" + POSTFIX;
@ -1324,14 +1304,14 @@ public class IRNode {
beforeMatchingNameRegex(MIN_F, "MinF");
}
public static final String MIN_F_REDUCTION_REG = PREFIX + "MIN_F_REDUCTION_REG" + POSTFIX;
public static final String MINMAX_F_REDUCTION_REG = PREFIX + "MINMAX_F_REDUCTION_REG" + POSTFIX;
static {
machOnlyNameRegex(MIN_F_REDUCTION_REG, "minF_reduction_reg");
machOnlyNameRegex(MINMAX_F_REDUCTION_REG, "minmaxF_reduction_reg");
}
public static final String MIN_F_REG = PREFIX + "MIN_F_REG" + POSTFIX;
public static final String MINMAX_F_REG = PREFIX + "MINMAX_F_REG" + POSTFIX;
static {
machOnlyNameRegex(MIN_F_REG, "minF_reg");
machOnlyNameRegex(MINMAX_F_REG, "minmaxF_reg");
}
public static final String MIN_I = PREFIX + "MIN_I" + POSTFIX;

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2025, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -28,6 +28,7 @@ import jdk.incubator.vector.*;
import org.openjdk.jmh.annotations.*;
import static jdk.incubator.vector.Float16.*;
import static java.lang.Float.*;
import java.util.Random;
@OutputTimeUnit(TimeUnit.MILLISECONDS)
@State(Scope.Thread)
@ -45,11 +46,20 @@ public class Float16OperationsBenchmark {
short [] vector5;
boolean [] vectorPredicate;
private int c0, c1, c2, s1, s2;
Random r;
static final short f16_one = Float.floatToFloat16(1.0f);
static final short f16_two = Float.floatToFloat16(2.0f);
@Setup(Level.Trial)
public void BmSetup() {
r = new Random();
c1 = s1 = step();
c2 = vectorDim - (s2 = step());
rexp = new int[vectorDim];
vectorRes = new short[vectorDim];
vector1 = new short[vectorDim];
@ -84,6 +94,16 @@ public class Float16OperationsBenchmark {
);
}
private int step() {
return (r.nextInt() & 0xf) + 1;
}
private void inc() {
c1 = c1 + s1 < vectorDim ? c1 + s1 : (s1 = step());
c2 = c2 - s2 > 0 ? c2 - s2 : vectorDim - (s2 = step());
c0 = Math.abs(c2 - c1);
}
@Benchmark
public void addBenchmark() {
for (int i = 0; i < vectorDim; i++) {
@ -200,6 +220,14 @@ public class Float16OperationsBenchmark {
}
}
@Benchmark
public void maxScalarBenchmark() {
for (int i = 0; i < vectorDim; i++) {
inc(); // Ensures no auto-vectorization
vectorRes[c0] = float16ToRawShortBits(max(shortBitsToFloat16(vector1[c1]), shortBitsToFloat16(vector2[c2])));
}
}
@Benchmark
public void minBenchmark() {
for (int i = 0; i < vectorDim; i++) {
@ -207,6 +235,14 @@ public class Float16OperationsBenchmark {
}
}
@Benchmark
public void minScalarBenchmark() {
for (int i = 0; i < vectorDim; i++) {
inc(); // Ensures no auto-vectorization
vectorRes[c0] = float16ToRawShortBits(min(shortBitsToFloat16(vector1[c1]), shortBitsToFloat16(vector2[c2])));
}
}
@Benchmark
public void sqrtBenchmark() {
for (int i = 0; i < vectorDim; i++) {

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2019, 2023, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2019, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -45,14 +45,15 @@ public class FpMinMaxIntrinsics {
private Random r = new Random();
private static int stride = 1;
private static float acc;
private static float f_acc;
private static double d_acc;
@Setup
public void init() {
c1 = s1 = step();
c2 = COUNT - (s2 = step());
for (int i=0; i<COUNT; i++) {
for (int i = 0; i < COUNT; i++) {
floats[i] = r.nextFloat();
doubles[i] = r.nextDouble();
}
@ -64,25 +65,25 @@ public class FpMinMaxIntrinsics {
@Benchmark
public void dMax(Blackhole bh) {
for (int i=0; i<COUNT; i++)
for (int i = 0; i < COUNT; i++)
bh.consume(dMaxBench());
}
@Benchmark
public void dMin(Blackhole bh) {
for (int i=0; i<COUNT; i++)
for (int i = 0; i < COUNT; i++)
bh.consume(dMinBench());
}
@Benchmark
public void fMax(Blackhole bh) {
for (int i=0; i<COUNT; i++)
for (int i = 0; i < COUNT; i++)
bh.consume(fMaxBench());
}
@Benchmark
public void fMin(Blackhole bh) {
for (int i=0; i<COUNT; i++)
for (int i = 0; i < COUNT; i++)
bh.consume(fMinBench());
}
@ -112,11 +113,11 @@ public class FpMinMaxIntrinsics {
}
@Benchmark
public float fMinReduce() {
float result = Float.MAX_VALUE;
public double dMaxReduce() {
double result = Double.MIN_VALUE;
for (int i=0; i<COUNT; i++)
result = Math.min(result, floats[i]);
for (int i = 0; i < COUNT; i++)
result = Math.max(result, doubles[i]);
return result;
}
@ -125,12 +126,62 @@ public class FpMinMaxIntrinsics {
public double dMinReduce() {
double result = Double.MAX_VALUE;
for (int i=0; i<COUNT; i++)
for (int i = 0; i < COUNT; i++)
result = Math.min(result, doubles[i]);
return result;
}
@Benchmark
public float fMaxReduce() {
float result = Float.MIN_VALUE;
for (int i = 0; i < COUNT; i++)
result = Math.max(result, floats[i]);
return result;
}
@Benchmark
public float fMinReduce() {
float result = Float.MAX_VALUE;
for (int i = 0; i < COUNT; i++)
result = Math.min(result, floats[i]);
return result;
}
@Benchmark
public double dMaxReducePartiallyUnrolled() {
double result = Double.MIN_VALUE;
for (int i = 0; i < COUNT / 2; i++) {
result = Math.max(result, doubles[2*i]);
result = Math.max(result, doubles[2*i + 1]);
}
return result;
}
@Benchmark
public double dMinReducePartiallyUnrolled() {
double result = Double.MAX_VALUE;
for (int i = 0; i < COUNT / 2; i++) {
result = Math.min(result, doubles[2*i]);
result = Math.min(result, doubles[2*i + 1]);
}
return result;
}
@Benchmark
public float fMaxReducePartiallyUnrolled() {
float result = Float.MIN_VALUE;
for (int i = 0; i < COUNT / 2; i++) {
result = Math.max(result, floats[2*i]);
result = Math.max(result, floats[2*i + 1]);
}
return result;
}
@Benchmark
public float fMinReducePartiallyUnrolled() {
float result = Float.MAX_VALUE;
@ -141,6 +192,30 @@ public class FpMinMaxIntrinsics {
return result;
}
@Benchmark
public double dMaxReduceNonCounted() {
double result = Double.MIN_VALUE;
for (int i = 0; i < COUNT; i += stride)
result = Math.max(result, doubles[i]);
return result;
}
@Benchmark
public double dMinReduceNonCounted() {
double result = Double.MAX_VALUE;
for (int i = 0; i < COUNT; i += stride)
result = Math.min(result, doubles[i]);
return result;
}
@Benchmark
public float fMaxReduceNonCounted() {
float result = Float.MIN_VALUE;
for (int i = 0; i < COUNT; i += stride)
result = Math.max(result, floats[i]);
return result;
}
@Benchmark
public float fMinReduceNonCounted() {
float result = Float.MAX_VALUE;
@ -150,11 +225,74 @@ public class FpMinMaxIntrinsics {
}
@Benchmark
public float fMinReduceGlobalAccumulator() {
acc = Float.MAX_VALUE;
public double dMaxReduceGlobalAccumulator() {
d_acc = Double.MIN_VALUE;
for (int i = 0; i < COUNT; i += stride)
acc = Math.min(acc, floats[i]);
return acc;
d_acc = Math.max(d_acc, doubles[i]);
return d_acc;
}
@Benchmark
public double dMinReduceGlobalAccumulator() {
d_acc = Double.MAX_VALUE;
for (int i = 0; i < COUNT; i += stride)
d_acc = Math.min(d_acc, doubles[i]);
return d_acc;
}
@Benchmark
public float fMaxReduceGlobalAccumulator() {
f_acc = Float.MIN_VALUE;
for (int i = 0; i < COUNT; i += stride)
f_acc = Math.max(f_acc, floats[i]);
return f_acc;
}
@Benchmark
public float fMinReduceGlobalAccumulator() {
f_acc = Float.MAX_VALUE;
for (int i = 0; i < COUNT; i += stride)
f_acc = Math.min(f_acc, floats[i]);
return f_acc;
}
@Benchmark
public double dMaxReduceInOuterLoop() {
double result = Double.MIN_VALUE;
int count = 0;
for (int i = 0; i < COUNT; i++) {
result = Math.max(result, doubles[i]);
for (int j = 0; j < 10; j += stride) {
count++;
}
}
return result + count;
}
@Benchmark
public double dMinReduceInOuterLoop() {
double result = Double.MAX_VALUE;
int count = 0;
for (int i = 0; i < COUNT; i++) {
result = Math.min(result, doubles[i]);
for (int j = 0; j < 10; j += stride) {
count++;
}
}
return result + count;
}
@Benchmark
public float fMaxReduceInOuterLoop() {
float result = Float.MIN_VALUE;
int count = 0;
for (int i = 0; i < COUNT; i++) {
result = Math.max(result, floats[i]);
for (int j = 0; j < 10; j += stride) {
count++;
}
}
return result + count;
}
@Benchmark
@ -169,5 +307,4 @@ public class FpMinMaxIntrinsics {
}
return result + count;
}
}