8382482: Optimize equals scenario in x86 scalar floating point min/max reduction loops

Reviewed-by: sviswanathan, epeter, sparasa
This commit is contained in:
Mohamed Issa 2026-05-28 20:16:12 +00:00 committed by Srinivas Vamsi Parasa
parent 4eb67734b7
commit bb4d2abb0f
2 changed files with 56 additions and 36 deletions

View File

@ -1742,14 +1742,10 @@ static inline void movfp(MacroAssembler* masm, enum FP_PREC pt,
// ja -> b # a
// jp -> NaN # NaN
// jb -> a # b
// je #
// |-jz -> a | b # a & b
// | -> a #
// je -> a | b # a & b
static void emit_fp_min_max(MacroAssembler* masm, XMMRegister dst,
XMMRegister a, XMMRegister b,
XMMRegister xmmt, Register rt,
XMMRegister a, XMMRegister b, Register rt,
bool min, enum FP_PREC pt) {
Label nan, zero, below, above, done;
emit_fp_ucom(masm, pt, a, b);
@ -1759,31 +1755,26 @@ static void emit_fp_min_max(MacroAssembler* masm, XMMRegister dst,
} else {
__ jccb(Assembler::above, done);
}
__ jccb(Assembler::parity, nan); // PF=1
__ jccb(Assembler::below, below); // CF=1
// equal
__ vpxor(xmmt, xmmt, xmmt, Assembler::AVX_128bit);
emit_fp_ucom(masm, pt, a, xmmt);
__ jccb(Assembler::equal, zero);
movfp(masm, pt, dst, a, rt);
__ jmp(done);
__ bind(zero);
// This point is reached only when a and b compare equal and neither is NaN
// (NaN is handled separately), so the inputs are either bit-identical or
// zeros that differ only in sign. A bitwise OR/AND therefore leaves the
// exponent and mantissa unchanged, which makes it a low cost way to compute
// the correct result for both zero and non-zero inputs. For zero inputs, the
// sign bit is chosen according to whether a minimum or maximum value is
// required.
if (min) {
// Negative sign preserved when available (e.g., min(+0, -0) -> -0)
__ vpor(dst, a, b, Assembler::AVX_128bit);
} else {
// Positive sign preserved when available (e.g., max(+0, -0) -> +0)
__ vpand(dst, a, b, Assembler::AVX_128bit);
}
__ jmp(done);
__ bind(above);
movfp(masm, pt, dst, min ? b : a, rt);
__ jmp(done);
__ bind(nan);
@ -7376,18 +7367,18 @@ instruct minmaxF_reg_avx10_2(regF dst, regF a, regF b)
ins_pipe( pipe_slow );
%}
instruct minmaxF_reduction_reg_avx10_2(regF dst, regF a, regF b, regF xtmp, rRegI rtmp, rFlagsReg cr)
instruct minmaxF_reduction_reg_avx10_2(regF dst, regF a, regF b, rRegI rtmp, rFlagsReg cr)
%{
predicate(VM_Version::supports_avx10_2() && VLoopReductions::is_reduction(n));
match(Set dst (MaxF a b));
match(Set dst (MinF a b));
effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
effect(USE a, USE b, TEMP rtmp, KILL cr);
format %{ "minmaxF_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
format %{ "minmaxF_reduction $dst, $a, $b \t! using $rtmp as TEMP" %}
ins_encode %{
int opcode = this->ideal_Opcode();
bool min = (opcode == Op_MinF) ? true : false;
emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $rtmp$$Register,
min, fp_prec_flt /*pt*/);
%}
ins_pipe( pipe_slow );
@ -7412,18 +7403,18 @@ instruct minmaxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atm
ins_pipe( pipe_slow );
%}
instruct minmaxF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr)
instruct minmaxF_reduction_reg(legRegF dst, legRegF a, legRegF b, rRegI rtmp, rFlagsReg cr)
%{
predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
match(Set dst (MaxF a b));
match(Set dst (MinF a b));
effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
effect(USE a, USE b, TEMP rtmp, KILL cr);
format %{ "minmaxF_reduction $dst, $a, $b \t!using $xtmp and $rtmp as TEMP" %}
format %{ "minmaxF_reduction $dst, $a, $b \t!using $rtmp as TEMP" %}
ins_encode %{
int opcode = this->ideal_Opcode();
bool min = (opcode == Op_MinF) ? true : false;
emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $rtmp$$Register,
min, fp_prec_flt /*pt*/);
%}
ins_pipe( pipe_slow );
@ -7445,18 +7436,18 @@ instruct minmaxD_reg_avx10_2(regD dst, regD a, regD b)
ins_pipe( pipe_slow );
%}
instruct minmaxD_reduction_reg_avx10_2(regD dst, regD a, regD b, regD xtmp, rRegI rtmp, rFlagsReg cr)
instruct minmaxD_reduction_reg_avx10_2(regD dst, regD a, regD b, rRegI rtmp, rFlagsReg cr)
%{
predicate(VM_Version::supports_avx10_2() && VLoopReductions::is_reduction(n));
match(Set dst (MaxD a b));
match(Set dst (MinD a b));
effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
effect(USE a, USE b, TEMP rtmp, KILL cr);
format %{ "minmaxD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
format %{ "minmaxD_reduction $dst, $a, $b \t! using $rtmp as TEMP" %}
ins_encode %{
int opcode = this->ideal_Opcode();
bool min = (opcode == Op_MinD) ? true : false;
emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $rtmp$$Register,
min, fp_prec_dbl /*pt*/);
%}
ins_pipe( pipe_slow );
@ -7481,18 +7472,18 @@ instruct minmaxD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atm
ins_pipe( pipe_slow );
%}
instruct minmaxD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr)
instruct minmaxD_reduction_reg(legRegD dst, legRegD a, legRegD b, rRegL rtmp, rFlagsReg cr)
%{
predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
match(Set dst (MaxD a b));
match(Set dst (MinD a b));
effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
effect(USE a, USE b, TEMP rtmp, KILL cr);
format %{ "minmaxD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
format %{ "minmaxD_reduction $dst, $a, $b \t! using $rtmp as TEMP" %}
ins_encode %{
int opcode = this->ideal_Opcode();
bool min = (opcode == Op_MinD) ? true : false;
emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $rtmp$$Register,
min, fp_prec_dbl /*pt*/);
%}
ins_pipe( pipe_slow );

View File

@ -54,8 +54,37 @@ public class FpMinMaxIntrinsics {
c2 = COUNT - (s2 = step());
for (int i = 0; i < COUNT; i++) {
floats[i] = r.nextFloat();
doubles[i] = r.nextDouble();
// Classify element i by its position inside a repeating window of 100
// indices, so that both arrays interleave runs of special values with runs
// of ordinary ones. Each named window below is 10 elements wide; the windows
// that are not named (10-19, 30-39, 50-59, 70-79) fall through to the final
// else branch. Presumably the mix is meant to exercise the NaN, equal,
// ascending and descending cases of the min/max code under test -- confirm
// against the benchmark methods.
final int mappedIndex = i % 100;
if (mappedIndex >= 0 && mappedIndex < 10) {
// NaN: window 0-9
floats[i] = Float.NaN;
doubles[i] = Double.NaN;
} else if (mappedIndex >= 20 && mappedIndex < 30) {
// Equal (+0.0): window 20-29 holds identical positive zeros
floats[i] = +0.0f;
doubles[i] = +0.0;
} else if (mappedIndex >= 40 && mappedIndex < 50) {
// Equal (-0.0): window 40-49 holds identical negative zeros
floats[i] = -0.0f;
doubles[i] = -0.0;
} else if (mappedIndex >= 60 && mappedIndex < 70) {
// Descending: window 60-69; the value shrinks as i grows and stays positive
// because the enclosing loop keeps i below COUNT
floats[i] = (float) (COUNT - i);
doubles[i] = (double) (COUNT - i);
} else if (mappedIndex >= 80 && mappedIndex < 90) {
// Ascending: window 80-89; the value is the index itself
floats[i] = (float) i;
doubles[i] = (double) i;
} else if (mappedIndex >= 90 && mappedIndex < 100) {
// Random (negative): window 90-99; negates the next values drawn from r
floats[i] = -r.nextFloat();
doubles[i] = -r.nextDouble();
} else {
// Random (positive): every window not matched above
floats[i] = r.nextFloat();
doubles[i] = r.nextDouble();
}
}
}