mirror of
https://github.com/openjdk/jdk.git
synced 2026-06-06 18:53:37 +00:00
8382482: Optimize equals scenario in x86 scalar floating point min/max reduction loops
Reviewed-by: sviswanathan, epeter, sparasa
This commit is contained in:
parent
4eb67734b7
commit
bb4d2abb0f
@ -1742,14 +1742,10 @@ static inline void movfp(MacroAssembler* masm, enum FP_PREC pt,
|
||||
// ja -> b # a
|
||||
// jp -> NaN # NaN
|
||||
// jb -> a # b
|
||||
// je #
|
||||
// |-jz -> a | b # a & b
|
||||
// | -> a #
|
||||
// je -> a | b # a & b
|
||||
static void emit_fp_min_max(MacroAssembler* masm, XMMRegister dst,
|
||||
XMMRegister a, XMMRegister b,
|
||||
XMMRegister xmmt, Register rt,
|
||||
XMMRegister a, XMMRegister b, Register rt,
|
||||
bool min, enum FP_PREC pt) {
|
||||
|
||||
Label nan, zero, below, above, done;
|
||||
|
||||
emit_fp_ucom(masm, pt, a, b);
|
||||
@ -1759,31 +1755,26 @@ static void emit_fp_min_max(MacroAssembler* masm, XMMRegister dst,
|
||||
} else {
|
||||
__ jccb(Assembler::above, done);
|
||||
}
|
||||
|
||||
__ jccb(Assembler::parity, nan); // PF=1
|
||||
__ jccb(Assembler::below, below); // CF=1
|
||||
|
||||
// equal
|
||||
__ vpxor(xmmt, xmmt, xmmt, Assembler::AVX_128bit);
|
||||
emit_fp_ucom(masm, pt, a, xmmt);
|
||||
|
||||
__ jccb(Assembler::equal, zero);
|
||||
movfp(masm, pt, dst, a, rt);
|
||||
|
||||
__ jmp(done);
|
||||
|
||||
__ bind(zero);
|
||||
// Using bitwise operations is a low cost way to compute the correct result
|
||||
// for zero and non-zero inputs in this scenario except for NaN, which is
|
||||
// handled separately. The mantissa and exponent are valid with either
|
||||
// bitwise operation. For zero inputs, the sign bit is chosen according to
|
||||
// whether a minimum or maximum value is required.
|
||||
if (min) {
|
||||
// Negative sign preserved when available (e.g., min(+0, -0) -> -0)
|
||||
__ vpor(dst, a, b, Assembler::AVX_128bit);
|
||||
} else {
|
||||
// Positive sign preserved when available (e.g., max(+0, -0) -> +0)
|
||||
__ vpand(dst, a, b, Assembler::AVX_128bit);
|
||||
}
|
||||
|
||||
__ jmp(done);
|
||||
|
||||
__ bind(above);
|
||||
movfp(masm, pt, dst, min ? b : a, rt);
|
||||
|
||||
__ jmp(done);
|
||||
|
||||
__ bind(nan);
|
||||
@ -7376,18 +7367,18 @@ instruct minmaxF_reg_avx10_2(regF dst, regF a, regF b)
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
instruct minmaxF_reduction_reg_avx10_2(regF dst, regF a, regF b, regF xtmp, rRegI rtmp, rFlagsReg cr)
|
||||
instruct minmaxF_reduction_reg_avx10_2(regF dst, regF a, regF b, rRegI rtmp, rFlagsReg cr)
|
||||
%{
|
||||
predicate(VM_Version::supports_avx10_2() && VLoopReductions::is_reduction(n));
|
||||
match(Set dst (MaxF a b));
|
||||
match(Set dst (MinF a b));
|
||||
effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
|
||||
effect(USE a, USE b, TEMP rtmp, KILL cr);
|
||||
|
||||
format %{ "minmaxF_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
|
||||
format %{ "minmaxF_reduction $dst, $a, $b \t! using $rtmp as TEMP" %}
|
||||
ins_encode %{
|
||||
int opcode = this->ideal_Opcode();
|
||||
bool min = (opcode == Op_MinF) ? true : false;
|
||||
emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
|
||||
emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $rtmp$$Register,
|
||||
min, fp_prec_flt /*pt*/);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
@ -7412,18 +7403,18 @@ instruct minmaxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atm
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
instruct minmaxF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr)
|
||||
instruct minmaxF_reduction_reg(legRegF dst, legRegF a, legRegF b, rRegI rtmp, rFlagsReg cr)
|
||||
%{
|
||||
predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
|
||||
match(Set dst (MaxF a b));
|
||||
match(Set dst (MinF a b));
|
||||
effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
|
||||
effect(USE a, USE b, TEMP rtmp, KILL cr);
|
||||
|
||||
format %{ "minmaxF_reduction $dst, $a, $b \t!using $xtmp and $rtmp as TEMP" %}
|
||||
format %{ "minmaxF_reduction $dst, $a, $b \t!using $rtmp as TEMP" %}
|
||||
ins_encode %{
|
||||
int opcode = this->ideal_Opcode();
|
||||
bool min = (opcode == Op_MinF) ? true : false;
|
||||
emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
|
||||
emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $rtmp$$Register,
|
||||
min, fp_prec_flt /*pt*/);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
@ -7445,18 +7436,18 @@ instruct minmaxD_reg_avx10_2(regD dst, regD a, regD b)
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
instruct minmaxD_reduction_reg_avx10_2(regD dst, regD a, regD b, regD xtmp, rRegI rtmp, rFlagsReg cr)
|
||||
instruct minmaxD_reduction_reg_avx10_2(regD dst, regD a, regD b, rRegI rtmp, rFlagsReg cr)
|
||||
%{
|
||||
predicate(VM_Version::supports_avx10_2() && VLoopReductions::is_reduction(n));
|
||||
match(Set dst (MaxD a b));
|
||||
match(Set dst (MinD a b));
|
||||
effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
|
||||
effect(USE a, USE b, TEMP rtmp, KILL cr);
|
||||
|
||||
format %{ "minmaxD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
|
||||
format %{ "minmaxD_reduction $dst, $a, $b \t! using $rtmp as TEMP" %}
|
||||
ins_encode %{
|
||||
int opcode = this->ideal_Opcode();
|
||||
bool min = (opcode == Op_MinD) ? true : false;
|
||||
emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
|
||||
emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $rtmp$$Register,
|
||||
min, fp_prec_dbl /*pt*/);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
@ -7481,18 +7472,18 @@ instruct minmaxD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atm
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
instruct minmaxD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr)
|
||||
instruct minmaxD_reduction_reg(legRegD dst, legRegD a, legRegD b, rRegL rtmp, rFlagsReg cr)
|
||||
%{
|
||||
predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
|
||||
match(Set dst (MaxD a b));
|
||||
match(Set dst (MinD a b));
|
||||
effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
|
||||
effect(USE a, USE b, TEMP rtmp, KILL cr);
|
||||
|
||||
format %{ "minmaxD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
|
||||
format %{ "minmaxD_reduction $dst, $a, $b \t! using $rtmp as TEMP" %}
|
||||
ins_encode %{
|
||||
int opcode = this->ideal_Opcode();
|
||||
bool min = (opcode == Op_MinD) ? true : false;
|
||||
emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
|
||||
emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $rtmp$$Register,
|
||||
min, fp_prec_dbl /*pt*/);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
|
||||
@ -54,8 +54,37 @@ public class FpMinMaxIntrinsics {
|
||||
c2 = COUNT - (s2 = step());
|
||||
|
||||
for (int i = 0; i < COUNT; i++) {
|
||||
floats[i] = r.nextFloat();
|
||||
doubles[i] = r.nextDouble();
|
||||
final int mappedIndex = i % 100;
|
||||
|
||||
if (mappedIndex >= 0 && mappedIndex < 10) {
|
||||
// NaN
|
||||
floats[i] = Float.NaN;
|
||||
doubles[i] = Double.NaN;
|
||||
} else if (mappedIndex >= 20 && mappedIndex < 30) {
|
||||
// Equal (+0.0)
|
||||
floats[i] = +0.0f;
|
||||
doubles[i] = +0.0;
|
||||
} else if (mappedIndex >= 40 && mappedIndex < 50) {
|
||||
// Equal (-0.0)
|
||||
floats[i] = -0.0f;
|
||||
doubles[i] = -0.0;
|
||||
} else if (mappedIndex >= 60 && mappedIndex < 70) {
|
||||
// Descending
|
||||
floats[i] = (float) (COUNT - i);
|
||||
doubles[i] = (double) (COUNT - i);
|
||||
} else if (mappedIndex >= 80 && mappedIndex < 90) {
|
||||
// Ascending
|
||||
floats[i] = (float) i;
|
||||
doubles[i] = (double) i;
|
||||
} else if (mappedIndex >= 90 && mappedIndex < 100) {
|
||||
// Random (negative)
|
||||
floats[i] = -r.nextFloat();
|
||||
doubles[i] = -r.nextDouble();
|
||||
} else {
|
||||
// Random (positive)
|
||||
floats[i] = r.nextFloat();
|
||||
doubles[i] = r.nextDouble();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user