diff --git a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp index 29413e5457c..54e5f55fa2b 100644 --- a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp @@ -1094,32 +1094,78 @@ void C2_MacroAssembler::vminmax_fp(int opcode, BasicType elem_bt, bool is_min = (opcode == Op_MinV || opcode == Op_MinReductionV); bool is_double_word = is_double_word_type(elem_bt); + /* Note on 'non-obvious' assembly sequence: + * + * While there are vminps/vmaxps instructions, there are two important differences between hardware + * and Java on how they handle floats: + * a. -0.0 and +0.0 are considered equal (vminps/vmaxps will return second parameter when inputs are equal) + * b. NaN is not necessarily propagated (vminps/vmaxps will return second parameter when either input is NaN) + * + * It is still more efficient to use vminps/vmaxps, but with some pre/post-processing: + * a. -0.0/+0.0: Bias negative (positive) numbers to second parameter before vminps (vmaxps) + * (only useful when signs differ, a no-op otherwise) + * b. NaN: Check if it was the first parameter that had the NaN (with vcmp[UNORD_Q]) + * + * Following pseudo code describes the algorithm for max[FD] (Min algorithm is on similar lines): + * btmp = (b < +0.0) ? a : b + * atmp = (b < +0.0) ? b : a + * Tmp = Max_Float(atmp, btmp) + * Res = (atmp == NaN) ? atmp : Tmp + */ + + void (MacroAssembler::*vblend)(XMMRegister, XMMRegister, XMMRegister, XMMRegister, int, bool, XMMRegister); + void (MacroAssembler::*vmaxmin)(XMMRegister, XMMRegister, XMMRegister, int); + void (MacroAssembler::*vcmp)(XMMRegister, XMMRegister, XMMRegister, int, int); + XMMRegister mask; + if (!is_double_word && is_min) { - vblendvps(atmp, a, b, a, vlen_enc); - vblendvps(btmp, b, a, a, vlen_enc); - vminps(tmp, atmp, btmp, vlen_enc); - vcmpps(btmp, atmp, atmp, Assembler::UNORD_Q, vlen_enc); - vblendvps(dst, tmp, atmp, btmp, vlen_enc); + mask = a; + vblend = &MacroAssembler::vblendvps; + vmaxmin = &MacroAssembler::vminps; + vcmp = &MacroAssembler::vcmpps; } else if (!is_double_word && !is_min) { - vblendvps(btmp, b, a, b, vlen_enc); - vblendvps(atmp, a, b, b, vlen_enc); - vmaxps(tmp, atmp, btmp, vlen_enc); - vcmpps(btmp, atmp, atmp, Assembler::UNORD_Q, vlen_enc); - vblendvps(dst, tmp, atmp, btmp, vlen_enc); + mask = b; + vblend = &MacroAssembler::vblendvps; + vmaxmin = &MacroAssembler::vmaxps; + vcmp = &MacroAssembler::vcmpps; } else if (is_double_word && is_min) { - vblendvpd(atmp, a, b, a, vlen_enc); - vblendvpd(btmp, b, a, a, vlen_enc); - vminpd(tmp, atmp, btmp, vlen_enc); - vcmppd(btmp, atmp, atmp, Assembler::UNORD_Q, vlen_enc); - vblendvpd(dst, tmp, atmp, btmp, vlen_enc); + mask = a; + vblend = &MacroAssembler::vblendvpd; + vmaxmin = &MacroAssembler::vminpd; + vcmp = &MacroAssembler::vcmppd; } else { assert(is_double_word && !is_min, "sanity"); - vblendvpd(btmp, b, a, b, vlen_enc); - vblendvpd(atmp, a, b, b, vlen_enc); - vmaxpd(tmp, atmp, btmp, vlen_enc); - vcmppd(btmp, atmp, atmp, Assembler::UNORD_Q, vlen_enc); - vblendvpd(dst, tmp, atmp, btmp, vlen_enc); + mask = b; + vblend = &MacroAssembler::vblendvpd; + vmaxmin = &MacroAssembler::vmaxpd; + vcmp = &MacroAssembler::vcmppd; } + + // Pick temporaries so the EnableX86ECoreOpts blend emulation is not silently disabled by register overlaps (dst may alias btmp) + XMMRegister maxmin, scratch; + if (dst == btmp) { + maxmin = btmp; + scratch = tmp; + } else { + maxmin = tmp; + scratch = btmp; + } + + bool precompute_mask = EnableX86ECoreOpts && UseAVX > 1; + if (precompute_mask && !is_double_word) { + 
vpsrad(tmp, mask, 32, vlen_enc); + mask = tmp; + } else if (precompute_mask && is_double_word) { + vpxor(tmp, tmp, tmp, vlen_enc); + vpcmpgtq(tmp, tmp, mask, vlen_enc); + mask = tmp; + } + + (this->*vblend)(atmp, a, b, mask, vlen_enc, !precompute_mask, btmp); + (this->*vblend)(btmp, b, a, mask, vlen_enc, !precompute_mask, tmp); + (this->*vmaxmin)(maxmin, atmp, btmp, vlen_enc); + (this->*vcmp)(scratch, atmp, atmp, Assembler::UNORD_Q, vlen_enc); + (this->*vblend)(dst, maxmin, atmp, scratch, vlen_enc, false, scratch); } void C2_MacroAssembler::evminmax_fp(int opcode, BasicType elem_bt, @@ -5318,18 +5364,18 @@ void C2_MacroAssembler::vector_signum_avx(int opcode, XMMRegister dst, XMMRegist if (opcode == Op_SignumVD) { vsubpd(dst, zero, one, vec_enc); // if src < 0 ? -1 : 1 - vblendvpd(dst, one, dst, src, vec_enc); + vblendvpd(dst, one, dst, src, vec_enc, true, xtmp1); // if src == NaN, -0.0 or 0.0 return src. vcmppd(xtmp1, src, zero, Assembler::EQ_UQ, vec_enc); - vblendvpd(dst, dst, src, xtmp1, vec_enc); + vblendvpd(dst, dst, src, xtmp1, vec_enc, false, xtmp1); } else { assert(opcode == Op_SignumVF, ""); vsubps(dst, zero, one, vec_enc); // if src < 0 ? -1 : 1 - vblendvps(dst, one, dst, src, vec_enc); + vblendvps(dst, one, dst, src, vec_enc, true, xtmp1); // if src == NaN, -0.0 or 0.0 return src. vcmpps(xtmp1, src, zero, Assembler::EQ_UQ, vec_enc); - vblendvps(dst, dst, src, xtmp1, vec_enc); + vblendvps(dst, dst, src, xtmp1, vec_enc, false, xtmp1); } } diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.cpp b/src/hotspot/cpu/x86/macroAssembler_x86.cpp index ed8a23771d3..6f04cdef508 100644 --- a/src/hotspot/cpu/x86/macroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/macroAssembler_x86.cpp @@ -3566,6 +3566,56 @@ void MacroAssembler::vbroadcastss(XMMRegister dst, AddressLiteral src, int vecto } } +// Vector float blend +// vblendvps(XMMRegister dst, XMMRegister nds, XMMRegister src, XMMRegister mask, int vector_len, bool compute_mask = true, XMMRegister scratch = xnoreg) +void MacroAssembler::vblendvps(XMMRegister dst, XMMRegister src1, XMMRegister src2, XMMRegister mask, int vector_len, bool compute_mask, XMMRegister scratch) { + // WARN: Allow dst == (src1|src2), mask == scratch + bool blend_emulation = EnableX86ECoreOpts && UseAVX > 1; + bool scratch_available = scratch != xnoreg && scratch != src1 && scratch != src2 && scratch != dst; + bool dst_available = dst != mask && (dst != src1 || dst != src2); + if (blend_emulation && scratch_available && dst_available) { + if (compute_mask) { + vpsrad(scratch, mask, 32, vector_len); + mask = scratch; + } + if (dst == src1) { + vpandn(dst, mask, src1, vector_len); // if mask == 0, src1 + vpand (scratch, mask, src2, vector_len); // if mask == 1, src2 + } else { + vpand (dst, mask, src2, vector_len); // if mask == 1, src2 + vpandn(scratch, mask, src1, vector_len); // if mask == 0, src1 + } + vpor(dst, dst, scratch, vector_len); + } else { + Assembler::vblendvps(dst, src1, src2, mask, vector_len); + } +} + +// vblendvpd(XMMRegister dst, XMMRegister nds, XMMRegister src, XMMRegister mask, int vector_len, bool compute_mask = true, XMMRegister scratch = xnoreg) +void MacroAssembler::vblendvpd(XMMRegister dst, XMMRegister src1, XMMRegister src2, XMMRegister mask, int vector_len, bool compute_mask, XMMRegister scratch) { + // WARN: Allow dst == (src1|src2), mask == scratch + bool blend_emulation = EnableX86ECoreOpts && UseAVX > 1; + bool scratch_available = scratch != xnoreg && scratch != src1 && scratch != src2 && scratch != dst && (!compute_mask || scratch != 
mask); + bool dst_available = dst != mask && (dst != src1 || dst != src2); + if (blend_emulation && scratch_available && dst_available) { + if (compute_mask) { + vpxor(scratch, scratch, scratch, vector_len); + vpcmpgtq(scratch, scratch, mask, vector_len); + mask = scratch; + } + if (dst == src1) { + vpandn(dst, mask, src1, vector_len); // if mask == 0, src + vpand (scratch, mask, src2, vector_len); // if mask == 1, src2 + } else { + vpand (dst, mask, src2, vector_len); // if mask == 1, src2 + vpandn(scratch, mask, src1, vector_len); // if mask == 0, src + } + vpor(dst, dst, scratch, vector_len); + } else { + Assembler::vblendvpd(dst, src1, src2, mask, vector_len); + } +} + void MacroAssembler::vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { assert(((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15"); Assembler::vpcmpeqb(dst, nds, src, vector_len); diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.hpp b/src/hotspot/cpu/x86/macroAssembler_x86.hpp index 47943576dce..ea6a37d16ed 100644 --- a/src/hotspot/cpu/x86/macroAssembler_x86.hpp +++ b/src/hotspot/cpu/x86/macroAssembler_x86.hpp @@ -1130,6 +1130,10 @@ public: using Assembler::vbroadcastss; void vbroadcastss(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch = noreg); + // Vector float blend + void vblendvps(XMMRegister dst, XMMRegister nds, XMMRegister src, XMMRegister mask, int vector_len, bool compute_mask = true, XMMRegister scratch = xnoreg); + void vblendvpd(XMMRegister dst, XMMRegister nds, XMMRegister src, XMMRegister mask, int vector_len, bool compute_mask = true, XMMRegister scratch = xnoreg); + void divsd(XMMRegister dst, XMMRegister src) { Assembler::divsd(dst, src); } void divsd(XMMRegister dst, Address src) { Assembler::divsd(dst, src); } void divsd(XMMRegister dst, AddressLiteral src, Register rscratch = noreg); diff --git a/src/hotspot/cpu/x86/x86.ad b/src/hotspot/cpu/x86/x86.ad index 681abd7199f..694ec2ddbd6 100644 --- a/src/hotspot/cpu/x86/x86.ad +++ b/src/hotspot/cpu/x86/x86.ad @@ -7801,7 +7801,7 @@ instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{ %} instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{ - predicate(UseAVX > 0 && + predicate(UseAVX > 0 && !EnableX86ECoreOpts && n->in(2)->bottom_type()->isa_vectmask() == NULL && Matcher::vector_length_in_bytes(n) <= 32 && is_integral_type(Matcher::vector_element_basic_type(n))); @@ -7815,7 +7815,7 @@ instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{ %} instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{ - predicate(UseAVX > 0 && + predicate(UseAVX > 0 && !EnableX86ECoreOpts && n->in(2)->bottom_type()->isa_vectmask() == NULL && Matcher::vector_length_in_bytes(n) <= 32 && !is_integral_type(Matcher::vector_element_basic_type(n))); @@ -7828,6 +7828,22 @@ instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{ ins_pipe( pipe_slow ); %} +instruct vblendvp(legVec dst, legVec src1, legVec src2, legVec mask, legVec vtmp) %{ + predicate(UseAVX > 0 && EnableX86ECoreOpts && + n->in(2)->bottom_type()->isa_vectmask() == NULL && + Matcher::vector_length_in_bytes(n) <= 32); + match(Set dst (VectorBlend (Binary src1 src2) mask)); + format %{ "vector_blend $dst,$src1,$src2,$mask\t! 
using $vtmp as TEMP" %} + effect(TEMP vtmp, TEMP dst); + ins_encode %{ + int vlen_enc = vector_length_encoding(this); + __ vpandn($vtmp$$XMMRegister, $mask$$XMMRegister, $src1$$XMMRegister, vlen_enc); + __ vpand ($dst$$XMMRegister, $mask$$XMMRegister, $src2$$XMMRegister, vlen_enc); + __ vpor ($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); + %} + ins_pipe( pipe_slow ); +%} + instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, kReg ktmp) %{ predicate(Matcher::vector_length_in_bytes(n) == 64 && n->in(2)->bottom_type()->isa_vectmask() == NULL); diff --git a/src/hotspot/cpu/x86/x86_64.ad b/src/hotspot/cpu/x86/x86_64.ad index 94844cd4e7f..759dd8a1d48 100644 --- a/src/hotspot/cpu/x86/x86_64.ad +++ b/src/hotspot/cpu/x86/x86_64.ad @@ -4478,34 +4478,15 @@ instruct loadD(regD dst, memory mem) ins_pipe(pipe_slow); // XXX %} - -// Following pseudo code describes the algorithm for max[FD]: -// Min algorithm is on similar lines -// btmp = (b < +0.0) ? a : b -// atmp = (b < +0.0) ? b : a -// Tmp = Max_Float(atmp , btmp) -// Res = (atmp == NaN) ? atmp : Tmp - // max = java.lang.Math.max(float a, float b) instruct maxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{ predicate(UseAVX > 0 && !SuperWord::is_reduction(n)); match(Set dst (MaxF a b)); effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp); - format %{ - "vblendvps $btmp,$b,$a,$b \n\t" - "vblendvps $atmp,$a,$b,$b \n\t" - "vmaxss $tmp,$atmp,$btmp \n\t" - "vcmpps.unordered $btmp,$atmp,$atmp \n\t" - "vblendvps $dst,$tmp,$atmp,$btmp \n\t" - %} + format %{ "maxF $dst, $a, $b \t! using tmp, atmp and btmp as TEMP" %} ins_encode %{ - int vector_len = Assembler::AVX_128bit; - __ vblendvps($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vector_len); - __ vblendvps($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $b$$XMMRegister, vector_len); - __ vmaxss($tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister); - __ vcmpps($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::_false, vector_len); - __ vblendvps($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len); - %} + __ vminmax_fp(Op_MaxV, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit); + %} ins_pipe( pipe_slow ); %} @@ -4527,20 +4508,9 @@ instruct maxD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, predicate(UseAVX > 0 && !SuperWord::is_reduction(n)); match(Set dst (MaxD a b)); effect(USE a, USE b, TEMP atmp, TEMP btmp, TEMP tmp); - format %{ - "vblendvpd $btmp,$b,$a,$b \n\t" - "vblendvpd $atmp,$a,$b,$b \n\t" - "vmaxsd $tmp,$atmp,$btmp \n\t" - "vcmppd.unordered $btmp,$atmp,$atmp \n\t" - "vblendvpd $dst,$tmp,$atmp,$btmp \n\t" - %} + format %{ "maxD $dst, $a, $b \t! 
using tmp, atmp and btmp as TEMP" %} ins_encode %{ - int vector_len = Assembler::AVX_128bit; - __ vblendvpd($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vector_len); - __ vblendvpd($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $b$$XMMRegister, vector_len); - __ vmaxsd($tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister); - __ vcmppd($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::_false, vector_len); - __ vblendvpd($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len); + __ vminmax_fp(Op_MaxV, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit); %} ins_pipe( pipe_slow ); %} @@ -4563,20 +4533,9 @@ instruct minF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, predicate(UseAVX > 0 && !SuperWord::is_reduction(n)); match(Set dst (MinF a b)); effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp); - format %{ - "vblendvps $atmp,$a,$b,$a \n\t" - "vblendvps $btmp,$b,$a,$a \n\t" - "vminss $tmp,$atmp,$btmp \n\t" - "vcmpps.unordered $btmp,$atmp,$atmp \n\t" - "vblendvps $dst,$tmp,$atmp,$btmp \n\t" - %} + format %{ "minF $dst, $a, $b \t! using tmp, atmp and btmp as TEMP" %} ins_encode %{ - int vector_len = Assembler::AVX_128bit; - __ vblendvps($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, vector_len); - __ vblendvps($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $a$$XMMRegister, vector_len); - __ vminss($tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister); - __ vcmpps($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::_false, vector_len); - __ vblendvps($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len); + __ vminmax_fp(Op_MinV, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit); %} ins_pipe( pipe_slow ); %} @@ -4599,20 +4558,9 @@ instruct minD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, predicate(UseAVX > 0 && !SuperWord::is_reduction(n)); match(Set dst (MinD a b)); effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp); - format %{ - "vblendvpd $atmp,$a,$b,$a \n\t" - "vblendvpd $btmp,$b,$a,$a \n\t" - "vminsd $tmp,$atmp,$btmp \n\t" - "vcmppd.unordered $btmp,$atmp,$atmp \n\t" - "vblendvpd $dst,$tmp,$atmp,$btmp \n\t" - %} + format %{ "minD $dst, $a, $b \t! 
using tmp, atmp and btmp as TEMP" %} ins_encode %{ - int vector_len = Assembler::AVX_128bit; - __ vblendvpd($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, vector_len); - __ vblendvpd($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $a$$XMMRegister, vector_len); - __ vminsd($tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister); - __ vcmppd($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::_false, vector_len); - __ vblendvpd($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len); + __ vminmax_fp(Op_MinV, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit); %} ins_pipe( pipe_slow ); %} diff --git a/test/hotspot/jtreg/compiler/vectorization/TestSignumVector.java b/test/hotspot/jtreg/compiler/vectorization/TestSignumVector.java index bf6a10b855f..db85a61cc75 100644 --- a/test/hotspot/jtreg/compiler/vectorization/TestSignumVector.java +++ b/test/hotspot/jtreg/compiler/vectorization/TestSignumVector.java @@ -33,6 +33,8 @@ package compiler.vectorization; +import java.util.Random; + import compiler.lib.ir_framework.*; public class TestSignumVector { @@ -62,12 +64,22 @@ public class TestSignumVector { public void kernel_test_signum_double() { dinp = new double[ARRLEN]; dout = new double[ARRLEN]; + Random rnd = new Random(20); for(int i = 0 ; i < ARRLEN; i++) { - dinp[i] = (double)i*1.4; + dinp[i] = (i-ARRLEN/2)*rnd.nextDouble(); } for (int i = 0; i < ITERS; i++) { test_signum_double(dout , dinp); } + for(int i = 0 ; i < ARRLEN; i++) { + if (i-ARRLEN/2<0) { + if (dout[i] != -1.0) throw new RuntimeException("Expected negative numbers in first half of array: " + java.util.Arrays.toString(dout)); + } else if (i-ARRLEN/2==0) { + if (dout[i] != 0) throw new RuntimeException("Expected zero in the middle of array: " + java.util.Arrays.toString(dout)); + } else { + if (dout[i] != 1.0) throw new RuntimeException("Expected positive numbers in second half of array: " + java.util.Arrays.toString(dout)); + } + } } @Test @@ -82,11 +94,21 @@ public class TestSignumVector { public void kernel_test_round() { finp = new float[ARRLEN]; fout = new float[ARRLEN]; + Random rnd = new Random(20); for(int i = 0 ; i < ARRLEN; i++) { - finp[i] = (float)i*1.4f; + finp[i] = (i-ARRLEN/2)*rnd.nextFloat(); } for (int i = 0; i < ITERS; i++) { test_signum_float(fout , finp); } + for(int i = 0 ; i < ARRLEN; i++) { + if (i-ARRLEN/2<0) { + if (fout[i] != -1.0) throw new RuntimeException("Expected negative numbers in first half of array: " + java.util.Arrays.toString(fout)); + } else if (i-ARRLEN/2==0) { + if (fout[i] != 0) throw new RuntimeException("Expected zero in the middle of array: " + java.util.Arrays.toString(fout)); + } else { + if (fout[i] != 1.0) throw new RuntimeException("Expected positive numbers in second half of array: " + java.util.Arrays.toString(fout)); + } + } } } diff --git a/test/hotspot/jtreg/compiler/vectorization/runner/BasicDoubleOpTest.java b/test/hotspot/jtreg/compiler/vectorization/runner/BasicDoubleOpTest.java index 92ccca40c8a..0f758be4ed0 100644 --- a/test/hotspot/jtreg/compiler/vectorization/runner/BasicDoubleOpTest.java +++ b/test/hotspot/jtreg/compiler/vectorization/runner/BasicDoubleOpTest.java @@ -42,6 +42,7 @@ package compiler.vectorization.runner; import compiler.lib.ir_framework.*; +import java.util.Random; public class BasicDoubleOpTest extends VectorizationTestRunner { @@ -50,11 +51,63 @@ public class 
BasicDoubleOpTest extends VectorizationTestRunner { private double[] a; private double[] b; private double[] c; + private double[] d; + private double[] e; public BasicDoubleOpTest() { + // Positive test values sign | exponent | mantissa + double smallPositive = Double.longBitsToDouble(0L<<63 | 0x03fL << 52 | 0x30000fL); + double positive = Double.longBitsToDouble(0L<<63 | 0x07fL << 52 | 0x30000fL); + double bigPositive = Double.longBitsToDouble(0L<<63 | 0x07fL << 52 | 0x30100fL); + double biggerPositive = Double.longBitsToDouble(0L<<63 | 0x7feL << 52 | 0x30000fL); + double maxPositive = Double.MAX_VALUE; + + // Special positive + double nan1 = Double.longBitsToDouble(0L<<63 | 0x7ffL << 52 | 0x7fffffL); + double nan2 = Double.longBitsToDouble(0L<<63 | 0x7ffL << 52 | 0x30000fL); + double inf = Double.longBitsToDouble(0L<<63 | 0x7ffL << 52); + double zero = 0.0; + + // Negative test values sign | exponent | mantissa + double smallNegative = Double.longBitsToDouble(1L<<63 | 0x003L << 52 | 0x30000fL); + double negative = Double.longBitsToDouble(1L<<63 | 0x783L << 52 | 0x30100fL); + double bigNegative = Double.longBitsToDouble(1L<<63 | 0x783L << 52 | 0x30000fL); + double biggerNegative = Double.longBitsToDouble(1L<<63 | 0x786L << 52 | 0x30000fL); + double maxNegative = Double.longBitsToDouble(1L<<63 | 0x7feL << 52 | 0x7fffffL); + + // Special negative + double nNan1 = Double.longBitsToDouble(1L<<63 | 0x7ffL << 52 | 0x7fffffL); + double nNan2 = Double.longBitsToDouble(1L<<63 | 0x7ffL << 52 | 0x30000fL); + double nInf = Double.longBitsToDouble(1L<<63 | 0x7ffL << 52); + double nZero = -0.0; + + double[] numberList = new double[] { + nInf, maxNegative, biggerNegative, bigNegative, negative, smallNegative, nZero, + zero, smallPositive, positive, bigPositive, biggerPositive, maxPositive, inf, + nan1, nan2, nNan1, nNan2 + }; + + Random rnd = new Random(10); a = new double[SIZE]; b = new double[SIZE]; c = new double[SIZE]; + d = new double[SIZE]; + e = new double[SIZE]; + + for (int i = 0; i < SIZE;) { + for (int j = 0; j < numberList.length && i < SIZE; j++, i++) { + for (int k = j; k < numberList.length && i < SIZE; k++, i++) { + if (rnd.nextBoolean()) { + d[i] = numberList[j]; + e[i] = numberList[k]; + } else { + d[i] = numberList[k]; + e[i] = numberList[j]; + } + } + } + } + for (int i = 0; i < SIZE; i++) { a[i] = 850.0 * i + 22222.22; b[i] = -12345.678; @@ -179,7 +232,7 @@ public class BasicDoubleOpTest extends VectorizationTestRunner { public double[] vectorMax() { double[] res = new double[SIZE]; for (int i = 0; i < SIZE; i++) { - res[i] = Math.max(a[i], b[i]); + res[i] = Math.max(d[i], e[i]); } return res; } @@ -190,7 +243,7 @@ public class BasicDoubleOpTest extends VectorizationTestRunner { public double[] vectorMin() { double[] res = new double[SIZE]; for (int i = 0; i < SIZE; i++) { - res[i] = Math.min(a[i], b[i]); + res[i] = Math.min(d[i], e[i]); } return res; } diff --git a/test/hotspot/jtreg/compiler/vectorization/runner/BasicFloatOpTest.java b/test/hotspot/jtreg/compiler/vectorization/runner/BasicFloatOpTest.java index 72e8aa640ca..e2bf0492d2a 100644 --- a/test/hotspot/jtreg/compiler/vectorization/runner/BasicFloatOpTest.java +++ b/test/hotspot/jtreg/compiler/vectorization/runner/BasicFloatOpTest.java @@ -42,6 +42,7 @@ package compiler.vectorization.runner; import compiler.lib.ir_framework.*; +import java.util.Random; public class BasicFloatOpTest extends VectorizationTestRunner { @@ -50,11 +51,72 @@ public class BasicFloatOpTest extends VectorizationTestRunner { private float[] a; private float[] b; private float[] c; + private float[] d; + private float[] e; public BasicFloatOpTest() { + // Positive test values sign | exponent | mantissa + float smallPositive = Float.intBitsToFloat(0<<31 | 0x3f << 23 | 0x30000f); + float positive = Float.intBitsToFloat(0<<31 | 0x7f << 23 | 0x30000f); + float bigPositive = Float.intBitsToFloat(0<<31 | 0x7f << 23 | 0x30100f); + float biggerPositive = Float.intBitsToFloat(0<<31 | 0xfe << 23 | 0x30000f); + float maxPositive = Float.MAX_VALUE; + + // Special positive + float nan1 = Float.intBitsToFloat(0<<31 | 0xff << 23 | 0x7fffff); + float nan2 = Float.intBitsToFloat(0<<31 | 0xff << 23 | 0x30000f); + float inf = Float.intBitsToFloat(0<<31 | 0xff << 23); + float zero = 0.0f; + + // Negative test values sign | exponent | mantissa + float smallNegative = Float.intBitsToFloat(1<<31 | 0x03 << 23 | 0x30000f); + float negative = Float.intBitsToFloat(1<<31 | 0x83 << 23 | 0x30100f); + float bigNegative = Float.intBitsToFloat(1<<31 | 0x83 << 23 | 0x30000f); + float biggerNegative = Float.intBitsToFloat(1<<31 | 0x86 << 23 | 0x30000f); + float maxNegative = Float.intBitsToFloat(1<<31 | 0xfe << 23 | 0x7fffff); + + // Special negative + float nNan1 = Float.intBitsToFloat(1<<31 | 0xff << 23 | 0x7fffff); + float nNan2 = Float.intBitsToFloat(1<<31 | 0xff << 23 | 0x30000f); + float nInf = Float.intBitsToFloat(1<<31 | 0xff << 23); + float nZero = -0.0f; + + float[] orderedList = new float[] { + nInf, maxNegative, biggerNegative, bigNegative, negative, smallNegative, nZero, + zero, smallPositive, positive, bigPositive, biggerPositive, maxPositive, inf + }; + + float[] NaNs = new float[] { + nan1, nan2, nNan1, nNan2 + }; + + float[] numberList = new float[] { + nInf, maxNegative, biggerNegative, bigNegative, negative, smallNegative, nZero, + zero, smallPositive, positive, bigPositive, biggerPositive, maxPositive, inf, + nan1, nan2, nNan1, nNan2 + }; + + Random rnd = new Random(11); a = new float[SIZE]; b = new float[SIZE]; c = new float[SIZE]; + d = new float[SIZE]; + e = new float[SIZE]; + + for (int i = 0; i < SIZE;) { + for (int j = 0; j < numberList.length && i < SIZE; j++, i++) { + for (int k = j; k < numberList.length && i < SIZE; k++, i++) { + if (rnd.nextBoolean()) { + d[i] = numberList[j]; + e[i] = numberList[k]; + } else { + d[i] = numberList[k]; + e[i] = numberList[j]; + } + } + } + } + for (int i = 0; i < SIZE; i++) { a[i] = 850.0f * i + 22222.22f; b[i] = -12345.678f; @@ -146,7 +208,7 @@ public class BasicFloatOpTest extends VectorizationTestRunner { public float[] vectorMax() { float[] res = new float[SIZE]; for (int i = 0; i < SIZE; i++) { - res[i] = Math.max(a[i], b[i]); + res[i] = Math.max(d[i], e[i]); } return res; } @@ -157,7 +219,7 @@ public class BasicFloatOpTest extends VectorizationTestRunner { public float[] vectorMin() { float[] res = new float[SIZE]; for (int i = 0; i < SIZE; i++) { - res[i] = Math.min(a[i], b[i]); + res[i] = Math.min(d[i], e[i]); } return res; }
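The comment block added to C2_MacroAssembler::vminmax_fp above can be checked against a scalar model. The sketch below is illustrative only and not part of the patch; it is plain Java, and the names MaxModel, blend, vmaxpd and maxDouble are invented here. blend models the sign-bit select of vblendvpd, vmaxpd models the "return the second operand on NaN or on equal inputs" behaviour the comment describes, and maxDouble follows the max[FD] pseudo code line by line; it should agree with Math.max for ordinary values, ±0.0 and NaN.

```java
// Scalar sketch of the max[FD] sequence described in vminmax_fp (illustrative, not part of the patch).
public class MaxModel {
    // Models vblendvpd: pick y for lanes whose mask sign bit is set, x otherwise.
    static double blend(double x, double y, double mask) {
        return Double.doubleToRawLongBits(mask) < 0 ? y : x;
    }

    // Models vmaxpd: the second operand is returned when either input is NaN
    // or when the inputs compare equal (which includes -0.0 vs +0.0).
    static double vmaxpd(double x, double y) {
        if (Double.isNaN(x) || Double.isNaN(y)) return y;
        return x > y ? x : y;
    }

    static double maxDouble(double a, double b) {
        double atmp = blend(a, b, b);            // bias on the sign of b: atmp = (b < +0.0) ? b : a
        double btmp = blend(b, a, b);            //                        btmp = (b < +0.0) ? a : b
        double max  = vmaxpd(atmp, btmp);
        return Double.isNaN(atmp) ? atmp : max;  // vcmppd UNORD_Q + final blend
    }

    public static void main(String[] args) {
        double[] vals = {-0.0, 0.0, -1.5, 2.5, Double.NaN,
                         Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY};
        for (double a : vals) {
            for (double b : vals) {
                if (Double.doubleToLongBits(maxDouble(a, b))
                        != Double.doubleToLongBits(Math.max(a, b))) {
                    throw new AssertionError("mismatch for " + a + ", " + b);
                }
            }
        }
        System.out.println("scalar model agrees with Math.max");
    }
}
```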
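The new MacroAssembler::vblendvps / vblendvpd overloads replace the variable blend with vpsrad / vpcmpgtq, which fill every lane with its mask's sign bit (a vpsrad count of 32 exceeds the lane width, so each 32-bit lane is filled with its sign bit), followed by vpandn / vpand / vpor. A per-lane sketch of that selection, again in plain Java with invented names (BlendModel, blendLane):

```java
// Per-lane model of the emulated float blend (illustrative only).
public final class BlendModel {
    static int blendLane(int src1Bits, int src2Bits, int maskBits) {
        int m = maskBits >> 31;                    // vpsrad: lane becomes 0x00000000 or 0xFFFFFFFF
        return (m & src2Bits) | (~m & src1Bits);   // vpand / vpandn / vpor
    }

    public static void main(String[] args) {
        float src1 = 1.0f, src2 = 2.0f;
        for (float mask : new float[] {-3.0f, 3.0f, -0.0f, 0.0f}) {
            int maskBits = Float.floatToRawIntBits(mask);
            float blended = Float.intBitsToFloat(
                    blendLane(Float.floatToRawIntBits(src1),
                              Float.floatToRawIntBits(src2), maskBits));
            // vblendvps selects src2 wherever the mask lane's sign bit is set.
            float expected = maskBits < 0 ? src2 : src1;
            if (blended != expected) {
                throw new AssertionError("lane mismatch for mask " + mask);
            }
        }
        System.out.println("bitwise select matches the sign-bit blend");
    }
}
```

When compute_mask is false the caller has already expanded the mask to all-zeros/all-ones lanes (as vminmax_fp now does once up front), so the shift/compare step is skipped and only the and/andn/or sequence remains, which is also what the new vblendvp rule in x86.ad emits.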
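The new test inputs in BasicDoubleOpTest and BasicFloatOpTest are built directly from IEEE-754 bit fields (sign | exponent | mantissa). For the double-precision values this needs 64-bit arithmetic, which is why those literals carry an L suffix: Java masks an int shift count to 5 bits (JLS §15.19), so with int literals 0x7ff << 52 is really 0x7ff << 20 and 1 << 63 is really 1 << 31. A small check of the intended layout, in plain Java and independent of the tests (BitLayoutCheck is a name invented here):

```java
// Sanity check of the IEEE-754 double bit layout used by the test values.
public class BitLayoutCheck {
    public static void main(String[] args) {
        // sign (bit 63) | 11-bit exponent (bits 62..52) | 52-bit mantissa
        double inf  = Double.longBitsToDouble(0x7ffL << 52);             // exponent all ones, mantissa 0
        double nan  = Double.longBitsToDouble(0x7ffL << 52 | 0x30000fL); // exponent all ones, mantissa != 0
        double nInf = Double.longBitsToDouble(1L << 63 | 0x7ffL << 52);

        System.out.println(inf);                 // Infinity
        System.out.println(Double.isNaN(nan));   // true
        System.out.println(nInf);                // -Infinity

        // With int literals the shift count is masked to 5 bits:
        System.out.println(Integer.toHexString(0x7ff << 52));  // prints 7ff00000, not an exponent field
    }
}
```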