mirror of
https://github.com/openjdk/jdk.git
synced 2026-02-09 01:48:34 +00:00
8320347: Emulate vblendvp[sd] on ECore
Reviewed-by: sviswanathan, jbhateja
This commit is contained in:
parent
693847452f
commit
6aba6aa6f1
@ -1094,32 +1094,78 @@ void C2_MacroAssembler::vminmax_fp(int opcode, BasicType elem_bt,
|
||||
bool is_min = (opcode == Op_MinV || opcode == Op_MinReductionV);
|
||||
bool is_double_word = is_double_word_type(elem_bt);
|
||||
|
||||
/* Note on 'non-obvious' assembly sequence:
|
||||
*
|
||||
* While there are vminps/vmaxps instructions, there are two important differences between hardware
|
||||
* and Java on how they handle floats:
|
||||
* a. -0.0 and +0.0 are considered equal (vminps/vmaxps will return second parameter when inputs are equal)
|
||||
* b. NaN is not necessarily propagated (vminps/vmaxps will return second parameter when either input is NaN)
|
||||
*
|
||||
* It is still more efficient to use vminps/vmaxps, but with some pre/post-processing:
|
||||
* a. -0.0/+0.0: Bias negative (positive) numbers to second parameter before vminps (vmaxps)
|
||||
* (only useful when signs differ, noop otherwise)
|
||||
* b. NaN: Check if it was the first parameter that had the NaN (with vcmp[UNORD_Q])
|
||||
|
||||
* Following pseudo code describes the algorithm for max[FD] (Min algorithm is on similar lines):
|
||||
* btmp = (b < +0.0) ? a : b
|
||||
* atmp = (b < +0.0) ? b : a
|
||||
* Tmp = Max_Float(atmp , btmp)
|
||||
* Res = (atmp == NaN) ? atmp : Tmp
|
||||
*/
|
||||
|
||||
void (MacroAssembler::*vblend)(XMMRegister, XMMRegister, XMMRegister, XMMRegister, int, bool, XMMRegister);
|
||||
void (MacroAssembler::*vmaxmin)(XMMRegister, XMMRegister, XMMRegister, int);
|
||||
void (MacroAssembler::*vcmp)(XMMRegister, XMMRegister, XMMRegister, int, int);
|
||||
XMMRegister mask;
|
||||
|
||||
if (!is_double_word && is_min) {
|
||||
vblendvps(atmp, a, b, a, vlen_enc);
|
||||
vblendvps(btmp, b, a, a, vlen_enc);
|
||||
vminps(tmp, atmp, btmp, vlen_enc);
|
||||
vcmpps(btmp, atmp, atmp, Assembler::UNORD_Q, vlen_enc);
|
||||
vblendvps(dst, tmp, atmp, btmp, vlen_enc);
|
||||
mask = a;
|
||||
vblend = &MacroAssembler::vblendvps;
|
||||
vmaxmin = &MacroAssembler::vminps;
|
||||
vcmp = &MacroAssembler::vcmpps;
|
||||
} else if (!is_double_word && !is_min) {
|
||||
vblendvps(btmp, b, a, b, vlen_enc);
|
||||
vblendvps(atmp, a, b, b, vlen_enc);
|
||||
vmaxps(tmp, atmp, btmp, vlen_enc);
|
||||
vcmpps(btmp, atmp, atmp, Assembler::UNORD_Q, vlen_enc);
|
||||
vblendvps(dst, tmp, atmp, btmp, vlen_enc);
|
||||
mask = b;
|
||||
vblend = &MacroAssembler::vblendvps;
|
||||
vmaxmin = &MacroAssembler::vmaxps;
|
||||
vcmp = &MacroAssembler::vcmpps;
|
||||
} else if (is_double_word && is_min) {
|
||||
vblendvpd(atmp, a, b, a, vlen_enc);
|
||||
vblendvpd(btmp, b, a, a, vlen_enc);
|
||||
vminpd(tmp, atmp, btmp, vlen_enc);
|
||||
vcmppd(btmp, atmp, atmp, Assembler::UNORD_Q, vlen_enc);
|
||||
vblendvpd(dst, tmp, atmp, btmp, vlen_enc);
|
||||
mask = a;
|
||||
vblend = &MacroAssembler::vblendvpd;
|
||||
vmaxmin = &MacroAssembler::vminpd;
|
||||
vcmp = &MacroAssembler::vcmppd;
|
||||
} else {
|
||||
assert(is_double_word && !is_min, "sanity");
|
||||
vblendvpd(btmp, b, a, b, vlen_enc);
|
||||
vblendvpd(atmp, a, b, b, vlen_enc);
|
||||
vmaxpd(tmp, atmp, btmp, vlen_enc);
|
||||
vcmppd(btmp, atmp, atmp, Assembler::UNORD_Q, vlen_enc);
|
||||
vblendvpd(dst, tmp, atmp, btmp, vlen_enc);
|
||||
mask = b;
|
||||
vblend = &MacroAssembler::vblendvpd;
|
||||
vmaxmin = &MacroAssembler::vmaxpd;
|
||||
vcmp = &MacroAssembler::vcmppd;
|
||||
}
|
||||
|
||||
// Make sure EnableX86ECoreOpts isn't disabled on register overlaps
|
||||
XMMRegister maxmin, scratch;
|
||||
if (dst == btmp) {
|
||||
maxmin = btmp;
|
||||
scratch = tmp;
|
||||
} else {
|
||||
maxmin = tmp;
|
||||
scratch = btmp;
|
||||
}
|
||||
|
||||
bool precompute_mask = EnableX86ECoreOpts && UseAVX>1;
|
||||
if (precompute_mask && !is_double_word) {
|
||||
vpsrad(tmp, mask, 32, vlen_enc);
|
||||
mask = tmp;
|
||||
} else if (precompute_mask && is_double_word) {
|
||||
vpxor(tmp, tmp, tmp, vlen_enc);
|
||||
vpcmpgtq(tmp, tmp, mask, vlen_enc);
|
||||
mask = tmp;
|
||||
}
|
||||
|
||||
(this->*vblend)(atmp, a, b, mask, vlen_enc, !precompute_mask, btmp);
|
||||
(this->*vblend)(btmp, b, a, mask, vlen_enc, !precompute_mask, tmp);
|
||||
(this->*vmaxmin)(maxmin, atmp, btmp, vlen_enc);
|
||||
(this->*vcmp)(scratch, atmp, atmp, Assembler::UNORD_Q, vlen_enc);
|
||||
(this->*vblend)(dst, maxmin, atmp, scratch, vlen_enc, false, scratch);
|
||||
}
|
||||
|
||||
void C2_MacroAssembler::evminmax_fp(int opcode, BasicType elem_bt,
|
||||
@ -5318,18 +5364,18 @@ void C2_MacroAssembler::vector_signum_avx(int opcode, XMMRegister dst, XMMRegist
|
||||
if (opcode == Op_SignumVD) {
|
||||
vsubpd(dst, zero, one, vec_enc);
|
||||
// if src < 0 ? -1 : 1
|
||||
vblendvpd(dst, one, dst, src, vec_enc);
|
||||
vblendvpd(dst, one, dst, src, vec_enc, true, xtmp1);
|
||||
// if src == NaN, -0.0 or 0.0 return src.
|
||||
vcmppd(xtmp1, src, zero, Assembler::EQ_UQ, vec_enc);
|
||||
vblendvpd(dst, dst, src, xtmp1, vec_enc);
|
||||
vblendvpd(dst, dst, src, xtmp1, vec_enc, false, xtmp1);
|
||||
} else {
|
||||
assert(opcode == Op_SignumVF, "");
|
||||
vsubps(dst, zero, one, vec_enc);
|
||||
// if src < 0 ? -1 : 1
|
||||
vblendvps(dst, one, dst, src, vec_enc);
|
||||
vblendvps(dst, one, dst, src, vec_enc, true, xtmp1);
|
||||
// if src == NaN, -0.0 or 0.0 return src.
|
||||
vcmpps(xtmp1, src, zero, Assembler::EQ_UQ, vec_enc);
|
||||
vblendvps(dst, dst, src, xtmp1, vec_enc);
|
||||
vblendvps(dst, dst, src, xtmp1, vec_enc, false, xtmp1);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -3566,6 +3566,56 @@ void MacroAssembler::vbroadcastss(XMMRegister dst, AddressLiteral src, int vecto
|
||||
}
|
||||
}
|
||||
|
||||
// Vector float blend
|
||||
// vblendvps(XMMRegister dst, XMMRegister nds, XMMRegister src, XMMRegister mask, int vector_len, bool compute_mask = true, XMMRegister scratch = xnoreg)
|
||||
// Per-lane blend of packed 32-bit elements: each lane of dst receives the
// src2 lane where the mask selects it and the src1 lane otherwise.
//
// When EnableX86ECoreOpts is set and UseAVX > 1, and the register assignment
// allows it, the blend is emitted as a vpandn / vpand / vpor sequence.
// In every other case the plain vblendvps instruction is emitted.
//
//   compute_mask  true:  the selector is derived from 'mask' with an arithmetic
//                        right shift (vpsrad) written into 'scratch'.
//                 false: 'mask' is fed to vpand/vpandn as is, so the caller must
//                        already have expanded it to full-lane values.
//   scratch       temporary overwritten by the emulated sequence. It may be the
//                 same register as 'mask', but not dst, src1 or src2.
void MacroAssembler::vblendvps(XMMRegister dst, XMMRegister src1, XMMRegister src2, XMMRegister mask, int vector_len, bool compute_mask, XMMRegister scratch) {
  const bool emulate = EnableX86ECoreOpts && UseAVX > 1;

  // The temporary has to be a real register that aliases neither the
  // destination nor either source (aliasing the mask is tolerated).
  const bool scratch_usable = !(scratch == xnoreg || scratch == src1 || scratch == src2 || scratch == dst);

  // dst may coincide with one of the sources, but not with both of them
  // at once, and never with the mask.
  const bool dst_usable = !(dst == mask || (dst == src1 && dst == src2));

  if (!emulate || !scratch_usable || !dst_usable) {
    // Constraints not met: emit the hardware blend instruction.
    Assembler::vblendvps(dst, src1, src2, mask, vector_len);
    return;
  }

  XMMRegister lane_mask = mask;
  if (compute_mask) {
    // Shift each 32-bit lane right arithmetically so the whole lane
    // reflects its sign bit; in-place operation is fine if scratch == mask.
    vpsrad(scratch, mask, 32, vector_len);
    lane_mask = scratch;
  }

  // Always write dst first and scratch second: scratch may be the register
  // holding lane_mask, so it must be overwritten by the last reader.
  if (dst == src1) {
    // dst aliases src1, so src1 has to be consumed by the instruction
    // that overwrites it.
    vpandn(dst,     lane_mask, src1, vector_len); // lanes with mask clear: src1
    vpand (scratch, lane_mask, src2, vector_len); // lanes with mask set:   src2
  } else {
    // dst may alias src2 here, so src2 is consumed by the first write.
    vpand (dst,     lane_mask, src2, vector_len); // lanes with mask set:   src2
    vpandn(scratch, lane_mask, src1, vector_len); // lanes with mask clear: src1
  }
  vpor(dst, dst, scratch, vector_len);            // merge both halves
}
|
||||
|
||||
// vblendvpd(XMMRegister dst, XMMRegister nds, XMMRegister src, XMMRegister mask, int vector_len, bool compute_mask = true, XMMRegister scratch = xnoreg)
|
||||
// Per-lane blend of packed 64-bit elements: each lane of dst receives the
// src2 lane where the mask selects it and the src1 lane otherwise.
//
// When EnableX86ECoreOpts is set and UseAVX > 1, and the register assignment
// allows it, the blend is emitted as a vpandn / vpand / vpor sequence.
// In every other case the plain vblendvpd instruction is emitted.
//
//   compute_mask  true:  the selector is derived from 'mask' by comparing each
//                        64-bit lane against zero (0 > mask) into 'scratch'.
//                 false: 'mask' is fed to vpand/vpandn as is, so the caller must
//                        already have expanded it to full-lane values.
//   scratch       temporary overwritten by the emulated sequence. It must differ
//                 from dst, src1 and src2; it may equal 'mask' only when
//                 compute_mask is false.
void MacroAssembler::vblendvpd(XMMRegister dst, XMMRegister src1, XMMRegister src2, XMMRegister mask, int vector_len, bool compute_mask, XMMRegister scratch) {
  const bool emulate = EnableX86ECoreOpts && UseAVX > 1;

  // The temporary has to be a real register that aliases neither the
  // destination nor either source. Computing the selector zeroes scratch
  // before mask is read, so in that mode scratch must not be the mask either.
  const bool scratch_usable = !(scratch == xnoreg || scratch == src1 || scratch == src2 || scratch == dst) &&
                              !(compute_mask && scratch == mask);

  // dst may coincide with one of the sources, but not with both of them
  // at once, and never with the mask.
  const bool dst_usable = !(dst == mask || (dst == src1 && dst == src2));

  if (!emulate || !scratch_usable || !dst_usable) {
    // Constraints not met: emit the hardware blend instruction.
    Assembler::vblendvpd(dst, src1, src2, mask, vector_len);
    return;
  }

  XMMRegister lane_mask = mask;
  if (compute_mask) {
    // scratch = (0 > mask) for every signed 64-bit lane.
    vpxor(scratch, scratch, scratch, vector_len);
    vpcmpgtq(scratch, scratch, mask, vector_len);
    lane_mask = scratch;
  }

  // Always write dst first and scratch second: scratch may be the register
  // holding lane_mask, so it must be overwritten by the last reader.
  if (dst == src1) {
    // dst aliases src1, so src1 has to be consumed by the instruction
    // that overwrites it.
    vpandn(dst,     lane_mask, src1, vector_len); // lanes with mask clear: src1
    vpand (scratch, lane_mask, src2, vector_len); // lanes with mask set:   src2
  } else {
    // dst may alias src2 here, so src2 is consumed by the first write.
    vpand (dst,     lane_mask, src2, vector_len); // lanes with mask set:   src2
    vpandn(scratch, lane_mask, src1, vector_len); // lanes with mask clear: src1
  }
  vpor(dst, dst, scratch, vector_len);            // merge both halves
}
|
||||
|
||||
void MacroAssembler::vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
|
||||
assert(((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
|
||||
Assembler::vpcmpeqb(dst, nds, src, vector_len);
|
||||
|
||||
@ -1130,6 +1130,10 @@ public:
|
||||
using Assembler::vbroadcastss;
|
||||
void vbroadcastss(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch = noreg);
|
||||
|
||||
// Vector float blend
|
||||
void vblendvps(XMMRegister dst, XMMRegister nds, XMMRegister src, XMMRegister mask, int vector_len, bool compute_mask = true, XMMRegister scratch = xnoreg);
|
||||
void vblendvpd(XMMRegister dst, XMMRegister nds, XMMRegister src, XMMRegister mask, int vector_len, bool compute_mask = true, XMMRegister scratch = xnoreg);
|
||||
|
||||
void divsd(XMMRegister dst, XMMRegister src) { Assembler::divsd(dst, src); }
|
||||
void divsd(XMMRegister dst, Address src) { Assembler::divsd(dst, src); }
|
||||
void divsd(XMMRegister dst, AddressLiteral src, Register rscratch = noreg);
|
||||
|
||||
@ -7801,7 +7801,7 @@ instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{
|
||||
%}
|
||||
|
||||
instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{
|
||||
predicate(UseAVX > 0 &&
|
||||
predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
|
||||
n->in(2)->bottom_type()->isa_vectmask() == NULL &&
|
||||
Matcher::vector_length_in_bytes(n) <= 32 &&
|
||||
is_integral_type(Matcher::vector_element_basic_type(n)));
|
||||
@ -7815,7 +7815,7 @@ instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{
|
||||
%}
|
||||
|
||||
instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{
|
||||
predicate(UseAVX > 0 &&
|
||||
predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
|
||||
n->in(2)->bottom_type()->isa_vectmask() == NULL &&
|
||||
Matcher::vector_length_in_bytes(n) <= 32 &&
|
||||
!is_integral_type(Matcher::vector_element_basic_type(n)));
|
||||
@ -7828,6 +7828,22 @@ instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
// VectorBlend implemented with bitwise operations (vpandn / vpand / vpor).
// Matched only when EnableX86ECoreOpts is set, the mask input is not a
// vectmask type, and the vector is at most 32 bytes long.
// Computes dst = (~mask & src1) | (mask & src2), using vtmp as a temporary.
// NOTE(review): presumably relies on every mask lane being all-ones or
// all-zeros, since the mask is applied bitwise - confirm.
// NOTE(review): the predicate admits UseAVX == 1, whereas
// MacroAssembler::vblendvps/vblendvpd only emulate when UseAVX > 1 - confirm
// the bitwise instructions are available for every vector length matched here.
// NOTE(review): dst is declared TEMP in addition to vtmp, presumably to keep it
// from sharing a register with an input - confirm.
instruct vblendvp(legVec dst, legVec src1, legVec src2, legVec mask, legVec vtmp) %{
|
||||
predicate(UseAVX > 0 && EnableX86ECoreOpts &&
|
||||
n->in(2)->bottom_type()->isa_vectmask() == NULL &&
|
||||
Matcher::vector_length_in_bytes(n) <= 32);
|
||||
match(Set dst (VectorBlend (Binary src1 src2) mask));
|
||||
format %{ "vector_blend $dst,$src1,$src2,$mask\t! using $vtmp as TEMP" %}
|
||||
effect(TEMP vtmp, TEMP dst);
|
||||
ins_encode %{
|
||||
int vlen_enc = vector_length_encoding(this);
|
||||
// vtmp = ~mask & src1 : keeps src1 where the mask is clear
__ vpandn($vtmp$$XMMRegister, $mask$$XMMRegister, $src1$$XMMRegister, vlen_enc);
|
||||
// dst = mask & src2 : keeps src2 where the mask is set
__ vpand ($dst$$XMMRegister, $mask$$XMMRegister, $src2$$XMMRegister, vlen_enc);
|
||||
// dst = dst | vtmp : combine the two partial results
__ vpor ($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, kReg ktmp) %{
|
||||
predicate(Matcher::vector_length_in_bytes(n) == 64 &&
|
||||
n->in(2)->bottom_type()->isa_vectmask() == NULL);
|
||||
|
||||
@ -4478,34 +4478,15 @@ instruct loadD(regD dst, memory mem)
|
||||
ins_pipe(pipe_slow); // XXX
|
||||
%}
|
||||
|
||||
|
||||
// Following pseudo code describes the algorithm for max[FD]:
|
||||
// Min algorithm is on similar lines
|
||||
// btmp = (b < +0.0) ? a : b
|
||||
// atmp = (b < +0.0) ? b : a
|
||||
// Tmp = Max_Float(atmp , btmp)
|
||||
// Res = (atmp == NaN) ? atmp : Tmp
|
||||
|
||||
// max = java.lang.Math.max(float a, float b)
|
||||
instruct maxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
|
||||
predicate(UseAVX > 0 && !SuperWord::is_reduction(n));
|
||||
match(Set dst (MaxF a b));
|
||||
effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
|
||||
format %{
|
||||
"vblendvps $btmp,$b,$a,$b \n\t"
|
||||
"vblendvps $atmp,$a,$b,$b \n\t"
|
||||
"vmaxss $tmp,$atmp,$btmp \n\t"
|
||||
"vcmpps.unordered $btmp,$atmp,$atmp \n\t"
|
||||
"vblendvps $dst,$tmp,$atmp,$btmp \n\t"
|
||||
%}
|
||||
format %{ "maxF $dst, $a, $b \t! using tmp, atmp and btmp as TEMP" %}
|
||||
ins_encode %{
|
||||
int vector_len = Assembler::AVX_128bit;
|
||||
__ vblendvps($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vector_len);
|
||||
__ vblendvps($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $b$$XMMRegister, vector_len);
|
||||
__ vmaxss($tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister);
|
||||
__ vcmpps($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::_false, vector_len);
|
||||
__ vblendvps($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len);
|
||||
%}
|
||||
__ vminmax_fp(Op_MaxV, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
@ -4527,20 +4508,9 @@ instruct maxD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp,
|
||||
predicate(UseAVX > 0 && !SuperWord::is_reduction(n));
|
||||
match(Set dst (MaxD a b));
|
||||
effect(USE a, USE b, TEMP atmp, TEMP btmp, TEMP tmp);
|
||||
format %{
|
||||
"vblendvpd $btmp,$b,$a,$b \n\t"
|
||||
"vblendvpd $atmp,$a,$b,$b \n\t"
|
||||
"vmaxsd $tmp,$atmp,$btmp \n\t"
|
||||
"vcmppd.unordered $btmp,$atmp,$atmp \n\t"
|
||||
"vblendvpd $dst,$tmp,$atmp,$btmp \n\t"
|
||||
%}
|
||||
format %{ "maxD $dst, $a, $b \t! using tmp, atmp and btmp as TEMP" %}
|
||||
ins_encode %{
|
||||
int vector_len = Assembler::AVX_128bit;
|
||||
__ vblendvpd($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vector_len);
|
||||
__ vblendvpd($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $b$$XMMRegister, vector_len);
|
||||
__ vmaxsd($tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister);
|
||||
__ vcmppd($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::_false, vector_len);
|
||||
__ vblendvpd($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len);
|
||||
__ vminmax_fp(Op_MaxV, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
@ -4563,20 +4533,9 @@ instruct minF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp,
|
||||
predicate(UseAVX > 0 && !SuperWord::is_reduction(n));
|
||||
match(Set dst (MinF a b));
|
||||
effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
|
||||
format %{
|
||||
"vblendvps $atmp,$a,$b,$a \n\t"
|
||||
"vblendvps $btmp,$b,$a,$a \n\t"
|
||||
"vminss $tmp,$atmp,$btmp \n\t"
|
||||
"vcmpps.unordered $btmp,$atmp,$atmp \n\t"
|
||||
"vblendvps $dst,$tmp,$atmp,$btmp \n\t"
|
||||
%}
|
||||
format %{ "minF $dst, $a, $b \t! using tmp, atmp and btmp as TEMP" %}
|
||||
ins_encode %{
|
||||
int vector_len = Assembler::AVX_128bit;
|
||||
__ vblendvps($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, vector_len);
|
||||
__ vblendvps($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $a$$XMMRegister, vector_len);
|
||||
__ vminss($tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister);
|
||||
__ vcmpps($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::_false, vector_len);
|
||||
__ vblendvps($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len);
|
||||
__ vminmax_fp(Op_MinV, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
@ -4599,20 +4558,9 @@ instruct minD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp,
|
||||
predicate(UseAVX > 0 && !SuperWord::is_reduction(n));
|
||||
match(Set dst (MinD a b));
|
||||
effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
|
||||
format %{
|
||||
"vblendvpd $atmp,$a,$b,$a \n\t"
|
||||
"vblendvpd $btmp,$b,$a,$a \n\t"
|
||||
"vminsd $tmp,$atmp,$btmp \n\t"
|
||||
"vcmppd.unordered $btmp,$atmp,$atmp \n\t"
|
||||
"vblendvpd $dst,$tmp,$atmp,$btmp \n\t"
|
||||
%}
|
||||
format %{ "minD $dst, $a, $b \t! using tmp, atmp and btmp as TEMP" %}
|
||||
ins_encode %{
|
||||
int vector_len = Assembler::AVX_128bit;
|
||||
__ vblendvpd($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, vector_len);
|
||||
__ vblendvpd($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $a$$XMMRegister, vector_len);
|
||||
__ vminsd($tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister);
|
||||
__ vcmppd($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::_false, vector_len);
|
||||
__ vblendvpd($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len);
|
||||
__ vminmax_fp(Op_MinV, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
@ -33,6 +33,8 @@
|
||||
|
||||
package compiler.vectorization;
|
||||
|
||||
import java.util.Random;
|
||||
|
||||
import compiler.lib.ir_framework.*;
|
||||
|
||||
public class TestSignumVector {
|
||||
@ -62,12 +64,22 @@ public class TestSignumVector {
|
||||
public void kernel_test_signum_double() {
|
||||
dinp = new double[ARRLEN];
|
||||
dout = new double[ARRLEN];
|
||||
Random rnd = new Random(20);
|
||||
for(int i = 0 ; i < ARRLEN; i++) {
|
||||
dinp[i] = (double)i*1.4;
|
||||
dinp[i] = (i-ARRLEN/2)*rnd.nextDouble();
|
||||
}
|
||||
for (int i = 0; i < ITERS; i++) {
|
||||
test_signum_double(dout , dinp);
|
||||
}
|
||||
for(int i = 0 ; i < ARRLEN; i++) {
|
||||
if (i-ARRLEN/2<0) {
|
||||
if (dout[i] != -1.0) throw new RuntimeException("Expected negative numbers in first half of array: " + java.util.Arrays.toString(dout));
|
||||
} else if (i-ARRLEN/2==0) {
|
||||
if (dout[i] != 0) throw new RuntimeException("Expected zero in the middle of array: " + java.util.Arrays.toString(dout));
|
||||
} else {
|
||||
if (dout[i] != 1.0) throw new RuntimeException("Expected positive numbers in second half of array: " + java.util.Arrays.toString(dout));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@ -82,11 +94,21 @@ public class TestSignumVector {
|
||||
public void kernel_test_round() {
|
||||
finp = new float[ARRLEN];
|
||||
fout = new float[ARRLEN];
|
||||
Random rnd = new Random(20);
|
||||
for(int i = 0 ; i < ARRLEN; i++) {
|
||||
finp[i] = (float)i*1.4f;
|
||||
finp[i] = (i-ARRLEN/2)*rnd.nextFloat();
|
||||
}
|
||||
for (int i = 0; i < ITERS; i++) {
|
||||
test_signum_float(fout , finp);
|
||||
}
|
||||
for(int i = 0 ; i < ARRLEN; i++) {
|
||||
if (i-ARRLEN/2<0) {
|
||||
if (fout[i] != -1.0) throw new RuntimeException("Expected negative numbers in first half of array: " + java.util.Arrays.toString(fout));
|
||||
} else if (i-ARRLEN/2==0) {
|
||||
if (fout[i] != 0) throw new RuntimeException("Expected zero in the middle of array: " + java.util.Arrays.toString(fout));
|
||||
} else {
|
||||
if (fout[i] != 1.0) throw new RuntimeException("Expected positive numbers in second half of array: " + java.util.Arrays.toString(fout));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -42,6 +42,7 @@
|
||||
package compiler.vectorization.runner;
|
||||
|
||||
import compiler.lib.ir_framework.*;
|
||||
import java.util.Random;
|
||||
|
||||
public class BasicDoubleOpTest extends VectorizationTestRunner {
|
||||
|
||||
@ -50,11 +51,63 @@ public class BasicDoubleOpTest extends VectorizationTestRunner {
|
||||
private double[] a;
|
||||
private double[] b;
|
||||
private double[] c;
|
||||
private double[] d;
|
||||
private double[] e;
|
||||
|
||||
public BasicDoubleOpTest() {
|
||||
// Positive test values sign | exponent | mantissa
|
||||
double smallPositive = Double.longBitsToDouble(0<<63 | 0x03f << 52 | 0x30000f);
|
||||
double positive = Double.longBitsToDouble(0<<63 | 0x07f << 52 | 0x30000f);
|
||||
double bigPositive = Double.longBitsToDouble(0<<63 | 0x07f << 52 | 0x30100f);
|
||||
double biggerPositive = Double.longBitsToDouble(0<<63 | 0x7fe << 52 | 0x30000f);
|
||||
double maxPositive = Double.MAX_VALUE;
|
||||
|
||||
// Special positive
|
||||
double nan1 = Double.longBitsToDouble(0<<63 | 0x7ff << 52 | 0x7fffff);
|
||||
double nan2 = Double.longBitsToDouble(0<<63 | 0x7ff << 52 | 0x30000f);
|
||||
double inf = Double.longBitsToDouble(0<<63 | 0x7ff << 52);
|
||||
double zero = 0.0;
|
||||
|
||||
// Negative test values sign | exponent | mantissa
|
||||
double smallNegative = Double.longBitsToDouble(1<<63 | 0x003 << 52 | 0x30000f);
|
||||
double negative = Double.longBitsToDouble(1<<63 | 0x783 << 52 | 0x30100f);
|
||||
double bigNegative = Double.longBitsToDouble(1<<63 | 0x783 << 52 | 0x30000f);
|
||||
double biggerNegative = Double.longBitsToDouble(1<<63 | 0x786 << 52 | 0x30000f);
|
||||
double maxNegative = Double.longBitsToDouble(1<<63 | 0x7fe << 52 | 0x7fffff);
|
||||
|
||||
// Special negative
|
||||
double nNan1 = Double.longBitsToDouble(1<<63 | 0x7ff << 52 | 0x7fffff);
|
||||
double nNan2 = Double.longBitsToDouble(1<<63 | 0x7ff << 52 | 0x30000f);
|
||||
double nInf = Double.longBitsToDouble(1<<63 | 0x7ff << 52);
|
||||
double nZero = -0.0;
|
||||
|
||||
double[] numberList = new double[] {
|
||||
nInf, maxNegative, biggerNegative, bigNegative, negative, smallNegative, nZero,
|
||||
zero, smallPositive, positive, bigPositive, biggerPositive, maxPositive, inf,
|
||||
nan1, nan2, nNan1, nNan2
|
||||
};
|
||||
|
||||
Random rnd = new Random(10);
|
||||
a = new double[SIZE];
|
||||
b = new double[SIZE];
|
||||
c = new double[SIZE];
|
||||
d = new double[SIZE];
|
||||
e = new double[SIZE];
|
||||
|
||||
for (int i = 0; i < SIZE;) {
|
||||
for (int j = 0; j < numberList.length && i < SIZE; j++, i++) {
|
||||
for (int k = j; k < numberList.length && i < SIZE; k++, i++) {
|
||||
if (rnd.nextBoolean()) {
|
||||
d[i] = numberList[j];
|
||||
e[i] = numberList[k];
|
||||
} else {
|
||||
d[i] = numberList[k];
|
||||
e[i] = numberList[j];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < SIZE; i++) {
|
||||
a[i] = 850.0 * i + 22222.22;
|
||||
b[i] = -12345.678;
|
||||
@ -179,7 +232,7 @@ public class BasicDoubleOpTest extends VectorizationTestRunner {
|
||||
public double[] vectorMax() {
|
||||
double[] res = new double[SIZE];
|
||||
for (int i = 0; i < SIZE; i++) {
|
||||
res[i] = Math.max(a[i], b[i]);
|
||||
res[i] = Math.max(d[i], e[i]);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
@ -190,7 +243,7 @@ public class BasicDoubleOpTest extends VectorizationTestRunner {
|
||||
public double[] vectorMin() {
|
||||
double[] res = new double[SIZE];
|
||||
for (int i = 0; i < SIZE; i++) {
|
||||
res[i] = Math.min(a[i], b[i]);
|
||||
res[i] = Math.min(d[i], e[i]);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
@ -42,6 +42,7 @@
|
||||
package compiler.vectorization.runner;
|
||||
|
||||
import compiler.lib.ir_framework.*;
|
||||
import java.util.Random;
|
||||
|
||||
public class BasicFloatOpTest extends VectorizationTestRunner {
|
||||
|
||||
@ -50,11 +51,72 @@ public class BasicFloatOpTest extends VectorizationTestRunner {
|
||||
private float[] a;
|
||||
private float[] b;
|
||||
private float[] c;
|
||||
private float[] d;
|
||||
private float[] e;
|
||||
|
||||
public BasicFloatOpTest() {
|
||||
// Positive test values sign | exponent | mantissa
|
||||
float smallPositive = Float.intBitsToFloat(0<<31 | 0x3f << 23 | 0x30000f);
|
||||
float positive = Float.intBitsToFloat(0<<31 | 0x7f << 23 | 0x30000f);
|
||||
float bigPositive = Float.intBitsToFloat(0<<31 | 0x7f << 23 | 0x30100f);
|
||||
float biggerPositive = Float.intBitsToFloat(0<<31 | 0xfe << 23 | 0x30000f);
|
||||
float maxPositive = Float.MAX_VALUE;
|
||||
|
||||
// Special positive
|
||||
float nan1 = Float.intBitsToFloat(0<<31 | 0xff << 23 | 0x7fffff);
|
||||
float nan2 = Float.intBitsToFloat(0<<31 | 0xff << 23 | 0x30000f);
|
||||
float inf = Float.intBitsToFloat(0<<31 | 0xff << 23);
|
||||
float zero = 0.0f;
|
||||
|
||||
// Negative test values sign | exponent | mantissa
|
||||
float smallNegative = Float.intBitsToFloat(1<<31 | 0x03 << 23 | 0x30000f);
|
||||
float negative = Float.intBitsToFloat(1<<31 | 0x83 << 23 | 0x30100f);
|
||||
float bigNegative = Float.intBitsToFloat(1<<31 | 0x83 << 23 | 0x30000f);
|
||||
float biggerNegative = Float.intBitsToFloat(1<<31 | 0x86 << 23 | 0x30000f);
|
||||
float maxNegative = Float.intBitsToFloat(1<<31 | 0xfe << 23 | 0x7fffff);
|
||||
|
||||
// Special negative
|
||||
float nNan1 = Float.intBitsToFloat(1<<31 | 0xff << 23 | 0x7fffff);
|
||||
float nNan2 = Float.intBitsToFloat(1<<31 | 0xff << 23 | 0x30000f);
|
||||
float nInf = Float.intBitsToFloat(1<<31 | 0xff << 23);
|
||||
float nZero = -0.0f;
|
||||
|
||||
float[] orderedList = new float[] {
|
||||
nInf, maxNegative, biggerNegative, bigNegative, negative, smallNegative, nZero,
|
||||
zero, smallPositive, positive, bigPositive, biggerPositive, maxPositive, inf
|
||||
};
|
||||
|
||||
float[] NaNs = new float[] {
|
||||
nan1, nan2, nNan1, nNan2
|
||||
};
|
||||
|
||||
float[] numberList = new float[] {
|
||||
nInf, maxNegative, biggerNegative, bigNegative, negative, smallNegative, nZero,
|
||||
zero, smallPositive, positive, bigPositive, biggerPositive, maxPositive, inf,
|
||||
nan1, nan2, nNan1, nNan2
|
||||
};
|
||||
|
||||
Random rnd = new Random(11);
|
||||
a = new float[SIZE];
|
||||
b = new float[SIZE];
|
||||
c = new float[SIZE];
|
||||
d = new float[SIZE];
|
||||
e = new float[SIZE];
|
||||
|
||||
for (int i = 0; i < SIZE;) {
|
||||
for (int j = 0; j < numberList.length && i < SIZE; j++, i++) {
|
||||
for (int k = j; k < numberList.length && i < SIZE; k++, i++) {
|
||||
if (rnd.nextBoolean()) {
|
||||
d[i] = numberList[j];
|
||||
e[i] = numberList[k];
|
||||
} else {
|
||||
d[i] = numberList[k];
|
||||
e[i] = numberList[j];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < SIZE; i++) {
|
||||
a[i] = 850.0f * i + 22222.22f;
|
||||
b[i] = -12345.678f;
|
||||
@ -146,7 +208,7 @@ public class BasicFloatOpTest extends VectorizationTestRunner {
|
||||
public float[] vectorMax() {
|
||||
float[] res = new float[SIZE];
|
||||
for (int i = 0; i < SIZE; i++) {
|
||||
res[i] = Math.max(a[i], b[i]);
|
||||
res[i] = Math.max(d[i], e[i]);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
@ -157,7 +219,7 @@ public class BasicFloatOpTest extends VectorizationTestRunner {
|
||||
public float[] vectorMin() {
|
||||
float[] res = new float[SIZE];
|
||||
for (int i = 0; i < SIZE; i++) {
|
||||
res[i] = Math.min(a[i], b[i]);
|
||||
res[i] = Math.min(d[i], e[i]);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user