mirror of
https://github.com/openjdk/jdk.git
synced 2026-01-28 12:09:14 +00:00
8348868: AArch64: Add backend support for SelectFromTwoVector
Co-authored-by: Jatin Bhateja <jbhateja@openjdk.org> Reviewed-by: haosun, aph, sviswanathan, xgong
This commit is contained in:
parent
8ac4a88f3c
commit
2ba8a06f0c
@ -881,6 +881,46 @@ reg_class vectorx_reg(
|
||||
V31, V31_H, V31_J, V31_K
|
||||
);
|
||||
|
||||
// Class for vector register V10: a single-register class listing only V10, V10_H,
// V10_J and V10_K. Operand vReg_V10 allocates from this class.
|
||||
reg_class v10_veca_reg(
|
||||
V10, V10_H, V10_J, V10_K
|
||||
);
|
||||
|
||||
// Class for vector register V11: a single-register class listing only V11, V11_H,
// V11_J and V11_K. Operand vReg_V11 allocates from this class.
|
||||
reg_class v11_veca_reg(
|
||||
V11, V11_H, V11_J, V11_K
|
||||
);
|
||||
|
||||
// Class for vector register V12: a single-register class listing only V12, V12_H,
// V12_J and V12_K. Operand vReg_V12 allocates from this class.
|
||||
reg_class v12_veca_reg(
|
||||
V12, V12_H, V12_J, V12_K
|
||||
);
|
||||
|
||||
// Class for vector register V13: a single-register class listing only V13, V13_H,
// V13_J and V13_K. Operand vReg_V13 allocates from this class.
|
||||
reg_class v13_veca_reg(
|
||||
V13, V13_H, V13_J, V13_K
|
||||
);
|
||||
|
||||
// Class for vector register V17: a single-register class listing only V17, V17_H,
// V17_J and V17_K. Operand vReg_V17 allocates from this class.
|
||||
reg_class v17_veca_reg(
|
||||
V17, V17_H, V17_J, V17_K
|
||||
);
|
||||
|
||||
// Class for vector register V18: a single-register class listing only V18, V18_H,
// V18_J and V18_K. Operand vReg_V18 allocates from this class.
|
||||
reg_class v18_veca_reg(
|
||||
V18, V18_H, V18_J, V18_K
|
||||
);
|
||||
|
||||
// Class for vector register V23: a single-register class listing only V23, V23_H,
// V23_J and V23_K. Operand vReg_V23 allocates from this class.
|
||||
reg_class v23_veca_reg(
|
||||
V23, V23_H, V23_J, V23_K
|
||||
);
|
||||
|
||||
// Class for vector register V24: a single-register class listing only V24, V24_H,
// V24_J and V24_K. Operand vReg_V24 allocates from this class.
|
||||
reg_class v24_veca_reg(
|
||||
V24, V24_H, V24_J, V24_K
|
||||
);
|
||||
|
||||
// Class for 128 bit register v0
|
||||
reg_class v0_reg(
|
||||
V0, V0_H
|
||||
@ -4969,6 +5009,86 @@ operand vReg()
|
||||
interface(REG_INTER);
|
||||
%}
|
||||
|
||||
// Vector register operand constrained to register class v10_veca_reg (V10 only).
// Used as src1 of vselect_from_two_vectors_10_11, paired with vReg_V11 as src2.
operand vReg_V10()
|
||||
%{
|
||||
constraint(ALLOC_IN_RC(v10_veca_reg));
|
||||
match(vReg);
|
||||
|
||||
op_cost(0);
|
||||
format %{ %}
|
||||
interface(REG_INTER);
|
||||
%}
|
||||
|
||||
// Vector register operand constrained to register class v11_veca_reg (V11 only).
// Used as src2 of vselect_from_two_vectors_10_11, paired with vReg_V10 as src1.
operand vReg_V11()
|
||||
%{
|
||||
constraint(ALLOC_IN_RC(v11_veca_reg));
|
||||
match(vReg);
|
||||
|
||||
op_cost(0);
|
||||
format %{ %}
|
||||
interface(REG_INTER);
|
||||
%}
|
||||
|
||||
// Vector register operand constrained to register class v12_veca_reg (V12 only).
// Used as src1 of vselect_from_two_vectors_12_13, paired with vReg_V13 as src2.
operand vReg_V12()
|
||||
%{
|
||||
constraint(ALLOC_IN_RC(v12_veca_reg));
|
||||
match(vReg);
|
||||
|
||||
op_cost(0);
|
||||
format %{ %}
|
||||
interface(REG_INTER);
|
||||
%}
|
||||
|
||||
// Vector register operand constrained to register class v13_veca_reg (V13 only).
// Used as src2 of vselect_from_two_vectors_12_13, paired with vReg_V12 as src1.
operand vReg_V13()
|
||||
%{
|
||||
constraint(ALLOC_IN_RC(v13_veca_reg));
|
||||
match(vReg);
|
||||
|
||||
op_cost(0);
|
||||
format %{ %}
|
||||
interface(REG_INTER);
|
||||
%}
|
||||
|
||||
// Vector register operand constrained to register class v17_veca_reg (V17 only).
// Used as src1 of vselect_from_two_vectors_17_18, paired with vReg_V18 as src2.
operand vReg_V17()
|
||||
%{
|
||||
constraint(ALLOC_IN_RC(v17_veca_reg));
|
||||
match(vReg);
|
||||
|
||||
op_cost(0);
|
||||
format %{ %}
|
||||
interface(REG_INTER);
|
||||
%}
|
||||
|
||||
// Vector register operand constrained to register class v18_veca_reg (V18 only).
// Used as src2 of vselect_from_two_vectors_17_18, paired with vReg_V17 as src1.
operand vReg_V18()
|
||||
%{
|
||||
constraint(ALLOC_IN_RC(v18_veca_reg));
|
||||
match(vReg);
|
||||
|
||||
op_cost(0);
|
||||
format %{ %}
|
||||
interface(REG_INTER);
|
||||
%}
|
||||
|
||||
// Vector register operand constrained to register class v23_veca_reg (V23 only).
// Used as src1 of vselect_from_two_vectors_23_24, paired with vReg_V24 as src2.
operand vReg_V23()
|
||||
%{
|
||||
constraint(ALLOC_IN_RC(v23_veca_reg));
|
||||
match(vReg);
|
||||
|
||||
op_cost(0);
|
||||
format %{ %}
|
||||
interface(REG_INTER);
|
||||
%}
|
||||
|
||||
// Vector register operand constrained to register class v24_veca_reg (V24 only).
// Used as src2 of vselect_from_two_vectors_23_24, paired with vReg_V23 as src1.
operand vReg_V24()
|
||||
%{
|
||||
constraint(ALLOC_IN_RC(v24_veca_reg));
|
||||
match(vReg);
|
||||
|
||||
op_cost(0);
|
||||
format %{ %}
|
||||
interface(REG_INTER);
|
||||
%}
|
||||
|
||||
operand vecA()
|
||||
%{
|
||||
constraint(ALLOC_IN_RC(vectora_reg));
|
||||
|
||||
@ -257,6 +257,28 @@ source %{
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
case Op_SelectFromTwoVector:
|
||||
// The "tbl" instruction for two vector table is supported only in Neon and SVE2. Return
|
||||
// false if vector length > 16B but supported SVE version < 2.
|
||||
// For vector length of 16B, generate SVE2 "tbl" instruction if SVE2 is supported, else
|
||||
// generate Neon "tbl" instruction to select from two vectors.
|
||||
// This operation is disabled for doubles and longs on machines with SVE < 2 and instead
|
||||
// the default VectorRearrange + VectorBlend is generated because the performance of the default
|
||||
// implementation was better than or equal to the implementation for SelectFromTwoVector.
|
||||
if (UseSVE < 2 && (type2aelembytes(bt) == 8 || length_in_bytes > 16)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// The SVE2 "tbl" instruction is unpredicated, so a partial-vector operation cannot be expressed
|
||||
// with a mask. This operation is therefore disabled whenever length_in_bytes < MaxVectorSize,
|
||||
// with the sole exception of 8B vectors. Other partial lengths remain disabled because, at the
|
||||
// time of writing, no SVE2 machine with 8 < length_in_bytes < MaxVectorSize is available to
|
||||
// test on (for example, there is no SVE2 machine with MaxVectorSize = 32 on which the
|
||||
// length_in_bytes = 16 case could be tested).
|
||||
if (UseSVE == 2 && length_in_bytes > 8 && length_in_bytes < MaxVectorSize) {
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
@ -7172,3 +7194,71 @@ instruct vexpandBits(vReg dst, vReg src1, vReg src2) %{
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// ------------------------------------- SelectFromTwoVector ------------------------------------
|
||||
// The Neon and SVE2 tbl instruction for two vector lookup requires both the source vectors to be
|
||||
// consecutive. The match rules for SelectFromTwoVector reserve two consecutive vector registers
|
||||
// for src1 and src2.
|
||||
// Four combinations of vector registers for vselect_from_two_vectors are chosen at random
|
||||
// (two from volatile and two from non-volatile set) which gives more freedom to the register
|
||||
// allocator to choose the best pair of source registers at that point.
|
||||
|
||||
// SelectFromTwoVector rule with the table pinned to the consecutive register pair V10/V11
// (src1 = V10, src2 = V11). dst is declared TEMP_DEF and tmp is a TEMP scratch register.
instruct vselect_from_two_vectors_10_11(vReg dst, vReg_V10 src1, vReg_V11 src2,
|
||||
vReg index, vReg tmp) %{
|
||||
effect(TEMP_DEF dst, TEMP tmp);
|
||||
match(Set dst (SelectFromTwoVector (Binary index src1) src2));
|
||||
format %{ "vselect_from_two_vectors_10_11 $dst, $src1, $src2, $index\t# KILL $tmp" %}
|
||||
ins_encode %{
|
||||
// Element type and length in bytes of this node's vector.
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
|
||||
// The choice between Neon and SVE/SVE2 code is made inside select_from_two_vectors.
__ select_from_two_vectors($dst$$FloatRegister, $src1$$FloatRegister,
|
||||
$src2$$FloatRegister, $index$$FloatRegister,
|
||||
$tmp$$FloatRegister, bt, length_in_bytes);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// SelectFromTwoVector rule with the table pinned to the consecutive register pair V12/V13
// (src1 = V12, src2 = V13). dst is declared TEMP_DEF and tmp is a TEMP scratch register.
instruct vselect_from_two_vectors_12_13(vReg dst, vReg_V12 src1, vReg_V13 src2,
|
||||
vReg index, vReg tmp) %{
|
||||
effect(TEMP_DEF dst, TEMP tmp);
|
||||
match(Set dst (SelectFromTwoVector (Binary index src1) src2));
|
||||
format %{ "vselect_from_two_vectors_12_13 $dst, $src1, $src2, $index\t# KILL $tmp" %}
|
||||
ins_encode %{
|
||||
// Element type and length in bytes of this node's vector.
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
|
||||
// The choice between Neon and SVE/SVE2 code is made inside select_from_two_vectors.
__ select_from_two_vectors($dst$$FloatRegister, $src1$$FloatRegister,
|
||||
$src2$$FloatRegister, $index$$FloatRegister,
|
||||
$tmp$$FloatRegister, bt, length_in_bytes);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// SelectFromTwoVector rule with the table pinned to the consecutive register pair V17/V18
// (src1 = V17, src2 = V18). dst is declared TEMP_DEF and tmp is a TEMP scratch register.
instruct vselect_from_two_vectors_17_18(vReg dst, vReg_V17 src1, vReg_V18 src2,
|
||||
vReg index, vReg tmp) %{
|
||||
effect(TEMP_DEF dst, TEMP tmp);
|
||||
match(Set dst (SelectFromTwoVector (Binary index src1) src2));
|
||||
format %{ "vselect_from_two_vectors_17_18 $dst, $src1, $src2, $index\t# KILL $tmp" %}
|
||||
ins_encode %{
|
||||
// Element type and length in bytes of this node's vector.
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
|
||||
// The choice between Neon and SVE/SVE2 code is made inside select_from_two_vectors.
__ select_from_two_vectors($dst$$FloatRegister, $src1$$FloatRegister,
|
||||
$src2$$FloatRegister, $index$$FloatRegister,
|
||||
$tmp$$FloatRegister, bt, length_in_bytes);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// SelectFromTwoVector rule with the table pinned to the consecutive register pair V23/V24
// (src1 = V23, src2 = V24). dst is declared TEMP_DEF and tmp is a TEMP scratch register.
instruct vselect_from_two_vectors_23_24(vReg dst, vReg_V23 src1, vReg_V24 src2,
|
||||
vReg index, vReg tmp) %{
|
||||
effect(TEMP_DEF dst, TEMP tmp);
|
||||
match(Set dst (SelectFromTwoVector (Binary index src1) src2));
|
||||
format %{ "vselect_from_two_vectors_23_24 $dst, $src1, $src2, $index\t# KILL $tmp" %}
|
||||
ins_encode %{
|
||||
// Element type and length in bytes of this node's vector.
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
|
||||
// The choice between Neon and SVE/SVE2 code is made inside select_from_two_vectors.
__ select_from_two_vectors($dst$$FloatRegister, $src1$$FloatRegister,
|
||||
$src2$$FloatRegister, $index$$FloatRegister,
|
||||
$tmp$$FloatRegister, bt, length_in_bytes);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
@ -247,6 +247,28 @@ source %{
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
case Op_SelectFromTwoVector:
|
||||
// The "tbl" instruction for two vector table is supported only in Neon and SVE2. Return
|
||||
// false if vector length > 16B but supported SVE version < 2.
|
||||
// For vector length of 16B, generate SVE2 "tbl" instruction if SVE2 is supported, else
|
||||
// generate Neon "tbl" instruction to select from two vectors.
|
||||
// This operation is disabled for doubles and longs on machines with SVE < 2 and instead
|
||||
// the default VectorRearrange + VectorBlend is generated because the performance of the default
|
||||
// implementation was better than or equal to the implementation for SelectFromTwoVector.
|
||||
if (UseSVE < 2 && (type2aelembytes(bt) == 8 || length_in_bytes > 16)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// The SVE2 "tbl" instruction is unpredicated, so a partial-vector operation cannot be expressed
|
||||
// with a mask. This operation is therefore disabled whenever length_in_bytes < MaxVectorSize,
|
||||
// with the sole exception of 8B vectors. Other partial lengths remain disabled because, at the
|
||||
// time of writing, no SVE2 machine with 8 < length_in_bytes < MaxVectorSize is available to
|
||||
// test on (for example, there is no SVE2 machine with MaxVectorSize = 32 on which the
|
||||
// length_in_bytes = 16 case could be tested).
|
||||
if (UseSVE == 2 && length_in_bytes > 8 && length_in_bytes < MaxVectorSize) {
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
@ -5154,3 +5176,34 @@ BITPERM(vcompressBits, CompressBitsV, sve_bext)
|
||||
|
||||
// ----------------------------------- ExpandBitsV ---------------------------------
|
||||
BITPERM(vexpandBits, ExpandBitsV, sve_bdep)
|
||||
|
||||
// ------------------------------------- SelectFromTwoVector ------------------------------------
|
||||
// The Neon and SVE2 tbl instruction for two vector lookup requires both the source vectors to be
|
||||
// consecutive. The match rules for SelectFromTwoVector reserve two consecutive vector registers
|
||||
// for src1 and src2.
|
||||
// Four combinations of vector registers for vselect_from_two_vectors are chosen at random
|
||||
// (two from volatile and two from non-volatile set) which gives more freedom to the register
|
||||
// allocator to choose the best pair of source registers at that point.
|
||||
dnl
|
||||
dnl SELECT_FROM_TWO_VECTORS($1, $2 )
|
||||
dnl SELECT_FROM_TWO_VECTORS(first_reg, second_reg)
dnl Expands to one instruct rule named vselect_from_two_vectors_<first_reg>_<second_reg>
dnl whose src1 and src2 operands are vReg_V<first_reg> and vReg_V<second_reg>.
dnl The two register numbers are expected to be consecutive, as stated in the comment above.
|
||||
define(`SELECT_FROM_TWO_VECTORS', `
|
||||
instruct vselect_from_two_vectors_$1_$2(vReg dst, vReg_V$1 src1, vReg_V$2 src2,
|
||||
vReg index, vReg tmp) %{
|
||||
effect(TEMP_DEF dst, TEMP tmp);
|
||||
match(Set dst (SelectFromTwoVector (Binary index src1) src2));
|
||||
format %{ "vselect_from_two_vectors_$1_$2 $dst, $src1, $src2, $index\t# KILL $tmp" %}
|
||||
ins_encode %{
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
|
||||
__ select_from_two_vectors($dst$$FloatRegister, $src1$$FloatRegister,
|
||||
$src2$$FloatRegister, $index$$FloatRegister,
|
||||
$tmp$$FloatRegister, bt, length_in_bytes);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}')dnl
|
||||
dnl
dnl One rule per reserved register pair: V10/V11, V12/V13, V17/V18 and V23/V24.
|
||||
SELECT_FROM_TWO_VECTORS(10, 11)
|
||||
SELECT_FROM_TWO_VECTORS(12, 13)
|
||||
SELECT_FROM_TWO_VECTORS(17, 18)
|
||||
SELECT_FROM_TWO_VECTORS(23, 24)
|
||||
|
||||
@ -4231,12 +4231,29 @@ public:
|
||||
sf(imm1, 9, 5), rf(Zd, 0);
|
||||
}
|
||||
|
||||
// SVE programmable table lookup/permute using vector of element indices
|
||||
void sve_tbl(FloatRegister Zd, SIMD_RegVariant T, FloatRegister Zn, FloatRegister Zm) {
|
||||
private:
|
||||
void _sve_tbl(FloatRegister Zd, SIMD_RegVariant T, FloatRegister Zn, unsigned reg_count, FloatRegister Zm) {
|
||||
starti;
|
||||
assert(T != Q, "invalid size");
|
||||
// Only supports one or two vector lookup. One vector lookup was introduced in SVE1
|
||||
// and two vector lookup in SVE2
|
||||
assert(0 < reg_count && reg_count <= 2, "invalid number of registers");
|
||||
|
||||
int op11 = (reg_count == 1) ? 0b10 : 0b01;
|
||||
|
||||
f(0b00000101, 31, 24), f(T, 23, 22), f(0b1, 21), rf(Zm, 16);
|
||||
f(0b001100, 15, 10), rf(Zn, 5), rf(Zd, 0);
|
||||
f(0b001, 15, 13), f(op11, 12, 11), f(0b0, 10), rf(Zn, 5), rf(Zd, 0);
|
||||
}
|
||||
|
||||
public:
|
||||
// SVE/SVE2 Programmable table lookup in one or two vector table (zeroing)
|
||||
void sve_tbl(FloatRegister Zd, SIMD_RegVariant T, FloatRegister Zn, FloatRegister Zm) {
|
||||
_sve_tbl(Zd, T, Zn, 1, Zm);
|
||||
}
|
||||
|
||||
// Programmable table lookup in a two-register table {Zn1, Zn2}; Zm holds the element
// indices. Zn2 must be the register that directly follows Zn1, and only Zn1 is passed
// on to the encoder.
void sve_tbl(FloatRegister Zd, SIMD_RegVariant T, FloatRegister Zn1, FloatRegister Zn2, FloatRegister Zm) {
  assert(Zn1->successor() == Zn2, "invalid order of registers");
  const unsigned table_regs = 2;
  _sve_tbl(Zd, T, Zn1, table_regs, Zm);
}
|
||||
|
||||
// Shuffle active elements of vector to the right and fill with zero
|
||||
|
||||
@ -2858,3 +2858,124 @@ void C2_MacroAssembler::reconstruct_frame_pointer(Register rtmp) {
|
||||
add(rfp, sp, framesize - 2 * wordSize);
|
||||
}
|
||||
}
|
||||
|
||||
// Selects elements from two source vectors (src1, src2) based on index values in the index register
|
||||
// using Neon instructions and places it in the destination vector element corresponding to the
|
||||
// index vector element. Each index in the index register must be in the range - [0, 2 * NUM_ELEM),
|
||||
// where NUM_ELEM is the number of BasicType elements per vector.
|
||||
// If idx < NUM_ELEM --> selects src1[idx] (idx is an element of the index register)
|
||||
// Otherwise, selects src2[idx – NUM_ELEM]
|
||||
void C2_MacroAssembler::select_from_two_vectors_neon(FloatRegister dst, FloatRegister src1,
|
||||
FloatRegister src2, FloatRegister index,
|
||||
FloatRegister tmp, unsigned vector_length_in_bytes) {
|
||||
assert_different_registers(dst, src1, src2, tmp);
|
||||
SIMD_Arrangement size = vector_length_in_bytes == 16 ? T16B : T8B;
|
||||
|
||||
if (vector_length_in_bytes == 16) {
|
||||
assert(UseSVE <= 1, "sve must be <= 1");
|
||||
assert(src1->successor() == src2, "Source registers must be ordered");
|
||||
// If the vector length is 16B, then use the Neon "tbl" instruction with two vector table
|
||||
tbl(dst, size, src1, 2, index);
|
||||
} else { // vector length == 8
|
||||
assert(UseSVE == 0, "must be Neon only");
|
||||
// We need to fit both the source vectors (src1, src2) in a 128-bit register because the
|
||||
// Neon "tbl" instruction supports only looking up 16B vectors. We then use the Neon "tbl"
|
||||
// instruction with one vector lookup
|
||||
ins(tmp, D, src1, 0, 0);
|
||||
ins(tmp, D, src2, 1, 0);
|
||||
tbl(dst, size, tmp, 1, index);
|
||||
}
|
||||
}
|
||||
|
||||
// Selects elements from two source vectors (src1, src2) based on index values in the index register
|
||||
// using SVE/SVE2 instructions and places it in the destination vector element corresponding to the
|
||||
// index vector element. Each index in the index register must be in the range - [0, 2 * NUM_ELEM),
|
||||
// where NUM_ELEM is the number of BasicType elements per vector.
|
||||
// If idx < NUM_ELEM --> selects src1[idx] (idx is an element of the index register)
|
||||
// Otherwise, selects src2[idx – NUM_ELEM]
|
||||
void C2_MacroAssembler::select_from_two_vectors_sve(FloatRegister dst, FloatRegister src1,
|
||||
FloatRegister src2, FloatRegister index,
|
||||
FloatRegister tmp, SIMD_RegVariant T,
|
||||
unsigned vector_length_in_bytes) {
|
||||
assert_different_registers(dst, src1, src2, index, tmp);
|
||||
|
||||
if (vector_length_in_bytes == 8) {
|
||||
// We need to fit both the source vectors (src1, src2) in a single vector register because the
|
||||
// SVE "tbl" instruction is unpredicated and works on the entire vector which can lead to
|
||||
// incorrect results if each source vector is only partially filled. We then use the SVE "tbl"
|
||||
// instruction with one vector lookup
|
||||
assert(UseSVE >= 1, "sve must be >= 1");
|
||||
ins(tmp, D, src1, 0, 0);
|
||||
ins(tmp, D, src2, 1, 0);
|
||||
sve_tbl(dst, T, tmp, index);
|
||||
} else { // UseSVE == 2 and vector_length_in_bytes > 8
|
||||
// If the vector length is > 8, then use the SVE2 "tbl" instruction with the two vector table.
|
||||
// The assertion - vector_length_in_bytes == MaxVectorSize ensures that this operation
|
||||
// is not executed on machines where vector_length_in_bytes < MaxVectorSize
|
||||
// with the only exception of 8B vector length.
|
||||
assert(UseSVE == 2 && vector_length_in_bytes == MaxVectorSize, "must be");
|
||||
assert(src1->successor() == src2, "Source registers must be ordered");
|
||||
sve_tbl(dst, T, src1, src2, index);
|
||||
}
|
||||
}
|
||||
|
||||
// Entry point for SelectFromTwoVector code generation. For every lane, selects an element of
// type bt from src1 or src2 according to the matching lane of "index" and writes it to dst;
// tmp is a scratch register. All five registers must be distinct.
//
// Combinations that can reach this method:
//   UseSVE = 0, vector_length_in_bytes = 8 or 16   -> Neon
//   UseSVE = 1, vector_length_in_bytes = 8         -> SVE
//   UseSVE = 1, vector_length_in_bytes = 16        -> Neon
//   UseSVE = 2, vector_length_in_bytes >= 8        -> SVE/SVE2
void C2_MacroAssembler::select_from_two_vectors(FloatRegister dst, FloatRegister src1,
                                                FloatRegister src2, FloatRegister index,
                                                FloatRegister tmp, BasicType bt,
                                                unsigned vector_length_in_bytes) {
  assert_different_registers(dst, src1, src2, index, tmp);

  const bool use_sve = (UseSVE == 2) || (UseSVE == 1 && vector_length_in_bytes == 8);
  if (use_sve) {
    select_from_two_vectors_sve(dst, src1, src2, index, tmp, elemType_to_regVariant(bt),
                                vector_length_in_bytes);
    return;
  }

  // Only T_BYTE, T_SHORT, T_INT and T_FLOAT are handled by the Neon path.
  assert(bt != T_DOUBLE && bt != T_LONG, "unsupported basic type");
  assert(vector_length_in_bytes <= 16, "length_in_bytes must be <= 16");

  if (bt == T_BYTE) {
    // Byte indices can be fed to the byte-table "tbl" unchanged.
    select_from_two_vectors_neon(dst, src1, src2, index, tmp, vector_length_in_bytes);
    return;
  }

  // The Neon "tbl" instruction only supports byte tables, so shorts have to be selected as
  // chunks of 2B and ints/floats as chunks of 4B. The element indices in "index" (range
  // [0, 2 * NUM_ELEM), e.g. [0, 8) for T_SHORT in a 64-bit vector) are therefore expanded
  // into byte offsets first. Example for T_SHORT, 64-bit vector, index = [2, 5, 1, 0]:
  //   tmp = 0x02 in every byte               -> [0x0202, 0x0202, 0x0202, 0x0202]
  //   dst = index * tmp                      -> [0x0404, 0x0a0a, 0x0202, 0x0000]
  //   tmp = 0x0100 in every 2B               -> [0x0100, 0x0100, 0x0100, 0x0100]
  //   dst = dst + tmp (byte level offsets)   -> [0x0504, 0x0b0a, 0x0302, 0x0100]
  // These offsets then drive the byte-table lookup.
  const bool is_128_bit = (vector_length_in_bytes == 16);
  const SIMD_Arrangement byte_arrangement = is_128_bit ? T16B : T8B;
  const SIMD_Arrangement elem_arrangement = esize2arrangement((uint)type2aelembytes(bt), is_128_bit);

  const bool is_short = (bt == T_SHORT);
  const int elem_size = is_short ? 2 : 4;
  const uint64_t tbl_offset = is_short ? 0x0100u : 0x03020100u;

  mov(tmp, byte_arrangement, elem_size);
  mulv(dst, elem_arrangement, index, tmp);
  mov(tmp, elem_arrangement, tbl_offset);
  addv(dst, byte_arrangement, dst, tmp);  // "dst" now holds the byte offsets for each 2B/4B chunk

  // dst doubles as the index register for the lookup.
  select_from_two_vectors_neon(dst, src1, src2, dst, tmp, vector_length_in_bytes);
}
|
||||
|
||||
@ -34,6 +34,15 @@
|
||||
void neon_reduce_logical_helper(int opc, bool sf, Register Rd, Register Rn, Register Rm,
|
||||
enum shift_kind kind = Assembler::LSL, unsigned shift = 0);
|
||||
|
||||
void select_from_two_vectors_neon(FloatRegister dst, FloatRegister src1,
|
||||
FloatRegister src2, FloatRegister index,
|
||||
FloatRegister tmp, unsigned vector_length_in_bytes);
|
||||
|
||||
void select_from_two_vectors_sve(FloatRegister dst, FloatRegister src1,
|
||||
FloatRegister src2, FloatRegister index,
|
||||
FloatRegister tmp, SIMD_RegVariant T,
|
||||
unsigned vector_length_in_bytes);
|
||||
|
||||
public:
|
||||
// jdk.internal.util.ArraysSupport.vectorizedHashCode
|
||||
address arrays_hashcode(Register ary, Register cnt, Register result, FloatRegister vdata0,
|
||||
@ -193,4 +202,9 @@
|
||||
|
||||
void reconstruct_frame_pointer(Register rtmp);
|
||||
|
||||
// Select from a table of two vectors
|
||||
void select_from_two_vectors(FloatRegister dst, FloatRegister src1, FloatRegister src2,
|
||||
FloatRegister index, FloatRegister tmp, BasicType bt,
|
||||
unsigned vector_length_in_bytes);
|
||||
|
||||
#endif // CPU_AARCH64_C2_MACROASSEMBLER_AARCH64_HPP
|
||||
|
||||
@ -1831,7 +1831,10 @@ bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
|
||||
}
|
||||
break;
|
||||
case Op_SelectFromTwoVector:
|
||||
if (size_in_bits < 128 || (size_in_bits < 512 && !VM_Version::supports_avx512vl())) {
|
||||
if (size_in_bits < 128) {
|
||||
return false;
|
||||
}
|
||||
if ((size_in_bits < 512 && !VM_Version::supports_avx512vl())) {
|
||||
return false;
|
||||
}
|
||||
if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
|
||||
|
||||
@ -2706,6 +2706,9 @@ bool LibraryCallKit::inline_vector_select_from_two_vectors() {
|
||||
index_elem_bt = T_LONG;
|
||||
}
|
||||
|
||||
// Check if the platform requires a VectorLoadShuffle node to be generated
|
||||
bool need_load_shuffle = Matcher::vector_rearrange_requires_load_shuffle(index_elem_bt, num_elem);
|
||||
|
||||
bool lowerSelectFromOp = false;
|
||||
if (!arch_supports_vector(Op_SelectFromTwoVector, num_elem, elem_bt, VecMaskNotUsed)) {
|
||||
int cast_vopc = VectorCastNode::opcode(-1, elem_bt, true);
|
||||
@ -2715,7 +2718,7 @@ bool LibraryCallKit::inline_vector_select_from_two_vectors() {
|
||||
!arch_supports_vector(Op_VectorMaskCast, num_elem, elem_bt, VecMaskNotUsed) ||
|
||||
!arch_supports_vector(Op_VectorBlend, num_elem, elem_bt, VecMaskUseLoad) ||
|
||||
!arch_supports_vector(Op_VectorRearrange, num_elem, elem_bt, VecMaskNotUsed) ||
|
||||
!arch_supports_vector(Op_VectorLoadShuffle, num_elem, index_elem_bt, VecMaskNotUsed) ||
|
||||
(need_load_shuffle && !arch_supports_vector(Op_VectorLoadShuffle, num_elem, index_elem_bt, VecMaskNotUsed)) ||
|
||||
!arch_supports_vector(Op_Replicate, num_elem, index_elem_bt, VecMaskNotUsed)) {
|
||||
log_if_needed(" ** not supported: opc=%d vlen=%d etype=%s ismask=useload",
|
||||
Op_SelectFromTwoVector, num_elem, type2name(elem_bt));
|
||||
|
||||
@ -2087,6 +2087,10 @@ generate(SpecialCases, [["ccmn", "__ ccmn(zr, zr, 3u, Assembler::LE);",
|
||||
["index", "__ sve_index(z7, __ D, r5, 5);", "index\tz7.d, x5, #5"],
|
||||
["cpy", "__ sve_cpy(z7, __ H, p3, r5);", "cpy\tz7.h, p3/m, w5"],
|
||||
["tbl", "__ sve_tbl(z16, __ S, z17, z18);", "tbl\tz16.s, {z17.s}, z18.s"],
|
||||
["tbl", "__ sve_tbl(z16, __ B, z17, z18, z16);", "tbl\tz16.b, {z17.b, z18.b}, z16.b"],
|
||||
["tbl", "__ sve_tbl(z16, __ H, z17, z18, z16);", "tbl\tz16.h, {z17.h, z18.h}, z16.h"],
|
||||
["tbl", "__ sve_tbl(z16, __ S, z17, z18, z16);", "tbl\tz16.s, {z17.s, z18.s}, z16.s"],
|
||||
["tbl", "__ sve_tbl(z16, __ D, z17, z18, z16);", "tbl\tz16.d, {z17.d, z18.d}, z16.d"],
|
||||
["ld1w", "__ sve_ld1w_gather(z15, p0, r5, z16);", "ld1w\t{z15.s}, p0/z, [x5, z16.s, uxtw #2]"],
|
||||
["ld1d", "__ sve_ld1d_gather(z15, p0, r5, z16);", "ld1d\t{z15.d}, p0/z, [x5, z16.d, uxtw #3]"],
|
||||
["st1w", "__ sve_st1w_scatter(z15, p0, r5, z16);", "st1w\t{z15.s}, p0, [x5, z16.s, uxtw #2]"],
|
||||
|
||||
@ -1100,6 +1100,10 @@
|
||||
__ sve_index(z7, __ D, r5, 5); // index z7.d, x5, #5
|
||||
__ sve_cpy(z7, __ H, p3, r5); // cpy z7.h, p3/m, w5
|
||||
__ sve_tbl(z16, __ S, z17, z18); // tbl z16.s, {z17.s}, z18.s
|
||||
__ sve_tbl(z16, __ B, z17, z18, z16); // tbl z16.b, {z17.b, z18.b}, z16.b
|
||||
__ sve_tbl(z16, __ H, z17, z18, z16); // tbl z16.h, {z17.h, z18.h}, z16.h
|
||||
__ sve_tbl(z16, __ S, z17, z18, z16); // tbl z16.s, {z17.s, z18.s}, z16.s
|
||||
__ sve_tbl(z16, __ D, z17, z18, z16); // tbl z16.d, {z17.d, z18.d}, z16.d
|
||||
__ sve_ld1w_gather(z15, p0, r5, z16); // ld1w {z15.s}, p0/z, [x5, z16.s, uxtw #2]
|
||||
__ sve_ld1d_gather(z15, p0, r5, z16); // ld1d {z15.d}, p0/z, [x5, z16.d, uxtw #3]
|
||||
__ sve_st1w_scatter(z15, p0, r5, z16); // st1w {z15.s}, p0, [x5, z16.s, uxtw #2]
|
||||
@ -1438,30 +1442,30 @@
|
||||
0x9101a1a0, 0xb10a5cc8, 0xd10810aa, 0xf10fd061,
|
||||
0x120cb166, 0x321764bc, 0x52174681, 0x720c0227,
|
||||
0x9241018e, 0xb25a2969, 0xd278b411, 0xf26aad01,
|
||||
0x14000000, 0x17ffffd7, 0x140004b0, 0x94000000,
|
||||
0x97ffffd4, 0x940004ad, 0x3400000a, 0x34fffa2a,
|
||||
0x3400954a, 0x35000008, 0x35fff9c8, 0x350094e8,
|
||||
0xb400000b, 0xb4fff96b, 0xb400948b, 0xb500001d,
|
||||
0xb5fff91d, 0xb500943d, 0x10000013, 0x10fff8b3,
|
||||
0x100093d3, 0x90000013, 0x36300016, 0x3637f836,
|
||||
0x36309356, 0x3758000c, 0x375ff7cc, 0x375892ec,
|
||||
0x14000000, 0x17ffffd7, 0x140004b4, 0x94000000,
|
||||
0x97ffffd4, 0x940004b1, 0x3400000a, 0x34fffa2a,
|
||||
0x340095ca, 0x35000008, 0x35fff9c8, 0x35009568,
|
||||
0xb400000b, 0xb4fff96b, 0xb400950b, 0xb500001d,
|
||||
0xb5fff91d, 0xb50094bd, 0x10000013, 0x10fff8b3,
|
||||
0x10009453, 0x90000013, 0x36300016, 0x3637f836,
|
||||
0x363093d6, 0x3758000c, 0x375ff7cc, 0x3758936c,
|
||||
0x128313a0, 0x528a32c7, 0x7289173b, 0x92ab3acc,
|
||||
0xd2a0bf94, 0xf2c285e8, 0x9358722f, 0x330e652f,
|
||||
0x53067f3b, 0x93577c53, 0xb34a1aac, 0xd35a4016,
|
||||
0x13946c63, 0x93c3dbc8, 0x54000000, 0x54fff5a0,
|
||||
0x540090c0, 0x54000001, 0x54fff541, 0x54009061,
|
||||
0x54000002, 0x54fff4e2, 0x54009002, 0x54000002,
|
||||
0x54fff482, 0x54008fa2, 0x54000003, 0x54fff423,
|
||||
0x54008f43, 0x54000003, 0x54fff3c3, 0x54008ee3,
|
||||
0x54000004, 0x54fff364, 0x54008e84, 0x54000005,
|
||||
0x54fff305, 0x54008e25, 0x54000006, 0x54fff2a6,
|
||||
0x54008dc6, 0x54000007, 0x54fff247, 0x54008d67,
|
||||
0x54000008, 0x54fff1e8, 0x54008d08, 0x54000009,
|
||||
0x54fff189, 0x54008ca9, 0x5400000a, 0x54fff12a,
|
||||
0x54008c4a, 0x5400000b, 0x54fff0cb, 0x54008beb,
|
||||
0x5400000c, 0x54fff06c, 0x54008b8c, 0x5400000d,
|
||||
0x54fff00d, 0x54008b2d, 0x5400000e, 0x54ffefae,
|
||||
0x54008ace, 0x5400000f, 0x54ffef4f, 0x54008a6f,
|
||||
0x54009140, 0x54000001, 0x54fff541, 0x540090e1,
|
||||
0x54000002, 0x54fff4e2, 0x54009082, 0x54000002,
|
||||
0x54fff482, 0x54009022, 0x54000003, 0x54fff423,
|
||||
0x54008fc3, 0x54000003, 0x54fff3c3, 0x54008f63,
|
||||
0x54000004, 0x54fff364, 0x54008f04, 0x54000005,
|
||||
0x54fff305, 0x54008ea5, 0x54000006, 0x54fff2a6,
|
||||
0x54008e46, 0x54000007, 0x54fff247, 0x54008de7,
|
||||
0x54000008, 0x54fff1e8, 0x54008d88, 0x54000009,
|
||||
0x54fff189, 0x54008d29, 0x5400000a, 0x54fff12a,
|
||||
0x54008cca, 0x5400000b, 0x54fff0cb, 0x54008c6b,
|
||||
0x5400000c, 0x54fff06c, 0x54008c0c, 0x5400000d,
|
||||
0x54fff00d, 0x54008bad, 0x5400000e, 0x54ffefae,
|
||||
0x54008b4e, 0x5400000f, 0x54ffef4f, 0x54008aef,
|
||||
0xd40658e1, 0xd4014d22, 0xd4046543, 0xd4273f60,
|
||||
0xd44cad80, 0xd503201f, 0xd503203f, 0xd503205f,
|
||||
0xd503209f, 0xd50320bf, 0xd503219f, 0xd50323bf,
|
||||
@ -1668,7 +1672,8 @@
|
||||
0x65d8a801, 0x65dcac01, 0x655cb241, 0x0520a1e0,
|
||||
0x0521a601, 0x052281e0, 0x05238601, 0x04a14026,
|
||||
0x042244a6, 0x046344a6, 0x04a444a6, 0x04e544a7,
|
||||
0x0568aca7, 0x05b23230, 0x853040af, 0xc5b040af,
|
||||
0x0568aca7, 0x05b23230, 0x05302a30, 0x05702a30,
|
||||
0x05b02a30, 0x05f02a30, 0x853040af, 0xc5b040af,
|
||||
0xe57080af, 0xe5b080af, 0x25034440, 0x254054c4,
|
||||
0x25034640, 0x25415a05, 0x25834440, 0x25c54489,
|
||||
0x250b5d3a, 0x2550dc20, 0x2518e3e1, 0x2518e021,
|
||||
|
||||
@ -2851,6 +2851,36 @@ public class IRNode {
|
||||
fromBeforeRemoveUselessToFinalCode(BLACKHOLE, "Blackhole");
|
||||
}
|
||||
|
||||
// Placeholder string for SelectFromTwoVector nodes with byte elements.
public static final String SELECT_FROM_TWO_VECTOR_VB = VECTOR_PREFIX + "SELECT_FROM_TWO_VECTOR_VB" + POSTFIX;
|
||||
static {
|
||||
// Hands the placeholder, the node name "SelectFromTwoVector" and TYPE_BYTE to vectorNode
// (defined elsewhere; presumably it registers the mapping).
vectorNode(SELECT_FROM_TWO_VECTOR_VB, "SelectFromTwoVector", TYPE_BYTE);
|
||||
}
|
||||
|
||||
// Placeholder string for SelectFromTwoVector nodes with short elements.
public static final String SELECT_FROM_TWO_VECTOR_VS = VECTOR_PREFIX + "SELECT_FROM_TWO_VECTOR_VS" + POSTFIX;
|
||||
static {
|
||||
// Hands the placeholder, the node name "SelectFromTwoVector" and TYPE_SHORT to vectorNode
// (defined elsewhere; presumably it registers the mapping).
vectorNode(SELECT_FROM_TWO_VECTOR_VS, "SelectFromTwoVector", TYPE_SHORT);
|
||||
}
|
||||
|
||||
// Placeholder string for SelectFromTwoVector nodes with int elements.
public static final String SELECT_FROM_TWO_VECTOR_VI = VECTOR_PREFIX + "SELECT_FROM_TWO_VECTOR_VI" + POSTFIX;
|
||||
static {
|
||||
// Hands the placeholder, the node name "SelectFromTwoVector" and TYPE_INT to vectorNode
// (defined elsewhere; presumably it registers the mapping).
vectorNode(SELECT_FROM_TWO_VECTOR_VI, "SelectFromTwoVector", TYPE_INT);
|
||||
}
|
||||
|
||||
// Placeholder string for SelectFromTwoVector nodes with float elements.
public static final String SELECT_FROM_TWO_VECTOR_VF = VECTOR_PREFIX + "SELECT_FROM_TWO_VECTOR_VF" + POSTFIX;
|
||||
static {
|
||||
// Hands the placeholder, the node name "SelectFromTwoVector" and TYPE_FLOAT to vectorNode
// (defined elsewhere; presumably it registers the mapping).
vectorNode(SELECT_FROM_TWO_VECTOR_VF, "SelectFromTwoVector", TYPE_FLOAT);
|
||||
}
|
||||
|
||||
// Placeholder string for SelectFromTwoVector nodes with double elements.
public static final String SELECT_FROM_TWO_VECTOR_VD = VECTOR_PREFIX + "SELECT_FROM_TWO_VECTOR_VD" + POSTFIX;
|
||||
static {
|
||||
// Hands the placeholder, the node name "SelectFromTwoVector" and TYPE_DOUBLE to vectorNode
// (defined elsewhere; presumably it registers the mapping).
vectorNode(SELECT_FROM_TWO_VECTOR_VD, "SelectFromTwoVector", TYPE_DOUBLE);
|
||||
}
|
||||
|
||||
// Placeholder string for SelectFromTwoVector nodes with long elements.
public static final String SELECT_FROM_TWO_VECTOR_VL = VECTOR_PREFIX + "SELECT_FROM_TWO_VECTOR_VL" + POSTFIX;
|
||||
static {
|
||||
// Hands the placeholder, the node name "SelectFromTwoVector" and TYPE_LONG to vectorNode
// (defined elsewhere; presumably it registers the mapping).
vectorNode(SELECT_FROM_TWO_VECTOR_VL, "SelectFromTwoVector", TYPE_LONG);
|
||||
}
|
||||
|
||||
/*
|
||||
* Utility methods to set up IR_NODE_MAPPINGS.
|
||||
*/
|
||||
|
||||
@ -105,6 +105,7 @@ public class IREncodingPrinter {
|
||||
"avx512f",
|
||||
"avx512_fp16",
|
||||
"avx512_vnni",
|
||||
"avx512_vbmi",
|
||||
"bmi2",
|
||||
// AArch64
|
||||
"sha3",
|
||||
|
||||
@ -0,0 +1,486 @@
|
||||
/*
|
||||
* Copyright (c) 2025, Arm Limited. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
|
||||
package compiler.vectorapi;
|
||||
|
||||
import compiler.lib.generators.*;
|
||||
import compiler.lib.ir_framework.*;
|
||||
|
||||
import jdk.incubator.vector.*;
|
||||
import jdk.incubator.vector.VectorOperators;
|
||||
import jdk.incubator.vector.VectorSpecies;
|
||||
|
||||
import java.util.Random;
|
||||
|
||||
import jdk.test.lib.Asserts;
|
||||
import jdk.test.lib.Utils;
|
||||
/**
|
||||
* @test
|
||||
* @bug 8348868
|
||||
* @library /test/lib /
|
||||
* @summary Verify that SelectFromTwoVector IR node is correctly being
|
||||
* generated on aarch64 and x86
|
||||
* @modules jdk.incubator.vector
|
||||
* @run driver compiler.vectorapi.TestSelectFromTwoVectorOp
|
||||
*/
|
||||
|
||||
public class TestSelectFromTwoVectorOp {
|
||||
private static final int SIZE = 1024;
|
||||
private static final Generators random = Generators.G;
|
||||
|
||||
private static byte[] ba;
|
||||
private static byte[] bb;
|
||||
private static byte[] bres;
|
||||
private static byte[][] bindex;
|
||||
|
||||
private static short[] sa;
|
||||
private static short[] sb;
|
||||
private static short[] sres;
|
||||
private static short[][] sindex;
|
||||
|
||||
private static int[] ia;
|
||||
private static int[] ib;
|
||||
private static int[] ires;
|
||||
private static int[][] iindex;
|
||||
|
||||
private static float[] fa;
|
||||
private static float[] fb;
|
||||
private static float[] fres;
|
||||
private static float[][] findex;
|
||||
|
||||
private static long[] la;
|
||||
private static long[] lb;
|
||||
private static long[] lres;
|
||||
private static long[][] lindex;
|
||||
|
||||
private static double[] da;
|
||||
private static double[] db;
|
||||
private static double[] dres;
|
||||
private static double[][] dindex;
|
||||
|
||||
// Stores the possible number of elements that can be
|
||||
// held in various vector sizes/shapes
|
||||
private static int [] nums = {2, 4, 8, 16, 32, 64};
|
||||
|
||||
static {
|
||||
ba = new byte[SIZE];
|
||||
bb = new byte[SIZE];
|
||||
bres = new byte[SIZE];
|
||||
bindex = new byte[4][SIZE];
|
||||
|
||||
sa = new short[SIZE];
|
||||
sb = new short[SIZE];
|
||||
sres = new short[SIZE];
|
||||
sindex = new short[4][SIZE];
|
||||
|
||||
ia = new int[SIZE];
|
||||
ib = new int[SIZE];
|
||||
ires = new int[SIZE];
|
||||
iindex = new int[4][SIZE];
|
||||
|
||||
fa = new float[SIZE];
|
||||
fb = new float[SIZE];
|
||||
fres = new float[SIZE];
|
||||
findex = new float[4][SIZE];
|
||||
|
||||
la = new long[SIZE];
|
||||
lb = new long[SIZE];
|
||||
lres = new long[SIZE];
|
||||
lindex = new long[3][SIZE];
|
||||
|
||||
da = new double[SIZE];
|
||||
db = new double[SIZE];
|
||||
dres = new double[SIZE];
|
||||
dindex = new double[3][SIZE];
|
||||
|
||||
// Populate the indices
|
||||
for (int i = 0; i < bindex.length; i++) {
|
||||
bindex[i] = new byte[SIZE];
|
||||
sindex[i] = new short[SIZE];
|
||||
iindex[i] = new int[SIZE];
|
||||
findex[i] = new float[SIZE];
|
||||
|
||||
// The index array contains indices in the range of [0, vector_length * 2)
|
||||
Generator<Integer> byteGen1 = random.uniformInts(0, (nums[i + 2] * 2) - 1);
|
||||
Generator<Integer> shortGen1 = random.uniformInts(0, (nums[i + 1] * 2) - 1);
|
||||
|
||||
for (int j = 0; j < SIZE; j++) {
|
||||
bindex[i][j] = byteGen1.next().byteValue();
|
||||
sindex[i][j] = shortGen1.next().shortValue();
|
||||
}
|
||||
|
||||
if (i < dindex.length) {
|
||||
dindex[i] = new double[SIZE];
|
||||
lindex[i] = new long[SIZE];
|
||||
|
||||
random.fill(random.uniformDoubles(0, (double) ((nums[i] * 2) - 1)), dindex[i]);
|
||||
random.fill(random.uniformLongs(0, (long) ((nums[i] * 2) - 1)), lindex[i]);
|
||||
}
|
||||
|
||||
random.fill(random.uniformInts(0, (nums[i] * 2) - 1), iindex[i]);
|
||||
random.fill(random.uniformFloats(0, (float)((nums[i] * 2) - 1)), findex[i]);
|
||||
}
|
||||
|
||||
// Populate the sources
|
||||
Generator<Integer> byteGen = random.uniformInts(Byte.MIN_VALUE, Byte.MAX_VALUE);
|
||||
Generator<Integer> shortGen = random.uniformInts(Short.MIN_VALUE, Short.MAX_VALUE);
|
||||
|
||||
for (int i = 0; i < SIZE; i++) {
|
||||
ba[i] = byteGen.next().byteValue();
|
||||
bb[i] = byteGen.next().byteValue();
|
||||
|
||||
sa[i] = shortGen.next().shortValue();
|
||||
sb[i] = shortGen.next().shortValue();
|
||||
}
|
||||
|
||||
random.fill(random.ints(), ia);
|
||||
random.fill(random.ints(), ib);
|
||||
random.fill(random.floats(), fa);
|
||||
random.fill(random.floats(), fb);
|
||||
random.fill(random.longs(), la);
|
||||
random.fill(random.longs(), lb);
|
||||
random.fill(random.doubles(), da);
|
||||
random.fill(random.doubles(), db);
|
||||
}
|
||||
|
||||
// Test SelectFromTwoVector operation for Bytes
|
||||
@ForceInline
|
||||
public static void ByteSelectFromTwoVectorKernel(VectorSpecies SPECIES, byte[] ba,
|
||||
byte[] bb, byte[] bindex) {
|
||||
for (int i = 0; i < SPECIES.loopBound(ba.length); i += SPECIES.length()) {
|
||||
ByteVector.fromArray(SPECIES, bindex, i)
|
||||
.selectFrom(ByteVector.fromArray(SPECIES, ba, i),
|
||||
ByteVector.fromArray(SPECIES, bb, i))
|
||||
.intoArray(bres, i);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.SELECT_FROM_TWO_VECTOR_VB, IRNode.VECTOR_SIZE_8, ">0"},
|
||||
applyIfCPUFeature = {"asimd", "true"},
|
||||
applyIf = {"MaxVectorSize", ">=8"})
|
||||
public static void selectFromTwoVector_Byte64() {
|
||||
ByteSelectFromTwoVectorKernel(ByteVector.SPECIES_64, ba, bb, bindex[0]);
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.SELECT_FROM_TWO_VECTOR_VB, IRNode.VECTOR_SIZE_16, ">0"},
|
||||
applyIfCPUFeature = {"asimd", "true"},
|
||||
applyIf = {"MaxVectorSize", ">=16"})
|
||||
@IR(counts = {IRNode.SELECT_FROM_TWO_VECTOR_VB, IRNode.VECTOR_SIZE_16, ">0"},
|
||||
applyIfCPUFeatureAnd = {"avx512_vbmi", "true", "avx512vl", "true"},
|
||||
applyIf = {"MaxVectorSize", ">=16"})
|
||||
public static void selectFromTwoVector_Byte128() {
|
||||
ByteSelectFromTwoVectorKernel(ByteVector.SPECIES_128, ba, bb, bindex[1]);
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(failOn = {IRNode.SELECT_FROM_TWO_VECTOR_VB, IRNode.VECTOR_SIZE_32},
|
||||
applyIfCPUFeatureAnd = {"asimd", "true", "sve2", "false"},
|
||||
applyIf = {"MaxVectorSize", ">=32"})
|
||||
@IR(counts = {IRNode.SELECT_FROM_TWO_VECTOR_VB, IRNode.VECTOR_SIZE_32, ">0"},
|
||||
applyIfCPUFeature = {"sve2", "true"},
|
||||
applyIf = {"MaxVectorSize", ">=32"})
|
||||
@IR(counts = {IRNode.SELECT_FROM_TWO_VECTOR_VB, IRNode.VECTOR_SIZE_32, ">0"},
|
||||
applyIfCPUFeatureAnd = {"avx512_vbmi", "true", "avx512vl", "true"},
|
||||
applyIf = {"MaxVectorSize", ">=32"})
|
||||
public static void selectFromTwoVector_Byte256() {
|
||||
ByteSelectFromTwoVectorKernel(ByteVector.SPECIES_256, ba, bb, bindex[2]);
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(failOn = {IRNode.SELECT_FROM_TWO_VECTOR_VB, IRNode.VECTOR_SIZE_64},
|
||||
applyIfCPUFeatureAnd = {"asimd", "true", "sve2", "false"},
|
||||
applyIf = {"MaxVectorSize", ">=64"})
|
||||
@IR(counts = {IRNode.SELECT_FROM_TWO_VECTOR_VB, IRNode.VECTOR_SIZE_64, ">0"},
|
||||
applyIfCPUFeature = {"sve2", "true"},
|
||||
applyIf = {"MaxVectorSize", ">=64"})
|
||||
@IR(counts = {IRNode.SELECT_FROM_TWO_VECTOR_VB, IRNode.VECTOR_SIZE_64, ">0"},
|
||||
applyIfCPUFeatureAnd = {"avx512_vbmi", "true", "avx512f", "true"},
|
||||
applyIf = {"MaxVectorSize", ">=64"})
|
||||
public static void selectFromTwoVector_Byte512() {
|
||||
ByteSelectFromTwoVectorKernel(ByteVector.SPECIES_512, ba, bb, bindex[3]);
|
||||
}
|
||||
|
||||
// Test SelectFromTwoVector operation for Shorts
|
||||
@ForceInline
|
||||
public static void ShortSelectFromTwoVectorKernel(VectorSpecies SPECIES, short[] sa,
|
||||
short[] sb, short[] sindex) {
|
||||
for (int i = 0; i < SPECIES.loopBound(sa.length); i += SPECIES.length()) {
|
||||
ShortVector.fromArray(SPECIES, sindex, i)
|
||||
.selectFrom(ShortVector.fromArray(SPECIES, sa, i),
|
||||
ShortVector.fromArray(SPECIES, sb, i))
|
||||
.intoArray(sres, i);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.SELECT_FROM_TWO_VECTOR_VS, IRNode.VECTOR_SIZE_4, ">0"},
|
||||
applyIfCPUFeature = {"asimd", "true"},
|
||||
applyIf = {"MaxVectorSize", ">=8"})
|
||||
public static void selectFromTwoVector_Short64() {
|
||||
ShortSelectFromTwoVectorKernel(ShortVector.SPECIES_64, sa, sb, sindex[0]);
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.SELECT_FROM_TWO_VECTOR_VS, IRNode.VECTOR_SIZE_8, ">0"},
|
||||
applyIfCPUFeatureAnd = {"asimd", "true", "sve2", "false"},
|
||||
applyIf = {"MaxVectorSize", ">=16"})
|
||||
@IR(counts = {IRNode.SELECT_FROM_TWO_VECTOR_VS, IRNode.VECTOR_SIZE_8, ">0"},
|
||||
applyIfCPUFeature = {"sve2", "true"},
|
||||
applyIf = {"MaxVectorSize", ">=16"})
|
||||
@IR(counts = {IRNode.SELECT_FROM_TWO_VECTOR_VS, IRNode.VECTOR_SIZE_8, ">0"},
|
||||
applyIfCPUFeatureAnd = {"avx512bw", "true", "avx512vl", "true"},
|
||||
applyIf = {"MaxVectorSize", ">=16"})
|
||||
public static void selectFromTwoVector_Short128() {
|
||||
ShortSelectFromTwoVectorKernel(ShortVector.SPECIES_128, sa, sb, sindex[1]);
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(failOn = {IRNode.SELECT_FROM_TWO_VECTOR_VS, IRNode.VECTOR_SIZE_16},
|
||||
applyIfCPUFeatureAnd = {"asimd", "true", "sve2", "false"},
|
||||
applyIf = {"MaxVectorSize", ">=32"})
|
||||
@IR(counts = {IRNode.SELECT_FROM_TWO_VECTOR_VS, IRNode.VECTOR_SIZE_16, ">0"},
|
||||
applyIfCPUFeature = {"sve2", "true"},
|
||||
applyIf = {"MaxVectorSize", ">=32"})
|
||||
@IR(counts = {IRNode.SELECT_FROM_TWO_VECTOR_VS, IRNode.VECTOR_SIZE_16, ">0"},
|
||||
applyIfCPUFeatureAnd = {"avx512bw", "true", "avx512vl", "true"},
|
||||
applyIf = {"MaxVectorSize", ">=32"})
|
||||
public static void selectFromTwoVector_Short256() {
|
||||
ShortSelectFromTwoVectorKernel(ShortVector.SPECIES_256, sa, sb, sindex[2]);
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(failOn = {IRNode.SELECT_FROM_TWO_VECTOR_VS, IRNode.VECTOR_SIZE_32},
|
||||
applyIfCPUFeatureAnd = {"asimd", "true", "sve2", "false"},
|
||||
applyIf = {"MaxVectorSize", ">=64"})
|
||||
@IR(counts = {IRNode.SELECT_FROM_TWO_VECTOR_VS, IRNode.VECTOR_SIZE_32, ">0"},
|
||||
applyIfCPUFeature = {"sve2", "true"},
|
||||
applyIf = {"MaxVectorSize", ">=64"})
|
||||
@IR(counts = {IRNode.SELECT_FROM_TWO_VECTOR_VS, IRNode.VECTOR_SIZE_32, ">0"},
|
||||
applyIfCPUFeatureAnd = {"avx512bw", "true", "avx512f", "true"},
|
||||
applyIf = {"MaxVectorSize", ">=64"})
|
||||
public static void selectFromTwoVector_Short512() {
|
||||
ShortSelectFromTwoVectorKernel(ShortVector.SPECIES_512, sa, sb, sindex[3]);
|
||||
}
|
||||
|
||||
// Test SelectFromTwoVector operation for Ints
|
||||
@ForceInline
|
||||
public static void IntSelectFromTwoVectorKernel(VectorSpecies SPECIES, int[] ia,
|
||||
int[] ib, int[] iindex) {
|
||||
for (int i = 0; i < SPECIES.loopBound(ia.length); i += SPECIES.length()) {
|
||||
IntVector.fromArray(SPECIES, iindex, i)
|
||||
.selectFrom(IntVector.fromArray(SPECIES, ia, i),
|
||||
IntVector.fromArray(SPECIES, ib, i))
|
||||
.intoArray(ires, i);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.SELECT_FROM_TWO_VECTOR_VI, IRNode.VECTOR_SIZE_2, ">0"},
|
||||
applyIfCPUFeatureOr = {"asimd", "true"},
|
||||
applyIf = {"MaxVectorSize", ">=8"})
|
||||
public static void selectFromTwoVector_Int64() {
|
||||
IntSelectFromTwoVectorKernel(IntVector.SPECIES_64, ia, ib, iindex[0]);
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.SELECT_FROM_TWO_VECTOR_VI, IRNode.VECTOR_SIZE_4, ">0"},
|
||||
applyIfCPUFeatureAnd = {"asimd", "true", "sve2", "false"},
|
||||
applyIf = {"MaxVectorSize", ">=16"})
|
||||
@IR(counts = {IRNode.SELECT_FROM_TWO_VECTOR_VI, IRNode.VECTOR_SIZE_4, ">0"},
|
||||
applyIfCPUFeatureOr = {"sve2", "true", "avx512vl", "true"},
|
||||
applyIf = {"MaxVectorSize", ">=16"})
|
||||
public static void selectFromTwoVector_Int128() {
|
||||
IntSelectFromTwoVectorKernel(IntVector.SPECIES_128, ia, ib, iindex[1]);
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(failOn = {IRNode.SELECT_FROM_TWO_VECTOR_VI, IRNode.VECTOR_SIZE_8},
|
||||
applyIfCPUFeatureAnd = {"asimd", "true", "sve2", "false"},
|
||||
applyIf = {"MaxVectorSize", ">=32"})
|
||||
@IR(counts = {IRNode.SELECT_FROM_TWO_VECTOR_VI, IRNode.VECTOR_SIZE_8, ">0"},
|
||||
applyIfCPUFeatureOr = {"sve2", "true", "avx512vl", "true"},
|
||||
applyIf = {"MaxVectorSize", ">=32"})
|
||||
public static void selectFromTwoVector_Int256() {
|
||||
IntSelectFromTwoVectorKernel(IntVector.SPECIES_256, ia, ib, iindex[2]);
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(failOn = {IRNode.SELECT_FROM_TWO_VECTOR_VI, IRNode.VECTOR_SIZE_16},
|
||||
applyIfCPUFeatureAnd = {"asimd", "true", "sve2", "false"},
|
||||
applyIf = {"MaxVectorSize", ">=64"})
|
||||
@IR(counts = {IRNode.SELECT_FROM_TWO_VECTOR_VI, IRNode.VECTOR_SIZE_16, ">0"},
|
||||
applyIfCPUFeatureOr = {"sve2", "true", "avx512f", "true"},
|
||||
applyIf = {"MaxVectorSize", ">=64"})
|
||||
public static void selectFromTwoVector_Int512() {
|
||||
IntSelectFromTwoVectorKernel(IntVector.SPECIES_512, ia, ib, iindex[3]);
|
||||
}
|
||||
|
||||
// Test SelectFromTwoVector operation for Floats
|
||||
@ForceInline
|
||||
public static void FloatSelectFromTwoVectorKernel(VectorSpecies SPECIES, float[] fa,
|
||||
float[] fb, float[] findex) {
|
||||
for (int i = 0; i < SPECIES.loopBound(ia.length); i += SPECIES.length()) {
|
||||
FloatVector.fromArray(SPECIES, findex, i)
|
||||
.selectFrom(FloatVector.fromArray(SPECIES, fa, i),
|
||||
FloatVector.fromArray(SPECIES, fb, i))
|
||||
.intoArray(fres, i);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.SELECT_FROM_TWO_VECTOR_VF, IRNode.VECTOR_SIZE_2, ">0"},
|
||||
applyIfCPUFeatureOr = {"asimd", "true"},
|
||||
applyIf = {"MaxVectorSize", ">=8"})
|
||||
public static void selectFromTwoVector_Float64() {
|
||||
FloatSelectFromTwoVectorKernel(FloatVector.SPECIES_64, fa, fb, findex[0]);
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.SELECT_FROM_TWO_VECTOR_VF, IRNode.VECTOR_SIZE_4, ">0"},
|
||||
applyIfCPUFeatureAnd = {"asimd", "true", "sve2", "false"},
|
||||
applyIf = {"MaxVectorSize", ">=16"})
|
||||
@IR(counts = {IRNode.SELECT_FROM_TWO_VECTOR_VF, IRNode.VECTOR_SIZE_4, ">0"},
|
||||
applyIfCPUFeatureOr = {"sve2", "true", "avx512vl", "true"},
|
||||
applyIf = {"MaxVectorSize", ">=16"})
|
||||
public static void selectFromTwoVector_Float128() {
|
||||
FloatSelectFromTwoVectorKernel(FloatVector.SPECIES_128, fa, fb, findex[1]);
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(failOn = {IRNode.SELECT_FROM_TWO_VECTOR_VF, IRNode.VECTOR_SIZE_8},
|
||||
applyIfCPUFeatureAnd = {"asimd", "true", "sve2", "false"},
|
||||
applyIf = {"MaxVectorSize", ">=32"})
|
||||
@IR(counts = {IRNode.SELECT_FROM_TWO_VECTOR_VF, IRNode.VECTOR_SIZE_8, ">0"},
|
||||
applyIfCPUFeatureOr = {"sve2", "true", "avx512vl", "true"},
|
||||
applyIf = {"MaxVectorSize", ">=32"})
|
||||
public static void selectFromTwoVector_Float256() {
|
||||
FloatSelectFromTwoVectorKernel(FloatVector.SPECIES_256, fa, fb, findex[2]);
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(failOn = {IRNode.SELECT_FROM_TWO_VECTOR_VF, IRNode.VECTOR_SIZE_16},
|
||||
applyIfCPUFeatureAnd = {"asimd", "true", "sve2", "false"},
|
||||
applyIf = {"MaxVectorSize", ">=64"})
|
||||
@IR(counts = {IRNode.SELECT_FROM_TWO_VECTOR_VF, IRNode.VECTOR_SIZE_16, ">0"},
|
||||
applyIfCPUFeatureOr = {"sve2", "true", "avx512f", "true"},
|
||||
applyIf = {"MaxVectorSize", ">=64"})
|
||||
public static void selectFromTwoVector_Float512() {
|
||||
FloatSelectFromTwoVectorKernel(FloatVector.SPECIES_512, fa, fb, findex[3]);
|
||||
}
|
||||
|
||||
// Test SelectFromTwoVector operation for Doubles
|
||||
@ForceInline
|
||||
public static void DoubleSelectFromTwoVectorKernel(VectorSpecies SPECIES, double[] da,
|
||||
double[] db, double[] dindex) {
|
||||
for (int i = 0; i < SPECIES.loopBound(ia.length); i += SPECIES.length()) {
|
||||
DoubleVector.fromArray(SPECIES, dindex, i)
|
||||
.selectFrom(DoubleVector.fromArray(SPECIES, da, i),
|
||||
DoubleVector.fromArray(SPECIES, db, i))
|
||||
.intoArray(dres, i);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(failOn = {IRNode.SELECT_FROM_TWO_VECTOR_VD, IRNode.VECTOR_SIZE_2},
|
||||
applyIfCPUFeatureAnd = {"asimd", "true", "sve2", "false"},
|
||||
applyIf = {"MaxVectorSize", ">=16"})
|
||||
@IR(counts = {IRNode.SELECT_FROM_TWO_VECTOR_VD, IRNode.VECTOR_SIZE_2, ">0"},
|
||||
applyIfCPUFeatureOr = {"sve2", "true", "avx512vl", "true"},
|
||||
applyIf = {"MaxVectorSize", ">=16"})
|
||||
public static void selectFromTwoVector_Double128() {
|
||||
DoubleSelectFromTwoVectorKernel(DoubleVector.SPECIES_128, da, db, dindex[0]);
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(failOn = {IRNode.SELECT_FROM_TWO_VECTOR_VD, IRNode.VECTOR_SIZE_4},
|
||||
applyIfCPUFeatureAnd = {"asimd", "true", "sve2", "false"},
|
||||
applyIf = {"MaxVectorSize", ">=32"})
|
||||
@IR(counts = {IRNode.SELECT_FROM_TWO_VECTOR_VD, IRNode.VECTOR_SIZE_4, ">0"},
|
||||
applyIfCPUFeatureOr = {"sve2", "true", "avx512vl", "true"},
|
||||
applyIf = {"MaxVectorSize", ">=32"})
|
||||
public static void selectFromTwoVector_Double256() {
|
||||
DoubleSelectFromTwoVectorKernel(DoubleVector.SPECIES_256, da, db, dindex[1]);
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(failOn = {IRNode.SELECT_FROM_TWO_VECTOR_VD, IRNode.VECTOR_SIZE_8},
|
||||
applyIfCPUFeatureAnd = {"asimd", "true", "sve2", "false"},
|
||||
applyIf = {"MaxVectorSize", ">=64"})
|
||||
@IR(counts = {IRNode.SELECT_FROM_TWO_VECTOR_VD, IRNode.VECTOR_SIZE_8, ">0"},
|
||||
applyIfCPUFeatureOr = {"sve2", "true", "avx512f", "true"},
|
||||
applyIf = {"MaxVectorSize", ">=64"})
|
||||
public static void selectFromTwoVector_Double512() {
|
||||
DoubleSelectFromTwoVectorKernel(DoubleVector.SPECIES_512, da, db, dindex[2]);
|
||||
}
|
||||
|
||||
// Test SelectFromTwoVector operation for Longs
|
||||
@ForceInline
|
||||
public static void LongSelectFromTwoVectorKernel(VectorSpecies SPECIES, long[] la,
|
||||
long[] lb, long[] lindex) {
|
||||
for (int i = 0; i < SPECIES.loopBound(ia.length); i += SPECIES.length()) {
|
||||
LongVector.fromArray(SPECIES, lindex, i)
|
||||
.selectFrom(LongVector.fromArray(SPECIES, la, i),
|
||||
LongVector.fromArray(SPECIES, lb, i))
|
||||
.intoArray(lres, i);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(failOn = {IRNode.SELECT_FROM_TWO_VECTOR_VL, IRNode.VECTOR_SIZE_2},
|
||||
applyIfCPUFeatureAnd = {"asimd", "true", "sve2", "false"},
|
||||
applyIf = {"MaxVectorSize", ">=16"})
|
||||
@IR(counts = {IRNode.SELECT_FROM_TWO_VECTOR_VL, IRNode.VECTOR_SIZE_2, ">0"},
|
||||
applyIfCPUFeatureOr = {"sve2", "true", "avx512vl", "true"},
|
||||
applyIf = {"MaxVectorSize", ">=16"})
|
||||
public static void selectFromTwoVector_Long128() {
|
||||
LongSelectFromTwoVectorKernel(LongVector.SPECIES_128, la, lb, lindex[0]);
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(failOn = {IRNode.SELECT_FROM_TWO_VECTOR_VL, IRNode.VECTOR_SIZE_4},
|
||||
applyIfCPUFeatureAnd = {"asimd", "true", "sve2", "false"},
|
||||
applyIf = {"MaxVectorSize", ">=32"})
|
||||
@IR(counts = {IRNode.SELECT_FROM_TWO_VECTOR_VL, IRNode.VECTOR_SIZE_4, ">0"},
|
||||
applyIfCPUFeatureOr = {"sve2", "true", "avx512vl", "true"},
|
||||
applyIf = {"MaxVectorSize", ">=32"})
|
||||
public static void selectFromTwoVector_Long256() {
|
||||
LongSelectFromTwoVectorKernel(LongVector.SPECIES_256, la, lb, lindex[1]);
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(failOn = {IRNode.SELECT_FROM_TWO_VECTOR_VL, IRNode.VECTOR_SIZE_8},
|
||||
applyIfCPUFeatureAnd = {"asimd", "true", "sve2", "false"},
|
||||
applyIf = {"MaxVectorSize", ">=64"})
|
||||
@IR(counts = {IRNode.SELECT_FROM_TWO_VECTOR_VL, IRNode.VECTOR_SIZE_8, ">0"},
|
||||
applyIfCPUFeatureOr = {"sve2", "true", "avx512f", "true"},
|
||||
applyIf = {"MaxVectorSize", ">=64"})
|
||||
public static void selectFromTwoVector_Long512() {
|
||||
LongSelectFromTwoVectorKernel(LongVector.SPECIES_512, la, lb, lindex[2]);
|
||||
}
|
||||
|
||||
public static void main(String[] args) {
|
||||
TestFramework.runWithFlags("--add-modules=jdk.incubator.vector");
|
||||
}
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user