mirror of
https://github.com/openjdk/jdk.git
synced 2026-01-28 12:09:14 +00:00
8363989: AArch64: Add missing backend support of VectorAPI expand operation
Reviewed-by: epeter, eliu, xgong
This commit is contained in:
parent
cc65836d00
commit
e6f8450d95
@ -216,11 +216,6 @@ source %{
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
case Op_ExpandV:
|
||||
if (UseSVE < 2 || is_subword_type(bt)) {
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
case Op_VectorMaskToLong:
|
||||
if (UseSVE > 0 && vlen > 64) {
|
||||
return false;
|
||||
@ -7113,10 +7108,39 @@ instruct vcompressS(vReg dst, vReg src, pReg pg,
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
instruct vexpand(vReg dst, vReg src, pRegGov pg) %{
|
||||
// Vector expand for NEON-only targets (rule applies when UseSVE == 0).
// The mask is held in a vector register; code generation is delegated to
// C2_MacroAssembler::vector_expand_neon.
instruct vexpand_neon(vReg dst, vReg src, vReg mask, vReg tmp1, vReg tmp2) %{
  predicate(UseSVE == 0);
  match(Set dst (ExpandV src mask));
  // dst is declared TEMP_DEF and tmp1/tmp2 are scratch registers; the
  // macro-assembler routine asserts that dst, src, mask, tmp1 and tmp2 are
  // all different registers.
  effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2);
  format %{ "vexpand_neon $dst, $src, $mask\t# KILL $tmp1, $tmp2" %}
  ins_encode %{
    BasicType bt = Matcher::vector_element_basic_type(this);
    int length_in_bytes = (int) Matcher::vector_length_in_bytes(this);
    __ vector_expand_neon($dst$$FloatRegister, $src$$FloatRegister, $mask$$FloatRegister,
                          $tmp1$$FloatRegister, $tmp2$$FloatRegister, bt, length_in_bytes);
  %}
  ins_pipe(pipe_slow);
%}
|
||||
|
||||
// Vector expand using the generic SVE sequence. Selected for UseSVE == 1
// (all element types) and for UseSVE == 2 when the element size is less than
// 4 bytes; the UseSVE == 2 case with elements of 4 bytes or more is matched by
// vexpand_sve2_SD instead. The mask is a governing predicate register and code
// generation is delegated to C2_MacroAssembler::vector_expand_sve.
instruct vexpand_sve(vReg dst, vReg src, pRegGov pg, vReg tmp1, vReg tmp2) %{
  predicate(UseSVE == 1 || (UseSVE == 2 && type2aelembytes(Matcher::vector_element_basic_type(n)) < 4));
  match(Set dst (ExpandV src pg));
  // dst is declared TEMP_DEF and tmp1/tmp2 are scratch registers; the
  // macro-assembler routine asserts that dst, src, tmp1 and tmp2 are all
  // different registers.
  effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2);
  format %{ "vexpand_sve $dst, $src, $pg\t# KILL $tmp1, $tmp2" %}
  ins_encode %{
    BasicType bt = Matcher::vector_element_basic_type(this);
    int length_in_bytes = (int) Matcher::vector_length_in_bytes(this);
    __ vector_expand_sve($dst$$FloatRegister, $src$$FloatRegister, $pg$$PRegister,
                         $tmp1$$FloatRegister, $tmp2$$FloatRegister, bt, length_in_bytes);
  %}
  ins_pipe(pipe_slow);
%}
|
||||
|
||||
instruct vexpand_sve2_SD(vReg dst, vReg src, pRegGov pg) %{
|
||||
predicate(UseSVE == 2 && type2aelembytes(Matcher::vector_element_basic_type(n)) >= 4);
|
||||
match(Set dst (ExpandV src pg));
|
||||
effect(TEMP_DEF dst);
|
||||
format %{ "vexpand $dst, $pg, $src" %}
|
||||
format %{ "vexpand_sve2_SD $dst, $src, $pg" %}
|
||||
ins_encode %{
|
||||
// Example input: src = 1 2 3 4 5 6 7 8
|
||||
// pg = 1 0 0 1 1 0 1 1
|
||||
@ -7127,7 +7151,6 @@ instruct vexpand(vReg dst, vReg src, pRegGov pg) %{
|
||||
// for TBL whose value is used to select the indexed element from src vector.
|
||||
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
assert(UseSVE == 2 && !is_subword_type(bt), "unsupported");
|
||||
Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
|
||||
// dst = 0 0 0 0 0 0 0 0
|
||||
__ sve_dup($dst$$FloatRegister, size, 0);
|
||||
|
||||
@ -206,11 +206,6 @@ source %{
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
case Op_ExpandV:
|
||||
if (UseSVE < 2 || is_subword_type(bt)) {
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
case Op_VectorMaskToLong:
|
||||
if (UseSVE > 0 && vlen > 64) {
|
||||
return false;
|
||||
@ -5101,10 +5096,39 @@ instruct vcompressS(vReg dst, vReg src, pReg pg,
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
instruct vexpand(vReg dst, vReg src, pRegGov pg) %{
|
||||
// Vector expand for NEON-only targets (rule applies when UseSVE == 0).
// The mask is held in a vector register; code generation is delegated to
// C2_MacroAssembler::vector_expand_neon.
instruct vexpand_neon(vReg dst, vReg src, vReg mask, vReg tmp1, vReg tmp2) %{
  predicate(UseSVE == 0);
  match(Set dst (ExpandV src mask));
  // dst is declared TEMP_DEF and tmp1/tmp2 are scratch registers; the
  // macro-assembler routine asserts that dst, src, mask, tmp1 and tmp2 are
  // all different registers.
  effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2);
  format %{ "vexpand_neon $dst, $src, $mask\t# KILL $tmp1, $tmp2" %}
  ins_encode %{
    BasicType bt = Matcher::vector_element_basic_type(this);
    int length_in_bytes = (int) Matcher::vector_length_in_bytes(this);
    __ vector_expand_neon($dst$$FloatRegister, $src$$FloatRegister, $mask$$FloatRegister,
                          $tmp1$$FloatRegister, $tmp2$$FloatRegister, bt, length_in_bytes);
  %}
  ins_pipe(pipe_slow);
%}
|
||||
|
||||
// Vector expand using the generic SVE sequence. Selected for UseSVE == 1
// (all element types) and for UseSVE == 2 when the element size is less than
// 4 bytes; the UseSVE == 2 case with elements of 4 bytes or more is matched by
// vexpand_sve2_SD instead. The mask is a governing predicate register and code
// generation is delegated to C2_MacroAssembler::vector_expand_sve.
instruct vexpand_sve(vReg dst, vReg src, pRegGov pg, vReg tmp1, vReg tmp2) %{
  predicate(UseSVE == 1 || (UseSVE == 2 && type2aelembytes(Matcher::vector_element_basic_type(n)) < 4));
  match(Set dst (ExpandV src pg));
  // dst is declared TEMP_DEF and tmp1/tmp2 are scratch registers; the
  // macro-assembler routine asserts that dst, src, tmp1 and tmp2 are all
  // different registers.
  effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2);
  format %{ "vexpand_sve $dst, $src, $pg\t# KILL $tmp1, $tmp2" %}
  ins_encode %{
    BasicType bt = Matcher::vector_element_basic_type(this);
    int length_in_bytes = (int) Matcher::vector_length_in_bytes(this);
    __ vector_expand_sve($dst$$FloatRegister, $src$$FloatRegister, $pg$$PRegister,
                         $tmp1$$FloatRegister, $tmp2$$FloatRegister, bt, length_in_bytes);
  %}
  ins_pipe(pipe_slow);
%}
|
||||
|
||||
instruct vexpand_sve2_SD(vReg dst, vReg src, pRegGov pg) %{
|
||||
predicate(UseSVE == 2 && type2aelembytes(Matcher::vector_element_basic_type(n)) >= 4);
|
||||
match(Set dst (ExpandV src pg));
|
||||
effect(TEMP_DEF dst);
|
||||
format %{ "vexpand $dst, $pg, $src" %}
|
||||
format %{ "vexpand_sve2_SD $dst, $src, $pg" %}
|
||||
ins_encode %{
|
||||
// Example input: src = 1 2 3 4 5 6 7 8
|
||||
// pg = 1 0 0 1 1 0 1 1
|
||||
@ -5115,7 +5139,6 @@ instruct vexpand(vReg dst, vReg src, pRegGov pg) %{
|
||||
// for TBL whose value is used to select the indexed element from src vector.
|
||||
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
assert(UseSVE == 2 && !is_subword_type(bt), "unsupported");
|
||||
Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
|
||||
// dst = 0 0 0 0 0 0 0 0
|
||||
__ sve_dup($dst$$FloatRegister, size, 0);
|
||||
|
||||
@ -4068,6 +4068,13 @@ public:
|
||||
INSN(sve_brkb, 0b10); // Break before first true condition
|
||||
#undef INSN
|
||||
|
||||
// SVE move prefix (unpredicated)
|
||||
void sve_movprfx(FloatRegister Zd, FloatRegister Zn) {
|
||||
starti;
|
||||
f(0b00000100, 31, 24), f(0b00, 23, 22), f(0b1, 21), f(0b00000, 20, 16);
|
||||
f(0b101111, 15, 10), rf(Zn, 5), rf(Zd, 0);
|
||||
}
|
||||
|
||||
// Element count and increment scalar (SVE)
|
||||
#define INSN(NAME, TYPE) \
|
||||
void NAME(Register Xdn, unsigned imm4 = 1, int pattern = 0b11111) { \
|
||||
|
||||
@ -2771,3 +2771,90 @@ void C2_MacroAssembler::select_from_two_vectors(FloatRegister dst, FloatRegister
|
||||
select_from_two_vectors_neon(dst, src1, src2, dst, tmp, vector_length_in_bytes);
|
||||
}
|
||||
}
|
||||
|
||||
// Vector expand implementation. Elements from the src vector are expanded into
|
||||
// the dst vector under the control of the vector mask.
|
||||
// Since there are no native instructions directly corresponding to expand before
|
||||
// SVE2p2, the following implementations mainly leverages the TBL instruction to
|
||||
// implement expand. To compute the index input for TBL, the prefix sum algorithm
|
||||
// (https://en.wikipedia.org/wiki/Prefix_sum) is used. The same algorithm is used
|
||||
// for NEON and SVE, but with different instructions where appropriate.
|
||||
|
||||
// Vector expand implementation for NEON.
|
||||
//
|
||||
// An example of 128-bit Byte vector:
|
||||
// Data direction: high <== low
|
||||
// Input:
|
||||
// src = g f e d c b a 9 8 7 6 5 4 3 2 1
|
||||
// mask = 0 0 -1 -1 0 0 -1 -1 0 0 -1 -1 0 0 -1 -1
|
||||
// Expected result:
|
||||
// dst = 0 0 8 7 0 0 6 5 0 0 4 3 0 0 2 1
|
||||
void C2_MacroAssembler::vector_expand_neon(FloatRegister dst, FloatRegister src, FloatRegister mask,
|
||||
FloatRegister tmp1, FloatRegister tmp2, BasicType bt,
|
||||
int vector_length_in_bytes) {
|
||||
assert(vector_length_in_bytes <= 16, "the vector length in bytes for NEON must be <= 16");
|
||||
assert_different_registers(dst, src, mask, tmp1, tmp2);
|
||||
// Since the TBL instruction only supports byte table, we need to
|
||||
// compute indices in byte type for all types.
|
||||
SIMD_Arrangement size = vector_length_in_bytes == 16 ? T16B : T8B;
|
||||
// tmp1 = 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
dup(tmp1, size, zr);
|
||||
// dst = 0 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1
|
||||
negr(dst, size, mask);
|
||||
// Calculate vector index for TBL with prefix sum algorithm.
|
||||
// dst = 8 8 8 7 6 6 6 5 4 4 4 3 2 2 2 1
|
||||
for (int i = 1; i < vector_length_in_bytes; i <<= 1) {
|
||||
ext(tmp2, size, tmp1, dst, vector_length_in_bytes - i);
|
||||
addv(dst, size, tmp2, dst);
|
||||
}
|
||||
// tmp2 = 0 0 -1 -1 0 0 -1 -1 0 0 -1 -1 0 0 -1 -1
|
||||
orr(tmp2, size, mask, mask);
|
||||
// tmp2 = 0 0 8 7 0 0 6 5 0 0 4 3 0 0 2 1
|
||||
bsl(tmp2, size, dst, tmp1);
|
||||
// tmp1 = 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
|
||||
movi(tmp1, size, 1);
|
||||
// dst = -1 -1 7 6 -1 -1 5 4 -1 -1 3 2 -1 -1 1 0
|
||||
subv(dst, size, tmp2, tmp1);
|
||||
// dst = 0 0 8 7 0 0 6 5 0 0 4 3 0 0 2 1
|
||||
tbl(dst, size, src, 1, dst);
|
||||
}
|
||||
|
||||
// Vector expand implementation for SVE.
|
||||
//
|
||||
// An example of 128-bit Short vector:
|
||||
// Data direction: high <== low
|
||||
// Input:
|
||||
// src = gf ed cb a9 87 65 43 21
|
||||
// pg = 00 01 00 01 00 01 00 01
|
||||
// Expected result:
|
||||
// dst = 00 87 00 65 00 43 00 21
|
||||
void C2_MacroAssembler::vector_expand_sve(FloatRegister dst, FloatRegister src, PRegister pg,
|
||||
FloatRegister tmp1, FloatRegister tmp2, BasicType bt,
|
||||
int vector_length_in_bytes) {
|
||||
assert(UseSVE > 0, "expand implementation only for SVE");
|
||||
assert_different_registers(dst, src, tmp1, tmp2);
|
||||
SIMD_RegVariant size = elemType_to_regVariant(bt);
|
||||
|
||||
// tmp1 = 00 00 00 00 00 00 00 00
|
||||
sve_dup(tmp1, size, 0);
|
||||
sve_movprfx(tmp2, tmp1);
|
||||
// tmp2 = 00 01 00 01 00 01 00 01
|
||||
sve_cpy(tmp2, size, pg, 1, true);
|
||||
// Calculate vector index for TBL with prefix sum algorithm.
|
||||
// tmp2 = 04 04 03 03 02 02 01 01
|
||||
for (int i = type2aelembytes(bt); i < vector_length_in_bytes; i <<= 1) {
|
||||
sve_movprfx(dst, tmp1);
|
||||
// The EXT instruction operates on the full-width sve register. The correct
|
||||
// index calculation method is:
|
||||
// vector_length_in_bytes - i + MaxVectorSize - vector_length_in_bytes =>
|
||||
// MaxVectorSize - i.
|
||||
sve_ext(dst, tmp2, MaxVectorSize - i);
|
||||
sve_add(tmp2, size, dst, tmp2);
|
||||
}
|
||||
// dst = 00 04 00 03 00 02 00 01
|
||||
sve_sel(dst, size, pg, tmp2, tmp1);
|
||||
// dst = -1 03 -1 02 -1 01 -1 00
|
||||
sve_sub(dst, size, 1);
|
||||
// dst = 00 87 00 65 00 43 00 21
|
||||
sve_tbl(dst, size, src, dst);
|
||||
}
|
||||
@ -204,4 +204,10 @@
|
||||
FloatRegister index, FloatRegister tmp, BasicType bt,
|
||||
unsigned vector_length_in_bytes);
|
||||
|
||||
void vector_expand_neon(FloatRegister dst, FloatRegister src, FloatRegister mask,
|
||||
FloatRegister tmp1, FloatRegister tmp2, BasicType bt,
|
||||
int vector_length_in_bytes);
|
||||
void vector_expand_sve(FloatRegister dst, FloatRegister src, PRegister pg,
|
||||
FloatRegister tmp1, FloatRegister tmp2, BasicType bt,
|
||||
int vector_length_in_bytes);
|
||||
#endif // CPU_AARCH64_C2_MACROASSEMBLER_AARCH64_HPP
|
||||
|
||||
@ -2135,6 +2135,7 @@ generate(SpecialCases, [["ccmn", "__ ccmn(zr, zr, 3u, Assembler::LE);",
|
||||
["punpkhi", "__ sve_punpkhi(p1, p0);", "punpkhi\tp1.h, p0.b"],
|
||||
["compact", "__ sve_compact(z16, __ S, z16, p1);", "compact\tz16.s, p1, z16.s"],
|
||||
["compact", "__ sve_compact(z16, __ D, z16, p1);", "compact\tz16.d, p1, z16.d"],
|
||||
["movprfx", "__ sve_movprfx(z17, z1);", "movprfx\tz17, z1"],
|
||||
["ext", "__ sve_ext(z17, z16, 63);", "ext\tz17.b, z17.b, z16.b, #63"],
|
||||
["facgt", "__ sve_fac(Assembler::GT, p1, __ H, p2, z4, z5);", "facgt\tp1.h, p2/z, z4.h, z5.h"],
|
||||
["facgt", "__ sve_fac(Assembler::GT, p1, __ S, p2, z4, z5);", "facgt\tp1.s, p2/z, z4.s, z5.s"],
|
||||
|
||||
@ -1148,6 +1148,7 @@
|
||||
__ sve_punpkhi(p1, p0); // punpkhi p1.h, p0.b
|
||||
__ sve_compact(z16, __ S, z16, p1); // compact z16.s, p1, z16.s
|
||||
__ sve_compact(z16, __ D, z16, p1); // compact z16.d, p1, z16.d
|
||||
__ sve_movprfx(z17, z1); // movprfx z17, z1
|
||||
__ sve_ext(z17, z16, 63); // ext z17.b, z17.b, z16.b, #63
|
||||
__ sve_fac(Assembler::GT, p1, __ H, p2, z4, z5); // facgt p1.h, p2/z, z4.h, z5.h
|
||||
__ sve_fac(Assembler::GT, p1, __ S, p2, z4, z5); // facgt p1.s, p2/z, z4.s, z5.s
|
||||
@ -1444,30 +1445,30 @@
|
||||
0x9101a1a0, 0xb10a5cc8, 0xd10810aa, 0xf10fd061,
|
||||
0x120cb166, 0x321764bc, 0x52174681, 0x720c0227,
|
||||
0x9241018e, 0xb25a2969, 0xd278b411, 0xf26aad01,
|
||||
0x14000000, 0x17ffffd7, 0x140004b6, 0x94000000,
|
||||
0x97ffffd4, 0x940004b3, 0x3400000a, 0x34fffa2a,
|
||||
0x3400960a, 0x35000008, 0x35fff9c8, 0x350095a8,
|
||||
0xb400000b, 0xb4fff96b, 0xb400954b, 0xb500001d,
|
||||
0xb5fff91d, 0xb50094fd, 0x10000013, 0x10fff8b3,
|
||||
0x10009493, 0x90000013, 0x36300016, 0x3637f836,
|
||||
0x36309416, 0x3758000c, 0x375ff7cc, 0x375893ac,
|
||||
0x14000000, 0x17ffffd7, 0x140004b7, 0x94000000,
|
||||
0x97ffffd4, 0x940004b4, 0x3400000a, 0x34fffa2a,
|
||||
0x3400962a, 0x35000008, 0x35fff9c8, 0x350095c8,
|
||||
0xb400000b, 0xb4fff96b, 0xb400956b, 0xb500001d,
|
||||
0xb5fff91d, 0xb500951d, 0x10000013, 0x10fff8b3,
|
||||
0x100094b3, 0x90000013, 0x36300016, 0x3637f836,
|
||||
0x36309436, 0x3758000c, 0x375ff7cc, 0x375893cc,
|
||||
0x128313a0, 0x528a32c7, 0x7289173b, 0x92ab3acc,
|
||||
0xd2a0bf94, 0xf2c285e8, 0x9358722f, 0x330e652f,
|
||||
0x53067f3b, 0x93577c53, 0xb34a1aac, 0xd35a4016,
|
||||
0x13946c63, 0x93c3dbc8, 0x54000000, 0x54fff5a0,
|
||||
0x54009180, 0x54000001, 0x54fff541, 0x54009121,
|
||||
0x54000002, 0x54fff4e2, 0x540090c2, 0x54000002,
|
||||
0x54fff482, 0x54009062, 0x54000003, 0x54fff423,
|
||||
0x54009003, 0x54000003, 0x54fff3c3, 0x54008fa3,
|
||||
0x54000004, 0x54fff364, 0x54008f44, 0x54000005,
|
||||
0x54fff305, 0x54008ee5, 0x54000006, 0x54fff2a6,
|
||||
0x54008e86, 0x54000007, 0x54fff247, 0x54008e27,
|
||||
0x54000008, 0x54fff1e8, 0x54008dc8, 0x54000009,
|
||||
0x54fff189, 0x54008d69, 0x5400000a, 0x54fff12a,
|
||||
0x54008d0a, 0x5400000b, 0x54fff0cb, 0x54008cab,
|
||||
0x5400000c, 0x54fff06c, 0x54008c4c, 0x5400000d,
|
||||
0x54fff00d, 0x54008bed, 0x5400000e, 0x54ffefae,
|
||||
0x54008b8e, 0x5400000f, 0x54ffef4f, 0x54008b2f,
|
||||
0x540091a0, 0x54000001, 0x54fff541, 0x54009141,
|
||||
0x54000002, 0x54fff4e2, 0x540090e2, 0x54000002,
|
||||
0x54fff482, 0x54009082, 0x54000003, 0x54fff423,
|
||||
0x54009023, 0x54000003, 0x54fff3c3, 0x54008fc3,
|
||||
0x54000004, 0x54fff364, 0x54008f64, 0x54000005,
|
||||
0x54fff305, 0x54008f05, 0x54000006, 0x54fff2a6,
|
||||
0x54008ea6, 0x54000007, 0x54fff247, 0x54008e47,
|
||||
0x54000008, 0x54fff1e8, 0x54008de8, 0x54000009,
|
||||
0x54fff189, 0x54008d89, 0x5400000a, 0x54fff12a,
|
||||
0x54008d2a, 0x5400000b, 0x54fff0cb, 0x54008ccb,
|
||||
0x5400000c, 0x54fff06c, 0x54008c6c, 0x5400000d,
|
||||
0x54fff00d, 0x54008c0d, 0x5400000e, 0x54ffefae,
|
||||
0x54008bae, 0x5400000f, 0x54ffef4f, 0x54008b4f,
|
||||
0xd40658e1, 0xd4014d22, 0xd4046543, 0xd4273f60,
|
||||
0xd44cad80, 0xd503201f, 0xd503203f, 0xd503205f,
|
||||
0xd503209f, 0xd50320bf, 0xd503219f, 0xd50323bf,
|
||||
@ -1686,66 +1687,66 @@
|
||||
0x25d8e184, 0x2518e407, 0x05214800, 0x05614800,
|
||||
0x05a14800, 0x05e14800, 0x05214c00, 0x05614c00,
|
||||
0x05a14c00, 0x05e14c00, 0x05304001, 0x05314001,
|
||||
0x05a18610, 0x05e18610, 0x05271e11, 0x6545e891,
|
||||
0x6585e891, 0x65c5e891, 0x6545c891, 0x6585c891,
|
||||
0x65c5c891, 0x45b0c210, 0x45f1c231, 0x1e601000,
|
||||
0x1e603000, 0x1e621000, 0x1e623000, 0x1e641000,
|
||||
0x1e643000, 0x1e661000, 0x1e663000, 0x1e681000,
|
||||
0x1e683000, 0x1e6a1000, 0x1e6a3000, 0x1e6c1000,
|
||||
0x1e6c3000, 0x1e6e1000, 0x1e6e3000, 0x1e701000,
|
||||
0x1e703000, 0x1e721000, 0x1e723000, 0x1e741000,
|
||||
0x1e743000, 0x1e761000, 0x1e763000, 0x1e781000,
|
||||
0x1e783000, 0x1e7a1000, 0x1e7a3000, 0x1e7c1000,
|
||||
0x1e7c3000, 0x1e7e1000, 0x1e7e3000, 0xf8268267,
|
||||
0xf82d023c, 0xf8301046, 0xf83d2083, 0xf8263290,
|
||||
0xf82d528c, 0xf8284299, 0xf8337160, 0xf8386286,
|
||||
0xf8bf820e, 0xf8a600e0, 0xf8af1353, 0xf8a922ea,
|
||||
0xf8b53396, 0xf8a251e3, 0xf8b340f4, 0xf8a470fd,
|
||||
0xf8a06209, 0xf8f48097, 0xf8f002ea, 0xf8eb10d9,
|
||||
0xf8ff21b0, 0xf8f7302c, 0xf8ee52a9, 0xf8f041fa,
|
||||
0xf8e471e4, 0xf8e863c6, 0xf864823d, 0xf87d013a,
|
||||
0xf86f1162, 0xf87d20e3, 0xf86132bb, 0xf870510e,
|
||||
0xf8704336, 0xf86572b4, 0xf8706217, 0xb83e8294,
|
||||
0xb8200264, 0xb8381284, 0xb8242358, 0xb8333102,
|
||||
0xb828530e, 0xb83042df, 0xb824703f, 0xb82a6194,
|
||||
0xb8a080e9, 0xb8b80090, 0xb8bb1146, 0xb8bb21b8,
|
||||
0xb8b032df, 0xb8b653f4, 0xb8bd41c9, 0xb8b47287,
|
||||
0xb8bc6169, 0xb8ee828c, 0xb8e10138, 0xb8f3126d,
|
||||
0xb8f020b0, 0xb8e03183, 0xb8e851ef, 0xb8f041e4,
|
||||
0xb8fe7005, 0xb8ea6376, 0xb8638120, 0xb873015d,
|
||||
0xb8781284, 0xb86723b8, 0xb86e3175, 0xb87b51ed,
|
||||
0xb87f41d1, 0xb863721e, 0xb87660f4, 0xce216874,
|
||||
0xce104533, 0xce648c15, 0xce8e3302, 0xce6e82ab,
|
||||
0xce6c87d1, 0xcec08063, 0xce638937, 0x25e0c358,
|
||||
0x25a1c7d3, 0x0580785a, 0x05426328, 0x05009892,
|
||||
0x25a0cc29, 0x2561cec8, 0x058044b3, 0x05401c99,
|
||||
0x05006b49, 0x25e0d6f7, 0x2561c528, 0x0583c8bc,
|
||||
0x0542522f, 0x05001ec0, 0x25e0de65, 0x25a1c113,
|
||||
0x05803cad, 0x0540f3c0, 0x0500ab15, 0x2560c28c,
|
||||
0x2561d7c0, 0x05801ed7, 0x0542633b, 0x05003696,
|
||||
0x2560d4b4, 0x25e1c918, 0x058021ff, 0x05400e15,
|
||||
0x0500f3de, 0x0473025a, 0x04bd05ab, 0x658e0025,
|
||||
0x658a08e2, 0x659a0493, 0x043e1062, 0x04f418b4,
|
||||
0x046d15bd, 0x04611fce, 0x04d6a07c, 0x04001929,
|
||||
0x041a09da, 0x04d098f4, 0x04db10d4, 0x0459a3ad,
|
||||
0x041aa029, 0x041919fb, 0x04d39e24, 0x04118302,
|
||||
0x04101dba, 0x04d7ae16, 0x04dea571, 0x04180210,
|
||||
0x05e786fc, 0x05e4915c, 0x04881cf1, 0x044a0f04,
|
||||
0x04090969, 0x048b16c4, 0x044101e4, 0x04dcbf44,
|
||||
0x65809745, 0x658d833f, 0x65c68468, 0x65c79b07,
|
||||
0x65829e38, 0x049dafca, 0x6582bba8, 0x65c0b7ff,
|
||||
0x65c1b4e0, 0x658dbadd, 0x65819a9d, 0x65ed9246,
|
||||
0x65b30815, 0x65e6263c, 0x65eebb94, 0x65bad14e,
|
||||
0x65efe178, 0x65fc5697, 0x65e07f14, 0x040c55a6,
|
||||
0x04977f4d, 0x043d3046, 0x04b733a0, 0x046830a4,
|
||||
0x04ed322d, 0x05686948, 0x05bd6c13, 0x65c88ef0,
|
||||
0x450db3d7, 0x4540b6d9, 0x043e3979, 0x445896ce,
|
||||
0x445a9005, 0x44d98069, 0x445b87ae, 0x04da348e,
|
||||
0x04982edb, 0x0499397f, 0x0408338c, 0x04ca309c,
|
||||
0x65c721e6, 0x65c63641, 0x65982882, 0x04812b8b,
|
||||
0x0e251083, 0x4e3712d5, 0x0e61101f, 0x4e6d118b,
|
||||
0x0eba1338, 0x4eb712d5, 0x2e31120f, 0x6e2e11ac,
|
||||
0x2e6810e6, 0x6e6f11cd, 0x2eaa1128, 0x6eb1120f,
|
||||
|
||||
0x05a18610, 0x05e18610, 0x0420bc31, 0x05271e11,
|
||||
0x6545e891, 0x6585e891, 0x65c5e891, 0x6545c891,
|
||||
0x6585c891, 0x65c5c891, 0x45b0c210, 0x45f1c231,
|
||||
0x1e601000, 0x1e603000, 0x1e621000, 0x1e623000,
|
||||
0x1e641000, 0x1e643000, 0x1e661000, 0x1e663000,
|
||||
0x1e681000, 0x1e683000, 0x1e6a1000, 0x1e6a3000,
|
||||
0x1e6c1000, 0x1e6c3000, 0x1e6e1000, 0x1e6e3000,
|
||||
0x1e701000, 0x1e703000, 0x1e721000, 0x1e723000,
|
||||
0x1e741000, 0x1e743000, 0x1e761000, 0x1e763000,
|
||||
0x1e781000, 0x1e783000, 0x1e7a1000, 0x1e7a3000,
|
||||
0x1e7c1000, 0x1e7c3000, 0x1e7e1000, 0x1e7e3000,
|
||||
0xf8268267, 0xf82d023c, 0xf8301046, 0xf83d2083,
|
||||
0xf8263290, 0xf82d528c, 0xf8284299, 0xf8337160,
|
||||
0xf8386286, 0xf8bf820e, 0xf8a600e0, 0xf8af1353,
|
||||
0xf8a922ea, 0xf8b53396, 0xf8a251e3, 0xf8b340f4,
|
||||
0xf8a470fd, 0xf8a06209, 0xf8f48097, 0xf8f002ea,
|
||||
0xf8eb10d9, 0xf8ff21b0, 0xf8f7302c, 0xf8ee52a9,
|
||||
0xf8f041fa, 0xf8e471e4, 0xf8e863c6, 0xf864823d,
|
||||
0xf87d013a, 0xf86f1162, 0xf87d20e3, 0xf86132bb,
|
||||
0xf870510e, 0xf8704336, 0xf86572b4, 0xf8706217,
|
||||
0xb83e8294, 0xb8200264, 0xb8381284, 0xb8242358,
|
||||
0xb8333102, 0xb828530e, 0xb83042df, 0xb824703f,
|
||||
0xb82a6194, 0xb8a080e9, 0xb8b80090, 0xb8bb1146,
|
||||
0xb8bb21b8, 0xb8b032df, 0xb8b653f4, 0xb8bd41c9,
|
||||
0xb8b47287, 0xb8bc6169, 0xb8ee828c, 0xb8e10138,
|
||||
0xb8f3126d, 0xb8f020b0, 0xb8e03183, 0xb8e851ef,
|
||||
0xb8f041e4, 0xb8fe7005, 0xb8ea6376, 0xb8638120,
|
||||
0xb873015d, 0xb8781284, 0xb86723b8, 0xb86e3175,
|
||||
0xb87b51ed, 0xb87f41d1, 0xb863721e, 0xb87660f4,
|
||||
0xce216874, 0xce104533, 0xce648c15, 0xce8e3302,
|
||||
0xce6e82ab, 0xce6c87d1, 0xcec08063, 0xce638937,
|
||||
0x25e0c358, 0x25a1c7d3, 0x0580785a, 0x05426328,
|
||||
0x05009892, 0x25a0cc29, 0x2561cec8, 0x058044b3,
|
||||
0x05401c99, 0x05006b49, 0x25e0d6f7, 0x2561c528,
|
||||
0x0583c8bc, 0x0542522f, 0x05001ec0, 0x25e0de65,
|
||||
0x25a1c113, 0x05803cad, 0x0540f3c0, 0x0500ab15,
|
||||
0x2560c28c, 0x2561d7c0, 0x05801ed7, 0x0542633b,
|
||||
0x05003696, 0x2560d4b4, 0x25e1c918, 0x058021ff,
|
||||
0x05400e15, 0x0500f3de, 0x0473025a, 0x04bd05ab,
|
||||
0x658e0025, 0x658a08e2, 0x659a0493, 0x043e1062,
|
||||
0x04f418b4, 0x046d15bd, 0x04611fce, 0x04d6a07c,
|
||||
0x04001929, 0x041a09da, 0x04d098f4, 0x04db10d4,
|
||||
0x0459a3ad, 0x041aa029, 0x041919fb, 0x04d39e24,
|
||||
0x04118302, 0x04101dba, 0x04d7ae16, 0x04dea571,
|
||||
0x04180210, 0x05e786fc, 0x05e4915c, 0x04881cf1,
|
||||
0x044a0f04, 0x04090969, 0x048b16c4, 0x044101e4,
|
||||
0x04dcbf44, 0x65809745, 0x658d833f, 0x65c68468,
|
||||
0x65c79b07, 0x65829e38, 0x049dafca, 0x6582bba8,
|
||||
0x65c0b7ff, 0x65c1b4e0, 0x658dbadd, 0x65819a9d,
|
||||
0x65ed9246, 0x65b30815, 0x65e6263c, 0x65eebb94,
|
||||
0x65bad14e, 0x65efe178, 0x65fc5697, 0x65e07f14,
|
||||
0x040c55a6, 0x04977f4d, 0x043d3046, 0x04b733a0,
|
||||
0x046830a4, 0x04ed322d, 0x05686948, 0x05bd6c13,
|
||||
0x65c88ef0, 0x450db3d7, 0x4540b6d9, 0x043e3979,
|
||||
0x445896ce, 0x445a9005, 0x44d98069, 0x445b87ae,
|
||||
0x04da348e, 0x04982edb, 0x0499397f, 0x0408338c,
|
||||
0x04ca309c, 0x65c721e6, 0x65c63641, 0x65982882,
|
||||
0x04812b8b, 0x0e251083, 0x4e3712d5, 0x0e61101f,
|
||||
0x4e6d118b, 0x0eba1338, 0x4eb712d5, 0x2e31120f,
|
||||
0x6e2e11ac, 0x2e6810e6, 0x6e6f11cd, 0x2eaa1128,
|
||||
0x6eb1120f,
|
||||
};
|
||||
// END Generated code -- do not edit
|
||||
|
||||
@ -2755,6 +2755,36 @@ public class IRNode {
|
||||
vectorNode(EXPAND_BITS_VL, "ExpandBitsV", TYPE_LONG);
|
||||
}
|
||||
|
||||
public static final String EXPAND_VB = VECTOR_PREFIX + "EXPAND_VB" + POSTFIX;
|
||||
static {
|
||||
vectorNode(EXPAND_VB, "ExpandV", TYPE_BYTE);
|
||||
}
|
||||
|
||||
public static final String EXPAND_VS = VECTOR_PREFIX + "EXPAND_VS" + POSTFIX;
|
||||
static {
|
||||
vectorNode(EXPAND_VS, "ExpandV", TYPE_SHORT);
|
||||
}
|
||||
|
||||
public static final String EXPAND_VI = VECTOR_PREFIX + "EXPAND_VI" + POSTFIX;
|
||||
static {
|
||||
vectorNode(EXPAND_VI, "ExpandV", TYPE_INT);
|
||||
}
|
||||
|
||||
public static final String EXPAND_VL = VECTOR_PREFIX + "EXPAND_VL" + POSTFIX;
|
||||
static {
|
||||
vectorNode(EXPAND_VL, "ExpandV", TYPE_LONG);
|
||||
}
|
||||
|
||||
public static final String EXPAND_VF = VECTOR_PREFIX + "EXPAND_VF" + POSTFIX;
|
||||
static {
|
||||
vectorNode(EXPAND_VF, "ExpandV", TYPE_FLOAT);
|
||||
}
|
||||
|
||||
public static final String EXPAND_VD = VECTOR_PREFIX + "EXPAND_VD" + POSTFIX;
|
||||
static {
|
||||
vectorNode(EXPAND_VD, "ExpandV", TYPE_DOUBLE);
|
||||
}
|
||||
|
||||
public static final String Z_LOAD_P_WITH_BARRIER_FLAG = COMPOSITE_PREFIX + "Z_LOAD_P_WITH_BARRIER_FLAG" + POSTFIX;
|
||||
static {
|
||||
String regex = START + "zLoadP\\S*" + MID + "barrier\\(\\s*" + IS_REPLACED + "\\s*\\)" + END;
|
||||
|
||||
198
test/hotspot/jtreg/compiler/vectorapi/VectorExpandTest.java
Normal file
198
test/hotspot/jtreg/compiler/vectorapi/VectorExpandTest.java
Normal file
@ -0,0 +1,198 @@
|
||||
/*
|
||||
* Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
|
||||
package compiler.vectorapi;
|
||||
|
||||
import compiler.lib.generators.*;
|
||||
import compiler.lib.ir_framework.*;
|
||||
import jdk.incubator.vector.*;
|
||||
import jdk.test.lib.Asserts;
|
||||
|
||||
/**
|
||||
* @test
|
||||
* @bug 8363989
|
||||
* @key randomness
|
||||
* @library /test/lib /
|
||||
* @summary AArch64: Add missing backend support of VectorAPI expand operation
|
||||
* @modules jdk.incubator.vector
|
||||
*
|
||||
* @run driver compiler.vectorapi.VectorExpandTest
|
||||
*/
|
||||
|
||||
public class VectorExpandTest {
|
||||
static final VectorSpecies<Byte> B_SPECIES = ByteVector.SPECIES_MAX;
|
||||
static final VectorSpecies<Short> S_SPECIES = ShortVector.SPECIES_MAX;
|
||||
static final VectorSpecies<Integer> I_SPECIES = IntVector.SPECIES_MAX;
|
||||
static final VectorSpecies<Float> F_SPECIES = FloatVector.SPECIES_MAX;
|
||||
static final VectorSpecies<Long> L_SPECIES = LongVector.SPECIES_MAX;
|
||||
static final VectorSpecies<Double> D_SPECIES = DoubleVector.SPECIES_MAX;
|
||||
static final int LENGTH = 512;
|
||||
static final Generators RD = Generators.G;
|
||||
static byte[] ba, bb;
|
||||
static short[] sa, sb;
|
||||
static int[] ia, ib;
|
||||
static long[] la, lb;
|
||||
static float[] fa, fb;
|
||||
static double[] da, db;
|
||||
static boolean[] ma;
|
||||
|
||||
static {
|
||||
ba = new byte[LENGTH];
|
||||
bb = new byte[LENGTH];
|
||||
sa = new short[LENGTH];
|
||||
sb = new short[LENGTH];
|
||||
ia = new int[LENGTH];
|
||||
ib = new int[LENGTH];
|
||||
la = new long[LENGTH];
|
||||
lb = new long[LENGTH];
|
||||
fa = new float[LENGTH];
|
||||
fb = new float[LENGTH];
|
||||
da = new double[LENGTH];
|
||||
db = new double[LENGTH];
|
||||
ma = new boolean[LENGTH];
|
||||
|
||||
Generator<Integer> iGen = RD.ints();
|
||||
Generator<Long> lGen = RD.longs();
|
||||
Generator<Float> fGen = RD.floats();
|
||||
Generator<Double> dGen = RD.doubles();
|
||||
|
||||
for (int i = 0; i < LENGTH; i++) {
|
||||
ba[i] = iGen.next().byteValue();
|
||||
sa[i] = iGen.next().shortValue();
|
||||
ma[i] = iGen.next() % 2 == 0;
|
||||
}
|
||||
RD.fill(iGen, ia);
|
||||
RD.fill(lGen, la);
|
||||
RD.fill(fGen, fa);
|
||||
RD.fill(dGen, da);
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = { IRNode.EXPAND_VB, "= 1" }, applyIfCPUFeature = { "asimd", "true" })
|
||||
public static void testVectorExpandByte(ByteVector av, VectorMask<Byte> m) {
|
||||
av.expand(m).intoArray(bb, 0);
|
||||
}
|
||||
|
||||
@Run(test = "testVectorExpandByte")
|
||||
public static void testVectorExpandByte_runner() {
|
||||
ByteVector av = ByteVector.fromArray(B_SPECIES, ba, 0);
|
||||
VectorMask<Byte> m = VectorMask.fromArray(B_SPECIES, ma, 0);
|
||||
testVectorExpandByte(av, m);
|
||||
int index = 0;
|
||||
for (int i = 0; i < m.length(); i++) {
|
||||
Asserts.assertEquals(m.laneIsSet(i) ? ba[index++] : (byte)0, bb[i]);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = { IRNode.EXPAND_VS, "= 1" }, applyIfCPUFeature = { "asimd", "true" })
|
||||
public static void testVectorExpandShort(ShortVector av, VectorMask<Short> m) {
|
||||
av.expand(m).intoArray(sb, 0);
|
||||
}
|
||||
|
||||
@Run(test = "testVectorExpandShort")
|
||||
public static void testVectorExpandShort_runner() {
|
||||
ShortVector av = ShortVector.fromArray(S_SPECIES, sa, 0);
|
||||
VectorMask<Short> m = VectorMask.fromArray(S_SPECIES, ma, 0);
|
||||
testVectorExpandShort(av, m);
|
||||
int index = 0;
|
||||
for (int i = 0; i < m.length(); i++) {
|
||||
Asserts.assertEquals(m.laneIsSet(i) ? sa[index++] : (short)0, sb[i]);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = { IRNode.EXPAND_VI, "= 1" }, applyIfCPUFeature = { "asimd", "true" })
|
||||
public static void testVectorExpandInt(IntVector av, VectorMask<Integer> m) {
|
||||
av.expand(m).intoArray(ib, 0);
|
||||
}
|
||||
|
||||
@Run(test = "testVectorExpandInt")
|
||||
public static void testVectorExpandInt_runner() {
|
||||
IntVector av = IntVector.fromArray(I_SPECIES, ia, 0);
|
||||
VectorMask<Integer> m = VectorMask.fromArray(I_SPECIES, ma, 0);
|
||||
testVectorExpandInt(av, m);
|
||||
int index = 0;
|
||||
for (int i = 0; i < m.length(); i++) {
|
||||
Asserts.assertEquals(m.laneIsSet(i) ? ia[index++] : (int)0, ib[i]);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = { IRNode.EXPAND_VL, "= 1" }, applyIfCPUFeature = { "asimd", "true" })
|
||||
public static void testVectorExpandLong(LongVector av, VectorMask<Long> m) {
|
||||
av.expand(m).intoArray(lb, 0);
|
||||
}
|
||||
|
||||
@Run(test = "testVectorExpandLong")
|
||||
public static void testVectorExpandLong_runner() {
|
||||
LongVector av = LongVector.fromArray(L_SPECIES, la, 0);
|
||||
VectorMask<Long> m = VectorMask.fromArray(L_SPECIES, ma, 0);
|
||||
testVectorExpandLong(av, m);
|
||||
int index = 0;
|
||||
for (int i = 0; i < m.length(); i++) {
|
||||
Asserts.assertEquals(m.laneIsSet(i) ? la[index++] : (long)0, lb[i]);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = { IRNode.EXPAND_VF, "= 1" }, applyIfCPUFeature = { "asimd", "true" })
|
||||
public static void testVectorExpandFloat(FloatVector av, VectorMask<Float> m) {
|
||||
av.expand(m).intoArray(fb, 0);
|
||||
}
|
||||
|
||||
@Run(test = "testVectorExpandFloat")
|
||||
public static void testVectorExpandFloat_runner() {
|
||||
FloatVector av = FloatVector.fromArray(F_SPECIES, fa, 0);
|
||||
VectorMask<Float> m = VectorMask.fromArray(F_SPECIES, ma, 0);
|
||||
testVectorExpandFloat(av, m);
|
||||
int index = 0;
|
||||
for (int i = 0; i < m.length(); i++) {
|
||||
Asserts.assertEquals(m.laneIsSet(i) ? fa[index++] : (float)0, fb[i]);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = { IRNode.EXPAND_VD, "= 1" }, applyIfCPUFeature = { "asimd", "true" })
|
||||
public static void testVectorExpandDouble(DoubleVector av, VectorMask<Double> m) {
|
||||
av.expand(m).intoArray(db, 0);
|
||||
}
|
||||
|
||||
@Run(test = "testVectorExpandDouble")
|
||||
public static void testVectorExpandDouble_runner() {
|
||||
DoubleVector av = DoubleVector.fromArray(D_SPECIES, da, 0);
|
||||
VectorMask<Double> m = VectorMask.fromArray(D_SPECIES, ma, 0);
|
||||
testVectorExpandDouble(av, m);
|
||||
int index = 0;
|
||||
for (int i = 0; i < m.length(); i++) {
|
||||
Asserts.assertEquals(m.laneIsSet(i) ? da[index++] : (double)0, db[i]);
|
||||
}
|
||||
}
|
||||
|
||||
public static void main(String[] args) {
|
||||
TestFramework testFramework = new TestFramework();
|
||||
testFramework.setDefaultWarmup(10000)
|
||||
.addFlags("--add-modules=jdk.incubator.vector")
|
||||
.start();
|
||||
}
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user