8363989: AArch64: Add missing backend support of VectorAPI expand operation

Reviewed-by: epeter, eliu, xgong
This commit is contained in:
erifan 2025-09-22 02:03:03 +00:00 committed by Xiaohong Gong
parent cc65836d00
commit e6f8450d95
9 changed files with 473 additions and 97 deletions

View File

@ -216,11 +216,6 @@ source %{
return false;
}
break;
case Op_ExpandV:
if (UseSVE < 2 || is_subword_type(bt)) {
return false;
}
break;
case Op_VectorMaskToLong:
if (UseSVE > 0 && vlen > 64) {
return false;
@ -7113,10 +7108,39 @@ instruct vcompressS(vReg dst, vReg src, pReg pg,
ins_pipe(pipe_slow);
%}
instruct vexpand(vReg dst, vReg src, pRegGov pg) %{
// Vector expand for NEON. This rule is selected only when UseSVE == 0 and the
// mask operand is a vector register. Code generation is delegated to
// C2_MacroAssembler::vector_expand_neon, which asserts that dst, src, mask,
// tmp1 and tmp2 are all different registers.
instruct vexpand_neon(vReg dst, vReg src, vReg mask, vReg tmp1, vReg tmp2) %{
predicate(UseSVE == 0);
match(Set dst (ExpandV src mask));
effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2);
format %{ "vexpand_neon $dst, $src, $mask\t# KILL $tmp1, $tmp2" %}
ins_encode %{
// Element type and vector size in bytes of this node, handed to the helper.
BasicType bt = Matcher::vector_element_basic_type(this);
int length_in_bytes = (int) Matcher::vector_length_in_bytes(this);
__ vector_expand_neon($dst$$FloatRegister, $src$$FloatRegister, $mask$$FloatRegister,
$tmp1$$FloatRegister, $tmp2$$FloatRegister, bt, length_in_bytes);
%}
ins_pipe(pipe_slow);
%}
// Vector expand for SVE with a predicate register as the mask. This rule is
// selected for every element type when UseSVE == 1, and only for element types
// narrower than 4 bytes when UseSVE == 2. The remaining UseSVE == 2 cases with
// 4 byte or wider elements are matched by vexpand_sve2_SD. Code generation is
// delegated to C2_MacroAssembler::vector_expand_sve.
instruct vexpand_sve(vReg dst, vReg src, pRegGov pg, vReg tmp1, vReg tmp2) %{
predicate(UseSVE == 1 || (UseSVE == 2 && type2aelembytes(Matcher::vector_element_basic_type(n)) < 4));
match(Set dst (ExpandV src pg));
effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2);
format %{ "vexpand_sve $dst, $src, $pg\t# KILL $tmp1, $tmp2" %}
ins_encode %{
// Element type and vector size in bytes of this node, handed to the helper.
BasicType bt = Matcher::vector_element_basic_type(this);
int length_in_bytes = (int) Matcher::vector_length_in_bytes(this);
__ vector_expand_sve($dst$$FloatRegister, $src$$FloatRegister, $pg$$PRegister,
$tmp1$$FloatRegister, $tmp2$$FloatRegister, bt, length_in_bytes);
%}
ins_pipe(pipe_slow);
%}
instruct vexpand_sve2_SD(vReg dst, vReg src, pRegGov pg) %{
predicate(UseSVE == 2 && type2aelembytes(Matcher::vector_element_basic_type(n)) >= 4);
match(Set dst (ExpandV src pg));
effect(TEMP_DEF dst);
format %{ "vexpand $dst, $pg, $src" %}
format %{ "vexpand_sve2_SD $dst, $src, $pg" %}
ins_encode %{
// Example input: src = 1 2 3 4 5 6 7 8
// pg = 1 0 0 1 1 0 1 1
@ -7127,7 +7151,6 @@ instruct vexpand(vReg dst, vReg src, pRegGov pg) %{
// for TBL whose value is used to select the indexed element from src vector.
BasicType bt = Matcher::vector_element_basic_type(this);
assert(UseSVE == 2 && !is_subword_type(bt), "unsupported");
Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
// dst = 0 0 0 0 0 0 0 0
__ sve_dup($dst$$FloatRegister, size, 0);

View File

@ -206,11 +206,6 @@ source %{
return false;
}
break;
case Op_ExpandV:
if (UseSVE < 2 || is_subword_type(bt)) {
return false;
}
break;
case Op_VectorMaskToLong:
if (UseSVE > 0 && vlen > 64) {
return false;
@ -5101,10 +5096,39 @@ instruct vcompressS(vReg dst, vReg src, pReg pg,
ins_pipe(pipe_slow);
%}
instruct vexpand(vReg dst, vReg src, pRegGov pg) %{
// Vector expand for NEON. This rule is selected only when UseSVE == 0 and the
// mask operand is a vector register. Code generation is delegated to
// C2_MacroAssembler::vector_expand_neon, which asserts that dst, src, mask,
// tmp1 and tmp2 are all different registers.
instruct vexpand_neon(vReg dst, vReg src, vReg mask, vReg tmp1, vReg tmp2) %{
predicate(UseSVE == 0);
match(Set dst (ExpandV src mask));
effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2);
format %{ "vexpand_neon $dst, $src, $mask\t# KILL $tmp1, $tmp2" %}
ins_encode %{
// Element type and vector size in bytes of this node, handed to the helper.
BasicType bt = Matcher::vector_element_basic_type(this);
int length_in_bytes = (int) Matcher::vector_length_in_bytes(this);
__ vector_expand_neon($dst$$FloatRegister, $src$$FloatRegister, $mask$$FloatRegister,
$tmp1$$FloatRegister, $tmp2$$FloatRegister, bt, length_in_bytes);
%}
ins_pipe(pipe_slow);
%}
// Vector expand for SVE with a predicate register as the mask. This rule is
// selected for every element type when UseSVE == 1, and only for element types
// narrower than 4 bytes when UseSVE == 2. The remaining UseSVE == 2 cases with
// 4 byte or wider elements are matched by vexpand_sve2_SD. Code generation is
// delegated to C2_MacroAssembler::vector_expand_sve.
instruct vexpand_sve(vReg dst, vReg src, pRegGov pg, vReg tmp1, vReg tmp2) %{
predicate(UseSVE == 1 || (UseSVE == 2 && type2aelembytes(Matcher::vector_element_basic_type(n)) < 4));
match(Set dst (ExpandV src pg));
effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2);
format %{ "vexpand_sve $dst, $src, $pg\t# KILL $tmp1, $tmp2" %}
ins_encode %{
// Element type and vector size in bytes of this node, handed to the helper.
BasicType bt = Matcher::vector_element_basic_type(this);
int length_in_bytes = (int) Matcher::vector_length_in_bytes(this);
__ vector_expand_sve($dst$$FloatRegister, $src$$FloatRegister, $pg$$PRegister,
$tmp1$$FloatRegister, $tmp2$$FloatRegister, bt, length_in_bytes);
%}
ins_pipe(pipe_slow);
%}
instruct vexpand_sve2_SD(vReg dst, vReg src, pRegGov pg) %{
predicate(UseSVE == 2 && type2aelembytes(Matcher::vector_element_basic_type(n)) >= 4);
match(Set dst (ExpandV src pg));
effect(TEMP_DEF dst);
format %{ "vexpand $dst, $pg, $src" %}
format %{ "vexpand_sve2_SD $dst, $src, $pg" %}
ins_encode %{
// Example input: src = 1 2 3 4 5 6 7 8
// pg = 1 0 0 1 1 0 1 1
@ -5115,7 +5139,6 @@ instruct vexpand(vReg dst, vReg src, pRegGov pg) %{
// for TBL whose value is used to select the indexed element from src vector.
BasicType bt = Matcher::vector_element_basic_type(this);
assert(UseSVE == 2 && !is_subword_type(bt), "unsupported");
Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
// dst = 0 0 0 0 0 0 0 0
__ sve_dup($dst$$FloatRegister, size, 0);

View File

@ -4068,6 +4068,13 @@ public:
INSN(sve_brkb, 0b10); // Break before first true condition
#undef INSN
// SVE move prefix (unpredicated): emits "movprfx Zd, Zn".
// Zd is encoded in bits [4:0] and Zn in bits [9:5]; every other bit is fixed.
void sve_movprfx(FloatRegister Zd, FloatRegister Zn) {
  starti;
  // Fixed opcode fields, from the most significant bit downwards.
  f(0b00000100, 31, 24);
  f(0b00, 23, 22);
  f(0b1, 21);
  f(0b00000, 20, 16);
  f(0b101111, 15, 10);
  // Register operands.
  rf(Zn, 5);
  rf(Zd, 0);
}
// Element count and increment scalar (SVE)
#define INSN(NAME, TYPE) \
void NAME(Register Xdn, unsigned imm4 = 1, int pattern = 0b11111) { \

View File

@ -2771,3 +2771,90 @@ void C2_MacroAssembler::select_from_two_vectors(FloatRegister dst, FloatRegister
select_from_two_vectors_neon(dst, src1, src2, dst, tmp, vector_length_in_bytes);
}
}
// Vector expand implementation. Elements from the src vector are expanded into
// the dst vector under the control of the vector mask.
// Since there are no native instructions directly corresponding to expand before
// SVE2p2, the following implementations mainly leverage the TBL instruction to
// implement expand. To compute the index input for TBL, the prefix sum algorithm
// (https://en.wikipedia.org/wiki/Prefix_sum) is used. The same algorithm is used
// for NEON and SVE, but with different instructions where appropriate.
// Vector expand implementation for NEON.
//
// Emits the instruction sequence that places the low elements of src, in
// order, into the lanes of dst selected by mask. Lanes that are not selected
// end up as zero (see the example below).
//
// Parameters:
//   dst   - result register; also holds the running prefix sums while the
//           TBL indices are being computed.
//   src   - source vector, used as the single-register TBL table.
//   mask  - vector mask; in the example selected bytes are -1 and all other
//           bytes are 0.
//   tmp1, tmp2 - scratch registers, overwritten by this sequence.
//   bt    - element type. It is not referenced in this body: all index
//           arithmetic is done on bytes, whatever the element type is.
//   vector_length_in_bytes - asserted to be <= 16; 16 selects the T16B
//           arrangement, any other value selects T8B.
//
// dst, src, mask, tmp1 and tmp2 are asserted to be pairwise different.
//
// An example of 128-bit Byte vector:
// Data direction: high <== low
// Input:
// src = g f e d c b a 9 8 7 6 5 4 3 2 1
// mask = 0 0 -1 -1 0 0 -1 -1 0 0 -1 -1 0 0 -1 -1
// Expected result:
// dst = 0 0 8 7 0 0 6 5 0 0 4 3 0 0 2 1
void C2_MacroAssembler::vector_expand_neon(FloatRegister dst, FloatRegister src, FloatRegister mask,
FloatRegister tmp1, FloatRegister tmp2, BasicType bt,
int vector_length_in_bytes) {
assert(vector_length_in_bytes <= 16, "the vector length in bytes for NEON must be <= 16");
assert_different_registers(dst, src, mask, tmp1, tmp2);
// Since the TBL instruction only supports byte table, we need to
// compute indices in byte type for all types.
SIMD_Arrangement size = vector_length_in_bytes == 16 ? T16B : T8B;
// tmp1 = 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
dup(tmp1, size, zr);
// Negating the mask turns every -1 byte into 1, i.e. a per-byte "selected" flag.
// dst = 0 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1
negr(dst, size, mask);
// Calculate vector index for TBL with prefix sum algorithm.
// Each round adds to dst a copy of dst moved up by i bytes, with zero bytes
// from tmp1 filling the low end; i doubles every round, so the number of
// rounds is logarithmic in the vector length.
// dst = 8 8 8 7 6 6 6 5 4 4 4 3 2 2 2 1
for (int i = 1; i < vector_length_in_bytes; i <<= 1) {
ext(tmp2, size, tmp1, dst, vector_length_in_bytes - i);
addv(dst, size, tmp2, dst);
}
// Copy the mask into tmp2 (ORR of a register with itself) so that the
// following BSL can use tmp2 as both selector and destination without
// modifying mask.
// tmp2 = 0 0 -1 -1 0 0 -1 -1 0 0 -1 -1 0 0 -1 -1
orr(tmp2, size, mask, mask);
// Keep the prefix sum in selected bytes and take zero (tmp1) elsewhere.
// tmp2 = 0 0 8 7 0 0 6 5 0 0 4 3 0 0 2 1
bsl(tmp2, size, dst, tmp1);
// tmp1 = 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
movi(tmp1, size, 1);
// Convert the 1-based counts into 0-based TBL indices; unselected bytes
// become -1.
// dst = -1 -1 7 6 -1 -1 5 4 -1 -1 3 2 -1 -1 1 0
subv(dst, size, tmp2, tmp1);
// Table lookup in src; as the example shows, the -1 index bytes produce 0.
// dst = 0 0 8 7 0 0 6 5 0 0 4 3 0 0 2 1
tbl(dst, size, src, 1, dst);
}
// Vector expand implementation for SVE.
//
// Emits the instruction sequence that places the low elements of src, in
// order, into the lanes of dst selected by the predicate pg. Lanes that are
// not selected end up as zero (see the example below).
//
// Parameters:
//   dst   - result register; also used as scratch inside the prefix sum loop.
//   src   - source vector, used as the TBL table.
//   pg    - governing predicate that marks the selected lanes.
//   tmp1, tmp2 - scratch registers, overwritten by this sequence.
//   bt    - element type; determines the element size variant and the first
//           step (in bytes) of the prefix sum loop.
//   vector_length_in_bytes - upper bound for the prefix sum loop step.
//
// UseSVE > 0 is asserted, and dst, src, tmp1 and tmp2 are asserted to be
// pairwise different.
//
// An example of 128-bit Short vector:
// Data direction: high <== low
// Input:
// src = gf ed cb a9 87 65 43 21
// pg = 00 01 00 01 00 01 00 01
// Expected result:
// dst = 00 87 00 65 00 43 00 21
void C2_MacroAssembler::vector_expand_sve(FloatRegister dst, FloatRegister src, PRegister pg,
FloatRegister tmp1, FloatRegister tmp2, BasicType bt,
int vector_length_in_bytes) {
assert(UseSVE > 0, "expand implementation only for SVE");
assert_different_registers(dst, src, tmp1, tmp2);
SIMD_RegVariant size = elemType_to_regVariant(bt);
// tmp1 = 00 00 00 00 00 00 00 00
sve_dup(tmp1, size, 0);
// Start tmp2 from the zeros in tmp1, then write 1 into the lanes selected by
// pg. NOTE(review): the trailing 'true' argument of sve_cpy presumably
// requests merging predication - confirm against the sve_cpy declaration.
sve_movprfx(tmp2, tmp1);
// tmp2 = 00 01 00 01 00 01 00 01
sve_cpy(tmp2, size, pg, 1, true);
// Calculate vector index for TBL with prefix sum algorithm.
// The step i starts at one element (in bytes) and doubles every round.
// tmp2 = 04 04 03 03 02 02 01 01
for (int i = type2aelembytes(bt); i < vector_length_in_bytes; i <<= 1) {
// sve_ext uses dst as its first source as well as its destination, so dst
// is reset to zero (from tmp1) before every round.
sve_movprfx(dst, tmp1);
// The EXT instruction operates on the full-width sve register. The correct
// index calculation method is:
// vector_length_in_bytes - i + MaxVectorSize - vector_length_in_bytes =>
// MaxVectorSize - i.
sve_ext(dst, tmp2, MaxVectorSize - i);
sve_add(tmp2, size, dst, tmp2);
}
// Keep the prefix sum in selected lanes and take zero (tmp1) elsewhere.
// dst = 00 04 00 03 00 02 00 01
sve_sel(dst, size, pg, tmp2, tmp1);
// Convert the 1-based counts into 0-based TBL indices; unselected lanes
// become -1.
// dst = -1 03 -1 02 -1 01 -1 00
sve_sub(dst, size, 1);
// Table lookup in src; as the example shows, the -1 index lanes produce 0.
// dst = 00 87 00 65 00 43 00 21
sve_tbl(dst, size, src, dst);
}

View File

@ -204,4 +204,10 @@
FloatRegister index, FloatRegister tmp, BasicType bt,
unsigned vector_length_in_bytes);
// Vector expand for NEON: elements of src are expanded into dst under the
// control of a vector register mask. tmp1 and tmp2 are scratch registers.
void vector_expand_neon(FloatRegister dst, FloatRegister src, FloatRegister mask,
FloatRegister tmp1, FloatRegister tmp2, BasicType bt,
int vector_length_in_bytes);
// Vector expand for SVE: elements of src are expanded into dst under the
// control of the predicate register pg. tmp1 and tmp2 are scratch registers.
void vector_expand_sve(FloatRegister dst, FloatRegister src, PRegister pg,
FloatRegister tmp1, FloatRegister tmp2, BasicType bt,
int vector_length_in_bytes);
#endif // CPU_AARCH64_C2_MACROASSEMBLER_AARCH64_HPP

View File

@ -2135,6 +2135,7 @@ generate(SpecialCases, [["ccmn", "__ ccmn(zr, zr, 3u, Assembler::LE);",
["punpkhi", "__ sve_punpkhi(p1, p0);", "punpkhi\tp1.h, p0.b"],
["compact", "__ sve_compact(z16, __ S, z16, p1);", "compact\tz16.s, p1, z16.s"],
["compact", "__ sve_compact(z16, __ D, z16, p1);", "compact\tz16.d, p1, z16.d"],
["movprfx", "__ sve_movprfx(z17, z1);", "movprfx\tz17, z1"],
["ext", "__ sve_ext(z17, z16, 63);", "ext\tz17.b, z17.b, z16.b, #63"],
["facgt", "__ sve_fac(Assembler::GT, p1, __ H, p2, z4, z5);", "facgt\tp1.h, p2/z, z4.h, z5.h"],
["facgt", "__ sve_fac(Assembler::GT, p1, __ S, p2, z4, z5);", "facgt\tp1.s, p2/z, z4.s, z5.s"],

View File

@ -1148,6 +1148,7 @@
__ sve_punpkhi(p1, p0); // punpkhi p1.h, p0.b
__ sve_compact(z16, __ S, z16, p1); // compact z16.s, p1, z16.s
__ sve_compact(z16, __ D, z16, p1); // compact z16.d, p1, z16.d
__ sve_movprfx(z17, z1); // movprfx z17, z1
__ sve_ext(z17, z16, 63); // ext z17.b, z17.b, z16.b, #63
__ sve_fac(Assembler::GT, p1, __ H, p2, z4, z5); // facgt p1.h, p2/z, z4.h, z5.h
__ sve_fac(Assembler::GT, p1, __ S, p2, z4, z5); // facgt p1.s, p2/z, z4.s, z5.s
@ -1444,30 +1445,30 @@
0x9101a1a0, 0xb10a5cc8, 0xd10810aa, 0xf10fd061,
0x120cb166, 0x321764bc, 0x52174681, 0x720c0227,
0x9241018e, 0xb25a2969, 0xd278b411, 0xf26aad01,
0x14000000, 0x17ffffd7, 0x140004b6, 0x94000000,
0x97ffffd4, 0x940004b3, 0x3400000a, 0x34fffa2a,
0x3400960a, 0x35000008, 0x35fff9c8, 0x350095a8,
0xb400000b, 0xb4fff96b, 0xb400954b, 0xb500001d,
0xb5fff91d, 0xb50094fd, 0x10000013, 0x10fff8b3,
0x10009493, 0x90000013, 0x36300016, 0x3637f836,
0x36309416, 0x3758000c, 0x375ff7cc, 0x375893ac,
0x14000000, 0x17ffffd7, 0x140004b7, 0x94000000,
0x97ffffd4, 0x940004b4, 0x3400000a, 0x34fffa2a,
0x3400962a, 0x35000008, 0x35fff9c8, 0x350095c8,
0xb400000b, 0xb4fff96b, 0xb400956b, 0xb500001d,
0xb5fff91d, 0xb500951d, 0x10000013, 0x10fff8b3,
0x100094b3, 0x90000013, 0x36300016, 0x3637f836,
0x36309436, 0x3758000c, 0x375ff7cc, 0x375893cc,
0x128313a0, 0x528a32c7, 0x7289173b, 0x92ab3acc,
0xd2a0bf94, 0xf2c285e8, 0x9358722f, 0x330e652f,
0x53067f3b, 0x93577c53, 0xb34a1aac, 0xd35a4016,
0x13946c63, 0x93c3dbc8, 0x54000000, 0x54fff5a0,
0x54009180, 0x54000001, 0x54fff541, 0x54009121,
0x54000002, 0x54fff4e2, 0x540090c2, 0x54000002,
0x54fff482, 0x54009062, 0x54000003, 0x54fff423,
0x54009003, 0x54000003, 0x54fff3c3, 0x54008fa3,
0x54000004, 0x54fff364, 0x54008f44, 0x54000005,
0x54fff305, 0x54008ee5, 0x54000006, 0x54fff2a6,
0x54008e86, 0x54000007, 0x54fff247, 0x54008e27,
0x54000008, 0x54fff1e8, 0x54008dc8, 0x54000009,
0x54fff189, 0x54008d69, 0x5400000a, 0x54fff12a,
0x54008d0a, 0x5400000b, 0x54fff0cb, 0x54008cab,
0x5400000c, 0x54fff06c, 0x54008c4c, 0x5400000d,
0x54fff00d, 0x54008bed, 0x5400000e, 0x54ffefae,
0x54008b8e, 0x5400000f, 0x54ffef4f, 0x54008b2f,
0x540091a0, 0x54000001, 0x54fff541, 0x54009141,
0x54000002, 0x54fff4e2, 0x540090e2, 0x54000002,
0x54fff482, 0x54009082, 0x54000003, 0x54fff423,
0x54009023, 0x54000003, 0x54fff3c3, 0x54008fc3,
0x54000004, 0x54fff364, 0x54008f64, 0x54000005,
0x54fff305, 0x54008f05, 0x54000006, 0x54fff2a6,
0x54008ea6, 0x54000007, 0x54fff247, 0x54008e47,
0x54000008, 0x54fff1e8, 0x54008de8, 0x54000009,
0x54fff189, 0x54008d89, 0x5400000a, 0x54fff12a,
0x54008d2a, 0x5400000b, 0x54fff0cb, 0x54008ccb,
0x5400000c, 0x54fff06c, 0x54008c6c, 0x5400000d,
0x54fff00d, 0x54008c0d, 0x5400000e, 0x54ffefae,
0x54008bae, 0x5400000f, 0x54ffef4f, 0x54008b4f,
0xd40658e1, 0xd4014d22, 0xd4046543, 0xd4273f60,
0xd44cad80, 0xd503201f, 0xd503203f, 0xd503205f,
0xd503209f, 0xd50320bf, 0xd503219f, 0xd50323bf,
@ -1686,66 +1687,66 @@
0x25d8e184, 0x2518e407, 0x05214800, 0x05614800,
0x05a14800, 0x05e14800, 0x05214c00, 0x05614c00,
0x05a14c00, 0x05e14c00, 0x05304001, 0x05314001,
0x05a18610, 0x05e18610, 0x05271e11, 0x6545e891,
0x6585e891, 0x65c5e891, 0x6545c891, 0x6585c891,
0x65c5c891, 0x45b0c210, 0x45f1c231, 0x1e601000,
0x1e603000, 0x1e621000, 0x1e623000, 0x1e641000,
0x1e643000, 0x1e661000, 0x1e663000, 0x1e681000,
0x1e683000, 0x1e6a1000, 0x1e6a3000, 0x1e6c1000,
0x1e6c3000, 0x1e6e1000, 0x1e6e3000, 0x1e701000,
0x1e703000, 0x1e721000, 0x1e723000, 0x1e741000,
0x1e743000, 0x1e761000, 0x1e763000, 0x1e781000,
0x1e783000, 0x1e7a1000, 0x1e7a3000, 0x1e7c1000,
0x1e7c3000, 0x1e7e1000, 0x1e7e3000, 0xf8268267,
0xf82d023c, 0xf8301046, 0xf83d2083, 0xf8263290,
0xf82d528c, 0xf8284299, 0xf8337160, 0xf8386286,
0xf8bf820e, 0xf8a600e0, 0xf8af1353, 0xf8a922ea,
0xf8b53396, 0xf8a251e3, 0xf8b340f4, 0xf8a470fd,
0xf8a06209, 0xf8f48097, 0xf8f002ea, 0xf8eb10d9,
0xf8ff21b0, 0xf8f7302c, 0xf8ee52a9, 0xf8f041fa,
0xf8e471e4, 0xf8e863c6, 0xf864823d, 0xf87d013a,
0xf86f1162, 0xf87d20e3, 0xf86132bb, 0xf870510e,
0xf8704336, 0xf86572b4, 0xf8706217, 0xb83e8294,
0xb8200264, 0xb8381284, 0xb8242358, 0xb8333102,
0xb828530e, 0xb83042df, 0xb824703f, 0xb82a6194,
0xb8a080e9, 0xb8b80090, 0xb8bb1146, 0xb8bb21b8,
0xb8b032df, 0xb8b653f4, 0xb8bd41c9, 0xb8b47287,
0xb8bc6169, 0xb8ee828c, 0xb8e10138, 0xb8f3126d,
0xb8f020b0, 0xb8e03183, 0xb8e851ef, 0xb8f041e4,
0xb8fe7005, 0xb8ea6376, 0xb8638120, 0xb873015d,
0xb8781284, 0xb86723b8, 0xb86e3175, 0xb87b51ed,
0xb87f41d1, 0xb863721e, 0xb87660f4, 0xce216874,
0xce104533, 0xce648c15, 0xce8e3302, 0xce6e82ab,
0xce6c87d1, 0xcec08063, 0xce638937, 0x25e0c358,
0x25a1c7d3, 0x0580785a, 0x05426328, 0x05009892,
0x25a0cc29, 0x2561cec8, 0x058044b3, 0x05401c99,
0x05006b49, 0x25e0d6f7, 0x2561c528, 0x0583c8bc,
0x0542522f, 0x05001ec0, 0x25e0de65, 0x25a1c113,
0x05803cad, 0x0540f3c0, 0x0500ab15, 0x2560c28c,
0x2561d7c0, 0x05801ed7, 0x0542633b, 0x05003696,
0x2560d4b4, 0x25e1c918, 0x058021ff, 0x05400e15,
0x0500f3de, 0x0473025a, 0x04bd05ab, 0x658e0025,
0x658a08e2, 0x659a0493, 0x043e1062, 0x04f418b4,
0x046d15bd, 0x04611fce, 0x04d6a07c, 0x04001929,
0x041a09da, 0x04d098f4, 0x04db10d4, 0x0459a3ad,
0x041aa029, 0x041919fb, 0x04d39e24, 0x04118302,
0x04101dba, 0x04d7ae16, 0x04dea571, 0x04180210,
0x05e786fc, 0x05e4915c, 0x04881cf1, 0x044a0f04,
0x04090969, 0x048b16c4, 0x044101e4, 0x04dcbf44,
0x65809745, 0x658d833f, 0x65c68468, 0x65c79b07,
0x65829e38, 0x049dafca, 0x6582bba8, 0x65c0b7ff,
0x65c1b4e0, 0x658dbadd, 0x65819a9d, 0x65ed9246,
0x65b30815, 0x65e6263c, 0x65eebb94, 0x65bad14e,
0x65efe178, 0x65fc5697, 0x65e07f14, 0x040c55a6,
0x04977f4d, 0x043d3046, 0x04b733a0, 0x046830a4,
0x04ed322d, 0x05686948, 0x05bd6c13, 0x65c88ef0,
0x450db3d7, 0x4540b6d9, 0x043e3979, 0x445896ce,
0x445a9005, 0x44d98069, 0x445b87ae, 0x04da348e,
0x04982edb, 0x0499397f, 0x0408338c, 0x04ca309c,
0x65c721e6, 0x65c63641, 0x65982882, 0x04812b8b,
0x0e251083, 0x4e3712d5, 0x0e61101f, 0x4e6d118b,
0x0eba1338, 0x4eb712d5, 0x2e31120f, 0x6e2e11ac,
0x2e6810e6, 0x6e6f11cd, 0x2eaa1128, 0x6eb1120f,
0x05a18610, 0x05e18610, 0x0420bc31, 0x05271e11,
0x6545e891, 0x6585e891, 0x65c5e891, 0x6545c891,
0x6585c891, 0x65c5c891, 0x45b0c210, 0x45f1c231,
0x1e601000, 0x1e603000, 0x1e621000, 0x1e623000,
0x1e641000, 0x1e643000, 0x1e661000, 0x1e663000,
0x1e681000, 0x1e683000, 0x1e6a1000, 0x1e6a3000,
0x1e6c1000, 0x1e6c3000, 0x1e6e1000, 0x1e6e3000,
0x1e701000, 0x1e703000, 0x1e721000, 0x1e723000,
0x1e741000, 0x1e743000, 0x1e761000, 0x1e763000,
0x1e781000, 0x1e783000, 0x1e7a1000, 0x1e7a3000,
0x1e7c1000, 0x1e7c3000, 0x1e7e1000, 0x1e7e3000,
0xf8268267, 0xf82d023c, 0xf8301046, 0xf83d2083,
0xf8263290, 0xf82d528c, 0xf8284299, 0xf8337160,
0xf8386286, 0xf8bf820e, 0xf8a600e0, 0xf8af1353,
0xf8a922ea, 0xf8b53396, 0xf8a251e3, 0xf8b340f4,
0xf8a470fd, 0xf8a06209, 0xf8f48097, 0xf8f002ea,
0xf8eb10d9, 0xf8ff21b0, 0xf8f7302c, 0xf8ee52a9,
0xf8f041fa, 0xf8e471e4, 0xf8e863c6, 0xf864823d,
0xf87d013a, 0xf86f1162, 0xf87d20e3, 0xf86132bb,
0xf870510e, 0xf8704336, 0xf86572b4, 0xf8706217,
0xb83e8294, 0xb8200264, 0xb8381284, 0xb8242358,
0xb8333102, 0xb828530e, 0xb83042df, 0xb824703f,
0xb82a6194, 0xb8a080e9, 0xb8b80090, 0xb8bb1146,
0xb8bb21b8, 0xb8b032df, 0xb8b653f4, 0xb8bd41c9,
0xb8b47287, 0xb8bc6169, 0xb8ee828c, 0xb8e10138,
0xb8f3126d, 0xb8f020b0, 0xb8e03183, 0xb8e851ef,
0xb8f041e4, 0xb8fe7005, 0xb8ea6376, 0xb8638120,
0xb873015d, 0xb8781284, 0xb86723b8, 0xb86e3175,
0xb87b51ed, 0xb87f41d1, 0xb863721e, 0xb87660f4,
0xce216874, 0xce104533, 0xce648c15, 0xce8e3302,
0xce6e82ab, 0xce6c87d1, 0xcec08063, 0xce638937,
0x25e0c358, 0x25a1c7d3, 0x0580785a, 0x05426328,
0x05009892, 0x25a0cc29, 0x2561cec8, 0x058044b3,
0x05401c99, 0x05006b49, 0x25e0d6f7, 0x2561c528,
0x0583c8bc, 0x0542522f, 0x05001ec0, 0x25e0de65,
0x25a1c113, 0x05803cad, 0x0540f3c0, 0x0500ab15,
0x2560c28c, 0x2561d7c0, 0x05801ed7, 0x0542633b,
0x05003696, 0x2560d4b4, 0x25e1c918, 0x058021ff,
0x05400e15, 0x0500f3de, 0x0473025a, 0x04bd05ab,
0x658e0025, 0x658a08e2, 0x659a0493, 0x043e1062,
0x04f418b4, 0x046d15bd, 0x04611fce, 0x04d6a07c,
0x04001929, 0x041a09da, 0x04d098f4, 0x04db10d4,
0x0459a3ad, 0x041aa029, 0x041919fb, 0x04d39e24,
0x04118302, 0x04101dba, 0x04d7ae16, 0x04dea571,
0x04180210, 0x05e786fc, 0x05e4915c, 0x04881cf1,
0x044a0f04, 0x04090969, 0x048b16c4, 0x044101e4,
0x04dcbf44, 0x65809745, 0x658d833f, 0x65c68468,
0x65c79b07, 0x65829e38, 0x049dafca, 0x6582bba8,
0x65c0b7ff, 0x65c1b4e0, 0x658dbadd, 0x65819a9d,
0x65ed9246, 0x65b30815, 0x65e6263c, 0x65eebb94,
0x65bad14e, 0x65efe178, 0x65fc5697, 0x65e07f14,
0x040c55a6, 0x04977f4d, 0x043d3046, 0x04b733a0,
0x046830a4, 0x04ed322d, 0x05686948, 0x05bd6c13,
0x65c88ef0, 0x450db3d7, 0x4540b6d9, 0x043e3979,
0x445896ce, 0x445a9005, 0x44d98069, 0x445b87ae,
0x04da348e, 0x04982edb, 0x0499397f, 0x0408338c,
0x04ca309c, 0x65c721e6, 0x65c63641, 0x65982882,
0x04812b8b, 0x0e251083, 0x4e3712d5, 0x0e61101f,
0x4e6d118b, 0x0eba1338, 0x4eb712d5, 0x2e31120f,
0x6e2e11ac, 0x2e6810e6, 0x6e6f11cd, 0x2eaa1128,
0x6eb1120f,
};
// END Generated code -- do not edit

View File

@ -2755,6 +2755,36 @@ public class IRNode {
vectorNode(EXPAND_BITS_VL, "ExpandBitsV", TYPE_LONG);
}
// IR node placeholders for the "ExpandV" vector node, one per element type
// (byte, short, int, long, float, double). Each placeholder is registered
// through vectorNode together with the node name and its element type.
public static final String EXPAND_VB = VECTOR_PREFIX + "EXPAND_VB" + POSTFIX;
static {
vectorNode(EXPAND_VB, "ExpandV", TYPE_BYTE);
}
public static final String EXPAND_VS = VECTOR_PREFIX + "EXPAND_VS" + POSTFIX;
static {
vectorNode(EXPAND_VS, "ExpandV", TYPE_SHORT);
}
public static final String EXPAND_VI = VECTOR_PREFIX + "EXPAND_VI" + POSTFIX;
static {
vectorNode(EXPAND_VI, "ExpandV", TYPE_INT);
}
public static final String EXPAND_VL = VECTOR_PREFIX + "EXPAND_VL" + POSTFIX;
static {
vectorNode(EXPAND_VL, "ExpandV", TYPE_LONG);
}
public static final String EXPAND_VF = VECTOR_PREFIX + "EXPAND_VF" + POSTFIX;
static {
vectorNode(EXPAND_VF, "ExpandV", TYPE_FLOAT);
}
public static final String EXPAND_VD = VECTOR_PREFIX + "EXPAND_VD" + POSTFIX;
static {
vectorNode(EXPAND_VD, "ExpandV", TYPE_DOUBLE);
}
public static final String Z_LOAD_P_WITH_BARRIER_FLAG = COMPOSITE_PREFIX + "Z_LOAD_P_WITH_BARRIER_FLAG" + POSTFIX;
static {
String regex = START + "zLoadP\\S*" + MID + "barrier\\(\\s*" + IS_REPLACED + "\\s*\\)" + END;

View File

@ -0,0 +1,198 @@
/*
* Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package compiler.vectorapi;
import compiler.lib.generators.*;
import compiler.lib.ir_framework.*;
import jdk.incubator.vector.*;
import jdk.test.lib.Asserts;
/**
* @test
* @bug 8363989
* @key randomness
* @library /test/lib /
* @summary AArch64: Add missing backend support of VectorAPI expand operation
* @modules jdk.incubator.vector
*
* @run driver compiler.vectorapi.VectorExpandTest
*/
public class VectorExpandTest {
    // SPECIES_MAX of every vector class under test.
    static final VectorSpecies<Byte> B_SPECIES = ByteVector.SPECIES_MAX;
    static final VectorSpecies<Short> S_SPECIES = ShortVector.SPECIES_MAX;
    static final VectorSpecies<Integer> I_SPECIES = IntVector.SPECIES_MAX;
    static final VectorSpecies<Float> F_SPECIES = FloatVector.SPECIES_MAX;
    static final VectorSpecies<Long> L_SPECIES = LongVector.SPECIES_MAX;
    static final VectorSpecies<Double> D_SPECIES = DoubleVector.SPECIES_MAX;

    // Number of elements in every input, output and mask array.
    static final int LENGTH = 512;
    static final Generators RD = Generators.G;

    // For each element type: "xa" is the random input, "xb" receives the
    // result of expand. "ma" supplies the mask bits for every species.
    static byte[] ba = new byte[LENGTH];
    static byte[] bb = new byte[LENGTH];
    static short[] sa = new short[LENGTH];
    static short[] sb = new short[LENGTH];
    static int[] ia = new int[LENGTH];
    static int[] ib = new int[LENGTH];
    static long[] la = new long[LENGTH];
    static long[] lb = new long[LENGTH];
    static float[] fa = new float[LENGTH];
    static float[] fb = new float[LENGTH];
    static double[] da = new double[LENGTH];
    static double[] db = new double[LENGTH];
    static boolean[] ma = new boolean[LENGTH];

    // Fill the inputs and the mask with random data.
    static {
        Generator<Integer> intSource = RD.ints();
        Generator<Long> longSource = RD.longs();
        Generator<Float> floatSource = RD.floats();
        Generator<Double> doubleSource = RD.doubles();

        // Byte and short inputs and the mask bits are all derived from random ints.
        for (int pos = 0; pos < LENGTH; pos++) {
            ba[pos] = intSource.next().byteValue();
            sa[pos] = intSource.next().shortValue();
            ma[pos] = intSource.next() % 2 == 0;
        }

        RD.fill(intSource, ia);
        RD.fill(longSource, la);
        RD.fill(floatSource, fa);
        RD.fill(doubleSource, da);
    }

    // ---- byte ----

    @Test
    @IR(counts = { IRNode.EXPAND_VB, "= 1" }, applyIfCPUFeature = { "asimd", "true" })
    public static void testVectorExpandByte(ByteVector av, VectorMask<Byte> m) {
        av.expand(m).intoArray(bb, 0);
    }

    // Runs the byte test and checks the stored result against a scalar expand.
    @Run(test = "testVectorExpandByte")
    public static void testVectorExpandByte_runner() {
        ByteVector input = ByteVector.fromArray(B_SPECIES, ba, 0);
        VectorMask<Byte> mask = VectorMask.fromArray(B_SPECIES, ma, 0);
        testVectorExpandByte(input, mask);

        // Set lanes take the next unread input element, all others are zero.
        int consumed = 0;
        for (int lane = 0; lane < mask.length(); lane++) {
            byte expected = (byte)0;
            if (mask.laneIsSet(lane)) {
                expected = ba[consumed++];
            }
            Asserts.assertEquals(expected, bb[lane]);
        }
    }

    // ---- short ----

    @Test
    @IR(counts = { IRNode.EXPAND_VS, "= 1" }, applyIfCPUFeature = { "asimd", "true" })
    public static void testVectorExpandShort(ShortVector av, VectorMask<Short> m) {
        av.expand(m).intoArray(sb, 0);
    }

    // Runs the short test and checks the stored result against a scalar expand.
    @Run(test = "testVectorExpandShort")
    public static void testVectorExpandShort_runner() {
        ShortVector input = ShortVector.fromArray(S_SPECIES, sa, 0);
        VectorMask<Short> mask = VectorMask.fromArray(S_SPECIES, ma, 0);
        testVectorExpandShort(input, mask);

        int consumed = 0;
        for (int lane = 0; lane < mask.length(); lane++) {
            short expected = (short)0;
            if (mask.laneIsSet(lane)) {
                expected = sa[consumed++];
            }
            Asserts.assertEquals(expected, sb[lane]);
        }
    }

    // ---- int ----

    @Test
    @IR(counts = { IRNode.EXPAND_VI, "= 1" }, applyIfCPUFeature = { "asimd", "true" })
    public static void testVectorExpandInt(IntVector av, VectorMask<Integer> m) {
        av.expand(m).intoArray(ib, 0);
    }

    // Runs the int test and checks the stored result against a scalar expand.
    @Run(test = "testVectorExpandInt")
    public static void testVectorExpandInt_runner() {
        IntVector input = IntVector.fromArray(I_SPECIES, ia, 0);
        VectorMask<Integer> mask = VectorMask.fromArray(I_SPECIES, ma, 0);
        testVectorExpandInt(input, mask);

        int consumed = 0;
        for (int lane = 0; lane < mask.length(); lane++) {
            int expected = (int)0;
            if (mask.laneIsSet(lane)) {
                expected = ia[consumed++];
            }
            Asserts.assertEquals(expected, ib[lane]);
        }
    }

    // ---- long ----

    @Test
    @IR(counts = { IRNode.EXPAND_VL, "= 1" }, applyIfCPUFeature = { "asimd", "true" })
    public static void testVectorExpandLong(LongVector av, VectorMask<Long> m) {
        av.expand(m).intoArray(lb, 0);
    }

    // Runs the long test and checks the stored result against a scalar expand.
    @Run(test = "testVectorExpandLong")
    public static void testVectorExpandLong_runner() {
        LongVector input = LongVector.fromArray(L_SPECIES, la, 0);
        VectorMask<Long> mask = VectorMask.fromArray(L_SPECIES, ma, 0);
        testVectorExpandLong(input, mask);

        int consumed = 0;
        for (int lane = 0; lane < mask.length(); lane++) {
            long expected = (long)0;
            if (mask.laneIsSet(lane)) {
                expected = la[consumed++];
            }
            Asserts.assertEquals(expected, lb[lane]);
        }
    }

    // ---- float ----

    @Test
    @IR(counts = { IRNode.EXPAND_VF, "= 1" }, applyIfCPUFeature = { "asimd", "true" })
    public static void testVectorExpandFloat(FloatVector av, VectorMask<Float> m) {
        av.expand(m).intoArray(fb, 0);
    }

    // Runs the float test and checks the stored result against a scalar expand.
    @Run(test = "testVectorExpandFloat")
    public static void testVectorExpandFloat_runner() {
        FloatVector input = FloatVector.fromArray(F_SPECIES, fa, 0);
        VectorMask<Float> mask = VectorMask.fromArray(F_SPECIES, ma, 0);
        testVectorExpandFloat(input, mask);

        int consumed = 0;
        for (int lane = 0; lane < mask.length(); lane++) {
            float expected = (float)0;
            if (mask.laneIsSet(lane)) {
                expected = fa[consumed++];
            }
            Asserts.assertEquals(expected, fb[lane]);
        }
    }

    // ---- double ----

    @Test
    @IR(counts = { IRNode.EXPAND_VD, "= 1" }, applyIfCPUFeature = { "asimd", "true" })
    public static void testVectorExpandDouble(DoubleVector av, VectorMask<Double> m) {
        av.expand(m).intoArray(db, 0);
    }

    // Runs the double test and checks the stored result against a scalar expand.
    @Run(test = "testVectorExpandDouble")
    public static void testVectorExpandDouble_runner() {
        DoubleVector input = DoubleVector.fromArray(D_SPECIES, da, 0);
        VectorMask<Double> mask = VectorMask.fromArray(D_SPECIES, ma, 0);
        testVectorExpandDouble(input, mask);

        int consumed = 0;
        for (int lane = 0; lane < mask.length(); lane++) {
            double expected = (double)0;
            if (mask.laneIsSet(lane)) {
                expected = da[consumed++];
            }
            Asserts.assertEquals(expected, db[lane]);
        }
    }

    // Entry point: starts the IR test framework with a default warm-up of
    // 10000 and the incubating vector module added.
    public static void main(String[] args) {
        new TestFramework()
            .setDefaultWarmup(10000)
            .addFlags("--add-modules=jdk.incubator.vector")
            .start();
    }
}