From e6f8450d957f79beacf2fc70e545db3a4bb58742 Mon Sep 17 00:00:00 2001 From: erifan Date: Mon, 22 Sep 2025 02:03:03 +0000 Subject: [PATCH] 8363989: AArch64: Add missing backend support of VectorAPI expand operation Reviewed-by: epeter, eliu, xgong --- src/hotspot/cpu/aarch64/aarch64_vector.ad | 39 +++- src/hotspot/cpu/aarch64/aarch64_vector_ad.m4 | 39 +++- src/hotspot/cpu/aarch64/assembler_aarch64.hpp | 7 + .../cpu/aarch64/c2_MacroAssembler_aarch64.cpp | 87 ++++++++ .../cpu/aarch64/c2_MacroAssembler_aarch64.hpp | 6 + test/hotspot/gtest/aarch64/aarch64-asmtest.py | 1 + test/hotspot/gtest/aarch64/asmtest.out.h | 163 +++++++------- .../compiler/lib/ir_framework/IRNode.java | 30 +++ .../compiler/vectorapi/VectorExpandTest.java | 198 ++++++++++++++++++ 9 files changed, 473 insertions(+), 97 deletions(-) create mode 100644 test/hotspot/jtreg/compiler/vectorapi/VectorExpandTest.java diff --git a/src/hotspot/cpu/aarch64/aarch64_vector.ad b/src/hotspot/cpu/aarch64/aarch64_vector.ad index 67c4dad27a7..ef35b66003d 100644 --- a/src/hotspot/cpu/aarch64/aarch64_vector.ad +++ b/src/hotspot/cpu/aarch64/aarch64_vector.ad @@ -216,11 +216,6 @@ source %{ return false; } break; - case Op_ExpandV: - if (UseSVE < 2 || is_subword_type(bt)) { - return false; - } - break; case Op_VectorMaskToLong: if (UseSVE > 0 && vlen > 64) { return false; @@ -7113,10 +7108,39 @@ instruct vcompressS(vReg dst, vReg src, pReg pg, ins_pipe(pipe_slow); %} -instruct vexpand(vReg dst, vReg src, pRegGov pg) %{ +instruct vexpand_neon(vReg dst, vReg src, vReg mask, vReg tmp1, vReg tmp2) %{ + predicate(UseSVE == 0); + match(Set dst (ExpandV src mask)); + effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); + format %{ "vexpand_neon $dst, $src, $mask\t# KILL $tmp1, $tmp2" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this); + int length_in_bytes = (int) Matcher::vector_length_in_bytes(this); + __ vector_expand_neon($dst$$FloatRegister, $src$$FloatRegister, $mask$$FloatRegister, + 
$tmp1$$FloatRegister, $tmp2$$FloatRegister, bt, length_in_bytes); + %} + ins_pipe(pipe_slow); +%} + +instruct vexpand_sve(vReg dst, vReg src, pRegGov pg, vReg tmp1, vReg tmp2) %{ + predicate(UseSVE == 1 || (UseSVE == 2 && type2aelembytes(Matcher::vector_element_basic_type(n)) < 4)); + match(Set dst (ExpandV src pg)); + effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); + format %{ "vexpand_sve $dst, $src, $pg\t# KILL $tmp1, $tmp2" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this); + int length_in_bytes = (int) Matcher::vector_length_in_bytes(this); + __ vector_expand_sve($dst$$FloatRegister, $src$$FloatRegister, $pg$$PRegister, + $tmp1$$FloatRegister, $tmp2$$FloatRegister, bt, length_in_bytes); + %} + ins_pipe(pipe_slow); +%} + +instruct vexpand_sve2_SD(vReg dst, vReg src, pRegGov pg) %{ + predicate(UseSVE == 2 && type2aelembytes(Matcher::vector_element_basic_type(n)) >= 4); match(Set dst (ExpandV src pg)); effect(TEMP_DEF dst); - format %{ "vexpand $dst, $pg, $src" %} + format %{ "vexpand_sve2_SD $dst, $src, $pg" %} ins_encode %{ // Example input: src = 1 2 3 4 5 6 7 8 // pg = 1 0 0 1 1 0 1 1 @@ -7127,7 +7151,6 @@ instruct vexpand(vReg dst, vReg src, pRegGov pg) %{ // for TBL whose value is used to select the indexed element from src vector. 
BasicType bt = Matcher::vector_element_basic_type(this); - assert(UseSVE == 2 && !is_subword_type(bt), "unsupported"); Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt); // dst = 0 0 0 0 0 0 0 0 __ sve_dup($dst$$FloatRegister, size, 0); diff --git a/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4 b/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4 index 28f91204ec3..012de7e46d8 100644 --- a/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4 +++ b/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4 @@ -206,11 +206,6 @@ source %{ return false; } break; - case Op_ExpandV: - if (UseSVE < 2 || is_subword_type(bt)) { - return false; - } - break; case Op_VectorMaskToLong: if (UseSVE > 0 && vlen > 64) { return false; @@ -5101,10 +5096,39 @@ instruct vcompressS(vReg dst, vReg src, pReg pg, ins_pipe(pipe_slow); %} -instruct vexpand(vReg dst, vReg src, pRegGov pg) %{ +instruct vexpand_neon(vReg dst, vReg src, vReg mask, vReg tmp1, vReg tmp2) %{ + predicate(UseSVE == 0); + match(Set dst (ExpandV src mask)); + effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); + format %{ "vexpand_neon $dst, $src, $mask\t# KILL $tmp1, $tmp2" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this); + int length_in_bytes = (int) Matcher::vector_length_in_bytes(this); + __ vector_expand_neon($dst$$FloatRegister, $src$$FloatRegister, $mask$$FloatRegister, + $tmp1$$FloatRegister, $tmp2$$FloatRegister, bt, length_in_bytes); + %} + ins_pipe(pipe_slow); +%} + +instruct vexpand_sve(vReg dst, vReg src, pRegGov pg, vReg tmp1, vReg tmp2) %{ + predicate(UseSVE == 1 || (UseSVE == 2 && type2aelembytes(Matcher::vector_element_basic_type(n)) < 4)); + match(Set dst (ExpandV src pg)); + effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); + format %{ "vexpand_sve $dst, $src, $pg\t# KILL $tmp1, $tmp2" %} + ins_encode %{ + BasicType bt = Matcher::vector_element_basic_type(this); + int length_in_bytes = (int) Matcher::vector_length_in_bytes(this); + __ vector_expand_sve($dst$$FloatRegister, $src$$FloatRegister, 
$pg$$PRegister, + $tmp1$$FloatRegister, $tmp2$$FloatRegister, bt, length_in_bytes); + %} + ins_pipe(pipe_slow); +%} + +instruct vexpand_sve2_SD(vReg dst, vReg src, pRegGov pg) %{ + predicate(UseSVE == 2 && type2aelembytes(Matcher::vector_element_basic_type(n)) >= 4); match(Set dst (ExpandV src pg)); effect(TEMP_DEF dst); - format %{ "vexpand $dst, $pg, $src" %} + format %{ "vexpand_sve2_SD $dst, $src, $pg" %} ins_encode %{ // Example input: src = 1 2 3 4 5 6 7 8 // pg = 1 0 0 1 1 0 1 1 @@ -5115,7 +5139,6 @@ instruct vexpand(vReg dst, vReg src, pRegGov pg) %{ // for TBL whose value is used to select the indexed element from src vector. BasicType bt = Matcher::vector_element_basic_type(this); - assert(UseSVE == 2 && !is_subword_type(bt), "unsupported"); Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt); // dst = 0 0 0 0 0 0 0 0 __ sve_dup($dst$$FloatRegister, size, 0); diff --git a/src/hotspot/cpu/aarch64/assembler_aarch64.hpp b/src/hotspot/cpu/aarch64/assembler_aarch64.hpp index a5d2cbfac98..4c4251fbe9f 100644 --- a/src/hotspot/cpu/aarch64/assembler_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/assembler_aarch64.hpp @@ -4068,6 +4068,13 @@ public: INSN(sve_brkb, 0b10); // Break before first true condition #undef INSN + // SVE move prefix (unpredicated) + void sve_movprfx(FloatRegister Zd, FloatRegister Zn) { + starti; + f(0b00000100, 31, 24), f(0b00, 23, 22), f(0b1, 21), f(0b00000, 20, 16); + f(0b101111, 15, 10), rf(Zn, 5), rf(Zd, 0); + } + // Element count and increment scalar (SVE) #define INSN(NAME, TYPE) \ void NAME(Register Xdn, unsigned imm4 = 1, int pattern = 0b11111) { \ diff --git a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp index b1562c54f4e..b61a0e4e378 100644 --- a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp @@ -2771,3 +2771,90 @@ void C2_MacroAssembler::select_from_two_vectors(FloatRegister dst, FloatRegister 
select_from_two_vectors_neon(dst, src1, src2, dst, tmp, vector_length_in_bytes); } } + +// Vector expand implementation. Elements from the src vector are expanded into +// the dst vector under the control of the vector mask. +// Since there are no native instructions directly corresponding to expand before +// SVE2p2, the following implementations mainly leverage the TBL instruction to +// implement expand. To compute the index input for TBL, the prefix sum algorithm +// (https://en.wikipedia.org/wiki/Prefix_sum) is used. The same algorithm is used +// for NEON and SVE, but with different instructions where appropriate. + +// Vector expand implementation for NEON. +// +// An example of 128-bit Byte vector: +// Data direction: high <== low +// Input: +// src = g f e d c b a 9 8 7 6 5 4 3 2 1 +// mask = 0 0 -1 -1 0 0 -1 -1 0 0 -1 -1 0 0 -1 -1 +// Expected result: +// dst = 0 0 8 7 0 0 6 5 0 0 4 3 0 0 2 1 +void C2_MacroAssembler::vector_expand_neon(FloatRegister dst, FloatRegister src, FloatRegister mask, + FloatRegister tmp1, FloatRegister tmp2, BasicType bt, + int vector_length_in_bytes) { + assert(vector_length_in_bytes <= 16, "the vector length in bytes for NEON must be <= 16"); + assert_different_registers(dst, src, mask, tmp1, tmp2); + // Since the TBL instruction only supports byte table, we need to + // compute indices in byte type for all types. + SIMD_Arrangement size = vector_length_in_bytes == 16 ? T16B : T8B; + // tmp1 = 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + dup(tmp1, size, zr); + // dst = 0 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1 + negr(dst, size, mask); + // Calculate vector index for TBL with prefix sum algorithm. 
+ // dst = 8 8 8 7 6 6 6 5 4 4 4 3 2 2 2 1 + for (int i = 1; i < vector_length_in_bytes; i <<= 1) { + ext(tmp2, size, tmp1, dst, vector_length_in_bytes - i); + addv(dst, size, tmp2, dst); + } + // tmp2 = 0 0 -1 -1 0 0 -1 -1 0 0 -1 -1 0 0 -1 -1 + orr(tmp2, size, mask, mask); + // tmp2 = 0 0 8 7 0 0 6 5 0 0 4 3 0 0 2 1 + bsl(tmp2, size, dst, tmp1); + // tmp1 = 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 + movi(tmp1, size, 1); + // dst = -1 -1 7 6 -1 -1 5 4 -1 -1 3 2 -1 -1 1 0 + subv(dst, size, tmp2, tmp1); + // dst = 0 0 8 7 0 0 6 5 0 0 4 3 0 0 2 1 + tbl(dst, size, src, 1, dst); +} + +// Vector expand implementation for SVE. +// +// An example of 128-bit Short vector: +// Data direction: high <== low +// Input: +// src = gf ed cb a9 87 65 43 21 +// pg = 00 01 00 01 00 01 00 01 +// Expected result: +// dst = 00 87 00 65 00 43 00 21 +void C2_MacroAssembler::vector_expand_sve(FloatRegister dst, FloatRegister src, PRegister pg, + FloatRegister tmp1, FloatRegister tmp2, BasicType bt, + int vector_length_in_bytes) { + assert(UseSVE > 0, "expand implementation only for SVE"); + assert_different_registers(dst, src, tmp1, tmp2); + SIMD_RegVariant size = elemType_to_regVariant(bt); + + // tmp1 = 00 00 00 00 00 00 00 00 + sve_dup(tmp1, size, 0); + sve_movprfx(tmp2, tmp1); + // tmp2 = 00 01 00 01 00 01 00 01 + sve_cpy(tmp2, size, pg, 1, true); + // Calculate vector index for TBL with prefix sum algorithm. + // tmp2 = 04 04 03 03 02 02 01 01 + for (int i = type2aelembytes(bt); i < vector_length_in_bytes; i <<= 1) { + sve_movprfx(dst, tmp1); + // The EXT instruction operates on the full-width sve register. The correct + // index calculation method is: + // vector_length_in_bytes - i + MaxVectorSize - vector_length_in_bytes => + // MaxVectorSize - i. 
+ sve_ext(dst, tmp2, MaxVectorSize - i); + sve_add(tmp2, size, dst, tmp2); + } + // dst = 00 04 00 03 00 02 00 01 + sve_sel(dst, size, pg, tmp2, tmp1); + // dst = -1 03 -1 02 -1 01 -1 00 + sve_sub(dst, size, 1); + // dst = 00 87 00 65 00 43 00 21 + sve_tbl(dst, size, src, dst); +} \ No newline at end of file diff --git a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.hpp index 0403a27910f..cb8ded142f4 100644 --- a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.hpp @@ -204,4 +204,10 @@ FloatRegister index, FloatRegister tmp, BasicType bt, unsigned vector_length_in_bytes); + void vector_expand_neon(FloatRegister dst, FloatRegister src, FloatRegister mask, + FloatRegister tmp1, FloatRegister tmp2, BasicType bt, + int vector_length_in_bytes); + void vector_expand_sve(FloatRegister dst, FloatRegister src, PRegister pg, + FloatRegister tmp1, FloatRegister tmp2, BasicType bt, + int vector_length_in_bytes); #endif // CPU_AARCH64_C2_MACROASSEMBLER_AARCH64_HPP diff --git a/test/hotspot/gtest/aarch64/aarch64-asmtest.py b/test/hotspot/gtest/aarch64/aarch64-asmtest.py index e1abddf3e1c..bf4f2111999 100644 --- a/test/hotspot/gtest/aarch64/aarch64-asmtest.py +++ b/test/hotspot/gtest/aarch64/aarch64-asmtest.py @@ -2135,6 +2135,7 @@ generate(SpecialCases, [["ccmn", "__ ccmn(zr, zr, 3u, Assembler::LE);", ["punpkhi", "__ sve_punpkhi(p1, p0);", "punpkhi\tp1.h, p0.b"], ["compact", "__ sve_compact(z16, __ S, z16, p1);", "compact\tz16.s, p1, z16.s"], ["compact", "__ sve_compact(z16, __ D, z16, p1);", "compact\tz16.d, p1, z16.d"], + ["movprfx", "__ sve_movprfx(z17, z1);", "movprfx\tz17, z1"], ["ext", "__ sve_ext(z17, z16, 63);", "ext\tz17.b, z17.b, z16.b, #63"], ["facgt", "__ sve_fac(Assembler::GT, p1, __ H, p2, z4, z5);", "facgt\tp1.h, p2/z, z4.h, z5.h"], ["facgt", "__ sve_fac(Assembler::GT, p1, __ S, p2, z4, z5);", "facgt\tp1.s, p2/z, z4.s, z5.s"], diff --git 
a/test/hotspot/gtest/aarch64/asmtest.out.h b/test/hotspot/gtest/aarch64/asmtest.out.h index 7a3225eaed4..352ea33750e 100644 --- a/test/hotspot/gtest/aarch64/asmtest.out.h +++ b/test/hotspot/gtest/aarch64/asmtest.out.h @@ -1148,6 +1148,7 @@ __ sve_punpkhi(p1, p0); // punpkhi p1.h, p0.b __ sve_compact(z16, __ S, z16, p1); // compact z16.s, p1, z16.s __ sve_compact(z16, __ D, z16, p1); // compact z16.d, p1, z16.d + __ sve_movprfx(z17, z1); // movprfx z17, z1 __ sve_ext(z17, z16, 63); // ext z17.b, z17.b, z16.b, #63 __ sve_fac(Assembler::GT, p1, __ H, p2, z4, z5); // facgt p1.h, p2/z, z4.h, z5.h __ sve_fac(Assembler::GT, p1, __ S, p2, z4, z5); // facgt p1.s, p2/z, z4.s, z5.s @@ -1444,30 +1445,30 @@ 0x9101a1a0, 0xb10a5cc8, 0xd10810aa, 0xf10fd061, 0x120cb166, 0x321764bc, 0x52174681, 0x720c0227, 0x9241018e, 0xb25a2969, 0xd278b411, 0xf26aad01, - 0x14000000, 0x17ffffd7, 0x140004b6, 0x94000000, - 0x97ffffd4, 0x940004b3, 0x3400000a, 0x34fffa2a, - 0x3400960a, 0x35000008, 0x35fff9c8, 0x350095a8, - 0xb400000b, 0xb4fff96b, 0xb400954b, 0xb500001d, - 0xb5fff91d, 0xb50094fd, 0x10000013, 0x10fff8b3, - 0x10009493, 0x90000013, 0x36300016, 0x3637f836, - 0x36309416, 0x3758000c, 0x375ff7cc, 0x375893ac, + 0x14000000, 0x17ffffd7, 0x140004b7, 0x94000000, + 0x97ffffd4, 0x940004b4, 0x3400000a, 0x34fffa2a, + 0x3400962a, 0x35000008, 0x35fff9c8, 0x350095c8, + 0xb400000b, 0xb4fff96b, 0xb400956b, 0xb500001d, + 0xb5fff91d, 0xb500951d, 0x10000013, 0x10fff8b3, + 0x100094b3, 0x90000013, 0x36300016, 0x3637f836, + 0x36309436, 0x3758000c, 0x375ff7cc, 0x375893cc, 0x128313a0, 0x528a32c7, 0x7289173b, 0x92ab3acc, 0xd2a0bf94, 0xf2c285e8, 0x9358722f, 0x330e652f, 0x53067f3b, 0x93577c53, 0xb34a1aac, 0xd35a4016, 0x13946c63, 0x93c3dbc8, 0x54000000, 0x54fff5a0, - 0x54009180, 0x54000001, 0x54fff541, 0x54009121, - 0x54000002, 0x54fff4e2, 0x540090c2, 0x54000002, - 0x54fff482, 0x54009062, 0x54000003, 0x54fff423, - 0x54009003, 0x54000003, 0x54fff3c3, 0x54008fa3, - 0x54000004, 0x54fff364, 0x54008f44, 0x54000005, - 
0x54fff305, 0x54008ee5, 0x54000006, 0x54fff2a6, - 0x54008e86, 0x54000007, 0x54fff247, 0x54008e27, - 0x54000008, 0x54fff1e8, 0x54008dc8, 0x54000009, - 0x54fff189, 0x54008d69, 0x5400000a, 0x54fff12a, - 0x54008d0a, 0x5400000b, 0x54fff0cb, 0x54008cab, - 0x5400000c, 0x54fff06c, 0x54008c4c, 0x5400000d, - 0x54fff00d, 0x54008bed, 0x5400000e, 0x54ffefae, - 0x54008b8e, 0x5400000f, 0x54ffef4f, 0x54008b2f, + 0x540091a0, 0x54000001, 0x54fff541, 0x54009141, + 0x54000002, 0x54fff4e2, 0x540090e2, 0x54000002, + 0x54fff482, 0x54009082, 0x54000003, 0x54fff423, + 0x54009023, 0x54000003, 0x54fff3c3, 0x54008fc3, + 0x54000004, 0x54fff364, 0x54008f64, 0x54000005, + 0x54fff305, 0x54008f05, 0x54000006, 0x54fff2a6, + 0x54008ea6, 0x54000007, 0x54fff247, 0x54008e47, + 0x54000008, 0x54fff1e8, 0x54008de8, 0x54000009, + 0x54fff189, 0x54008d89, 0x5400000a, 0x54fff12a, + 0x54008d2a, 0x5400000b, 0x54fff0cb, 0x54008ccb, + 0x5400000c, 0x54fff06c, 0x54008c6c, 0x5400000d, + 0x54fff00d, 0x54008c0d, 0x5400000e, 0x54ffefae, + 0x54008bae, 0x5400000f, 0x54ffef4f, 0x54008b4f, 0xd40658e1, 0xd4014d22, 0xd4046543, 0xd4273f60, 0xd44cad80, 0xd503201f, 0xd503203f, 0xd503205f, 0xd503209f, 0xd50320bf, 0xd503219f, 0xd50323bf, @@ -1686,66 +1687,66 @@ 0x25d8e184, 0x2518e407, 0x05214800, 0x05614800, 0x05a14800, 0x05e14800, 0x05214c00, 0x05614c00, 0x05a14c00, 0x05e14c00, 0x05304001, 0x05314001, - 0x05a18610, 0x05e18610, 0x05271e11, 0x6545e891, - 0x6585e891, 0x65c5e891, 0x6545c891, 0x6585c891, - 0x65c5c891, 0x45b0c210, 0x45f1c231, 0x1e601000, - 0x1e603000, 0x1e621000, 0x1e623000, 0x1e641000, - 0x1e643000, 0x1e661000, 0x1e663000, 0x1e681000, - 0x1e683000, 0x1e6a1000, 0x1e6a3000, 0x1e6c1000, - 0x1e6c3000, 0x1e6e1000, 0x1e6e3000, 0x1e701000, - 0x1e703000, 0x1e721000, 0x1e723000, 0x1e741000, - 0x1e743000, 0x1e761000, 0x1e763000, 0x1e781000, - 0x1e783000, 0x1e7a1000, 0x1e7a3000, 0x1e7c1000, - 0x1e7c3000, 0x1e7e1000, 0x1e7e3000, 0xf8268267, - 0xf82d023c, 0xf8301046, 0xf83d2083, 0xf8263290, - 0xf82d528c, 0xf8284299, 0xf8337160, 
0xf8386286, - 0xf8bf820e, 0xf8a600e0, 0xf8af1353, 0xf8a922ea, - 0xf8b53396, 0xf8a251e3, 0xf8b340f4, 0xf8a470fd, - 0xf8a06209, 0xf8f48097, 0xf8f002ea, 0xf8eb10d9, - 0xf8ff21b0, 0xf8f7302c, 0xf8ee52a9, 0xf8f041fa, - 0xf8e471e4, 0xf8e863c6, 0xf864823d, 0xf87d013a, - 0xf86f1162, 0xf87d20e3, 0xf86132bb, 0xf870510e, - 0xf8704336, 0xf86572b4, 0xf8706217, 0xb83e8294, - 0xb8200264, 0xb8381284, 0xb8242358, 0xb8333102, - 0xb828530e, 0xb83042df, 0xb824703f, 0xb82a6194, - 0xb8a080e9, 0xb8b80090, 0xb8bb1146, 0xb8bb21b8, - 0xb8b032df, 0xb8b653f4, 0xb8bd41c9, 0xb8b47287, - 0xb8bc6169, 0xb8ee828c, 0xb8e10138, 0xb8f3126d, - 0xb8f020b0, 0xb8e03183, 0xb8e851ef, 0xb8f041e4, - 0xb8fe7005, 0xb8ea6376, 0xb8638120, 0xb873015d, - 0xb8781284, 0xb86723b8, 0xb86e3175, 0xb87b51ed, - 0xb87f41d1, 0xb863721e, 0xb87660f4, 0xce216874, - 0xce104533, 0xce648c15, 0xce8e3302, 0xce6e82ab, - 0xce6c87d1, 0xcec08063, 0xce638937, 0x25e0c358, - 0x25a1c7d3, 0x0580785a, 0x05426328, 0x05009892, - 0x25a0cc29, 0x2561cec8, 0x058044b3, 0x05401c99, - 0x05006b49, 0x25e0d6f7, 0x2561c528, 0x0583c8bc, - 0x0542522f, 0x05001ec0, 0x25e0de65, 0x25a1c113, - 0x05803cad, 0x0540f3c0, 0x0500ab15, 0x2560c28c, - 0x2561d7c0, 0x05801ed7, 0x0542633b, 0x05003696, - 0x2560d4b4, 0x25e1c918, 0x058021ff, 0x05400e15, - 0x0500f3de, 0x0473025a, 0x04bd05ab, 0x658e0025, - 0x658a08e2, 0x659a0493, 0x043e1062, 0x04f418b4, - 0x046d15bd, 0x04611fce, 0x04d6a07c, 0x04001929, - 0x041a09da, 0x04d098f4, 0x04db10d4, 0x0459a3ad, - 0x041aa029, 0x041919fb, 0x04d39e24, 0x04118302, - 0x04101dba, 0x04d7ae16, 0x04dea571, 0x04180210, - 0x05e786fc, 0x05e4915c, 0x04881cf1, 0x044a0f04, - 0x04090969, 0x048b16c4, 0x044101e4, 0x04dcbf44, - 0x65809745, 0x658d833f, 0x65c68468, 0x65c79b07, - 0x65829e38, 0x049dafca, 0x6582bba8, 0x65c0b7ff, - 0x65c1b4e0, 0x658dbadd, 0x65819a9d, 0x65ed9246, - 0x65b30815, 0x65e6263c, 0x65eebb94, 0x65bad14e, - 0x65efe178, 0x65fc5697, 0x65e07f14, 0x040c55a6, - 0x04977f4d, 0x043d3046, 0x04b733a0, 0x046830a4, - 0x04ed322d, 0x05686948, 0x05bd6c13, 
0x65c88ef0, - 0x450db3d7, 0x4540b6d9, 0x043e3979, 0x445896ce, - 0x445a9005, 0x44d98069, 0x445b87ae, 0x04da348e, - 0x04982edb, 0x0499397f, 0x0408338c, 0x04ca309c, - 0x65c721e6, 0x65c63641, 0x65982882, 0x04812b8b, - 0x0e251083, 0x4e3712d5, 0x0e61101f, 0x4e6d118b, - 0x0eba1338, 0x4eb712d5, 0x2e31120f, 0x6e2e11ac, - 0x2e6810e6, 0x6e6f11cd, 0x2eaa1128, 0x6eb1120f, - + 0x05a18610, 0x05e18610, 0x0420bc31, 0x05271e11, + 0x6545e891, 0x6585e891, 0x65c5e891, 0x6545c891, + 0x6585c891, 0x65c5c891, 0x45b0c210, 0x45f1c231, + 0x1e601000, 0x1e603000, 0x1e621000, 0x1e623000, + 0x1e641000, 0x1e643000, 0x1e661000, 0x1e663000, + 0x1e681000, 0x1e683000, 0x1e6a1000, 0x1e6a3000, + 0x1e6c1000, 0x1e6c3000, 0x1e6e1000, 0x1e6e3000, + 0x1e701000, 0x1e703000, 0x1e721000, 0x1e723000, + 0x1e741000, 0x1e743000, 0x1e761000, 0x1e763000, + 0x1e781000, 0x1e783000, 0x1e7a1000, 0x1e7a3000, + 0x1e7c1000, 0x1e7c3000, 0x1e7e1000, 0x1e7e3000, + 0xf8268267, 0xf82d023c, 0xf8301046, 0xf83d2083, + 0xf8263290, 0xf82d528c, 0xf8284299, 0xf8337160, + 0xf8386286, 0xf8bf820e, 0xf8a600e0, 0xf8af1353, + 0xf8a922ea, 0xf8b53396, 0xf8a251e3, 0xf8b340f4, + 0xf8a470fd, 0xf8a06209, 0xf8f48097, 0xf8f002ea, + 0xf8eb10d9, 0xf8ff21b0, 0xf8f7302c, 0xf8ee52a9, + 0xf8f041fa, 0xf8e471e4, 0xf8e863c6, 0xf864823d, + 0xf87d013a, 0xf86f1162, 0xf87d20e3, 0xf86132bb, + 0xf870510e, 0xf8704336, 0xf86572b4, 0xf8706217, + 0xb83e8294, 0xb8200264, 0xb8381284, 0xb8242358, + 0xb8333102, 0xb828530e, 0xb83042df, 0xb824703f, + 0xb82a6194, 0xb8a080e9, 0xb8b80090, 0xb8bb1146, + 0xb8bb21b8, 0xb8b032df, 0xb8b653f4, 0xb8bd41c9, + 0xb8b47287, 0xb8bc6169, 0xb8ee828c, 0xb8e10138, + 0xb8f3126d, 0xb8f020b0, 0xb8e03183, 0xb8e851ef, + 0xb8f041e4, 0xb8fe7005, 0xb8ea6376, 0xb8638120, + 0xb873015d, 0xb8781284, 0xb86723b8, 0xb86e3175, + 0xb87b51ed, 0xb87f41d1, 0xb863721e, 0xb87660f4, + 0xce216874, 0xce104533, 0xce648c15, 0xce8e3302, + 0xce6e82ab, 0xce6c87d1, 0xcec08063, 0xce638937, + 0x25e0c358, 0x25a1c7d3, 0x0580785a, 0x05426328, + 0x05009892, 0x25a0cc29, 
0x2561cec8, 0x058044b3, + 0x05401c99, 0x05006b49, 0x25e0d6f7, 0x2561c528, + 0x0583c8bc, 0x0542522f, 0x05001ec0, 0x25e0de65, + 0x25a1c113, 0x05803cad, 0x0540f3c0, 0x0500ab15, + 0x2560c28c, 0x2561d7c0, 0x05801ed7, 0x0542633b, + 0x05003696, 0x2560d4b4, 0x25e1c918, 0x058021ff, + 0x05400e15, 0x0500f3de, 0x0473025a, 0x04bd05ab, + 0x658e0025, 0x658a08e2, 0x659a0493, 0x043e1062, + 0x04f418b4, 0x046d15bd, 0x04611fce, 0x04d6a07c, + 0x04001929, 0x041a09da, 0x04d098f4, 0x04db10d4, + 0x0459a3ad, 0x041aa029, 0x041919fb, 0x04d39e24, + 0x04118302, 0x04101dba, 0x04d7ae16, 0x04dea571, + 0x04180210, 0x05e786fc, 0x05e4915c, 0x04881cf1, + 0x044a0f04, 0x04090969, 0x048b16c4, 0x044101e4, + 0x04dcbf44, 0x65809745, 0x658d833f, 0x65c68468, + 0x65c79b07, 0x65829e38, 0x049dafca, 0x6582bba8, + 0x65c0b7ff, 0x65c1b4e0, 0x658dbadd, 0x65819a9d, + 0x65ed9246, 0x65b30815, 0x65e6263c, 0x65eebb94, + 0x65bad14e, 0x65efe178, 0x65fc5697, 0x65e07f14, + 0x040c55a6, 0x04977f4d, 0x043d3046, 0x04b733a0, + 0x046830a4, 0x04ed322d, 0x05686948, 0x05bd6c13, + 0x65c88ef0, 0x450db3d7, 0x4540b6d9, 0x043e3979, + 0x445896ce, 0x445a9005, 0x44d98069, 0x445b87ae, + 0x04da348e, 0x04982edb, 0x0499397f, 0x0408338c, + 0x04ca309c, 0x65c721e6, 0x65c63641, 0x65982882, + 0x04812b8b, 0x0e251083, 0x4e3712d5, 0x0e61101f, + 0x4e6d118b, 0x0eba1338, 0x4eb712d5, 0x2e31120f, + 0x6e2e11ac, 0x2e6810e6, 0x6e6f11cd, 0x2eaa1128, + 0x6eb1120f, }; // END Generated code -- do not edit diff --git a/test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java b/test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java index bb69e5bfe80..88b34841e57 100644 --- a/test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java +++ b/test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java @@ -2755,6 +2755,36 @@ public class IRNode { vectorNode(EXPAND_BITS_VL, "ExpandBitsV", TYPE_LONG); } + public static final String EXPAND_VB = VECTOR_PREFIX + "EXPAND_VB" + POSTFIX; + static { + vectorNode(EXPAND_VB, "ExpandV", TYPE_BYTE); + } + + public static final String 
EXPAND_VS = VECTOR_PREFIX + "EXPAND_VS" + POSTFIX; + static { + vectorNode(EXPAND_VS, "ExpandV", TYPE_SHORT); + } + + public static final String EXPAND_VI = VECTOR_PREFIX + "EXPAND_VI" + POSTFIX; + static { + vectorNode(EXPAND_VI, "ExpandV", TYPE_INT); + } + + public static final String EXPAND_VL = VECTOR_PREFIX + "EXPAND_VL" + POSTFIX; + static { + vectorNode(EXPAND_VL, "ExpandV", TYPE_LONG); + } + + public static final String EXPAND_VF = VECTOR_PREFIX + "EXPAND_VF" + POSTFIX; + static { + vectorNode(EXPAND_VF, "ExpandV", TYPE_FLOAT); + } + + public static final String EXPAND_VD = VECTOR_PREFIX + "EXPAND_VD" + POSTFIX; + static { + vectorNode(EXPAND_VD, "ExpandV", TYPE_DOUBLE); + } + public static final String Z_LOAD_P_WITH_BARRIER_FLAG = COMPOSITE_PREFIX + "Z_LOAD_P_WITH_BARRIER_FLAG" + POSTFIX; static { String regex = START + "zLoadP\\S*" + MID + "barrier\\(\\s*" + IS_REPLACED + "\\s*\\)" + END; diff --git a/test/hotspot/jtreg/compiler/vectorapi/VectorExpandTest.java b/test/hotspot/jtreg/compiler/vectorapi/VectorExpandTest.java new file mode 100644 index 00000000000..ce8fc0fb7b0 --- /dev/null +++ b/test/hotspot/jtreg/compiler/vectorapi/VectorExpandTest.java @@ -0,0 +1,198 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +package compiler.vectorapi; + +import compiler.lib.generators.*; +import compiler.lib.ir_framework.*; +import jdk.incubator.vector.*; +import jdk.test.lib.Asserts; + +/** + * @test + * @bug 8363989 + * @key randomness + * @library /test/lib / + * @summary AArch64: Add missing backend support of VectorAPI expand operation + * @modules jdk.incubator.vector + * + * @run driver compiler.vectorapi.VectorExpandTest + */ + +public class VectorExpandTest { + static final VectorSpecies B_SPECIES = ByteVector.SPECIES_MAX; + static final VectorSpecies S_SPECIES = ShortVector.SPECIES_MAX; + static final VectorSpecies I_SPECIES = IntVector.SPECIES_MAX; + static final VectorSpecies F_SPECIES = FloatVector.SPECIES_MAX; + static final VectorSpecies L_SPECIES = LongVector.SPECIES_MAX; + static final VectorSpecies D_SPECIES = DoubleVector.SPECIES_MAX; + static final int LENGTH = 512; + static final Generators RD = Generators.G; + static byte[] ba, bb; + static short[] sa, sb; + static int[] ia, ib; + static long[] la, lb; + static float[] fa, fb; + static double[] da, db; + static boolean[] ma; + + static { + ba = new byte[LENGTH]; + bb = new byte[LENGTH]; + sa = new short[LENGTH]; + sb = new short[LENGTH]; + ia = new int[LENGTH]; + ib = new int[LENGTH]; + la = new long[LENGTH]; + lb = new long[LENGTH]; + fa = new float[LENGTH]; + fb = new float[LENGTH]; + da = new double[LENGTH]; + db = new double[LENGTH]; + ma = new boolean[LENGTH]; + + Generator iGen = RD.ints(); + Generator lGen = RD.longs(); + Generator fGen = RD.floats(); + Generator dGen = RD.doubles(); + + for (int i = 0; i < 
LENGTH; i++) { + ba[i] = iGen.next().byteValue(); + sa[i] = iGen.next().shortValue(); + ma[i] = iGen.next() % 2 == 0; + } + RD.fill(iGen, ia); + RD.fill(lGen, la); + RD.fill(fGen, fa); + RD.fill(dGen, da); + } + + @Test + @IR(counts = { IRNode.EXPAND_VB, "= 1" }, applyIfCPUFeature = { "asimd", "true" }) + public static void testVectorExpandByte(ByteVector av, VectorMask m) { + av.expand(m).intoArray(bb, 0); + } + + @Run(test = "testVectorExpandByte") + public static void testVectorExpandByte_runner() { + ByteVector av = ByteVector.fromArray(B_SPECIES, ba, 0); + VectorMask m = VectorMask.fromArray(B_SPECIES, ma, 0); + testVectorExpandByte(av, m); + int index = 0; + for (int i = 0; i < m.length(); i++) { + Asserts.assertEquals(m.laneIsSet(i) ? ba[index++] : (byte)0, bb[i]); + } + } + + @Test + @IR(counts = { IRNode.EXPAND_VS, "= 1" }, applyIfCPUFeature = { "asimd", "true" }) + public static void testVectorExpandShort(ShortVector av, VectorMask m) { + av.expand(m).intoArray(sb, 0); + } + + @Run(test = "testVectorExpandShort") + public static void testVectorExpandShort_runner() { + ShortVector av = ShortVector.fromArray(S_SPECIES, sa, 0); + VectorMask m = VectorMask.fromArray(S_SPECIES, ma, 0); + testVectorExpandShort(av, m); + int index = 0; + for (int i = 0; i < m.length(); i++) { + Asserts.assertEquals(m.laneIsSet(i) ? sa[index++] : (short)0, sb[i]); + } + } + + @Test + @IR(counts = { IRNode.EXPAND_VI, "= 1" }, applyIfCPUFeature = { "asimd", "true" }) + public static void testVectorExpandInt(IntVector av, VectorMask m) { + av.expand(m).intoArray(ib, 0); + } + + @Run(test = "testVectorExpandInt") + public static void testVectorExpandInt_runner() { + IntVector av = IntVector.fromArray(I_SPECIES, ia, 0); + VectorMask m = VectorMask.fromArray(I_SPECIES, ma, 0); + testVectorExpandInt(av, m); + int index = 0; + for (int i = 0; i < m.length(); i++) { + Asserts.assertEquals(m.laneIsSet(i) ? 
ia[index++] : (int)0, ib[i]); + } + } + + @Test + @IR(counts = { IRNode.EXPAND_VL, "= 1" }, applyIfCPUFeature = { "asimd", "true" }) + public static void testVectorExpandLong(LongVector av, VectorMask m) { + av.expand(m).intoArray(lb, 0); + } + + @Run(test = "testVectorExpandLong") + public static void testVectorExpandLong_runner() { + LongVector av = LongVector.fromArray(L_SPECIES, la, 0); + VectorMask m = VectorMask.fromArray(L_SPECIES, ma, 0); + testVectorExpandLong(av, m); + int index = 0; + for (int i = 0; i < m.length(); i++) { + Asserts.assertEquals(m.laneIsSet(i) ? la[index++] : (long)0, lb[i]); + } + } + + @Test + @IR(counts = { IRNode.EXPAND_VF, "= 1" }, applyIfCPUFeature = { "asimd", "true" }) + public static void testVectorExpandFloat(FloatVector av, VectorMask m) { + av.expand(m).intoArray(fb, 0); + } + + @Run(test = "testVectorExpandFloat") + public static void testVectorExpandFloat_runner() { + FloatVector av = FloatVector.fromArray(F_SPECIES, fa, 0); + VectorMask m = VectorMask.fromArray(F_SPECIES, ma, 0); + testVectorExpandFloat(av, m); + int index = 0; + for (int i = 0; i < m.length(); i++) { + Asserts.assertEquals(m.laneIsSet(i) ? fa[index++] : (float)0, fb[i]); + } + } + + @Test + @IR(counts = { IRNode.EXPAND_VD, "= 1" }, applyIfCPUFeature = { "asimd", "true" }) + public static void testVectorExpandDouble(DoubleVector av, VectorMask m) { + av.expand(m).intoArray(db, 0); + } + + @Run(test = "testVectorExpandDouble") + public static void testVectorExpandDouble_runner() { + DoubleVector av = DoubleVector.fromArray(D_SPECIES, da, 0); + VectorMask m = VectorMask.fromArray(D_SPECIES, ma, 0); + testVectorExpandDouble(av, m); + int index = 0; + for (int i = 0; i < m.length(); i++) { + Asserts.assertEquals(m.laneIsSet(i) ? 
da[index++] : (double)0, db[i]); + } + } + + public static void main(String[] args) { + TestFramework testFramework = new TestFramework(); + testFramework.setDefaultWarmup(10000) + .addFlags("--add-modules=jdk.incubator.vector") + .start(); + } +}