mirror of
https://github.com/openjdk/jdk.git
synced 2026-01-28 12:09:14 +00:00
8366333: AArch64: Enhance SVE subword type implementation of vector compress
Co-authored-by: Jatin Bhateja <jbhateja@openjdk.org> Reviewed-by: jbhateja, xgong, galder, vlivanov
This commit is contained in:
parent
0522cf2ed9
commit
2de8d58552
@ -7081,29 +7081,31 @@ instruct vcompress(vReg dst, vReg src, pRegGov pg) %{
|
||||
%}
|
||||
|
||||
instruct vcompressB(vReg dst, vReg src, pReg pg, vReg tmp1, vReg tmp2,
|
||||
vReg tmp3, vReg tmp4, pReg ptmp, pRegGov pgtmp) %{
|
||||
vReg tmp3, pReg ptmp, pRegGov pgtmp) %{
|
||||
predicate(UseSVE > 0 && Matcher::vector_element_basic_type(n) == T_BYTE);
|
||||
effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ptmp, TEMP pgtmp);
|
||||
effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP ptmp, TEMP pgtmp);
|
||||
match(Set dst (CompressV src pg));
|
||||
format %{ "vcompressB $dst, $src, $pg\t# KILL $tmp1, $tmp2, $tmp3, tmp4, $ptmp, $pgtmp" %}
|
||||
format %{ "vcompressB $dst, $src, $pg\t# KILL $tmp1, $tmp2, $tmp3, $ptmp, $pgtmp" %}
|
||||
ins_encode %{
|
||||
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
|
||||
__ sve_compress_byte($dst$$FloatRegister, $src$$FloatRegister, $pg$$PRegister,
|
||||
$tmp1$$FloatRegister,$tmp2$$FloatRegister,
|
||||
$tmp3$$FloatRegister,$tmp4$$FloatRegister,
|
||||
$ptmp$$PRegister, $pgtmp$$PRegister);
|
||||
$tmp1$$FloatRegister, $tmp2$$FloatRegister, $tmp3$$FloatRegister,
|
||||
$ptmp$$PRegister, $pgtmp$$PRegister, length_in_bytes);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
instruct vcompressS(vReg dst, vReg src, pReg pg,
|
||||
vReg tmp1, vReg tmp2, pRegGov pgtmp) %{
|
||||
instruct vcompressS(vReg dst, vReg src, pReg pg, vReg tmp1, vReg tmp2, pRegGov pgtmp) %{
|
||||
predicate(UseSVE > 0 && Matcher::vector_element_basic_type(n) == T_SHORT);
|
||||
effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2, TEMP pgtmp);
|
||||
match(Set dst (CompressV src pg));
|
||||
format %{ "vcompressS $dst, $src, $pg\t# KILL $tmp1, $tmp2, $pgtmp" %}
|
||||
ins_encode %{
|
||||
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
|
||||
__ sve_dup($tmp1$$FloatRegister, __ H, 0);
|
||||
__ sve_compress_short($dst$$FloatRegister, $src$$FloatRegister, $pg$$PRegister,
|
||||
$tmp1$$FloatRegister,$tmp2$$FloatRegister, $pgtmp$$PRegister);
|
||||
$tmp1$$FloatRegister, $tmp2$$FloatRegister, $pgtmp$$PRegister,
|
||||
length_in_bytes);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
@ -5069,29 +5069,31 @@ instruct vcompress(vReg dst, vReg src, pRegGov pg) %{
|
||||
%}
|
||||
|
||||
instruct vcompressB(vReg dst, vReg src, pReg pg, vReg tmp1, vReg tmp2,
|
||||
vReg tmp3, vReg tmp4, pReg ptmp, pRegGov pgtmp) %{
|
||||
vReg tmp3, pReg ptmp, pRegGov pgtmp) %{
|
||||
predicate(UseSVE > 0 && Matcher::vector_element_basic_type(n) == T_BYTE);
|
||||
effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ptmp, TEMP pgtmp);
|
||||
effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP ptmp, TEMP pgtmp);
|
||||
match(Set dst (CompressV src pg));
|
||||
format %{ "vcompressB $dst, $src, $pg\t# KILL $tmp1, $tmp2, $tmp3, tmp4, $ptmp, $pgtmp" %}
|
||||
format %{ "vcompressB $dst, $src, $pg\t# KILL $tmp1, $tmp2, $tmp3, $ptmp, $pgtmp" %}
|
||||
ins_encode %{
|
||||
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
|
||||
__ sve_compress_byte($dst$$FloatRegister, $src$$FloatRegister, $pg$$PRegister,
|
||||
$tmp1$$FloatRegister,$tmp2$$FloatRegister,
|
||||
$tmp3$$FloatRegister,$tmp4$$FloatRegister,
|
||||
$ptmp$$PRegister, $pgtmp$$PRegister);
|
||||
$tmp1$$FloatRegister, $tmp2$$FloatRegister, $tmp3$$FloatRegister,
|
||||
$ptmp$$PRegister, $pgtmp$$PRegister, length_in_bytes);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
instruct vcompressS(vReg dst, vReg src, pReg pg,
|
||||
vReg tmp1, vReg tmp2, pRegGov pgtmp) %{
|
||||
instruct vcompressS(vReg dst, vReg src, pReg pg, vReg tmp1, vReg tmp2, pRegGov pgtmp) %{
|
||||
predicate(UseSVE > 0 && Matcher::vector_element_basic_type(n) == T_SHORT);
|
||||
effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2, TEMP pgtmp);
|
||||
match(Set dst (CompressV src pg));
|
||||
format %{ "vcompressS $dst, $src, $pg\t# KILL $tmp1, $tmp2, $pgtmp" %}
|
||||
ins_encode %{
|
||||
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
|
||||
__ sve_dup($tmp1$$FloatRegister, __ H, 0);
|
||||
__ sve_compress_short($dst$$FloatRegister, $src$$FloatRegister, $pg$$PRegister,
|
||||
$tmp1$$FloatRegister,$tmp2$$FloatRegister, $pgtmp$$PRegister);
|
||||
$tmp1$$FloatRegister, $tmp2$$FloatRegister, $pgtmp$$PRegister,
|
||||
length_in_bytes);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
@ -3486,6 +3486,7 @@ public:
|
||||
INSN(sve_smaxv, 0b00000100, 0b001000001); // signed maximum reduction to scalar
|
||||
INSN(sve_smin, 0b00000100, 0b001010000); // signed minimum vectors
|
||||
INSN(sve_sminv, 0b00000100, 0b001010001); // signed minimum reduction to scalar
|
||||
INSN(sve_splice,0b00000101, 0b101100100); // splice two vectors under predicate control, destructive
|
||||
INSN(sve_sub, 0b00000100, 0b000001000); // vector sub
|
||||
INSN(sve_uaddv, 0b00000100, 0b000001001); // unsigned add reduction to scalar
|
||||
INSN(sve_umax, 0b00000100, 0b001001000); // unsigned maximum vectors
|
||||
|
||||
@ -2203,114 +2203,117 @@ void C2_MacroAssembler::sve_gen_mask_imm(PRegister dst, BasicType bt, uint32_t l
|
||||
// Pack active elements of src, under the control of mask, into the lowest-numbered elements of dst.
|
||||
// Any remaining elements of dst will be filled with zero.
|
||||
// Clobbers: rscratch1
|
||||
// Preserves: src, mask
|
||||
// Preserves: mask, vzr
|
||||
void C2_MacroAssembler::sve_compress_short(FloatRegister dst, FloatRegister src, PRegister mask,
|
||||
FloatRegister vtmp1, FloatRegister vtmp2,
|
||||
PRegister pgtmp) {
|
||||
FloatRegister vzr, FloatRegister vtmp,
|
||||
PRegister pgtmp, unsigned vector_length_in_bytes) {
|
||||
assert(pgtmp->is_governing(), "This register has to be a governing predicate register");
|
||||
assert_different_registers(dst, src, vtmp1, vtmp2);
|
||||
// When called by sve_compress_byte, src and vtmp may be the same register.
|
||||
assert_different_registers(dst, src, vzr);
|
||||
assert_different_registers(dst, vtmp, vzr);
|
||||
assert_different_registers(mask, pgtmp);
|
||||
|
||||
// Example input: src = 8888 7777 6666 5555 4444 3333 2222 1111
|
||||
// mask = 0001 0000 0000 0001 0001 0000 0001 0001
|
||||
// Expected result: dst = 0000 0000 0000 8888 5555 4444 2222 1111
|
||||
sve_dup(vtmp2, H, 0);
|
||||
// high <-- low
|
||||
// Example input: src = hh gg ff ee dd cc bb aa, one character is 8 bits.
|
||||
// mask = 01 00 00 01 01 00 01 01, one character is 1 bit.
|
||||
// Expected result: dst = 00 00 00 hh ee dd bb aa
|
||||
|
||||
// Extend lowest half to type INT.
|
||||
// dst = 00004444 00003333 00002222 00001111
|
||||
// dst = 00dd 00cc 00bb 00aa
|
||||
sve_uunpklo(dst, S, src);
|
||||
// pgtmp = 00000001 00000000 00000001 00000001
|
||||
// pgtmp = 0001 0000 0001 0001
|
||||
sve_punpklo(pgtmp, mask);
|
||||
// Pack the active elements in size of type INT to the right,
|
||||
// and fill the remaining elements with zero.
|
||||
// dst = 00000000 00004444 00002222 00001111
|
||||
// dst = 0000 00dd 00bb 00aa
|
||||
sve_compact(dst, S, dst, pgtmp);
|
||||
// Narrow the result back to type SHORT.
|
||||
// dst = 0000 0000 0000 0000 0000 4444 2222 1111
|
||||
sve_uzp1(dst, H, dst, vtmp2);
|
||||
// dst = 00 00 00 00 00 dd bb aa
|
||||
sve_uzp1(dst, H, dst, vzr);
|
||||
|
||||
// Return if the vector length is no more than MaxVectorSize/2, since the
|
||||
// highest half is invalid.
|
||||
if (vector_length_in_bytes <= (MaxVectorSize >> 1)) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Count the active elements of lowest half.
|
||||
// rscratch1 = 3
|
||||
sve_cntp(rscratch1, S, ptrue, pgtmp);
|
||||
|
||||
// Repeat to the highest half.
|
||||
// pgtmp = 00000001 00000000 00000000 00000001
|
||||
// pgtmp = 0001 0000 0000 0001
|
||||
sve_punpkhi(pgtmp, mask);
|
||||
// vtmp1 = 00008888 00007777 00006666 00005555
|
||||
sve_uunpkhi(vtmp1, S, src);
|
||||
// vtmp1 = 00000000 00000000 00008888 00005555
|
||||
sve_compact(vtmp1, S, vtmp1, pgtmp);
|
||||
// vtmp1 = 0000 0000 0000 0000 0000 0000 8888 5555
|
||||
sve_uzp1(vtmp1, H, vtmp1, vtmp2);
|
||||
// vtmp = 00hh 00gg 00ff 00ee
|
||||
sve_uunpkhi(vtmp, S, src);
|
||||
// vtmp = 0000 0000 00hh 00ee
|
||||
sve_compact(vtmp, S, vtmp, pgtmp);
|
||||
// vtmp = 00 00 00 00 00 00 hh ee
|
||||
sve_uzp1(vtmp, H, vtmp, vzr);
|
||||
|
||||
// Compressed low: dst = 0000 0000 0000 0000 0000 4444 2222 1111
|
||||
// Compressed high: vtmp1 = 0000 0000 0000 0000 0000 0000 8888 5555
|
||||
// Left shift(cross lane) compressed high with TRUE_CNT lanes,
|
||||
// TRUE_CNT is the number of active elements in the compressed low.
|
||||
neg(rscratch1, rscratch1);
|
||||
// vtmp2 = {4 3 2 1 0 -1 -2 -3}
|
||||
sve_index(vtmp2, H, rscratch1, 1);
|
||||
// vtmp1 = 0000 0000 0000 8888 5555 0000 0000 0000
|
||||
sve_tbl(vtmp1, H, vtmp1, vtmp2);
|
||||
|
||||
// Combine the compressed high(after shifted) with the compressed low.
|
||||
// dst = 0000 0000 0000 8888 5555 4444 2222 1111
|
||||
sve_orr(dst, dst, vtmp1);
|
||||
// pgtmp = 00 00 00 00 00 01 01 01
|
||||
sve_whilelt(pgtmp, H, zr, rscratch1);
|
||||
// Compressed low: dst = 00 00 00 00 00 dd bb aa
|
||||
// Compressed high: vtmp = 00 00 00 00 00 00 hh ee
|
||||
// Combine the compressed low with the compressed high:
|
||||
// dst = 00 00 00 hh ee dd bb aa
|
||||
sve_splice(dst, H, pgtmp, vtmp);
|
||||
}
|
||||
|
||||
// Clobbers: rscratch1, rscratch2
|
||||
// Preserves: src, mask
|
||||
void C2_MacroAssembler::sve_compress_byte(FloatRegister dst, FloatRegister src, PRegister mask,
|
||||
FloatRegister vtmp1, FloatRegister vtmp2,
|
||||
FloatRegister vtmp3, FloatRegister vtmp4,
|
||||
PRegister ptmp, PRegister pgtmp) {
|
||||
FloatRegister vtmp1, FloatRegister vtmp2, FloatRegister vtmp3,
|
||||
PRegister ptmp, PRegister pgtmp, unsigned vector_length_in_bytes) {
|
||||
assert(pgtmp->is_governing(), "This register has to be a governing predicate register");
|
||||
assert_different_registers(dst, src, vtmp1, vtmp2, vtmp3, vtmp4);
|
||||
assert_different_registers(dst, src, vtmp1, vtmp2, vtmp3);
|
||||
assert_different_registers(mask, ptmp, pgtmp);
|
||||
// Example input: src = 88 77 66 55 44 33 22 11
|
||||
// mask = 01 00 00 01 01 00 01 01
|
||||
// Expected result: dst = 00 00 00 88 55 44 22 11
|
||||
// high <-- low
|
||||
// Example input: src = q p n m l k j i h g f e d c b a, one character is 8 bits.
|
||||
// mask = 0 1 0 0 0 0 0 1 0 1 0 0 0 1 0 1, one character is 1 bit.
|
||||
// Expected result: dst = 0 0 0 0 0 0 0 0 0 0 0 p i g c a
|
||||
FloatRegister vzr = vtmp3;
|
||||
sve_dup(vzr, B, 0);
|
||||
|
||||
sve_dup(vtmp4, B, 0);
|
||||
// Extend lowest half to type SHORT.
|
||||
// vtmp1 = 0044 0033 0022 0011
|
||||
// vtmp1 = 0h 0g 0f 0e 0d 0c 0b 0a
|
||||
sve_uunpklo(vtmp1, H, src);
|
||||
// ptmp = 0001 0000 0001 0001
|
||||
// ptmp = 00 01 00 00 00 01 00 01
|
||||
sve_punpklo(ptmp, mask);
|
||||
// Pack the active elements in size of type SHORT to the right,
|
||||
// and fill the remaining elements with zero.
|
||||
// dst = 00 00 00 00 00 0g 0c 0a
|
||||
unsigned extended_size = vector_length_in_bytes << 1;
|
||||
sve_compress_short(dst, vtmp1, ptmp, vzr, vtmp2, pgtmp, extended_size > MaxVectorSize ? MaxVectorSize : extended_size);
|
||||
// Narrow the result back to type BYTE.
|
||||
// dst = 0 0 0 0 0 0 0 0 0 0 0 0 0 g c a
|
||||
sve_uzp1(dst, B, dst, vzr);
|
||||
|
||||
// Return if the vector length is no more than MaxVectorSize/2, since the
|
||||
// highest half is invalid.
|
||||
if (vector_length_in_bytes <= (MaxVectorSize >> 1)) {
|
||||
return;
|
||||
}
|
||||
// Count the active elements of lowest half.
|
||||
// rscratch2 = 3
|
||||
sve_cntp(rscratch2, H, ptrue, ptmp);
|
||||
// Pack the active elements in size of type SHORT to the right,
|
||||
// and fill the remainings with zero.
|
||||
// dst = 0000 0044 0022 0011
|
||||
sve_compress_short(dst, vtmp1, ptmp, vtmp2, vtmp3, pgtmp);
|
||||
// Narrow the result back to type BYTE.
|
||||
// dst = 00 00 00 00 00 44 22 11
|
||||
sve_uzp1(dst, B, dst, vtmp4);
|
||||
|
||||
// Repeat to the highest half.
|
||||
// ptmp = 0001 0000 0000 0001
|
||||
// ptmp = 00 01 00 00 00 00 00 01
|
||||
sve_punpkhi(ptmp, mask);
|
||||
// vtmp1 = 0088 0077 0066 0055
|
||||
// vtmp2 = 0q 0p 0n 0m 0l 0k 0j 0i
|
||||
sve_uunpkhi(vtmp2, H, src);
|
||||
// vtmp1 = 0000 0000 0088 0055
|
||||
sve_compress_short(vtmp1, vtmp2, ptmp, vtmp3, vtmp4, pgtmp);
|
||||
// vtmp1 = 00 00 00 00 00 00 0p 0i
|
||||
sve_compress_short(vtmp1, vtmp2, ptmp, vzr, vtmp2, pgtmp, extended_size - MaxVectorSize);
|
||||
// vtmp1 = 0 0 0 0 0 0 0 0 0 0 0 0 0 0 p i
|
||||
sve_uzp1(vtmp1, B, vtmp1, vzr);
|
||||
|
||||
sve_dup(vtmp4, B, 0);
|
||||
// vtmp1 = 00 00 00 00 00 00 88 55
|
||||
sve_uzp1(vtmp1, B, vtmp1, vtmp4);
|
||||
|
||||
// Compressed low: dst = 00 00 00 00 00 44 22 11
|
||||
// Compressed high: vtmp1 = 00 00 00 00 00 00 88 55
|
||||
// Left shift(cross lane) compressed high with TRUE_CNT lanes,
|
||||
// TRUE_CNT is the number of active elements in the compressed low.
|
||||
neg(rscratch2, rscratch2);
|
||||
// vtmp2 = {4 3 2 1 0 -1 -2 -3}
|
||||
sve_index(vtmp2, B, rscratch2, 1);
|
||||
// vtmp1 = 00 00 00 88 55 00 00 00
|
||||
sve_tbl(vtmp1, B, vtmp1, vtmp2);
|
||||
// Combine the compressed high(after shifted) with the compressed low.
|
||||
// dst = 00 00 00 88 55 44 22 11
|
||||
sve_orr(dst, dst, vtmp1);
|
||||
// ptmp = 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1
|
||||
sve_whilelt(ptmp, B, zr, rscratch2);
|
||||
// Compressed low: dst = 0 0 0 0 0 0 0 0 0 0 0 0 0 g c a
|
||||
// Compressed high: vtmp1 = 0 0 0 0 0 0 0 0 0 0 0 0 0 0 p i
|
||||
// Combine the compressed low with the compressed high:
|
||||
// dst = 0 0 0 0 0 0 0 0 0 0 0 p i g c a
|
||||
sve_splice(dst, B, ptmp, vtmp1);
|
||||
}
|
||||
|
||||
void C2_MacroAssembler::neon_reverse_bits(FloatRegister dst, FloatRegister src, BasicType bt, bool isQ) {
|
||||
|
||||
@ -173,13 +173,12 @@
|
||||
// lowest-numbered elements of dst. Any remaining elements of dst will
|
||||
// be filled with zero.
|
||||
void sve_compress_byte(FloatRegister dst, FloatRegister src, PRegister mask,
|
||||
FloatRegister vtmp1, FloatRegister vtmp2,
|
||||
FloatRegister vtmp3, FloatRegister vtmp4,
|
||||
PRegister ptmp, PRegister pgtmp);
|
||||
FloatRegister vtmp1, FloatRegister vtmp2, FloatRegister vtmp3,
|
||||
PRegister ptmp, PRegister pgtmp, unsigned vector_length_in_bytes);
|
||||
|
||||
void sve_compress_short(FloatRegister dst, FloatRegister src, PRegister mask,
|
||||
FloatRegister vtmp1, FloatRegister vtmp2,
|
||||
PRegister pgtmp);
|
||||
FloatRegister vzr, FloatRegister vtmp,
|
||||
PRegister pgtmp, unsigned vector_length_in_bytes);
|
||||
|
||||
void neon_reverse_bits(FloatRegister dst, FloatRegister src, BasicType bt, bool isQ);
|
||||
|
||||
|
||||
@ -2143,6 +2143,10 @@ generate(SpecialCases, [["ccmn", "__ ccmn(zr, zr, 3u, Assembler::LE);",
|
||||
["facge", "__ sve_fac(Assembler::GE, p1, __ H, p2, z4, z5);", "facge\tp1.h, p2/z, z4.h, z5.h"],
|
||||
["facge", "__ sve_fac(Assembler::GE, p1, __ S, p2, z4, z5);", "facge\tp1.s, p2/z, z4.s, z5.s"],
|
||||
["facge", "__ sve_fac(Assembler::GE, p1, __ D, p2, z4, z5);", "facge\tp1.d, p2/z, z4.d, z5.d"],
|
||||
["splice", "__ sve_splice(z0, __ B, p0, z1);", "splice\tz0.b, p0, z0.b, z1.b"],
|
||||
["splice", "__ sve_splice(z0, __ H, p0, z1);", "splice\tz0.h, p0, z0.h, z1.h"],
|
||||
["splice", "__ sve_splice(z0, __ S, p0, z1);", "splice\tz0.s, p0, z0.s, z1.s"],
|
||||
["splice", "__ sve_splice(z0, __ D, p0, z1);", "splice\tz0.d, p0, z0.d, z1.d"],
|
||||
# SVE2 instructions
|
||||
["histcnt", "__ sve_histcnt(z16, __ S, p0, z16, z16);", "histcnt\tz16.s, p0/z, z16.s, z16.s"],
|
||||
["histcnt", "__ sve_histcnt(z17, __ D, p0, z17, z17);", "histcnt\tz17.d, p0/z, z17.d, z17.d"],
|
||||
|
||||
@ -1156,6 +1156,10 @@
|
||||
__ sve_fac(Assembler::GE, p1, __ H, p2, z4, z5); // facge p1.h, p2/z, z4.h, z5.h
|
||||
__ sve_fac(Assembler::GE, p1, __ S, p2, z4, z5); // facge p1.s, p2/z, z4.s, z5.s
|
||||
__ sve_fac(Assembler::GE, p1, __ D, p2, z4, z5); // facge p1.d, p2/z, z4.d, z5.d
|
||||
__ sve_splice(z0, __ B, p0, z1); // splice z0.b, p0, z0.b, z1.b
|
||||
__ sve_splice(z0, __ H, p0, z1); // splice z0.h, p0, z0.h, z1.h
|
||||
__ sve_splice(z0, __ S, p0, z1); // splice z0.s, p0, z0.s, z1.s
|
||||
__ sve_splice(z0, __ D, p0, z1); // splice z0.d, p0, z0.d, z1.d
|
||||
__ sve_histcnt(z16, __ S, p0, z16, z16); // histcnt z16.s, p0/z, z16.s, z16.s
|
||||
__ sve_histcnt(z17, __ D, p0, z17, z17); // histcnt z17.d, p0/z, z17.d, z17.d
|
||||
|
||||
@ -1445,30 +1449,30 @@
|
||||
0x9101a1a0, 0xb10a5cc8, 0xd10810aa, 0xf10fd061,
|
||||
0x120cb166, 0x321764bc, 0x52174681, 0x720c0227,
|
||||
0x9241018e, 0xb25a2969, 0xd278b411, 0xf26aad01,
|
||||
0x14000000, 0x17ffffd7, 0x140004b7, 0x94000000,
|
||||
0x97ffffd4, 0x940004b4, 0x3400000a, 0x34fffa2a,
|
||||
0x3400962a, 0x35000008, 0x35fff9c8, 0x350095c8,
|
||||
0xb400000b, 0xb4fff96b, 0xb400956b, 0xb500001d,
|
||||
0xb5fff91d, 0xb500951d, 0x10000013, 0x10fff8b3,
|
||||
0x100094b3, 0x90000013, 0x36300016, 0x3637f836,
|
||||
0x36309436, 0x3758000c, 0x375ff7cc, 0x375893cc,
|
||||
0x14000000, 0x17ffffd7, 0x140004bb, 0x94000000,
|
||||
0x97ffffd4, 0x940004b8, 0x3400000a, 0x34fffa2a,
|
||||
0x340096aa, 0x35000008, 0x35fff9c8, 0x35009648,
|
||||
0xb400000b, 0xb4fff96b, 0xb40095eb, 0xb500001d,
|
||||
0xb5fff91d, 0xb500959d, 0x10000013, 0x10fff8b3,
|
||||
0x10009533, 0x90000013, 0x36300016, 0x3637f836,
|
||||
0x363094b6, 0x3758000c, 0x375ff7cc, 0x3758944c,
|
||||
0x128313a0, 0x528a32c7, 0x7289173b, 0x92ab3acc,
|
||||
0xd2a0bf94, 0xf2c285e8, 0x9358722f, 0x330e652f,
|
||||
0x53067f3b, 0x93577c53, 0xb34a1aac, 0xd35a4016,
|
||||
0x13946c63, 0x93c3dbc8, 0x54000000, 0x54fff5a0,
|
||||
0x540091a0, 0x54000001, 0x54fff541, 0x54009141,
|
||||
0x54000002, 0x54fff4e2, 0x540090e2, 0x54000002,
|
||||
0x54fff482, 0x54009082, 0x54000003, 0x54fff423,
|
||||
0x54009023, 0x54000003, 0x54fff3c3, 0x54008fc3,
|
||||
0x54000004, 0x54fff364, 0x54008f64, 0x54000005,
|
||||
0x54fff305, 0x54008f05, 0x54000006, 0x54fff2a6,
|
||||
0x54008ea6, 0x54000007, 0x54fff247, 0x54008e47,
|
||||
0x54000008, 0x54fff1e8, 0x54008de8, 0x54000009,
|
||||
0x54fff189, 0x54008d89, 0x5400000a, 0x54fff12a,
|
||||
0x54008d2a, 0x5400000b, 0x54fff0cb, 0x54008ccb,
|
||||
0x5400000c, 0x54fff06c, 0x54008c6c, 0x5400000d,
|
||||
0x54fff00d, 0x54008c0d, 0x5400000e, 0x54ffefae,
|
||||
0x54008bae, 0x5400000f, 0x54ffef4f, 0x54008b4f,
|
||||
0x54009220, 0x54000001, 0x54fff541, 0x540091c1,
|
||||
0x54000002, 0x54fff4e2, 0x54009162, 0x54000002,
|
||||
0x54fff482, 0x54009102, 0x54000003, 0x54fff423,
|
||||
0x540090a3, 0x54000003, 0x54fff3c3, 0x54009043,
|
||||
0x54000004, 0x54fff364, 0x54008fe4, 0x54000005,
|
||||
0x54fff305, 0x54008f85, 0x54000006, 0x54fff2a6,
|
||||
0x54008f26, 0x54000007, 0x54fff247, 0x54008ec7,
|
||||
0x54000008, 0x54fff1e8, 0x54008e68, 0x54000009,
|
||||
0x54fff189, 0x54008e09, 0x5400000a, 0x54fff12a,
|
||||
0x54008daa, 0x5400000b, 0x54fff0cb, 0x54008d4b,
|
||||
0x5400000c, 0x54fff06c, 0x54008cec, 0x5400000d,
|
||||
0x54fff00d, 0x54008c8d, 0x5400000e, 0x54ffefae,
|
||||
0x54008c2e, 0x5400000f, 0x54ffef4f, 0x54008bcf,
|
||||
0xd40658e1, 0xd4014d22, 0xd4046543, 0xd4273f60,
|
||||
0xd44cad80, 0xd503201f, 0xd503203f, 0xd503205f,
|
||||
0xd503209f, 0xd50320bf, 0xd503219f, 0xd50323bf,
|
||||
@ -1689,7 +1693,8 @@
|
||||
0x05a14c00, 0x05e14c00, 0x05304001, 0x05314001,
|
||||
0x05a18610, 0x05e18610, 0x0420bc31, 0x05271e11,
|
||||
0x6545e891, 0x6585e891, 0x65c5e891, 0x6545c891,
|
||||
0x6585c891, 0x65c5c891, 0x45b0c210, 0x45f1c231,
|
||||
0x6585c891, 0x65c5c891, 0x052c8020, 0x056c8020,
|
||||
0x05ac8020, 0x05ec8020, 0x45b0c210, 0x45f1c231,
|
||||
0x1e601000, 0x1e603000, 0x1e621000, 0x1e623000,
|
||||
0x1e641000, 0x1e643000, 0x1e661000, 0x1e663000,
|
||||
0x1e681000, 0x1e683000, 0x1e6a1000, 0x1e6a3000,
|
||||
|
||||
@ -2840,6 +2840,36 @@ public class IRNode {
|
||||
vectorNode(EXPAND_BITS_VL, "ExpandBitsV", TYPE_LONG);
|
||||
}
|
||||
|
||||
// IR node placeholders for the "CompressV" vector node, one per element type
// (byte, short, int, long, float, double). Each constant is built from
// VECTOR_PREFIX + name + POSTFIX and is then passed to vectorNode() together
// with the node name "CompressV" and the matching TYPE_* constant.
// NOTE(review): vectorNode(), VECTOR_PREFIX, POSTFIX and TYPE_* are defined
// outside this hunk; presumably vectorNode() registers the placeholder for use
// in @IR rules -- confirm against the rest of IRNode.

// CompressV with byte elements.
public static final String COMPRESS_VB = VECTOR_PREFIX + "COMPRESS_VB" + POSTFIX;
|
||||
static {
|
||||
vectorNode(COMPRESS_VB, "CompressV", TYPE_BYTE);
|
||||
}
|
||||
|
||||
// CompressV with short elements.
public static final String COMPRESS_VS = VECTOR_PREFIX + "COMPRESS_VS" + POSTFIX;
|
||||
static {
|
||||
vectorNode(COMPRESS_VS, "CompressV", TYPE_SHORT);
|
||||
}
|
||||
|
||||
// CompressV with int elements.
public static final String COMPRESS_VI = VECTOR_PREFIX + "COMPRESS_VI" + POSTFIX;
|
||||
static {
|
||||
vectorNode(COMPRESS_VI, "CompressV", TYPE_INT);
|
||||
}
|
||||
|
||||
// CompressV with long elements.
public static final String COMPRESS_VL = VECTOR_PREFIX + "COMPRESS_VL" + POSTFIX;
|
||||
static {
|
||||
vectorNode(COMPRESS_VL, "CompressV", TYPE_LONG);
|
||||
}
|
||||
|
||||
// CompressV with float elements.
public static final String COMPRESS_VF = VECTOR_PREFIX + "COMPRESS_VF" + POSTFIX;
|
||||
static {
|
||||
vectorNode(COMPRESS_VF, "CompressV", TYPE_FLOAT);
|
||||
}
|
||||
|
||||
// CompressV with double elements.
public static final String COMPRESS_VD = VECTOR_PREFIX + "COMPRESS_VD" + POSTFIX;
|
||||
static {
|
||||
vectorNode(COMPRESS_VD, "CompressV", TYPE_DOUBLE);
|
||||
}
|
||||
|
||||
public static final String EXPAND_VB = VECTOR_PREFIX + "EXPAND_VB" + POSTFIX;
|
||||
static {
|
||||
vectorNode(EXPAND_VB, "ExpandV", TYPE_BYTE);
|
||||
|
||||
@ -106,6 +106,7 @@ public class IREncodingPrinter {
|
||||
"avx512_fp16",
|
||||
"avx512_vnni",
|
||||
"avx512_vbmi",
|
||||
"avx512_vbmi2",
|
||||
"avx10_2",
|
||||
"bmi2",
|
||||
// AArch64
|
||||
|
||||
246
test/hotspot/jtreg/compiler/vectorapi/VectorCompressTest.java
Normal file
246
test/hotspot/jtreg/compiler/vectorapi/VectorCompressTest.java
Normal file
@ -0,0 +1,246 @@
|
||||
/*
|
||||
* Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
|
||||
package compiler.vectorapi;
|
||||
|
||||
import compiler.lib.generators.*;
|
||||
import compiler.lib.ir_framework.*;
|
||||
import jdk.incubator.vector.*;
|
||||
import jdk.test.lib.Asserts;
|
||||
|
||||
/**
|
||||
* @test
|
||||
* @bug 8366333
|
||||
* @key randomness
|
||||
* @library /test/lib /
|
||||
* @summary IR test for VectorAPI compress
|
||||
* @modules jdk.incubator.vector
|
||||
*
|
||||
* @run driver compiler.vectorapi.VectorCompressTest
|
||||
*/
|
||||
|
||||
// Test for Vector API compress() on all six primitive element types.
// Every test method loads one vector and one mask at offset 0, compresses the
// vector under the mask, stores the result, and then checks the stored lanes
// with the matching verifyVectorCompress* helper. Each test method also carries
// two @IR rules that expect a count of "= 1" for the matching IRNode.COMPRESS_V*
// placeholder: one rule applies when the "sve" CPU feature is true, the other
// when the listed AVX-512 features are all true.
public class VectorCompressTest {
|
||||
// The SPECIES_MAX species of each vector class used by the tests.
static final VectorSpecies<Byte> B_SPECIES = ByteVector.SPECIES_MAX;
|
||||
static final VectorSpecies<Short> S_SPECIES = ShortVector.SPECIES_MAX;
|
||||
static final VectorSpecies<Integer> I_SPECIES = IntVector.SPECIES_MAX;
|
||||
static final VectorSpecies<Float> F_SPECIES = FloatVector.SPECIES_MAX;
|
||||
static final VectorSpecies<Long> L_SPECIES = LongVector.SPECIES_MAX;
|
||||
static final VectorSpecies<Double> D_SPECIES = DoubleVector.SPECIES_MAX;
|
||||
// Element count of every input, output and mask array allocated below.
static final int LENGTH = 512;
|
||||
// Source of the value generators used to populate the inputs.
static final Generators RD = Generators.G;
|
||||
// For each type, the "a" array is the input the test loads from and the "b"
// array receives the compressed result.
static byte[] ba, bb;
|
||||
static short[] sa, sb;
|
||||
static int[] ia, ib;
|
||||
static long[] la, lb;
|
||||
static float[] fa, fb;
|
||||
static double[] da, db;
|
||||
// Mask source shared by all element types.
static boolean[] ma;
|
||||
|
||||
// Allocates all arrays with LENGTH elements and fills the inputs and the mask.
static {
|
||||
ba = new byte[LENGTH];
|
||||
bb = new byte[LENGTH];
|
||||
sa = new short[LENGTH];
|
||||
sb = new short[LENGTH];
|
||||
ia = new int[LENGTH];
|
||||
ib = new int[LENGTH];
|
||||
la = new long[LENGTH];
|
||||
lb = new long[LENGTH];
|
||||
fa = new float[LENGTH];
|
||||
fb = new float[LENGTH];
|
||||
da = new double[LENGTH];
|
||||
db = new double[LENGTH];
|
||||
ma = new boolean[LENGTH];
|
||||
|
||||
Generator<Integer> iGen = RD.ints();
|
||||
Generator<Long> lGen = RD.longs();
|
||||
Generator<Float> fGen = RD.floats();
|
||||
Generator<Double> dGen = RD.doubles();
|
||||
|
||||
// Per index, three consecutive iGen values feed ba, sa and ma in that order.
for (int i = 0; i < LENGTH; i++) {
|
||||
ba[i] = iGen.next().byteValue();
|
||||
sa[i] = iGen.next().shortValue();
|
||||
// A mask element is true when the drawn int is even.
ma[i] = iGen.next() % 2 == 0;
|
||||
}
|
||||
// The remaining inputs are filled in bulk from their own generators.
RD.fill(iGen, ia);
|
||||
RD.fill(lGen, la);
|
||||
RD.fill(fGen, fa);
|
||||
RD.fill(dGen, da);
|
||||
}
|
||||
|
||||
// Checks the byte result for the first vlen lanes: every ba[i] whose mask
// element ma[i] is true must appear, in order, at the front of bb, and all
// lanes of bb from there up to vlen must be zero.
// NOTE(review): @DontInline presumably keeps this checker out of the compiled
// test method so it does not affect the @IR counts -- confirm.
@DontInline
|
||||
static void verifyVectorCompressByte(int vlen) {
|
||||
// Next position in bb where a selected element is expected.
int index = 0;
|
||||
for (int i = 0; i < vlen; i++) {
|
||||
if (ma[i]) {
|
||||
Asserts.assertEquals(ba[i], bb[index++]);
|
||||
}
|
||||
}
|
||||
// Lanes past the last selected element must be zero.
for (int i = index; i < vlen; i++) {
|
||||
Asserts.assertEquals((byte)0, bb[i]);
|
||||
}
|
||||
}
|
||||
|
||||
// Same check as verifyVectorCompressByte, for the short arrays sa/sb.
@DontInline
|
||||
static void verifyVectorCompressShort(int vlen) {
|
||||
int index = 0;
|
||||
for (int i = 0; i < vlen; i++) {
|
||||
if (ma[i]) {
|
||||
Asserts.assertEquals(sa[i], sb[index++]);
|
||||
}
|
||||
}
|
||||
for (int i = index; i < vlen; i++) {
|
||||
Asserts.assertEquals((short)0, sb[i]);
|
||||
}
|
||||
}
|
||||
|
||||
// Same check as verifyVectorCompressByte, for the int arrays ia/ib.
@DontInline
|
||||
static void verifyVectorCompressInteger(int vlen) {
|
||||
int index = 0;
|
||||
for (int i = 0; i < vlen; i++) {
|
||||
if (ma[i]) {
|
||||
Asserts.assertEquals(ia[i], ib[index++]);
|
||||
}
|
||||
}
|
||||
for (int i = index; i < vlen; i++) {
|
||||
Asserts.assertEquals(0, ib[i]);
|
||||
}
|
||||
}
|
||||
|
||||
// Same check as verifyVectorCompressByte, for the long arrays la/lb.
@DontInline
|
||||
static void verifyVectorCompressLong(int vlen) {
|
||||
int index = 0;
|
||||
for (int i = 0; i < vlen; i++) {
|
||||
if (ma[i]) {
|
||||
Asserts.assertEquals(la[i], lb[index++]);
|
||||
}
|
||||
}
|
||||
for (int i = index; i < vlen; i++) {
|
||||
Asserts.assertEquals(0L, lb[i]);
|
||||
}
|
||||
}
|
||||
|
||||
// Same check as verifyVectorCompressByte, for the float arrays fa/fb.
@DontInline
|
||||
static void verifyVectorCompressFloat(int vlen) {
|
||||
int index = 0;
|
||||
for (int i = 0; i < vlen; i++) {
|
||||
if (ma[i]) {
|
||||
Asserts.assertEquals(fa[i], fb[index++]);
|
||||
}
|
||||
}
|
||||
for (int i = index; i < vlen; i++) {
|
||||
Asserts.assertEquals(0.0f, fb[i]);
|
||||
}
|
||||
}
|
||||
|
||||
// Same check as verifyVectorCompressByte, for the double arrays da/db.
@DontInline
|
||||
static void verifyVectorCompressDouble(int vlen) {
|
||||
int index = 0;
|
||||
for (int i = 0; i < vlen; i++) {
|
||||
if (ma[i]) {
|
||||
Asserts.assertEquals(da[i], db[index++]);
|
||||
}
|
||||
}
|
||||
for (int i = index; i < vlen; i++) {
|
||||
Asserts.assertEquals(0.0, db[i]);
|
||||
}
|
||||
}
|
||||
|
||||
// Byte compress: loads from ba and ma at offset 0, stores the compressed
// vector into bb, then verifies B_SPECIES.length() lanes.
// IR rules: COMPRESS_VB "= 1" with sve, or with avx512_vbmi2 and avx512vl.
@Test
|
||||
@IR(counts = { IRNode.COMPRESS_VB, "= 1" },
|
||||
applyIfCPUFeature = { "sve", "true" })
|
||||
@IR(counts = { IRNode.COMPRESS_VB, "= 1" },
|
||||
applyIfCPUFeatureAnd = {"avx512_vbmi2", "true", "avx512vl", "true"})
|
||||
public static void testVectorCompressByte() {
|
||||
ByteVector av = ByteVector.fromArray(B_SPECIES, ba, 0);
|
||||
VectorMask<Byte> m = VectorMask.fromArray(B_SPECIES, ma, 0);
|
||||
av.compress(m).intoArray(bb, 0);
|
||||
verifyVectorCompressByte(B_SPECIES.length());
|
||||
}
|
||||
|
||||
// Short compress: loads from sa and ma at offset 0, stores into sb, then
// verifies S_SPECIES.length() lanes.
// IR rules: COMPRESS_VS "= 1" with sve, or with avx512_vbmi2 and avx512vl.
@Test
|
||||
@IR(counts = { IRNode.COMPRESS_VS, "= 1" },
|
||||
applyIfCPUFeature = { "sve", "true" })
|
||||
@IR(counts = { IRNode.COMPRESS_VS, "= 1" },
|
||||
applyIfCPUFeatureAnd = {"avx512_vbmi2", "true", "avx512vl", "true"})
|
||||
public static void testVectorCompressShort() {
|
||||
ShortVector av = ShortVector.fromArray(S_SPECIES, sa, 0);
|
||||
VectorMask<Short> m = VectorMask.fromArray(S_SPECIES, ma, 0);
|
||||
av.compress(m).intoArray(sb, 0);
|
||||
verifyVectorCompressShort(S_SPECIES.length());
|
||||
}
|
||||
|
||||
// Int compress: loads from ia and ma at offset 0, stores into ib, then
// verifies I_SPECIES.length() lanes.
// IR rules: COMPRESS_VI "= 1" with sve, or with avx512f and avx512vl.
@Test
|
||||
@IR(counts = { IRNode.COMPRESS_VI, "= 1" },
|
||||
applyIfCPUFeature = { "sve", "true" })
|
||||
@IR(counts = { IRNode.COMPRESS_VI, "= 1" },
|
||||
applyIfCPUFeatureAnd = {"avx512f", "true", "avx512vl", "true"})
|
||||
public static void testVectorCompressInt() {
|
||||
IntVector av = IntVector.fromArray(I_SPECIES, ia, 0);
|
||||
VectorMask<Integer> m = VectorMask.fromArray(I_SPECIES, ma, 0);
|
||||
av.compress(m).intoArray(ib, 0);
|
||||
verifyVectorCompressInteger(I_SPECIES.length());
|
||||
}
|
||||
|
||||
// Long compress: loads from la and ma at offset 0, stores into lb, then
// verifies L_SPECIES.length() lanes.
// IR rules: COMPRESS_VL "= 1" with sve, or with avx512f and avx512vl.
@Test
|
||||
@IR(counts = { IRNode.COMPRESS_VL, "= 1" },
|
||||
applyIfCPUFeature = { "sve", "true" })
|
||||
@IR(counts = { IRNode.COMPRESS_VL, "= 1" },
|
||||
applyIfCPUFeatureAnd = {"avx512f", "true", "avx512vl", "true"})
|
||||
public static void testVectorCompressLong() {
|
||||
LongVector av = LongVector.fromArray(L_SPECIES, la, 0);
|
||||
VectorMask<Long> m = VectorMask.fromArray(L_SPECIES, ma, 0);
|
||||
av.compress(m).intoArray(lb, 0);
|
||||
verifyVectorCompressLong(L_SPECIES.length());
|
||||
}
|
||||
|
||||
// Float compress: loads from fa and ma at offset 0, stores into fb, then
// verifies F_SPECIES.length() lanes.
// IR rules: COMPRESS_VF "= 1" with sve, or with avx512f and avx512vl.
@Test
|
||||
@IR(counts = { IRNode.COMPRESS_VF, "= 1" },
|
||||
applyIfCPUFeature = { "sve", "true" })
|
||||
@IR(counts = { IRNode.COMPRESS_VF, "= 1" },
|
||||
applyIfCPUFeatureAnd = {"avx512f", "true", "avx512vl", "true"})
|
||||
public static void testVectorCompressFloat() {
|
||||
FloatVector av = FloatVector.fromArray(F_SPECIES, fa, 0);
|
||||
VectorMask<Float> m = VectorMask.fromArray(F_SPECIES, ma, 0);
|
||||
av.compress(m).intoArray(fb, 0);
|
||||
verifyVectorCompressFloat(F_SPECIES.length());
|
||||
}
|
||||
|
||||
// Double compress: loads from da and ma at offset 0, stores into db, then
// verifies D_SPECIES.length() lanes.
// IR rules: COMPRESS_VD "= 1" with sve, or with avx512f and avx512vl.
@Test
|
||||
@IR(counts = { IRNode.COMPRESS_VD, "= 1" },
|
||||
applyIfCPUFeature = { "sve", "true" })
|
||||
@IR(counts = { IRNode.COMPRESS_VD, "= 1" },
|
||||
applyIfCPUFeatureAnd = {"avx512f", "true", "avx512vl", "true"})
|
||||
public static void testVectorCompressDouble() {
|
||||
DoubleVector av = DoubleVector.fromArray(D_SPECIES, da, 0);
|
||||
VectorMask<Double> m = VectorMask.fromArray(D_SPECIES, ma, 0);
|
||||
av.compress(m).intoArray(db, 0);
|
||||
verifyVectorCompressDouble(D_SPECIES.length());
|
||||
}
|
||||
|
||||
// Entry point: starts a TestFramework with a default warmup of 10000 and
// the --add-modules=jdk.incubator.vector flag.
public static void main(String[] args) {
|
||||
TestFramework testFramework = new TestFramework();
|
||||
testFramework.setDefaultWarmup(10000)
|
||||
.addFlags("--add-modules=jdk.incubator.vector")
|
||||
.start();
|
||||
}
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user