8307572: AArch64: Vector registers are clobbered by some macroassemblers

Reviewed-by: aph, adinn
This commit is contained in:
Ningsheng Jian 2023-05-12 02:05:18 +00:00
parent 9a7b4431ec
commit 33d9a85730
6 changed files with 391 additions and 57 deletions

View File

@ -17105,14 +17105,17 @@ instruct string_compareUU_sve(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI
%}
instruct string_indexofUU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3,
iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg cr)
iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
iRegINoSp tmp3, iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6,
vRegD_V0 vtmp0, vRegD_V1 vtmp1, rFlagsReg cr)
%{
predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU);
match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL cr);
format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UU)" %}
TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6,
TEMP vtmp0, TEMP vtmp1, KILL cr);
format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UU) "
"# KILL $str1 $cnt1 $str2 $cnt2 $tmp1 $tmp2 $tmp3 $tmp4 $tmp5 $tmp6 V0-V1 cr" %}
ins_encode %{
__ string_indexof($str1$$Register, $str2$$Register,
@ -17126,14 +17129,17 @@ instruct string_indexofUU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2
%}
instruct string_indexofLL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3,
iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg cr)
iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3,
iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6,
vRegD_V0 vtmp0, vRegD_V1 vtmp1, rFlagsReg cr)
%{
predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL);
match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL cr);
format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (LL)" %}
TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6,
TEMP vtmp0, TEMP vtmp1, KILL cr);
format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (LL) "
"# KILL $str1 $cnt1 $str2 $cnt2 $tmp1 $tmp2 $tmp3 $tmp4 $tmp5 $tmp6 V0-V1 cr" %}
ins_encode %{
__ string_indexof($str1$$Register, $str2$$Register,
@ -17147,14 +17153,17 @@ instruct string_indexofLL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2
%}
instruct string_indexofUL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3,
iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg cr)
iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,iRegINoSp tmp3,
iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6,
vRegD_V0 vtmp0, vRegD_V1 vtmp1, rFlagsReg cr)
%{
predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL);
match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL cr);
format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UL)" %}
TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5,
TEMP tmp6, TEMP vtmp0, TEMP vtmp1, KILL cr);
format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UL) "
"# KILL $str1 $cnt1 $str2 $cnt2 $tmp1 $tmp2 $tmp3 $tmp4 $tmp5 $tmp6 V0-V1 cr" %}
ins_encode %{
__ string_indexof($str1$$Register, $str2$$Register,
@ -17168,14 +17177,15 @@ instruct string_indexofUL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2
%}
instruct string_indexof_conUU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
immI_le_4 int_cnt2, iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
immI_le_4 int_cnt2, iRegI_R0 result, iRegINoSp tmp1,
iRegINoSp tmp2, iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
%{
predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU);
match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UU)" %}
format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UU) "
"# KILL $str1 $cnt1 $str2 $tmp1 $tmp2 $tmp3 $tmp4 cr" %}
ins_encode %{
int icnt2 = (int)$int_cnt2$$constant;
@ -17189,14 +17199,15 @@ instruct string_indexof_conUU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
%}
instruct string_indexof_conLL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
immI_le_4 int_cnt2, iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
immI_le_4 int_cnt2, iRegI_R0 result, iRegINoSp tmp1,
iRegINoSp tmp2, iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
%{
predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL);
match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (LL)" %}
format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (LL) "
"# KILL $str1 $cnt1 $str2 $tmp1 $tmp2 $tmp3 $tmp4 cr" %}
ins_encode %{
int icnt2 = (int)$int_cnt2$$constant;
@ -17210,14 +17221,15 @@ instruct string_indexof_conLL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
%}
instruct string_indexof_conUL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
immI_1 int_cnt2, iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
immI_1 int_cnt2, iRegI_R0 result, iRegINoSp tmp1,
iRegINoSp tmp2, iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
%{
predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL);
match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UL)" %}
format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UL) "
"# KILL $str1 $cnt1 $str2 $tmp1 $tmp2 $tmp3 $tmp4 cr" %}
ins_encode %{
int icnt2 = (int)$int_cnt2$$constant;
@ -17334,13 +17346,17 @@ instruct string_equalsU(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt,
instruct array_equalsB(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
iRegP_R3 tmp1, iRegP_R4 tmp2, iRegP_R5 tmp3,
vRegD_V0 vtmp0, vRegD_V1 vtmp1, vRegD_V2 vtmp2, vRegD_V3 vtmp3,
vRegD_V4 vtmp4, vRegD_V5 vtmp5, vRegD_V6 vtmp6, vRegD_V7 vtmp7,
iRegP_R10 tmp, rFlagsReg cr)
%{
predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
match(Set result (AryEq ary1 ary2));
effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3,
TEMP vtmp0, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4, TEMP vtmp5,
TEMP vtmp6, TEMP vtmp7, KILL cr);
format %{ "Array Equals $ary1,ary2 -> $result // KILL $tmp" %}
format %{ "Array Equals $ary1,$ary2 -> $result # KILL $ary1 $ary2 $tmp $tmp1 $tmp2 $tmp3 V0-V7 cr" %}
ins_encode %{
address tpc = __ arrays_equals($ary1$$Register, $ary2$$Register,
$tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
@ -17355,13 +17371,17 @@ instruct array_equalsB(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
instruct array_equalsC(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
iRegP_R3 tmp1, iRegP_R4 tmp2, iRegP_R5 tmp3,
vRegD_V0 vtmp0, vRegD_V1 vtmp1, vRegD_V2 vtmp2, vRegD_V3 vtmp3,
vRegD_V4 vtmp4, vRegD_V5 vtmp5, vRegD_V6 vtmp6, vRegD_V7 vtmp7,
iRegP_R10 tmp, rFlagsReg cr)
%{
predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
match(Set result (AryEq ary1 ary2));
effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3,
TEMP vtmp0, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4, TEMP vtmp5,
TEMP vtmp6, TEMP vtmp7, KILL cr);
format %{ "Array Equals $ary1,ary2 -> $result // KILL $tmp" %}
format %{ "Array Equals $ary1,$ary2 -> $result # KILL $ary1 $ary2 $tmp $tmp1 $tmp2 $tmp3 V0-V7 cr" %}
ins_encode %{
address tpc = __ arrays_equals($ary1$$Register, $ary2$$Register,
$tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
@ -17391,36 +17411,39 @@ instruct count_positives(iRegP_R1 ary1, iRegI_R2 len, iRegI_R0 result, rFlagsReg
// fast char[] to byte[] compression
instruct string_compress(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
vRegD_V0 tmp1, vRegD_V1 tmp2,
vRegD_V2 tmp3, vRegD_V3 tmp4,
vRegD_V0 vtmp0, vRegD_V1 vtmp1, vRegD_V2 vtmp2,
vRegD_V3 vtmp3, vRegD_V4 vtmp4, vRegD_V5 vtmp5,
iRegI_R0 result, rFlagsReg cr)
%{
match(Set result (StrCompressedCopy src (Binary dst len)));
effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4,
effect(TEMP vtmp0, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4, TEMP vtmp5,
USE_KILL src, USE_KILL dst, USE len, KILL cr);
format %{ "String Compress $src,$dst,$len -> $result // KILL $src,$dst" %}
format %{ "String Compress $src,$dst,$len -> $result # KILL $src $dst V0-V5 cr" %}
ins_encode %{
__ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
$result$$Register,
$tmp1$$FloatRegister, $tmp2$$FloatRegister,
$tmp3$$FloatRegister, $tmp4$$FloatRegister);
$result$$Register, $vtmp0$$FloatRegister, $vtmp1$$FloatRegister,
$vtmp2$$FloatRegister, $vtmp3$$FloatRegister,
$vtmp4$$FloatRegister, $vtmp5$$FloatRegister);
%}
ins_pipe(pipe_slow);
%}
// fast byte[] to char[] inflation
instruct string_inflate(Universe dummy, iRegP_R0 src, iRegP_R1 dst, iRegI_R2 len,
vRegD_V0 tmp1, vRegD_V1 tmp2, vRegD_V2 tmp3, iRegP_R3 tmp4, rFlagsReg cr)
instruct string_inflate(Universe dummy, iRegP_R0 src, iRegP_R1 dst, iRegI_R2 len, iRegP_R3 tmp,
vRegD_V0 vtmp0, vRegD_V1 vtmp1, vRegD_V2 vtmp2, vRegD_V3 vtmp3,
vRegD_V4 vtmp4, vRegD_V5 vtmp5, vRegD_V6 vtmp6, rFlagsReg cr)
%{
match(Set dummy (StrInflatedCopy src (Binary dst len)));
effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
effect(TEMP vtmp0, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3,
TEMP vtmp4, TEMP vtmp5, TEMP vtmp6, TEMP tmp,
USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
format %{ "String Inflate $src,$dst # KILL $tmp $src $dst $len V0-V6 cr" %}
ins_encode %{
address tpc = __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
$tmp1$$FloatRegister, $tmp2$$FloatRegister,
$tmp3$$FloatRegister, $tmp4$$Register);
$vtmp0$$FloatRegister, $vtmp1$$FloatRegister,
$vtmp2$$FloatRegister, $tmp$$Register);
if (tpc == NULL) {
ciEnv::current()->record_failure("CodeCache is full");
return;
@ -17431,41 +17454,43 @@ instruct string_inflate(Universe dummy, iRegP_R0 src, iRegP_R1 dst, iRegI_R2 len
// encode char[] to byte[] in ISO_8859_1
instruct encode_iso_array(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
vRegD_V0 vtmp0, vRegD_V1 vtmp1,
vRegD_V2 vtmp2, vRegD_V3 vtmp3,
vRegD_V0 vtmp0, vRegD_V1 vtmp1, vRegD_V2 vtmp2,
vRegD_V3 vtmp3, vRegD_V4 vtmp4, vRegD_V5 vtmp5,
iRegI_R0 result, rFlagsReg cr)
%{
predicate(!((EncodeISOArrayNode*)n)->is_ascii());
match(Set result (EncodeISOArray src (Binary dst len)));
effect(USE_KILL src, USE_KILL dst, USE len,
KILL vtmp0, KILL vtmp1, KILL vtmp2, KILL vtmp3, KILL cr);
effect(USE_KILL src, USE_KILL dst, USE len, KILL vtmp0, KILL vtmp1,
KILL vtmp2, KILL vtmp3, KILL vtmp4, KILL vtmp5, KILL cr);
format %{ "Encode ISO array $src,$dst,$len -> $result" %}
format %{ "Encode ISO array $src,$dst,$len -> $result # KILL $src $dst V0-V5 cr" %}
ins_encode %{
__ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
$result$$Register, false,
$vtmp0$$FloatRegister, $vtmp1$$FloatRegister,
$vtmp2$$FloatRegister, $vtmp3$$FloatRegister);
$vtmp2$$FloatRegister, $vtmp3$$FloatRegister,
$vtmp4$$FloatRegister, $vtmp5$$FloatRegister);
%}
ins_pipe(pipe_class_memory);
%}
instruct encode_ascii_array(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
vRegD_V0 vtmp0, vRegD_V1 vtmp1,
vRegD_V2 vtmp2, vRegD_V3 vtmp3,
vRegD_V0 vtmp0, vRegD_V1 vtmp1, vRegD_V2 vtmp2,
vRegD_V3 vtmp3, vRegD_V4 vtmp4, vRegD_V5 vtmp5,
iRegI_R0 result, rFlagsReg cr)
%{
predicate(((EncodeISOArrayNode*)n)->is_ascii());
match(Set result (EncodeISOArray src (Binary dst len)));
effect(USE_KILL src, USE_KILL dst, USE len,
KILL vtmp0, KILL vtmp1, KILL vtmp2, KILL vtmp3, KILL cr);
effect(USE_KILL src, USE_KILL dst, USE len, KILL vtmp0, KILL vtmp1,
KILL vtmp2, KILL vtmp3, KILL vtmp4, KILL vtmp5, KILL cr);
format %{ "Encode ASCII array $src,$dst,$len -> $result" %}
format %{ "Encode ASCII array $src,$dst,$len -> $result # KILL $src $dst V0-V5 cr" %}
ins_encode %{
__ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
$result$$Register, true,
$vtmp0$$FloatRegister, $vtmp1$$FloatRegister,
$vtmp2$$FloatRegister, $vtmp3$$FloatRegister);
$vtmp2$$FloatRegister, $vtmp3$$FloatRegister,
$vtmp4$$FloatRegister, $vtmp5$$FloatRegister);
%}
ins_pipe(pipe_class_memory);
%}

View File

@ -46,6 +46,7 @@
typedef void (MacroAssembler::* chr_insn)(Register Rt, const Address &adr);
// Search for str1 in str2 and return index or -1
// Clobbers: rscratch1, rscratch2, rflags. May also clobber v0-v1, when icnt1==-1.
void C2_MacroAssembler::string_indexof(Register str2, Register str1,
Register cnt2, Register cnt1,
Register tmp1, Register tmp2,

View File

@ -5008,6 +5008,8 @@ address MacroAssembler::count_positives(Register ary1, Register len, Register re
return pc();
}
// Clobbers: rscratch1, rscratch2, rflags
// May also clobber v0-v7 when (!UseSimpleArrayEquals && UseSIMDForArrayEquals)
address MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3,
Register tmp4, Register tmp5, Register result,
Register cnt1, int elem_size) {
@ -5557,10 +5559,12 @@ void MacroAssembler::fill_words(Register base, Register cnt, Register value)
// Using 'umaxv' in the ASCII-case comes with a small penalty but does
// avoid additional bloat.
//
// Clobbers: src, dst, res, rscratch1, rscratch2, rflags
void MacroAssembler::encode_iso_array(Register src, Register dst,
Register len, Register res, bool ascii,
FloatRegister vtmp0, FloatRegister vtmp1,
FloatRegister vtmp2, FloatRegister vtmp3)
FloatRegister vtmp2, FloatRegister vtmp3,
FloatRegister vtmp4, FloatRegister vtmp5)
{
Register cnt = res;
Register max = rscratch1;
@ -5579,8 +5583,8 @@ void MacroAssembler::encode_iso_array(Register src, Register dst,
br(LT, DONE_32);
ld1(vtmp0, vtmp1, vtmp2, vtmp3, T8H, Address(post(src, 64)));
// Extract lower bytes.
FloatRegister vlo0 = v4;
FloatRegister vlo1 = v5;
FloatRegister vlo0 = vtmp4;
FloatRegister vlo1 = vtmp5;
uzp1(vlo0, T16B, vtmp0, vtmp1);
uzp1(vlo1, T16B, vtmp2, vtmp3);
// Merge bits...
@ -5653,6 +5657,7 @@ void MacroAssembler::encode_iso_array(Register src, Register dst,
}
// Inflate byte[] array to char[].
// Clobbers: src, dst, len, rflags, rscratch1, v0-v6
address MacroAssembler::byte_array_inflate(Register src, Register dst, Register len,
FloatRegister vtmp1, FloatRegister vtmp2,
FloatRegister vtmp3, Register tmp4) {
@ -5761,8 +5766,9 @@ address MacroAssembler::byte_array_inflate(Register src, Register dst, Register
void MacroAssembler::char_array_compress(Register src, Register dst, Register len,
Register res,
FloatRegister tmp0, FloatRegister tmp1,
FloatRegister tmp2, FloatRegister tmp3) {
encode_iso_array(src, dst, len, res, false, tmp0, tmp1, tmp2, tmp3);
FloatRegister tmp2, FloatRegister tmp3,
FloatRegister tmp4, FloatRegister tmp5) {
encode_iso_array(src, dst, len, res, false, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5);
// Adjust result: res == len ? len : 0
cmp(len, res);
csel(res, res, zr, EQ);

View File

@ -1393,12 +1393,14 @@ public:
void char_array_compress(Register src, Register dst, Register len,
Register res,
FloatRegister vtmp0, FloatRegister vtmp1,
FloatRegister vtmp2, FloatRegister vtmp3);
FloatRegister vtmp2, FloatRegister vtmp3,
FloatRegister vtmp4, FloatRegister vtmp5);
void encode_iso_array(Register src, Register dst,
Register len, Register res, bool ascii,
FloatRegister vtmp0, FloatRegister vtmp1,
FloatRegister vtmp2, FloatRegister vtmp3);
FloatRegister vtmp2, FloatRegister vtmp3,
FloatRegister vtmp4, FloatRegister vtmp5);
void fast_log(FloatRegister vtmp0, FloatRegister vtmp1, FloatRegister vtmp2,
FloatRegister vtmp3, FloatRegister vtmp4, FloatRegister vtmp5,

View File

@ -5151,6 +5151,7 @@ class StubGenerator: public StubCodeGenerator {
// result = r0 - return value. Already contains "false"
// cnt1 = r10 - amount of elements left to check, reduced by wordSize
// r3-r5 are reserved temporary registers
// Clobbers: v0-v7 when UseSIMDForArrayEquals, rscratch1, rscratch2
address generate_large_array_equals() {
Register a1 = r1, a2 = r2, result = r0, cnt1 = r10, tmp1 = rscratch1,
tmp2 = rscratch2, tmp3 = r3, tmp4 = r4, tmp5 = r5, tmp6 = r11,
@ -5734,6 +5735,8 @@ class StubGenerator: public StubCodeGenerator {
// R2 = cnt1
// R3 = str1
// R4 = cnt2
// Clobbers: rscratch1, rscratch2, v0, v1, rflags
//
// This generic linear code uses a few additional ideas, which make it faster:
// 1) we can safely keep at least 1st register of pattern(since length >= 8)
// in order to skip initial loading(help in systems with 1 ld pipeline)
@ -6048,6 +6051,7 @@ class StubGenerator: public StubCodeGenerator {
// R3 = len >> 3
// V0 = 0
// v1 = loaded 8 bytes
// Clobbers: r0, r1, r3, rscratch1, rflags, v0-v6
address generate_large_byte_array_inflate() {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", "large_byte_array_inflate");

View File

@ -0,0 +1,296 @@
/*
* Copyright (c) 2023, Arm Limited. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
/*
* @test
* @bug 8307572
* @summary Verify vector register clobbering in some aarch64 intrinsics
* @library /compiler/patches /test/lib
* @build java.base/java.lang.Helper
* @run main/othervm -Xbatch -XX:CompileThreshold=100 -XX:-TieredCompilation compiler.c2.aarch64.TestIntrinsicsRegStress
*/
package compiler.c2.aarch64;
import java.util.Arrays;
/**
* Register-pressure stress test around four library calls that the test
* expects C2 to compile as intrinsics: String.indexOf, Arrays.equals,
* Helper.compressChar and String.getChars.
*
* Every check method follows the same shape: load 30 products of fv[] into
* float locals, perform one library call, then consume the locals. Because
* init() sets every element of fv to 1, each check adds exactly 0 to fv[31],
* so fv[31] must still be 1.0 when verifyAndReset() runs. Any other value
* means at least one of the float locals changed across the call.
*
* NOTE(review): the statement order and the explicit t0..t30 locals are what
* create the register pressure; restructuring them (loops, arrays) would
* likely defeat the purpose of the test.
*/
public class TestIntrinsicsRegStress {
// Length of the char arrays ca/cb and of the string str built from ca.
final int LENGTH = 1024;
// Number of times each check method is invoked by its test driver.
final int ITER = 10000;
// Size of fv and strings; also the period of the character pattern in ca.
final int NUM = 32;
// Result of the most recent Helper.compressChar call (see checkCompress).
byte[] ba;
// Pattern data: ca[i] == 'a' + i % NUM.
char[] ca;
// Clone of ca taken in init(); compared against ca in checkArraysEquals.
char[] cb;
// Float operands, all set to 1 by init(); fv[31] doubles as the accumulator
// inspected by verifyAndReset().
float[] fv;
// String built from ca in init().
String str;
// strings[i] == str.substring(i) + needle, for i in [0, NUM).
String[] strings;
// Pattern searched for by checkIndexOf; it is appended to every entry of strings.
String needle = "01234567890123456789";
/**
* Populates all test data. Must be called before any test method, since the
* array and string fields are otherwise null.
*/
public void init() {
ca = new char[LENGTH];
fv = new float[NUM];
strings = new String[NUM];
for (int i = 0; i < LENGTH; i++) {
ca[i] = (char) ('a' + i % NUM);
}
cb = ca.clone();
str = new String(ca);
// All ones, so every product t<k> below is 1 and the alternating sums cancel.
for (int i = 0; i < NUM; i++) {
fv[i] = 1;
}
// Each entry ends with needle, so indexOf(needle) always finds a match.
for (int i = 0; i < NUM; i++) {
strings[i] = str.substring(i) + needle;
}
}
/**
* Keeps 30 float locals live across String.indexOf and then folds them into
* fv[31]. With all fv elements equal to 1 the net contribution is 0.
*
* @param iter iteration counter; selects strings[iter % NUM] as the haystack
*/
public void checkIndexOf(int iter) {
float t0 = 0;
float t1 = fv[1] * fv[0];
float t2 = fv[2] * fv[0];
float t3 = fv[3] * fv[0];
float t4 = fv[4] * fv[0];
float t5 = fv[5] * fv[0];
float t6 = fv[6] * fv[0];
float t7 = fv[7] * fv[0];
float t8 = fv[8] * fv[0];
float t9 = fv[9] * fv[0];
float t10 = fv[10] * fv[0];
float t11 = fv[11] * fv[0];
float t12 = fv[12] * fv[0];
float t13 = fv[13] * fv[0];
float t14 = fv[14] * fv[0];
float t15 = fv[15] * fv[0];
float t16 = fv[16] * fv[0];
float t17 = fv[17] * fv[0];
float t18 = fv[18] * fv[0];
float t19 = fv[19] * fv[0];
float t20 = fv[20] * fv[0];
float t21 = fv[21] * fv[0];
float t22 = fv[22] * fv[0];
float t23 = fv[23] * fv[0];
float t24 = fv[24] * fv[0];
float t25 = fv[25] * fv[0];
float t26 = fv[26] * fv[0];
float t27 = fv[27] * fv[0];
float t28 = fv[28] * fv[0];
float t29 = fv[29] * fv[0];
float t30 = fv[30] * fv[0];
// The call under test; the locals above are all read again after it.
int result = strings[iter % NUM].indexOf(needle);
if (result > LENGTH - NUM / 2) {
// Use as many fp registers as possible and try to keep them
// live across the intrinsic call above.
t0 += t1 - t2 + t3 - t4 + t5 - t6 + t7 - t8 + t9 - t10 + t11 - t12 + t13 - t14 + t15
- t16 + t17 - t18 + t19 - t20 + t21 - t22 + t23 - t24 + t25 - t26 + t27 - t28
+ t29 - t30; // 0
}
// Two locals added, two subtracted: contributes 0 when every t<k> is 1.
fv[31] += t0 + t2 - t11 + t16 - t29;
}
/**
* Runs checkIndexOf ITER times, passing the loop index as the iteration counter.
*/
public void testIndexOf() {
for (int i = 0; i < ITER; i++) {
checkIndexOf(i);
}
}
/**
* Keeps 30 float locals live across Arrays.equals(ca, cb) and then folds
* them into fv[31]. With all fv elements equal to 1 the net contribution is 0.
*/
public void checkArraysEquals() {
float t0 = 0;
float t1 = fv[1] * fv[0];
float t2 = fv[2] * fv[0];
float t3 = fv[3] * fv[0];
float t4 = fv[4] * fv[0];
float t5 = fv[5] * fv[0];
float t6 = fv[6] * fv[0];
float t7 = fv[7] * fv[0];
float t8 = fv[8] * fv[0];
float t9 = fv[9] * fv[0];
float t10 = fv[10] * fv[0];
float t11 = fv[11] * fv[0];
float t12 = fv[12] * fv[0];
float t13 = fv[13] * fv[0];
float t14 = fv[14] * fv[0];
float t15 = fv[15] * fv[0];
float t16 = fv[16] * fv[0];
float t17 = fv[17] * fv[0];
float t18 = fv[18] * fv[0];
float t19 = fv[19] * fv[0];
float t20 = fv[20] * fv[0];
float t21 = fv[21] * fv[0];
float t22 = fv[22] * fv[0];
float t23 = fv[23] * fv[0];
float t24 = fv[24] * fv[0];
float t25 = fv[25] * fv[0];
float t26 = fv[26] * fv[0];
float t27 = fv[27] * fv[0];
float t28 = fv[28] * fv[0];
float t29 = fv[29] * fv[0];
float t30 = fv[30] * fv[0];
// The call under test; cb was cloned from ca in init().
if (Arrays.equals(ca, cb)) {
// Use as many fp registers as possible and try to keep them
// live across the intrinsic call above.
t0 += t1 - t2 + t3 - t4 + t5 - t6 + t7 - t8 + t9 - t10 + t11 - t12 + t13 - t14 + t15
- t16 + t17 - t18 + t19 - t20 + t21 - t22 + t23 - t24 + t25 - t26 + t27 - t28
+ t29 - t30; // 0
}
// Two locals added, two subtracted: contributes 0 when every t<k> is 1.
fv[31] += t0 + t2 - t11 + t16 - t29;
}
/**
* Runs checkArraysEquals ITER times.
*/
public void testArraysEquals() {
for (int i = 0; i < ITER; i++) {
checkArraysEquals();
}
}
/**
* Keeps 30 float locals live across Helper.compressChar and then folds them
* into fv[31]. With all fv elements equal to 1 the net contribution is 0.
*
* @param iter iteration counter; selects which element of the compressed
*             result is inspected afterwards
*/
public void checkCompress(int iter) {
float t0 = 0;
float t1 = fv[1] * fv[0];
float t2 = fv[2] * fv[0];
float t3 = fv[3] * fv[0];
float t4 = fv[4] * fv[0];
float t5 = fv[5] * fv[0];
float t6 = fv[6] * fv[0];
float t7 = fv[7] * fv[0];
float t8 = fv[8] * fv[0];
float t9 = fv[9] * fv[0];
float t10 = fv[10] * fv[0];
float t11 = fv[11] * fv[0];
float t12 = fv[12] * fv[0];
float t13 = fv[13] * fv[0];
float t14 = fv[14] * fv[0];
float t15 = fv[15] * fv[0];
float t16 = fv[16] * fv[0];
float t17 = fv[17] * fv[0];
float t18 = fv[18] * fv[0];
float t19 = fv[19] * fv[0];
float t20 = fv[20] * fv[0];
float t21 = fv[21] * fv[0];
float t22 = fv[22] * fv[0];
float t23 = fv[23] * fv[0];
float t24 = fv[24] * fv[0];
float t25 = fv[25] * fv[0];
float t26 = fv[26] * fv[0];
float t27 = fv[27] * fv[0];
float t28 = fv[28] * fv[0];
float t29 = fv[29] * fv[0];
float t30 = fv[30] * fv[0];
// The call under test. Helper is not defined in this file (the jtreg header
// builds java.base/java.lang.Helper); presumably it forwards to the JDK's
// char-to-byte compression routine -- TODO confirm against Helper.java.
ba = Helper.compressChar(ca, 0, LENGTH, 0, LENGTH);
if (ba[iter % LENGTH] > (byte) ('a' + 5)) {
// Use as many fp registers as possible and try to keep them
// live across the intrinsic call above.
t0 += t1 - t2 + t3 - t4 + t5 - t6 + t7 - t8 + t9 - t10 + t11 - t12 + t13 - t14 + t15
- t16 + t17 - t18 + t19 - t20 + t21 - t22 + t23 - t24 + t25 - t26 + t27 - t28
+ t29 - t30; // 0
}
// Two locals added, two subtracted: contributes 0 when every t<k> is 1.
fv[31] += t0 + t2 - t11 + t16 - t29;
}
/**
* Runs checkCompress ITER times, passing the loop index as the iteration counter.
*/
public void testCompress() {
for (int i = 0; i < ITER; i++) {
checkCompress(i);
}
}
/**
* Keeps 30 float locals live across String.getChars and then folds them
* into fv[31]. With all fv elements equal to 1 the net contribution is 0.
*
* @param iter iteration counter; selects which element of ca is inspected
*             after the copy
*/
public void checkInflate(int iter) {
float t0 = 0;
float t1 = fv[1] * fv[0];
float t2 = fv[2] * fv[0];
float t3 = fv[3] * fv[0];
float t4 = fv[4] * fv[0];
float t5 = fv[5] * fv[0];
float t6 = fv[6] * fv[0];
float t7 = fv[7] * fv[0];
float t8 = fv[8] * fv[0];
float t9 = fv[9] * fv[0];
float t10 = fv[10] * fv[0];
float t11 = fv[11] * fv[0];
float t12 = fv[12] * fv[0];
float t13 = fv[13] * fv[0];
float t14 = fv[14] * fv[0];
float t15 = fv[15] * fv[0];
float t16 = fv[16] * fv[0];
float t17 = fv[17] * fv[0];
float t18 = fv[18] * fv[0];
float t19 = fv[19] * fv[0];
float t20 = fv[20] * fv[0];
float t21 = fv[21] * fv[0];
float t22 = fv[22] * fv[0];
float t23 = fv[23] * fv[0];
float t24 = fv[24] * fv[0];
float t25 = fv[25] * fv[0];
float t26 = fv[26] * fv[0];
float t27 = fv[27] * fv[0];
float t28 = fv[28] * fv[0];
float t29 = fv[29] * fv[0];
float t30 = fv[30] * fv[0];
// The call under test: copies the first LENGTH chars of str back into ca.
// Presumably this exercises the byte[]-to-char[] inflate intrinsic, since
// str only holds chars below 256 -- TODO confirm.
str.getChars(0, LENGTH, ca, 0);
if (ca[iter % LENGTH] > (byte) ('a' + NUM / 2)) {
// Use as many fp registers as possible and try to keep them
// live across the intrinsic call above.
t0 += t1 - t2 + t3 - t4 + t5 - t6 + t7 - t8 + t9 - t10 + t11 - t12 + t13 - t14 + t15
- t16 + t17 - t18 + t19 - t20 + t21 - t22 + t23 - t24 + t25 - t26 + t27 - t28
+ t29 - t30; // 0
}
// Two locals added, two subtracted: contributes 0 when every t<k> is 1.
fv[31] += t0 + t2 - t11 + t16 - t29;
}
/**
* Runs checkInflate ITER times, passing the loop index as the iteration counter.
*/
public void testInflate() {
for (int i = 0; i < ITER; i++) {
checkInflate(i);
}
}
/**
* Checks that the accumulator fv[31] is still exactly 1.0 after a test run,
* prints "Success!" if so, and then stores 1.0f back into fv[31].
*
* @throws RuntimeException if fv[31] differs from 1.0; the message includes
*         the observed value
*/
public void verifyAndReset() {
if (fv[31] != 1.0) {
throw new RuntimeException("Failed with " + Float.toString(fv[31]));
} else {
System.out.println("Success!");
}
fv[31] = 1.0f;
}
/**
* Test entry point: initializes the data once, then runs each of the four
* stress loops, verifying the accumulator after every one.
*/
public static void main(String[] args) {
TestIntrinsicsRegStress t = new TestIntrinsicsRegStress();
t.init();
t.testIndexOf();
t.verifyAndReset();
t.testArraysEquals();
t.verifyAndReset();
t.testCompress();
t.verifyAndReset();
t.testInflate();
t.verifyAndReset();
}
}