mirror of
https://github.com/openjdk/jdk.git
synced 2026-05-19 18:07:49 +00:00
8307572: AArch64: Vector registers are clobbered by some macroassemblers
Reviewed-by: aph, adinn
This commit is contained in:
parent
9a7b4431ec
commit
33d9a85730
@ -17105,14 +17105,17 @@ instruct string_compareUU_sve(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI
|
||||
%}
|
||||
|
||||
instruct string_indexofUU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
|
||||
iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3,
|
||||
iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg cr)
|
||||
iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
|
||||
iRegINoSp tmp3, iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6,
|
||||
vRegD_V0 vtmp0, vRegD_V1 vtmp1, rFlagsReg cr)
|
||||
%{
|
||||
predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU);
|
||||
match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
|
||||
effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
|
||||
TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL cr);
|
||||
format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UU)" %}
|
||||
TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6,
|
||||
TEMP vtmp0, TEMP vtmp1, KILL cr);
|
||||
format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UU) "
|
||||
"# KILL $str1 $cnt1 $str2 $cnt2 $tmp1 $tmp2 $tmp3 $tmp4 $tmp5 $tmp6 V0-V1 cr" %}
|
||||
|
||||
ins_encode %{
|
||||
__ string_indexof($str1$$Register, $str2$$Register,
|
||||
@ -17126,14 +17129,17 @@ instruct string_indexofUU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2
|
||||
%}
|
||||
|
||||
instruct string_indexofLL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
|
||||
iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3,
|
||||
iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg cr)
|
||||
iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3,
|
||||
iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6,
|
||||
vRegD_V0 vtmp0, vRegD_V1 vtmp1, rFlagsReg cr)
|
||||
%{
|
||||
predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL);
|
||||
match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
|
||||
effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
|
||||
TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL cr);
|
||||
format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (LL)" %}
|
||||
TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6,
|
||||
TEMP vtmp0, TEMP vtmp1, KILL cr);
|
||||
format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (LL) "
|
||||
"# KILL $str1 $cnt1 $str2 $cnt2 $tmp1 $tmp2 $tmp3 $tmp4 $tmp5 $tmp6 V0-V1 cr" %}
|
||||
|
||||
ins_encode %{
|
||||
__ string_indexof($str1$$Register, $str2$$Register,
|
||||
@ -17147,14 +17153,17 @@ instruct string_indexofLL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2
|
||||
%}
|
||||
|
||||
instruct string_indexofUL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
|
||||
iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3,
|
||||
iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg cr)
|
||||
iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,iRegINoSp tmp3,
|
||||
iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6,
|
||||
vRegD_V0 vtmp0, vRegD_V1 vtmp1, rFlagsReg cr)
|
||||
%{
|
||||
predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL);
|
||||
match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
|
||||
effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
|
||||
TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL cr);
|
||||
format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UL)" %}
|
||||
TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5,
|
||||
TEMP tmp6, TEMP vtmp0, TEMP vtmp1, KILL cr);
|
||||
// Operand names in the KILL list are written with '$', as in the UU and LL variants.
format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UL) "
|
||||
"# KILL $str1 $cnt1 $str2 $cnt2 $tmp1 $tmp2 $tmp3 $tmp4 $tmp5 $tmp6 V0-V1 cr" %}
|
||||
|
||||
ins_encode %{
|
||||
__ string_indexof($str1$$Register, $str2$$Register,
|
||||
@ -17168,14 +17177,15 @@ instruct string_indexofUL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2
|
||||
%}
|
||||
|
||||
instruct string_indexof_conUU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
|
||||
immI_le_4 int_cnt2, iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
|
||||
iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
|
||||
immI_le_4 int_cnt2, iRegI_R0 result, iRegINoSp tmp1,
|
||||
iRegINoSp tmp2, iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
|
||||
%{
|
||||
predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU);
|
||||
match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
|
||||
effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
|
||||
TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
|
||||
format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UU)" %}
|
||||
format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UU) "
|
||||
"# KILL $str1 $cnt1 $str2 $tmp1 $tmp2 $tmp3 $tmp4 cr" %}
|
||||
|
||||
ins_encode %{
|
||||
int icnt2 = (int)$int_cnt2$$constant;
|
||||
@ -17189,14 +17199,15 @@ instruct string_indexof_conUU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
|
||||
%}
|
||||
|
||||
instruct string_indexof_conLL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
|
||||
immI_le_4 int_cnt2, iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
|
||||
iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
|
||||
immI_le_4 int_cnt2, iRegI_R0 result, iRegINoSp tmp1,
|
||||
iRegINoSp tmp2, iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
|
||||
%{
|
||||
predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL);
|
||||
match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
|
||||
effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
|
||||
TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
|
||||
format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (LL)" %}
|
||||
format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (LL) "
|
||||
"# KILL $str1 $cnt1 $str2 $tmp1 $tmp2 $tmp3 $tmp4 cr" %}
|
||||
|
||||
ins_encode %{
|
||||
int icnt2 = (int)$int_cnt2$$constant;
|
||||
@ -17210,14 +17221,15 @@ instruct string_indexof_conLL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
|
||||
%}
|
||||
|
||||
instruct string_indexof_conUL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
|
||||
immI_1 int_cnt2, iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
|
||||
iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
|
||||
immI_1 int_cnt2, iRegI_R0 result, iRegINoSp tmp1,
|
||||
iRegINoSp tmp2, iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
|
||||
%{
|
||||
predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL);
|
||||
match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
|
||||
effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
|
||||
TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
|
||||
format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UL)" %}
|
||||
format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UL) "
|
||||
"# KILL $str1 $cnt1 $str2 $tmp1 $tmp2 $tmp3 $tmp4 cr" %}
|
||||
|
||||
ins_encode %{
|
||||
int icnt2 = (int)$int_cnt2$$constant;
|
||||
@ -17334,13 +17346,17 @@ instruct string_equalsU(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt,
|
||||
|
||||
instruct array_equalsB(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
|
||||
iRegP_R3 tmp1, iRegP_R4 tmp2, iRegP_R5 tmp3,
|
||||
vRegD_V0 vtmp0, vRegD_V1 vtmp1, vRegD_V2 vtmp2, vRegD_V3 vtmp3,
|
||||
vRegD_V4 vtmp4, vRegD_V5 vtmp5, vRegD_V6 vtmp6, vRegD_V7 vtmp7,
|
||||
iRegP_R10 tmp, rFlagsReg cr)
|
||||
%{
|
||||
predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
|
||||
match(Set result (AryEq ary1 ary2));
|
||||
effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
|
||||
effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3,
|
||||
TEMP vtmp0, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4, TEMP vtmp5,
|
||||
TEMP vtmp6, TEMP vtmp7, KILL cr);
|
||||
|
||||
format %{ "Array Equals $ary1,ary2 -> $result // KILL $tmp" %}
|
||||
// Spell both array operands with '$', like $ary1 and the KILL list that follows.
format %{ "Array Equals $ary1,$ary2 -> $result # KILL $ary1 $ary2 $tmp $tmp1 $tmp2 $tmp3 V0-V7 cr" %}
|
||||
ins_encode %{
|
||||
address tpc = __ arrays_equals($ary1$$Register, $ary2$$Register,
|
||||
$tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
|
||||
@ -17355,13 +17371,17 @@ instruct array_equalsB(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
|
||||
|
||||
instruct array_equalsC(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
|
||||
iRegP_R3 tmp1, iRegP_R4 tmp2, iRegP_R5 tmp3,
|
||||
vRegD_V0 vtmp0, vRegD_V1 vtmp1, vRegD_V2 vtmp2, vRegD_V3 vtmp3,
|
||||
vRegD_V4 vtmp4, vRegD_V5 vtmp5, vRegD_V6 vtmp6, vRegD_V7 vtmp7,
|
||||
iRegP_R10 tmp, rFlagsReg cr)
|
||||
%{
|
||||
predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
|
||||
match(Set result (AryEq ary1 ary2));
|
||||
effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
|
||||
effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3,
|
||||
TEMP vtmp0, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4, TEMP vtmp5,
|
||||
TEMP vtmp6, TEMP vtmp7, KILL cr);
|
||||
|
||||
format %{ "Array Equals $ary1,ary2 -> $result // KILL $tmp" %}
|
||||
// Spell both array operands with '$', like $ary1 and the KILL list that follows.
format %{ "Array Equals $ary1,$ary2 -> $result # KILL $ary1 $ary2 $tmp $tmp1 $tmp2 $tmp3 V0-V7 cr" %}
|
||||
ins_encode %{
|
||||
address tpc = __ arrays_equals($ary1$$Register, $ary2$$Register,
|
||||
$tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
|
||||
@ -17391,36 +17411,39 @@ instruct count_positives(iRegP_R1 ary1, iRegI_R2 len, iRegI_R0 result, rFlagsReg
|
||||
|
||||
// fast char[] to byte[] compression
|
||||
instruct string_compress(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
|
||||
vRegD_V0 tmp1, vRegD_V1 tmp2,
|
||||
vRegD_V2 tmp3, vRegD_V3 tmp4,
|
||||
vRegD_V0 vtmp0, vRegD_V1 vtmp1, vRegD_V2 vtmp2,
|
||||
vRegD_V3 vtmp3, vRegD_V4 vtmp4, vRegD_V5 vtmp5,
|
||||
iRegI_R0 result, rFlagsReg cr)
|
||||
%{
|
||||
match(Set result (StrCompressedCopy src (Binary dst len)));
|
||||
effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4,
|
||||
effect(TEMP vtmp0, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4, TEMP vtmp5,
|
||||
USE_KILL src, USE_KILL dst, USE len, KILL cr);
|
||||
|
||||
format %{ "String Compress $src,$dst,$len -> $result // KILL $src,$dst" %}
|
||||
format %{ "String Compress $src,$dst,$len -> $result # KILL $src $dst V0-V5 cr" %}
|
||||
ins_encode %{
|
||||
__ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
|
||||
$result$$Register,
|
||||
$tmp1$$FloatRegister, $tmp2$$FloatRegister,
|
||||
$tmp3$$FloatRegister, $tmp4$$FloatRegister);
|
||||
$result$$Register, $vtmp0$$FloatRegister, $vtmp1$$FloatRegister,
|
||||
$vtmp2$$FloatRegister, $vtmp3$$FloatRegister,
|
||||
$vtmp4$$FloatRegister, $vtmp5$$FloatRegister);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// fast byte[] to char[] inflation
|
||||
instruct string_inflate(Universe dummy, iRegP_R0 src, iRegP_R1 dst, iRegI_R2 len,
|
||||
vRegD_V0 tmp1, vRegD_V1 tmp2, vRegD_V2 tmp3, iRegP_R3 tmp4, rFlagsReg cr)
|
||||
instruct string_inflate(Universe dummy, iRegP_R0 src, iRegP_R1 dst, iRegI_R2 len, iRegP_R3 tmp,
|
||||
vRegD_V0 vtmp0, vRegD_V1 vtmp1, vRegD_V2 vtmp2, vRegD_V3 vtmp3,
|
||||
vRegD_V4 vtmp4, vRegD_V5 vtmp5, vRegD_V6 vtmp6, rFlagsReg cr)
|
||||
%{
|
||||
match(Set dummy (StrInflatedCopy src (Binary dst len)));
|
||||
effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
|
||||
effect(TEMP vtmp0, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3,
|
||||
TEMP vtmp4, TEMP vtmp5, TEMP vtmp6, TEMP tmp,
|
||||
USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
|
||||
|
||||
format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
|
||||
format %{ "String Inflate $src,$dst # KILL $tmp $src $dst $len V0-V6 cr" %}
|
||||
ins_encode %{
|
||||
address tpc = __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
|
||||
$tmp1$$FloatRegister, $tmp2$$FloatRegister,
|
||||
$tmp3$$FloatRegister, $tmp4$$Register);
|
||||
$vtmp0$$FloatRegister, $vtmp1$$FloatRegister,
|
||||
$vtmp2$$FloatRegister, $tmp$$Register);
|
||||
if (tpc == NULL) {
|
||||
ciEnv::current()->record_failure("CodeCache is full");
|
||||
return;
|
||||
@ -17431,41 +17454,43 @@ instruct string_inflate(Universe dummy, iRegP_R0 src, iRegP_R1 dst, iRegI_R2 len
|
||||
|
||||
// encode char[] to byte[] in ISO_8859_1
|
||||
instruct encode_iso_array(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
|
||||
vRegD_V0 vtmp0, vRegD_V1 vtmp1,
|
||||
vRegD_V2 vtmp2, vRegD_V3 vtmp3,
|
||||
vRegD_V0 vtmp0, vRegD_V1 vtmp1, vRegD_V2 vtmp2,
|
||||
vRegD_V3 vtmp3, vRegD_V4 vtmp4, vRegD_V5 vtmp5,
|
||||
iRegI_R0 result, rFlagsReg cr)
|
||||
%{
|
||||
predicate(!((EncodeISOArrayNode*)n)->is_ascii());
|
||||
match(Set result (EncodeISOArray src (Binary dst len)));
|
||||
effect(USE_KILL src, USE_KILL dst, USE len,
|
||||
KILL vtmp0, KILL vtmp1, KILL vtmp2, KILL vtmp3, KILL cr);
|
||||
effect(USE_KILL src, USE_KILL dst, USE len, KILL vtmp0, KILL vtmp1,
|
||||
KILL vtmp2, KILL vtmp3, KILL vtmp4, KILL vtmp5, KILL cr);
|
||||
|
||||
format %{ "Encode ISO array $src,$dst,$len -> $result" %}
|
||||
format %{ "Encode ISO array $src,$dst,$len -> $result # KILL $src $dst V0-V5 cr" %}
|
||||
ins_encode %{
|
||||
__ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
|
||||
$result$$Register, false,
|
||||
$vtmp0$$FloatRegister, $vtmp1$$FloatRegister,
|
||||
$vtmp2$$FloatRegister, $vtmp3$$FloatRegister);
|
||||
$vtmp2$$FloatRegister, $vtmp3$$FloatRegister,
|
||||
$vtmp4$$FloatRegister, $vtmp5$$FloatRegister);
|
||||
%}
|
||||
ins_pipe(pipe_class_memory);
|
||||
%}
|
||||
|
||||
instruct encode_ascii_array(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
|
||||
vRegD_V0 vtmp0, vRegD_V1 vtmp1,
|
||||
vRegD_V2 vtmp2, vRegD_V3 vtmp3,
|
||||
vRegD_V0 vtmp0, vRegD_V1 vtmp1, vRegD_V2 vtmp2,
|
||||
vRegD_V3 vtmp3, vRegD_V4 vtmp4, vRegD_V5 vtmp5,
|
||||
iRegI_R0 result, rFlagsReg cr)
|
||||
%{
|
||||
predicate(((EncodeISOArrayNode*)n)->is_ascii());
|
||||
match(Set result (EncodeISOArray src (Binary dst len)));
|
||||
effect(USE_KILL src, USE_KILL dst, USE len,
|
||||
KILL vtmp0, KILL vtmp1, KILL vtmp2, KILL vtmp3, KILL cr);
|
||||
effect(USE_KILL src, USE_KILL dst, USE len, KILL vtmp0, KILL vtmp1,
|
||||
KILL vtmp2, KILL vtmp3, KILL vtmp4, KILL vtmp5, KILL cr);
|
||||
|
||||
format %{ "Encode ASCII array $src,$dst,$len -> $result" %}
|
||||
format %{ "Encode ASCII array $src,$dst,$len -> $result # KILL $src $dst V0-V5 cr" %}
|
||||
ins_encode %{
|
||||
__ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
|
||||
$result$$Register, true,
|
||||
$vtmp0$$FloatRegister, $vtmp1$$FloatRegister,
|
||||
$vtmp2$$FloatRegister, $vtmp3$$FloatRegister);
|
||||
$vtmp2$$FloatRegister, $vtmp3$$FloatRegister,
|
||||
$vtmp4$$FloatRegister, $vtmp5$$FloatRegister);
|
||||
%}
|
||||
ins_pipe(pipe_class_memory);
|
||||
%}
|
||||
|
||||
@ -46,6 +46,7 @@
|
||||
typedef void (MacroAssembler::* chr_insn)(Register Rt, const Address &adr);
|
||||
|
||||
// Search for str1 in str2 and return index or -1
|
||||
// Clobbers: rscratch1, rscratch2, rflags. May also clobber v0-v1, when icnt1==-1.
|
||||
void C2_MacroAssembler::string_indexof(Register str2, Register str1,
|
||||
Register cnt2, Register cnt1,
|
||||
Register tmp1, Register tmp2,
|
||||
|
||||
@ -5008,6 +5008,8 @@ address MacroAssembler::count_positives(Register ary1, Register len, Register re
|
||||
return pc();
|
||||
}
|
||||
|
||||
// Clobbers: rscratch1, rscratch2, rflags
|
||||
// May also clobber v0-v7 when (!UseSimpleArrayEquals && UseSIMDForArrayEquals)
|
||||
address MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3,
|
||||
Register tmp4, Register tmp5, Register result,
|
||||
Register cnt1, int elem_size) {
|
||||
@ -5557,10 +5559,12 @@ void MacroAssembler::fill_words(Register base, Register cnt, Register value)
|
||||
// Using 'umaxv' in the ASCII-case comes with a small penalty but does
|
||||
// avoid additional bloat.
|
||||
//
|
||||
// Clobbers: src, dst, res, rscratch1, rscratch2, rflags
|
||||
void MacroAssembler::encode_iso_array(Register src, Register dst,
|
||||
Register len, Register res, bool ascii,
|
||||
FloatRegister vtmp0, FloatRegister vtmp1,
|
||||
FloatRegister vtmp2, FloatRegister vtmp3)
|
||||
FloatRegister vtmp2, FloatRegister vtmp3,
|
||||
FloatRegister vtmp4, FloatRegister vtmp5)
|
||||
{
|
||||
Register cnt = res;
|
||||
Register max = rscratch1;
|
||||
@ -5579,8 +5583,8 @@ void MacroAssembler::encode_iso_array(Register src, Register dst,
|
||||
br(LT, DONE_32);
|
||||
ld1(vtmp0, vtmp1, vtmp2, vtmp3, T8H, Address(post(src, 64)));
|
||||
// Extract lower bytes.
|
||||
FloatRegister vlo0 = v4;
|
||||
FloatRegister vlo1 = v5;
|
||||
FloatRegister vlo0 = vtmp4;
|
||||
FloatRegister vlo1 = vtmp5;
|
||||
uzp1(vlo0, T16B, vtmp0, vtmp1);
|
||||
uzp1(vlo1, T16B, vtmp2, vtmp3);
|
||||
// Merge bits...
|
||||
@ -5653,6 +5657,7 @@ void MacroAssembler::encode_iso_array(Register src, Register dst,
|
||||
}
|
||||
|
||||
// Inflate byte[] array to char[].
|
||||
// Clobbers: src, dst, len, rflags, rscratch1, v0-v6
|
||||
address MacroAssembler::byte_array_inflate(Register src, Register dst, Register len,
|
||||
FloatRegister vtmp1, FloatRegister vtmp2,
|
||||
FloatRegister vtmp3, Register tmp4) {
|
||||
@ -5761,8 +5766,9 @@ address MacroAssembler::byte_array_inflate(Register src, Register dst, Register
|
||||
void MacroAssembler::char_array_compress(Register src, Register dst, Register len,
|
||||
Register res,
|
||||
FloatRegister tmp0, FloatRegister tmp1,
|
||||
FloatRegister tmp2, FloatRegister tmp3) {
|
||||
encode_iso_array(src, dst, len, res, false, tmp0, tmp1, tmp2, tmp3);
|
||||
FloatRegister tmp2, FloatRegister tmp3,
|
||||
FloatRegister tmp4, FloatRegister tmp5) {
|
||||
encode_iso_array(src, dst, len, res, false, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5);
|
||||
// Adjust result: res == len ? len : 0
|
||||
cmp(len, res);
|
||||
csel(res, res, zr, EQ);
|
||||
|
||||
@ -1393,12 +1393,14 @@ public:
|
||||
void char_array_compress(Register src, Register dst, Register len,
|
||||
Register res,
|
||||
FloatRegister vtmp0, FloatRegister vtmp1,
|
||||
FloatRegister vtmp2, FloatRegister vtmp3);
|
||||
FloatRegister vtmp2, FloatRegister vtmp3,
|
||||
FloatRegister vtmp4, FloatRegister vtmp5);
|
||||
|
||||
void encode_iso_array(Register src, Register dst,
|
||||
Register len, Register res, bool ascii,
|
||||
FloatRegister vtmp0, FloatRegister vtmp1,
|
||||
FloatRegister vtmp2, FloatRegister vtmp3);
|
||||
FloatRegister vtmp2, FloatRegister vtmp3,
|
||||
FloatRegister vtmp4, FloatRegister vtmp5);
|
||||
|
||||
void fast_log(FloatRegister vtmp0, FloatRegister vtmp1, FloatRegister vtmp2,
|
||||
FloatRegister vtmp3, FloatRegister vtmp4, FloatRegister vtmp5,
|
||||
|
||||
@ -5151,6 +5151,7 @@ class StubGenerator: public StubCodeGenerator {
|
||||
// result = r0 - return value. Already contains "false"
|
||||
// cnt1 = r10 - amount of elements left to check, reduced by wordSize
|
||||
// r3-r5 are reserved temporary registers
|
||||
// Clobbers: v0-v7 when UseSIMDForArrayEquals, rscratch1, rscratch2
|
||||
address generate_large_array_equals() {
|
||||
Register a1 = r1, a2 = r2, result = r0, cnt1 = r10, tmp1 = rscratch1,
|
||||
tmp2 = rscratch2, tmp3 = r3, tmp4 = r4, tmp5 = r5, tmp6 = r11,
|
||||
@ -5734,6 +5735,8 @@ class StubGenerator: public StubCodeGenerator {
|
||||
// R2 = cnt1
|
||||
// R3 = str1
|
||||
// R4 = cnt2
|
||||
// Clobbers: rscratch1, rscratch2, v0, v1, rflags
|
||||
//
|
||||
// This generic linear code uses a few additional ideas, which make it faster:
|
||||
// 1) we can safely keep at least the 1st register of the pattern (since length >= 8)
|
||||
// in order to skip initial loading (helps on systems with 1 ld pipeline)
|
||||
@ -6048,6 +6051,7 @@ class StubGenerator: public StubCodeGenerator {
|
||||
// R3 = len >> 3
|
||||
// V0 = 0
|
||||
// v1 = loaded 8 bytes
|
||||
// Clobbers: r0, r1, r3, rscratch1, rflags, v0-v6
|
||||
address generate_large_byte_array_inflate() {
|
||||
__ align(CodeEntryAlignment);
|
||||
StubCodeMark mark(this, "StubRoutines", "large_byte_array_inflate");
|
||||
|
||||
@ -0,0 +1,296 @@
|
||||
/*
|
||||
* Copyright (c) 2023, Arm Limited. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
|
||||
/*
|
||||
* @test
|
||||
* @bug 8307572
|
||||
* @summary Verify vector register clobbering in some aarch64 intrinsics
|
||||
* @library /compiler/patches /test/lib
|
||||
* @build java.base/java.lang.Helper
|
||||
* @run main/othervm -Xbatch -XX:CompileThreshold=100 -XX:-TieredCompilation compiler.c2.aarch64.TestIntrinsicsRegStress
|
||||
*/
|
||||
|
||||
package compiler.c2.aarch64;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
public class TestIntrinsicsRegStress {
|
||||
|
||||
final int LENGTH = 1024;
|
||||
final int ITER = 10000;
|
||||
final int NUM = 32;
|
||||
|
||||
byte[] ba;
|
||||
char[] ca;
|
||||
char[] cb;
|
||||
float[] fv;
|
||||
|
||||
String str;
|
||||
String[] strings;
|
||||
String needle = "01234567890123456789";
|
||||
|
||||
public void init() {
|
||||
ca = new char[LENGTH];
|
||||
fv = new float[NUM];
|
||||
strings = new String[NUM];
|
||||
for (int i = 0; i < LENGTH; i++) {
|
||||
ca[i] = (char) ('a' + i % NUM);
|
||||
}
|
||||
cb = ca.clone();
|
||||
str = new String(ca);
|
||||
for (int i = 0; i < NUM; i++) {
|
||||
fv[i] = 1;
|
||||
}
|
||||
for (int i = 0; i < NUM; i++) {
|
||||
strings[i] = str.substring(i) + needle;
|
||||
}
|
||||
}
|
||||
|
||||
public void checkIndexOf(int iter) {
|
||||
float t0 = 0;
|
||||
float t1 = fv[1] * fv[0];
|
||||
float t2 = fv[2] * fv[0];
|
||||
float t3 = fv[3] * fv[0];
|
||||
float t4 = fv[4] * fv[0];
|
||||
float t5 = fv[5] * fv[0];
|
||||
float t6 = fv[6] * fv[0];
|
||||
float t7 = fv[7] * fv[0];
|
||||
float t8 = fv[8] * fv[0];
|
||||
float t9 = fv[9] * fv[0];
|
||||
float t10 = fv[10] * fv[0];
|
||||
float t11 = fv[11] * fv[0];
|
||||
float t12 = fv[12] * fv[0];
|
||||
float t13 = fv[13] * fv[0];
|
||||
float t14 = fv[14] * fv[0];
|
||||
float t15 = fv[15] * fv[0];
|
||||
float t16 = fv[16] * fv[0];
|
||||
float t17 = fv[17] * fv[0];
|
||||
float t18 = fv[18] * fv[0];
|
||||
float t19 = fv[19] * fv[0];
|
||||
float t20 = fv[20] * fv[0];
|
||||
float t21 = fv[21] * fv[0];
|
||||
float t22 = fv[22] * fv[0];
|
||||
float t23 = fv[23] * fv[0];
|
||||
float t24 = fv[24] * fv[0];
|
||||
float t25 = fv[25] * fv[0];
|
||||
float t26 = fv[26] * fv[0];
|
||||
float t27 = fv[27] * fv[0];
|
||||
float t28 = fv[28] * fv[0];
|
||||
float t29 = fv[29] * fv[0];
|
||||
float t30 = fv[30] * fv[0];
|
||||
|
||||
int result = strings[iter % NUM].indexOf(needle);
|
||||
|
||||
if (result > LENGTH - NUM / 2) {
|
||||
// Use as many fp registers as possible and try to keep them
|
||||
// live across the intrinsic call above.
|
||||
t0 += t1 - t2 + t3 - t4 + t5 - t6 + t7 - t8 + t9 - t10 + t11 - t12 + t13 - t14 + t15
|
||||
- t16 + t17 - t18 + t19 - t20 + t21 - t22 + t23 - t24 + t25 - t26 + t27 - t28
|
||||
+ t29 - t30; // 0
|
||||
}
|
||||
fv[31] += t0 + t2 - t11 + t16 - t29;
|
||||
}
|
||||
|
||||
public void testIndexOf() {
|
||||
for (int i = 0; i < ITER; i++) {
|
||||
checkIndexOf(i);
|
||||
}
|
||||
}
|
||||
|
||||
public void checkArraysEquals() {
|
||||
float t0 = 0;
|
||||
float t1 = fv[1] * fv[0];
|
||||
float t2 = fv[2] * fv[0];
|
||||
float t3 = fv[3] * fv[0];
|
||||
float t4 = fv[4] * fv[0];
|
||||
float t5 = fv[5] * fv[0];
|
||||
float t6 = fv[6] * fv[0];
|
||||
float t7 = fv[7] * fv[0];
|
||||
float t8 = fv[8] * fv[0];
|
||||
float t9 = fv[9] * fv[0];
|
||||
float t10 = fv[10] * fv[0];
|
||||
float t11 = fv[11] * fv[0];
|
||||
float t12 = fv[12] * fv[0];
|
||||
float t13 = fv[13] * fv[0];
|
||||
float t14 = fv[14] * fv[0];
|
||||
float t15 = fv[15] * fv[0];
|
||||
float t16 = fv[16] * fv[0];
|
||||
float t17 = fv[17] * fv[0];
|
||||
float t18 = fv[18] * fv[0];
|
||||
float t19 = fv[19] * fv[0];
|
||||
float t20 = fv[20] * fv[0];
|
||||
float t21 = fv[21] * fv[0];
|
||||
float t22 = fv[22] * fv[0];
|
||||
float t23 = fv[23] * fv[0];
|
||||
float t24 = fv[24] * fv[0];
|
||||
float t25 = fv[25] * fv[0];
|
||||
float t26 = fv[26] * fv[0];
|
||||
float t27 = fv[27] * fv[0];
|
||||
float t28 = fv[28] * fv[0];
|
||||
float t29 = fv[29] * fv[0];
|
||||
float t30 = fv[30] * fv[0];
|
||||
|
||||
if (Arrays.equals(ca, cb)) {
|
||||
// Use as many fp registers as possible and try to keep them
|
||||
// live across the intrinsic call above.
|
||||
t0 += t1 - t2 + t3 - t4 + t5 - t6 + t7 - t8 + t9 - t10 + t11 - t12 + t13 - t14 + t15
|
||||
- t16 + t17 - t18 + t19 - t20 + t21 - t22 + t23 - t24 + t25 - t26 + t27 - t28
|
||||
+ t29 - t30; // 0
|
||||
}
|
||||
fv[31] += t0 + t2 - t11 + t16 - t29;
|
||||
}
|
||||
|
||||
public void testArraysEquals() {
|
||||
for (int i = 0; i < ITER; i++) {
|
||||
checkArraysEquals();
|
||||
}
|
||||
}
|
||||
|
||||
public void checkCompress(int iter) {
|
||||
float t0 = 0;
|
||||
float t1 = fv[1] * fv[0];
|
||||
float t2 = fv[2] * fv[0];
|
||||
float t3 = fv[3] * fv[0];
|
||||
float t4 = fv[4] * fv[0];
|
||||
float t5 = fv[5] * fv[0];
|
||||
float t6 = fv[6] * fv[0];
|
||||
float t7 = fv[7] * fv[0];
|
||||
float t8 = fv[8] * fv[0];
|
||||
float t9 = fv[9] * fv[0];
|
||||
float t10 = fv[10] * fv[0];
|
||||
float t11 = fv[11] * fv[0];
|
||||
float t12 = fv[12] * fv[0];
|
||||
float t13 = fv[13] * fv[0];
|
||||
float t14 = fv[14] * fv[0];
|
||||
float t15 = fv[15] * fv[0];
|
||||
float t16 = fv[16] * fv[0];
|
||||
float t17 = fv[17] * fv[0];
|
||||
float t18 = fv[18] * fv[0];
|
||||
float t19 = fv[19] * fv[0];
|
||||
float t20 = fv[20] * fv[0];
|
||||
float t21 = fv[21] * fv[0];
|
||||
float t22 = fv[22] * fv[0];
|
||||
float t23 = fv[23] * fv[0];
|
||||
float t24 = fv[24] * fv[0];
|
||||
float t25 = fv[25] * fv[0];
|
||||
float t26 = fv[26] * fv[0];
|
||||
float t27 = fv[27] * fv[0];
|
||||
float t28 = fv[28] * fv[0];
|
||||
float t29 = fv[29] * fv[0];
|
||||
float t30 = fv[30] * fv[0];
|
||||
|
||||
ba = Helper.compressChar(ca, 0, LENGTH, 0, LENGTH);
|
||||
|
||||
if (ba[iter % LENGTH] > (byte) ('a' + 5)) {
|
||||
// Use as many fp registers as possible and try to keep them
|
||||
// live across the intrinsic call above.
|
||||
t0 += t1 - t2 + t3 - t4 + t5 - t6 + t7 - t8 + t9 - t10 + t11 - t12 + t13 - t14 + t15
|
||||
- t16 + t17 - t18 + t19 - t20 + t21 - t22 + t23 - t24 + t25 - t26 + t27 - t28
|
||||
+ t29 - t30; // 0
|
||||
}
|
||||
fv[31] += t0 + t2 - t11 + t16 - t29;
|
||||
}
|
||||
|
||||
public void testCompress() {
|
||||
for (int i = 0; i < ITER; i++) {
|
||||
checkCompress(i);
|
||||
}
|
||||
}
|
||||
|
||||
public void checkInflate(int iter) {
|
||||
float t0 = 0;
|
||||
float t1 = fv[1] * fv[0];
|
||||
float t2 = fv[2] * fv[0];
|
||||
float t3 = fv[3] * fv[0];
|
||||
float t4 = fv[4] * fv[0];
|
||||
float t5 = fv[5] * fv[0];
|
||||
float t6 = fv[6] * fv[0];
|
||||
float t7 = fv[7] * fv[0];
|
||||
float t8 = fv[8] * fv[0];
|
||||
float t9 = fv[9] * fv[0];
|
||||
float t10 = fv[10] * fv[0];
|
||||
float t11 = fv[11] * fv[0];
|
||||
float t12 = fv[12] * fv[0];
|
||||
float t13 = fv[13] * fv[0];
|
||||
float t14 = fv[14] * fv[0];
|
||||
float t15 = fv[15] * fv[0];
|
||||
float t16 = fv[16] * fv[0];
|
||||
float t17 = fv[17] * fv[0];
|
||||
float t18 = fv[18] * fv[0];
|
||||
float t19 = fv[19] * fv[0];
|
||||
float t20 = fv[20] * fv[0];
|
||||
float t21 = fv[21] * fv[0];
|
||||
float t22 = fv[22] * fv[0];
|
||||
float t23 = fv[23] * fv[0];
|
||||
float t24 = fv[24] * fv[0];
|
||||
float t25 = fv[25] * fv[0];
|
||||
float t26 = fv[26] * fv[0];
|
||||
float t27 = fv[27] * fv[0];
|
||||
float t28 = fv[28] * fv[0];
|
||||
float t29 = fv[29] * fv[0];
|
||||
float t30 = fv[30] * fv[0];
|
||||
|
||||
str.getChars(0, LENGTH, ca, 0);
|
||||
|
||||
if (ca[iter % LENGTH] > (byte) ('a' + NUM / 2)) {
|
||||
// Use as many fp registers as possible and try to keep them
|
||||
// live across the intrinsic call above.
|
||||
t0 += t1 - t2 + t3 - t4 + t5 - t6 + t7 - t8 + t9 - t10 + t11 - t12 + t13 - t14 + t15
|
||||
- t16 + t17 - t18 + t19 - t20 + t21 - t22 + t23 - t24 + t25 - t26 + t27 - t28
|
||||
+ t29 - t30; // 0
|
||||
}
|
||||
fv[31] += t0 + t2 - t11 + t16 - t29;
|
||||
}
|
||||
|
||||
public void testInflate() {
|
||||
for (int i = 0; i < ITER; i++) {
|
||||
checkInflate(i);
|
||||
}
|
||||
}
|
||||
|
||||
public void verifyAndReset() {
|
||||
if (fv[31] != 1.0) {
|
||||
throw new RuntimeException("Failed with " + Float.toString(fv[31]));
|
||||
} else {
|
||||
System.out.println("Success!");
|
||||
}
|
||||
fv[31] = 1.0f;
|
||||
}
|
||||
|
||||
public static void main(String[] args) {
|
||||
TestIntrinsicsRegStress t = new TestIntrinsicsRegStress();
|
||||
t.init();
|
||||
|
||||
t.testIndexOf();
|
||||
t.verifyAndReset();
|
||||
|
||||
t.testArraysEquals();
|
||||
t.verifyAndReset();
|
||||
|
||||
t.testCompress();
|
||||
t.verifyAndReset();
|
||||
|
||||
t.testInflate();
|
||||
t.verifyAndReset();
|
||||
}
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user