8307572: AArch64: Vector registers are clobbered by some macroassemblers

Reviewed-by: aph, adinn
2026-05-19 18:07:49 +00:00 · 2023-05-12 02:05:18 +00:00 · 2023-05-12 02:05:18 +00:00 · 33d9a85730
commit 33d9a85730
parent 9a7b4431ec
6 changed files with 391 additions and 57 deletions
--- a/src/hotspot/cpu/aarch64/aarch64.ad
+++ b/src/hotspot/cpu/aarch64/aarch64.ad
@ -17105,14 +17105,17 @@ instruct string_compareUU_sve(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI
 %}

 instruct string_indexofUU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
-       iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3,
-       iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg cr)
+                          iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
+                          iRegINoSp tmp3, iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6,
+                          vRegD_V0 vtmp0, vRegD_V1 vtmp1, rFlagsReg cr)
 %{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
-         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL cr);
-  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UU)" %}
+         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6,
+         TEMP vtmp0, TEMP vtmp1, KILL cr);
+  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UU) "
+            "# KILL $str1 $cnt1 $str2 $cnt2 $tmp1 $tmp2 $tmp3 $tmp4 $tmp5 $tmp6 V0-V1 cr" %}

  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
@ -17126,14 +17129,17 @@ instruct string_indexofUU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2
 %}

 instruct string_indexofLL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
-       iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3,
-       iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg cr)
+                          iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3,
+                          iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6,
+                          vRegD_V0 vtmp0, vRegD_V1 vtmp1, rFlagsReg cr)
 %{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
-         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL cr);
-  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (LL)" %}
+         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6,
+         TEMP vtmp0, TEMP vtmp1, KILL cr);
+  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (LL) "
+            "# KILL $str1 $cnt1 $str2 $cnt2 $tmp1 $tmp2 $tmp3 $tmp4 $tmp5 $tmp6 V0-V1 cr" %}

  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
@ -17147,14 +17153,17 @@ instruct string_indexofLL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2
 %}

 instruct string_indexofUL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
-       iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3,
-       iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg cr)
+                          iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3,
+                          iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6,
+                          vRegD_V0 vtmp0, vRegD_V1 vtmp1, rFlagsReg cr)
 %{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
-         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL cr);
-  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UL)" %}
+         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5,
+         TEMP tmp6, TEMP vtmp0, TEMP vtmp1, KILL cr);
+  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UL) "
+            "# KILL $str1 $cnt1 $str2 $cnt2 $tmp1 $tmp2 $tmp3 $tmp4 $tmp5 $tmp6 V0-V1 cr" %}

  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
@ -17168,14 +17177,15 @@ instruct string_indexofUL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2
 %}

 instruct string_indexof_conUU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
-                 immI_le_4 int_cnt2, iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
-                 iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
+                              immI_le_4 int_cnt2, iRegI_R0 result, iRegINoSp tmp1,
+                              iRegINoSp tmp2, iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
 %{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
-  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UU)" %}
+  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UU) "
+            "# KILL $str1 $cnt1 $str2 $tmp1 $tmp2 $tmp3 $tmp4 cr" %}

  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
@ -17189,14 +17199,15 @@ instruct string_indexof_conUU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
 %}

 instruct string_indexof_conLL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
-                 immI_le_4 int_cnt2, iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
-                 iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
+                              immI_le_4 int_cnt2, iRegI_R0 result, iRegINoSp tmp1,
+                              iRegINoSp tmp2, iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
 %{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
-  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (LL)" %}
+  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (LL) "
+            "# KILL $str1 $cnt1 $str2 $tmp1 $tmp2 $tmp3 $tmp4 cr" %}

  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
@ -17210,14 +17221,15 @@ instruct string_indexof_conLL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
 %}

 instruct string_indexof_conUL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
-                 immI_1 int_cnt2, iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
-                 iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
+                              immI_1 int_cnt2, iRegI_R0 result, iRegINoSp tmp1,
+                              iRegINoSp tmp2, iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
 %{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
-  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UL)" %}
+  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UL) "
+            "# KILL $str1 $cnt1 $str2 $tmp1 $tmp2 $tmp3 $tmp4 cr" %}

  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
@ -17334,13 +17346,17 @@ instruct string_equalsU(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt,

 instruct array_equalsB(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
                       iRegP_R3 tmp1, iRegP_R4 tmp2, iRegP_R5 tmp3,
+                       vRegD_V0 vtmp0, vRegD_V1 vtmp1, vRegD_V2 vtmp2, vRegD_V3 vtmp3,
+                       vRegD_V4 vtmp4, vRegD_V5 vtmp5, vRegD_V6 vtmp6, vRegD_V7 vtmp7,
                       iRegP_R10 tmp, rFlagsReg cr)
 %{
  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
-  effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
+  effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3,
+         TEMP vtmp0, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4, TEMP vtmp5,
+         TEMP vtmp6, TEMP vtmp7, KILL cr);

-  format %{ "Array Equals $ary1,ary2 -> $result    // KILL $tmp" %}
+  format %{ "Array Equals $ary1,$ary2 -> $result # KILL $ary1 $ary2 $tmp $tmp1 $tmp2 $tmp3 V0-V7 cr" %}
  ins_encode %{
    address tpc = __ arrays_equals($ary1$$Register, $ary2$$Register,
                                   $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
@ -17355,13 +17371,17 @@ instruct array_equalsB(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,

 instruct array_equalsC(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
                       iRegP_R3 tmp1, iRegP_R4 tmp2, iRegP_R5 tmp3,
+                       vRegD_V0 vtmp0, vRegD_V1 vtmp1, vRegD_V2 vtmp2, vRegD_V3 vtmp3,
+                       vRegD_V4 vtmp4, vRegD_V5 vtmp5, vRegD_V6 vtmp6, vRegD_V7 vtmp7,
                       iRegP_R10 tmp, rFlagsReg cr)
 %{
  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
-  effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
+  effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3,
+         TEMP vtmp0, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4, TEMP vtmp5,
+         TEMP vtmp6, TEMP vtmp7, KILL cr);

-  format %{ "Array Equals $ary1,ary2 -> $result    // KILL $tmp" %}
+  format %{ "Array Equals $ary1,$ary2 -> $result # KILL $ary1 $ary2 $tmp $tmp1 $tmp2 $tmp3 V0-V7 cr" %}
  ins_encode %{
    address tpc = __ arrays_equals($ary1$$Register, $ary2$$Register,
                                   $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
@ -17391,36 +17411,39 @@ instruct count_positives(iRegP_R1 ary1, iRegI_R2 len, iRegI_R0 result, rFlagsReg

 // fast char[] to byte[] compression
 instruct string_compress(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
-                         vRegD_V0 tmp1, vRegD_V1 tmp2,
-                         vRegD_V2 tmp3, vRegD_V3 tmp4,
+                         vRegD_V0 vtmp0, vRegD_V1 vtmp1, vRegD_V2 vtmp2,
+                         vRegD_V3 vtmp3, vRegD_V4 vtmp4, vRegD_V5 vtmp5,
                         iRegI_R0 result, rFlagsReg cr)
 %{
  match(Set result (StrCompressedCopy src (Binary dst len)));
-  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4,
+  effect(TEMP vtmp0, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4, TEMP vtmp5,
         USE_KILL src, USE_KILL dst, USE len, KILL cr);

-  format %{ "String Compress $src,$dst,$len -> $result  // KILL $src,$dst" %}
+  format %{ "String Compress $src,$dst,$len -> $result # KILL $src $dst V0-V5 cr" %}
  ins_encode %{
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
-                           $result$$Register,
-                           $tmp1$$FloatRegister, $tmp2$$FloatRegister,
-                           $tmp3$$FloatRegister, $tmp4$$FloatRegister);
+                           $result$$Register, $vtmp0$$FloatRegister, $vtmp1$$FloatRegister,
+                           $vtmp2$$FloatRegister, $vtmp3$$FloatRegister,
+                           $vtmp4$$FloatRegister, $vtmp5$$FloatRegister);
  %}
  ins_pipe(pipe_slow);
 %}

 // fast byte[] to char[] inflation
-instruct string_inflate(Universe dummy, iRegP_R0 src, iRegP_R1 dst, iRegI_R2 len,
-                        vRegD_V0 tmp1, vRegD_V1 tmp2, vRegD_V2 tmp3, iRegP_R3 tmp4, rFlagsReg cr)
+instruct string_inflate(Universe dummy, iRegP_R0 src, iRegP_R1 dst, iRegI_R2 len, iRegP_R3 tmp,
+                        vRegD_V0 vtmp0, vRegD_V1 vtmp1, vRegD_V2 vtmp2, vRegD_V3 vtmp3,
+                        vRegD_V4 vtmp4, vRegD_V5 vtmp5, vRegD_V6 vtmp6, rFlagsReg cr)
 %{
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
-  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
+  effect(TEMP vtmp0, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3,
+         TEMP vtmp4, TEMP vtmp5, TEMP vtmp6, TEMP tmp,
+         USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

-  format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
+  format %{ "String Inflate $src,$dst # KILL $tmp $src $dst $len V0-V6 cr" %}
  ins_encode %{
    address tpc = __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
-                                        $tmp1$$FloatRegister, $tmp2$$FloatRegister,
-                                        $tmp3$$FloatRegister, $tmp4$$Register);
+                                        $vtmp0$$FloatRegister, $vtmp1$$FloatRegister,
+                                        $vtmp2$$FloatRegister, $tmp$$Register);
    if (tpc == NULL) {
      ciEnv::current()->record_failure("CodeCache is full");
      return;
@ -17431,41 +17454,43 @@ instruct string_inflate(Universe dummy, iRegP_R0 src, iRegP_R1 dst, iRegI_R2 len

 // encode char[] to byte[] in ISO_8859_1
 instruct encode_iso_array(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
-                          vRegD_V0 vtmp0, vRegD_V1 vtmp1,
-                          vRegD_V2 vtmp2, vRegD_V3 vtmp3,
+                          vRegD_V0 vtmp0, vRegD_V1 vtmp1, vRegD_V2 vtmp2,
+                          vRegD_V3 vtmp3, vRegD_V4 vtmp4, vRegD_V5 vtmp5,
                          iRegI_R0 result, rFlagsReg cr)
 %{
  predicate(!((EncodeISOArrayNode*)n)->is_ascii());
  match(Set result (EncodeISOArray src (Binary dst len)));
-  effect(USE_KILL src, USE_KILL dst, USE len,
-         KILL vtmp0, KILL vtmp1, KILL vtmp2, KILL vtmp3, KILL cr);
+  effect(USE_KILL src, USE_KILL dst, USE len, KILL vtmp0, KILL vtmp1,
+         KILL vtmp2, KILL vtmp3, KILL vtmp4, KILL vtmp5, KILL cr);

-  format %{ "Encode ISO array $src,$dst,$len -> $result" %}
+  format %{ "Encode ISO array $src,$dst,$len -> $result # KILL $src $dst V0-V5 cr" %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
                        $result$$Register, false,
                        $vtmp0$$FloatRegister, $vtmp1$$FloatRegister,
-                        $vtmp2$$FloatRegister, $vtmp3$$FloatRegister);
+                        $vtmp2$$FloatRegister, $vtmp3$$FloatRegister,
+                        $vtmp4$$FloatRegister, $vtmp5$$FloatRegister);
  %}
  ins_pipe(pipe_class_memory);
 %}

 instruct encode_ascii_array(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
-                            vRegD_V0 vtmp0, vRegD_V1 vtmp1,
-                            vRegD_V2 vtmp2, vRegD_V3 vtmp3,
+                            vRegD_V0 vtmp0, vRegD_V1 vtmp1, vRegD_V2 vtmp2,
+                            vRegD_V3 vtmp3, vRegD_V4 vtmp4, vRegD_V5 vtmp5,
                            iRegI_R0 result, rFlagsReg cr)
 %{
  predicate(((EncodeISOArrayNode*)n)->is_ascii());
  match(Set result (EncodeISOArray src (Binary dst len)));
-  effect(USE_KILL src, USE_KILL dst, USE len,
-         KILL vtmp0, KILL vtmp1, KILL vtmp2, KILL vtmp3, KILL cr);
+  effect(USE_KILL src, USE_KILL dst, USE len, KILL vtmp0, KILL vtmp1,
+         KILL vtmp2, KILL vtmp3, KILL vtmp4, KILL vtmp5, KILL cr);

-  format %{ "Encode ASCII array $src,$dst,$len -> $result" %}
+  format %{ "Encode ASCII array $src,$dst,$len -> $result # KILL $src $dst V0-V5 cr" %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
                        $result$$Register, true,
                        $vtmp0$$FloatRegister, $vtmp1$$FloatRegister,
-                        $vtmp2$$FloatRegister, $vtmp3$$FloatRegister);
+                        $vtmp2$$FloatRegister, $vtmp3$$FloatRegister,
+                        $vtmp4$$FloatRegister, $vtmp5$$FloatRegister);
  %}
  ins_pipe(pipe_class_memory);
 %}
--- a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp
@ -46,6 +46,7 @@
 typedef void (MacroAssembler::* chr_insn)(Register Rt, const Address &adr);

 // Search for str1 in str2 and return index or -1
+// Clobbers: rscratch1, rscratch2, rflags. May also clobber v0-v1, when icnt1==-1.
 void C2_MacroAssembler::string_indexof(Register str2, Register str1,
                                       Register cnt2, Register cnt1,
                                       Register tmp1, Register tmp2,
--- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
@ -5008,6 +5008,8 @@ address MacroAssembler::count_positives(Register ary1, Register len, Register re
  return pc();
 }

+// Clobbers: rscratch1, rscratch2, rflags
+// May also clobber v0-v7 when (!UseSimpleArrayEquals && UseSIMDForArrayEquals)
 address MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3,
                                      Register tmp4, Register tmp5, Register result,
                                      Register cnt1, int elem_size) {
@ -5557,10 +5559,12 @@ void MacroAssembler::fill_words(Register base, Register cnt, Register value)
 //       Using 'umaxv' in the ASCII-case comes with a small penalty but does
 //       avoid additional bloat.
 //
+// Clobbers: src, dst, res, rscratch1, rscratch2, rflags
 void MacroAssembler::encode_iso_array(Register src, Register dst,
                                      Register len, Register res, bool ascii,
                                      FloatRegister vtmp0, FloatRegister vtmp1,
-                                      FloatRegister vtmp2, FloatRegister vtmp3)
+                                      FloatRegister vtmp2, FloatRegister vtmp3,
+                                      FloatRegister vtmp4, FloatRegister vtmp5)
 {
  Register cnt = res;
  Register max = rscratch1;
@ -5579,8 +5583,8 @@ void MacroAssembler::encode_iso_array(Register src, Register dst,
    br(LT, DONE_32);
    ld1(vtmp0, vtmp1, vtmp2, vtmp3, T8H, Address(post(src, 64)));
    // Extract lower bytes.
-    FloatRegister vlo0 = v4;
-    FloatRegister vlo1 = v5;
+    FloatRegister vlo0 = vtmp4;
+    FloatRegister vlo1 = vtmp5;
    uzp1(vlo0, T16B, vtmp0, vtmp1);
    uzp1(vlo1, T16B, vtmp2, vtmp3);
    // Merge bits...
@ -5653,6 +5657,7 @@ void MacroAssembler::encode_iso_array(Register src, Register dst,
 }

 // Inflate byte[] array to char[].
+// Clobbers: src, dst, len, rflags, rscratch1, v0-v6
 address MacroAssembler::byte_array_inflate(Register src, Register dst, Register len,
                                           FloatRegister vtmp1, FloatRegister vtmp2,
                                           FloatRegister vtmp3, Register tmp4) {
@ -5761,8 +5766,9 @@ address MacroAssembler::byte_array_inflate(Register src, Register dst, Register
 void MacroAssembler::char_array_compress(Register src, Register dst, Register len,
                                         Register res,
                                         FloatRegister tmp0, FloatRegister tmp1,
-                                         FloatRegister tmp2, FloatRegister tmp3) {
-  encode_iso_array(src, dst, len, res, false, tmp0, tmp1, tmp2, tmp3);
+                                         FloatRegister tmp2, FloatRegister tmp3,
+                                         FloatRegister tmp4, FloatRegister tmp5) {
+  encode_iso_array(src, dst, len, res, false, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5);
  // Adjust result: res == len ? len : 0
  cmp(len, res);
  csel(res, res, zr, EQ);
--- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp
@ -1393,12 +1393,14 @@ public:
  void char_array_compress(Register src, Register dst, Register len,
                           Register res,
                           FloatRegister vtmp0, FloatRegister vtmp1,
-                           FloatRegister vtmp2, FloatRegister vtmp3);
+                           FloatRegister vtmp2, FloatRegister vtmp3,
+                           FloatRegister vtmp4, FloatRegister vtmp5);

  void encode_iso_array(Register src, Register dst,
                        Register len, Register res, bool ascii,
                        FloatRegister vtmp0, FloatRegister vtmp1,
-                        FloatRegister vtmp2, FloatRegister vtmp3);
+                        FloatRegister vtmp2, FloatRegister vtmp3,
+                        FloatRegister vtmp4, FloatRegister vtmp5);

  void fast_log(FloatRegister vtmp0, FloatRegister vtmp1, FloatRegister vtmp2,
                FloatRegister vtmp3, FloatRegister vtmp4, FloatRegister vtmp5,
--- a/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp
@ -5151,6 +5151,7 @@ class StubGenerator: public StubCodeGenerator {
  // result = r0 - return value. Already contains "false"
  // cnt1 = r10 - amount of elements left to check, reduced by wordSize
  // r3-r5 are reserved temporary registers
+  // Clobbers: v0-v7 when UseSIMDForArrayEquals, rscratch1, rscratch2
  address generate_large_array_equals() {
    Register a1 = r1, a2 = r2, result = r0, cnt1 = r10, tmp1 = rscratch1,
        tmp2 = rscratch2, tmp3 = r3, tmp4 = r4, tmp5 = r5, tmp6 = r11,
@ -5734,6 +5735,8 @@ class StubGenerator: public StubCodeGenerator {
  // R2 = cnt1
  // R3 = str1
  // R4 = cnt2
+  // Clobbers: rscratch1, rscratch2, v0, v1, rflags
+  //
  // This generic linear code use few additional ideas, which makes it faster:
  // 1) we can safely keep at least 1st register of pattern(since length >= 8)
  // in order to skip initial loading(help in systems with 1 ld pipeline)
@ -6048,6 +6051,7 @@ class StubGenerator: public StubCodeGenerator {
  // R3 = len >> 3
  // V0 = 0
  // v1 = loaded 8 bytes
+  // Clobbers: r0, r1, r3, rscratch1, rflags, v0-v6
  address generate_large_byte_array_inflate() {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", "large_byte_array_inflate");
--- a/test/hotspot/jtreg/compiler/c2/aarch64/TestIntrinsicsRegStress.java
+++ b/test/hotspot/jtreg/compiler/c2/aarch64/TestIntrinsicsRegStress.java
@ -0,0 +1,296 @@
+/*
+ * Copyright (c) 2023, Arm Limited. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 8307572
+ * @summary Verify vector register clobbering in some aarch64 intrinsics
+ * @library /compiler/patches /test/lib
+ * @build java.base/java.lang.Helper
+ * @run main/othervm -Xbatch -XX:CompileThreshold=100 -XX:-TieredCompilation compiler.c2.aarch64.TestIntrinsicsRegStress
+ */
+
+package compiler.c2.aarch64;
+
+import java.util.Arrays;
+
+public class TestIntrinsicsRegStress {
+
+    final int LENGTH = 1024;
+    final int ITER = 10000;
+    final int NUM = 32;
+
+    byte[] ba;
+    char[] ca;
+    char[] cb;
+    float[] fv;
+
+    String str;
+    String[] strings;
+    String needle = "01234567890123456789";
+
+    public void init() {
+        ca = new char[LENGTH];
+        fv = new float[NUM];
+        strings = new String[NUM];
+        for (int i = 0; i < LENGTH; i++) {
+            ca[i] = (char) ('a' + i % NUM);
+        }
+        cb = ca.clone();
+        str = new String(ca);
+        for (int i = 0; i < NUM; i++) {
+            fv[i] = 1;
+        }
+        for (int i = 0; i < NUM; i++) {
+            strings[i] = str.substring(i) + needle;
+        }
+    }
+
+    public void checkIndexOf(int iter) {
+        float t0 = 0;
+        float t1 = fv[1] * fv[0];
+        float t2 = fv[2] * fv[0];
+        float t3 = fv[3] * fv[0];
+        float t4 = fv[4] * fv[0];
+        float t5 = fv[5] * fv[0];
+        float t6 = fv[6] * fv[0];
+        float t7 = fv[7] * fv[0];
+        float t8 = fv[8] * fv[0];
+        float t9 = fv[9] * fv[0];
+        float t10 = fv[10] * fv[0];
+        float t11 = fv[11] * fv[0];
+        float t12 = fv[12] * fv[0];
+        float t13 = fv[13] * fv[0];
+        float t14 = fv[14] * fv[0];
+        float t15 = fv[15] * fv[0];
+        float t16 = fv[16] * fv[0];
+        float t17 = fv[17] * fv[0];
+        float t18 = fv[18] * fv[0];
+        float t19 = fv[19] * fv[0];
+        float t20 = fv[20] * fv[0];
+        float t21 = fv[21] * fv[0];
+        float t22 = fv[22] * fv[0];
+        float t23 = fv[23] * fv[0];
+        float t24 = fv[24] * fv[0];
+        float t25 = fv[25] * fv[0];
+        float t26 = fv[26] * fv[0];
+        float t27 = fv[27] * fv[0];
+        float t28 = fv[28] * fv[0];
+        float t29 = fv[29] * fv[0];
+        float t30 = fv[30] * fv[0];
+
+        int result = strings[iter % NUM].indexOf(needle);
+
+        if (result > LENGTH - NUM / 2) {
+            // Use as many fp registers as possible and try to keep them
+            // live across the intrinsic call above.
+            t0 += t1 - t2 + t3 - t4 + t5 - t6 + t7 - t8 + t9 - t10 + t11 - t12 + t13 - t14 + t15
+                    - t16 + t17 - t18 + t19 - t20 + t21 - t22 + t23 - t24 + t25 - t26 + t27 - t28
+                    + t29 - t30; // 0
+        }
+        fv[31] += t0 + t2 - t11 + t16 - t29;
+    }
+
+    public void testIndexOf() {
+        for (int i = 0; i < ITER; i++) {
+            checkIndexOf(i);
+        }
+    }
+
+    public void checkArraysEquals() {
+        float t0 = 0;
+        float t1 = fv[1] * fv[0];
+        float t2 = fv[2] * fv[0];
+        float t3 = fv[3] * fv[0];
+        float t4 = fv[4] * fv[0];
+        float t5 = fv[5] * fv[0];
+        float t6 = fv[6] * fv[0];
+        float t7 = fv[7] * fv[0];
+        float t8 = fv[8] * fv[0];
+        float t9 = fv[9] * fv[0];
+        float t10 = fv[10] * fv[0];
+        float t11 = fv[11] * fv[0];
+        float t12 = fv[12] * fv[0];
+        float t13 = fv[13] * fv[0];
+        float t14 = fv[14] * fv[0];
+        float t15 = fv[15] * fv[0];
+        float t16 = fv[16] * fv[0];
+        float t17 = fv[17] * fv[0];
+        float t18 = fv[18] * fv[0];
+        float t19 = fv[19] * fv[0];
+        float t20 = fv[20] * fv[0];
+        float t21 = fv[21] * fv[0];
+        float t22 = fv[22] * fv[0];
+        float t23 = fv[23] * fv[0];
+        float t24 = fv[24] * fv[0];
+        float t25 = fv[25] * fv[0];
+        float t26 = fv[26] * fv[0];
+        float t27 = fv[27] * fv[0];
+        float t28 = fv[28] * fv[0];
+        float t29 = fv[29] * fv[0];
+        float t30 = fv[30] * fv[0];
+
+        if (Arrays.equals(ca, cb)) {
+            // Use as many fp registers as possible and try to keep them
+            // live across the intrinsic call above.
+            t0 += t1 - t2 + t3 - t4 + t5 - t6 + t7 - t8 + t9 - t10 + t11 - t12 + t13 - t14 + t15
+                    - t16 + t17 - t18 + t19 - t20 + t21 - t22 + t23 - t24 + t25 - t26 + t27 - t28
+                    + t29 - t30; // 0
+        }
+        fv[31] += t0 + t2 - t11 + t16 - t29;
+    }
+
+    public void testArraysEquals() {
+        for (int i = 0; i < ITER; i++) {
+            checkArraysEquals();
+        }
+    }
+
+    public void checkCompress(int iter) {
+        float t0 = 0;
+        float t1 = fv[1] * fv[0];
+        float t2 = fv[2] * fv[0];
+        float t3 = fv[3] * fv[0];
+        float t4 = fv[4] * fv[0];
+        float t5 = fv[5] * fv[0];
+        float t6 = fv[6] * fv[0];
+        float t7 = fv[7] * fv[0];
+        float t8 = fv[8] * fv[0];
+        float t9 = fv[9] * fv[0];
+        float t10 = fv[10] * fv[0];
+        float t11 = fv[11] * fv[0];
+        float t12 = fv[12] * fv[0];
+        float t13 = fv[13] * fv[0];
+        float t14 = fv[14] * fv[0];
+        float t15 = fv[15] * fv[0];
+        float t16 = fv[16] * fv[0];
+        float t17 = fv[17] * fv[0];
+        float t18 = fv[18] * fv[0];
+        float t19 = fv[19] * fv[0];
+        float t20 = fv[20] * fv[0];
+        float t21 = fv[21] * fv[0];
+        float t22 = fv[22] * fv[0];
+        float t23 = fv[23] * fv[0];
+        float t24 = fv[24] * fv[0];
+        float t25 = fv[25] * fv[0];
+        float t26 = fv[26] * fv[0];
+        float t27 = fv[27] * fv[0];
+        float t28 = fv[28] * fv[0];
+        float t29 = fv[29] * fv[0];
+        float t30 = fv[30] * fv[0];
+
+        ba = Helper.compressChar(ca, 0, LENGTH, 0, LENGTH);
+
+        if (ba[iter % LENGTH] > (byte) ('a' + 5)) {
+            // Use as many fp registers as possible and try to keep them
+            // live across the intrinsic call above.
+            t0 += t1 - t2 + t3 - t4 + t5 - t6 + t7 - t8 + t9 - t10 + t11 - t12 + t13 - t14 + t15
+                    - t16 + t17 - t18 + t19 - t20 + t21 - t22 + t23 - t24 + t25 - t26 + t27 - t28
+                    + t29 - t30; // 0
+        }
+        fv[31] += t0 + t2 - t11 + t16 - t29;
+    }
+
+    public void testCompress() {
+        for (int i = 0; i < ITER; i++) {
+            checkCompress(i);
+        }
+    }
+
+    public void checkInflate(int iter) {
+        float t0 = 0;
+        float t1 = fv[1] * fv[0];
+        float t2 = fv[2] * fv[0];
+        float t3 = fv[3] * fv[0];
+        float t4 = fv[4] * fv[0];
+        float t5 = fv[5] * fv[0];
+        float t6 = fv[6] * fv[0];
+        float t7 = fv[7] * fv[0];
+        float t8 = fv[8] * fv[0];
+        float t9 = fv[9] * fv[0];
+        float t10 = fv[10] * fv[0];
+        float t11 = fv[11] * fv[0];
+        float t12 = fv[12] * fv[0];
+        float t13 = fv[13] * fv[0];
+        float t14 = fv[14] * fv[0];
+        float t15 = fv[15] * fv[0];
+        float t16 = fv[16] * fv[0];
+        float t17 = fv[17] * fv[0];
+        float t18 = fv[18] * fv[0];
+        float t19 = fv[19] * fv[0];
+        float t20 = fv[20] * fv[0];
+        float t21 = fv[21] * fv[0];
+        float t22 = fv[22] * fv[0];
+        float t23 = fv[23] * fv[0];
+        float t24 = fv[24] * fv[0];
+        float t25 = fv[25] * fv[0];
+        float t26 = fv[26] * fv[0];
+        float t27 = fv[27] * fv[0];
+        float t28 = fv[28] * fv[0];
+        float t29 = fv[29] * fv[0];
+        float t30 = fv[30] * fv[0];
+
+        str.getChars(0, LENGTH, ca, 0);
+
+        if (ca[iter % LENGTH] > (byte) ('a' + NUM / 2)) {
+            // Use as many fp registers as possible and try to keep them
+            // live across the intrinsic call above.
+            t0 += t1 - t2 + t3 - t4 + t5 - t6 + t7 - t8 + t9 - t10 + t11 - t12 + t13 - t14 + t15
+                    - t16 + t17 - t18 + t19 - t20 + t21 - t22 + t23 - t24 + t25 - t26 + t27 - t28
+                    + t29 - t30; // 0
+        }
+        fv[31] += t0 + t2 - t11 + t16 - t29;
+    }
+
+    public void testInflate() {
+        for (int i = 0; i < ITER; i++) {
+            checkInflate(i);
+        }
+    }
+
+    public void verifyAndReset() {
+        if (fv[31] != 1.0) {
+            throw new RuntimeException("Failed with " + Float.toString(fv[31]));
+        } else {
+            System.out.println("Success!");
+        }
+        fv[31] = 1.0f;
+    }
+
+    public static void main(String[] args) {
+        TestIntrinsicsRegStress t = new TestIntrinsicsRegStress();
+        t.init();
+
+        t.testIndexOf();
+        t.verifyAndReset();
+
+        t.testArraysEquals();
+        t.verifyAndReset();
+
+        t.testCompress();
+        t.verifyAndReset();
+
+        t.testInflate();
+        t.verifyAndReset();
+    }
+}