diff --git a/src/hotspot/cpu/aarch64/aarch64_vector.ad b/src/hotspot/cpu/aarch64/aarch64_vector.ad
index 2ff93c9e288..b9899995531 100644
--- a/src/hotspot/cpu/aarch64/aarch64_vector.ad
+++ b/src/hotspot/cpu/aarch64/aarch64_vector.ad
@@ -1671,24 +1671,42 @@ instruct vnotL(vReg dst, vReg src, immL_M1 m1) %{
 
 // vector not - predicated
 
-instruct vnotI_masked(vReg dst_src, immI_M1 m1, pRegGov pg) %{
+// The Java Vector API specification requires that for masked unary operations,
+// suppressed lanes are filled from the first vector operand (see "Masked
+// Operations" in Vector.java around line 568). So we use movprfx to copy src
+// into dst before emitting the predicated instruction.
+instruct vnotI_masked(vReg dst, vReg src, immI_M1 m1, pRegGov pg) %{
   predicate(UseSVE > 0);
-  match(Set dst_src (XorV (Binary dst_src (Replicate m1)) pg));
-  format %{ "vnotI_masked $dst_src, $pg, $dst_src" %}
+  match(Set dst (XorV (Binary src (Replicate m1)) pg));
+  format %{ "vnotI_masked $dst, $pg, $src" %}
   ins_encode %{
-    __ sve_not($dst_src$$FloatRegister, get_reg_variant(this),
-               $pg$$PRegister, $dst_src$$FloatRegister);
+    __ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
+    // Although dst and src hold the same value after movprfx, we must use src
+    // (not dst) as the source of the following instruction. The movprfx
+    // destination register must not appear in any source operand of the
+    // following instruction except as the destructive operand.
+    __ sve_not($dst$$FloatRegister, get_reg_variant(this),
+               $pg$$PRegister, $src$$FloatRegister);
   %}
   ins_pipe(pipe_slow);
 %}
 
-instruct vnotL_masked(vReg dst_src, immL_M1 m1, pRegGov pg) %{
+// The Java Vector API specification requires that for masked unary operations,
+// suppressed lanes are filled from the first vector operand (see "Masked
+// Operations" in Vector.java around line 568). So we use movprfx to copy src
+// into dst before emitting the predicated instruction.
+instruct vnotL_masked(vReg dst, vReg src, immL_M1 m1, pRegGov pg) %{
   predicate(UseSVE > 0);
-  match(Set dst_src (XorV (Binary dst_src (Replicate m1)) pg));
-  format %{ "vnotL_masked $dst_src, $pg, $dst_src" %}
+  match(Set dst (XorV (Binary src (Replicate m1)) pg));
+  format %{ "vnotL_masked $dst, $pg, $src" %}
   ins_encode %{
-    __ sve_not($dst_src$$FloatRegister, get_reg_variant(this),
-               $pg$$PRegister, $dst_src$$FloatRegister);
+    __ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
+    // Although dst and src hold the same value after movprfx, we must use src
+    // (not dst) as the source of the following instruction. The movprfx
+    // destination register must not appear in any source operand of the
+    // following instruction except as the destructive operand.
+    __ sve_not($dst$$FloatRegister, get_reg_variant(this),
+               $pg$$PRegister, $src$$FloatRegister);
   %}
   ins_pipe(pipe_slow);
 %}
@@ -1985,62 +2003,116 @@ instruct vabsD(vReg dst, vReg src) %{
 
 // vector abs - predicated
 
-instruct vabsB_masked(vReg dst_src, pRegGov pg) %{
+// The Java Vector API specification requires that for masked unary operations,
+// suppressed lanes are filled from the first vector operand (see "Masked
+// Operations" in Vector.java around line 568). So we use movprfx to copy src
+// into dst before emitting the predicated instruction.
+instruct vabsB_masked(vReg dst, vReg src, pRegGov pg) %{
   predicate(UseSVE > 0);
-  match(Set dst_src (AbsVB dst_src pg));
-  format %{ "vabsB_masked $dst_src, $pg, $dst_src" %}
+  match(Set dst (AbsVB src pg));
+  format %{ "vabsB_masked $dst, $pg, $src" %}
   ins_encode %{
-    __ sve_abs($dst_src$$FloatRegister, __ B, $pg$$PRegister, $dst_src$$FloatRegister);
+    __ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
+    // Although dst and src hold the same value after movprfx, we must use src
+    // (not dst) as the source of the following instruction. The movprfx
+    // destination register must not appear in any source operand of the
+    // following instruction except as the destructive operand.
+    __ sve_abs($dst$$FloatRegister, __ B, $pg$$PRegister, $src$$FloatRegister);
   %}
   ins_pipe(pipe_slow);
 %}
 
-instruct vabsS_masked(vReg dst_src, pRegGov pg) %{
+// The Java Vector API specification requires that for masked unary operations,
+// suppressed lanes are filled from the first vector operand (see "Masked
+// Operations" in Vector.java around line 568). So we use movprfx to copy src
+// into dst before emitting the predicated instruction.
+instruct vabsS_masked(vReg dst, vReg src, pRegGov pg) %{
   predicate(UseSVE > 0);
-  match(Set dst_src (AbsVS dst_src pg));
-  format %{ "vabsS_masked $dst_src, $pg, $dst_src" %}
+  match(Set dst (AbsVS src pg));
+  format %{ "vabsS_masked $dst, $pg, $src" %}
   ins_encode %{
-    __ sve_abs($dst_src$$FloatRegister, __ H, $pg$$PRegister, $dst_src$$FloatRegister);
+    __ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
+    // Although dst and src hold the same value after movprfx, we must use src
+    // (not dst) as the source of the following instruction. The movprfx
+    // destination register must not appear in any source operand of the
+    // following instruction except as the destructive operand.
+    __ sve_abs($dst$$FloatRegister, __ H, $pg$$PRegister, $src$$FloatRegister);
   %}
   ins_pipe(pipe_slow);
 %}
 
-instruct vabsI_masked(vReg dst_src, pRegGov pg) %{
+// The Java Vector API specification requires that for masked unary operations,
+// suppressed lanes are filled from the first vector operand (see "Masked
+// Operations" in Vector.java around line 568). So we use movprfx to copy src
+// into dst before emitting the predicated instruction.
+instruct vabsI_masked(vReg dst, vReg src, pRegGov pg) %{
   predicate(UseSVE > 0);
-  match(Set dst_src (AbsVI dst_src pg));
-  format %{ "vabsI_masked $dst_src, $pg, $dst_src" %}
+  match(Set dst (AbsVI src pg));
+  format %{ "vabsI_masked $dst, $pg, $src" %}
   ins_encode %{
-    __ sve_abs($dst_src$$FloatRegister, __ S, $pg$$PRegister, $dst_src$$FloatRegister);
+    __ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
+    // Although dst and src hold the same value after movprfx, we must use src
+    // (not dst) as the source of the following instruction. The movprfx
+    // destination register must not appear in any source operand of the
+    // following instruction except as the destructive operand.
+    __ sve_abs($dst$$FloatRegister, __ S, $pg$$PRegister, $src$$FloatRegister);
   %}
   ins_pipe(pipe_slow);
 %}
 
-instruct vabsL_masked(vReg dst_src, pRegGov pg) %{
+// The Java Vector API specification requires that for masked unary operations,
+// suppressed lanes are filled from the first vector operand (see "Masked
+// Operations" in Vector.java around line 568). So we use movprfx to copy src
+// into dst before emitting the predicated instruction.
+instruct vabsL_masked(vReg dst, vReg src, pRegGov pg) %{
   predicate(UseSVE > 0);
-  match(Set dst_src (AbsVL dst_src pg));
-  format %{ "vabsL_masked $dst_src, $pg, $dst_src" %}
+  match(Set dst (AbsVL src pg));
+  format %{ "vabsL_masked $dst, $pg, $src" %}
   ins_encode %{
-    __ sve_abs($dst_src$$FloatRegister, __ D, $pg$$PRegister, $dst_src$$FloatRegister);
+    __ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
+    // Although dst and src hold the same value after movprfx, we must use src
+    // (not dst) as the source of the following instruction. The movprfx
+    // destination register must not appear in any source operand of the
+    // following instruction except as the destructive operand.
+    __ sve_abs($dst$$FloatRegister, __ D, $pg$$PRegister, $src$$FloatRegister);
   %}
   ins_pipe(pipe_slow);
 %}
 
-instruct vabsF_masked(vReg dst_src, pRegGov pg) %{
+// The Java Vector API specification requires that for masked unary operations,
+// suppressed lanes are filled from the first vector operand (see "Masked
+// Operations" in Vector.java around line 568). So we use movprfx to copy src
+// into dst before emitting the predicated instruction.
+instruct vabsF_masked(vReg dst, vReg src, pRegGov pg) %{
   predicate(UseSVE > 0);
-  match(Set dst_src (AbsVF dst_src pg));
-  format %{ "vabsF_masked $dst_src, $pg, $dst_src" %}
+  match(Set dst (AbsVF src pg));
+  format %{ "vabsF_masked $dst, $pg, $src" %}
   ins_encode %{
-    __ sve_fabs($dst_src$$FloatRegister, __ S, $pg$$PRegister, $dst_src$$FloatRegister);
+    __ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
+    // Although dst and src hold the same value after movprfx, we must use src
+    // (not dst) as the source of the following instruction. The movprfx
+    // destination register must not appear in any source operand of the
+    // following instruction except as the destructive operand.
+    __ sve_fabs($dst$$FloatRegister, __ S, $pg$$PRegister, $src$$FloatRegister);
   %}
   ins_pipe(pipe_slow);
 %}
 
-instruct vabsD_masked(vReg dst_src, pRegGov pg) %{
+// The Java Vector API specification requires that for masked unary operations,
+// suppressed lanes are filled from the first vector operand (see "Masked
+// Operations" in Vector.java around line 568). So we use movprfx to copy src
+// into dst before emitting the predicated instruction.
+instruct vabsD_masked(vReg dst, vReg src, pRegGov pg) %{
   predicate(UseSVE > 0);
-  match(Set dst_src (AbsVD dst_src pg));
-  format %{ "vabsD_masked $dst_src, $pg, $dst_src" %}
+  match(Set dst (AbsVD src pg));
+  format %{ "vabsD_masked $dst, $pg, $src" %}
   ins_encode %{
-    __ sve_fabs($dst_src$$FloatRegister, __ D, $pg$$PRegister, $dst_src$$FloatRegister);
+    __ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
+    // Although dst and src hold the same value after movprfx, we must use src
+    // (not dst) as the source of the following instruction. The movprfx
+    // destination register must not appear in any source operand of the
+    // following instruction except as the destructive operand.
+    __ sve_fabs($dst$$FloatRegister, __ D, $pg$$PRegister, $src$$FloatRegister);
   %}
   ins_pipe(pipe_slow);
 %}
@@ -2158,44 +2230,80 @@ instruct vnegD(vReg dst, vReg src) %{
 
 // vector neg - predicated
 
-instruct vnegI_masked(vReg dst_src, pRegGov pg) %{
+// The Java Vector API specification requires that for masked unary operations,
+// suppressed lanes are filled from the first vector operand (see "Masked
+// Operations" in Vector.java around line 568). So we use movprfx to copy src
+// into dst before emitting the predicated instruction.
+instruct vnegI_masked(vReg dst, vReg src, pRegGov pg) %{
   predicate(UseSVE > 0);
-  match(Set dst_src (NegVI dst_src pg));
-  format %{ "vnegI_masked $dst_src, $pg, $dst_src" %}
+  match(Set dst (NegVI src pg));
+  format %{ "vnegI_masked $dst, $pg, $src" %}
   ins_encode %{
     BasicType bt = Matcher::vector_element_basic_type(this);
-    __ sve_neg($dst_src$$FloatRegister, __ elemType_to_regVariant(bt),
-               $pg$$PRegister, $dst_src$$FloatRegister);
+    __ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
+    // Although dst and src hold the same value after movprfx, we must use src
+    // (not dst) as the source of the following instruction. The movprfx
+    // destination register must not appear in any source operand of the
+    // following instruction except as the destructive operand.
+    __ sve_neg($dst$$FloatRegister, __ elemType_to_regVariant(bt),
+               $pg$$PRegister, $src$$FloatRegister);
   %}
   ins_pipe(pipe_slow);
 %}
 
-instruct vnegL_masked(vReg dst_src, pRegGov pg) %{
+// The Java Vector API specification requires that for masked unary operations,
+// suppressed lanes are filled from the first vector operand (see "Masked
+// Operations" in Vector.java around line 568). So we use movprfx to copy src
+// into dst before emitting the predicated instruction.
+instruct vnegL_masked(vReg dst, vReg src, pRegGov pg) %{
   predicate(UseSVE > 0);
-  match(Set dst_src (NegVL dst_src pg));
-  format %{ "vnegL_masked $dst_src, $pg, $dst_src" %}
+  match(Set dst (NegVL src pg));
+  format %{ "vnegL_masked $dst, $pg, $src" %}
   ins_encode %{
-    __ sve_neg($dst_src$$FloatRegister, __ D, $pg$$PRegister, $dst_src$$FloatRegister);
+    __ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
+    // Although dst and src hold the same value after movprfx, we must use src
+    // (not dst) as the source of the following instruction. The movprfx
+    // destination register must not appear in any source operand of the
+    // following instruction except as the destructive operand.
+    __ sve_neg($dst$$FloatRegister, __ D, $pg$$PRegister, $src$$FloatRegister);
   %}
   ins_pipe(pipe_slow);
 %}
 
-instruct vnegF_masked(vReg dst_src, pRegGov pg) %{
+// The Java Vector API specification requires that for masked unary operations,
+// suppressed lanes are filled from the first vector operand (see "Masked
+// Operations" in Vector.java around line 568). So we use movprfx to copy src
+// into dst before emitting the predicated instruction.
+instruct vnegF_masked(vReg dst, vReg src, pRegGov pg) %{
   predicate(UseSVE > 0);
-  match(Set dst_src (NegVF dst_src pg));
-  format %{ "vnegF_masked $dst_src, $pg, $dst_src" %}
+  match(Set dst (NegVF src pg));
+  format %{ "vnegF_masked $dst, $pg, $src" %}
   ins_encode %{
-    __ sve_fneg($dst_src$$FloatRegister, __ S, $pg$$PRegister, $dst_src$$FloatRegister);
+    __ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
+    // Although dst and src hold the same value after movprfx, we must use src
+    // (not dst) as the source of the following instruction. The movprfx
+    // destination register must not appear in any source operand of the
+    // following instruction except as the destructive operand.
+    __ sve_fneg($dst$$FloatRegister, __ S, $pg$$PRegister, $src$$FloatRegister);
   %}
   ins_pipe(pipe_slow);
 %}
 
-instruct vnegD_masked(vReg dst_src, pRegGov pg) %{
+// The Java Vector API specification requires that for masked unary operations,
+// suppressed lanes are filled from the first vector operand (see "Masked
+// Operations" in Vector.java around line 568). So we use movprfx to copy src
+// into dst before emitting the predicated instruction.
+instruct vnegD_masked(vReg dst, vReg src, pRegGov pg) %{
   predicate(UseSVE > 0);
-  match(Set dst_src (NegVD dst_src pg));
-  format %{ "vnegD_masked $dst_src, $pg, $dst_src" %}
+  match(Set dst (NegVD src pg));
+  format %{ "vnegD_masked $dst, $pg, $src" %}
   ins_encode %{
-    __ sve_fneg($dst_src$$FloatRegister, __ D, $pg$$PRegister, $dst_src$$FloatRegister);
+    __ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
+    // Although dst and src hold the same value after movprfx, we must use src
+    // (not dst) as the source of the following instruction. The movprfx
+    // destination register must not appear in any source operand of the
+    // following instruction except as the destructive operand.
+    __ sve_fneg($dst$$FloatRegister, __ D, $pg$$PRegister, $src$$FloatRegister);
   %}
   ins_pipe(pipe_slow);
 %}
@@ -2251,22 +2359,40 @@ instruct vsqrtD(vReg dst, vReg src) %{
 
 // vector sqrt - predicated
 
-instruct vsqrtF_masked(vReg dst_src, pRegGov pg) %{
+// The Java Vector API specification requires that for masked unary operations,
+// suppressed lanes are filled from the first vector operand (see "Masked
+// Operations" in Vector.java around line 568). So we use movprfx to copy src
+// into dst before emitting the predicated instruction.
+instruct vsqrtF_masked(vReg dst, vReg src, pRegGov pg) %{
   predicate(UseSVE > 0);
-  match(Set dst_src (SqrtVF dst_src pg));
-  format %{ "vsqrtF_masked $dst_src, $pg, $dst_src" %}
+  match(Set dst (SqrtVF src pg));
+  format %{ "vsqrtF_masked $dst, $pg, $src" %}
   ins_encode %{
-    __ sve_fsqrt($dst_src$$FloatRegister, __ S, $pg$$PRegister, $dst_src$$FloatRegister);
+    __ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
+    // Although dst and src hold the same value after movprfx, we must use src
+    // (not dst) as the source of the following instruction. The movprfx
+    // destination register must not appear in any source operand of the
+    // following instruction except as the destructive operand.
+    __ sve_fsqrt($dst$$FloatRegister, __ S, $pg$$PRegister, $src$$FloatRegister);
   %}
   ins_pipe(pipe_slow);
 %}
 
-instruct vsqrtD_masked(vReg dst_src, pRegGov pg) %{
+// The Java Vector API specification requires that for masked unary operations,
+// suppressed lanes are filled from the first vector operand (see "Masked
+// Operations" in Vector.java around line 568). So we use movprfx to copy src
+// into dst before emitting the predicated instruction.
+instruct vsqrtD_masked(vReg dst, vReg src, pRegGov pg) %{
   predicate(UseSVE > 0);
-  match(Set dst_src (SqrtVD dst_src pg));
-  format %{ "vsqrtD_masked $dst_src, $pg, $dst_src" %}
+  match(Set dst (SqrtVD src pg));
+  format %{ "vsqrtD_masked $dst, $pg, $src" %}
   ins_encode %{
-    __ sve_fsqrt($dst_src$$FloatRegister, __ D, $pg$$PRegister, $dst_src$$FloatRegister);
+    __ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
+    // Although dst and src hold the same value after movprfx, we must use src
+    // (not dst) as the source of the following instruction. The movprfx
+    // destination register must not appear in any source operand of the
+    // following instruction except as the destructive operand.
+    __ sve_fsqrt($dst$$FloatRegister, __ D, $pg$$PRegister, $src$$FloatRegister);
   %}
   ins_pipe(pipe_slow);
 %}
@@ -5331,9 +5457,7 @@ instruct insertI_index_lt32(vReg dst, vReg src, iRegIorL2I val, immI idx,
     __ sve_index($tmp$$FloatRegister, size, -16, 1);
     __ sve_cmp(Assembler::EQ, $pgtmp$$PRegister, size, ptrue,
                $tmp$$FloatRegister, (int)($idx$$constant) - 16);
-    if ($dst$$FloatRegister != $src$$FloatRegister) {
-      __ sve_orr($dst$$FloatRegister, $src$$FloatRegister, $src$$FloatRegister);
-    }
+    __ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
     __ sve_cpy($dst$$FloatRegister, size, $pgtmp$$PRegister, $val$$Register);
   %}
   ins_pipe(pipe_slow);
@@ -5356,9 +5480,7 @@ instruct insertI_index_ge32(vReg dst, vReg src, iRegIorL2I val, immI idx, vReg t
     __ sve_dup($tmp2$$FloatRegister, size, (int)($idx$$constant));
     __ sve_cmp(Assembler::EQ, $pgtmp$$PRegister, size, ptrue,
                $tmp1$$FloatRegister, $tmp2$$FloatRegister);
-    if ($dst$$FloatRegister != $src$$FloatRegister) {
-      __ sve_orr($dst$$FloatRegister, $src$$FloatRegister, $src$$FloatRegister);
-    }
+    __ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
     __ sve_cpy($dst$$FloatRegister, size, $pgtmp$$PRegister, $val$$Register);
   %}
   ins_pipe(pipe_slow);
@@ -5392,9 +5514,7 @@ instruct insertL_gt128b(vReg dst, vReg src, iRegL val, immI idx,
     __ sve_index($tmp$$FloatRegister, __ D, -16, 1);
     __ sve_cmp(Assembler::EQ, $pgtmp$$PRegister, __ D, ptrue,
                $tmp$$FloatRegister, (int)($idx$$constant) - 16);
-    if ($dst$$FloatRegister != $src$$FloatRegister) {
-      __ sve_orr($dst$$FloatRegister, $src$$FloatRegister, $src$$FloatRegister);
-    }
+    __ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
     __ sve_cpy($dst$$FloatRegister, __ D, $pgtmp$$PRegister, $val$$Register);
   %}
   ins_pipe(pipe_slow);
@@ -5432,7 +5552,7 @@ instruct insertF_index_lt32(vReg dst, vReg src, vRegF val, immI idx,
     __ sve_index($dst$$FloatRegister, __ S, -16, 1);
     __ sve_cmp(Assembler::EQ, $pgtmp$$PRegister, __ S, ptrue,
                $dst$$FloatRegister, (int)($idx$$constant) - 16);
-    __ sve_orr($dst$$FloatRegister, $src$$FloatRegister, $src$$FloatRegister);
+    __ sve_movprfx($dst$$FloatRegister, $src$$FloatRegister);
     __ sve_cpy($dst$$FloatRegister, __ S, $pgtmp$$PRegister, $val$$FloatRegister);
   %}
   ins_pipe(pipe_slow);
@@ -5451,7 +5571,7 @@ instruct insertF_index_ge32(vReg dst, vReg src, vRegF val, immI idx, vReg tmp,
     __ sve_dup($dst$$FloatRegister, __ S, (int)($idx$$constant));
     __ sve_cmp(Assembler::EQ, $pgtmp$$PRegister, __ S, ptrue,
                $tmp$$FloatRegister, $dst$$FloatRegister);
-    __ sve_orr($dst$$FloatRegister, $src$$FloatRegister, $src$$FloatRegister);
+    __ sve_movprfx($dst$$FloatRegister, $src$$FloatRegister);
     __ sve_cpy($dst$$FloatRegister, __ S, $pgtmp$$PRegister, $val$$FloatRegister);
   %}
   ins_pipe(pipe_slow);
@@ -5486,7 +5606,7 @@ instruct insertD_gt128b(vReg dst, vReg src, vRegD val, immI idx,
     __ sve_index($dst$$FloatRegister, __ D, -16, 1);
     __ sve_cmp(Assembler::EQ, $pgtmp$$PRegister, __ D, ptrue,
                $dst$$FloatRegister, (int)($idx$$constant) - 16);
-    __ sve_orr($dst$$FloatRegister, $src$$FloatRegister, $src$$FloatRegister);
+    __ sve_movprfx($dst$$FloatRegister, $src$$FloatRegister);
     __ sve_cpy($dst$$FloatRegister, __ D, $pgtmp$$PRegister, $val$$FloatRegister);
   %}
   ins_pipe(pipe_slow);
@@ -5656,8 +5776,12 @@ instruct extractF(vRegF dst, vReg src, immI idx) %{
       __ ins($dst$$FloatRegister, __ S, $src$$FloatRegister, 0, index);
     } else {
       assert(UseSVE > 0, "must be sve");
-      __ sve_orr($dst$$FloatRegister, $src$$FloatRegister, $src$$FloatRegister);
-      __ sve_ext($dst$$FloatRegister, $dst$$FloatRegister, index << 2);
+      __ sve_movprfx($dst$$FloatRegister, $src$$FloatRegister);
+      // Although dst and src hold the same value after movprfx, we must use src
+      // (not dst) as the second source of ext. The movprfx destination register
+      // must not appear in any source operand of the following instruction
+      // except as the destructive operand.
+      __ sve_ext($dst$$FloatRegister, $src$$FloatRegister, index << 2);
     }
   %}
   ins_pipe(pipe_slow);
@@ -5677,8 +5801,12 @@ instruct extractD(vRegD dst, vReg src, immI idx) %{
       __ ins($dst$$FloatRegister, __ D, $src$$FloatRegister, 0, index);
     } else {
       assert(UseSVE > 0, "must be sve");
-      __ sve_orr($dst$$FloatRegister, $src$$FloatRegister, $src$$FloatRegister);
-      __ sve_ext($dst$$FloatRegister, $dst$$FloatRegister, index << 3);
+      __ sve_movprfx($dst$$FloatRegister, $src$$FloatRegister);
+      // Although dst and src hold the same value after movprfx, we must use src
+      // (not dst) as the second source of ext. The movprfx destination register
+      // must not appear in any source operand of the following instruction
+      // except as the destructive operand.
+      __ sve_ext($dst$$FloatRegister, $src$$FloatRegister, index << 3);
     }
   %}
   ins_pipe(pipe_slow);
@@ -6855,25 +6983,43 @@ instruct vpopcountL(vReg dst, vReg src) %{
 
 // vector popcount - predicated
 
-instruct vpopcountI_masked(vReg dst_src, pRegGov pg) %{
+// The Java Vector API specification requires that for masked unary operations,
+// suppressed lanes are filled from the first vector operand (see "Masked
+// Operations" in Vector.java around line 568). So we use movprfx to copy src
+// into dst before emitting the predicated instruction.
+instruct vpopcountI_masked(vReg dst, vReg src, pRegGov pg) %{
   predicate(UseSVE > 0);
-  match(Set dst_src (PopCountVI dst_src pg));
-  format %{ "vpopcountI_masked $dst_src, $pg, $dst_src" %}
+  match(Set dst (PopCountVI src pg));
+  format %{ "vpopcountI_masked $dst, $pg, $src" %}
   ins_encode %{
     BasicType bt = Matcher::vector_element_basic_type(this);
-    __ sve_cnt($dst_src$$FloatRegister, __ elemType_to_regVariant(bt),
-               $pg$$PRegister, $dst_src$$FloatRegister);
+    __ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
+    // Although dst and src hold the same value after movprfx, we must use src
+    // (not dst) as the source of the following instruction. The movprfx
+    // destination register must not appear in any source operand of the
+    // following instruction except as the destructive operand.
+    __ sve_cnt($dst$$FloatRegister, __ elemType_to_regVariant(bt),
+               $pg$$PRegister, $src$$FloatRegister);
   %}
   ins_pipe(pipe_slow);
 %}
 
-instruct vpopcountL_masked(vReg dst_src, pRegGov pg) %{
+// The Java Vector API specification requires that for masked unary operations,
+// suppressed lanes are filled from the first vector operand (see "Masked
+// Operations" in Vector.java around line 568). So we use movprfx to copy src
+// into dst before emitting the predicated instruction.
+instruct vpopcountL_masked(vReg dst, vReg src, pRegGov pg) %{
   predicate(UseSVE > 0);
-  match(Set dst_src (PopCountVL dst_src pg));
-  format %{ "vpopcountL_masked $dst_src, $pg, $dst_src" %}
+  match(Set dst (PopCountVL src pg));
+  format %{ "vpopcountL_masked $dst, $pg, $src" %}
   ins_encode %{
-    __ sve_cnt($dst_src$$FloatRegister, __ D,
-               $pg$$PRegister, $dst_src$$FloatRegister);
+    __ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
+    // Although dst and src hold the same value after movprfx, we must use src
+    // (not dst) as the source of the following instruction. The movprfx
+    // destination register must not appear in any source operand of the
+    // following instruction except as the destructive operand.
+    __ sve_cnt($dst$$FloatRegister, __ D,
+               $pg$$PRegister, $src$$FloatRegister);
   %}
   ins_pipe(pipe_slow);
 %}
@@ -7240,14 +7386,23 @@ instruct vcountLeadingZeros(vReg dst, vReg src) %{
 // The dst and src should use the same register to make sure the
 // inactive lanes in dst save the same elements as src.
 
-instruct vcountLeadingZeros_masked(vReg dst_src, pRegGov pg) %{
+// The Java Vector API specification requires that for masked unary operations,
+// suppressed lanes are filled from the first vector operand (see "Masked
+// Operations" in Vector.java around line 568). So we use movprfx to copy src
+// into dst before emitting the predicated instruction.
+instruct vcountLeadingZeros_masked(vReg dst, vReg src, pRegGov pg) %{
   predicate(UseSVE > 0);
-  match(Set dst_src (CountLeadingZerosV dst_src pg));
-  format %{ "vcountLeadingZeros_masked $dst_src, $pg, $dst_src" %}
+  match(Set dst (CountLeadingZerosV src pg));
+  format %{ "vcountLeadingZeros_masked $dst, $pg, $src" %}
   ins_encode %{
     BasicType bt = Matcher::vector_element_basic_type(this);
-    __ sve_clz($dst_src$$FloatRegister, __ elemType_to_regVariant(bt),
-               $pg$$PRegister, $dst_src$$FloatRegister);
+    __ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
+    // Although dst and src hold the same value after movprfx, we must use src
+    // (not dst) as the source of the following instruction. The movprfx
+    // destination register must not appear in any source operand of the
+    // following instruction except as the destructive operand.
+    __ sve_clz($dst$$FloatRegister, __ elemType_to_regVariant(bt),
+               $pg$$PRegister, $src$$FloatRegister);
   %}
   ins_pipe(pipe_slow);
 %}
@@ -7296,19 +7451,26 @@ instruct vcountTrailingZeros(vReg dst, vReg src) %{
   ins_pipe(pipe_slow);
 %}
 
-// The dst and src should use the same register to make sure the
-// inactive lanes in dst save the same elements as src.
-instruct vcountTrailingZeros_masked(vReg dst_src, pRegGov pg) %{
+// The Java Vector API specification requires that for masked unary operations,
+// suppressed lanes are filled from the first vector operand (see "Masked
+// Operations" in Vector.java around line 568). So we use movprfx to copy src
+// into dst before emitting the predicated instruction.
+instruct vcountTrailingZeros_masked(vReg dst, vReg src, pRegGov pg) %{
   predicate(UseSVE > 0);
-  match(Set dst_src (CountTrailingZerosV dst_src pg));
-  format %{ "vcountTrailingZeros_masked $dst_src, $pg, $dst_src" %}
+  match(Set dst (CountTrailingZerosV src pg));
+  format %{ "vcountTrailingZeros_masked $dst, $pg, $src" %}
   ins_encode %{
     BasicType bt = Matcher::vector_element_basic_type(this);
     Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
-    __ sve_rbit($dst_src$$FloatRegister, size,
-                $pg$$PRegister, $dst_src$$FloatRegister);
-    __ sve_clz($dst_src$$FloatRegister, size,
-               $pg$$PRegister, $dst_src$$FloatRegister);
+    __ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
+    // Although dst and src hold the same value after movprfx, we must use src
+    // (not dst) as the source of the following instruction. The movprfx
+    // destination register must not appear in any source operand of the
+    // following instruction except as the destructive operand.
+    __ sve_rbit($dst$$FloatRegister, size,
+                $pg$$PRegister, $src$$FloatRegister);
+    __ sve_clz($dst$$FloatRegister, size,
+               $pg$$PRegister, $dst$$FloatRegister);
   %}
   ins_pipe(pipe_slow);
 %}
@@ -7347,14 +7509,23 @@ instruct vreverse(vReg dst, vReg src) %{
 // The dst and src should use the same register to make sure the
 // inactive lanes in dst save the same elements as src.
 
-instruct vreverse_masked(vReg dst_src, pRegGov pg) %{
+// The Java Vector API specification requires that for masked unary operations,
+// suppressed lanes are filled from the first vector operand (see "Masked
+// Operations" in Vector.java around line 568). So we use movprfx to copy src
+// into dst before emitting the predicated instruction.
+instruct vreverse_masked(vReg dst, vReg src, pRegGov pg) %{
   predicate(UseSVE > 0);
-  match(Set dst_src (ReverseV dst_src pg));
-  format %{ "vreverse_masked $dst_src, $pg, $dst_src" %}
+  match(Set dst (ReverseV src pg));
+  format %{ "vreverse_masked $dst, $pg, $src" %}
   ins_encode %{
     BasicType bt = Matcher::vector_element_basic_type(this);
-    __ sve_rbit($dst_src$$FloatRegister, __ elemType_to_regVariant(bt),
-               $pg$$PRegister, $dst_src$$FloatRegister);
+    __ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
+    // Although dst and src hold the same value after movprfx, we must use src
+    // (not dst) as the source of the following instruction. The movprfx
+    // destination register must not appear in any source operand of the
+    // following instruction except as the destructive operand.
+    __ sve_rbit($dst$$FloatRegister, __ elemType_to_regVariant(bt),
+               $pg$$PRegister, $src$$FloatRegister);
   %}
   ins_pipe(pipe_slow);
 %}
@@ -7393,19 +7564,28 @@ instruct vreverseBytes(vReg dst, vReg src) %{
   ins_pipe(pipe_slow);
 %}
 
-// The dst and src should use the same register to make sure the
-// inactive lanes in dst save the same elements as src.
-instruct vreverseBytes_masked(vReg dst_src, pRegGov pg) %{
+// The Java Vector API specification requires that for masked unary operations,
+// suppressed lanes are filled from the first vector operand (see "Masked
+// Operations" in Vector.java around line 568). So we use movprfx to copy src
+// into dst before emitting the predicated instruction.
+instruct vreverseBytes_masked(vReg dst, vReg src, pRegGov pg) %{
   predicate(UseSVE > 0);
-  match(Set dst_src (ReverseBytesV dst_src pg));
-  format %{ "vreverseBytes_masked $dst_src, $pg, $dst_src" %}
+  match(Set dst (ReverseBytesV src pg));
+  format %{ "vreverseBytes_masked $dst, $pg, $src" %}
   ins_encode %{
     BasicType bt = Matcher::vector_element_basic_type(this);
     if (bt == T_BYTE) {
-      // do nothing
+      if ($dst$$FloatRegister != $src$$FloatRegister) {
+        __ sve_orr($dst$$FloatRegister, $src$$FloatRegister, $src$$FloatRegister);
+      }
     } else {
-      __ sve_revb($dst_src$$FloatRegister, __ elemType_to_regVariant(bt),
-                  $pg$$PRegister, $dst_src$$FloatRegister);
+      __ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
+      // Although dst and src hold the same value after movprfx, we must use src
+      // (not dst) as the source of the following instruction. The movprfx
+      // destination register must not appear in any source operand of the
+      // following instruction except as the destructive operand.
+      __ sve_revb($dst$$FloatRegister, __ elemType_to_regVariant(bt),
+                  $pg$$PRegister, $src$$FloatRegister);
     }
   %}
   ins_pipe(pipe_slow);
diff --git a/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4 b/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4
index c5df949dfb6..a53efd43d5d 100644
--- a/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4
+++ b/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4
@@ -899,13 +899,22 @@ dnl
 dnl VECTOR_NOT_PREDICATE($1  )
 dnl VECTOR_NOT_PREDICATE(type)
 define(`VECTOR_NOT_PREDICATE', `
-instruct vnot$1_masked`'(vReg dst_src, imm$1_M1 m1, pRegGov pg) %{
+// The Java Vector API specification requires that for masked unary operations,
+// suppressed lanes are filled from the first vector operand (see "Masked
+// Operations" in Vector.java around line 568). So we use movprfx to copy src
+// into dst before emitting the predicated instruction.
+instruct vnot$1_masked`'(vReg dst, vReg src, imm$1_M1 m1, pRegGov pg) %{
   predicate(UseSVE > 0);
-  match(Set dst_src (XorV (Binary dst_src (Replicate m1)) pg));
-  format %{ "vnot$1_masked $dst_src, $pg, $dst_src" %}
+  match(Set dst (XorV (Binary src (Replicate m1)) pg));
+  format %{ "vnot$1_masked $dst, $pg, $src" %}
   ins_encode %{
-    __ sve_not($dst_src$$FloatRegister, get_reg_variant(this),
-               $pg$$PRegister, $dst_src$$FloatRegister);
+    __ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
+    // Although dst and src hold the same value after movprfx, we must use src
+    // (not dst) as the source of the following instruction. The movprfx
+    // destination register must not appear in any source operand of the
+    // following instruction except as the destructive operand.
+    __ sve_not($dst$$FloatRegister, get_reg_variant(this),
+               $pg$$PRegister, $src$$FloatRegister);
   %}
   ins_pipe(pipe_slow);
 %}')dnl
@@ -1042,14 +1051,23 @@ dnl
 dnl UNARY_OP_PREDICATE($1,        $2,      $3  )
 dnl UNARY_OP_PREDICATE(rule_name, op_name, insn)
 define(`UNARY_OP_PREDICATE', `
-instruct $1_masked(vReg dst_src, pRegGov pg) %{
+// The Java Vector API specification requires that for masked unary operations,
+// suppressed lanes are filled from the first vector operand (see "Masked
+// Operations" in Vector.java around line 568). So we use movprfx to copy src
+// into dst before emitting the predicated instruction.
+instruct $1_masked(vReg dst, vReg src, pRegGov pg) %{
   predicate(UseSVE > 0);
-  match(Set dst_src ($2 dst_src pg));
-  format %{ "$1_masked $dst_src, $pg, $dst_src" %}
+  match(Set dst ($2 src pg));
+  format %{ "$1_masked $dst, $pg, $src" %}
   ins_encode %{
     BasicType bt = Matcher::vector_element_basic_type(this);
-    __ $3($dst_src$$FloatRegister, __ elemType_to_regVariant(bt),
-               $pg$$PRegister, $dst_src$$FloatRegister);
+    __ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
+    // Although dst and src hold the same value after movprfx, we must use src
+    // (not dst) as the source of the following instruction. The movprfx
+    // destination register must not appear in any source operand of the
+    // following instruction except as the destructive operand.
+    __ $3($dst$$FloatRegister, __ elemType_to_regVariant(bt),
+               $pg$$PRegister, $src$$FloatRegister);
   %}
   ins_pipe(pipe_slow);
 %}')dnl
@@ -1057,12 +1075,21 @@ dnl
 dnl UNARY_OP_PREDICATE_WITH_SIZE($1,        $2,      $3,   $4  )
 dnl UNARY_OP_PREDICATE_WITH_SIZE(rule_name, op_name, insn, size)
 define(`UNARY_OP_PREDICATE_WITH_SIZE', `
-instruct $1_masked(vReg dst_src, pRegGov pg) %{
+// The Java Vector API specification requires that for masked unary operations,
+// suppressed lanes are filled from the first vector operand (see "Masked
+// Operations" in Vector.java around line 568). So we use movprfx to copy src
+// into dst before emitting the predicated instruction.
+instruct $1_masked(vReg dst, vReg src, pRegGov pg) %{
   predicate(UseSVE > 0);
-  match(Set dst_src ($2 dst_src pg));
-  format %{ "$1_masked $dst_src, $pg, $dst_src" %}
+  match(Set dst ($2 src pg));
+  format %{ "$1_masked $dst, $pg, $src" %}
   ins_encode %{
-    __ $3($dst_src$$FloatRegister, __ $4, $pg$$PRegister, $dst_src$$FloatRegister);
+    __ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
+    // Although dst and src hold the same value after movprfx, we must use src
+    // (not dst) as the source of the following instruction. The movprfx
+    // destination register must not appear in any source operand of the
+    // following instruction except as the destructive operand.
+    __ $3($dst$$FloatRegister, __ $4, $pg$$PRegister, $src$$FloatRegister);
   %}
   ins_pipe(pipe_slow);
 %}')dnl
@@ -3368,9 +3395,7 @@ instruct insertI_index_lt32(vReg dst, vReg src, iRegIorL2I val, immI idx,
     __ sve_index($tmp$$FloatRegister, size, -16, 1);
     __ sve_cmp(Assembler::EQ, $pgtmp$$PRegister, size, ptrue,
                $tmp$$FloatRegister, (int)($idx$$constant) - 16);
-    if ($dst$$FloatRegister != $src$$FloatRegister) {
-      __ sve_orr($dst$$FloatRegister, $src$$FloatRegister, $src$$FloatRegister);
-    }
+    __ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
     __ sve_cpy($dst$$FloatRegister, size, $pgtmp$$PRegister, $val$$Register);
   %}
   ins_pipe(pipe_slow);
@@ -3393,9 +3418,7 @@ instruct insertI_index_ge32(vReg dst, vReg src, iRegIorL2I val, immI idx, vReg t
     __ sve_dup($tmp2$$FloatRegister, size, (int)($idx$$constant));
     __ sve_cmp(Assembler::EQ, $pgtmp$$PRegister, size, ptrue,
                $tmp1$$FloatRegister, $tmp2$$FloatRegister);
-    if ($dst$$FloatRegister != $src$$FloatRegister) {
-      __ sve_orr($dst$$FloatRegister, $src$$FloatRegister, $src$$FloatRegister);
-    }
+    __ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
     __ sve_cpy($dst$$FloatRegister, size, $pgtmp$$PRegister, $val$$Register);
   %}
   ins_pipe(pipe_slow);
@@ -3429,9 +3452,7 @@ instruct insertL_gt128b(vReg dst, vReg src, iRegL val, immI idx,
     __ sve_index($tmp$$FloatRegister, __ D, -16, 1);
     __ sve_cmp(Assembler::EQ, $pgtmp$$PRegister, __ D, ptrue,
                $tmp$$FloatRegister, (int)($idx$$constant) - 16);
-    if ($dst$$FloatRegister != $src$$FloatRegister) {
-      __ sve_orr($dst$$FloatRegister, $src$$FloatRegister, $src$$FloatRegister);
-    }
+    __ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
     __ sve_cpy($dst$$FloatRegister, __ D, $pgtmp$$PRegister, $val$$Register);
   %}
   ins_pipe(pipe_slow);
@@ -3469,7 +3490,7 @@ instruct insertF_index_lt32(vReg dst, vReg src, vRegF val, immI idx,
     __ sve_index($dst$$FloatRegister, __ S, -16, 1);
     __ sve_cmp(Assembler::EQ, $pgtmp$$PRegister, __ S, ptrue,
                $dst$$FloatRegister, (int)($idx$$constant) - 16);
-    __ sve_orr($dst$$FloatRegister, $src$$FloatRegister, $src$$FloatRegister);
+    __ sve_movprfx($dst$$FloatRegister, $src$$FloatRegister);
     __ sve_cpy($dst$$FloatRegister, __ S, $pgtmp$$PRegister, $val$$FloatRegister);
   %}
   ins_pipe(pipe_slow);
@@ -3488,7 +3509,7 @@ instruct insertF_index_ge32(vReg dst, vReg src, vRegF val, immI idx, vReg tmp,
     __ sve_dup($dst$$FloatRegister, __ S, (int)($idx$$constant));
     __ sve_cmp(Assembler::EQ, $pgtmp$$PRegister, __ S, ptrue,
                $tmp$$FloatRegister, $dst$$FloatRegister);
-    __ sve_orr($dst$$FloatRegister, $src$$FloatRegister, $src$$FloatRegister);
+    __ sve_movprfx($dst$$FloatRegister, $src$$FloatRegister);
     __ sve_cpy($dst$$FloatRegister, __ S, $pgtmp$$PRegister, $val$$FloatRegister);
   %}
   ins_pipe(pipe_slow);
@@ -3523,7 +3544,7 @@ instruct insertD_gt128b(vReg dst, vReg src, vRegD val, immI idx,
     __ sve_index($dst$$FloatRegister, __ D, -16, 1);
     __ sve_cmp(Assembler::EQ, $pgtmp$$PRegister, __ D, ptrue,
                $dst$$FloatRegister, (int)($idx$$constant) - 16);
-    __ sve_orr($dst$$FloatRegister, $src$$FloatRegister, $src$$FloatRegister);
+    __ sve_movprfx($dst$$FloatRegister, $src$$FloatRegister);
     __ sve_cpy($dst$$FloatRegister, __ D, $pgtmp$$PRegister, $val$$FloatRegister);
   %}
   ins_pipe(pipe_slow);
@@ -3621,8 +3642,12 @@ instruct extract$1(vReg$1 dst, vReg src, immI idx) %{
       __ ins($dst$$FloatRegister, __ $4, $src$$FloatRegister, 0, index);
     } else {
       assert(UseSVE > 0, "must be sve");
-      __ sve_orr($dst$$FloatRegister, $src$$FloatRegister, $src$$FloatRegister);
-      __ sve_ext($dst$$FloatRegister, $dst$$FloatRegister, index << $5);
+      __ sve_movprfx($dst$$FloatRegister, $src$$FloatRegister);
+      // Although dst and src hold the same value after movprfx, we must use src
+      // (not dst) as the second source of ext. The movprfx destination register
+      // must not appear in any source operand of the following instruction
+      // except as the destructive operand.
+      __ sve_ext($dst$$FloatRegister, $src$$FloatRegister, index << $5);
     }
   %}
   ins_pipe(pipe_slow);
@@ -4682,13 +4707,22 @@ instruct vpopcountL(vReg dst, vReg src) %{
 // vector popcount - predicated
 UNARY_OP_PREDICATE(vpopcountI, PopCountVI, sve_cnt)
 
-instruct vpopcountL_masked(vReg dst_src, pRegGov pg) %{
+// The Java Vector API specification requires that for masked unary operations,
+// suppressed lanes are filled from the first vector operand (see "Masked
+// Operations" in Vector.java around line 568). So we use movprfx to copy src
+// into dst before emitting the predicated instruction.
+instruct vpopcountL_masked(vReg dst, vReg src, pRegGov pg) %{
   predicate(UseSVE > 0);
-  match(Set dst_src (PopCountVL dst_src pg));
-  format %{ "vpopcountL_masked $dst_src, $pg, $dst_src" %}
+  match(Set dst (PopCountVL src pg));
+  format %{ "vpopcountL_masked $dst, $pg, $src" %}
   ins_encode %{
-    __ sve_cnt($dst_src$$FloatRegister, __ D,
-               $pg$$PRegister, $dst_src$$FloatRegister);
+    __ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
+    // Although dst and src hold the same value after movprfx, we must use src
+    // (not dst) as the source of the following instruction. The movprfx
+    // destination register must not appear in any source operand of the
+    // following instruction except as the destructive operand.
+    __ sve_cnt($dst$$FloatRegister, __ D,
+               $pg$$PRegister, $src$$FloatRegister);
   %}
   ins_pipe(pipe_slow);
 %}
@@ -5100,19 +5134,26 @@ instruct vcountTrailingZeros(vReg dst, vReg src) %{
   ins_pipe(pipe_slow);
 %}
 
-// The dst and src should use the same register to make sure the
-// inactive lanes in dst save the same elements as src.
-instruct vcountTrailingZeros_masked(vReg dst_src, pRegGov pg) %{
+// The Java Vector API specification requires that for masked unary operations,
+// suppressed lanes are filled from the first vector operand (see "Masked
+// Operations" in Vector.java around line 568). So we use movprfx to copy src
+// into dst before emitting the predicated instruction.
+instruct vcountTrailingZeros_masked(vReg dst, vReg src, pRegGov pg) %{
   predicate(UseSVE > 0);
-  match(Set dst_src (CountTrailingZerosV dst_src pg));
-  format %{ "vcountTrailingZeros_masked $dst_src, $pg, $dst_src" %}
+  match(Set dst (CountTrailingZerosV src pg));
+  format %{ "vcountTrailingZeros_masked $dst, $pg, $src" %}
   ins_encode %{
     BasicType bt = Matcher::vector_element_basic_type(this);
     Assembler::SIMD_RegVariant size = __ elemType_to_regVariant(bt);
-    __ sve_rbit($dst_src$$FloatRegister, size,
-                $pg$$PRegister, $dst_src$$FloatRegister);
-    __ sve_clz($dst_src$$FloatRegister, size,
-               $pg$$PRegister, $dst_src$$FloatRegister);
+    __ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
+    // Although dst and src hold the same value after movprfx, we must use src
+    // (not dst) as the source of the following instruction. The movprfx
+    // destination register must not appear in any source operand of the
+    // following instruction except as the destructive operand.
+    __ sve_rbit($dst$$FloatRegister, size,
+                $pg$$PRegister, $src$$FloatRegister);
+    __ sve_clz($dst$$FloatRegister, size,
+               $pg$$PRegister, $dst$$FloatRegister);
   %}
   ins_pipe(pipe_slow);
 %}
@@ -5186,19 +5227,28 @@ instruct vreverseBytes(vReg dst, vReg src) %{
   ins_pipe(pipe_slow);
 %}
 
-// The dst and src should use the same register to make sure the
-// inactive lanes in dst save the same elements as src.
-instruct vreverseBytes_masked(vReg dst_src, pRegGov pg) %{
+// The Java Vector API specification requires that for masked unary operations,
+// suppressed lanes are filled from the first vector operand (see "Masked
+// Operations" in Vector.java around line 568). So we use movprfx to copy src
+// into dst before emitting the predicated instruction.
+instruct vreverseBytes_masked(vReg dst, vReg src, pRegGov pg) %{
   predicate(UseSVE > 0);
-  match(Set dst_src (ReverseBytesV dst_src pg));
-  format %{ "vreverseBytes_masked $dst_src, $pg, $dst_src" %}
+  match(Set dst (ReverseBytesV src pg));
+  format %{ "vreverseBytes_masked $dst, $pg, $src" %}
   ins_encode %{
     BasicType bt = Matcher::vector_element_basic_type(this);
     if (bt == T_BYTE) {
-      // do nothing
+      if ($dst$$FloatRegister != $src$$FloatRegister) {
+        __ sve_orr($dst$$FloatRegister, $src$$FloatRegister, $src$$FloatRegister);
+      }
     } else {
-      __ sve_revb($dst_src$$FloatRegister, __ elemType_to_regVariant(bt),
-                  $pg$$PRegister, $dst_src$$FloatRegister);
+      __ maybe_movprfx($dst$$FloatRegister, $src$$FloatRegister);
+      // Although dst and src hold the same value after movprfx, we must use src
+      // (not dst) as the source of the following instruction. The movprfx
+      // destination register must not appear in any source operand of the
+      // following instruction except as the destructive operand.
+      __ sve_revb($dst$$FloatRegister, __ elemType_to_regVariant(bt),
+                  $pg$$PRegister, $src$$FloatRegister);
     }
   %}
   ins_pipe(pipe_slow);
diff --git a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp
index 67dc4966d64..cb9e308197e 100644
--- a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp
@@ -2494,8 +2494,12 @@ void C2_MacroAssembler::sve_extract_integral(Register dst, BasicType bt, FloatRe
       smov(dst, src, size, idx);
     }
   } else {
-    sve_orr(vtmp, src, src);
-    sve_ext(vtmp, vtmp, idx << size);
+    sve_movprfx(vtmp, src);
+    // Although vtmp and src hold the same value after movprfx, we must use src
+    // (not vtmp) as the second source of ext. The movprfx destination register
+    // must not appear in any source operand of the following instruction except
+    // as the destructive operand.
+    sve_ext(vtmp, src, idx << size);
     if (bt == T_INT || bt == T_LONG) {
       umov(dst, vtmp, size, 0);
     } else {
diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
index a52ad112560..ac5bae22384 100644
--- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
@@ -7278,3 +7278,26 @@ void MacroAssembler::neon_vector_rotate(FloatRegister dst, SIMD_Arrangement T,
     sli(dst, T, src, lshift);
   }
 }
+
+void MacroAssembler::try_to_replace_prev_vector_copy_with_movprfx(FloatRegister dst) {
+  if (code_section()->is_empty()) {
+    return;
+  }
+
+  address prev = pc() - NativeInstruction::instruction_size;
+  uint32_t insn = nativeInstruction_at(prev)->encoding();
+  if (!NativeInstruction::is_neon_vector_mov_alias(insn) &&
+      !NativeInstruction::is_sve_vector_mov_alias(insn)) {
+    return;
+  }
+
+  // The destructive instruction must reuse the mov alias destination.
+  uint32_t rd = Instruction_aarch64::extract(insn, 4, 0);
+  if (rd != (uint32_t)dst->encoding()) {
+    return;
+  }
+
+  uint32_t rn = Instruction_aarch64::extract(insn, 9, 5);
+  Instruction_aarch64::patch(prev, 31, 0,
+                             NativeInstruction::encode_sve_movprfx(rd, rn));
+}
diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp
index ad8827bd9c0..b1050b45731 100644
--- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp
@@ -1734,7 +1734,103 @@ public:
 private:
   // Check the current thread doesn't need a cross modify fence.
   void verify_cross_modify_fence_not_required() PRODUCT_RETURN;
+  void try_to_replace_prev_vector_copy_with_movprfx(FloatRegister dst);
 
+public:
+  void maybe_movprfx(FloatRegister dst, FloatRegister src) {
+    if (dst != src) {
+      sve_movprfx(dst, src);
+    }
+  }
+
+// Wrappers for SVE explicit destructive instructions, overriding the
+// same-signature Assembler entry points to enable movprfx fusion optimization.
+//
+// Implicit destructive instructions (e.g. predicated unary ops like sve_abs/
+// sve_neg/sve_not, whose ISA encoding allows Zd != Zn but whose use as a Java
+// Vector API masked operation requires pass-through of the first source) are
+// not covered here. For those, the .ad file is responsible for emitting
+// movprfx explicitly via maybe_movprfx() before the destructive op.
+#define SVE_DESTRUCTIVE_BINARY_INS(NAME)                                       \
+  using Assembler::NAME;                                                       \
+  void NAME(FloatRegister Zd, SIMD_RegVariant T, PRegister Pg,                 \
+            FloatRegister Zm) {                                                \
+    if (Zd != Zm) {                                                            \
+      try_to_replace_prev_vector_copy_with_movprfx(Zd);                        \
+    }                                                                          \
+    Assembler::NAME(Zd, T, Pg, Zm);                                            \
+  }
+
+#define SVE_DESTRUCTIVE_BINARY_5(I1, I2, I3, I4, I5)                           \
+  SVE_DESTRUCTIVE_BINARY_INS(I1); SVE_DESTRUCTIVE_BINARY_INS(I2);              \
+  SVE_DESTRUCTIVE_BINARY_INS(I3); SVE_DESTRUCTIVE_BINARY_INS(I4);              \
+  SVE_DESTRUCTIVE_BINARY_INS(I5);
+
+  SVE_DESTRUCTIVE_BINARY_5(sve_add,  sve_and,   sve_asr,   sve_bic,   sve_eor)
+  SVE_DESTRUCTIVE_BINARY_5(sve_fabd, sve_fadd,  sve_fdiv,  sve_fmax,  sve_fmin)
+  SVE_DESTRUCTIVE_BINARY_5(sve_fmul, sve_fsub,  sve_lsl,   sve_lsr,   sve_mul)
+  SVE_DESTRUCTIVE_BINARY_5(sve_orr,  sve_smax,  sve_smin,  sve_sqadd, sve_sqsub)
+  SVE_DESTRUCTIVE_BINARY_5(sve_sub,  sve_uqadd, sve_uqsub, sve_umax,  sve_umin)
+
+#undef SVE_DESTRUCTIVE_BINARY_INS
+#undef SVE_DESTRUCTIVE_BINARY_5
+
+#define SVE_DESTRUCTIVE_SHIFT_IMM_INS(NAME)                                    \
+  void NAME(FloatRegister Zd, SIMD_RegVariant T, PRegister Pg, int shift) {    \
+    try_to_replace_prev_vector_copy_with_movprfx(Zd);                          \
+    Assembler::NAME(Zd, T, Pg, shift);                                         \
+  }
+
+  SVE_DESTRUCTIVE_SHIFT_IMM_INS(sve_asr);
+  SVE_DESTRUCTIVE_SHIFT_IMM_INS(sve_lsl);
+  SVE_DESTRUCTIVE_SHIFT_IMM_INS(sve_lsr);
+
+#undef SVE_DESTRUCTIVE_SHIFT_IMM_INS
+
+#define SVE_DESTRUCTIVE_UNPRED_IMM_INS(NAME, IMM_TYPE)                         \
+  void NAME(FloatRegister Zd, SIMD_RegVariant T, IMM_TYPE imm) {               \
+    try_to_replace_prev_vector_copy_with_movprfx(Zd);                          \
+    Assembler::NAME(Zd, T, imm);                                               \
+  }
+
+  SVE_DESTRUCTIVE_UNPRED_IMM_INS(sve_add, unsigned);
+  SVE_DESTRUCTIVE_UNPRED_IMM_INS(sve_sub, unsigned);
+  SVE_DESTRUCTIVE_UNPRED_IMM_INS(sve_and, uint64_t);
+  SVE_DESTRUCTIVE_UNPRED_IMM_INS(sve_eor, uint64_t);
+  SVE_DESTRUCTIVE_UNPRED_IMM_INS(sve_orr, uint64_t);
+
+#undef SVE_DESTRUCTIVE_UNPRED_IMM_INS
+
+#define SVE_DESTRUCTIVE_TERNARY_INS(NAME)                                      \
+  using Assembler::NAME;                                                       \
+  void NAME(FloatRegister Zd, SIMD_RegVariant T, PRegister Pg,                 \
+            FloatRegister Zn, FloatRegister Zm) {                              \
+    if (Zd != Zn && Zd != Zm) {                                                \
+      try_to_replace_prev_vector_copy_with_movprfx(Zd);                        \
+    }                                                                          \
+    Assembler::NAME(Zd, T, Pg, Zn, Zm);                                        \
+  }
+
+  SVE_DESTRUCTIVE_TERNARY_INS(sve_fmad);
+  SVE_DESTRUCTIVE_TERNARY_INS(sve_fmla);
+  SVE_DESTRUCTIVE_TERNARY_INS(sve_fmls);
+  SVE_DESTRUCTIVE_TERNARY_INS(sve_fmsb);
+  SVE_DESTRUCTIVE_TERNARY_INS(sve_fnmad);
+  SVE_DESTRUCTIVE_TERNARY_INS(sve_fnmla);
+  SVE_DESTRUCTIVE_TERNARY_INS(sve_fnmls);
+  SVE_DESTRUCTIVE_TERNARY_INS(sve_fnmsb);
+  SVE_DESTRUCTIVE_TERNARY_INS(sve_mla);
+  SVE_DESTRUCTIVE_TERNARY_INS(sve_mls);
+
+#undef SVE_DESTRUCTIVE_TERNARY_INS
+
+  using Assembler::sve_eor3;
+  void sve_eor3(FloatRegister Zd, FloatRegister Zm, FloatRegister Zk) {
+    if (Zd != Zm && Zd != Zk) {
+      try_to_replace_prev_vector_copy_with_movprfx(Zd);
+    }
+    Assembler::sve_eor3(Zd, Zm, Zk);
+  }
 };
 
 #ifdef ASSERT
diff --git a/src/hotspot/cpu/aarch64/nativeInst_aarch64.hpp b/src/hotspot/cpu/aarch64/nativeInst_aarch64.hpp
index 4bccbc59582..57bb9a91533 100644
--- a/src/hotspot/cpu/aarch64/nativeInst_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/nativeInst_aarch64.hpp
@@ -140,6 +140,29 @@ public:
       Instruction_aarch64::extract(insn, 23, 23) == 0b0 &&
       Instruction_aarch64::extract(insn, 26, 25) == 0b00;
   }
+
+  static bool is_neon_vector_mov_alias(uint32_t insn) {
+    if (Instruction_aarch64::extract(insn, 31, 31) != 0 ||
+        Instruction_aarch64::extract(insn, 29, 21) != 0b001110101 ||
+        Instruction_aarch64::extract(insn, 15, 10) != 0b000111) {
+      return false;
+    }
+    return Instruction_aarch64::extract(insn, 9, 5) ==
+           Instruction_aarch64::extract(insn, 20, 16);
+  }
+
+  static bool is_sve_vector_mov_alias(uint32_t insn) {
+    if (Instruction_aarch64::extract(insn, 31, 21) != 0b00000100011 ||
+        Instruction_aarch64::extract(insn, 15, 10) != 0b001100) {
+      return false;
+    }
+    return Instruction_aarch64::extract(insn, 9, 5) ==
+           Instruction_aarch64::extract(insn, 20, 16);
+  }
+
+  static uint32_t encode_sve_movprfx(uint32_t dst, uint32_t src) {
+    return 0x1082f << 10 | (src << 5) | dst;
+  }
 };
 
 inline NativeInstruction* nativeInstruction_at(address address) {
diff --git a/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp b/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp
index 46849b329cb..8e9af2b7b8a 100644
--- a/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp
@@ -6276,6 +6276,24 @@ class StubGenerator: public StubCodeGenerator {
   // static int implKyberNttMult(
   //              short[] result, short[] ntta, short[] nttb, short[] zetas) {}
   //
+  // The actual algorithm that is used here differs from the one in the Java
+  // implementation, it uses Montgomery multiplications instead of Barrett
+  // reduction, but the end result modulo MLKEM_Q is the same. This is the
+  // Java equivalent of this intrinsic implementation:
+  // static void implKyberNttMultJava(short[] result, short[] ntta, short[] nttb) {
+  //         for (int m = 0; m < ML_KEM_N / 2; m++) {
+  //             int a0 = ntta[2 * m];
+  //             int a1 = ntta[2 * m + 1];
+  //             int b0 = nttb[2 * m];
+  //             int b1 = nttb[2 * m + 1];
+  //             int r = montMul(a0, b0) +
+  //                     montMul(montMul(a1, b1), MONT_ZETAS_FOR_NTT_MULT[m]);
+  //             result[2 * m] = (short) montMul(r, MONT_R_SQUARE_MOD_Q);
+  //             result[2 * m + 1] = (short) montMul(
+  //                     (montMul(a0, b1) + montMul(a1, b0)), MONT_R_SQUARE_MOD_Q);
+  //          }
+  // }
+  //
   // result (short[256]) = c_rarg0
   // ntta (short[256]) = c_rarg1
   // nttb (short[256]) = c_rarg2
diff --git a/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp b/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp
index a9f34b148c6..1270471d150 100644
--- a/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp
+++ b/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp
@@ -2226,39 +2226,12 @@ void LIR_Assembler::emit_alloc_array(LIR_OpAllocArray* op) {
 }
 
 
+// kills recv
 void LIR_Assembler::type_profile_helper(Register mdo, int mdo_offset_bias,
                                         ciMethodData *md, ciProfileData *data,
-                                        Register recv, Register tmp1, Label* update_done) {
-  uint i;
-  for (i = 0; i < VirtualCallData::row_limit(); i++) {
-    Label next_test;
-    // See if the receiver is receiver[n].
-    __ ld(tmp1, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(i)) - mdo_offset_bias, mdo);
-    __ verify_klass_ptr(tmp1);
-    __ cmpd(CR0, recv, tmp1);
-    __ bne(CR0, next_test);
-
-    __ ld(tmp1, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i)) - mdo_offset_bias, mdo);
-    __ addi(tmp1, tmp1, DataLayout::counter_increment);
-    __ std(tmp1, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i)) - mdo_offset_bias, mdo);
-    __ b(*update_done);
-
-    __ bind(next_test);
-  }
-
-  // Didn't find receiver; find next empty slot and fill it in.
-  for (i = 0; i < VirtualCallData::row_limit(); i++) {
-    Label next_test;
-    __ ld(tmp1, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(i)) - mdo_offset_bias, mdo);
-    __ cmpdi(CR0, tmp1, 0);
-    __ bne(CR0, next_test);
-    __ li(tmp1, DataLayout::counter_increment);
-    __ std(recv, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(i)) - mdo_offset_bias, mdo);
-    __ std(tmp1, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i)) - mdo_offset_bias, mdo);
-    __ b(*update_done);
-
-    __ bind(next_test);
-  }
+                                        Register recv, Register tmp) {
+  int mdp_offset = md->byte_offset_of_slot(data, in_ByteSize(0)) - mdo_offset_bias;
+  __ profile_receiver_type(recv, mdo, mdp_offset, tmp, noreg);
 }
 
 
@@ -2320,15 +2293,9 @@ void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, L
     __ b(*obj_is_null);
     __ bind(not_null);
 
-    Label update_done;
     Register recv = klass_RInfo;
     __ load_klass(recv, obj);
-    type_profile_helper(mdo, mdo_offset_bias, md, data, recv, Rtmp1, &update_done);
-    const int slot_offset = md->byte_offset_of_slot(data, CounterData::count_offset()) - mdo_offset_bias;
-    __ ld(Rtmp1, slot_offset, mdo);
-    __ addi(Rtmp1, Rtmp1, DataLayout::counter_increment);
-    __ std(Rtmp1, slot_offset, mdo);
-    __ bind(update_done);
+    type_profile_helper(mdo, mdo_offset_bias, md, data, recv, Rtmp1); // kills recv
   } else {
     __ cmpdi(CR0, obj, 0);
     __ beq(CR0, *obj_is_null);
@@ -2427,15 +2394,9 @@ void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) {
       __ b(done);
       __ bind(not_null);
 
-      Label update_done;
       Register recv = klass_RInfo;
       __ load_klass(recv, value);
-      type_profile_helper(mdo, mdo_offset_bias, md, data, recv, Rtmp1, &update_done);
-      const int slot_offset = md->byte_offset_of_slot(data, CounterData::count_offset()) - mdo_offset_bias;
-      __ ld(Rtmp1, slot_offset, mdo);
-      __ addi(Rtmp1, Rtmp1, DataLayout::counter_increment);
-      __ std(Rtmp1, slot_offset, mdo);
-      __ bind(update_done);
+      type_profile_helper(mdo, mdo_offset_bias, md, data, recv, Rtmp1); // kills recv
     } else {
       __ cmpdi(CR0, value, 0);
       __ beq(CR0, done);
@@ -2648,55 +2609,27 @@ void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) {
       // We know the type that will be seen at this call site; we can
       // statically update the MethodData* rather than needing to do
       // dynamic tests on the receiver type.
-
-      // NOTE: we should probably put a lock around this search to
-      // avoid collisions by concurrent compilations.
       ciVirtualCallData* vc_data = (ciVirtualCallData*) data;
-      uint i;
-      for (i = 0; i < VirtualCallData::row_limit(); i++) {
+      for (uint i = 0; i < VirtualCallData::row_limit(); i++) {
         ciKlass* receiver = vc_data->receiver(i);
         if (known_klass->equals(receiver)) {
-          __ ld(tmp1, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i)) - mdo_offset_bias, mdo);
-          __ addi(tmp1, tmp1, DataLayout::counter_increment);
-          __ std(tmp1, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i)) - mdo_offset_bias, mdo);
+          __ increment_mem64(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i)) - mdo_offset_bias,
+                             DataLayout::counter_increment, tmp1);
           return;
         }
       }
 
-      // Receiver type not found in profile data; select an empty slot.
-
-      // Note that this is less efficient than it should be because it
-      // always does a write to the receiver part of the
-      // VirtualCallData rather than just the first time.
-      for (i = 0; i < VirtualCallData::row_limit(); i++) {
-        ciKlass* receiver = vc_data->receiver(i);
-        if (receiver == nullptr) {
-          metadata2reg(known_klass->constant_encoding(), tmp1);
-          __ std(tmp1, md->byte_offset_of_slot(data, VirtualCallData::receiver_offset(i)) - mdo_offset_bias, mdo);
-
-          __ ld(tmp1, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i)) - mdo_offset_bias, mdo);
-          __ addi(tmp1, tmp1, DataLayout::counter_increment);
-          __ std(tmp1, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i)) - mdo_offset_bias, mdo);
-          return;
-        }
-      }
+      // Receiver type is not found in profile data.
+      // Fall back to runtime helper to handle the rest at runtime.
+      metadata2reg(known_klass->constant_encoding(), recv);
     } else {
       __ load_klass(recv, recv);
-      Label update_done;
-      type_profile_helper(mdo, mdo_offset_bias, md, data, recv, tmp1, &update_done);
-      // Receiver did not match any saved receiver and there is no empty row for it.
-      // Increment total counter to indicate polymorphic case.
-      __ ld(tmp1, md->byte_offset_of_slot(data, CounterData::count_offset()) - mdo_offset_bias, mdo);
-      __ addi(tmp1, tmp1, DataLayout::counter_increment);
-      __ std(tmp1, md->byte_offset_of_slot(data, CounterData::count_offset()) - mdo_offset_bias, mdo);
-
-      __ bind(update_done);
     }
+    type_profile_helper(mdo, mdo_offset_bias, md, data, recv, tmp1); // kills recv
   } else {
     // Static call
-    __ ld(tmp1, md->byte_offset_of_slot(data, CounterData::count_offset()) - mdo_offset_bias, mdo);
-    __ addi(tmp1, tmp1, DataLayout::counter_increment);
-    __ std(tmp1, md->byte_offset_of_slot(data, CounterData::count_offset()) - mdo_offset_bias, mdo);
+    __ increment_mem64(mdo, md->byte_offset_of_slot(data, CounterData::count_offset()) - mdo_offset_bias,
+                       DataLayout::counter_increment, tmp1);
   }
 }
 
diff --git a/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.hpp b/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.hpp
index 7399a4544e6..5a065d364b2 100644
--- a/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.hpp
+++ b/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.hpp
@@ -52,7 +52,7 @@ friend class ArrayCopyStub;
   // Record the type of the receiver in ReceiverTypeData.
   void type_profile_helper(Register mdo, int mdo_offset_bias,
                            ciMethodData *md, ciProfileData *data,
-                           Register recv, Register tmp1, Label* update_done);
+                           Register recv, Register tmp);
   // Setup pointers to MDO, MDO slot, also compute offset bias to access the slot.
   void setup_md_access(ciMethod* method, int bci,
                        ciMethodData*& md, ciProfileData*& data, int& mdo_offset_bias);
diff --git a/src/hotspot/cpu/ppc/frame_ppc.cpp b/src/hotspot/cpu/ppc/frame_ppc.cpp
index 6b6a792117d..7d2e22b5965 100644
--- a/src/hotspot/cpu/ppc/frame_ppc.cpp
+++ b/src/hotspot/cpu/ppc/frame_ppc.cpp
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 2000, 2025, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2025 SAP SE. All rights reserved.
+ * Copyright (c) 2000, 2026, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2026 SAP SE. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -319,7 +319,7 @@ void frame::patch_pc(Thread* thread, address pc) {
 
 #ifdef ASSERT
   {
-    frame f(this->sp(), pc, this->unextended_sp());
+    frame f(sp(), unextended_sp(), fp(), pc, cb(), oop_map(), is_heap_frame());
     assert(f.is_deoptimized_frame() == this->is_deoptimized_frame() && f.pc() == this->pc() && f.raw_pc() == this->raw_pc(),
            "must be (f.is_deoptimized_frame(): %d this->is_deoptimized_frame(): %d "
            "f.pc(): " INTPTR_FORMAT " this->pc(): " INTPTR_FORMAT " f.raw_pc(): " INTPTR_FORMAT " this->raw_pc(): " INTPTR_FORMAT ")",
diff --git a/src/hotspot/cpu/ppc/frame_ppc.inline.hpp b/src/hotspot/cpu/ppc/frame_ppc.inline.hpp
index cedcb399a83..123e6d8a0b1 100644
--- a/src/hotspot/cpu/ppc/frame_ppc.inline.hpp
+++ b/src/hotspot/cpu/ppc/frame_ppc.inline.hpp
@@ -81,7 +81,7 @@ inline void frame::setup(kind knd) {
   // Continuation frames on the java heap are not aligned.
   // When thawing interpreted frames the sp can be unaligned (see new_stack_frame()).
   assert(_on_heap ||
-         ((is_aligned(_sp, alignment_in_bytes) || is_interpreted_frame() || is_deoptimized_frame()) &&
+         ((is_aligned(_sp, alignment_in_bytes) || is_interpreted_frame()) &&
           (is_aligned(_fp, alignment_in_bytes) || !is_fully_initialized())),
          "invalid alignment sp:" PTR_FORMAT " unextended_sp:" PTR_FORMAT " fp:" PTR_FORMAT, p2i(_sp), p2i(_unextended_sp), p2i(_fp));
 }
diff --git a/src/hotspot/cpu/ppc/interp_masm_ppc.hpp b/src/hotspot/cpu/ppc/interp_masm_ppc.hpp
index 275ff92c699..45af9bfc252 100644
--- a/src/hotspot/cpu/ppc/interp_masm_ppc.hpp
+++ b/src/hotspot/cpu/ppc/interp_masm_ppc.hpp
@@ -1,6 +1,6 @@
 /*
  * Copyright (c) 2002, 2026, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2025 SAP SE. All rights reserved.
+ * Copyright (c) 2012, 2026 SAP SE. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -264,8 +264,6 @@ class InterpreterMacroAssembler: public MacroAssembler {
   void profile_switch_default(Register scratch1, Register scratch2);
   void profile_switch_case(Register index, Register scratch1,Register scratch2, Register scratch3);
   void profile_null_seen(Register Rscratch1, Register Rscratch2);
-  void record_klass_in_profile(Register receiver, Register scratch1, Register scratch2);
-  void record_klass_in_profile_helper(Register receiver, Register scratch1, Register scratch2, int start_row, Label& done);
 
   // Argument and return type profiling.
   void profile_obj_type(Register obj, Register mdo_addr_base, RegisterOrConstant mdo_addr_offs, Register tmp, Register tmp2);
diff --git a/src/hotspot/cpu/ppc/interp_masm_ppc_64.cpp b/src/hotspot/cpu/ppc/interp_masm_ppc_64.cpp
index a1798289b62..789f8da9574 100644
--- a/src/hotspot/cpu/ppc/interp_masm_ppc_64.cpp
+++ b/src/hotspot/cpu/ppc/interp_masm_ppc_64.cpp
@@ -1348,7 +1348,7 @@ void InterpreterMacroAssembler::profile_virtual_call(Register Rreceiver,
   test_method_data_pointer(profile_continue);
 
   // Record the receiver type.
-  record_klass_in_profile(Rreceiver, Rscratch1, Rscratch2);
+  profile_receiver_type(Rreceiver, R28_mdx, 0, Rscratch1, Rscratch2);
 
   // The method data pointer needs to be updated to reflect the new target.
   update_mdp_by_constant(in_bytes(VirtualCallData::virtual_call_data_size()));
@@ -1367,7 +1367,7 @@ void InterpreterMacroAssembler::profile_typecheck(Register Rklass, Register Rscr
       mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size());
 
       // Record the object type.
-      record_klass_in_profile(Rklass, Rscratch1, Rscratch2);
+      profile_receiver_type(Rklass, R28_mdx, 0, Rscratch1, Rscratch2);
     }
 
     // The method data pointer needs to be updated.
@@ -1481,88 +1481,6 @@ void InterpreterMacroAssembler::profile_null_seen(Register Rscratch1, Register R
   }
 }
 
-void InterpreterMacroAssembler::record_klass_in_profile(Register Rreceiver,
-                                                        Register Rscratch1, Register Rscratch2) {
-  assert(ProfileInterpreter, "must be profiling");
-  assert_different_registers(Rreceiver, Rscratch1, Rscratch2);
-
-  Label done;
-  record_klass_in_profile_helper(Rreceiver, Rscratch1, Rscratch2, 0, done);
-  bind (done);
-}
-
-void InterpreterMacroAssembler::record_klass_in_profile_helper(
-                                        Register receiver, Register scratch1, Register scratch2,
-                                        int start_row, Label& done) {
-  if (TypeProfileWidth == 0) {
-    increment_mdp_data_at(in_bytes(CounterData::count_offset()), scratch1, scratch2);
-    return;
-  }
-
-  int last_row = VirtualCallData::row_limit() - 1;
-  assert(start_row <= last_row, "must be work left to do");
-  // Test this row for both the receiver and for null.
-  // Take any of three different outcomes:
-  //   1. found receiver => increment count and goto done
-  //   2. found null => keep looking for case 1, maybe allocate this cell
-  //   3. found something else => keep looking for cases 1 and 2
-  // Case 3 is handled by a recursive call.
-  for (int row = start_row; row <= last_row; row++) {
-    Label next_test;
-    bool test_for_null_also = (row == start_row);
-
-    // See if the receiver is receiver[n].
-    int recvr_offset = in_bytes(VirtualCallData::receiver_offset(row));
-    test_mdp_data_at(recvr_offset, receiver, next_test, scratch1);
-    // delayed()->tst(scratch);
-
-    // The receiver is receiver[n]. Increment count[n].
-    int count_offset = in_bytes(VirtualCallData::receiver_count_offset(row));
-    increment_mdp_data_at(count_offset, scratch1, scratch2);
-    b(done);
-    bind(next_test);
-
-    if (test_for_null_also) {
-      Label found_null;
-      // Failed the equality check on receiver[n]... Test for null.
-      if (start_row == last_row) {
-        // The only thing left to do is handle the null case.
-        // Scratch1 contains test_out from test_mdp_data_at.
-        cmpdi(CR0, scratch1, 0);
-        beq(CR0, found_null);
-        // Receiver did not match any saved receiver and there is no empty row for it.
-        // Increment total counter to indicate polymorphic case.
-        increment_mdp_data_at(in_bytes(CounterData::count_offset()), scratch1, scratch2);
-        b(done);
-        bind(found_null);
-        break;
-      }
-      // Since null is rare, make it be the branch-taken case.
-      cmpdi(CR0, scratch1, 0);
-      beq(CR0, found_null);
-
-      // Put all the "Case 3" tests here.
-      record_klass_in_profile_helper(receiver, scratch1, scratch2, start_row + 1, done);
-
-      // Found a null. Keep searching for a matching receiver,
-      // but remember that this is an empty (unused) slot.
-      bind(found_null);
-    }
-  }
-
-  // In the fall-through case, we found no matching receiver, but we
-  // observed the receiver[start_row] is null.
-
-  // Fill in the receiver field and increment the count.
-  int recvr_offset = in_bytes(VirtualCallData::receiver_offset(start_row));
-  set_mdp_data_at(recvr_offset, receiver);
-  int count_offset = in_bytes(VirtualCallData::receiver_count_offset(start_row));
-  li(scratch1, DataLayout::counter_increment);
-  set_mdp_data_at(count_offset, scratch1);
-  if (start_row > 0) {
-    b(done);
-  }
-}
 
 // Argument and return type profilig.
 // kills: tmp, tmp2, R0, CR0, CR1
diff --git a/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp b/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp
index 95d58d470c8..0d6c272decb 100644
--- a/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp
+++ b/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp
@@ -4329,6 +4329,173 @@ void MacroAssembler::multiply_to_len(Register x, Register xlen,
   bind(L_done);
 }   // multiply_to_len
 
+void MacroAssembler::increment_mem64(Register base, RegisterOrConstant ind_or_offs, int val, Register tmp) {
+  ld(tmp, ind_or_offs, base);
+  addi(tmp, tmp, val);
+  std(tmp, ind_or_offs, base);
+}
+
+// Handle the receiver type profile update given the "recv" klass.
+//
+// Normally updates the ReceiverData (RD) that starts at "mdp" + "mdp_offset".
+// If there are no matching or claimable receiver entries in RD, updates
+// the polymorphic counter.
+//
+// This code expected to run by either the interpreter or JIT-ed code, without
+// extra synchronization. For safety, receiver cells are claimed atomically, which
+// avoids grossly misrepresenting the profiles under concurrent updates. For speed,
+// counter updates are not atomic.
+//
+void MacroAssembler::profile_receiver_type(Register recv, Register mdp, int mdp_offset, Register tmp1, Register tmp2) {
+  assert_different_registers(recv, mdp, tmp1, tmp2);
+
+  int base_receiver_offset   = in_bytes(ReceiverTypeData::receiver_offset(0));
+  int poly_count_offset      = in_bytes(CounterData::count_offset());
+  int receiver_step          = in_bytes(ReceiverTypeData::receiver_offset(1)) - base_receiver_offset;
+  int receiver_to_count_step = in_bytes(ReceiverTypeData::receiver_count_offset(0)) - base_receiver_offset;
+
+  // Adjust for MDP offsets.
+  base_receiver_offset += mdp_offset;
+  poly_count_offset    += mdp_offset;
+
+#ifdef ASSERT
+  // We are about to walk the MDO slots without asking for offsets.
+  // Check that our math hits all the right spots.
+  for (uint c = 0; c < ReceiverTypeData::row_limit(); c++) {
+    int real_recv_offset  = mdp_offset + in_bytes(ReceiverTypeData::receiver_offset(c));
+    int real_count_offset = mdp_offset + in_bytes(ReceiverTypeData::receiver_count_offset(c));
+    int offset = base_receiver_offset + receiver_step*c;
+    int count_offset = offset + receiver_to_count_step;
+    assert(offset == real_recv_offset, "receiver slot math");
+    assert(count_offset == real_count_offset, "receiver count math");
+  }
+  int real_poly_count_offset = mdp_offset + in_bytes(CounterData::count_offset());
+  assert(poly_count_offset == real_poly_count_offset, "poly counter math");
+#endif
+
+  // Corner case: no profile table. Increment poly counter and exit.
+  if (ReceiverTypeData::row_limit() == 0) {
+    increment_mem64(mdp, poly_count_offset, DataLayout::counter_increment, tmp1);
+    return;
+  }
+
+  Label L_loop_search_receiver, L_loop_search_empty;
+  Label L_restart, L_found_recv, L_found_empty, L_count_update;
+  Register offset = tmp1, count = tmp2;
+
+  // The code here recognizes three major cases:
+  //   A. Fastest: receiver found in the table
+  //   B. Fast: no receiver in the table, and the table is full
+  //   C. Slow: no receiver in the table, free slots in the table
+  //
+  // The case A performance is most important, as perfectly-behaved code would end up
+  // there, especially with larger TypeProfileWidth. The case B performance is
+  // important as well, this is where bulk of code would land for normally megamorphic
+  // cases. The case C performance is not essential, its job is to deal with installation
+  // races, we optimize for code density instead. Case C needs to make sure that receiver
+  // rows are only claimed once. This makes sure we never overwrite a row for another
+  // receiver and never duplicate the receivers in the list, making profile type-accurate.
+  //
+  // It is very tempting to handle these cases in a single loop, and claim the first slot
+  // without checking the rest of the table. But, profiling code should tolerate free slots
+  // in the table, as class unloading can clear them. After such cleanup, the receiver
+  // we need might be _after_ the free slot. Therefore, we need to let at least full scan
+  // to complete, before trying to install new slots. Splitting the code in several tight
+  // loops also helpfully optimizes for cases A and B.
+  //
+  // This code is effectively:
+  //
+  // restart:
+  //   // Fastest: receiver is already installed
+  //   for (i = 0; i < receiver_count(); i++) {
+  //     if (receiver(i) == recv) goto found_recv(i);
+  //   }
+  //
+  //   // Fast: no receiver, but profile is full
+  //   for (i = 0; i < receiver_count(); i++) {
+  //     if (receiver(i) == null) goto found_null(i);
+  //   }
+  //   goto polymorphic
+  //
+  //   // Slow: try to install receiver
+  // found_null(i):
+  //   CAS(&receiver(i), null, recv);
+  //   goto restart
+  //
+  // polymorphic:
+  //   count++;
+  //   return
+  //
+  // found_recv(i):
+  //   *receiver_count(i)++
+  //
+
+  if (count != noreg) {
+    li(count, ReceiverTypeData::row_limit());
+  }
+
+  bind(L_restart);
+
+  // Fastest: receiver is already installed
+  if (count != noreg) {
+    mtctr(count);
+  } else {
+    li(R0, ReceiverTypeData::row_limit());
+    mtctr(R0);
+  }
+  li(offset, base_receiver_offset);
+  bind(L_loop_search_receiver);
+    ldx(R0, offset, mdp);
+    cmpd(CR0, R0, recv);
+    beq(CR0, L_found_recv);
+    addi(offset, offset, receiver_step);
+  bdnz(L_loop_search_receiver);
+
+  // Fast: no receiver, but profile is full
+  if (count != noreg) {
+    mtctr(count);
+  } else {
+    li(R0, ReceiverTypeData::row_limit());
+    mtctr(R0);
+  }
+  li(offset, base_receiver_offset);
+  bind(L_loop_search_empty);
+    ldx(R0, offset, mdp);
+    cmpdi(CR0, R0, 0);
+    beq(CR0, L_found_empty);
+    addi(offset, offset, receiver_step);
+  bdnz(L_loop_search_empty);
+
+  // Polymorphic: Increment polymorphic counter instead of receiver slot.
+  li(offset, poly_count_offset);
+  b(L_count_update);
+
+  // Slow: try to install receiver
+  bind(L_found_empty);
+
+  // Atomically swing receiver slot: null -> recv.
+  {
+    Register receiver_addr = offset;
+    add(receiver_addr, mdp, offset); // kills offset
+    cmpxchgd(CR0, R0, RegisterOrConstant(0), recv, receiver_addr, MemBarNone, cmpxchgx_hint_atomic_update(),
+             noreg, nullptr, /* check without ldarx first */ false, /* weak */ true);
+  }
+
+  // CAS success means the slot now has the receiver we want. CAS failure means
+  // something had claimed the slot concurrently: it can be the same receiver we want,
+  // or something else. Since this is a slow path, we can optimize for code density,
+  // and just restart the search from the beginning.
+  b(L_restart);
+
+  // Found a receiver, convert its slot offset to corresponding count offset.
+  bind(L_found_recv);
+  addi(offset, offset, receiver_to_count_step);
+
+  // Counter update
+  bind(L_count_update);
+  increment_mem64(mdp, offset, DataLayout::counter_increment, /* temp */ (count != noreg) ? count : recv);
+}
+
 #ifdef ASSERT
 void MacroAssembler::asm_assert(AsmAssertCond cond, const char *msg) {
   Label ok;
diff --git a/src/hotspot/cpu/ppc/macroAssembler_ppc.hpp b/src/hotspot/cpu/ppc/macroAssembler_ppc.hpp
index 21ab192373f..bbfa75f5151 100644
--- a/src/hotspot/cpu/ppc/macroAssembler_ppc.hpp
+++ b/src/hotspot/cpu/ppc/macroAssembler_ppc.hpp
@@ -870,6 +870,12 @@ class MacroAssembler: public Assembler {
                        Register tmp6, Register tmp7, Register tmp8, Register tmp9, Register tmp10,
                        Register tmp11, Register tmp12, Register tmp13);
 
+  // non-atomic 64-bit memory increment by simm16
+  void increment_mem64(Register base, RegisterOrConstant ind_or_offs, int val, Register tmp);
+
+  // Bytecode profiling (tmp2 = noreg is allowed, but then recv is killed)
+  void profile_receiver_type(Register recv, Register mdp, int mdp_offset, Register tmp1, Register tmp2);
+
   // Emitters for CRC32 calculation.
   // A note on invertCRC:
   //   Unfortunately, internal representation of crc differs between CRC32 and CRC32C.
diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp
index 443f3e3d17f..bb56acb3f38 100644
--- a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp
+++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp
@@ -1221,6 +1221,7 @@ void InterpreterMacroAssembler::notify_method_exit(
   // track stack depth.  If it is possible to enter interp_only_mode we add
   // the code to check if the event should be sent.
   if (mode == NotifyJVMTI && (JvmtiExport::can_post_interpreter_events() || JvmtiExport::can_post_frame_pop())) {
+    Label L;
     // Note: frame::interpreter_frame_result has a dependency on how the
     // method result is saved across the call to post_method_exit. If this
     // is changed then the interpreter_frame_result implementation will
@@ -1228,8 +1229,18 @@ void InterpreterMacroAssembler::notify_method_exit(
 
     // template interpreter will leave the result on the top of the stack.
     push(state);
+
+    ld(t1, Address(xthread, JavaThread::jvmti_thread_state_offset()));
+    beqz(t1, L);  // if (thread->jvmti_thread_state() == nullptr) exit;
+
+    lwu(t1, Address(t1, JvmtiThreadState::frame_pop_cnt_offset()));
+    lwu(t0, Address(xthread, JavaThread::interp_only_mode_offset()));
+    orr(t0, t0, t1);
+    beqz(t0, L);
+
     call_VM(noreg,
             CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_exit));
+    bind(L);
     pop(state);
   }
 
diff --git a/src/hotspot/cpu/x86/frame_x86.cpp b/src/hotspot/cpu/x86/frame_x86.cpp
index 27741ee3bb1..2b06f9ee80c 100644
--- a/src/hotspot/cpu/x86/frame_x86.cpp
+++ b/src/hotspot/cpu/x86/frame_x86.cpp
@@ -298,7 +298,7 @@ void frame::patch_pc(Thread* thread, address pc) {
 
 #ifdef ASSERT
   {
-    frame f(this->sp(), this->unextended_sp(), this->fp(), pc);
+    frame f(sp(), unextended_sp(), fp(), pc, cb(), oop_map(), is_heap_frame());
     assert(f.is_deoptimized_frame() == this->is_deoptimized_frame() && f.pc() == this->pc() && f.raw_pc() == this->raw_pc(),
       "must be (f.is_deoptimized_frame(): %d this->is_deoptimized_frame(): %d "
       "f.pc(): " INTPTR_FORMAT " this->pc(): " INTPTR_FORMAT " f.raw_pc(): " INTPTR_FORMAT " this->raw_pc(): " INTPTR_FORMAT ")",
diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64_kyber.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_64_kyber.cpp
index 13b1c942213..c35a2a1bba6 100644
--- a/src/hotspot/cpu/x86/stubGenerator_x86_64_kyber.cpp
+++ b/src/hotspot/cpu/x86/stubGenerator_x86_64_kyber.cpp
@@ -631,6 +631,27 @@ address generate_kyberInverseNtt_avx512(StubGenerator *stubgen,
 }
 
 // Kyber multiply polynomials in the NTT domain.
+// Implements
+// static int implKyberNttMult(
+//              short[] result, short[] ntta, short[] nttb, short[] zetas) {}
+//
+// The actual algorithm that is used here differs from the one in the Java
+// implementation, it uses Montgomery multiplications instead of Barrett
+// reduction, but the end result modulo MLKEM_Q is the same. This is the
+// Java equivalent of this intrinsic implementation:
+// static void implKyberNttMultJava(short[] result, short[] ntta, short[] nttb) {
+//         for (int m = 0; m < ML_KEM_N / 2; m++) {
+//             int a0 = ntta[2 * m];
+//             int a1 = ntta[2 * m + 1];
+//             int b0 = nttb[2 * m];
+//             int b1 = nttb[2 * m + 1];
+//             int r = montMul(a0, b0) +
+//                     montMul(montMul(a1, b1), MONT_ZETAS_FOR_NTT_MULT[m]);
+//             result[2 * m] = (short) montMul(r, MONT_R_SQUARE_MOD_Q);
+//             result[2 * m + 1] = (short) montMul(
+//                     (montMul(a0, b1) + montMul(a1, b0)), MONT_R_SQUARE_MOD_Q);
+//          }
+// }
 //
 // result (short[256]) = c_rarg0
 // ntta (short[256]) = c_rarg1
diff --git a/src/hotspot/share/cds/archiveUtils.cpp b/src/hotspot/share/cds/archiveUtils.cpp
index 7985c62d67b..bfaa1d6644c 100644
--- a/src/hotspot/share/cds/archiveUtils.cpp
+++ b/src/hotspot/share/cds/archiveUtils.cpp
@@ -303,7 +303,8 @@ public:
     AllocGapNode* node = allocate_node(gap, Empty{});
     insert(gap, node);
 
-    log_trace(aot, alloc)("adding a gap of %zu bytes @ %p (total = %zu) in %zu blocks", gap_bytes, gap_bottom, _total_gap_bytes, size());
+    log_trace(aot, alloc)("adding a gap of %zu bytes @ %p (total = %zu, used = %zu) in %zu blocks",
+                          gap_bytes, gap_bottom, _total_gap_bytes, _total_gap_bytes_used, size());
     return gap_bytes;
   }
 
@@ -325,29 +326,25 @@ public:
 
     remove(node);
 
-    precond(_total_gap_bytes >= num_bytes);
-    _total_gap_bytes -= num_bytes;
     _total_gap_bytes_used += num_bytes;
     _total_gap_allocs++;
     DEBUG_ONLY(node = nullptr); // Don't use it anymore!
 
     precond(gap_bytes >= num_bytes);
     if (gap_bytes > num_bytes) {
-      gap_bytes -= num_bytes;
-      gap_bottom += num_bytes;
-
-      AllocGap gap(gap_bytes, gap_bottom); // constructor checks alignment
+      AllocGap gap(gap_bytes - num_bytes, gap_bottom + num_bytes); // constructor checks alignment
       AllocGapNode* new_node = allocate_node(gap, Empty{});
       insert(gap, new_node);
     }
+    size_t unfilled_bytes = _total_gap_bytes - _total_gap_bytes_used;
     log_trace(aot, alloc)("%zu bytes @ %p in a gap of %zu bytes (used gaps %zu times, remain gap = %zu bytes in %zu blocks)",
-                          num_bytes, result, gap_bytes, _total_gap_allocs, _total_gap_bytes, size());
+                          num_bytes, result, gap_bytes, _total_gap_allocs, unfilled_bytes, size());
     return result;
   }
 };
 
-size_t DumpRegion::_total_gap_bytes = 0;
-size_t DumpRegion::_total_gap_bytes_used = 0;
+size_t DumpRegion::_total_gap_bytes = 0; // All the gaps that have ever been created
+size_t DumpRegion::_total_gap_bytes_used = 0; // All the gaps that have been used
 size_t DumpRegion::_total_gap_allocs = 0;
 DumpRegion::AllocGapTree DumpRegion::_gap_tree;
 
@@ -418,20 +415,21 @@ void DumpRegion::report_gaps(DumpAllocStats* stats) {
       });
 
   double unfilled_percent = 0.0;
+  size_t unfilled_bytes = _total_gap_bytes - _total_gap_bytes_used;
   if (_gap_tree.size() > 0) {
-    unfilled_percent = percent_of(_total_gap_bytes, _total_gap_allocs);
+    unfilled_percent = percent_of(unfilled_bytes, _total_gap_bytes);
     if (unfilled_percent > 5.0) {
       // We have a limited number of small objects, so some small gaps may remain
       // unfilled. If more than 5% of the gaps are unfilled, this likely indicates
       // a systematic error that should be investigated. Otherwise, do not warn to
       // avoid noise.
-      log_warning(aot)("Unexpected %zu gaps (%zu bytes) for Klass alignment",
-                       _gap_tree.size(), _total_gap_bytes);
+      log_warning(aot)("Unexpected %zu gaps (%zu bytes, %.2f%%) for Klass alignment",
+                       _gap_tree.size(), _total_gap_bytes, unfilled_percent);
     }
   }
   if (_total_gap_allocs > 0) {
     log_info(aot)("Allocated %zu objects of %zu bytes in gaps (remain = %zu bytes, %.2f%%)",
-                  _total_gap_allocs, _total_gap_bytes_used, _total_gap_bytes, unfilled_percent);
+                  _total_gap_allocs, _total_gap_bytes_used, unfilled_bytes, unfilled_percent);
   }
 }
 
diff --git a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahGenerationalHeuristics.cpp b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahGenerationalHeuristics.cpp
index c880af7fd49..ca4dfc71c61 100644
--- a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahGenerationalHeuristics.cpp
+++ b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahGenerationalHeuristics.cpp
@@ -358,7 +358,7 @@ size_t ShenandoahGenerationalHeuristics::select_aged_regions(ShenandoahInPlacePr
 
 // Having chosen the collection set, adjust the budgets for generational mode based on its composition.  Note
 // that young_generation->available() now knows about recently discovered immediate garbage.
-void ShenandoahGenerationalHeuristics::adjust_evacuation_budgets(ShenandoahHeap* const heap,
+void ShenandoahGenerationalHeuristics::adjust_evacuation_budgets(ShenandoahGenerationalHeap* const heap,
                                                                  ShenandoahCollectionSet* const collection_set) {
   shenandoah_assert_generational();
   // We may find that old_evacuation_reserve and/or loaned_for_young_evacuation are not fully consumed, in which case we may
@@ -481,6 +481,16 @@ void ShenandoahGenerationalHeuristics::adjust_evacuation_budgets(ShenandoahHeap*
 
   if (add_regions_to_young > 0) {
     assert(excess_old >= add_regions_to_young * region_size_bytes, "Cannot xfer more than excess old");
+    if (heap->age_census()->is_always_tenure()) {
+      // Cap excess_old at one min-PLAB per worker so this much stays in old's promotion reserve
+      // instead of being transferred to young.
+      const size_t min_plab_total = heap->plab_min_size() * HeapWordSize * heap->workers()->max_workers();
+      if (excess_old > min_plab_total) {
+        excess_old = min_plab_total;
+        // Avoid underflowing excess_old when we subtract below.
+        add_regions_to_young = 0;
+      }
+    }
     excess_old -= add_regions_to_young * region_size_bytes;
     log_debug(gc, ergo)("Before start of evacuation, total_promotion reserve is young_advance_promoted_reserve: %zu "
                         "plus excess: old: %zu", young_advance_promoted_reserve_used, excess_old);
diff --git a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahGenerationalHeuristics.hpp b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahGenerationalHeuristics.hpp
index a0e4ab78d5c..1860e3d4c0f 100644
--- a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahGenerationalHeuristics.hpp
+++ b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahGenerationalHeuristics.hpp
@@ -92,7 +92,7 @@ private:
 
   // Adjust evacuation budgets after choosing collection set.  On entry, the instance variable _regions_to_xfer
   // represents regions to be transferred to old based on decisions made in top_off_collection_set()
-  void adjust_evacuation_budgets(ShenandoahHeap* const heap,
+  void adjust_evacuation_budgets(ShenandoahGenerationalHeap* const heap,
                                  ShenandoahCollectionSet* const collection_set);
 
 protected:
diff --git a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahGlobalHeuristics.cpp b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahGlobalHeuristics.cpp
index e4ac576aa6f..d9f3bdee828 100644
--- a/src/hotspot/share/gc/shenandoah/heuristics/shenandoahGlobalHeuristics.cpp
+++ b/src/hotspot/share/gc/shenandoah/heuristics/shenandoahGlobalHeuristics.cpp
@@ -179,6 +179,12 @@ void ShenandoahGlobalHeuristics::choose_global_collection_set(ShenandoahCollecti
   size_t free_target = (capacity * ShenandoahMinFreeThreshold) / 100 + original_young_evac_reserve;
   size_t min_garbage = (free_target > actual_free) ? (free_target - actual_free) : 0;
 
+  // Admit every region with any garbage so every live object gets a chance to be promoted.
+  if (heap->age_census()->is_always_tenure()) {
+    ignore_threshold = 0;
+    min_garbage = SIZE_MAX;
+  }
+
   ShenandoahGlobalCSetBudget budget(region_size_bytes,
                                     shared_reserve_regions * region_size_bytes,
                                     garbage_threshold, ignore_threshold, min_garbage,
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahAgeCensus.cpp b/src/hotspot/share/gc/shenandoah/shenandoahAgeCensus.cpp
index 4989c929b32..8fa497802fd 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahAgeCensus.cpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahAgeCensus.cpp
@@ -34,7 +34,7 @@ ShenandoahAgeCensus::ShenandoahAgeCensus()
 }
 
 ShenandoahAgeCensus::ShenandoahAgeCensus(uint max_workers)
-  : _max_workers(max_workers)
+  : _max_workers(max_workers), _always_tenure(false)
 {
   if (ShenandoahGenerationalMinTenuringAge > ShenandoahGenerationalMaxTenuringAge) {
     vm_exit_during_initialization(
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahAgeCensus.hpp b/src/hotspot/share/gc/shenandoah/shenandoahAgeCensus.hpp
index c140f445e21..5636dee3ae2 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahAgeCensus.hpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahAgeCensus.hpp
@@ -121,6 +121,8 @@ class ShenandoahAgeCensus: public CHeapObj<mtGC> {
 
   uint _max_workers;                 // Maximum number of workers for parallel tasks
 
+  bool _always_tenure;               // When true, every age is tenurable.
+
   // Mortality rate of a cohort, given its population in
   // previous and current epochs
   double mortality_rate(size_t prev_pop, size_t cur_pop);
@@ -150,9 +152,9 @@ class ShenandoahAgeCensus: public CHeapObj<mtGC> {
     return _tenuring_threshold[prev];
   }
 
-  // Override the tenuring threshold for the current epoch. This is used to
-  // cause everything to be promoted for a whitebox full gc request.
-  void set_tenuring_threshold(uint threshold) { _tenuring_threshold[_epoch] = threshold; }
+  // Set always tenure mode. Currently only used by ShenandoahTenuringOverride
+  // to force is_tenurable() to be true for every age during WB.fullGC tests.
+  void set_always_tenure(bool always_tenure) { _always_tenure = always_tenure; }
 
 #ifndef PRODUCT
   // Return the sum of size of objects of all ages recorded in the
@@ -187,11 +189,13 @@ class ShenandoahAgeCensus: public CHeapObj<mtGC> {
   // Visible for testing. Use is_tenurable for consistent tenuring comparisons.
   uint tenuring_threshold() const { return _tenuring_threshold[_epoch]; }
 
-  // Return true if this age is at or above the tenuring threshold.
+  // Return true if this age is at or above the tenuring threshold, or if always tenure is enabled.
   bool is_tenurable(uint age) const {
-    return age >= tenuring_threshold();
+    return age >= tenuring_threshold() || _always_tenure;
   }
 
+  bool is_always_tenure() const { return _always_tenure; }
+
   // Update the local age table for worker_id by size for
   // given obj_age, region_age, and region_youth
   CENSUS_NOISE(void add(uint obj_age, uint region_age, uint region_youth, size_t size, uint worker_id);)
@@ -244,24 +248,22 @@ class ShenandoahAgeCensus: public CHeapObj<mtGC> {
   void print();
 };
 
-// RAII object that temporarily overrides the tenuring threshold for the
-// duration of a scope, restoring the original value on destruction.
-// Used to force promotion of all young objects during whitebox full GCs.
+// RAII object that enables ShenandoahAgeCensus always tenure mode for the
+// duration of a scope and disables it on destruction. Used to force promotion
+// of all young objects during whitebox full GCs.
 class ShenandoahTenuringOverride : public StackObj {
   ShenandoahAgeCensus* _census;
-  uint _saved_threshold;
   bool _active;
 public:
   ShenandoahTenuringOverride(bool active, ShenandoahAgeCensus* census) :
-    _census(census), _saved_threshold(0), _active(active) {
+    _census(census), _active(active) {
     if (_active) {
-      _saved_threshold = _census->tenuring_threshold();
-      _census->set_tenuring_threshold(0);
+      _census->set_always_tenure(true);
     }
   }
   ~ShenandoahTenuringOverride() {
     if (_active) {
-      _census->set_tenuring_threshold(_saved_threshold);
+      _census->set_always_tenure(false);
     }
   }
 };
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahCollectorPolicy.cpp b/src/hotspot/share/gc/shenandoah/shenandoahCollectorPolicy.cpp
index cfa79fc055e..e3267517e05 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahCollectorPolicy.cpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahCollectorPolicy.cpp
@@ -205,8 +205,15 @@ void ShenandoahCollectorPolicy::print_gc_stats(outputStream* out) const {
   out->print_cr("enough regions with no live objects to skip evacuation.");
   out->cr();
 
+  size_t gc_attempts = 0;
+  for (int c = 0; c < GCCause::_last_gc_cause; c++) {
+    gc_attempts += _collection_cause_counts[c];
+  }
+
   size_t completed_gcs = _success_full_gcs + _success_degenerated_gcs + _success_concurrent_gcs + _success_old_gcs;
-  out->print_cr("%5zu Completed GCs", completed_gcs);
+  size_t cancelled_gcs = gc_attempts - completed_gcs;
+  out->print_cr("%5zu GC attempts. %zu Completed GCs (%.2f%%).",
+    gc_attempts, completed_gcs, percent_of(completed_gcs, gc_attempts));
 
   size_t explicit_requests = 0;
   size_t implicit_requests = 0;
@@ -220,7 +227,7 @@ void ShenandoahCollectorPolicy::print_gc_stats(outputStream* out) const {
         implicit_requests += cause_count;
       }
       const char* desc = GCCause::to_string(cause);
-      out->print_cr("  %5zu caused by %s (%.2f%%)", cause_count, desc, percent_of(cause_count, completed_gcs));
+      out->print_cr("  %5zu caused by %s (%.2f%%)", cause_count, desc, percent_of(cause_count, gc_attempts));
     }
   }
 
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahGenerationalControlThread.cpp b/src/hotspot/share/gc/shenandoah/shenandoahGenerationalControlThread.cpp
index b3a48f85114..94f3409ac41 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahGenerationalControlThread.cpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahGenerationalControlThread.cpp
@@ -269,7 +269,7 @@ void ShenandoahGenerationalControlThread::run_gc_cycle(const ShenandoahGCRequest
     // Cannot uncommit bitmap slices during concurrent reset
     ShenandoahNoUncommitMark forbid_region_uncommit(_heap);
 
-    // When a whitebox full GC is requested, set the tenuring threshold to zero
+    // When a whitebox full GC is requested, set the age census to always tenure
     // so that all young objects are promoted to old. This ensures that tests
     // using WB.fullGC() to promote objects to old gen will not loop forever.
     ShenandoahTenuringOverride tenuring_override(request.cause == GCCause::_wb_full_gc,
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahPLAB.cpp b/src/hotspot/share/gc/shenandoah/shenandoahPLAB.cpp
index 3d98d431b86..f139f94fc8b 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahPLAB.cpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahPLAB.cpp
@@ -39,7 +39,7 @@ ShenandoahPLAB::ShenandoahPLAB() :
   _promoted(0),
   _promotion_failure_count(0),
   _promotion_failure_words(0),
-  _allows_promotion(false),
+  _allows_promotion(true),
   _retries_enabled(false),
   _heap(ShenandoahGenerationalHeap::heap()) {
   _plab = new PLAB(PLAB::min_size());
diff --git a/src/hotspot/share/gc/z/zRelocate.cpp b/src/hotspot/share/gc/z/zRelocate.cpp
index 1c2a4078904..d69475e62a3 100644
--- a/src/hotspot/share/gc/z/zRelocate.cpp
+++ b/src/hotspot/share/gc/z/zRelocate.cpp
@@ -642,7 +642,7 @@ private:
     const zaddress to_addr = _forwarding->insert(from_addr, allocated_addr, &cursor);
     if (to_addr != allocated_addr) {
       // Already relocated, undo allocation
-      _allocator->undo_alloc_object(to_page, to_addr, size);
+      _allocator->undo_alloc_object(to_page, allocated_addr, size);
       increase_other_forwarded(size);
     }
 
diff --git a/src/hotspot/share/nmt/mallocSiteTable.cpp b/src/hotspot/share/nmt/mallocSiteTable.cpp
index 0b8dc8e5fa0..d0b4bbe379f 100644
--- a/src/hotspot/share/nmt/mallocSiteTable.cpp
+++ b/src/hotspot/share/nmt/mallocSiteTable.cpp
@@ -32,6 +32,7 @@
 Atomic<MallocSiteHashtableEntry*>*  MallocSiteTable::_table = nullptr;
 const NativeCallStack* MallocSiteTable::_hash_entry_allocation_stack = nullptr;
 const MallocSiteHashtableEntry* MallocSiteTable::_hash_entry_allocation_site = nullptr;
+Atomic<size_t> MallocSiteTable::_entry_count(0);
 
 /*
  * Initialize malloc site table.
@@ -82,6 +83,7 @@ bool MallocSiteTable::initialize() {
   // Add the allocation site to hashtable.
   int index = hash_to_index(entry.hash());
   _table[index].store_relaxed(const_cast<MallocSiteHashtableEntry*>(&entry));
+  _entry_count.add_then_fetch(1ul, memory_order_relaxed);
 
   return true;
 }
@@ -127,6 +129,7 @@ MallocSite* MallocSiteTable::lookup_or_add(const NativeCallStack& key, uint32_t*
 
     // swap in the head
     if (_table[index].compare_set(nullptr, entry)) {
+      _entry_count.add_then_fetch(1ul, memory_order_relaxed);
       *marker = build_marker(index, 0);
       return entry->data();
     }
@@ -152,6 +155,7 @@ MallocSite* MallocSiteTable::lookup_or_add(const NativeCallStack& key, uint32_t*
       if (head->atomic_insert(entry)) {
         pos_idx ++;
         *marker = build_marker(index, pos_idx);
+        _entry_count.add_then_fetch(1ul, memory_order_relaxed);
         return entry->data();
       }
       // contended, other thread won
@@ -236,6 +240,10 @@ void MallocSiteTable::print_tuning_statistics(outputStream* st) {
   }
 
   st->print_cr("Malloc allocation site table:");
+#ifdef ASSERT
+  // This is solely for testing
+  st->print_cr("\tExpected entry count: %zu", _entry_count.load_relaxed());
+#endif
   st->print_cr("\tTotal entries: %d", total_entries);
   st->print_cr("\tEmpty entries (no outstanding mallocs): %d (%2.2f%%)",
                   empty_entries, ((float)empty_entries * 100) / (float)total_entries);
diff --git a/src/hotspot/share/nmt/mallocSiteTable.hpp b/src/hotspot/share/nmt/mallocSiteTable.hpp
index d78e6ebba6e..22b944301d3 100644
--- a/src/hotspot/share/nmt/mallocSiteTable.hpp
+++ b/src/hotspot/share/nmt/mallocSiteTable.hpp
@@ -196,6 +196,7 @@ class MallocSiteTable : AllStatic {
  private:
   // The callsite hashtable. It has to be a static table,
   // since malloc call can come from C runtime linker.
+  static Atomic<size_t>                     _entry_count;
   static Atomic<MallocSiteHashtableEntry*>* _table;
   static const NativeCallStack*             _hash_entry_allocation_stack;
   static const MallocSiteHashtableEntry*    _hash_entry_allocation_site;
diff --git a/src/hotspot/share/opto/graphKit.cpp b/src/hotspot/share/opto/graphKit.cpp
index 78bfc1ab5b8..3112bb6b169 100644
--- a/src/hotspot/share/opto/graphKit.cpp
+++ b/src/hotspot/share/opto/graphKit.cpp
@@ -3401,7 +3401,7 @@ Node* GraphKit::gen_checkcast(Node *obj, Node* superklass,
 
   // Null check; get casted pointer; set region slot 3
   Node* null_ctl = top();
-  Node* not_null_obj = null_check_oop(obj, &null_ctl, never_see_null, safe_for_replace, speculative_not_null);
+  Node* not_null_obj = null_check_oop(obj, &null_ctl, never_see_null, false /*safe_for_replace*/, speculative_not_null);
 
   // If not_null_obj is dead, only null-path is taken
   if (stopped()) {              // Doing instance-of on a null?
@@ -3429,7 +3429,7 @@ Node* GraphKit::gen_checkcast(Node *obj, Node* superklass,
     // a speculative type use it to perform an exact cast.
     ciKlass* spec_obj_type = obj_type->speculative_type();
     if (spec_obj_type != nullptr || data != nullptr) {
-      cast_obj = maybe_cast_profiled_receiver(not_null_obj, improved_klass_ptr_type, spec_obj_type, safe_for_replace);
+      cast_obj = maybe_cast_profiled_receiver(not_null_obj, improved_klass_ptr_type, spec_obj_type, false /*safe_for_replace*/);
       if (cast_obj != nullptr) {
         if (failure_control != nullptr) // failure is now impossible
           (*failure_control) = top();
@@ -3467,24 +3467,17 @@ Node* GraphKit::gen_checkcast(Node *obj, Node* superklass,
   region->init_req(_obj_path, control());
   phi   ->init_req(_obj_path, cast_obj);
 
-  // A merge of null or Casted-NotNull obj
-  Node* res = _gvn.transform(phi);
-
-  // Note I do NOT always 'replace_in_map(obj,result)' here.
-  //  if( tk->klass()->can_be_primary_super()  )
-    // This means that if I successfully store an Object into an array-of-String
-    // I 'forget' that the Object is really now known to be a String.  I have to
-    // do this because we don't have true union types for interfaces - if I store
-    // a Baz into an array-of-Interface and then tell the optimizer it's an
-    // Interface, I forget that it's also a Baz and cannot do Baz-like field
-    // references to it.  FIX THIS WHEN UNION TYPES APPEAR!
-  //  replace_in_map( obj, res );
-
   // Return final merged results
   set_control( _gvn.transform(region) );
   record_for_igvn(region);
 
-  return record_profiled_receiver_for_speculation(res);
+  // A merge of null or Casted-NotNull obj
+  Node* res = _gvn.transform(phi);
+  res = record_profiled_receiver_for_speculation(res);
+  if (safe_for_replace) {
+    replace_in_map(obj, res);
+  }
+  return res;
 }
 
 //------------------------------next_monitor-----------------------------------
@@ -4208,13 +4201,13 @@ Node* GraphKit::load_String_value(Node* str, bool set_ctrl) {
   const TypeInstPtr* string_type = TypeInstPtr::make(TypePtr::NotNull, C->env()->String_klass(),
                                                      false, nullptr, 0);
   const TypePtr* value_field_type = string_type->add_offset(value_offset);
-  const TypeAryPtr* value_type = TypeAryPtr::make(TypePtr::NotNull,
+  const TypeAryPtr* value_type = TypeAryPtr::make(TypePtr::BotPTR,
                                                   TypeAry::make(TypeInt::BYTE, TypeInt::POS),
                                                   ciTypeArrayKlass::make(T_BYTE), true, 0);
   Node* p = basic_plus_adr(str, str, value_offset);
   Node* load = access_load_at(str, p, value_field_type, value_type, T_OBJECT,
                               IN_HEAP | (set_ctrl ? C2_CONTROL_DEPENDENT_LOAD : 0) | MO_UNORDERED);
-  return load;
+  return must_be_not_null(load, true);
 }
 
 Node* GraphKit::load_String_coder(Node* str, bool set_ctrl) {
diff --git a/src/hotspot/share/opto/ifnode.cpp b/src/hotspot/share/opto/ifnode.cpp
index 9f99874d76a..347d63ef57c 100644
--- a/src/hotspot/share/opto/ifnode.cpp
+++ b/src/hotspot/share/opto/ifnode.cpp
@@ -654,6 +654,12 @@ Node* IfNode::up_one_dom(Node *curr, bool linear_only) {
 
 //------------------------------filtered_int_type--------------------------------
 // Return a possibly more restrictive type for val based on condition control flow for an if
+//
+// Important: we only parse if val is on the lhs. This is a limitation, but it makes
+//            optimizations simpler. We rely on canonicalization to get us to this
+//            shape, which works well for comparisions with constants, as they are
+//            canonicalized to the rhs. This may not happen with variables, and so
+//            the optimization may not work for those cases, when val stays on the rhs.
 const TypeInt* IfNode::filtered_int_type(PhaseGVN* gvn, Node* val, Node* if_proj) {
   assert(if_proj &&
          (if_proj->Opcode() == Op_IfTrue || if_proj->Opcode() == Op_IfFalse), "expecting an if projection");
@@ -663,11 +669,14 @@ const TypeInt* IfNode::filtered_int_type(PhaseGVN* gvn, Node* val, Node* if_proj
       BoolNode* bol = iff->in(1)->as_Bool();
       if (bol->in(1) && bol->in(1)->is_Cmp()) {
         const CmpNode* cmp  = bol->in(1)->as_Cmp();
+        // Val is always the lhs of the comparision: val <test> cmp2
         if (cmp->in(1) == val) {
+          assert(cmp->Opcode() == Op_CmpI, "signed comparison required");
           const TypeInt* cmp2_t = gvn->type(cmp->in(2))->isa_int();
           if (cmp2_t != nullptr) {
             jint lo = cmp2_t->_lo;
             jint hi = cmp2_t->_hi;
+            // Negate the test if we are on the false branch.
             BoolTest::mask msk = if_proj->Opcode() == Op_IfTrue ? bol->_test._test : bol->_test.negate();
             switch (msk) {
             case BoolTest::ne: {
@@ -675,8 +684,12 @@ const TypeInt* IfNode::filtered_int_type(PhaseGVN* gvn, Node* val, Node* if_proj
               const TypeInt* val_t = gvn->type(val)->isa_int();
               if (val_t != nullptr && !val_t->singleton() && cmp2_t->is_con()) {
                 if (val_t->_lo == lo) {
+                  // Condition leading to if_proj: val != val->lo
+                  //   val in [val->lo + 1, val->hi]
                   return TypeInt::make(val_t->_lo + 1, val_t->_hi, val_t->_widen);
                 } else if (val_t->_hi == hi) {
+                  // Condition leading to if_proj: val != val->hi
+                  //   val in [val->lo, val->hi - 1]
                   return TypeInt::make(val_t->_lo, val_t->_hi - 1, val_t->_widen);
                 }
               }
@@ -684,28 +697,38 @@ const TypeInt* IfNode::filtered_int_type(PhaseGVN* gvn, Node* val, Node* if_proj
               return nullptr;
             }
             case BoolTest::eq:
+              // Condition leading to if_proj: val == cmp2
+              //   val in cmp2_t
               return cmp2_t;
             case BoolTest::lt:
-              lo = TypeInt::INT->_lo;
+              // Condition leading to if_proj: val < cmp2
+              //   val in [min_int .. max(min_int, cmp2->_hi - 1)]
+              lo = min_jint;
               if (hi != min_jint) {
                 hi = hi - 1;
               }
               break;
             case BoolTest::le:
-              lo = TypeInt::INT->_lo;
+              // Condition leading to if_proj: val <= cmp2
+              //   val in [min_int .. cmp2->_hi]
+              lo = min_jint;
               break;
             case BoolTest::gt:
+              // Condition leading to if_proj: val > cmp2
+              //   val in [min(cmp2->_lo + 1, max_int) .. max_int]
               if (lo != max_jint) {
                 lo = lo + 1;
               }
-              hi = TypeInt::INT->_hi;
+              hi = max_jint;
               break;
             case BoolTest::ge:
-              // lo unchanged
-              hi = TypeInt::INT->_hi;
+              // Condition leading to if_proj: val >= cmp2
+              //   val in [cmp2->_lo .. max_int]
+              hi = max_jint;
               break;
             default:
-              break;
+              assert(false, "impossible case");
+              return nullptr;
             }
             const TypeInt* rtn_t = TypeInt::make(lo, hi, cmp2_t->_widen);
             return rtn_t;
@@ -902,219 +925,523 @@ bool IfNode::has_only_uncommon_traps(IfProjNode* proj, IfProjNode*& success, IfP
   return false;
 }
 
-// Check that the 2 CmpI can be folded into as single CmpU and proceed with the folding
-bool IfNode::fold_compares_helper(IfProjNode* proj, IfProjNode* success, IfProjNode* fail, PhaseIterGVN* igvn) {
-  Node* this_cmp = in(1)->in(1);
-  BoolNode* this_bool = in(1)->as_Bool();
-  IfNode* dom_iff = proj->in(0)->as_If();
-  BoolNode* dom_bool = dom_iff->in(1)->as_Bool();
-  Node* lo = dom_iff->in(1)->in(1)->in(2);
-  Node* orig_lo = lo;
-  Node* hi = this_cmp->in(2);
-  Node* n = this_cmp->in(1);
-  IfProjNode* otherproj = proj->other_if_proj();
+// We are given the following code shape with two CmpI:
+//
+//               n  v1
+//               |  |
+//               cmp1
+//                |
+//       entry  bool1(test1)
+//           |  |
+//           iff1
+//           |   \
+//        middle  fail1-------------+
+//           |                      |
+//           |   n  v2              |
+//           |   |  |               |
+//       maybe   cmp2               |
+//  null-check    |                 |
+//           |  bool2(test2)        |
+//           |  |                   |
+//           iff2                   |
+//           |   \                  v
+//          succ  fail2----> go to same region
+//                           or uncommon trap
+//
+// 1. In some cases, we can prove that succ cannot be reached,
+//    and we can fold away the iff2. Example:
+//
+//      if (n < -1 && n > 1) { succ } else { fail }
+//      // 1st condition: n in [min_int .. -2]
+//      // 2nd condition: n in [2 ..  max_int]
+//      // -> no overlap -> constant fold iff2 towards fail2
+//      //
+//      // Equivalent, if we flip everything:
+//      if (n >= -1 || n <= 1) { fail } else { succ }
+//
+// 2. In other cases, we can replace the two CmpI with
+//    a single CmpU. We fold iff1 towards middle, and
+//    replace the iff2 condition with the CmpU. Example:
+//
+//      if (n >= 0 && n < 10) { succ } else { fail }
+//      // transformed to:
+//      if (n <u 10) { succ } else { fail }
+//
+//      if (n < 0 || n >= arr.length) { throw ArrayOutOfBoundsException }
+//      // transformed to:
+//      if (n >=u arr.length) { throw ArrayOutOfBoundsException }
+//
+// Note1: we assume that the CmpI nodes are canonicalized to the
+// point where n is always on the lhs. This is a limitation,
+// but as long as v1 and v2 are constants they will eventually
+// be canonicalized to the rhs. For variables, this may not always
+// happen.
+//
+// Note2: We are flexible about the IfProj nodes: middle and succ
+// could both be either IfTrue or IfFalse.
+//
+// Note3: Surrounding code has a different naming scheme!
+// In has_only_uncommon_traps, the path towards the
+// uncommon trap (e.g. failed range check) is called
+// "success", while the path that does not go to
+// the uncommon trap (e.g. in-bounds access) is called
+// "fail". I think that is counter-intuitive, so I now
+// used a different naming scheme here.
+//
+// Return true iff we could perform one of the optimizations.
+bool IfNode::fold_compares_helper(IfProjNode* middle, IfProjNode* fail2, IfProjNode* succ, PhaseIterGVN* igvn) {
+  assert(fail2->in(0) == this, "link iff2->fail2");
+  assert(succ->in(0) == this,  "link iff2->succ");
 
-  const TypeInt* lo_type = IfNode::filtered_int_type(igvn, n, otherproj);
-  const TypeInt* hi_type = IfNode::filtered_int_type(igvn, n, success);
+  IfNode* iff1 = middle->in(0)->as_If();
+  IfNode* iff2 = this;
+  BoolNode* bool1 = iff1->in(1)->as_Bool();
+  BoolNode* bool2 = iff2->in(1)->as_Bool();
+  CmpNode* cmp1 = bool1->in(1)->as_Cmp();
+  CmpNode* cmp2 = bool2->in(1)->as_Cmp();
+  assert(cmp1->Opcode() == Op_CmpI, "comparisons must be CmpI");
+  assert(cmp2->Opcode() == Op_CmpI, "comparisons must be CmpI");
 
-  BoolTest::mask lo_test = dom_bool->_test._test;
-  BoolTest::mask hi_test = this_bool->_test._test;
-  BoolTest::mask cond = hi_test;
+  IfProjNode* fail1 = middle->other_if_proj();
 
-  PhaseTransform::SpeculativeProgressGuard progress_guard(igvn);
-  // convert:
-  //
-  //          dom_bool = x {<,<=,>,>=} a
-  //                           / \
-  //     proj = {True,False}  /   \ otherproj = {False,True}
-  //                         /
-  //        this_bool = x {<,<=} b
-  //                       / \
-  //  fail = {True,False} /   \ success = {False,True}
-  //                     /
-  //
-  // (Second test guaranteed canonicalized, first one may not have
-  // been canonicalized yet)
-  //
-  // into:
-  //
-  // cond = (x - lo) {<u,<=u,>u,>=u} adjusted_lim
-  //                       / \
-  //                 fail /   \ success
-  //                     /
-  //
+  Node* v1 = cmp1->in(2);
+  Node* v2 = cmp2->in(2);
+  Node* n = cmp1->in(1);
+  assert(cmp2->in(1) == n, "n must be lhs in both CmpI");
 
-  // Figure out which of the two tests sets the upper bound and which
-  // sets the lower bound if any.
-  Node* adjusted_lim = nullptr;
-  if (lo_type != nullptr && hi_type != nullptr && hi_type->_lo > lo_type->_hi &&
-      hi_type->_hi == max_jint && lo_type->_lo == min_jint && lo_test != BoolTest::ne) {
-    assert((dom_bool->_test.is_less() && !proj->_con) ||
-           (dom_bool->_test.is_greater() && proj->_con), "incorrect test");
-
-    // this_bool = <
-    //   dom_bool = >= (proj = True) or dom_bool = < (proj = False)
-    //     x in [a, b[ on the fail (= True) projection, b > a-1 (because of hi_type->_lo > lo_type->_hi test above):
-    //     lo = a, hi = b, adjusted_lim = b-a, cond = <u
-    //   dom_bool = > (proj = True) or dom_bool = <= (proj = False)
-    //     x in ]a, b[ on the fail (= True) projection, b > a:
-    //     lo = a+1, hi = b, adjusted_lim = b-a-1, cond = <u
-    // this_bool = <=
-    //   dom_bool = >= (proj = True) or dom_bool = < (proj = False)
-    //     x in [a, b] on the fail (= True) projection, b+1 > a-1:
-    //     lo = a, hi = b, adjusted_lim = b-a+1, cond = <u
-    //     lo = a, hi = b, adjusted_lim = b-a, cond = <=u doesn't work because b = a - 1 is possible, then b-a = -1
-    //   dom_bool = > (proj = True) or dom_bool = <= (proj = False)
-    //     x in ]a, b] on the fail (= True) projection b+1 > a:
-    //     lo = a+1, hi = b, adjusted_lim = b-a, cond = <u
-    //     lo = a+1, hi = b, adjusted_lim = b-a-1, cond = <=u doesn't work because a = b is possible, then b-a-1 = -1
-
-    if (hi_test == BoolTest::lt) {
-      if (lo_test == BoolTest::gt || lo_test == BoolTest::le) {
-        lo = igvn->transform(new AddINode(lo, igvn->intcon(1)));
+  // Optimization 1: try to prove that succ is not reachable.
+  // Which values of n can pass iff1 to middle AND iff2 to succ?
+  const TypeInt* type_middle = filtered_int_type(igvn, n, middle);
+  if (type_middle != nullptr) {
+    const TypeInt* type_succ = filtered_int_type(igvn, n, succ);
+    if (type_succ != nullptr) {
+      if (type_middle->filter(type_succ) == Type::TOP) {
+        // The intersection is empty -> succ is not reachable.
+        // Fold iff2 towards fail2 (and away from succ).
+        igvn->replace_input_of(iff2, 1, igvn->intcon(fail2->_con));
+        return true; // success: succ not reachable
       }
-    } else if (hi_test == BoolTest::le) {
-      if (lo_test == BoolTest::ge || lo_test == BoolTest::lt) {
-        adjusted_lim = igvn->transform(new SubINode(hi, lo));
-        adjusted_lim = igvn->transform(new AddINode(adjusted_lim, igvn->intcon(1)));
-        cond = BoolTest::lt;
-      } else if (lo_test == BoolTest::gt || lo_test == BoolTest::le) {
-        adjusted_lim = igvn->transform(new SubINode(hi, lo));
-        lo = igvn->transform(new AddINode(lo, igvn->intcon(1)));
-        cond = BoolTest::lt;
-      } else {
-        assert(false, "unhandled lo_test: %d", lo_test);
-        return false;
-      }
-    } else {
-      assert(igvn->_worklist.member(in(1)) && in(1)->Value(igvn) != igvn->type(in(1)), "unhandled hi_test: %d", hi_test);
-      return false;
     }
-    // this test was canonicalized
-    assert(this_bool->_test.is_less() && fail->_con, "incorrect test");
-  } else if (lo_type != nullptr && hi_type != nullptr && lo_type->_lo > hi_type->_hi &&
-             lo_type->_hi == max_jint && hi_type->_lo == min_jint && lo_test != BoolTest::ne) {
+  }
 
-    // this_bool = <
-    //   dom_bool = < (proj = True) or dom_bool = >= (proj = False)
-    //     x in [b, a[ on the fail (= False) projection, a > b-1 (because of lo_type->_lo > hi_type->_hi above):
-    //     lo = b, hi = a, adjusted_lim = a-b, cond = >=u
-    //   dom_bool = <= (proj = True) or dom_bool = > (proj = False)
-    //     x in [b, a] on the fail (= False) projection, a+1 > b-1:
-    //     lo = b, hi = a, adjusted_lim = a-b+1, cond = >=u
-    //     lo = b, hi = a, adjusted_lim = a-b, cond = >u doesn't work because a = b - 1 is possible, then b-a = -1
-    // this_bool = <=
-    //   dom_bool = < (proj = True) or dom_bool = >= (proj = False)
-    //     x in ]b, a[ on the fail (= False) projection, a > b:
-    //     lo = b+1, hi = a, adjusted_lim = a-b-1, cond = >=u
-    //   dom_bool = <= (proj = True) or dom_bool = > (proj = False)
-    //     x in ]b, a] on the fail (= False) projection, a+1 > b:
-    //     lo = b+1, hi = a, adjusted_lim = a-b, cond = >=u
-    //     lo = b+1, hi = a, adjusted_lim = a-b-1, cond = >u doesn't work because a = b is possible, then b-a-1 = -1
+  // Optimization 2: try to replace the two CmpI with one CmpU
+  // We can handle the following 4 cases:
+  //     Input: two CmpI            Output: one CmpU           Assumption
+  //     --------------------       -------------------------  -------------------
+  // a)  (n >  lo && n <  hi)  ->   n - lo - 1 <u  hi - lo - 1  (assuming lo <  hi)
+  //     (n >  2  && n <  5 )       n - 3      <u  2
+  //     range: [3, 4]
+  //
+  // b)  (n >  lo && n <= hi)  ->   n - lo - 1 <u  hi - lo      (assuming lo <= hi)
+  //     (n >  2  && n <= 5 )       n - 3      <u  3
+  //     range: [3, 4, 5]
+  //
+  // c)  (n >= lo && n <  hi)  ->   n - lo     <u  hi - lo      (assuming lo <= hi)
+  //     (n >= 2  && n <  5 )       n - 2      <u  3
+  //     range: [2, 3, 4]
+  //
+  // d)  (n >= lo && n <= hi)  ->   n - lo     <=u hi - lo      (assuming lo <= hi)
+  //     (n >= 2  && n <= 5 )       n - 2      <=u 3
+  //     range: [2, 3, 4, 5]
+  //
+  // Note1: the rhs of the CmpU indicates the cardinality of the range,
+  //        allowing n to have exactly that many different values.
+  //
+  // Note2: all 4 case have an assumption: lo must be sufficiently smaller
+  //        than hi. Below, and with the use of Lemma1 from below, we will
+  //        prove that this implies that the rhs of the CmpU never
+  //        underflows or overflows, which is critical for correctness.
+  //
+  // Below, we will prove and implement each of these cases. But first,
+  // we must handle the combinations of IfTrue/IfFalse projections for
+  // middle and succ, and extract which one is the lower bound (lo) and
+  // which one the upper bound (hi).
+  //
+  // <---- lower bound -----> <----------- succ -------------> <---- upper bound ----->
+  // [min_int .. lo_type->hi] [lo_type->hi+1 .. hi_type->lo-1] [hi_type->lo .. max_int]
+  //                         ^                                ^
+  //                     n {>/>=} lo                      n {</<=} hi
+  //
+  // The trick is then to "shift down" the succ range, to create only
+  // a single transition point.
+  //
+  // <----------- succ -------------> <------------ unsigned upper bound ------------->
+  // [0           ..                ] [                      ..               max_uint]
+  //                                 ^
+  //                               CmpU
 
-    swap(lo, hi);
-    swap(lo_type, hi_type);
-    swap(lo_test, hi_test);
+  BoolTest::mask test1 = bool1->_test._test;
+  BoolTest::mask test2 = bool2->_test._test;
+  if (middle->Opcode() == Op_IfFalse) { test1 = BoolTest::negate_mask(test1); }
+  if (succ->Opcode()   == Op_IfFalse) { test2 = BoolTest::negate_mask(test2); }
 
-    assert((dom_bool->_test.is_less() && proj->_con) ||
-           (dom_bool->_test.is_greater() && !proj->_con), "incorrect test");
-
-    cond = (hi_test == BoolTest::le || hi_test == BoolTest::gt) ? BoolTest::gt : BoolTest::ge;
-
-    if (lo_test == BoolTest::lt) {
-      if (hi_test == BoolTest::lt || hi_test == BoolTest::ge) {
-        cond = BoolTest::ge;
-      } else if (hi_test == BoolTest::le || hi_test == BoolTest::gt) {
-        adjusted_lim = igvn->transform(new SubINode(hi, lo));
-        adjusted_lim = igvn->transform(new AddINode(adjusted_lim, igvn->intcon(1)));
-        cond = BoolTest::ge;
-      } else {
-        assert(false, "unhandled hi_test: %d", hi_test);
-        return false;
-      }
-    } else if (lo_test == BoolTest::le) {
-      if (hi_test == BoolTest::lt || hi_test == BoolTest::ge) {
-        lo = igvn->transform(new AddINode(lo, igvn->intcon(1)));
-        cond = BoolTest::ge;
-      } else if (hi_test == BoolTest::le || hi_test == BoolTest::gt) {
-        adjusted_lim = igvn->transform(new SubINode(hi, lo));
-        lo = igvn->transform(new AddINode(lo, igvn->intcon(1)));
-        cond = BoolTest::ge;
-      } else {
-        assert(false, "unhandled hi_test: %d", hi_test);
-        return false;
-      }
-    } else {
-      assert(igvn->_worklist.member(in(1)) && in(1)->Value(igvn) != igvn->type(in(1)), "unhandled lo_test: %d", lo_test);
-      return false;
-    }
-    // this test was canonicalized
-    assert(this_bool->_test.is_less() && !fail->_con, "incorrect test");
+  Node* lo = nullptr;
+  Node* hi = nullptr;
+  const TypeInt* lo_type = nullptr;
+  const TypeInt* hi_type = nullptr;
+  BoolTest::mask lo_test = BoolTest::illegal;
+  BoolTest::mask hi_test = BoolTest::illegal;
+  if (BoolTest::is_greater(test1) && BoolTest::is_less(test2)) {
+    lo = v1;
+    hi = v2;
+    lo_type = IfNode::filtered_int_type(igvn, n, fail1);
+    hi_type = IfNode::filtered_int_type(igvn, n, fail2);
+    lo_test = test1;
+    hi_test = test2;
+  } else if (BoolTest::is_less(test1) && BoolTest::is_greater(test2)) {
+    lo = v2;
+    hi = v1;
+    lo_type = IfNode::filtered_int_type(igvn, n, fail2);
+    hi_type = IfNode::filtered_int_type(igvn, n, fail1);
+    lo_test = test2;
+    hi_test = test1;
   } else {
-    const TypeInt* failtype = filtered_int_type(igvn, n, proj);
-    if (failtype != nullptr) {
-      const TypeInt* type2 = filtered_int_type(igvn, n, fail);
-      if (type2 != nullptr) {
-        if (failtype->filter(type2) == Type::TOP) {
-          // previous if determines the result of this if so
-          // replace Bool with constant
-          igvn->replace_input_of(this, 1, igvn->intcon(success->_con));
-          progress_guard.commit();
-          return true;
-        }
-      }
-    }
+    // Could not find upper and lower bound.
+    return false;
+  }
+  assert(BoolTest::is_greater(lo_test), "lower bound: n {>/>=} lo");
+  assert(BoolTest::is_less(hi_test),    "upper bound: n {</<=} lo");
+
+  // Check that we got lower and upper bounds as expected.
+  if (lo_type == nullptr ||
+      hi_type == nullptr ||
+      hi_type->_hi != max_jint ||
+      lo_type->_lo != min_jint) {
+    // Upper and lower bounds could not be established.
     return false;
   }
 
-  assert(lo != nullptr && hi != nullptr, "sanity");
-  Node* hook = new Node(lo); // Add a use to lo to prevent him from dying
-  // Merge the two compares into a single unsigned compare by building (CmpU (n - lo) (hi - lo))
-  Node* adjusted_val = igvn->transform(new SubINode(n,  lo));
-  if (adjusted_lim == nullptr) {
-    adjusted_lim = igvn->transform(new SubINode(hi, lo));
-  }
-  hook->destruct(igvn);
+  // -------------------------------------------------------------------
+  // In the proofs below, we need some basic Lemmas to deal with integer
+  // signed and unsigned arithmetic.
+  //
+  // Lemma1:
+  //   Let a and b be in [min_int .. max_int].
+  //   If a >=s b, then:
+  //     U(a - b) = a - b
+  //
+  //   Proof:
+  //     a >= b
+  //     -> a - b >= 0
+  //
+  //     a <= max_int
+  //     b >= min_int
+  //     -> a - b <= max_int - min_int = 2^32-1
+  //
+  //     0 <= a - b <= 2^32-1
+  //     -> cast to unsigned has no overflow
+  //     -> U(a - b) = a - b
+  //
+  // Lemma2:
+  //   Let a and b be in [min_int .. max_int].
+  //   If a <s b, then:
+  //     U(a - b) = a - b + 2^32
+  //
+  //   Proof:
+  //     a < b
+  //     -> a - b < 0
+  //
+  //     a >= min_int
+  //     b <= max_int
+  //     -> a - b >= min_int - max_int = 2^32-1
+  //
+  //     2^32-1 <= a - b < 0
+  //     -> cast to unsigned leads to exactly one overflow
+  //     -> U(a - b) = a - b + 2^32
+  //
+  // Lemma3:
+  //   Let a and b be in [min_int .. max_int].
+  //     a + 2^32 > b
+  //
+  //   Proof:
+  //     Using a >= min_int, and b <= max_int:
+  //     a + 2^32 >= min_int + 2^32
+  //               = max_int + 1
+  //              >= b       + 1
+  //              >  b
+  // -------------------------------------------------------------------
 
-  if (adjusted_val->is_top() || adjusted_lim->is_top()) {
-    return false;
+  // Handle the 4 cases.
+  // All produce this form: n - lo + x1 <cond> hi - lo + x2
+  Node* x1 = nullptr;
+  Node* x2 = nullptr;
+  BoolTest::mask cond = BoolTest::illegal;
+  if (lo_test == BoolTest::gt && hi_test == BoolTest::lt) {
+    // We perform the the (CHECK) below, which implies (LO-HI),
+    // as we will show below.
+    if (lo_type->_hi >= hi_type->_lo) {
+      return false; // (CHECK) fails, we cannot establish (LO-HI) assumption.
+    }
+    // a)  (n >  lo && n <  hi)  ->   n - lo - 1 <u  hi - lo - 1  (assuming lo <  hi)
+    //     (BEFORE)                   (AFTER)                     (LO-HI)
+    //
+    // Proof:
+    //   From IfNode::filtered_int_type, we get:
+    //     lo_type = [min_int .. lo->_hi]    for n <= lo
+    //     -> lo_type->_hi = lo->_hi
+    //     hi_type = [hi->_lo .. max_int]    for n >= lo
+    //     -> hi_type->_lo = hi->_lo
+    //   We will need the assumption (LO-HI) below, which we can
+    //   establish with the following (CHECK):
+    //     lo_type->_hi < hi_type->_lo               (CHECK)
+    //     -> lo->_hi < hi->_lo
+    //     -> lo      < hi                           (LO-HI)
+    //
+    //   Case n <= lo:
+    //     (BEFORE) is always false, show (AFTER) is always false.
+    //     Since lo < hi (LO-HI), S(lo+1) = lo+1 (no overflow):
+    //     -> lo+1 <= hi
+    //     -> n < lo+1
+    //     U(n - (lo + 1))           <  U(hi - (lo + 1))
+    //     -- Lemma2 (n < lo+1) --     -- Lemma1 (lo+1 <= hi) --
+    //       n - (lo + 1) + 2^32     <    hi - (lo + 1)
+    //       n            + 2^32     <    hi
+    //     Always false by Lemma3.
+    //
+    //   Case lo < n < hi:
+    //     (BEFORE) is always true, show (AFTER) is always true.
+    //     Since lo < hi (LO-HI), S(lo+1) = lo+1 (no overflow):
+    //     -> lo+1 <= hi
+    //     -> n >= lo+1
+    //     U(n - (lo + 1))           <  U(hi - (lo + 1))
+    //     -- Lemma1 (n >= lo+1) --   -- Lemma1 (lo+1 <= hi) --
+    //       n - (lo + 1)            <    hi - (lo + 1)
+    //       n                       <    hi
+    //     Corresponds to case assumption, so always true.
+    //
+    //   Case n >= hi:
+    //     (BEFORE) is always false, show (AFTER) is always false.
+    //     Since lo < hi (LO-HI), S(lo+1) = lo+1 (no overflow):
+    //     -> lo+1 <= hi
+    //     U(n - (lo + 1))           <  U(hi - (lo + 1))
+    //     -- Lemma1 (n >= lo+1) --    -- Lemma1 (lo+1 <= hi) --
+    //       n - (lo + 1)            <    hi - (lo + 1)
+    //       n                       <    hi
+    //     Contradicts case assumption, so always false.
+    // QED.
+    //
+    // Note: we cannot use anything more relaxed than the assumption
+    //       lo < hi: with lo=hi the rhs of the CmpU would underflow.
+    //
+    // Produce form: n - lo + x1 <cond> hi - lo + x2
+    //               n - lo -  1   <u   hi - lo - 1
+    x1 = igvn->intcon(-1);
+    x2 = igvn->intcon(-1);
+    cond = BoolTest::lt;
+  } else if (lo_test == BoolTest::gt && hi_test == BoolTest::le) {
+    // We perform the the (CHECK) below, which implies (LO-HI),
+    // as we will show below.
+    if (lo_type->_hi >= hi_type->_lo) {
+      return false; // (CHECK) fails, we cannot establish (LO-HI) assumption.
+    }
+    // b)  (n >  lo && n <= hi)  ->   n - lo - 1 <u  hi - lo      (assuming lo <= hi)
+    //     (BEFORE)                   (AFTER)                     (LO-HI)
+    //
+    // Proof:
+    //   From IfNode::filtered_int_type, we get:
+    //     lo_type = [min_int .. lo->_hi]                  for n <= lo
+    //     -> lo_type->_hi = lo->_hi
+    //     hi_type = [min(hi->_lo+1, max_int) .. max_int]  for n > hi
+    //     -> hi_type->_lo <= lo->_lo + 1
+    //   We will need the assumption (LO-HI) below, which we can
+    //   establish with the following (CHECK):
+    //        lo_type->_hi <  hi_type->_lo       (CHECK)
+    //     -> lo->_hi      <  hi->_lo + 1
+    //     -> lo           <  hi      + 1
+    //     -> lo           <= hi                 (LO-HI)
+    //
+    //   Case A: lo = hi
+    //     Let y = lo = hi
+    //     -> n > lo && n <= hi   vs     n - lo - 1 <u hi - lo
+    //     -> n > y  && n <= y    vs     n - y  - 1 <u y  - y = 0
+    //        false                      false
+    //     Hence, (BEFORE) and (AFTER) are both always false.
+    //
+    //   Case B: lo < hi
+    //     Case n <= lo:
+    //       (BEFORE) is always false, show (AFTER) is always false.
+    //       Since lo < hi (Case B), S(lo+1) = lo+1 (no overflow):
+    //       -> n < lo+1
+    //       U(n - (lo + 1))         <  U(hi - lo)
+    //       -- Lemma2 (n < lo+1) --    -- Lemma1 (lo <= hi, LO-HI) --
+    //         n - (lo + 1) + 2^32   <    hi - lo
+    //         n -       1  + 2^32   <    hi
+    //         n            + 2^32   <=   hi
+    //       Always false by Lemma3.
+    //       Note: To apply Lemma2 above, we must use (Case B), we
+    //             could not have done it with (LO-HI) alone.
+    //
+    //     Case lo < n <= hi:
+    //       (BEFORE) is always true, show (AFTER) is always true.
+    //       Since lo < hi (Case B), S(lo+1) = lo+1 (no overflow):
+    //       -> n >= lo+1
+    //       U(n - (lo + 1))          <  U(hi - lo)
+    //       -- Lemma1 (n >= lo+1) --   -- Lemma1 (lo <= hi, LO-HI) --
+    //         n - (lo + 1)           <    hi - lo
+    //         n -       1            <    hi
+    //         n                      <=   hi
+    //       Follows from case assumption, so always true.
+    //
+    //     Case n > hi:
+    //       (BEFORE) is always false, show (AFTER) is always false.
+    //       Since lo < hi (Case B), S(lo+1) = lo+1 (no overflow):
+    //       -> lo+1 <= hi
+    //       -> n > lo+1
+    //       U(n - (lo + 1))          <  U(hi - lo)
+    //       -- Lemma1 (n > lo+1) --     -- Lemma1 (lo <= hi, LO-HI) --
+    //         n - (lo + 1)           <    hi - lo
+    //         n -       1            <    hi
+    //         n                      <=   hi
+    //     Contradicts case assumption, so always false.
+    // QED.
+    //
+    // Note: we cannot use anything more relaxed than the assumption
+    //       lo <= hi: with lo=hi+1 the rhs of the CmpU would underflow.
+    //
+    // Produce form: n - lo + x1 <cond> hi - lo + x2
+    //               n - lo -  1   <u   hi - lo
+    x1 = igvn->intcon(-1);
+    x2 = igvn->intcon(0);
+    cond = BoolTest::lt;
+  } else if (lo_test == BoolTest::ge && hi_test == BoolTest::lt) {
+    // We perform the the (CHECK) below, which implies (LO-HI),
+    // as we will show below.
+    if (lo_type->_hi >= hi_type->_lo) {
+      return false; // (CHECK) fails, we cannot establish (LO-HI) assumption.
+    }
+    // c)  (n >= lo && n <  hi)  ->   n - lo     <u  hi - lo      (assuming lo <= hi)
+    //     (BEFORE)                   (AFTER)                     (LO-HI)
+    //
+    // Proof:
+    //   From IfNode::filtered_int_type, we get:
+    //     lo_type = [min_int .. max(min_int, lo->_hi - 1)]  for n < lo
+    //     -> lo_type->_hi >= lo->_hi - 1
+    //     hi_type = [b->_lo .. max_int]                     for n >= hi
+    //     -> hi_type->_lo = hi->_lo
+    //   We will need the assumption (LO-HI) below, which we can
+    //   establish with the following (CHECK):
+    //        lo_type->_hi < hi_type->_lo
+    //     -> lo->_hi - 1  <  hi->_lo
+    //     -> lo->_hi      <= hi->_lo
+    //     -> lo           <= hi                         (HI-LO)
+    //
+    //   Case n < lo:
+    //     (BEFORE) is always false, show (AFTER) is always false.
+    //     U(n - lo)              < U(hi - lo)
+    //     -- Lemma2 (n < lo) --    -- Lemma1 (lo <= hi, LO-HI) --
+    //       n - lo + 2^32        <   hi - lo
+    //       n      + 2^32        <   hi
+    //     Always false by Lemma3.
+    //
+    //   Case lo <=s n <s hi:
+    //     (BEFORE) is always true, show (AFTER) is always true.
+    //     U(n - lo)              < U(hi - lo)
+    //     -- Lemma1 (n >= lo) --   -- Lemma1 (lo <= hi, LO-HI) --
+    //       n - lo               <   hi - lo
+    //       n                    <   hi
+    //     Follows from case assumption, so always true.
+    //
+    //   Case n >=s hi:
+    //     (BEFORE) is always false, show (AFTER) is always false.
+    //     U(n - lo)              < U(hi - lo)
+    //     -- Lemma1 (n >= lo) --     -- Lemma1 (lo <= hi, LO-HI) --
+    //       n - lo               <   hi - lo
+    //       n                    <   hi
+    //     Contradicts case assumption, so always false.
+    // QED.
+    //
+    /// Note: we cannot use anything more relaxed than the assumption
+    //       lo <= hi: with lo=hi+1 the rhs of the CmpU would underflow.
+    //
+    // Produce form: n - lo + x1 <cond> hi - lo + x2
+    //               n - lo        <u   hi - lo
+    x1 = igvn->intcon(0);
+    x2 = igvn->intcon(0);
+    cond = BoolTest::lt;
+  } else {
+    assert (lo_test == BoolTest::ge && hi_test == BoolTest::le, "");
+    // We perform the the (CHECK) below, which implies (LO-HI),
+    // as we will show below.
+    jlong lo_type_hi = lo_type->_hi;
+    jlong hi_type_lo = hi_type->_lo;
+    if (lo_type_hi >= hi_type_lo - 1) {
+      return false; // (CHECK) fails, we cannot establish (LO-HI) assumption.
+    }
+    // d)  (n >= lo && n <= hi)  ->   n - lo     <=u hi - lo      (assuming lo <= hi)
+    //     (BEFORE)                   (AFTER)                     (LO-HI)
+    //
+    // Proof:
+    //   From IfNode::filtered_int_type, we get:
+    //     lo_type = [min_int .. max(min_int, lo->_hi-1)]   for n < lo
+    //     -> lo_type->_hi >= lo->_hi - 1
+    //     hi_type = [min(hi->_lo+1, max_int) .. max_int]   for n > hi
+    //     -> hi_type->_lo <= hi->_lo + 1
+    //   We will need the assumption (LO-HI) below, which we can
+    //   establish with the following (CHECK), which we must compute in
+    //   long to avoid underflow:
+    //        lo_type->_hi     <  hi_type->_lo - 1      (CHECK)
+    //     -> lo_type->_hi + 1 <= hi_type->_lo - 1
+    //     -> lo->_hi          <= hi->_lo
+    //     -> lo               <= hi                    (LO-HI)
+    //
+    //   Case n <s lo:
+    //     (BEFORE) is always false, show (AFTER) is always false.
+    //     U(n - lo)              <= U(hi - lo)
+    //     -- Lemma2 (n < lo) --     -- Lemma1 (hi >= lo, LO-HI) --
+    //       n - lo + 2^32        <=  hi - lo
+    //       n      + 2^32        <=  hi
+    //     Always false by Lemma3.
+    //
+    //   Case lo <=s n <=s hi:
+    //     (BEFORE) is always true, show (AFTER) is always true.
+    //     U(n - lo)              <= U(hi - lo)
+    //     -- Lemma1 (n >= lo) --    -- Lemma1 (hi >= lo, LO-HI) --
+    //       n - lo               <=   hi - lo
+    //       n                    <=   hi
+    //     Corresponds to case assumption, so always true.
+    //
+    //   Case n >s hi:
+    //     (BEFORE) is always false, show (AFTER) is always false.
+    //     U(n - lo)              <=  U(hi - lo)
+    //     -- Lemma1 (n > lo) --      -- Lemma1 (hi >= lo, LO-HI) --
+    //       n - lo               <=    hi - lo
+    //       n                    <=    hi
+    //       n                    <=    hi
+    //     Contradicts case assumption, so always false.
+    // QED.
+    //
+    // Note: (CHECK) is stronger in this case than in (a, b, c). We have
+    //       had multiple bugs around this case (d) in the past. For example:
+    //       - Before JDK-8135069: transform into: n - lo <=u hi - lo
+    //         leads to rhs underflow with lo=0      and hi=-1
+    //         -> we are coming back to this solution, but instead
+    //            of checking   lo_type->_hi <  hi_type->_lo
+    //            we now check: lo_type->_hi <  hi_type->_lo - 1
+    //            which implies lo <= hi and excludes this bad case.
+    //       - Before JDK-8346420: transform into: n - lo <u hi - lo + 1
+    //         leads to rhs overflow  with lo=min_int and hi=max_int
+    //
+    // Produce form: n - lo + x1 <cond> hi - lo + x2
+    //               n - lo        <=u  hi - lo
+    x1 = igvn->intcon(0);
+    x2 = igvn->intcon(0);
+    cond = BoolTest::le;
   }
 
-  if (igvn->type(adjusted_lim)->is_int()->_lo < 0 &&
-      !igvn->C->post_loop_opts_phase()) {
-    // If range check elimination applies to this comparison, it includes code to protect from overflows that may
-    // cause the main loop to be skipped entirely. Delay this transformation.
-    // Example:
-    // for (int i = 0; i < limit; i++) {
-    //   if (i < max_jint && i > min_jint) {...
-    // }
-    // Comparisons folded as:
-    // i - min_jint - 1 <u -2
-    // when RC applies, main loop limit becomes:
-    // min(limit, max(-2 + min_jint + 1, min_jint))
-    // = min(limit, min_jint)
-    // = min_jint
-    if (lo != orig_lo && lo->outcnt() == 0) {
-      igvn->remove_dead_node(lo, PhaseIterGVN::NodeOrigin::Speculative);
-    }
-    if (adjusted_val->outcnt() == 0) {
-      igvn->remove_dead_node(adjusted_val, PhaseIterGVN::NodeOrigin::Speculative);
-    }
-    if (adjusted_lim->outcnt() == 0) {
-      igvn->remove_dead_node(adjusted_lim, PhaseIterGVN::NodeOrigin::Speculative);
-    }
-    igvn->C->record_for_post_loop_opts_igvn(this);
-    return false;
-  }
-
-  Node* newcmp = igvn->transform(new CmpUNode(adjusted_val, adjusted_lim));
+  // Construct the new check: n - lo + x1 <cond> hi - lo + x2
+  Node* lhs = igvn->transform(new SubINode(n,  lo));
+  lhs = igvn->transform(new AddINode(lhs, x1));
+  Node* rhs = igvn->transform(new SubINode(hi, lo));
+  rhs = igvn->transform(new AddINode(rhs, x2));
+  Node* newcmp = igvn->transform(new CmpUNode(lhs, rhs));
+  if (succ->Opcode() == Op_IfFalse) { cond = BoolTest::negate_mask(cond); }
   Node* newbool = igvn->transform(new BoolNode(newcmp, cond));
 
-  igvn->replace_input_of(dom_iff, 1, igvn->intcon(proj->_con));
-  igvn->replace_input_of(this, 1, newbool);
+  // Fold iff1 towards middle, and replace the iff2 condition:
+  igvn->replace_input_of(iff1, 1, igvn->intcon(middle->_con));
+  igvn->replace_input_of(iff2, 1, newbool);
 
-  progress_guard.commit();
-  return true;
+  return true; // Success with CmpU
 }
 
 // Merge the branches that trap for this If and the dominating If into
diff --git a/src/hotspot/share/opto/memnode.cpp b/src/hotspot/share/opto/memnode.cpp
index d5737166bb6..4f68ff281a0 100644
--- a/src/hotspot/share/opto/memnode.cpp
+++ b/src/hotspot/share/opto/memnode.cpp
@@ -53,6 +53,7 @@
 #include "opto/vectornode.hpp"
 #include "utilities/align.hpp"
 #include "utilities/copy.hpp"
+#include "utilities/globalDefinitions.hpp"
 #include "utilities/macros.hpp"
 #include "utilities/powerOfTwo.hpp"
 #include "utilities/vmError.hpp"
@@ -1217,7 +1218,9 @@ Node* LoadNode::can_see_stored_value_through_membars(Node* st, PhaseValues* phas
     }
   }
 
-  return can_see_stored_value(st, phase);
+  Node* res = can_see_stored_value(st, phase);
+  assert(res == nullptr || is_java_primitive(value_basic_type()) || res->bottom_type()->higher_equal(type()), "the fold is unsafe");
+  return res;
 }
 
 // If st is a store to the same location as this, return the stored value
@@ -1273,7 +1276,22 @@ Node* MemNode::can_see_stored_value(Node* st, PhaseValues* phase) const {
           return nullptr;
         }
       }
-      return st->in(MemNode::ValueIn);
+
+      // Even if we can see the store, we cannot fold the load if the store is not type safe (e.g.
+      // store a j.l.Object into an array of j.l.String) because folding makes the compiler lose the
+      // type information that the uses of this node may need. This is only necessary for pointers, we
+      // can see the stored value of a LoadS even if it is an int because LoadSNode::Ideal will do the
+      // necessary truncation.
+      // The same phenomenon is not an issue for StoreNodes because they don't use res.
+      Node* res = st->in(MemNode::ValueIn);
+      if (is_Store() || is_java_primitive(value_basic_type()) || res->bottom_type()->higher_equal(bottom_type())) {
+        return res;
+      }
+
+      // Type-unsafe stores must be due to array polymorphism
+      const TypePtr* adr_type = this->adr_type();
+      assert(adr_type == nullptr || adr_type->isa_aryptr() != nullptr, "unexpected type-unsafe store");
+      return nullptr;
     }
 
     // A load from a freshly-created object always returns zero.
diff --git a/src/hotspot/share/opto/mulnode.cpp b/src/hotspot/share/opto/mulnode.cpp
index 5e05d6a6e04..e48acd23b87 100644
--- a/src/hotspot/share/opto/mulnode.cpp
+++ b/src/hotspot/share/opto/mulnode.cpp
@@ -894,7 +894,8 @@ static Node* mask_and_replace_shift_amount(PhaseGVN* phase, Node* shift_node, ui
     }
 
     if (replace) {
-      shift_node->set_req(2, phase->intcon(masked_shift)); // Replace shift count with masked value.
+      // Replace shift count with masked value and put potential dead nodes on the worklist.
+      shift_node->set_req_X(2, phase->intcon(masked_shift), phase);
 
       // We need to notify the caller that the graph was reshaped, as Ideal needs
       // to return the root of the reshaped graph if any change was made.
diff --git a/src/hotspot/share/opto/subnode.hpp b/src/hotspot/share/opto/subnode.hpp
index 29ec25b41f8..358508248d0 100644
--- a/src/hotspot/share/opto/subnode.hpp
+++ b/src/hotspot/share/opto/subnode.hpp
@@ -334,8 +334,11 @@ struct BoolTest {
   static mask negate_mask(mask btm) { return mask(btm ^ 4); }
   static mask unsigned_mask(mask btm);
   bool is_canonical( ) const { return (_test == BoolTest::ne || _test == BoolTest::lt || _test == BoolTest::le || _test == BoolTest::overflow); }
-  bool is_less( )  const { return _test == BoolTest::lt || _test == BoolTest::le; }
-  bool is_greater( ) const { return _test == BoolTest::gt || _test == BoolTest::ge; }
+  bool is_less( )  const { return is_less(_test); }
+  bool is_greater( ) const { return is_greater(_test); }
+  static bool is_less(mask btm) { return btm == BoolTest::lt || btm == BoolTest::le; }
+  static bool is_greater(mask btm) { return btm == BoolTest::gt || btm == BoolTest::ge; }
+
   void dump_on(outputStream *st) const;
   mask merge(BoolTest other) const;
 };
diff --git a/src/hotspot/share/opto/type.cpp b/src/hotspot/share/opto/type.cpp
index 6c48ad470c9..fcdf9b09fc3 100644
--- a/src/hotspot/share/opto/type.cpp
+++ b/src/hotspot/share/opto/type.cpp
@@ -22,6 +22,7 @@
  *
  */
 
+#include "ci/ciInstanceKlass.hpp"
 #include "ci/ciMethodData.hpp"
 #include "ci/ciTypeFlow.hpp"
 #include "classfile/javaClasses.hpp"
@@ -3280,6 +3281,17 @@ bool TypeInterfaces::eq(ciInstanceKlass* k) const {
   return true;
 }
 
+// Check whether an instance of type k will satisfy this
+bool TypeInterfaces::is_subset(ciInstanceKlass* k) const {
+  assert(k->is_loaded(), "should be loaded");
+  GrowableArray<ciInstanceKlass*>* k_interfaces = k->transitive_interfaces();
+  for (int i = 0; i < _interfaces.length(); i++) {
+    if (!k_interfaces->contains(_interfaces.at(i))) {
+      return false;
+    }
+  }
+  return true;
+}
 
 uint TypeInterfaces::hash() const {
   assert(_initialized, "must be");
@@ -5958,20 +5970,16 @@ const TypeKlassPtr* TypeInstKlassPtr::try_improve() const {
   Compile* C = Compile::current();
   Dependencies* deps = C->dependencies();
   assert((deps != nullptr) == (C->method() != nullptr && C->method()->code_size() > 0), "sanity");
-  const TypeInterfaces* interfaces = _interfaces;
   if (k->is_loaded()) {
     ciInstanceKlass* ik = k->as_instance_klass();
     bool klass_is_exact = ik->is_final();
-    if (!klass_is_exact &&
-        deps != nullptr) {
+    if (!klass_is_exact && deps != nullptr) {
       ciInstanceKlass* sub = ik->unique_concrete_subklass();
-      if (sub != nullptr) {
-        if (_interfaces->eq(sub)) {
-          deps->assert_abstract_with_unique_concrete_subtype(ik, sub);
-          k = ik = sub;
-          klass_is_exact = sub->is_final();
-          return TypeKlassPtr::make(klass_is_exact ? Constant : _ptr, k, _offset);
-        }
+      if (sub != nullptr && _interfaces->is_subset(sub)) {
+        deps->assert_abstract_with_unique_concrete_subtype(ik, sub);
+        k = ik = sub;
+        klass_is_exact = sub->is_final();
+        return TypeKlassPtr::make(klass_is_exact ? Constant : _ptr, k, _offset);
       }
     }
   }
diff --git a/src/hotspot/share/opto/type.hpp b/src/hotspot/share/opto/type.hpp
index 3e029c387b1..6eaa497e86d 100644
--- a/src/hotspot/share/opto/type.hpp
+++ b/src/hotspot/share/opto/type.hpp
@@ -1137,6 +1137,7 @@ public:
   static const TypeInterfaces* make(GrowableArray<ciInstanceKlass*>* interfaces = nullptr);
   bool eq(const Type* other) const;
   bool eq(ciInstanceKlass* k) const;
+  bool is_subset(ciInstanceKlass* k) const;
   uint hash() const;
   const Type *xdual() const;
   void dump(outputStream* st) const;
diff --git a/src/hotspot/share/opto/vectornode.cpp b/src/hotspot/share/opto/vectornode.cpp
index 1059bfe20e5..dd49d88ce96 100644
--- a/src/hotspot/share/opto/vectornode.cpp
+++ b/src/hotspot/share/opto/vectornode.cpp
@@ -1168,6 +1168,32 @@ static bool is_commutative_vector_operation(int opcode) {
   }
 }
 
+static bool is_associative_and_commutative_vector_operation(int opcode) {
+  switch (opcode) {
+    case Op_AddVB:
+    case Op_AddVS:
+    case Op_AddVI:
+    case Op_AddVL:
+    case Op_MulVB:
+    case Op_MulVS:
+    case Op_MulVI:
+    case Op_MulVL:
+    case Op_MaxV:
+    case Op_MinV:
+    case Op_UMinV:
+    case Op_UMaxV:
+    case Op_XorV:
+    case Op_OrV:
+    case Op_AndV:
+    case Op_AndVMask:
+    case Op_OrVMask:
+    case Op_XorVMask:
+      return true;
+    default:
+      return false;
+  }
+}
+
 bool VectorNode::should_swap_inputs_to_help_global_value_numbering() {
   // Predicated vector operations are sensitive to ordering of inputs.
   // When the mask corresponding to a vector lane is false then
@@ -1299,7 +1325,7 @@ Node* VectorNode::create_reassociated_node(Node* parent, Node* child, Node* cinp
   return cloned_parent;
 }
 
-// Try to reassociate commutative vector operations using the following ideal transformation,
+// Try to reassociate associative vector operations using the following ideal transformation,
 // this will facilitate strength reducing a vector operation with all replicated inputs to
 // a scalar operation.
 //
@@ -1312,8 +1338,8 @@ Node* VectorNode::reassociate_vector_operation(PhaseGVN* phase) {
     return nullptr;
   }
 
-  // Enable re-association for commutative vector operations.
-  if (!is_commutative_vector_operation(Opcode())) {
+  // Enable re-association only for associative and commutative vector operations.
+  if (!is_associative_and_commutative_vector_operation(Opcode())) {
     return nullptr;
   }
 
@@ -2701,9 +2727,7 @@ Node* XorVNode::Ideal_XorV_VectorMaskCmp(PhaseGVN* phase, bool can_reshape) {
   Node* in1 = in(1);
   Node* in2 = in(2);
   // Transformations for predicated vectors are not supported for now.
-  if (is_predicated_vector() ||
-      in1->is_predicated_vector() ||
-      in2->is_predicated_vector()) {
+  if (is_predicated_vector()) {
     return nullptr;
   }
 
@@ -2727,6 +2751,7 @@ Node* XorVNode::Ideal_XorV_VectorMaskCmp(PhaseGVN* phase, bool can_reshape) {
   }
   if (in1->Opcode() != Op_VectorMaskCmp ||
       in1->outcnt() != 1 ||
+      in1->is_predicated_vector() ||
       !in1->as_VectorMaskCmp()->predicate_can_be_negated() ||
       !VectorNode::is_all_ones_vector(in2)) {
     return nullptr;
diff --git a/src/hotspot/share/prims/whitebox.cpp b/src/hotspot/share/prims/whitebox.cpp
index b7e12fc1c92..2c389d37fec 100644
--- a/src/hotspot/share/prims/whitebox.cpp
+++ b/src/hotspot/share/prims/whitebox.cpp
@@ -747,6 +747,14 @@ WB_ENTRY(void, WB_NMTArenaMalloc(JNIEnv* env, jobject o, jlong arena, jlong size
   a->Amalloc(size_t(size));
 WB_END
 
+WB_ENTRY(jboolean, WB_isC2Included(JNIEnv* env))
+#ifdef COMPILER2
+  return true;
+#else
+  return false;
+#endif
+WB_END
+
 static jmethodID reflected_method_to_jmid(JavaThread* thread, JNIEnv* env, jobject method) {
   assert(method != nullptr, "method should not be null");
   ThreadToNativeFromVM ttn(thread);
@@ -2843,6 +2851,7 @@ static JNINativeMethod methods[] = {
   {CC"NMTNewArena",         CC"(J)J",                 (void*)&WB_NMTNewArena        },
   {CC"NMTFreeArena",        CC"(J)V",                 (void*)&WB_NMTFreeArena       },
   {CC"NMTArenaMalloc",      CC"(JJ)V",                (void*)&WB_NMTArenaMalloc     },
+  {CC"isC2Included",        CC"()Z",                  (void*)&WB_isC2Included       },
   {CC"deoptimizeFrames",   CC"(Z)I",                  (void*)&WB_DeoptimizeFrames  },
   {CC"isFrameDeoptimized", CC"(I)Z",                  (void*)&WB_IsFrameDeoptimized},
   {CC"deoptimizeAll",      CC"()V",                   (void*)&WB_DeoptimizeAll     },
diff --git a/src/hotspot/share/runtime/arguments.cpp b/src/hotspot/share/runtime/arguments.cpp
index a607484d02a..dc177bdce6d 100644
--- a/src/hotspot/share/runtime/arguments.cpp
+++ b/src/hotspot/share/runtime/arguments.cpp
@@ -2195,11 +2195,9 @@ jint Arguments::parse_each_vm_init_arg(const JavaVMInitArgs* args, JVMFlagOrigin
       if (FLAG_SET_CMDLINE(ThreadStackSize, value) != JVMFlag::SUCCESS) {
         return JNI_EINVAL;
       }
-    } else if (match_option(option, "-Xmaxjitcodesize", &tail) ||
-               match_option(option, "-XX:ReservedCodeCacheSize=", &tail)) {
-      if (match_option(option, "-Xmaxjitcodesize", &tail)) {
-        warning("Option -Xmaxjitcodesize was deprecated in JDK 26 and will likely be removed in a future release.");
-      }
+    } else if (match_option(option, "-Xmaxjitcodesize", &tail)) {
+      warning("Ignoring option %s; support was removed in JDK 27", option->optionString);
+    } else if (match_option(option, "-XX:ReservedCodeCacheSize=", &tail)) {
       julong long_ReservedCodeCacheSize = 0;
 
       ArgsRange errcode = parse_memory_size(tail, &long_ReservedCodeCacheSize, 1);
diff --git a/src/hotspot/share/runtime/continuation.cpp b/src/hotspot/share/runtime/continuation.cpp
index 0b7e64a3ba6..26d865057e6 100644
--- a/src/hotspot/share/runtime/continuation.cpp
+++ b/src/hotspot/share/runtime/continuation.cpp
@@ -87,7 +87,8 @@ class UnmountBeginMark : public StackObj {
   }
   ~UnmountBeginMark() {
     assert(!_current->is_suspended()
-           JVMTI_ONLY(|| (_current->is_vthread_transition_disabler() && _result != freeze_ok)), "must be");
+           JVMTI_ONLY(|| (_result != freeze_ok &&
+                          (_current->is_vthread_transition_disabler() || _current->is_disable_suspend()))), "must be");
     assert(_current->is_in_vthread_transition(), "must be");
 
     if (_result != freeze_ok) {
diff --git a/src/hotspot/share/runtime/javaThread.hpp b/src/hotspot/share/runtime/javaThread.hpp
index 08d3cde8562..9e3c629ad78 100644
--- a/src/hotspot/share/runtime/javaThread.hpp
+++ b/src/hotspot/share/runtime/javaThread.hpp
@@ -873,7 +873,7 @@ public:
   // Atomic version; invoked by a thread other than the owning thread.
   bool in_critical_atomic() { return AtomicAccess::load(&_jni_active_critical) > 0; }
 
-  bool jni_deferred_suspension() { return AtomicAccess::load(&_jni_deferred_suspension_count); }
+  bool jni_deferred_suspension() const { return AtomicAccess::load(&_jni_deferred_suspension_count); }
   inline void enter_jni_deferred_suspension();
   void exit_jni_deferred_suspension() {
     precond(Thread::current() == this);
diff --git a/src/hotspot/share/runtime/mountUnmountDisabler.cpp b/src/hotspot/share/runtime/mountUnmountDisabler.cpp
index 65a82d6c563..277a841a88c 100644
--- a/src/hotspot/share/runtime/mountUnmountDisabler.cpp
+++ b/src/hotspot/share/runtime/mountUnmountDisabler.cpp
@@ -129,7 +129,7 @@ bool MountUnmountDisabler::is_start_transition_disabled(JavaThread* thread, oop
   int base_disable_count = notify_jvmti_events() ? 1 : 0;
   return java_lang_Thread::vthread_transition_disable_count(vthread) > 0
          || global_vthread_transition_disable_count() > base_disable_count
-         JVMTI_ONLY(|| (!thread->is_vthread_transition_disabler() &&
+         JVMTI_ONLY(|| (!thread->is_vthread_transition_disabler() && !thread->is_disable_suspend() &&
                         (JvmtiVTSuspender::is_vthread_suspended(java_lang_Thread::thread_id(vthread)) || thread->is_suspended())));
 }
 
diff --git a/src/java.base/share/classes/com/sun/crypto/provider/ML_KEM.java b/src/java.base/share/classes/com/sun/crypto/provider/ML_KEM.java
index 6bd70c3cdd6..96a1eb686cc 100644
--- a/src/java.base/share/classes/com/sun/crypto/provider/ML_KEM.java
+++ b/src/java.base/share/classes/com/sun/crypto/provider/ML_KEM.java
@@ -46,14 +46,17 @@ public final class ML_KEM {
     private static final int XOF_PAD = 24;
     private static final int MONT_R_BITS = 20;
     private static final int MONT_Q = 3329;
-    private static final int MONT_R_SQUARE_MOD_Q = 152;
     private static final int MONT_Q_INV_MOD_R = 586497;
 
     // toMont((ML_KEM_N / 2)^-1 mod ML_KEM_Q) using R = 2^MONT_R_BITS
     private static final int MONT_DIM_HALF_INVERSE = 1534;
     private static final int BARRETT_MULTIPLIER = 20159;
+    private static final int BARRETT_ADDEND = 1665;
     private static final int BARRETT_SHIFT = 26;
-    private static final int[] MONT_ZETAS_FOR_NTT = new int[]{
+
+    // The values from Appendix A of the FIPS 203 standard converted to the
+    // Montgomery domain, i.e. toMont(zeta^ (bitrev_7(i)) for i = 0..127
+    private static final int[] MONT_ZETAS_FOR_NTT = new int[] {
             1188, 914, -969, 585, -551, 1263, -97, 593,
             -35, -1400, -417, -1253, 742, -281, 185, -819,
             -1226, 895, -530, 52, 25, 1000, 1249, -909,
@@ -72,7 +75,7 @@ public final class ML_KEM {
             -1599, -709, -789, -1317, -57, 1049, -584
     };
 
-    private static final short[] montZetasForVectorNttArr = new short[]{
+    private static final short[] montZetasForVectorNttArr = new short[] {
             // level 0
             -758, -758, -758, -758, -758, -758, -758, -758,
             -758, -758, -758, -758, -758, -758, -758, -758,
@@ -193,26 +196,8 @@ public final class ML_KEM {
             -108, -108, -308, -308, 996, 996, 991, 991,
             958, 958, -1460, -1460, 1522, 1522, 1628, 1628
     };
-    private static final int[] MONT_ZETAS_FOR_INVERSE_NTT = new int[]{
-            584, -1049, 57, 1317, 789, 709, 1599, -1601,
-            -990, 604, 348, 857, 612, 474, 1177, -1014,
-            -88, -982, -191, 668, 1386, 486, -1153, -534,
-            514, 137, 586, -1178, 227, 339, -907, 244,
-            1200, -833, 1394, -30, 1074, 636, -317, -1192,
-            -1259, -355, -425, -884, -977, 1430, 868, 607,
-            184, 1448, 702, 1327, 431, 497, 595, -94,
-            1649, -1497, -620, 42, -172, 1107, -222, 1003,
-            426, -845, 395, -510, 1613, 825, 1269, -290,
-            -1429, 623, -567, 1617, 36, 1007, 1440, 332,
-            -201, 1313, -1382, -744, 669, -1538, 128, -1598,
-            1401, 1183, -553, 714, 405, -1155, -445, 406,
-            -1496, -49, 82, 1369, 259, 1604, 373, 909,
-            -1249, -1000, -25, -52, 530, -895, 1226, 819,
-            -185, 281, -742, 1253, 417, 1400, 35, -593,
-            97, -1263, 551, -585, 969, -914, -1188
-    };
 
-    private static final short[] montZetasForVectorInverseNttArr = new short[]{
+    private static final short[] montZetasForVectorInverseNttArr = new short[] {
             // level 0
             -1628, -1628, -1522, -1522, 1460, 1460, -958, -958,
             -991, -991, -996, -996, 308, 308, 108, 108,
@@ -334,25 +319,28 @@ public final class ML_KEM {
             758, 758, 758, 758, 758, 758, 758, 758
     };
 
-    private static final int[] MONT_ZETAS_FOR_NTT_MULT = new int[]{
-            -1003, 1003, 222, -222, -1107, 1107, 172, -172,
-            -42, 42, 620, -620, 1497, -1497, -1649, 1649,
-            94, -94, -595, 595, -497, 497, -431, 431,
-            -1327, 1327, -702, 702, -1448, 1448, -184, 184,
-            -607, 607, -868, 868, -1430, 1430, 977, -977,
-            884, -884, 425, -425, 355, -355, 1259, -1259,
-            1192, -1192, 317, -317, -636, 636, -1074, 1074,
-            30, -30, -1394, 1394, 833, -833, -1200, 1200,
-            -244, 244, 907, -907, -339, 339, -227, 227,
-            1178, -1178, -586, 586, -137, 137, -514, 514,
-            534, -534, 1153, -1153, -486, 486, -1386, 1386,
-            -668, 668, 191, -191, 982, -982, 88, -88,
-            1014, -1014, -1177, 1177, -474, 474, -612, 612,
-            -857, 857, -348, 348, -604, 604, 990, -990,
-            1601, -1601, -1599, 1599, -709, 709, -789, 789,
-            -1317, 1317, -57, 57, 1049, -1049, -584, 584
+    // modulo MLKEM_Q positive equivalents of the values listed for
+    // the MultiplyNTTs algorithm in the FIPS 203 standard
+    private static final int[] ZETAS_FOR_NTT_MULT = new int[] {
+            17, 3312, 2761, 568, 583, 2746, 2649, 680,
+            1637, 1692, 723, 2606, 2288, 1041, 1100, 2229,
+            1409, 1920, 2662, 667, 3281, 48, 233, 3096,
+            756, 2573, 2156, 1173, 3015, 314, 3050, 279,
+            1703, 1626, 1651, 1678, 2789, 540, 1789, 1540,
+            1847, 1482, 952, 2377, 1461, 1868, 2687, 642,
+            939, 2390, 2308, 1021, 2437, 892, 2388, 941,
+            733, 2596, 2337, 992, 268, 3061, 641, 2688,
+            1584, 1745, 2298, 1031, 2037, 1292, 3220, 109,
+            375, 2954, 2549, 780, 2090, 1239, 1645, 1684,
+            1063, 2266, 319, 3010, 2773, 556, 757, 2572,
+            2099, 1230, 561, 2768, 2466, 863, 2594, 735,
+            2804, 525, 1092, 2237, 403, 2926, 1026, 2303,
+            1143, 2186, 2150, 1179, 2775, 554, 886, 2443,
+            1722, 1607, 1212, 2117, 1874, 1455, 1029, 2300,
+            2110, 1219, 2935, 394, 885, 2444, 2154, 1175
     };
-    private static final short[] montZetasForVectorNttMultArr = new short[]{
+
+    private static final short[] montZetasForVectorNttMultArr = new short[] {
             -1103, 1103, 430, -430, 555, -555, 843, -843,
             -1251, 1251, 871, -871, 1550, -1550, 105, -105,
             422, -422, 587, -587, 177, -177, -235, 235,
@@ -1174,17 +1162,20 @@ public final class ML_KEM {
     }
 
     static void implKyberNttMultJava(short[] result, short[] ntta, short[] nttb) {
-        for (int m = 0; m < ML_KEM_N / 2; m++) {
-
-            int a0 = ntta[2 * m];
-            int a1 = ntta[2 * m + 1];
-            int b0 = nttb[2 * m];
-            int b1 = nttb[2 * m + 1];
-            int r = montMul(a0, b0) +
-                    montMul(montMul(a1, b1), MONT_ZETAS_FOR_NTT_MULT[m]);
-            result[2 * m] = (short) montMul(r, MONT_R_SQUARE_MOD_Q);
-            result[2 * m + 1] = (short) montMul(
-                    (montMul(a0, b1) + montMul(a1, b0)), MONT_R_SQUARE_MOD_Q);
+        for (int m = 0; m < ML_KEM_N; m += 2) {
+            int a0 = ntta[m];
+            int a1 = ntta[m + 1];
+            int b0 = nttb[m];
+            int b1 = nttb[m + 1];
+            long r = a1 * b1;
+            r -= ((r * BARRETT_MULTIPLIER) >> BARRETT_SHIFT) * ML_KEM_Q;
+            r *= ZETAS_FOR_NTT_MULT[m >> 1];
+            r += a0 * b0;
+            result[m] = (short) (r - (((r + BARRETT_ADDEND) *
+                    BARRETT_MULTIPLIER) >> BARRETT_SHIFT) * ML_KEM_Q);
+            long r1 = a0 * b1 + a1 * b0;
+            result[m + 1] = (short) (r1 - (((r1 + BARRETT_ADDEND) *
+                    BARRETT_MULTIPLIER) >> BARRETT_SHIFT) * ML_KEM_Q);
         }
     }
 
@@ -1552,9 +1543,10 @@ public final class ML_KEM {
     }
 
     static void implKyberBarrettReduceJava(short[] poly) {
+        int tmp = 0;
         for (int m = 0; m < ML_KEM_N; m++) {
-            int tmp = ((int) poly[m] * BARRETT_MULTIPLIER) >> BARRETT_SHIFT;
-            poly[m] = (short) (poly[m] - tmp * ML_KEM_Q);
+            tmp = poly[m];
+            poly[m] = (short) (tmp - ((tmp * BARRETT_MULTIPLIER) >> BARRETT_SHIFT) * ML_KEM_Q);
         }
     }
 
diff --git a/src/java.base/share/classes/java/text/ListFormat.java b/src/java.base/share/classes/java/text/ListFormat.java
index 5cd0e9e3651..3f320bbcc9b 100644
--- a/src/java.base/share/classes/java/text/ListFormat.java
+++ b/src/java.base/share/classes/java/text/ListFormat.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2023, 2024, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2023, 2026, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -32,7 +32,6 @@ import java.util.Arrays;
 import java.util.List;
 import java.util.Locale;
 import java.util.Objects;
-import java.util.regex.Pattern;
 import java.util.stream.IntStream;
 import sun.util.locale.provider.LocaleProviderAdapter;
 
@@ -112,6 +111,7 @@ public final class ListFormat extends Format {
     private static final int TWO = 3;
     private static final int THREE = 4;
     private static final int PATTERN_ARRAY_LENGTH = THREE + 1;
+    private static final int PLACEHOLDER_LENGTH = 3; // i.e., "{i}".length()
 
     /**
      * The locale to use for formatting list patterns.
@@ -126,14 +126,11 @@ public final class ListFormat extends Format {
      */
     private final String[] patterns;
 
-    private static final Pattern PARSE_START = Pattern.compile("(.*?)\\{0}(.*?)\\{1}");
-    private static final Pattern PARSE_MIDDLE = Pattern.compile("\\{0}(.*?)\\{1}");
-    private static final Pattern PARSE_END = Pattern.compile("\\{0}(.*?)\\{1}(.*?)");
-    private static final Pattern PARSE_TWO = Pattern.compile("(.*?)\\{0}(.*?)\\{1}(.*?)");
-    private static final Pattern PARSE_THREE = Pattern.compile("(.*?)\\{0}(.*?)\\{1}(.*?)\\{2}(.*?)");
-    private transient Pattern startPattern;
+    private transient String startBefore;
+    private transient String startBetween;
     private transient String middleBetween;
-    private transient Pattern endPattern;
+    private transient String endBetween;
+    private transient String endAfter;
 
     private ListFormat(Locale l, String[] patterns) {
         locale = l;
@@ -149,50 +146,60 @@ public final class ListFormat extends Format {
             }
         }
 
-        // get pattern strings
-        var m = PARSE_START.matcher(patterns[START]);
-        String startBefore;
-        String startBetween;
-        if (m.matches()) {
-            startBefore = m.group(1);
-            startBetween = m.group(2);
+        // Get pattern strings. Pattern conditions from LDML are:
+        // - it contains the placeholders {0}, {1}, and {2} ("3"-pattern only) in order
+        // - "start" and "middle" patterns end with the {1} placeholder
+        // - "middle" and "end" patterns begin with the {0} placeholder
+        var pattern = patterns[START];
+        var placeholderPositions = findPlaceholders(pattern);
+        if (placeholderPositions != null &&
+                placeholderPositions[1] + PLACEHOLDER_LENGTH == pattern.length()) {
+            startBefore = pattern.substring(0, placeholderPositions[0]);
+            startBetween = pattern.substring(placeholderPositions[0] + PLACEHOLDER_LENGTH,
+                    placeholderPositions[1]);
         } else {
-            throw new IllegalArgumentException("start pattern is incorrect: " + patterns[START]);
+            throw new IllegalArgumentException("start pattern is incorrect: " + pattern);
         }
-        m = PARSE_MIDDLE.matcher(patterns[MIDDLE]);
-        if (m.matches()) {
-            middleBetween = m.group(1);
+
+        pattern = patterns[MIDDLE];
+        placeholderPositions = findPlaceholders(pattern);
+        if (placeholderPositions != null &&
+                placeholderPositions[0] == 0 &&
+                placeholderPositions[1] + PLACEHOLDER_LENGTH == pattern.length()) {
+            middleBetween = pattern.substring(placeholderPositions[0] + PLACEHOLDER_LENGTH,
+                    placeholderPositions[1]);
         } else {
-            throw new IllegalArgumentException("middle pattern is incorrect: " + patterns[MIDDLE]);
+            throw new IllegalArgumentException("middle pattern is incorrect: " + pattern);
         }
-        m = PARSE_END.matcher(patterns[END]);
-        String endBetween;
-        String endAfter;
-        if (m.matches()) {
-            endBetween = m.group(1);
-            endAfter = m.group(2);
+
+        pattern = patterns[END];
+        placeholderPositions = findPlaceholders(pattern);
+        if (placeholderPositions != null && placeholderPositions[0] == 0) {
+            endBetween = pattern.substring(placeholderPositions[0] + PLACEHOLDER_LENGTH,
+                    placeholderPositions[1]);
+            endAfter = pattern.substring(placeholderPositions[1] + PLACEHOLDER_LENGTH);
         } else {
-            throw new IllegalArgumentException("end pattern is incorrect: " + patterns[END]);
+            throw new IllegalArgumentException("end pattern is incorrect: " + pattern);
         }
 
         // Validate two/three patterns, if given. Otherwise, generate them
-        if (!patterns[TWO].isEmpty()) {
-            if (!PARSE_TWO.matcher(patterns[TWO]).matches()) {
-                throw new IllegalArgumentException("pattern for two is incorrect: " + patterns[TWO]);
+        pattern = patterns[TWO];
+        if (!pattern.isEmpty()) {
+            if (findPlaceholders(pattern) == null) {
+                throw new IllegalArgumentException("pattern for two is incorrect: " + pattern);
             }
         } else {
             patterns[TWO] = startBefore + "{0}" + endBetween + "{1}" + endAfter;
         }
-        if (!patterns[THREE].isEmpty()) {
-            if (!PARSE_THREE.matcher(patterns[THREE]).matches()) {
-                throw new IllegalArgumentException("pattern for three is incorrect: " + patterns[THREE]);
+        pattern = patterns[THREE];
+        if (!pattern.isEmpty()) {
+            placeholderPositions = findPlaceholders(pattern);
+            if (placeholderPositions == null || placeholderPositions[2] == -1) {
+                throw new IllegalArgumentException("pattern for three is incorrect: " + pattern);
             }
         } else {
             patterns[THREE] = startBefore + "{0}" + startBetween + "{1}" + endBetween + "{2}" + endAfter;
         }
-
-        startPattern = Pattern.compile(startBefore + "(.+?)" + startBetween);
-        endPattern = Pattern.compile(endBetween + "(.+?)" + endAfter);
     }
 
     /**
@@ -455,21 +462,29 @@ public final class ListFormat extends Format {
     public Object parseObject(String source, ParsePosition parsePos) {
         Objects.requireNonNull(source);
         Objects.requireNonNull(parsePos);
-        var sm = startPattern.matcher(source);
-        var em = endPattern.matcher(source);
+        var startPattern = findPattern(source, parsePos.getIndex(), startBefore, startBetween);
+        var endPattern = findPattern(source, parsePos.getIndex(), endBetween, endAfter);
         Object parsed = null;
-        if (sm.find(parsePos.getIndex()) && em.find(parsePos.getIndex())) {
-            // get em to the last
-            var c = em.start();
-            while (em.find()) {
-                c = em.start();
+        if (startPattern != null && endPattern != null) {
+            // get endPattern to the last
+            var ep = endPattern;
+            while ((ep = findPattern(source, ep[1], endBetween, endAfter)) != null) {
+                endPattern = ep;
             }
-            em.find(c);
-            var startEnd = sm.end();
-            var endStart = em.start();
+
+            var startEnd = startPattern[1];
+            var endStart = endPattern[0];
             if (startEnd <= endStart) {
                 var mid = source.substring(startEnd, endStart);
-                var count = mid.split(middleBetween).length + 2;
+                var count = 3;
+                var mbLength = middleBetween.length();
+                if (mbLength > 0) {
+                    var midIndex = 0;
+                    while ((midIndex = mid.indexOf(middleBetween, midIndex)) >= 0) {
+                        count++;
+                        midIndex += mbLength;
+                    }
+                }
                 parsed = new MessageFormat(createMessageFormatString(count), locale).parseObject(source, parsePos);
             }
         }
@@ -565,7 +580,9 @@ public final class ListFormat extends Format {
     private String createMessageFormatString(int count) {
         var sb = new StringBuilder(256).append(patterns[START]);
         IntStream.range(2, count - 1).forEach(i -> sb.append(middleBetween).append("{").append(i).append("}"));
-        sb.append(patterns[END].replaceFirst("\\{0}", "").replaceFirst("\\{1}", "\\{" + (count - 1) + "\\}"));
+        sb.append(endBetween)
+            .append("{").append(count - 1).append("}")
+            .append(endAfter);
         return sb.toString();
     }
 
@@ -643,4 +660,51 @@ public final class ListFormat extends Format {
          */
         NARROW
     }
+
+    /**
+     * {@return the positions of the "{0}", "{1}", and "{2}" placeholders in the
+     * given pattern string, or null if the pattern is invalid}
+     *
+     * The returned array contains -1 for "{2}" if that placeholder is absent.
+     *
+     * @param pattern pattern string to parse
+     */
+    private static int[] findPlaceholders(String pattern) {
+        var positions = new int[3];
+        for (int i = 0; i < positions.length; i++) {
+            positions[i] = pattern.indexOf("{" + i + "}");
+        }
+
+        // Check the existence and order of the placeholders
+        if (positions[0] == -1 ||
+            positions[1] == -1 ||
+            positions[0] + PLACEHOLDER_LENGTH > positions[1] ||
+            positions[2] != -1 && positions[1] + PLACEHOLDER_LENGTH > positions[2]) {
+            return null;
+        }
+
+        return positions;
+    }
+
+    /**
+     * {@return the start and end positions of the first pattern found in
+     * the given {@code source} starting at {@code pos}, or null if no such
+     * pattern exists}
+     *
+     * The pattern must contain at least one character between the
+     * {@code prefix} and {@code suffix} strings. The returned end position is
+     * exclusive.
+     *
+     * @param source string to search
+     * @param pos position at which to start the search
+     * @param prefix starting string within the pattern
+     * @param suffix ending string within the pattern
+     */
+    private static int[] findPattern(String source, int pos, String prefix, String suffix) {
+        var prefixPos = source.indexOf(prefix, pos);
+        var suffixPos = prefixPos != -1 ? source.indexOf(suffix, prefixPos + prefix.length() + 1) : -1;
+
+        return prefixPos < suffixPos ?
+            new int[] {prefixPos, suffixPos + suffix.length()} : null;
+    }
 }
diff --git a/src/java.base/share/classes/java/util/zip/GZIPInputStream.java b/src/java.base/share/classes/java/util/zip/GZIPInputStream.java
index 72fb8036f08..88d08386e8c 100644
--- a/src/java.base/share/classes/java/util/zip/GZIPInputStream.java
+++ b/src/java.base/share/classes/java/util/zip/GZIPInputStream.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1996, 2024, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1996, 2026, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -34,17 +34,55 @@ import java.io.EOFException;
 import java.util.Objects;
 
 /**
- * This class implements a stream filter for reading compressed data in
- * the GZIP file format.
+ * This class implements a stream filter for decompressing GZIP file format data.
+ *
+ * <h2><a id="gzip_file_format">GZIP file format</a></h2>
+ * The GZIP file format is specified by RFC 1952. The format, as specified in section 2.2 of
+ * the RFC, consists of a series of "members" that appear one after another in the stream with
+ * no additional information before, between, or after them. Each member consists of a header,
+ * followed by data that is compressed using the {@code deflate} algorithm, and then a trailer.
+ * <p>
+ * This class is capable of reading a stream consisting of a series of members.
+ * <p>
+ * Reading from the stream may read and buffer bytes from the underlying stream.
+ * This includes bytes that follow a member's trailer. Whether or not any additional bytes
+ * have been read past a member's trailer, the read methods on this class yield decompressed
+ * data from at most one member; data from multiple members is not combined in
+ * a single read operation.
+ *
+ * <h2><a id="thread_safety">Thread safety</a></h2>
+ * {@code GZIPInputStream} is not safe for use by multiple concurrent threads. Any multithreaded
+ * concurrent use must be guarded by appropriate synchronization.
+ *
+ * @apiNote
+ * The {@link #close} method should be called to release resources used by this
+ * stream, either directly, or with the {@code try}-with-resources statement.
+ *
+ * @spec https://www.rfc-editor.org/info/rfc1952
+ *       RFC 1952: GZIP file format specification version 4.3
+ *
+ * @see InflaterInputStream
  *
- * @see         InflaterInputStream
- * @author      David Connelly
  * @since 1.1
- *
  */
 public class GZIPInputStream extends InflaterInputStream {
     /**
-     * CRC-32 for uncompressed data.
+     * GZIP header magic number.
+     */
+    public static final int GZIP_MAGIC = 0x8b1f;
+
+    /*
+     * File header flags.
+     */
+    private static final int FHCRC      = 2;    // Header CRC
+    private static final int FEXTRA     = 4;    // Extra field
+    private static final int FNAME      = 8;    // File name
+    private static final int FCOMMENT   = 16;   // File comment
+
+    private final byte[] tmpbuf = new byte[128];
+
+    /**
+     * CRC-32 for decompressed data.
      */
     protected CRC32 crc = new CRC32();
 
@@ -66,13 +104,15 @@ public class GZIPInputStream extends InflaterInputStream {
 
     /**
      * Creates a new input stream with the specified buffer size.
+     *
      * @param in the input stream
      * @param size the input buffer size
      *
      * @throws    ZipException if a GZIP format error has occurred or the
      *                         compression method used is unsupported
      * @throws    NullPointerException if {@code in} is null
-     * @throws    IOException if an I/O error has occurred
+     * @throws    IOException if an I/O error occurs when reading the member header
+     *                        from the underlying stream
      * @throws    IllegalArgumentException if {@code size <= 0}
      */
     public GZIPInputStream(InputStream in, int size) throws IOException {
@@ -103,25 +143,27 @@ public class GZIPInputStream extends InflaterInputStream {
 
     /**
      * Creates a new input stream with a default buffer size.
+     *
      * @param in the input stream
      *
      * @throws    ZipException if a GZIP format error has occurred or the
      *                         compression method used is unsupported
      * @throws    NullPointerException if {@code in} is null
-     * @throws    IOException if an I/O error has occurred
+     * @throws    IOException if an I/O error occurs when reading the member header
+     *                        from the underlying stream
      */
     public GZIPInputStream(InputStream in) throws IOException {
         this(in, 512);
     }
 
     /**
-     * Reads uncompressed data into an array of bytes, returning the number of inflated
+     * Reads decompressed data into an array of bytes, returning the number of decompressed
      * bytes. If {@code len} is not zero, the method will block until some input can be
      * decompressed; otherwise, no bytes are read and {@code 0} is returned.
      * <p>
      * If this method returns a nonzero integer <i>n</i> then {@code buf[off]}
-     * through {@code buf[off+}<i>n</i>{@code -1]} contain the uncompressed
-     * data.  The content of elements {@code buf[off+}<i>n</i>{@code ]} through
+     * through {@code buf[off+}<i>n</i>{@code -1]} contain the decompressed
+     * data. The content of elements {@code buf[off+}<i>n</i>{@code ]} through
      * {@code buf[off+}<i>len</i>{@code -1]} is undefined, contrary to the
      * specification of the {@link java.io.InputStream InputStream} superclass,
      * so an implementation is free to modify these elements during the inflate
@@ -131,18 +173,20 @@ public class GZIPInputStream extends InflaterInputStream {
      *
      * @param buf the buffer into which the data is read
      * @param off the start offset in the destination array {@code buf}
-     * @param len the maximum number of bytes read
-     * @return  the actual number of bytes inflated, or -1 if the end of the
-     *          compressed input stream is reached
+     * @param len the maximum number of bytes to read into {@code buf}
+     * @return  the actual number of bytes decompressed from a GZIP member, or -1 if the
+     *          end-of-stream is reached
      *
      * @throws     NullPointerException If {@code buf} is {@code null}.
      * @throws     IndexOutOfBoundsException If {@code off} is negative,
      * {@code len} is negative, or {@code len} is greater than
      * {@code buf.length - off}
      * @throws    ZipException if the compressed input data is corrupt.
-     * @throws    IOException if an I/O error has occurred.
+     * @throws    IOException if the stream is closed or an I/O error has occurred.
      *
+     * @see ##gzip_file_format GZIP file format
      */
+    @Override
     public int read(byte[] buf, int off, int len) throws IOException {
         ensureOpen();
         if (eos) {
@@ -165,6 +209,7 @@ public class GZIPInputStream extends InflaterInputStream {
      * with the stream.
      * @throws    IOException if an I/O error has occurred
      */
+    @Override
     public void close() throws IOException {
         if (!closed) {
             super.close();
@@ -173,20 +218,6 @@ public class GZIPInputStream extends InflaterInputStream {
         }
     }
 
-    /**
-     * GZIP header magic number.
-     */
-    public static final int GZIP_MAGIC = 0x8b1f;
-
-    /*
-     * File header flags.
-     */
-    private static final int FTEXT      = 1;    // Extra text
-    private static final int FHCRC      = 2;    // Header CRC
-    private static final int FEXTRA     = 4;    // Extra field
-    private static final int FNAME      = 8;    // File name
-    private static final int FCOMMENT   = 16;   // File comment
-
     /*
      * Reads GZIP member header and returns the total byte number
      * of this member header.
@@ -309,8 +340,6 @@ public class GZIPInputStream extends InflaterInputStream {
         return b;
     }
 
-    private byte[] tmpbuf = new byte[128];
-
     /*
      * Skips bytes of input data blocking until all bytes are skipped.
      * Does not assume that the input stream is capable of seeking.
diff --git a/src/java.base/share/classes/jdk/internal/util/Architecture.java b/src/java.base/share/classes/jdk/internal/util/Architecture.java
index 4f193e75597..31f328c2ead 100644
--- a/src/java.base/share/classes/jdk/internal/util/Architecture.java
+++ b/src/java.base/share/classes/jdk/internal/util/Architecture.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2023, 2026, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -37,15 +37,15 @@ import java.util.Locale;
  * architecture values.
  */
 public enum Architecture {
-    /*
-     * An unknown architecture not specifically named.
-     * The addrSize and ByteOrder values are those of the current architecture.
-     */
     AARCH64(64, ByteOrder.LITTLE_ENDIAN),
     ARM(32, ByteOrder.LITTLE_ENDIAN),
     LOONGARCH64(64, ByteOrder.LITTLE_ENDIAN),
     MIPSEL(32, ByteOrder.LITTLE_ENDIAN),
     MIPS64EL(64, ByteOrder.LITTLE_ENDIAN),
+    /*
+     * An unknown architecture not specifically named.
+     * The addrSize and ByteOrder values are those of the current architecture.
+     */
     OTHER(is64bit() ? 64 : 32, ByteOrder.nativeOrder()),
     PPC(32, ByteOrder.BIG_ENDIAN),
     PPC64(64, ByteOrder.BIG_ENDIAN),
diff --git a/src/java.base/share/classes/sun/security/ssl/SSLCipher.java b/src/java.base/share/classes/sun/security/ssl/SSLCipher.java
index 9d1d6dabaec..a0fc6c6e207 100644
--- a/src/java.base/share/classes/sun/security/ssl/SSLCipher.java
+++ b/src/java.base/share/classes/sun/security/ssl/SSLCipher.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018, 2026, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -26,6 +26,7 @@
 package sun.security.ssl;
 
 import sun.security.ssl.Authenticator.MAC;
+import sun.security.util.Debug;
 
 import javax.crypto.BadPaddingException;
 import javax.crypto.Cipher;
@@ -371,15 +372,15 @@ enum SSLCipher {
             ProtocolVersion[]>[] writeCipherGenerators;
 
     // Map of Ciphers listed in jdk.tls.keyLimits
-    private static final HashMap<String, Long> cipherLimits = new HashMap<>();
+    static final HashMap<String, Long> cipherLimits = new HashMap<>();
 
     // Keywords found on the jdk.tls.keyLimits security property.
     static final String[] tag = {"KEYUPDATE"};
+    static final long COUNTDOWNWARN = 20000; // Print debug warning under limit
 
     static  {
         final long max = 4611686018427387904L; // 2^62
         String prop = Security.getProperty("jdk.tls.keyLimits");
-
         if (prop != null) {
             String[] propvalue = prop.split(",");
 
@@ -617,12 +618,21 @@ enum SSLCipher {
 
         /**
          * Check if processed bytes have reached the key usage limit.
-         * If key usage limit is not be monitored, return false.
+         * If key usage limits are not be monitored, return false.
          */
         public boolean atKeyLimit() {
+            if (keyLimitCountdown < COUNTDOWNWARN && SSLLogger.isOn()) {
+                SSLLogger.fine("keyLimitCountdown: " + keyLimitCountdown);
+            }
             if (keyLimitCountdown >= 0) {
                 return false;
             }
+            if (keyLimitEnabled == false) {
+                if (SSLLogger.isOn()) {
+                    SSLLogger.fine("KeyUpdate already sent, skipping");
+                }
+                return false;
+            }
 
             // Turn off limit checking as KeyUpdate will be occurring
             keyLimitEnabled = false;
diff --git a/src/java.base/windows/native/libjava/canonicalize_md.c b/src/java.base/windows/native/libjava/canonicalize_md.c
index 8596521509c..bc17531e4a5 100644
--- a/src/java.base/windows/native/libjava/canonicalize_md.c
+++ b/src/java.base/windows/native/libjava/canonicalize_md.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2026, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -181,10 +181,12 @@ WCHAR* getFinalPath(WCHAR* path, WCHAR* finalPath, DWORD size)
                 int isUnc = (finalPath[4] == L'U' &&
                              finalPath[5] == L'N' &&
                              finalPath[6] == L'C');
+                // keep leading double backslashes in case of UNC
+                const int startIdx = (isUnc) ? 1 : 0;
                 int prefixLen = (isUnc) ? 7 : 4;
                 // the amount to copy includes terminator
                 int amountToCopy = len - prefixLen + 1;
-                wmemmove(finalPath, finalPath + prefixLen, amountToCopy);
+                wmemmove(finalPath + startIdx, finalPath + prefixLen, amountToCopy);
             }
 
             return finalPath;
diff --git a/src/java.desktop/windows/native/libawt/windows/Devices.cpp b/src/java.desktop/windows/native/libawt/windows/Devices.cpp
index 8096b1084a2..7192f65425b 100644
--- a/src/java.desktop/windows/native/libawt/windows/Devices.cpp
+++ b/src/java.desktop/windows/native/libawt/windows/Devices.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2026, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -117,47 +117,78 @@ static BOOL IsValidMonitor(HMONITOR hMon)
     return TRUE;
 }
 
-// Callback for CountMonitors below
-static BOOL WINAPI clb_fCountMonitors(HMONITOR hMon, HDC hDC, LPRECT rRect, LPARAM lpMonitorCounter)
-{
-    if (IsValidMonitor(hMon)) {
-        (*((int *)lpMonitorCounter))++;
-    }
-
-    return TRUE;
-}
-
-int WINAPI CountMonitors(void)
-{
-    int monitorCounter = 0;
-    ::EnumDisplayMonitors(NULL, NULL, clb_fCountMonitors, (LPARAM)&monitorCounter);
-    return monitorCounter;
-}
 
 // Callback for CollectMonitors below
 static BOOL WINAPI clb_fCollectMonitors(HMONITOR hMon, HDC hDC, LPRECT rRect, LPARAM lpMonitorData)
 {
     MonitorData* pMonitorData = (MonitorData *)lpMonitorData;
-    if ((pMonitorData->monitorCounter < pMonitorData->monitorLimit) && (IsValidMonitor(hMon))) {
-        pMonitorData->hmpMonitors[pMonitorData->monitorCounter] = hMon;
-        pMonitorData->monitorCounter++;
+
+    if (!IsValidMonitor(hMon)) {
+        return TRUE;
     }
 
+    if (pMonitorData->monitorCounter == pMonitorData->monitorLimit) {
+        TRY;
+
+        int newMonitorLimit = pMonitorData->monitorLimit * 2;
+        HMONITOR* newMonitors =
+            (HMONITOR*)SAFE_SIZE_ARRAY_REALLOC(
+                safe_Realloc, pMonitorData->hmpMonitors,
+                newMonitorLimit, sizeof(HMONITOR)
+            );
+        pMonitorData->hmpMonitors = newMonitors;
+        pMonitorData->monitorLimit = newMonitorLimit;
+
+        CATCH_BAD_ALLOC_RET(FALSE);
+    }
+
+    pMonitorData->hmpMonitors[pMonitorData->monitorCounter] = hMon;
+    pMonitorData->monitorCounter++;
+
     return TRUE;
 }
 
-static int WINAPI CollectMonitors(HMONITOR* hmpMonitors, int nNum)
+static HMONITOR* CollectMonitors(int* numScreens)
 {
-    if (NULL != hmpMonitors) {
-        MonitorData monitorData;
-        monitorData.monitorCounter = 0;
-        monitorData.monitorLimit = nNum;
-        monitorData.hmpMonitors = hmpMonitors;
-        ::EnumDisplayMonitors(NULL, NULL, clb_fCollectMonitors, (LPARAM)&monitorData);
-        return monitorData.monitorCounter;
-    } else {
-        return 0;
+    const int initialMonitorLimit = 4;
+
+    *numScreens = 0;
+
+    MonitorData data;
+    data.monitorCounter = 0;
+    data.monitorLimit = initialMonitorLimit;
+
+    TRY;
+
+    data.hmpMonitors = (HMONITOR*)SAFE_SIZE_ARRAY_ALLOC(safe_Malloc,
+                            initialMonitorLimit, sizeof(HMONITOR));
+    CATCH_BAD_ALLOC_RET(NULL);
+
+    if (!::EnumDisplayMonitors(NULL, NULL, clb_fCollectMonitors, (LPARAM)&data)) {
+        free(data.hmpMonitors);
+        return NULL;
     }
+
+    *numScreens = data.monitorCounter;
+    return data.hmpMonitors;
+}
+
+int WINAPI CountMonitors()
+{
+    int numScreens = 0;
+    HMONITOR* monHds = CollectMonitors(&numScreens);
+    free(monHds);
+    return numScreens;
+}
+
+static BOOL AreSameMonitorInfo(LPMONITORINFOEX oldInfo, LPMONITORINFOEX newInfo)
+{
+    if (oldInfo == NULL || newInfo == NULL) {
+        return FALSE;
+    }
+
+    return oldInfo->dwFlags == newInfo->dwFlags
+            && ::lstrcmp(oldInfo->szDevice, newInfo->szDevice) == 0;
 }
 
 BOOL WINAPI MonitorBounds(HMONITOR hmMonitor, RECT* rpBounds)
@@ -206,17 +237,26 @@ BOOL Devices::UpdateInstance(JNIEnv *env)
 {
     J2dTraceLn(J2D_TRACE_INFO, "Devices::UpdateInstance");
 
-    int numScreens = CountMonitors();
-    HMONITOR *monHds = (HMONITOR *)SAFE_SIZE_ARRAY_ALLOC(safe_Malloc,
-            numScreens, sizeof(HMONITOR));
-    if (numScreens != CollectMonitors(monHds, numScreens)) {
+    int numScreens = 0;
+    HMONITOR *monHds = CollectMonitors(&numScreens);
+    if (monHds == NULL) {
         J2dRlsTraceLn(J2D_TRACE_ERROR,
-                      "Devices::UpdateInstance: Failed to get all "\
+                      "Devices::UpdateInstance: Failed to get "\
                       "monitor handles.");
         free(monHds);
         return FALSE;
     }
 
+    if (numScreens == 0) {
+        CriticalSection::Lock l(arrayLock);
+        if (theInstance != NULL) {
+            J2dRlsTraceLn(J2D_TRACE_ERROR,
+                          "Devices::UpdateInstance: No valid monitor handles.");
+            free(monHds);
+            return FALSE;
+        }
+    }
+
     Devices *newDevices = new Devices(numScreens);
     // This way we know that the array will not be disposed of
     // at least until we replaced it with a new one.
@@ -242,18 +282,26 @@ BOOL Devices::UpdateInstance(JNIEnv *env)
         theInstance = newDevices;
 
         if (oldDevices) {
-            // Invalidate the devices with indexes out of the new set of
-            // devices. This doesn't cover all cases when the device
-            // might should be invalidated (like if it's not the last device
-            // that was removed), but it will have to do for now.
             int oldNumScreens = oldDevices->GetNumDevices();
-            int newNumScreens = theInstance->GetNumDevices();
-            J2dTraceLn(J2D_TRACE_VERBOSE, "  Invalidating removed devices");
-            for (int i = newNumScreens; i < oldNumScreens; i++) {
-                // removed device, needs to be invalidated
+            J2dTraceLn(J2D_TRACE_VERBOSE, "  Invalidating changed devices");
+            for (int i = 0; i < oldNumScreens; i++) {
+                AwtWin32GraphicsDevice *oldDevice =
+                    oldDevices->GetDevice(i, FALSE);
+                AwtWin32GraphicsDevice *newDevice =
+                    theInstance->GetDevice(i, FALSE);
+                BOOL changed = (newDevice == NULL)
+                    || !AreSameMonitorInfo(
+                            (LPMONITORINFOEX) oldDevice->GetMonitorInfo(),
+                            (LPMONITORINFOEX) newDevice->GetMonitorInfo());
+
+                if (!changed) {
+                    newDevice->TransferJavaDevice(env, oldDevice);
+                    continue;
+                }
+
                 J2dTraceLn(J2D_TRACE_WARNING,
-                           "Devices::UpdateInstance: device removed: %d", i);
-                oldDevices->GetDevice(i)->Invalidate(env);
+                           "Devices::UpdateInstance: device changed: %d", i);
+                oldDevice->Invalidate(env);
             }
             // Now that we have a new array in place, remove this (possibly the
             // last) reference to the old instance.
@@ -346,6 +394,12 @@ AwtWin32GraphicsDevice *Devices::GetDevice(int index, BOOL adjust)
     J2dTraceLn(J2D_TRACE_INFO,
                "Devices::GetDevice index=%d adjust?=%d",
                index, adjust);
+    if (numDevices <= 0) {
+        J2dTraceLn(J2D_TRACE_WARNING,
+                   "Devices::GetDevice: "\
+                   "no devices, returning NULL.");
+        return NULL;
+    }
     if (index < 0 || index >= numDevices) {
         if (!adjust) {
             J2dTraceLn(J2D_TRACE_WARNING,
diff --git a/src/java.desktop/windows/native/libawt/windows/Devices.h b/src/java.desktop/windows/native/libawt/windows/Devices.h
index 0972ef1414e..7e7a419453e 100644
--- a/src/java.desktop/windows/native/libawt/windows/Devices.h
+++ b/src/java.desktop/windows/native/libawt/windows/Devices.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2024, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2026, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -47,8 +47,11 @@ static BOOL                     UpdateInstance(JNIEnv *env);
        class InstanceAccess {
        public:
            INLINE   InstanceAccess() { devices = Devices::GetInstance(); }
-           INLINE  ~InstanceAccess() { devices->Release(); }
+           INLINE  ~InstanceAccess() { if (devices != NULL) devices->Release(); }
            Devices* operator->()     { return devices; }
+           INLINE AwtWin32GraphicsDevice* Device(int index, BOOL adjust = TRUE) {
+               return devices == NULL ? NULL : devices->GetDevice(index, adjust);
+           }
         private:
            Devices* devices;
            // prevent bad things like copying or getting address of
diff --git a/src/java.desktop/windows/native/libawt/windows/awt_Toolkit.cpp b/src/java.desktop/windows/native/libawt/windows/awt_Toolkit.cpp
index b447ad6889a..a94c96c58c5 100644
--- a/src/java.desktop/windows/native/libawt/windows/awt_Toolkit.cpp
+++ b/src/java.desktop/windows/native/libawt/windows/awt_Toolkit.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1996, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1996, 2026, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -65,7 +65,7 @@
 #include <java_awt_Toolkit.h>
 #include <java_awt_event_InputMethodEvent.h>
 
-extern void initScreens(JNIEnv *env);
+extern BOOL initScreens(JNIEnv *env);
 extern "C" void awt_dnd_initialize();
 extern "C" void awt_dnd_uninitialize();
 extern "C" void awt_clipboard_uninitialize(JNIEnv *env);
@@ -157,6 +157,78 @@ extern "C" JNIEXPORT jboolean JNICALL AWTIsHeadless() {
 }
 
 #define IDT_AWT_MOUSECHECK 0x101
+#define IDT_AWT_DISPLAYCHANGE 0x102
+
+#define AWT_DISPLAYCHANGE_RETRY_DELAY 250
+#define AWT_DISPLAYCHANGE_RETRY_LIMIT 20
+
+class DisplayChangeHandler {
+public:
+    static BOOL Handle(JNIEnv *env, HWND hWnd) {
+        // Reinitialize screens
+        if (!initScreens(env)) {
+            OnDisplayChangeFailed(hWnd);
+            return FALSE;
+        }
+
+        OnDisplayChangeSucceeded(hWnd);
+
+        // Notify Java side - call WToolkit.displayChanged()
+        jclass clazz = env->FindClass("sun/awt/windows/WToolkit");
+        DASSERT(clazz != NULL);
+        if (!clazz) throw std::bad_alloc();
+        env->CallStaticVoidMethod(clazz, AwtToolkit::displayChangeMID);
+
+        return !env->ExceptionCheck();
+    }
+
+    static void Reset(HWND hWnd) {
+        ::KillTimer(hWnd, IDT_AWT_DISPLAYCHANGE);
+        retryCount = 0;
+    }
+
+    static void ScheduleFromSessionChange(HWND hWnd) {
+        if (!recoveryPending) {
+            return;
+        }
+        Reset(hWnd);
+        Schedule(hWnd);
+    }
+
+private:
+    static void OnDisplayChangeFailed(HWND hWnd) {
+        recoveryPending = TRUE;
+        Schedule(hWnd);
+    }
+
+    static void OnDisplayChangeSucceeded(HWND hWnd) {
+        recoveryPending = FALSE;
+        Reset(hWnd);
+    }
+
+    static void Schedule(HWND hWnd) {
+        if (retryCount >= AWT_DISPLAYCHANGE_RETRY_LIMIT) {
+            Reset(hWnd);
+            J2dRlsTraceLn(J2D_TRACE_ERROR,
+                          "AwtToolkit: Display change retry limit exceeded.");
+            return;
+        }
+
+        retryCount++;
+        if (::SetTimer(hWnd, IDT_AWT_DISPLAYCHANGE,
+                       AWT_DISPLAYCHANGE_RETRY_DELAY, NULL) == 0) {
+            Reset(hWnd);
+            J2dRlsTraceLn(J2D_TRACE_ERROR,
+                          "AwtToolkit: Failed to schedule display change retry.");
+        }
+    }
+
+    static int retryCount;
+    static BOOL recoveryPending;
+};
+
+int DisplayChangeHandler::retryCount = 0;
+BOOL DisplayChangeHandler::recoveryPending = FALSE;
 
 static LPCTSTR szAwtToolkitClassName = TEXT("SunAwtToolkit");
 
@@ -1004,6 +1076,14 @@ LRESULT CALLBACK AwtToolkit::WndProc(HWND hWnd, UINT message,
       }
 
       case WM_TIMER: {
+          if (wParam == IDT_AWT_DISPLAYCHANGE) {
+              if (DisplayChangeHandler::Handle(env, hWnd)) {
+                  GetInstance().m_displayChanged = TRUE;
+                  ::PostMessage(HWND_BROADCAST, WM_PALETTEISCHANGING, NULL, NULL);
+              }
+              return 0;
+          }
+
           // 6479820. Should check if a window is in manual resizing process: skip
           // sending any MouseExit/Enter events while inside resize-loop.
           // Note that window being in manual moving process could still
@@ -1245,18 +1325,11 @@ LRESULT CALLBACK AwtToolkit::WndProc(HWND hWnd, UINT message,
           return tk.m_inputMethodData;
       }
       case WM_DISPLAYCHANGE: {
-          // Reinitialize screens
-          initScreens(env);
-
-          // Notify Java side - call WToolkit.displayChanged()
-          jclass clazz = env->FindClass("sun/awt/windows/WToolkit");
-          DASSERT(clazz != NULL);
-          if (!clazz) throw std::bad_alloc();
-          env->CallStaticVoidMethod(clazz, AwtToolkit::displayChangeMID);
-
-          GetInstance().m_displayChanged = TRUE;
-
-          ::PostMessage(HWND_BROADCAST, WM_PALETTEISCHANGING, NULL, NULL);
+          DisplayChangeHandler::Reset(hWnd);
+          if (DisplayChangeHandler::Handle(env, hWnd)) {
+              GetInstance().m_displayChanged = TRUE;
+              ::PostMessage(HWND_BROADCAST, WM_PALETTEISCHANGING, NULL, NULL);
+          }
           break;
       }
       /* Session management */
@@ -1341,6 +1414,9 @@ LRESULT CALLBACK AwtToolkit::WndProc(HWND hWnd, UINT message,
                                               activate
                                               ? JNI_TRUE
                                               : JNI_FALSE, reason);
+              if (activate) {
+                  DisplayChangeHandler::ScheduleFromSessionChange(hWnd);
+              }
           }
           break;
       }
diff --git a/src/java.desktop/windows/native/libawt/windows/awt_Win32GraphicsDevice.cpp b/src/java.desktop/windows/native/libawt/windows/awt_Win32GraphicsDevice.cpp
index 785f1301516..a43bd9c6bef 100644
--- a/src/java.desktop/windows/native/libawt/windows/awt_Win32GraphicsDevice.cpp
+++ b/src/java.desktop/windows/native/libawt/windows/awt_Win32GraphicsDevice.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2026, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -612,13 +612,52 @@ void AwtWin32GraphicsDevice::Release()
 }
 
 /**
- * Links this native object with its java Win32GraphicsDevice.
- * Need this link because the colorModel of the java device
- * may be updated from native code.
+ * Links this native object with its java Win32GraphicsDevice peer.
+ *
+ * The link is needed for upcalls to the java peer, such as invalidate()
+ * and dynamic color model updates.
+ *
+ * Passing NULL intentionally clears the link.
+ * Clearing it here prevents stale peer links and releases
+ * the old JNI weak global ref.
+ *
+ * During display changes, the native device array is recreated,
+ * changed or removed devices invalidate their java peers.
+ * Unchanged monitors transfer the existing weak ref to
+ * the new native device by TransferJavaDevice().
  */
 void AwtWin32GraphicsDevice::SetJavaDevice(JNIEnv *env, jobject objPtr)
 {
-    javaDevice = env->NewWeakGlobalRef(objPtr);
+    jobject newJavaDevice = NULL;
+    if (objPtr != NULL) {
+        newJavaDevice = env->NewWeakGlobalRef(objPtr);
+        if (newJavaDevice == NULL) {
+            return;
+        }
+    }
+
+    if (javaDevice != NULL) {
+        env->DeleteWeakGlobalRef(javaDevice);
+    }
+    javaDevice = newJavaDevice;
+}
+
+/**
+ * Transfers the java Win32GraphicsDevice's link from a native device that is
+ * being replaced by a new native device for the same monitor.
+ */
+void AwtWin32GraphicsDevice::TransferJavaDevice(JNIEnv *env,
+                                                AwtWin32GraphicsDevice *device)
+{
+    if (device == NULL || device == this || device->javaDevice == NULL) {
+        return;
+    }
+
+    if (javaDevice != NULL) {
+        env->DeleteWeakGlobalRef(javaDevice);
+    }
+    javaDevice = device->javaDevice;
+    device->javaDevice = NULL;
 }
 
 /**
@@ -1398,7 +1437,10 @@ JNIEXPORT void JNICALL
     (JNIEnv *env, jobject thisPtr, jint screen)
 {
     Devices::InstanceAccess devices;
-    devices->GetDevice(screen)->SetJavaDevice(env, thisPtr);
+    AwtWin32GraphicsDevice *device = devices.Device(screen, FALSE);
+    if (device != NULL) {
+        device->SetJavaDevice(env, thisPtr);
+    }
 }
 
 /*
@@ -1411,9 +1453,8 @@ JNIEXPORT void JNICALL
     (JNIEnv *env, jobject thisPtr, jint screen, jfloat scaleX, jfloat scaleY)
 {
     Devices::InstanceAccess devices;
-    AwtWin32GraphicsDevice *device = devices->GetDevice(screen);
-
-    if (device != NULL ) {
+    AwtWin32GraphicsDevice *device = devices.Device(screen, FALSE);
+    if (device != NULL) {
         device->DisableScaleAutoRefresh();
         device->SetScale(scaleX, scaleY);
     }
@@ -1429,8 +1470,8 @@ JNIEXPORT jfloat JNICALL
     (JNIEnv *env, jobject thisPtr, jint screen)
 {
     Devices::InstanceAccess devices;
-    AwtWin32GraphicsDevice *device = devices->GetDevice(screen);
-    return (device == NULL) ? 1 : device->GetScaleX();
+    AwtWin32GraphicsDevice *device = devices.Device(screen, FALSE);
+    return device == NULL ? 1 : device->GetScaleX();
 }
 
 /*
@@ -1443,8 +1484,8 @@ JNIEXPORT jfloat JNICALL
     (JNIEnv *env, jobject thisPtr, jint screen)
 {
     Devices::InstanceAccess devices;
-    AwtWin32GraphicsDevice *device = devices->GetDevice(screen);
-    return (device == NULL) ? 1 : device->GetScaleY();
+    AwtWin32GraphicsDevice *device = devices.Device(screen, FALSE);
+    return device == NULL ? 1 : device->GetScaleY();
 }
 
 /*
@@ -1457,8 +1498,7 @@ Java_sun_awt_Win32GraphicsDevice_initNativeScale
 (JNIEnv *env, jobject thisPtr, jint screen)
 {
     Devices::InstanceAccess devices;
-    AwtWin32GraphicsDevice *device = devices->GetDevice(screen);
-
+    AwtWin32GraphicsDevice *device = devices.Device(screen, FALSE);
     if (device != NULL) {
         device->InitDesktopScales();
     }
diff --git a/src/java.desktop/windows/native/libawt/windows/awt_Win32GraphicsDevice.h b/src/java.desktop/windows/native/libawt/windows/awt_Win32GraphicsDevice.h
index 55f6c1623a8..b3619dcc545 100644
--- a/src/java.desktop/windows/native/libawt/windows/awt_Win32GraphicsDevice.h
+++ b/src/java.desktop/windows/native/libawt/windows/awt_Win32GraphicsDevice.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2024, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2026, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -54,6 +54,8 @@ public:
     unsigned int            *GetSystemPaletteEntries();
     unsigned char           *GetSystemInverseLUT();
     void                    SetJavaDevice(JNIEnv *env, jobject objPtr);
+    void                    TransferJavaDevice(JNIEnv *env,
+                                               AwtWin32GraphicsDevice *device);
     HPALETTE                SelectPalette(HDC hDC);
     void                    RealizePalette(HDC hDC);
     HPALETTE                GetPalette();
diff --git a/src/java.desktop/windows/native/libawt/windows/awt_Win32GraphicsEnv.cpp b/src/java.desktop/windows/native/libawt/windows/awt_Win32GraphicsEnv.cpp
index 50591434d4c..cc53f4a3322 100644
--- a/src/java.desktop/windows/native/libawt/windows/awt_Win32GraphicsEnv.cpp
+++ b/src/java.desktop/windows/native/libawt/windows/awt_Win32GraphicsEnv.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2026, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -35,10 +35,12 @@
 
 BOOL DWMIsCompositionEnabled();
 
-void initScreens(JNIEnv *env) {
+BOOL initScreens(JNIEnv *env) {
     if (!Devices::UpdateInstance(env)) {
-        JNU_ThrowInternalError(env, "Could not update the devices array.");
+        J2dRlsTraceLn(J2D_TRACE_ERROR, "initScreens: Could not update the devices array.");
+        return FALSE;
     }
+    return TRUE;
 }
 
 /**
@@ -144,7 +146,9 @@ Java_sun_awt_Win32GraphicsEnvironment_initDisplay(JNIEnv *env,
 
     DWMIsCompositionEnabled();
 
-    initScreens(env);
+    if (!initScreens(env)) {
+        JNU_ThrowInternalError(env, "Could not update the devices array.");
+    }
 }
 
 /*
diff --git a/src/jdk.compiler/share/classes/com/sun/tools/javac/comp/DeferredAttr.java b/src/jdk.compiler/share/classes/com/sun/tools/javac/comp/DeferredAttr.java
index 89422ac4671..c9c2375fab9 100644
--- a/src/jdk.compiler/share/classes/com/sun/tools/javac/comp/DeferredAttr.java
+++ b/src/jdk.compiler/share/classes/com/sun/tools/javac/comp/DeferredAttr.java
@@ -25,6 +25,7 @@
 
 package com.sun.tools.javac.comp;
 
+import com.sun.source.tree.AnnotatedTypeTree;
 import com.sun.source.tree.LambdaExpressionTree.BodyKind;
 import com.sun.source.tree.NewClassTree;
 import com.sun.tools.javac.code.*;
@@ -62,6 +63,7 @@ import java.util.function.Predicate;
 import java.util.function.Supplier;
 
 import com.sun.source.tree.MemberReferenceTree;
+import com.sun.source.tree.ModifiersTree;
 import com.sun.tools.javac.code.Type;
 import com.sun.tools.javac.tree.JCTree.JCMemberReference.OverloadKind;
 
@@ -191,6 +193,19 @@ public class DeferredAttr extends JCTree.Visitor {
                     result.pos = t.pos;
                     return result;
                 }
+
+                @Override
+                public JCTree visitAnnotatedType(AnnotatedTypeTree node, Void p) {
+                    return copy(((JCAnnotatedType) node).underlyingType, p);
+                }
+
+                @Override
+                public JCTree visitModifiers(ModifiersTree node, Void p) {
+                    JCModifiers mods = (JCModifiers) super.visitModifiers(node, p);
+
+                    mods.annotations = List.nil();
+                    return mods;
+                }
             };
         deferredCopier = new TypeMapping<Void> () {
                 @Override
diff --git a/src/jdk.compiler/share/classes/com/sun/tools/javac/comp/TransTypes.java b/src/jdk.compiler/share/classes/com/sun/tools/javac/comp/TransTypes.java
index 1229939c0bf..86bdc5ac7b7 100644
--- a/src/jdk.compiler/share/classes/com/sun/tools/javac/comp/TransTypes.java
+++ b/src/jdk.compiler/share/classes/com/sun/tools/javac/comp/TransTypes.java
@@ -248,9 +248,6 @@ public class TransTypes extends TreeTranslator {
                                                meth.name,
                                                bridgeType,
                                                origin);
-        /* once JDK-6996415 is solved it should be checked if this approach can
-         * be applied to method addOverrideBridgesIfNeeded
-         */
         bridge.params = createBridgeParams(impl, bridge, bridgeType);
         bridge.setAttributes(impl);
 
diff --git a/src/jdk.jdeps/share/man/javap.md b/src/jdk.jdeps/share/man/javap.md
index d09ac57f196..0e11176aa10 100644
--- a/src/jdk.jdeps/share/man/javap.md
+++ b/src/jdk.jdeps/share/man/javap.md
@@ -1,5 +1,5 @@
 ---
-# Copyright (c) 1994, 2024, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 1994, 2026, Oracle and/or its affiliates. All rights reserved.
 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 #
 # This code is free software; you can redistribute it and/or modify it
@@ -54,7 +54,8 @@ javap - disassemble one or more class files
 
 The `javap` command disassembles one or more class files. The output depends on
 the options used. When no options are used, the `javap` command prints the
-protected and public fields, and methods of the classes passed to it.
+package private, protected, and public fields and methods declared in the classes passed
+to it.
 
 The `javap` command isn't multirelease JAR aware. Using the class path form of
 the command results in viewing the base entry in all JAR files, multirelease or
diff --git a/test/hotspot/jtreg/compiler/igvn/TestShiftWorklist.java b/test/hotspot/jtreg/compiler/igvn/TestShiftWorklist.java
new file mode 100644
index 00000000000..64c5a5fe658
--- /dev/null
+++ b/test/hotspot/jtreg/compiler/igvn/TestShiftWorklist.java
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/**
+ * @test
+ * @bug 8385408
+ * @summary Test that inputs to removed shifts are put on the worklist and cleaned up
+ * @library /test/lib /
+ * @run main/othervm -Xbatch -XX:-TieredCompilation -XX:-UseOnStackReplacement
+ *                   -XX:CompileCommand=compileonly,${test.main.class}::test
+ *                   ${test.main.class}
+ * @run main ${test.main.class}
+ */
+
+package compiler.igvn;
+
+import jdk.test.lib.Asserts;
+
+public class TestShiftWorklist {
+    int N = 400;
+    int iArr[] = new int[N];
+
+    public static void main(String[] args) {
+        TestShiftWorklist t = new TestShiftWorklist();
+        for (int i = 0; i < 2_000; i++) {
+            int result = t.test();
+            Asserts.assertEQ(result, 0);
+        }
+    }
+
+    private int test() {
+        long[] lArr = new long[N];
+        long l = 1957; // l % 32 = 5
+        int n = 1;
+        for (int i = 1; i < 30; ++i) {
+            for (double j = 1; j < 12; j++) {
+                iArr[i] = 3;
+                for (long k = 1; k < 2; k++) {
+                    // C2 is able to prove that the effecive shift value for the int n is always 5.
+                    n >>= l;
+                }
+                l = 907436423360901L; // l % 32 = 5
+            }
+        }
+        return (int) checkSum(lArr);
+    }
+
+    private static long checkSum(long[] a) {
+        long sum = 0;
+        for (int j = 0; j < a.length; j++) {
+            sum += (a[j] / (j + 1) + a[j] % (j + 1));
+        }
+        return sum;
+    }
+}
diff --git a/test/hotspot/jtreg/compiler/parsing/TestAastoreTypeSafe.java b/test/hotspot/jtreg/compiler/parsing/TestAastoreTypeSafe.java
new file mode 100644
index 00000000000..f33c0fc0b87
--- /dev/null
+++ b/test/hotspot/jtreg/compiler/parsing/TestAastoreTypeSafe.java
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package compiler.parsing;
+
+import jdk.test.lib.Asserts;
+
+/*
+ * @test
+ * @bug 8382936
+ * @library /test/lib
+ * @summary Test that aastore generates a type safe graph
+ * @run main ${test.main.class}
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -Xcomp -XX:-TieredCompilation
+ *                   -XX:CompileOnly=${test.main.class}::test* -XX:-MonomorphicArrayCheck
+ *                   -XX:+AlwaysIncrementalInline -XX:+UseCompressedOops ${test.main.class}
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -Xcomp -XX:-TieredCompilation
+ *                   -XX:CompileOnly=${test.main.class}::test* -XX:-MonomorphicArrayCheck
+ *                   -XX:+AlwaysIncrementalInline -XX:-UseCompressedOops ${test.main.class}
+ */
+public class TestAastoreTypeSafe {
+    private static final class A {
+        int v;
+    }
+
+    private static class B {
+    }
+
+    private static final class C extends B {
+        int v;
+    }
+
+    public static void main(String[] args) {
+        {
+            A[] array = new A[1];
+            A element = new A();
+            for (int i = 0; i < 10; i++) {
+                test1(true, array, element, element);
+                Asserts.assertEQ(0, element.v);
+            }
+        }
+
+        {
+            C[] array = new C[1];
+            C element = new C();
+            for (int i = 0; i < 10; i++) {
+                test2(true, array, element, element);
+                Asserts.assertEQ(0, element.v);
+            }
+        }
+    }
+
+    // The array load will be folded, but the element type is lost in the graph. This leads to the
+    // scheduler missing the dependency between the load from the return value of aaload and the
+    // store into alias.v. The load will then be put late just before the second store to alias.v,
+    // which is an invalid schedule.
+    private static void test1(boolean b, A[] array, Object element, A alias) {
+        aastoreA(array, element);
+        int v = aaloadA(array).v;
+        alias.v = 1;
+        if (b) {
+            alias.v = v;
+        }
+    }
+
+    private static void aastoreA(A[] array, Object element) {
+        // This forces the compiler to try storing an Object into an A[]. Otherwise, doing
+        // array[0] = (A)element will make it so that the stored value being an A already.
+        ((Object[]) array)[0] = element;
+    }
+
+    private static A aaloadA(A[] array) {
+        return array[0];
+    }
+
+    // Similar to above, but the store cannot be truly type safe because the exact array type is
+    // unknown
+    private static void test2(boolean b, B[] array, Object element, C alias) {
+        aastoreB(array, element);
+        int v = aaloadC(array).v;
+        alias.v = 1;
+        if (b) {
+            alias.v = v;
+        }
+    }
+
+    private static void aastoreB(B[] array, Object element) {
+        ((Object[]) array)[0] = element;
+    }
+
+    private static C aaloadC(B[] array) {
+        return ((C[]) array)[0];
+    }
+}
diff --git a/test/hotspot/jtreg/compiler/rangechecks/TestFoldCompares.java b/test/hotspot/jtreg/compiler/rangechecks/TestFoldCompares.java
new file mode 100644
index 00000000000..bec3e442403
--- /dev/null
+++ b/test/hotspot/jtreg/compiler/rangechecks/TestFoldCompares.java
@@ -0,0 +1,346 @@
+/*
+ * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test id=vanilla
+ * @bug 8346420
+ * @summary Test logic in IfNode::fold_compares, which folds 2 signed comparisons
+ *          into a single comparison.
+ * @library /test/lib /
+ * @run main ${test.main.class}
+ */
+
+/*
+ * @test id=Xcomp
+ * @bug 8346420
+ * @library /test/lib /
+ * @run main ${test.main.class} -Xcomp -XX:-TieredCompilation -XX:CompileCommand=compileonly,${test.main.class}::test*
+ */
+
+package compiler.rangechecks;
+
+import compiler.lib.ir_framework.*;
+
+/**
+ * This test here is here to cover some basic cases of IfNode::fold_compares. It also contains the
+ * reproducers for JDK-8346420. We don't do any result verification, other than that we should never
+ * hit an Exception. For a test with result verification, see TestFoldComparesFuzzer.java
+ */
+public class TestFoldCompares {
+    public static boolean FLAG_FALSE = false;
+
+    public static void main(String[] args) {
+        TestFramework framework = new TestFramework();
+        framework.addFlags(args);
+        framework.start();
+    }
+
+    // ------------------------- Failing cases for JDK-8346420 ------------------------------
+
+    @Test
+    @Arguments(values = {Argument.NUMBER_42})
+    // Reported overflow case with wrong result in JDK-8346420
+    public static void test_Case3a_LTLE_overflow(int i) {
+        int minimum, maximum;
+        if (FLAG_FALSE) {
+            minimum = 0;
+            maximum = 1;
+        } else {
+            // Always goes to else-path
+            minimum = Integer.MIN_VALUE;
+            maximum = Integer.MAX_VALUE;
+        }
+        // i  < INT_MIN    || i  > MAX_INT
+        // 42 < INT_MIN    || 42 > MAX_INT
+        //    false           false
+        // => false
+        //
+        // C2 transforms this into:
+        // i  - minimum >=u (maximum - minimum) + 1
+        // 42 - INT_MIN >=u (INT_MAX - INT_MIN) + 1
+        // 42 + MIN_INT >=u -1                  + 1
+        //                  ------ overflow -------
+        // 42 + MIN_INT >=u 0
+        // => true
+        if (i < minimum || i > maximum) {
+            throw new RuntimeException("i can never be outside [min_int, max_int]");
+        }
+    }
+
+    @Test
+    @Arguments(values = {Argument.NUMBER_42})
+    // Same as  test_Case3a_LTLE_overflow, just with swapped conditions (JDK-8346420).
+    public static void test_Case3b_LTLE_overflow(int i) {
+        int minimum, maximum;
+        if (FLAG_FALSE) {
+            minimum = 0;
+            maximum = 1;
+        } else {
+            // Always goes to else-path
+            minimum = Integer.MIN_VALUE;
+            maximum = Integer.MAX_VALUE;
+        }
+        if (i > maximum || i < minimum) {
+            throw new RuntimeException("i can never be outside [min_int, max_int]");
+        }
+    }
+
+    @Test
+    @Arguments(values = {Argument.NUMBER_42})
+    //  22  ConI  === 0  [[ 25 37 ]]  #int:0
+    //  35  ConI  === 0  [[ 37 ]]  #int:minint
+    //  33  ConI  === 0  [[ 38 81 ]]  #int:1
+    //  37  Phi  === 34 35 22  [[ 42 80 81 84 ]]  #int:minint..0, 0u..maxint+1
+    //  81  AddI  === _ 37 33  [[ 82 ]]
+    //  82  Node  === 81  [[ ]]                      <----- hook
+    //
+    // We hit this assert, found while working on JDK-8346420:
+    // "fatal error: no reachable node should have no use"
+    //
+    // Because we compute:
+    //   lo = lo + 1
+    //   hook = Node(lo)
+    //   adjusted_val = i - lo
+    //   -> gvn transformed to: (i - lo) + -1
+    //   -> the "lo = lo + 1" AddI now is only used by the hook,
+    //      but once the hook is destroyed, it has no use any more,
+    //      and we hit the assert.
+    public static void test_Case4a_LELE_assert(int i) {
+        int minimum, maximum;
+        if (FLAG_FALSE) {
+            minimum = 0;
+            maximum = 1;
+        } else {
+            minimum = Integer.MIN_VALUE;
+            maximum = Integer.MAX_VALUE;
+        }
+        if (i <= minimum || i > maximum) {
+            throw new RuntimeException("should never be reached");
+        }
+    }
+
+    // ------------------- IR tests to check that optimization was performed ------------------------
+
+    // The following tests with constant bounds are expected to fold to a single CmpU.
+
+    @Test
+    @IR(counts = {IRNode.CMP_I, "= 2", IRNode.CMP_U, "= 0"}, phase = CompilePhase.AFTER_PARSING)
+    @IR(counts = {IRNode.CMP_I, "= 0", IRNode.CMP_U, "= 1"})
+    @Arguments(values = {Argument.NUMBER_42})
+    public static void test_lohi_ltle(int i) {
+        if (i < -100_000 || i > 100_000) {
+            throw new RuntimeException();
+        }
+    }
+
+    @Test
+    @IR(counts = {IRNode.CMP_I, "= 2", IRNode.CMP_U, "= 0"}, phase = CompilePhase.AFTER_PARSING)
+    @IR(counts = {IRNode.CMP_I, "= 0", IRNode.CMP_U, "= 1"})
+    @Arguments(values = {Argument.NUMBER_42})
+    public static void test_lohi_lele(int i) {
+        if (i <= -100_000 || i > 100_000) {
+            throw new RuntimeException();
+        }
+    }
+
+    @Test
+    @IR(counts = {IRNode.CMP_I, "= 2", IRNode.CMP_U, "= 0"}, phase = CompilePhase.AFTER_PARSING)
+    @IR(counts = {IRNode.CMP_I, "= 0", IRNode.CMP_U, "= 1"})
+    @Arguments(values = {Argument.NUMBER_42})
+    public static void test_lohi_ltlt(int i) {
+        if (i < -100_000 || i >= 100_000) {
+            throw new RuntimeException();
+        }
+    }
+
+    @Test
+    @IR(counts = {IRNode.CMP_I, "= 2", IRNode.CMP_U, "= 0"}, phase = CompilePhase.AFTER_PARSING)
+    @IR(counts = {IRNode.CMP_I, "= 0", IRNode.CMP_U, "= 1"})
+    @Arguments(values = {Argument.NUMBER_42})
+    public static void test_lohi_lelt(int i) {
+        if (i <= -100_000 || i >= 100_000) {
+            throw new RuntimeException();
+        }
+    }
+
+    @Test
+    @IR(counts = {IRNode.CMP_I, "= 2", IRNode.CMP_U, "= 0"}, phase = CompilePhase.AFTER_PARSING)
+    @IR(counts = {IRNode.CMP_I, "= 0", IRNode.CMP_U, "= 1"})
+    @Arguments(values = {Argument.NUMBER_42})
+    public static void test_hilo_ltle(int i) {
+        if (i >= 100_000 || i <= -100_000) {
+            throw new RuntimeException();
+        }
+    }
+
+    @Test
+    @IR(counts = {IRNode.CMP_I, "= 2", IRNode.CMP_U, "= 0"}, phase = CompilePhase.AFTER_PARSING)
+    @IR(counts = {IRNode.CMP_I, "= 0", IRNode.CMP_U, "= 1"})
+    @Arguments(values = {Argument.NUMBER_42})
+    public static void test_hilo_lele(int i) {
+        if (i > 100_000 || i <= -100_000) {
+            throw new RuntimeException();
+        }
+    }
+
+    @Test
+    @IR(counts = {IRNode.CMP_I, "= 2", IRNode.CMP_U, "= 0"}, phase = CompilePhase.AFTER_PARSING)
+    @IR(counts = {IRNode.CMP_I, "= 0", IRNode.CMP_U, "= 1"})
+    @Arguments(values = {Argument.NUMBER_42})
+    public static void test_hilo_lelt(int i) {
+        if (i > 100_000 || i < -100_000) {
+            throw new RuntimeException();
+        }
+    }
+
+    @Test
+    @IR(counts = {IRNode.CMP_I, "= 2", IRNode.CMP_U, "= 0"}, phase = CompilePhase.AFTER_PARSING)
+    @IR(counts = {IRNode.CMP_I, "= 0", IRNode.CMP_U, "= 1"})
+    @Arguments(values = {Argument.NUMBER_42})
+    public static void test_hilo_ltlt(int i) {
+        if (i >= 100_000 || i < -100_000) {
+            throw new RuntimeException();
+        }
+    }
+
+    // The following tests can completely remove the test and branches, we can prove that
+    // the path cannot be taken.
+
+    @Setup
+    public static Object[] range256(SetupInfo info) {
+        return new Object[]{info.invocationCounter() & 255};
+    }
+
+    @Setup
+    public static Object[] rangeM128P127(SetupInfo info) {
+        return new Object[]{(info.invocationCounter() & 255) - 128};
+    }
+
+    @Test
+    @IR(counts = {IRNode.CMP_I, "= 2", IRNode.CMP_U, "= 0"}, phase = CompilePhase.AFTER_PARSING)
+    @IR(counts = {IRNode.CMP_I, "= 0", IRNode.CMP_U, "= 0"})
+    @Arguments(setup = "rangeM128P127")
+    // Case from JDK-8135069. We used to do the CmpI->CmpU trick, but we can also constant fold
+    // this directly!
+    public static void test_empty_0(int i) {
+        if (i < 0 || i > -1) {
+            return; // always success
+        }
+        throw new RuntimeException("should not be reached");
+    }
+
+    @Test
+    @IR(counts = {IRNode.CMP_I, "= 2", IRNode.CMP_U, "= 0"}, phase = CompilePhase.AFTER_PARSING)
+    @IR(counts = {IRNode.CMP_I, "= 0", IRNode.CMP_U, "= 0"})
+    @Arguments(setup = "range256")
+    public static void test_empty_1(int i) {
+        if (i < 100 || i > 50) {
+            return; // always success
+        }
+        throw new RuntimeException("should not be reached");
+    }
+
+    @Test
+    @IR(counts = {IRNode.CMP_I, "= 2", IRNode.CMP_U, "= 0"}, phase = CompilePhase.AFTER_PARSING)
+    @IR(counts = {IRNode.CMP_I, "= 0", IRNode.CMP_U, "= 0"})
+    @Arguments(setup = "range256")
+    public static void test_empty_2(int i) {
+        if (i <= 100 || i >= 101) {
+            return; // always success
+        }
+        throw new RuntimeException("should not be reached");
+    }
+
+    @Test
+    @IR(counts = {IRNode.CMP_I, "= 1", IRNode.CMP_U, "= 0"}, phase = CompilePhase.AFTER_PARSING)
+    // Note: the two CmpI->Bool pairs are already canonicallized and commoned to a single pair.
+    @IR(counts = {IRNode.CMP_I, "= 0", IRNode.CMP_U, "= 0"})
+    @Arguments(setup = "range256")
+    public static void test_empty_3(int i) {
+        if (i <= 100 || i > 100) {
+            return; // always success
+        }
+        throw new RuntimeException("should not be reached");
+    }
+
+    @Test
+    @IR(counts = {IRNode.CMP_I, "= 1", IRNode.CMP_U, "= 0"}, phase = CompilePhase.AFTER_PARSING)
+    // Note: the two CmpI->Bool pairs are already canonicallized and commoned to a single pair.
+    @IR(counts = {IRNode.CMP_I, "= 0", IRNode.CMP_U, "= 0"})
+    @Arguments(setup = "range256")
+    public static void test_empty_4(int i) {
+        if (i < 101 || i >= 101) {
+            return; // always success
+        }
+        throw new RuntimeException("should not be reached");
+    }
+
+    @Test
+    @IR(counts = {IRNode.CMP_I, "= 2", IRNode.CMP_U, "= 0"}, phase = CompilePhase.AFTER_PARSING)
+    @IR(counts = {IRNode.CMP_I, "= 0", IRNode.CMP_U, "= 0"})
+    @Arguments(setup = "range256")
+    public static void test_empty_5(int i) {
+        if (i < 101 || i > 100) {
+            return; // always success
+        }
+        throw new RuntimeException("should not be reached");
+    }
+
+    // Now test that we can use a.length, which means we do a null-check
+    // and then a comparison with a LoadRange that has type int[>=0]
+
+    public static int[] ARR = new int[256];
+
+    @Test
+    @IR(counts = {IRNode.CMP_I, "= 2", IRNode.CMP_U, "= 0"}, phase = CompilePhase.AFTER_PARSING,
+        applyIf = {"TieredCompilation", "true"}) // proxy for "not Xcomp"
+    @IR(counts = {IRNode.CMP_I, "= 0", IRNode.CMP_U, "= 1"},
+        applyIf = {"TieredCompilation", "true"}) // proxy for "not Xcomp"
+    @Arguments(setup = "range256")
+    // Note: cannot get optimized with Xcomp
+    static int test_array_length_and_null_check_1(int i) {
+        if (i < 0 || i >= ARR.length) {
+            return -1; // never happens
+        }
+        return i;
+    }
+
+    @Check(test = "test_array_length_and_null_check_1")
+    public void check_test_array_length_and_null_check_1(int i) {
+        if (i < 0) { throw new RuntimeException("Wrong value: " + i); }
+    }
+
+    @Test
+    @IR(counts = {IRNode.CMP_I, "= 2", IRNode.CMP_U, "= 0"}, phase = CompilePhase.AFTER_PARSING,
+        applyIf = {"TieredCompilation", "true"}) // proxy for "not Xcomp"
+    @IR(counts = {IRNode.CMP_I, "= 0", IRNode.CMP_U, "= 1"},
+        applyIf = {"TieredCompilation", "true"}) // proxy for "not Xcomp"
+    @Arguments(setup = "range256")
+    // Note: cannot get optimized with Xcomp
+    static int test_array_length_and_null_check_2(int i) {
+        if (i < 0 || i >= ARR.length) {
+            throw new RuntimeException("never go out of bounds");
+        }
+        return i;
+    }
+}
diff --git a/test/hotspot/jtreg/compiler/rangechecks/TestFoldComparesFuzzer.java b/test/hotspot/jtreg/compiler/rangechecks/TestFoldComparesFuzzer.java
new file mode 100644
index 00000000000..0467689eb17
--- /dev/null
+++ b/test/hotspot/jtreg/compiler/rangechecks/TestFoldComparesFuzzer.java
@@ -0,0 +1,728 @@
+/*
+ * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+
+/*
+ * @test
+ * @bug 8346420
+ * @summary Fuzz patterns for IfNode::fold_compares_helper
+ * @modules java.base/jdk.internal.misc
+ * @library /test/lib /
+ * @compile ../lib/ir_framework/TestFramework.java
+ * @compile ../lib/generators/Generators.java
+ * @compile ../lib/verify/Verify.java
+ * @run driver ${test.main.class}
+ */
+
+package compiler.rangechecks;
+
+import java.util.List;
+import java.util.ArrayList;
+import java.util.Random;
+import java.util.HashSet;
+import java.util.Set;
+
+import jdk.test.lib.Utils;
+
+import compiler.lib.compile_framework.*;
+import compiler.lib.generators.*;
+import compiler.lib.template_framework.Template;
+import compiler.lib.template_framework.TemplateToken;
+import static compiler.lib.template_framework.Template.scope;
+import static compiler.lib.template_framework.Template.let;
+import static compiler.lib.template_framework.Template.$;
+
+import compiler.lib.template_framework.library.TestFrameworkClass;
+
+/**
+ * For more basic examples, see TestFoldCompares.java
+ *
+ * I'm only covering some basic cases to test the fundamental
+ * logic inside IfNode::fold_compares_helper.
+ * - TestMethodGeneratorConstIR does extensive result and IR verification
+ *   for the cases a-d) in IfNode::fold_compares_helper, but only with
+ *   constant lo and hi.
+ * - Other test generators currently don't have IR rules, but check
+ *   correctness in various relevant scenarios I came across during
+ *   the bugfix of JDK-8346420.
+ * - I'm also mixing signed and unsigned comparisons, just to ensure
+ *   the less often used (and tested) unsigned comparisons don't slip
+ *   through the cracks.
+ *
+ * In the future, we could add more cases:
+ * - Extend to long - though the optimization does not yet cover longs anyway.
+ * - More IR rules: difficult to make stable. Not all permutations are covered
+ *   by the optimizations, edge-cases could make IR rules brittle.
+ */
+public class TestFoldComparesFuzzer {
+    private static final Random RANDOM = Utils.getRandomInstance();
+    private static final RestrictableGenerator<Integer> INT_GEN = Generators.G.ints();
+
+    public static void main(String[] args) {
+        // Create a new CompileFramework instance.
+        CompileFramework comp = new CompileFramework();
+
+        long t0 = System.nanoTime();
+        // Add a java source file.
+        comp.addJavaSourceCode("compiler.rangecheck.templated.Generated", generate(comp));
+
+        long t1 = System.nanoTime();
+        // Compile the source file.
+        comp.compile();
+
+        long t2 = System.nanoTime();
+
+        // Run the tests without any additional VM flags.
+        comp.invoke("compiler.rangecheck.templated.Generated", "main", new Object[] {new String[] {}});
+        long t3 = System.nanoTime();
+
+        System.out.println("Code Generation:  " + (t1-t0) * 1e-9f);
+        System.out.println("Code Compilation: " + (t2-t1) * 1e-9f);
+        System.out.println("Running Tests:    " + (t3-t2) * 1e-9f);
+    }
+
+    public static String generate(CompileFramework comp) {
+        // Create a list to collect all tests.
+        List<TemplateToken> testTemplateTokens = new ArrayList<>();
+
+        for (int i = 0; i < 100; i++) {
+            testTemplateTokens.add(generateTest(/* no warmup, like -Xcomp */ 0));
+        }
+        for (int i = 0; i < 5; i++) {
+            testTemplateTokens.add(generateTest(/* with warmup, slower */ 10_000));
+        }
+
+        // Create the test class, which runs all testTemplateTokens.
+        return TestFrameworkClass.render(
+            // package and class name.
+            "compiler.rangecheck.templated", "Generated",
+            // List of imports.
+            Set.of("compiler.lib.generators.*",
+                   "compiler.lib.verify.*",
+                   "java.util.Random",
+                   "jdk.test.lib.Utils"),
+            // classpath, so the Test VM has access to the compiled class files.
+            comp.getEscapedClassPathOfCompiledClasses(),
+            // The list of tests.
+            testTemplateTokens);
+    }
+
+    enum Comparator {
+        // TODO: enable again after JDK-8385157
+        // ULT(" <  0", false),
+        // ULE(" <= 0", false),
+        // UGT(" >  0", false),
+        // UGE(" >= 0", false),
+        // UEQ(" == 0", false),
+        // UNE(" != 0", false),
+        LT(" <  ", true),
+        LE(" <= ", true),
+        GT(" >  ", true),
+        GE(" >= ", true),
+        EQ(" == ", true),
+        NE(" != ", true);
+
+        private final String token;
+        private final boolean signed;
+
+        Comparator(String token, boolean signed) {
+            this.token = token;
+            this.signed = signed;
+        }
+
+        public String getToken() {
+            return token;
+        }
+
+        public boolean isSigned() {
+            return signed;
+        }
+
+        public Comparator negate() {
+            return switch(this) {
+                // TODO: enable again after JDK-8385157
+                // case ULT -> UGE;
+                // case ULE -> UGT;
+                // case UGT -> ULE;
+                // case UGE -> ULT;
+                // case UEQ -> UNE;
+                // case UNE -> UEQ;
+                case LT -> GE;
+                case LE -> GT;
+                case GT -> LE;
+                case GE -> LT;
+                case EQ -> NE;
+                case NE -> EQ;
+            };
+        }
+
+        public Comparator flip() {
+            return switch(this) {
+                // TODO: enable again after JDK-8385157
+                // case ULT -> UGT;
+                // case ULE -> UGE;
+                // case UGT -> ULT;
+                // case UGE -> ULE;
+                // case UEQ -> UEQ;
+                // case UNE -> UNE;
+                case LT -> GT;
+                case LE -> GE;
+                case GT -> LT;
+                case GE -> LE;
+                case EQ -> EQ;
+                case NE -> NE;
+            };
+        }
+
+        static Comparator random() {
+            return values()[RANDOM.nextInt(values().length)];
+        }
+
+        static Comparator randomGreater() {
+            return RANDOM.nextBoolean() ? GE : GT;
+        }
+
+        static Comparator randomLess() {
+            return RANDOM.nextBoolean() ? LE : LT;
+        }
+    }
+
+    record Comparison(String lhs, Comparator cmp, String rhs, boolean negated) {
+        public Comparison(String lhs, Comparator cmp, String rhs) {
+            this(lhs, cmp, rhs, false);
+        }
+
+        public String toString() {
+            return cmp.isSigned()
+                ? ((negated ? "!" : "") + "(" + lhs + " "+ cmp.getToken() + " " + rhs + ")")
+                : ((negated ? "!" : "") + "(Integer.compareUnsigned(" + lhs + ", " + rhs + ")" + cmp.getToken() + ")");
+        }
+
+        // Keep the same semantics of the test, but change its form.
+        Comparison permuteRandom() {
+            return flipRandom().complementRandom();
+        }
+
+        Comparison flipRandom() {
+            return RANDOM.nextBoolean() ? this : new Comparison(rhs, cmp.flip(), lhs);
+        }
+
+        Comparison complementRandom() {
+            return RANDOM.nextBoolean() ? this : new Comparison(lhs, cmp.negate(), rhs, true);
+        }
+
+        Comparison negateCmp() {
+            return new Comparison(lhs, cmp.negate(), rhs, negated);
+        }
+    }
+
+    interface TestMethodGenerator {
+        Template.OneArg<String> getTestTemplate();
+
+        default Template.ZeroArgs getIRTemplate(boolean withWarmup) {
+            return Template.make(() -> scope("// No IR rule.\n"));
+        }
+
+        default Template.ZeroArgs getInputTemplate() {
+            return Template.make(() -> scope(
+                """
+                RestrictableGenerator<Integer> gen = Generators.G.ints();
+                int n = gen.next();
+                int a = gen.next();
+                int b = gen.next();
+                """
+            ));
+        };
+    }
+
+    // Some basic ranges with constant bounds.
+    // This should test some basic correctness, and also covers the case
+    // of bug JDK-8135069.
+    static class TestMethodGeneratorConst implements TestMethodGenerator {
+        private final int con1 = INT_GEN.next();
+        private final int con2 = INT_GEN.next();
+
+        private final Comparison c1 = new Comparison("n", Comparator.random(), "con1").permuteRandom();
+        private final Comparison c2 = new Comparison("n", Comparator.random(), "con2").permuteRandom();
+
+        private final Template.OneArg<String> testTemplate = Template.make("methodName", (String methodName) -> scope(
+            let("con1", con1),
+            let("con2", con2),
+            let("c1", c1),
+            let("c2", c2),
+            """
+            static boolean #methodName(int n, int a, int b) {
+                int con1 = #con1;
+                int con2 = #con2;
+                if (#c1 || #c2) {
+                    return true;
+                }
+                return false;
+            }
+            """
+        ));
+
+        public Template.OneArg<String> getTestTemplate() { return testTemplate; }
+    }
+
+    // Cases where a and b are ranges that touch min_int/max_int.
+    // Note: if con1=0 and con2=1 then this is like the cases:
+    // - test_Case3a_LTLE_overflow
+    // - test_Case3b_LTLE_overflow
+    // - test_Case4a_LELE_assert
+    //
+    // Hence, I think this test gives us quite good coverage for the kinds of bugs
+    // such as JDK-8346420.
+    static class TestMethodGeneratorWithIf implements TestMethodGenerator {
+        private final int con1 = INT_GEN.next();
+        private final int con2 = INT_GEN.next();
+        private final String m1 = RANDOM.nextBoolean() ? "Integer.MIN_VALUE" : "Integer.MAX_VALUE";
+        private final String m2 = RANDOM.nextBoolean() ? "Integer.MIN_VALUE" : "Integer.MAX_VALUE";
+
+        private final Comparison c1 = new Comparison("n", Comparator.random(), "a").permuteRandom();
+        private final Comparison c2 = new Comparison("n", Comparator.random(), "b").permuteRandom();
+
+        private final Template.OneArg<String> testTemplate = Template.make("methodName", (String methodName) -> scope(
+            let("con1", con1),
+            let("con2", con2),
+            let("m1", m1),
+            let("m2", m2),
+            let("c1", c1),
+            let("c2", c2),
+            """
+            static boolean #methodName(int n, int a, int b) {
+                if (a < b) {
+                    a = #con1;
+                    b = #con2;
+                } else {
+                    a = #m1;
+                    b = #m2;
+                }
+                if (#c1 || #c2) {
+                    return true;
+                }
+                return false;
+            }
+            """
+        ));
+
+        public Template.OneArg<String> getTestTemplate() { return testTemplate; }
+    }
+
+    // Just for good practice: add some case where the ranges are more free.
+    static class TestMethodGeneratorRanges implements TestMethodGenerator {
+        private final int n_hi = INT_GEN.next();
+        private final int n_lo = INT_GEN.next();
+        private final int a_hi = INT_GEN.next();
+        private final int a_lo = INT_GEN.next();
+        private final int b_hi = INT_GEN.next();
+        private final int b_lo = INT_GEN.next();
+
+        private final Comparison c1 = new Comparison("n", Comparator.random(), "a").permuteRandom();
+        private final Comparison c2 = new Comparison("n", Comparator.random(), "b").permuteRandom();
+
+        private final Template.OneArg<String> template = Template.make("methodName", (String methodName) -> scope(
+            let("n_hi", n_hi),
+            let("n_lo", n_lo),
+            let("a_hi", a_hi),
+            let("a_lo", a_lo),
+            let("b_hi", b_hi),
+            let("b_lo", b_lo),
+            let("c1", c1),
+            let("c2", c2),
+            """
+            static boolean #methodName(int n, int a, int b) {
+                n = Math.min(#n_hi, Math.max(#n_lo, n));
+                a = Math.min(#a_hi, Math.max(#a_lo, a));
+                b = Math.min(#b_hi, Math.max(#b_lo, b));
+                if (#c1 || #c2) {
+                    return true;
+                }
+                return false;
+            }
+            """
+        ));
+
+        public Template.OneArg<String> getTestTemplate() {
+            return template;
+        }
+    }
+
+    // Generate some more constrained cases, but with IR rules
+    static class TestMethodGeneratorConstIR implements TestMethodGenerator {
+        private final int lo;
+        private final int hi;
+        { // instance initializer
+            // We want to cover all cases for lo and hi combinations. But the
+            // critical cases happen around int_min and int_max, and when
+            // lo and hi are close to each other.
+            switch (RANDOM.nextInt(3)) {
+                case 0 -> {
+                    // Full freedom, will eventually cover all cases
+                    lo = INT_GEN.next();
+                    hi = INT_GEN.next();
+                }
+                case 1 -> {
+                    // Pick cases around overflow and underflow
+                    lo = Integer.MAX_VALUE - 5 + RANDOM.nextInt(10);
+                    hi = Integer.MAX_VALUE - 5 + RANDOM.nextInt(10);
+                }
+                default -> {
+                    // Pick cases where lo and hi are close to each other
+                    lo = INT_GEN.next();
+                    hi = lo - 5 + RANDOM.nextInt(10);
+                }
+            }
+        }
+
+        // Since we are using constants for lo and hi, the checks should get canonicalized,
+        // so that n is always in the lhs. We only create cases that are covered by the
+        // 4 cases of "2 CmpI -> 1 CmpU" optimization in IfNode::fold_compares_helper.
+        private final Comparison c_lo = new Comparison("n", Comparator.randomGreater(), "lo");
+        private final Comparison c_hi = new Comparison("n", Comparator.randomLess(), "hi");
+        private final boolean swap = RANDOM.nextBoolean();
+        private final Comparison c1Permuted = (swap ? c_lo : c_hi).permuteRandom();
+        private final Comparison c2Permuted = (swap ? c_hi : c_lo).permuteRandom();
+        // n >  lo && n <  hi -> check for inside range
+        // n <= lo || n >= hi -> chedk for outside range
+        private final boolean withAnd = RANDOM.nextBoolean();
+        private final String operator = withAnd ? "&&" : "||";
+        private final Comparison c1 = withAnd ? c1Permuted : c1Permuted.negateCmp();
+        private final Comparison c2 = withAnd ? c2Permuted : c2Permuted.negateCmp();
+
+        private final Template.OneArg<String> testTemplate = Template.make("methodName", (String methodName) -> scope(
+            let("lo", lo),
+            let("hi", hi),
+            let("c1", c1),
+            let("c2", c2),
+            let("op", operator),
+            """
+            static boolean #methodName(int n, int a, int b) {
+                int lo = #lo;
+                int hi = #hi;
+                if (#c1 #op #c2) {
+                    return true;
+                }
+                return false;
+            }
+            """
+        ));
+
+        public Template.OneArg<String> getTestTemplate() { return testTemplate; }
+
+        public Template.ZeroArgs getIRTemplate(boolean withWarmup) {
+            return Template.make(() -> {
+                String cmpIParse, cmpUParse, cmpIFinal, cmpUFinal;
+                String comment;
+
+                // If both branches are compiled (in -Xcomp mode, i.e. no warmup), then
+                // we can know very precisely what happens in each case.
+                if (c_lo.cmp() == Comparator.GT && c_hi.cmp() == Comparator.LT) {
+                    // a)   (n >  lo && n <  hi)
+                    if (lo == Integer.MAX_VALUE || hi == Integer.MIN_VALUE) {
+                        cmpIParse = "< 2"; cmpUParse = "= 0"; cmpIFinal = "< 2"; cmpUFinal = "= 0";
+                        comment = "a) one or both checks fold at parse time";
+                    } else if (lo < hi && lo+2 == hi) {
+                        // Not yet folded at parsing, because lo != hi
+                        // BoolNode::Ideal: x <u 1 or x <=u 0 -> x==0 (signed)
+                        cmpIParse = "= 2"; cmpUParse = "= 0"; cmpIFinal = "= 1"; cmpUFinal = "= 0";
+                        comment = "a) replace with CmpU (single element) -> CmpI eq";
+                    } else if (lo < hi && lo+1 == hi) {
+                        // Not yet folded at parsing, because lo != hi
+                        cmpIParse = "= 2"; cmpUParse = "= 0"; cmpIFinal = "= 0"; cmpUFinal = "= 0";
+                        comment = "a) impossible condition (exact) -> fold away";
+                    } else if (lo < hi) {
+                        cmpIParse = "= 2"; cmpUParse = "= 0"; cmpIFinal = "= 0"; cmpUFinal = "= 1";
+                        comment = "a) replace with CmpU (non-empty)";
+                    } else if (lo == hi) {
+                        // same CmpI at parse time
+                        cmpIParse = "= 1"; cmpUParse = "= 0"; cmpIFinal = "= 0"; cmpUFinal = "= 0";
+                        comment = "a) impossible condition -> fold away";
+                    } else {
+                        cmpIParse = "= 2"; cmpUParse = "= 0"; cmpIFinal = "= 0"; cmpUFinal = "= 0";
+                        comment = "a) impossible condition -> fold away";
+                    }
+                } else if (c_lo.cmp() == Comparator.GT && c_hi.cmp() == Comparator.LE) {
+                    // b)   (n >  lo && n <= hi)
+                    if (lo == Integer.MAX_VALUE || hi == Integer.MAX_VALUE) {
+                        cmpIParse = "< 2"; cmpUParse = "= 0"; cmpIFinal = "< 2"; cmpUFinal = "= 0";
+                        comment = "b) one or both checks fold at parse time";
+                    } else if (lo < hi && lo+1 == hi) {
+                        // BoolNode::Ideal: x <u 1 or x <=u 0 -> x==0 (signed)
+                        cmpIParse = "= 2"; cmpUParse = "= 0"; cmpIFinal = "= 1"; cmpUFinal = "= 0";
+                        comment = "b) replace with CmpU (single element) -> CmpI eq";
+                    } else if (lo < hi && lo+1 < hi) {
+                        cmpIParse = "= 2"; cmpUParse = "= 0"; cmpIFinal = "= 0"; cmpUFinal = "= 1";
+                        comment = "b) replace with CmpU (non-empty)";
+                    } else if (lo == hi) {
+                        cmpIParse = "= 1"; cmpUParse = "= 0"; cmpIFinal = "= 0"; cmpUFinal = "= 0";
+                        comment = "b) impossible condition (exact) -> fold away";
+                    } else {
+                        cmpIParse = "= 2"; cmpUParse = "= 0"; cmpIFinal = "= 0"; cmpUFinal = "= 0";
+                        comment = "b) impossible condition -> fold away";
+                    }
+                } else if (c_lo.cmp() == Comparator.GE && c_hi.cmp() == Comparator.LT) {
+                    // c)   (n >= lo && n <  hi)
+                    if (lo == Integer.MIN_VALUE || hi == Integer.MIN_VALUE) {
+                        cmpIParse = "< 2"; cmpUParse = "= 0"; cmpIFinal = "< 2"; cmpUFinal = "= 0";
+                        comment = "c) one or both checks fold at parse time";
+                    } else if (lo < hi && lo+1 == hi) {
+                        // BoolNode::Ideal: x <u 1 or x <=u 0 -> x==0 (signed)
+                        cmpIParse = "= 2"; cmpUParse = "= 0"; cmpIFinal = "= 1"; cmpUFinal = "= 0";
+                        comment = "c) replace with CmpU (single element) -> CmpI eq";
+                    } else if (lo < hi && lo+1 < hi) {
+                        cmpIParse = "= 2"; cmpUParse = "= 0"; cmpIFinal = "= 0"; cmpUFinal = "= 1";
+                        comment = "c) replace with CmpU (non-empty)";
+                    } else if (lo == hi) {
+                        // RegionNode::optimize_trichotomy: can fold (n >= x && n < x) -> never
+                        cmpIParse = "< 2"; cmpUParse = "= 0"; cmpIFinal = "= 0"; cmpUFinal = "= 0";
+                        comment = "c) impossible condition (exact) -> fold away";
+                    } else {
+                        cmpIParse = "= 2"; cmpUParse = "= 0"; cmpIFinal = "= 0"; cmpUFinal = "= 0";
+                        comment = "c) impossible condition -> fold away";
+                    }
+                } else if (c_lo.cmp() == Comparator.GE && c_hi.cmp() == Comparator.LE) {
+                    // d)   (n >= lo && n <= hi)
+                    if (lo == Integer.MIN_VALUE || hi == Integer.MAX_VALUE) {
+                        cmpIParse = "< 2"; cmpUParse = "= 0"; cmpIFinal = "< 2"; cmpUFinal = "= 0";
+                        comment = "d) one or both checks fold at parse time";
+                    } else if (lo == hi) {
+                        // same CmpI at parse time
+                        // BoolNode::Ideal: x <u 1 or x <=u 0 -> x==0 (signed)
+                        cmpIParse = "= 1"; cmpUParse = "= 0"; cmpIFinal = "= 1"; cmpUFinal = "= 0";
+                        comment = "d) replace with CmpU (single element) -> CmpI eq";
+                    } else if (lo < hi) {
+                        cmpIParse = "= 2"; cmpUParse = "= 0"; cmpIFinal = "= 0"; cmpUFinal = "= 1";
+                        comment = "d) replace with CmpU (non-empty)";
+                    } else {
+                        cmpIParse = "= 2"; cmpUParse = "= 0"; cmpIFinal = "= 0"; cmpUFinal = "= 0";
+                        comment = "d) impossible condition -> fold away";
+                    }
+                } else {
+                    throw new RuntimeException("should not be generated: " + c_lo + " and " + c_hi);
+                }
+
+                // All the precise counting above assumes that both ifs get compiled, and hence
+                // both CmpI are generated. Further, it assumes that both of the "or" branches
+                // (fail1 and fail2) end up "in the same place": either at the same region, or
+                // both in an uncommon trap. With profiling, the following cases are possible:
+                // - The first if is constant folded to fail1, and we have no CmpI nor CmpU
+                //   in the graph.
+                // - The first if always leads to fail1, and away from the second if, and so we
+                //   only have a single CmpI in the graph after parsing.
+                // - The first if always leads towards the second if, and away from fail1. And
+                //   the second if always points towards fail2 and away from succ. We get an
+                //   uncommon trap for fail1 and succ, and only the fail2 path is compiled.
+                //   Hence, we have two CmpI, but fail1 and fail2 do not end up "in the same place".
+                // This makes our IR rule quite weak, sadly. We could make the IR rules stronger,
+                // but we would need to control warmup, and generate corresponding inputs that
+                // ensure the right paths are compiled or not compiled.
+                if (withWarmup) {
+                    cmpIParse = "<= 2"; cmpUParse = "= 0"; cmpIFinal = "<= 2"; cmpUFinal = "< 2";
+                    comment = "with warmup: unstable-if makes precise counting hard.";
+                }
+
+                return scope(
+                    let("IP", cmpIParse),
+                    let("UP", cmpUParse),
+                    let("IF", cmpIFinal),
+                    let("UF", cmpUFinal),
+                    let("comment", comment),
+                    """
+                    // #comment
+                    @IR(counts = {IRNode.CMP_I, "#IP", IRNode.CMP_U, "#UP"}, phase = CompilePhase.AFTER_PARSING)
+                    @IR(counts = {IRNode.CMP_I, "#IF", IRNode.CMP_U, "#UF"})
+                    """
+                );
+            });
+        }
+
+        @Override
+        public Template.ZeroArgs getInputTemplate() {
+            return Template.make(() -> scope(
+                let("lo", lo),
+                let("hi", hi),
+                """
+                Random r = Utils.getRandomInstance();
+                RestrictableGenerator<Integer> gen = Generators.G.ints();
+                int a = gen.next();
+                int b = gen.next();
+                """,
+                switch (RANDOM.nextInt(9)) {
+                    // Random values
+                    case 0 -> "int n = gen.next();\n";
+                    // Fuzz around specific values
+                    case 1 -> "int n = r.nextInt(10) - 5 + #lo;\n";
+                    case 2 -> "int n = r.nextInt(10) - 5 + #hi;\n";
+                    case 3 -> "int n = r.nextInt(10) - 5 + (r.nextBoolean() ? #lo : #hi);\n";
+                    case 4 -> "int n = r.nextInt(10) - 5 + Integer.MAX_VALUE;\n";
+                    // Only very low or very high values, or in the middle
+                    case 5 -> "int n = r.nextInt(10) - 10 + Integer.MAX_VALUE;\n";
+                    case 6 -> "int n = r.nextInt(10) + Integer.MIN_VALUE;\n";
+                    case 7 -> "int n = r.nextInt(10) - 5 + #lo/2 + #hi/2;\n";
+                    // Always the same constant
+                    default -> "int n = " + INT_GEN.next() + ";\n";
+                }
+            ));
+        };
+    }
+
+    // switch cases can also be implemented with range checks using
+    // constants, and then we can optimize 2 CmpI with a single CmpU,
+    // at least in some cases.
+    static class TestMethodGeneratorSwitch implements TestMethodGenerator {
+        Set<Short> cases = new HashSet<>();
+        { // instance initializer
+            int n = RANDOM.nextInt(1, 20);
+            for (int i = 0; i < n; i++) {
+                cases.add((short)(int)INT_GEN.next());
+            }
+        }
+
+        private final Template.OneArg<String> testTemplate = Template.make("methodName", (String methodName) -> scope(
+            """
+            static boolean #methodName(int n, int a, int b) {
+                switch((short)n) {
+            """,
+            cases.stream().map(i -> scope(
+                let("i", i),
+                """
+                case (short)#i:
+                """
+            )).toList(),
+            """
+                    return true;
+                default:
+                    return false;
+                }
+            }
+            """
+        ));
+
+        public Template.OneArg<String> getTestTemplate() { return testTemplate; }
+    }
+
+    // If arr.length is in the second check, the null-check for arr
+    // is located between the two checks.
+    // I'm not adding any IR rules here, just checking for correctness.
+    static class TestMethodGeneratorArrLength implements TestMethodGenerator {
+        private final int n_hi = INT_GEN.next();
+        private final int n_lo = INT_GEN.next();
+        private final int a_hi = INT_GEN.next();
+        private final int a_lo = INT_GEN.next();
+        private final int size = INT_GEN.restricted(0, 100_000).next();
+
+        // Get checks like: n < a || n >= arr.length
+        private final Comparison c_lo = new Comparison("n", Comparator.random(), "a").permuteRandom();
+        private final Comparison c_hi = new Comparison("n", Comparator.random(), "arr.length").permuteRandom();
+        private final boolean swap = RANDOM.nextBoolean();
+        private final Comparison c1Permuted = (swap ? c_lo : c_hi).permuteRandom();
+        private final Comparison c2Permuted = (swap ? c_hi : c_lo).permuteRandom();
+        // n >  lo && n <  hi -> check for inside range
+        // n <= lo || n >= hi -> chedk for outside range
+        private final boolean withAnd = RANDOM.nextBoolean();
+        private final String operator = withAnd ? "&&" : "||";
+        private final Comparison c1 = withAnd ? c1Permuted : c1Permuted.negateCmp();
+        private final Comparison c2 = withAnd ? c2Permuted : c2Permuted.negateCmp();
+
+        private final Template.OneArg<String> testTemplate = Template.make("methodName", (String methodName) -> scope(
+            let("n_hi", n_hi),
+            let("n_lo", n_lo),
+            let("a_hi", a_hi),
+            let("a_lo", a_lo),
+            let("size", size),
+            let("c1", c1),
+            let("c2", c2),
+            let("op", operator),
+            """
+            static boolean #methodName(int n, int a, int b) {
+                int[] arr = $arr;
+                n = Math.min(#n_hi, Math.max(#n_lo, n));
+                a = Math.min(#a_hi, Math.max(#a_lo, a));
+                if (#c1 #op #c2) {
+                    return true;
+                }
+                return false;
+            }
+            static int[] $arr = new int[#size];
+            """
+        ));
+
+        public Template.OneArg<String> getTestTemplate() { return testTemplate; }
+    }
+
+    public static TemplateToken generateTest(int warmup) {
+        TestMethodGenerator tg = switch(RANDOM.nextInt(6)) {
+            case 0 -> new TestMethodGeneratorConst();
+            case 1 -> new TestMethodGeneratorWithIf();
+            case 2 -> new TestMethodGeneratorRanges();
+            case 3 -> new TestMethodGeneratorConstIR();
+            case 4 -> new TestMethodGeneratorSwitch();
+            case 5 -> new TestMethodGeneratorArrLength();
+            default -> throw new RuntimeException("not expected");
+        };
+        Template.ZeroArgs testInputTemplate = tg.getInputTemplate();
+        Template.OneArg<String> testMethodTemplate = tg.getTestTemplate();
+        Template.ZeroArgs testIRTemplate = tg.getIRTemplate(warmup >= 10_000);
+
+        var testTemplate = Template.make(() -> scope(
+            let("warmup", warmup / 100),
+            """
+            // --- $test start ---
+            @Run(test = "$test")
+            @Warmup(#warmup)
+            public static void $run() {
+                for (int i = 0; i < 100; i++) {
+                    // Generate random values for n, a, b.
+                    """,
+                    testInputTemplate.asToken(),
+                    """
+
+                    // Run test and compare with interpreter results.
+                    var result   =      $test(n, a, b);
+                    var expected = $reference(n, a, b);
+                    if (result != expected) {
+                        throw new RuntimeException("wrong result: " + result + " vs " + expected
+                                                   + "\\nn: " + n
+                                                   + "\\na: " + a
+                                                   + "\\nb: " + b);
+                    }
+                }
+            }
+
+            @Test
+            """,
+            testIRTemplate.asToken(),
+            testMethodTemplate.asToken($("test")),
+            """
+
+            @DontCompile
+            """,
+            testMethodTemplate.asToken($("reference")),
+            """
+            // --- $test end   ---
+            """
+        ));
+        return testTemplate.asToken();
+    }
+}
diff --git a/test/hotspot/jtreg/compiler/vectorapi/TestIncorrectVectorReassociation.java b/test/hotspot/jtreg/compiler/vectorapi/TestIncorrectVectorReassociation.java
new file mode 100644
index 00000000000..bc94c0b19cc
--- /dev/null
+++ b/test/hotspot/jtreg/compiler/vectorapi/TestIncorrectVectorReassociation.java
@@ -0,0 +1,218 @@
+/*
+ * Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package compiler.vectorapi;
+
+import compiler.lib.ir_framework.*;
+import compiler.lib.verify.*;
+import java.util.Arrays;
+import jdk.incubator.vector.*;
+
+/**
+ * @test
+ * @bug 8384507 8385308
+ * @library /test/lib /
+ * @summary Incorrect vector reassociation for signed saturating addition
+ * @modules jdk.incubator.vector
+ *
+ * @run driver compiler.vectorapi.TestIncorrectVectorReassociation
+ */
+
+public class TestIncorrectVectorReassociation {
+
+    public static void main(String[] args) {
+        TestFramework testFramework = new TestFramework();
+        testFramework.setDefaultWarmup(10000)
+                     .addFlags("--add-modules=jdk.incubator.vector")
+                     .start();
+    }
+
+    /* =======================
+     * BYTE: a=100, b=100, arr[i]=-50
+     *   Correct: sat_add(100, sat_add(100, -50)) = sat_add(100, 50) = 127
+     *   Wrong:   sat_add(sat_add(100, 100), -50) = sat_add(127, -50) = 77
+     * ======================= */
+
+    static final VectorSpecies<Byte> BSP = ByteVector.SPECIES_PREFERRED;
+    static byte[] byteIn  = new byte[BSP.length()];
+    static byte[] byteOut = new byte[BSP.length()];
+    static final byte BA = 100, BB = 100;
+
+    static {
+        Arrays.fill(byteIn, (byte) -50);
+    }
+
+    @Test
+    @IR(counts = {IRNode.SATURATING_ADD_VB, " 2 "},
+        applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
+    static void test_byte_sadd(int index) {
+        ByteVector.broadcast(BSP, BA)
+                  .lanewise(VectorOperators.SADD,
+                            ByteVector.broadcast(BSP, BB)
+                                     .lanewise(VectorOperators.SADD,
+                                               ByteVector.fromArray(BSP, byteIn, index)))
+                  .intoArray(byteOut, index);
+    }
+
+    @Run(test = "test_byte_sadd")
+    void run_byte_sadd() {
+        for (int i = 0; i < BSP.loopBound(byteIn.length); i += BSP.length()) {
+            test_byte_sadd(i);
+        }
+        for (int i = 0; i < BSP.loopBound(byteIn.length); i++) {
+            Verify.checkEQ(byteOut[i], VectorMath.addSaturating(BA, VectorMath.addSaturating(BB, byteIn[i])));
+        }
+    }
+
+    /* =======================
+     * SHORT: a=30000, b=30000, arr[i]=-50
+     *   Correct: sat_add(30000, sat_add(30000, -50)) = sat_add(30000, 29950) = 32767
+     *   Wrong:   sat_add(sat_add(30000, 30000), -50) = sat_add(32767, -50) = 32717
+     * ======================= */
+
+    static final VectorSpecies<Short> SSP = ShortVector.SPECIES_PREFERRED;
+    static short[] shortIn  = new short[SSP.length()];
+    static short[] shortOut = new short[SSP.length()];
+    static final short SA = 30000, SB = 30000;
+
+    static {
+        Arrays.fill(shortIn, (short) -50);
+    }
+
+    @Test
+    @IR(counts = {IRNode.SATURATING_ADD_VS, " 2 "},
+        applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
+    static void test_short_sadd(int index) {
+        ShortVector.broadcast(SSP, SA)
+                   .lanewise(VectorOperators.SADD,
+                             ShortVector.broadcast(SSP, SB)
+                                      .lanewise(VectorOperators.SADD,
+                                                ShortVector.fromArray(SSP, shortIn, index)))
+                   .intoArray(shortOut, index);
+    }
+
+    @Run(test = "test_short_sadd")
+    void run_short_sadd() {
+        for (int i = 0; i < SSP.loopBound(shortIn.length); i += SSP.length()) {
+            test_short_sadd(i);
+        }
+        for (int i = 0; i < SSP.loopBound(shortIn.length); i++) {
+            Verify.checkEQ(shortOut[i], VectorMath.addSaturating(SA, VectorMath.addSaturating(SB, shortIn[i])));
+        }
+    }
+
+    /* =======================
+     * INT: a=2_000_000_000, b=2_000_000_000, arr[i]=-50
+     *   Correct: sat_add(2B, sat_add(2B, -50)) = sat_add(2B, 1_999_999_950) = MAX
+     *   Wrong:   sat_add(sat_add(2B, 2B), -50) = sat_add(MAX, -50) = MAX-50
+     * ======================= */
+
+    static final VectorSpecies<Integer> ISP = IntVector.SPECIES_PREFERRED;
+    static int[] intIn  = new int[ISP.length()];
+    static int[] intOut = new int[ISP.length()];
+    static final int IA = 2_000_000_000, IB = 2_000_000_000;
+
+    static {
+        Arrays.fill(intIn, -50);
+    }
+
+    @Test
+    @IR(counts = {IRNode.SATURATING_ADD_VI, " 2 "},
+        applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
+    static void test_int_sadd(int index) {
+        IntVector.broadcast(ISP, IA)
+                 .lanewise(VectorOperators.SADD,
+                           IntVector.broadcast(ISP, IB)
+                                    .lanewise(VectorOperators.SADD,
+                                              IntVector.fromArray(ISP, intIn, index)))
+                 .intoArray(intOut, index);
+    }
+
+    @Run(test = "test_int_sadd")
+    void run_int_sadd() {
+        for (int i = 0; i < ISP.loopBound(intIn.length); i += ISP.length()) {
+            test_int_sadd(i);
+        }
+        for (int i = 0; i < ISP.loopBound(intIn.length); i++) {
+            Verify.checkEQ(intOut[i], VectorMath.addSaturating(IA, VectorMath.addSaturating(IB, intIn[i])));
+        }
+    }
+
+    /* =======================
+     * LONG: a=8_000_000_000_000_000_000L, b=8_000_000_000_000_000_000L, arr[i]=-50
+     *   Correct: sat_add(8e18, sat_add(8e18, -50)) = sat_add(8e18, 7_999_999_999_999_999_950) = MAX
+     *   Wrong:   sat_add(sat_add(8e18, 8e18), -50) = sat_add(MAX, -50) = MAX-50
+     * ======================= */
+
+    static final VectorSpecies<Long> LSP = LongVector.SPECIES_PREFERRED;
+    static long[] longIn  = new long[LSP.length()];
+    static long[] longOut = new long[LSP.length()];
+    static final long LA = 8_000_000_000_000_000_000L, LB = 8_000_000_000_000_000_000L;
+
+    static {
+        Arrays.fill(longIn, -50L);
+    }
+
+    @Test
+    @IR(counts = {IRNode.SATURATING_ADD_VL, " 2 "},
+        applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
+    static void test_long_sadd(int index) {
+        LongVector.broadcast(LSP, LA)
+                  .lanewise(VectorOperators.SADD,
+                            LongVector.broadcast(LSP, LB)
+                                     .lanewise(VectorOperators.SADD,
+                                               LongVector.fromArray(LSP, longIn, index)))
+                  .intoArray(longOut, index);
+    }
+
+    @Run(test = "test_long_sadd")
+    void run_long_sadd() {
+        for (int i = 0; i < LSP.loopBound(longIn.length); i += LSP.length()) {
+            test_long_sadd(i);
+        }
+        for (int i = 0; i < LSP.loopBound(longIn.length); i++) {
+            Verify.checkEQ(longOut[i], VectorMath.addSaturating(LA, VectorMath.addSaturating(LB, longIn[i])));
+        }
+    }
+
+    @Test
+    @IR(counts = {IRNode.SATURATING_ADD_VI, " 2 "},
+        applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
+    static IntVector test_mixed_sadd_suadd() {
+        IntVector v0 = IntVector.broadcast(ISP, 1);
+        IntVector v1 = IntVector.broadcast(ISP, 0);
+        IntVector v2 = v0.lanewise(VectorOperators.SADD, v1);
+        return v2.lanewise(VectorOperators.SUADD, -1);
+    }
+
+    @Run(test = "test_mixed_sadd_suadd")
+    void run_mixed_sadd_suadd() {
+        IntVector result = test_mixed_sadd_suadd();
+        int expected = VectorMath.addSaturatingUnsigned(
+                           VectorMath.addSaturating(1, 0), -1);
+        int[] res = result.toArray();
+        for (int i = 0; i < res.length; i++) {
+            Verify.checkEQ(res[i], expected);
+        }
+    }
+}
diff --git a/test/hotspot/jtreg/compiler/vectorapi/VectorMaskCompareNotTest.java b/test/hotspot/jtreg/compiler/vectorapi/VectorMaskCompareNotTest.java
index 09185f63c69..9a2f440ea82 100644
--- a/test/hotspot/jtreg/compiler/vectorapi/VectorMaskCompareNotTest.java
+++ b/test/hotspot/jtreg/compiler/vectorapi/VectorMaskCompareNotTest.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * Copyright (c) 2025, 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -30,7 +30,7 @@ import jdk.test.lib.Asserts;
 
 /*
  * @test
- * @bug 8354242
+ * @bug 8354242 8382532
  * @key randomness
  * @library /test/lib /
  * @summary test combining vector not operation with compare
@@ -42,6 +42,7 @@ import jdk.test.lib.Asserts;
 
 public class VectorMaskCompareNotTest {
     private static int LENGTH = 128;
+    private static float TWO_FLOAT = 2.0f;
 
     private static final VectorSpecies<Byte> B_SPECIES = VectorSpecies.ofLargestShape(byte.class);
     private static final VectorSpecies<Short> S_SPECIES = VectorSpecies.ofLargestShape(short.class);
@@ -77,6 +78,7 @@ public class VectorMaskCompareNotTest {
     private static double[] dnan;
     private static double[] dpinf;
     private static double[] dninf;
+    private static boolean[] mi;
     private static boolean[] mr;
 
     static {
@@ -99,6 +101,7 @@ public class VectorMaskCompareNotTest {
         dnan = new double[LENGTH];
         dpinf = new double[LENGTH];
         dninf = new double[LENGTH];
+        mi = new boolean[LENGTH];
         mr = new boolean[LENGTH];
 
         Generator<Integer> iGen = RD.ints();
@@ -125,6 +128,7 @@ public class VectorMaskCompareNotTest {
             dnan[i] = Double.NaN;
             dpinf[i] = Double.POSITIVE_INFINITY;
             dninf[i] = Double.NEGATIVE_INFINITY;
+            mi[i] = true;
         }
     }
 
@@ -271,523 +275,363 @@ public class VectorMaskCompareNotTest {
     @Test
     @IR(counts = { IRNode.XOR_V_MASK, "= 0",
                    IRNode.XOR_V, "= 0",
-                   IRNode.VECTOR_MASK_CAST, "= 1",
-                   IRNode.VECTOR_MASK_CMP, "= 3" },
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
         applyIfCPUFeatureOr = { "asimd", "true", "avx", "true", "rvv", "true" })
     public static void testCompareEQMaskNotByte() {
         testCompareMaskNotByte(B_SPECIES, VectorOperators.EQ, (m) -> { return m.not(); });
         verifyResultsByte(B_SPECIES, VectorOperators.EQ);
         testCompareMaskNotByte(B_SPECIES, VectorOperators.EQ, (m) -> { return B_SPECIES.maskAll(true).xor(m); });
         verifyResultsByte(B_SPECIES, VectorOperators.EQ);
-
-        testCompareMaskNotByte(ByteVector.SPECIES_64, VectorOperators.EQ, (m) -> { return m.cast(ShortVector.SPECIES_128).not(); });
-        verifyResultsByte(ByteVector.SPECIES_64, VectorOperators.EQ);
     }
 
     @Test
     @IR(counts = { IRNode.XOR_V_MASK, "= 0",
                    IRNode.XOR_V, "= 0",
-                   IRNode.VECTOR_MASK_CAST, "= 1",
-                   IRNode.VECTOR_MASK_CMP, "= 3" },
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
         applyIfCPUFeatureOr = { "asimd", "true", "avx", "true", "rvv", "true" })
     public static void testCompareNEMaskNotByte() {
         testCompareMaskNotByte(B_SPECIES, VectorOperators.NE, (m) -> { return m.not(); });
         verifyResultsByte(B_SPECIES, VectorOperators.NE);
         testCompareMaskNotByte(B_SPECIES, VectorOperators.NE, (m) -> { return B_SPECIES.maskAll(true).xor(m); });
         verifyResultsByte(B_SPECIES, VectorOperators.NE);
-
-        testCompareMaskNotByte(ByteVector.SPECIES_64, VectorOperators.NE, (m) -> { return m.cast(ShortVector.SPECIES_128).not(); });
-        verifyResultsByte(ByteVector.SPECIES_64, VectorOperators.NE);
     }
 
     @Test
     @IR(counts = { IRNode.XOR_V_MASK, "= 0",
                    IRNode.XOR_V, "= 0",
-                   IRNode.VECTOR_MASK_CAST, "= 1",
-                   IRNode.VECTOR_MASK_CMP, "= 3" },
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
         applyIfCPUFeatureOr = { "asimd", "true", "avx", "true", "rvv", "true" })
     public static void testCompareLTMaskNotByte() {
         testCompareMaskNotByte(B_SPECIES, VectorOperators.LT, (m) -> { return m.not(); });
         verifyResultsByte(B_SPECIES, VectorOperators.LT);
         testCompareMaskNotByte(B_SPECIES, VectorOperators.LT, (m) -> { return B_SPECIES.maskAll(true).xor(m); });
         verifyResultsByte(B_SPECIES, VectorOperators.LT);
-
-        testCompareMaskNotByte(ByteVector.SPECIES_64, VectorOperators.LT, (m) -> { return m.cast(ShortVector.SPECIES_128).not(); });
-        verifyResultsByte(ByteVector.SPECIES_64, VectorOperators.LT);
     }
 
     @Test
     @IR(counts = { IRNode.XOR_V_MASK, "= 0",
                    IRNode.XOR_V, "= 0",
-                   IRNode.VECTOR_MASK_CAST, "= 1",
-                   IRNode.VECTOR_MASK_CMP, "= 3" },
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
         applyIfCPUFeatureOr = { "asimd", "true", "avx", "true", "rvv", "true" })
     public static void testCompareGTMaskNotByte() {
         testCompareMaskNotByte(B_SPECIES, VectorOperators.GT, (m) -> { return m.not(); });
         verifyResultsByte(B_SPECIES, VectorOperators.GT);
         testCompareMaskNotByte(B_SPECIES, VectorOperators.GT, (m) -> { return B_SPECIES.maskAll(true).xor(m); });
         verifyResultsByte(B_SPECIES, VectorOperators.GT);
-
-        testCompareMaskNotByte(ByteVector.SPECIES_64, VectorOperators.GT, (m) -> { return m.cast(ShortVector.SPECIES_128).not(); });
-        verifyResultsByte(ByteVector.SPECIES_64, VectorOperators.GT);
     }
 
     @Test
     @IR(counts = { IRNode.XOR_V_MASK, "= 0",
                    IRNode.XOR_V, "= 0",
-                   IRNode.VECTOR_MASK_CAST, "= 1",
-                   IRNode.VECTOR_MASK_CMP, "= 3" },
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
         applyIfCPUFeatureOr = { "asimd", "true", "avx", "true", "rvv", "true" })
     public static void testCompareLEMaskNotByte() {
         testCompareMaskNotByte(B_SPECIES, VectorOperators.LE, (m) -> { return m.not(); });
         verifyResultsByte(B_SPECIES, VectorOperators.LE);
         testCompareMaskNotByte(B_SPECIES, VectorOperators.LE, (m) -> { return B_SPECIES.maskAll(true).xor(m); });
         verifyResultsByte(B_SPECIES, VectorOperators.LE);
-
-        testCompareMaskNotByte(ByteVector.SPECIES_64, VectorOperators.LE, (m) -> { return m.cast(ShortVector.SPECIES_128).not(); });
-        verifyResultsByte(ByteVector.SPECIES_64, VectorOperators.LE);
     }
 
     @Test
     @IR(counts = { IRNode.XOR_V_MASK, "= 0",
                    IRNode.XOR_V, "= 0",
-                   IRNode.VECTOR_MASK_CAST, "= 1",
-                   IRNode.VECTOR_MASK_CMP, "= 3" },
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
         applyIfCPUFeatureOr = { "asimd", "true", "avx", "true", "rvv", "true" })
     public static void testCompareGEMaskNotByte() {
         testCompareMaskNotByte(B_SPECIES, VectorOperators.GE, (m) -> { return m.not(); });
         verifyResultsByte(B_SPECIES, VectorOperators.GE);
         testCompareMaskNotByte(B_SPECIES, VectorOperators.GE, (m) -> { return B_SPECIES.maskAll(true).xor(m); });
         verifyResultsByte(B_SPECIES, VectorOperators.GE);
-
-        testCompareMaskNotByte(ByteVector.SPECIES_64, VectorOperators.GE, (m) -> { return m.cast(ShortVector.SPECIES_128).not(); });
-        verifyResultsByte(ByteVector.SPECIES_64, VectorOperators.GE);
     }
 
     @Test
     @IR(counts = { IRNode.XOR_V_MASK, "= 0",
                    IRNode.XOR_V, "= 0",
-                   IRNode.VECTOR_MASK_CAST, "= 1",
-                   IRNode.VECTOR_MASK_CMP, "= 3" },
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
         applyIfCPUFeatureOr = { "asimd", "true", "avx", "true", "rvv", "true" })
     public static void testCompareULTMaskNotByte() {
         testCompareMaskNotByte(B_SPECIES, VectorOperators.ULT, (m) -> { return m.not(); });
         verifyResultsByte(B_SPECIES, VectorOperators.ULT);
         testCompareMaskNotByte(B_SPECIES, VectorOperators.ULT, (m) -> { return B_SPECIES.maskAll(true).xor(m); });
         verifyResultsByte(B_SPECIES, VectorOperators.ULT);
-
-        testCompareMaskNotByte(ByteVector.SPECIES_64, VectorOperators.ULT, (m) -> { return m.cast(ShortVector.SPECIES_128).not(); });
-        verifyResultsByte(ByteVector.SPECIES_64, VectorOperators.ULT);
     }
 
     @Test
     @IR(counts = { IRNode.XOR_V_MASK, "= 0",
                    IRNode.XOR_V, "= 0",
-                   IRNode.VECTOR_MASK_CAST, "= 1",
-                   IRNode.VECTOR_MASK_CMP, "= 3" },
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
         applyIfCPUFeatureOr = { "asimd", "true", "avx", "true", "rvv", "true" })
     public static void testCompareUGTMaskNotByte() {
         testCompareMaskNotByte(B_SPECIES, VectorOperators.UGT, (m) -> { return m.not(); });
         verifyResultsByte(B_SPECIES, VectorOperators.UGT);
         testCompareMaskNotByte(B_SPECIES, VectorOperators.UGT, (m) -> { return B_SPECIES.maskAll(true).xor(m); });
         verifyResultsByte(B_SPECIES, VectorOperators.UGT);
-
-        testCompareMaskNotByte(ByteVector.SPECIES_64, VectorOperators.UGT, (m) -> { return m.cast(ShortVector.SPECIES_128).not(); });
-        verifyResultsByte(ByteVector.SPECIES_64, VectorOperators.UGT);
     }
 
     @Test
     @IR(counts = { IRNode.XOR_V_MASK, "= 0",
                    IRNode.XOR_V, "= 0",
-                   IRNode.VECTOR_MASK_CAST, "= 1",
-                   IRNode.VECTOR_MASK_CMP, "= 3" },
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
         applyIfCPUFeatureOr = { "asimd", "true", "avx", "true", "rvv", "true" })
     public static void testCompareULEMaskNotByte() {
         testCompareMaskNotByte(B_SPECIES, VectorOperators.ULE, (m) -> { return m.not(); });
         verifyResultsByte(B_SPECIES, VectorOperators.ULE);
         testCompareMaskNotByte(B_SPECIES, VectorOperators.ULE, (m) -> { return B_SPECIES.maskAll(true).xor(m); });
         verifyResultsByte(B_SPECIES, VectorOperators.ULE);
-
-        testCompareMaskNotByte(ByteVector.SPECIES_64, VectorOperators.ULE, (m) -> { return m.cast(ShortVector.SPECIES_128).not(); });
-        verifyResultsByte(ByteVector.SPECIES_64, VectorOperators.ULE);
     }
 
     @Test
     @IR(counts = { IRNode.XOR_V_MASK, "= 0",
                    IRNode.XOR_V, "= 0",
-                   IRNode.VECTOR_MASK_CAST, "= 1",
-                   IRNode.VECTOR_MASK_CMP, "= 3" },
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
         applyIfCPUFeatureOr = { "asimd", "true", "avx", "true", "rvv", "true" })
     public static void testCompareUGEMaskNotByte() {
         testCompareMaskNotByte(B_SPECIES, VectorOperators.UGE, (m) -> { return m.not(); });
         verifyResultsByte(B_SPECIES, VectorOperators.UGE);
         testCompareMaskNotByte(B_SPECIES, VectorOperators.UGE, (m) -> { return B_SPECIES.maskAll(true).xor(m); });
         verifyResultsByte(B_SPECIES, VectorOperators.UGE);
-
-        testCompareMaskNotByte(ByteVector.SPECIES_64, VectorOperators.UGE, (m) -> { return m.cast(ShortVector.SPECIES_128).not(); });
-        verifyResultsByte(ByteVector.SPECIES_64, VectorOperators.UGE);
     }
 
     // Short tests
     @Test
     @IR(counts = { IRNode.XOR_V_MASK, "= 0",
                    IRNode.XOR_V, "= 0",
-                   IRNode.VECTOR_MASK_CAST, "= 2",
-                   IRNode.VECTOR_MASK_CMP, "= 4" },
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
         applyIfCPUFeatureOr = { "asimd", "true", "avx", "true", "rvv", "true" })
     public static void testCompareEQMaskNotShort() {
         testCompareMaskNotShort(S_SPECIES, VectorOperators.EQ, (m) -> { return m.not(); });
         verifyResultsShort(S_SPECIES, VectorOperators.EQ);
         testCompareMaskNotShort(S_SPECIES, VectorOperators.EQ, (m) -> { return S_SPECIES.maskAll(true).xor(m); });
         verifyResultsShort(S_SPECIES, VectorOperators.EQ);
-
-        testCompareMaskNotShort(ShortVector.SPECIES_64, VectorOperators.EQ, (m) -> { return IntVector.SPECIES_128.maskAll(true).xor(m.cast(IntVector.SPECIES_128)); });
-        verifyResultsShort(ShortVector.SPECIES_64, VectorOperators.EQ);
-        testCompareMaskNotShort(ShortVector.SPECIES_128, VectorOperators.EQ, (m) -> { return m.cast(ByteVector.SPECIES_64).not(); });
-        verifyResultsShort(ShortVector.SPECIES_128, VectorOperators.EQ);
     }
 
     @Test
     @IR(counts = { IRNode.XOR_V_MASK, "= 0",
                    IRNode.XOR_V, "= 0",
-                   IRNode.VECTOR_MASK_CAST, "= 2",
-                   IRNode.VECTOR_MASK_CMP, "= 4" },
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
         applyIfCPUFeatureOr = { "asimd", "true", "avx", "true", "rvv", "true" })
     public static void testCompareNEMaskNotShort() {
         testCompareMaskNotShort(S_SPECIES, VectorOperators.NE, (m) -> { return m.not(); });
         verifyResultsShort(S_SPECIES, VectorOperators.NE);
         testCompareMaskNotShort(S_SPECIES, VectorOperators.NE, (m) -> { return S_SPECIES.maskAll(true).xor(m); });
         verifyResultsShort(S_SPECIES, VectorOperators.NE);
-
-        testCompareMaskNotShort(ShortVector.SPECIES_64, VectorOperators.NE, (m) -> { return IntVector.SPECIES_128.maskAll(true).xor(m.cast(IntVector.SPECIES_128)); });
-        verifyResultsShort(ShortVector.SPECIES_64, VectorOperators.NE);
-        testCompareMaskNotShort(ShortVector.SPECIES_128, VectorOperators.NE, (m) -> { return m.cast(ByteVector.SPECIES_64).not(); });
-        verifyResultsShort(ShortVector.SPECIES_128, VectorOperators.NE);
     }
 
     @Test
     @IR(counts = { IRNode.XOR_V_MASK, "= 0",
                    IRNode.XOR_V, "= 0",
-                   IRNode.VECTOR_MASK_CAST, "= 2",
-                   IRNode.VECTOR_MASK_CMP, "= 4" },
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
         applyIfCPUFeatureOr = { "asimd", "true", "avx", "true", "rvv", "true" })
     public static void testCompareLTMaskNotShort() {
         testCompareMaskNotShort(S_SPECIES, VectorOperators.LT, (m) -> { return m.not(); });
         verifyResultsShort(S_SPECIES, VectorOperators.LT);
         testCompareMaskNotShort(S_SPECIES, VectorOperators.LT, (m) -> { return S_SPECIES.maskAll(true).xor(m); });
         verifyResultsShort(S_SPECIES, VectorOperators.LT);
-
-        testCompareMaskNotShort(ShortVector.SPECIES_64, VectorOperators.LT, (m) -> { return IntVector.SPECIES_128.maskAll(true).xor(m.cast(IntVector.SPECIES_128)); });
-        verifyResultsShort(ShortVector.SPECIES_64, VectorOperators.LT);
-        testCompareMaskNotShort(ShortVector.SPECIES_128, VectorOperators.LT, (m) -> { return m.cast(ByteVector.SPECIES_64).not(); });
-        verifyResultsShort(ShortVector.SPECIES_128, VectorOperators.LT);
     }
 
     @Test
     @IR(counts = { IRNode.XOR_V_MASK, "= 0",
                    IRNode.XOR_V, "= 0",
-                   IRNode.VECTOR_MASK_CAST, "= 2",
-                   IRNode.VECTOR_MASK_CMP, "= 4" },
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
         applyIfCPUFeatureOr = { "asimd", "true", "avx", "true", "rvv", "true" })
     public static void testCompareGTMaskNotShort() {
         testCompareMaskNotShort(S_SPECIES, VectorOperators.GT, (m) -> { return m.not(); });
         verifyResultsShort(S_SPECIES, VectorOperators.GT);
         testCompareMaskNotShort(S_SPECIES, VectorOperators.GT, (m) -> { return S_SPECIES.maskAll(true).xor(m); });
         verifyResultsShort(S_SPECIES, VectorOperators.GT);
-
-        testCompareMaskNotShort(ShortVector.SPECIES_64, VectorOperators.GT, (m) -> { return IntVector.SPECIES_128.maskAll(true).xor(m.cast(IntVector.SPECIES_128)); });
-        verifyResultsShort(ShortVector.SPECIES_64, VectorOperators.GT);
-        testCompareMaskNotShort(ShortVector.SPECIES_128, VectorOperators.GT, (m) -> { return m.cast(ByteVector.SPECIES_64).not(); });
-        verifyResultsShort(ShortVector.SPECIES_128, VectorOperators.GT);
     }
 
     @Test
     @IR(counts = { IRNode.XOR_V_MASK, "= 0",
                    IRNode.XOR_V, "= 0",
-                   IRNode.VECTOR_MASK_CAST, "= 2",
-                   IRNode.VECTOR_MASK_CMP, "= 4" },
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
         applyIfCPUFeatureOr = { "asimd", "true", "avx", "true", "rvv", "true" })
     public static void testCompareLEMaskNotShort() {
         testCompareMaskNotShort(S_SPECIES, VectorOperators.LE, (m) -> { return m.not(); });
         verifyResultsShort(S_SPECIES, VectorOperators.LE);
         testCompareMaskNotShort(S_SPECIES, VectorOperators.LE, (m) -> { return S_SPECIES.maskAll(true).xor(m); });
         verifyResultsShort(S_SPECIES, VectorOperators.LE);
-
-        testCompareMaskNotShort(ShortVector.SPECIES_64, VectorOperators.LE, (m) -> { return IntVector.SPECIES_128.maskAll(true).xor(m.cast(IntVector.SPECIES_128)); });
-        verifyResultsShort(ShortVector.SPECIES_64, VectorOperators.LE);
-        testCompareMaskNotShort(ShortVector.SPECIES_128, VectorOperators.LE, (m) -> { return m.cast(ByteVector.SPECIES_64).not(); });
-        verifyResultsShort(ShortVector.SPECIES_128, VectorOperators.LE);
     }
 
     @Test
     @IR(counts = { IRNode.XOR_V_MASK, "= 0",
                    IRNode.XOR_V, "= 0",
-                   IRNode.VECTOR_MASK_CAST, "= 2",
-                   IRNode.VECTOR_MASK_CMP, "= 4" },
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
         applyIfCPUFeatureOr = { "asimd", "true", "avx", "true", "rvv", "true" })
     public static void testCompareGEMaskNotShort() {
         testCompareMaskNotShort(S_SPECIES, VectorOperators.GE, (m) -> { return m.not(); });
         verifyResultsShort(S_SPECIES, VectorOperators.GE);
         testCompareMaskNotShort(S_SPECIES, VectorOperators.GE, (m) -> { return S_SPECIES.maskAll(true).xor(m); });
         verifyResultsShort(S_SPECIES, VectorOperators.GE);
-
-        testCompareMaskNotShort(ShortVector.SPECIES_64, VectorOperators.GE, (m) -> { return IntVector.SPECIES_128.maskAll(true).xor(m.cast(IntVector.SPECIES_128)); });
-        verifyResultsShort(ShortVector.SPECIES_64, VectorOperators.GE);
-        testCompareMaskNotShort(ShortVector.SPECIES_128, VectorOperators.GE, (m) -> { return m.cast(ByteVector.SPECIES_64).not(); });
-        verifyResultsShort(ShortVector.SPECIES_128, VectorOperators.GE);
     }
 
     @Test
     @IR(counts = { IRNode.XOR_V_MASK, "= 0",
                    IRNode.XOR_V, "= 0",
-                   IRNode.VECTOR_MASK_CAST, "= 2",
-                   IRNode.VECTOR_MASK_CMP, "= 4" },
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
         applyIfCPUFeatureOr = { "asimd", "true", "avx", "true", "rvv", "true" })
     public static void testCompareULTMaskNotShort() {
         testCompareMaskNotShort(S_SPECIES, VectorOperators.ULT, (m) -> { return m.not(); });
         verifyResultsShort(S_SPECIES, VectorOperators.ULT);
         testCompareMaskNotShort(S_SPECIES, VectorOperators.ULT, (m) -> { return S_SPECIES.maskAll(true).xor(m); });
         verifyResultsShort(S_SPECIES, VectorOperators.ULT);
-
-        testCompareMaskNotShort(ShortVector.SPECIES_64, VectorOperators.ULT, (m) -> { return IntVector.SPECIES_128.maskAll(true).xor(m.cast(IntVector.SPECIES_128)); });
-        verifyResultsShort(ShortVector.SPECIES_64, VectorOperators.ULT);
-        testCompareMaskNotShort(ShortVector.SPECIES_128, VectorOperators.ULT, (m) -> { return m.cast(ByteVector.SPECIES_64).not(); });
-        verifyResultsShort(ShortVector.SPECIES_128, VectorOperators.ULT);
     }
 
     @Test
     @IR(counts = { IRNode.XOR_V_MASK, "= 0",
                    IRNode.XOR_V, "= 0",
-                   IRNode.VECTOR_MASK_CAST, "= 2",
-                   IRNode.VECTOR_MASK_CMP, "= 4" },
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
         applyIfCPUFeatureOr = { "asimd", "true", "avx", "true", "rvv", "true" })
     public static void testCompareUGTMaskNotShort() {
         testCompareMaskNotShort(S_SPECIES, VectorOperators.UGT, (m) -> { return m.not(); });
         verifyResultsShort(S_SPECIES, VectorOperators.UGT);
         testCompareMaskNotShort(S_SPECIES, VectorOperators.UGT, (m) -> { return S_SPECIES.maskAll(true).xor(m); });
         verifyResultsShort(S_SPECIES, VectorOperators.UGT);
-
-        testCompareMaskNotShort(ShortVector.SPECIES_64, VectorOperators.UGT, (m) -> { return IntVector.SPECIES_128.maskAll(true).xor(m.cast(IntVector.SPECIES_128)); });
-        verifyResultsShort(ShortVector.SPECIES_64, VectorOperators.UGT);
-        testCompareMaskNotShort(ShortVector.SPECIES_128, VectorOperators.UGT, (m) -> { return m.cast(ByteVector.SPECIES_64).not(); });
-        verifyResultsShort(ShortVector.SPECIES_128, VectorOperators.UGT);
     }
 
     @Test
     @IR(counts = { IRNode.XOR_V_MASK, "= 0",
                    IRNode.XOR_V, "= 0",
-                   IRNode.VECTOR_MASK_CAST, "= 2",
-                   IRNode.VECTOR_MASK_CMP, "= 4" },
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
         applyIfCPUFeatureOr = { "asimd", "true", "avx", "true", "rvv", "true" })
     public static void testCompareULEMaskNotShort() {
         testCompareMaskNotShort(S_SPECIES, VectorOperators.ULE, (m) -> { return m.not(); });
         verifyResultsShort(S_SPECIES, VectorOperators.ULE);
         testCompareMaskNotShort(S_SPECIES, VectorOperators.ULE, (m) -> { return S_SPECIES.maskAll(true).xor(m); });
         verifyResultsShort(S_SPECIES, VectorOperators.ULE);
-
-        testCompareMaskNotShort(ShortVector.SPECIES_64, VectorOperators.ULE, (m) -> { return IntVector.SPECIES_128.maskAll(true).xor(m.cast(IntVector.SPECIES_128)); });
-        verifyResultsShort(ShortVector.SPECIES_64, VectorOperators.ULE);
-        testCompareMaskNotShort(ShortVector.SPECIES_128, VectorOperators.ULE, (m) -> { return m.cast(ByteVector.SPECIES_64).not(); });
-        verifyResultsShort(ShortVector.SPECIES_128, VectorOperators.ULE);
     }
 
     @Test
     @IR(counts = { IRNode.XOR_V_MASK, "= 0",
                    IRNode.XOR_V, "= 0",
-                   IRNode.VECTOR_MASK_CAST, "= 2",
-                   IRNode.VECTOR_MASK_CMP, "= 4" },
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
         applyIfCPUFeatureOr = { "asimd", "true", "avx", "true", "rvv", "true" })
     public static void testCompareUGEMaskNotShort() {
         testCompareMaskNotShort(S_SPECIES, VectorOperators.UGE, (m) -> { return m.not(); });
         verifyResultsShort(S_SPECIES, VectorOperators.UGE);
         testCompareMaskNotShort(S_SPECIES, VectorOperators.UGE, (m) -> { return S_SPECIES.maskAll(true).xor(m); });
         verifyResultsShort(S_SPECIES, VectorOperators.UGE);
-
-        testCompareMaskNotShort(ShortVector.SPECIES_64, VectorOperators.UGE, (m) -> { return IntVector.SPECIES_128.maskAll(true).xor(m.cast(IntVector.SPECIES_128)); });
-        verifyResultsShort(ShortVector.SPECIES_64, VectorOperators.UGE);
-        testCompareMaskNotShort(ShortVector.SPECIES_128, VectorOperators.UGE, (m) -> { return m.cast(ByteVector.SPECIES_64).not(); });
-        verifyResultsShort(ShortVector.SPECIES_128, VectorOperators.UGE);
     }
 
     // Int tests
     @Test
     @IR(counts = { IRNode.XOR_V_MASK, "= 0",
                    IRNode.XOR_V, "= 0",
-                   IRNode.VECTOR_MASK_CAST, "= 2",
-                   IRNode.VECTOR_MASK_CMP, "= 4" },
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
         applyIfCPUFeatureOr = { "asimd", "true", "avx2", "true", "rvv", "true" })
     public static void testCompareEQMaskNotInt() {
         testCompareMaskNotInt(I_SPECIES, VectorOperators.EQ, (m) -> { return m.not(); });
         verifyResultsInt(I_SPECIES, VectorOperators.EQ);
         testCompareMaskNotInt(I_SPECIES, VectorOperators.EQ, (m) -> { return I_SPECIES.maskAll(true).xor(m); });
         verifyResultsInt(I_SPECIES, VectorOperators.EQ);
-
-        testCompareMaskNotInt(I_SPECIES_FOR_CAST, VectorOperators.EQ, (m) -> { return L_SPECIES_FOR_CAST.maskAll(true).xor(m.cast(L_SPECIES_FOR_CAST)); });
-        verifyResultsInt(I_SPECIES_FOR_CAST, VectorOperators.EQ);
-        testCompareMaskNotInt(IntVector.SPECIES_128, VectorOperators.EQ, (m) -> { return m.cast(ShortVector.SPECIES_64).not(); });
-        verifyResultsInt(IntVector.SPECIES_128, VectorOperators.EQ);
     }
 
     @Test
     @IR(counts = { IRNode.XOR_V_MASK, "= 0",
                    IRNode.XOR_V, "= 0",
-                   IRNode.VECTOR_MASK_CAST, "= 2",
-                   IRNode.VECTOR_MASK_CMP, "= 4" },
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
         applyIfCPUFeatureOr = { "asimd", "true", "avx2", "true", "rvv", "true" })
     public static void testCompareNEMaskNotInt() {
         testCompareMaskNotInt(I_SPECIES, VectorOperators.NE, (m) -> { return m.not(); });
         verifyResultsInt(I_SPECIES, VectorOperators.NE);
         testCompareMaskNotInt(I_SPECIES, VectorOperators.NE, (m) -> { return I_SPECIES.maskAll(true).xor(m); });
         verifyResultsInt(I_SPECIES, VectorOperators.NE);
-
-        testCompareMaskNotInt(I_SPECIES_FOR_CAST, VectorOperators.NE, (m) -> { return L_SPECIES_FOR_CAST.maskAll(true).xor(m.cast(L_SPECIES_FOR_CAST)); });
-        verifyResultsInt(I_SPECIES_FOR_CAST, VectorOperators.NE);
-        testCompareMaskNotInt(IntVector.SPECIES_128, VectorOperators.NE, (m) -> { return m.cast(ShortVector.SPECIES_64).not(); });
-        verifyResultsInt(IntVector.SPECIES_128, VectorOperators.NE);
     }
 
     @Test
     @IR(counts = { IRNode.XOR_V_MASK, "= 0",
                    IRNode.XOR_V, "= 0",
-                   IRNode.VECTOR_MASK_CAST, "= 2",
-                   IRNode.VECTOR_MASK_CMP, "= 4" },
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
         applyIfCPUFeatureOr = { "asimd", "true", "avx2", "true", "rvv", "true" })
     public static void testCompareLTMaskNotInt() {
         testCompareMaskNotInt(I_SPECIES, VectorOperators.LT, (m) -> { return m.not(); });
         verifyResultsInt(I_SPECIES, VectorOperators.LT);
         testCompareMaskNotInt(I_SPECIES, VectorOperators.LT, (m) -> { return I_SPECIES.maskAll(true).xor(m); });
         verifyResultsInt(I_SPECIES, VectorOperators.LT);
-
-        testCompareMaskNotInt(I_SPECIES_FOR_CAST, VectorOperators.LT, (m) -> { return L_SPECIES_FOR_CAST.maskAll(true).xor(m.cast(L_SPECIES_FOR_CAST)); });
-        verifyResultsInt(I_SPECIES_FOR_CAST, VectorOperators.LT);
-        testCompareMaskNotInt(IntVector.SPECIES_128, VectorOperators.LT, (m) -> { return m.cast(ShortVector.SPECIES_64).not(); });
-        verifyResultsInt(IntVector.SPECIES_128, VectorOperators.LT);
     }
 
     @Test
     @IR(counts = { IRNode.XOR_V_MASK, "= 0",
                    IRNode.XOR_V, "= 0",
-                   IRNode.VECTOR_MASK_CAST, "= 2",
-                   IRNode.VECTOR_MASK_CMP, "= 4" },
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
         applyIfCPUFeatureOr = { "asimd", "true", "avx2", "true", "rvv", "true" })
     public static void testCompareGTMaskNotInt() {
         testCompareMaskNotInt(I_SPECIES, VectorOperators.GT, (m) -> { return m.not(); });
         verifyResultsInt(I_SPECIES, VectorOperators.GT);
         testCompareMaskNotInt(I_SPECIES, VectorOperators.GT, (m) -> { return I_SPECIES.maskAll(true).xor(m); });
         verifyResultsInt(I_SPECIES, VectorOperators.GT);
-
-        testCompareMaskNotInt(I_SPECIES_FOR_CAST, VectorOperators.GT, (m) -> { return L_SPECIES_FOR_CAST.maskAll(true).xor(m.cast(L_SPECIES_FOR_CAST)); });
-        verifyResultsInt(I_SPECIES_FOR_CAST, VectorOperators.GT);
-        testCompareMaskNotInt(IntVector.SPECIES_128, VectorOperators.GT, (m) -> { return m.cast(ShortVector.SPECIES_64).not(); });
-        verifyResultsInt(IntVector.SPECIES_128, VectorOperators.GT);
     }
 
     @Test
     @IR(counts = { IRNode.XOR_V_MASK, "= 0",
                    IRNode.XOR_V, "= 0",
-                   IRNode.VECTOR_MASK_CAST, "= 2",
-                   IRNode.VECTOR_MASK_CMP, "= 4" },
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
         applyIfCPUFeatureOr = { "asimd", "true", "avx2", "true", "rvv", "true" })
     public static void testCompareLEMaskNotInt() {
         testCompareMaskNotInt(I_SPECIES, VectorOperators.LE, (m) -> { return m.not(); });
         verifyResultsInt(I_SPECIES, VectorOperators.LE);
         testCompareMaskNotInt(I_SPECIES, VectorOperators.LE, (m) -> { return I_SPECIES.maskAll(true).xor(m); });
         verifyResultsInt(I_SPECIES, VectorOperators.LE);
-
-        testCompareMaskNotInt(I_SPECIES_FOR_CAST, VectorOperators.LE, (m) -> { return L_SPECIES_FOR_CAST.maskAll(true).xor(m.cast(L_SPECIES_FOR_CAST)); });
-        verifyResultsInt(I_SPECIES_FOR_CAST, VectorOperators.LE);
-        testCompareMaskNotInt(IntVector.SPECIES_128, VectorOperators.LE, (m) -> { return m.cast(ShortVector.SPECIES_64).not(); });
-        verifyResultsInt(IntVector.SPECIES_128, VectorOperators.LE);
     }
 
     @Test
     @IR(counts = { IRNode.XOR_V_MASK, "= 0",
                    IRNode.XOR_V, "= 0",
-                   IRNode.VECTOR_MASK_CAST, "= 2",
-                   IRNode.VECTOR_MASK_CMP, "= 4" },
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
         applyIfCPUFeatureOr = { "asimd", "true", "avx2", "true", "rvv", "true" })
     public static void testCompareGEMaskNotInt() {
         testCompareMaskNotInt(I_SPECIES, VectorOperators.GE, (m) -> { return m.not(); });
         verifyResultsInt(I_SPECIES, VectorOperators.GE);
         testCompareMaskNotInt(I_SPECIES, VectorOperators.GE, (m) -> { return I_SPECIES.maskAll(true).xor(m); });
         verifyResultsInt(I_SPECIES, VectorOperators.GE);
-
-        testCompareMaskNotInt(I_SPECIES_FOR_CAST, VectorOperators.GE, (m) -> { return L_SPECIES_FOR_CAST.maskAll(true).xor(m.cast(L_SPECIES_FOR_CAST)); });
-        verifyResultsInt(I_SPECIES_FOR_CAST, VectorOperators.GE);
-        testCompareMaskNotInt(IntVector.SPECIES_128, VectorOperators.GE, (m) -> { return m.cast(ShortVector.SPECIES_64).not(); });
-        verifyResultsInt(IntVector.SPECIES_128, VectorOperators.GE);
     }
 
     @Test
     @IR(counts = { IRNode.XOR_V_MASK, "= 0",
                    IRNode.XOR_V, "= 0",
-                   IRNode.VECTOR_MASK_CAST, "= 2",
-                   IRNode.VECTOR_MASK_CMP, "= 4" },
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
         applyIfCPUFeatureOr = { "asimd", "true", "avx2", "true", "rvv", "true" })
     public static void testCompareULTMaskNotInt() {
         testCompareMaskNotInt(I_SPECIES, VectorOperators.ULT, (m) -> { return m.not(); });
         verifyResultsInt(I_SPECIES, VectorOperators.ULT);
         testCompareMaskNotInt(I_SPECIES, VectorOperators.ULT, (m) -> { return I_SPECIES.maskAll(true).xor(m); });
         verifyResultsInt(I_SPECIES, VectorOperators.ULT);
-
-        testCompareMaskNotInt(I_SPECIES_FOR_CAST, VectorOperators.ULT, (m) -> { return L_SPECIES_FOR_CAST.maskAll(true).xor(m.cast(L_SPECIES_FOR_CAST)); });
-        verifyResultsInt(I_SPECIES_FOR_CAST, VectorOperators.ULT);
-        testCompareMaskNotInt(IntVector.SPECIES_128, VectorOperators.ULT, (m) -> { return m.cast(ShortVector.SPECIES_64).not(); });
-        verifyResultsInt(IntVector.SPECIES_128, VectorOperators.ULT);
     }
 
     @Test
     @IR(counts = { IRNode.XOR_V_MASK, "= 0",
                    IRNode.XOR_V, "= 0",
-                   IRNode.VECTOR_MASK_CAST, "= 2",
-                   IRNode.VECTOR_MASK_CMP, "= 4" },
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
         applyIfCPUFeatureOr = { "asimd", "true", "avx2", "true", "rvv", "true" })
     public static void testCompareUGTMaskNotInt() {
         testCompareMaskNotInt(I_SPECIES, VectorOperators.UGT, (m) -> { return m.not(); });
         verifyResultsInt(I_SPECIES, VectorOperators.UGT);
         testCompareMaskNotInt(I_SPECIES, VectorOperators.UGT, (m) -> { return I_SPECIES.maskAll(true).xor(m); });
         verifyResultsInt(I_SPECIES, VectorOperators.UGT);
-
-        testCompareMaskNotInt(I_SPECIES_FOR_CAST, VectorOperators.UGT, (m) -> { return L_SPECIES_FOR_CAST.maskAll(true).xor(m.cast(L_SPECIES_FOR_CAST)); });
-        verifyResultsInt(I_SPECIES_FOR_CAST, VectorOperators.UGT);
-        testCompareMaskNotInt(IntVector.SPECIES_128, VectorOperators.UGT, (m) -> { return m.cast(ShortVector.SPECIES_64).not(); });
-        verifyResultsInt(IntVector.SPECIES_128, VectorOperators.UGT);
     }
 
     @Test
     @IR(counts = { IRNode.XOR_V_MASK, "= 0",
                    IRNode.XOR_V, "= 0",
-                   IRNode.VECTOR_MASK_CAST, "= 2",
-                   IRNode.VECTOR_MASK_CMP, "= 4" },
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
         applyIfCPUFeatureOr = { "asimd", "true", "avx2", "true", "rvv", "true" })
     public static void testCompareULEMaskNotInt() {
         testCompareMaskNotInt(I_SPECIES, VectorOperators.ULE, (m) -> { return m.not(); });
         verifyResultsInt(I_SPECIES, VectorOperators.ULE);
         testCompareMaskNotInt(I_SPECIES, VectorOperators.ULE, (m) -> { return I_SPECIES.maskAll(true).xor(m); });
         verifyResultsInt(I_SPECIES, VectorOperators.ULE);
-
-        testCompareMaskNotInt(I_SPECIES_FOR_CAST, VectorOperators.ULE, (m) -> { return L_SPECIES_FOR_CAST.maskAll(true).xor(m.cast(L_SPECIES_FOR_CAST)); });
-        verifyResultsInt(I_SPECIES_FOR_CAST, VectorOperators.ULE);
-        testCompareMaskNotInt(IntVector.SPECIES_128, VectorOperators.ULE, (m) -> { return m.cast(ShortVector.SPECIES_64).not(); });
-        verifyResultsInt(IntVector.SPECIES_128, VectorOperators.ULE);
     }
 
     @Test
     @IR(counts = { IRNode.XOR_V_MASK, "= 0",
                    IRNode.XOR_V, "= 0",
-                   IRNode.VECTOR_MASK_CAST, "= 2",
-                   IRNode.VECTOR_MASK_CMP, "= 4" },
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
         applyIfCPUFeatureOr = { "asimd", "true", "avx2", "true", "rvv", "true" })
     public static void testCompareUGEMaskNotInt() {
         testCompareMaskNotInt(I_SPECIES, VectorOperators.UGE, (m) -> { return m.not(); });
         verifyResultsInt(I_SPECIES, VectorOperators.UGE);
         testCompareMaskNotInt(I_SPECIES, VectorOperators.UGE, (m) -> { return I_SPECIES.maskAll(true).xor(m); });
         verifyResultsInt(I_SPECIES, VectorOperators.UGE);
-
-        testCompareMaskNotInt(I_SPECIES_FOR_CAST, VectorOperators.UGE, (m) -> { return L_SPECIES_FOR_CAST.maskAll(true).xor(m.cast(L_SPECIES_FOR_CAST)); });
-        verifyResultsInt(I_SPECIES_FOR_CAST, VectorOperators.UGE);
-        testCompareMaskNotInt(IntVector.SPECIES_128, VectorOperators.UGE, (m) -> { return m.cast(ShortVector.SPECIES_64).not(); });
-        verifyResultsInt(IntVector.SPECIES_128, VectorOperators.UGE);
     }
 
     // Long tests
@@ -1291,9 +1135,618 @@ public class VectorMaskCompareNotTest {
         verifyResultsDouble(D_SPECIES, VectorOperators.LT, da, db);
     }
 
+    // Cast variants of the byte, short and int tests above. These cases use
+    // mask compares whose result feeds into a VectorMaskCast (and either an
+    // explicit .not()/xor or an all-true mask xor).
+    //
+    // The IR expectation depends on whether the platform requires partial
+    // vector operations (see Matcher::vector_needs_partial_operations):
+    //   * On SVE, a sub-register vector compare is lowered via
+    //     ideal_partial_operations(), which generates an all-true predicate
+    //     with VectorMaskGen and attaches it to the VectorMaskCmp. The
+    //     compare becomes predicated, so after JDK-8382532 the
+    //     XorV-VectorMaskCmp optimization does not fire and one not node
+    //     (XorVMask) remains.
+    //   * On all other supported platforms (ASIMD-only, AVX2/AVX-512, RVV),
+    //     vector_needs_partial_operations() returns false, the
+    //     VectorMaskCmp stays unpredicated, the optimization fires and the
+    //     not nodes are folded into negated VectorMaskCmps. No XorV/XorVMask
+    //     remains.
+
+    @Test
+    @IR(counts = { IRNode.XOR_V_MASK, "= 1",
+                   IRNode.VECTOR_MASK_CMP, "= 1" },
+        applyIfCPUFeatureAnd = { "sve", "true" })
+    @IR(counts = { IRNode.XOR_V_MASK, "= 0",
+                   IRNode.XOR_V, "= 0",
+                   IRNode.VECTOR_MASK_CMP, "= 1" },
+        applyIfCPUFeatureAnd = { "asimd", "true", "sve", "false" })
+    @IR(counts = { IRNode.XOR_V_MASK, "= 0",
+                   IRNode.XOR_V, "= 0",
+                   IRNode.VECTOR_MASK_CMP, "= 1" },
+        applyIfCPUFeatureOr = { "avx2", "true", "rvv", "true" })
+    public static void testCompareEQMaskNotByteCast() {
+        testCompareMaskNotByte(ByteVector.SPECIES_64, VectorOperators.EQ, (m) -> { return m.cast(ShortVector.SPECIES_128).not(); });
+        verifyResultsByte(ByteVector.SPECIES_64, VectorOperators.EQ);
+    }
+
+    @Test
+    @IR(counts = { IRNode.XOR_V_MASK, "= 1",
+                   IRNode.VECTOR_MASK_CMP, "= 1" },
+        applyIfCPUFeatureAnd = { "sve", "true" })
+    @IR(counts = { IRNode.XOR_V_MASK, "= 0",
+                   IRNode.XOR_V, "= 0",
+                   IRNode.VECTOR_MASK_CMP, "= 1" },
+        applyIfCPUFeatureAnd = { "asimd", "true", "sve", "false" })
+    @IR(counts = { IRNode.XOR_V_MASK, "= 0",
+                   IRNode.XOR_V, "= 0",
+                   IRNode.VECTOR_MASK_CMP, "= 1" },
+        applyIfCPUFeatureOr = { "avx2", "true", "rvv", "true" })
+    public static void testCompareNEMaskNotByteCast() {
+        testCompareMaskNotByte(ByteVector.SPECIES_64, VectorOperators.NE, (m) -> { return m.cast(ShortVector.SPECIES_128).not(); });
+        verifyResultsByte(ByteVector.SPECIES_64, VectorOperators.NE);
+    }
+
+    @Test
+    @IR(counts = { IRNode.XOR_V_MASK, "= 1",
+                   IRNode.VECTOR_MASK_CMP, "= 1" },
+        applyIfCPUFeatureAnd = { "sve", "true" })
+    @IR(counts = { IRNode.XOR_V_MASK, "= 0",
+                   IRNode.XOR_V, "= 0",
+                   IRNode.VECTOR_MASK_CMP, "= 1" },
+        applyIfCPUFeatureAnd = { "asimd", "true", "sve", "false" })
+    @IR(counts = { IRNode.XOR_V_MASK, "= 0",
+                   IRNode.XOR_V, "= 0",
+                   IRNode.VECTOR_MASK_CMP, "= 1" },
+        applyIfCPUFeatureOr = { "avx2", "true", "rvv", "true" })
+    public static void testCompareLTMaskNotByteCast() {
+        testCompareMaskNotByte(ByteVector.SPECIES_64, VectorOperators.LT, (m) -> { return m.cast(ShortVector.SPECIES_128).not(); });
+        verifyResultsByte(ByteVector.SPECIES_64, VectorOperators.LT);
+    }
+
+    @Test
+    @IR(counts = { IRNode.XOR_V_MASK, "= 1",
+                   IRNode.VECTOR_MASK_CMP, "= 1" },
+        applyIfCPUFeatureAnd = { "sve", "true" })
+    @IR(counts = { IRNode.XOR_V_MASK, "= 0",
+                   IRNode.XOR_V, "= 0",
+                   IRNode.VECTOR_MASK_CMP, "= 1" },
+        applyIfCPUFeatureAnd = { "asimd", "true", "sve", "false" })
+    @IR(counts = { IRNode.XOR_V_MASK, "= 0",
+                   IRNode.XOR_V, "= 0",
+                   IRNode.VECTOR_MASK_CMP, "= 1" },
+        applyIfCPUFeatureOr = { "avx2", "true", "rvv", "true" })
+    public static void testCompareGTMaskNotByteCast() {
+        testCompareMaskNotByte(ByteVector.SPECIES_64, VectorOperators.GT, (m) -> { return m.cast(ShortVector.SPECIES_128).not(); });
+        verifyResultsByte(ByteVector.SPECIES_64, VectorOperators.GT);
+    }
+
+    @Test
+    @IR(counts = { IRNode.XOR_V_MASK, "= 1",
+                   IRNode.VECTOR_MASK_CMP, "= 1" },
+        applyIfCPUFeatureAnd = { "sve", "true" })
+    @IR(counts = { IRNode.XOR_V_MASK, "= 0",
+                   IRNode.XOR_V, "= 0",
+                   IRNode.VECTOR_MASK_CMP, "= 1" },
+        applyIfCPUFeatureAnd = { "asimd", "true", "sve", "false" })
+    @IR(counts = { IRNode.XOR_V_MASK, "= 0",
+                   IRNode.XOR_V, "= 0",
+                   IRNode.VECTOR_MASK_CMP, "= 1" },
+        applyIfCPUFeatureOr = { "avx2", "true", "rvv", "true" })
+    public static void testCompareLEMaskNotByteCast() {
+        testCompareMaskNotByte(ByteVector.SPECIES_64, VectorOperators.LE, (m) -> { return m.cast(ShortVector.SPECIES_128).not(); });
+        verifyResultsByte(ByteVector.SPECIES_64, VectorOperators.LE);
+    }
+
+    @Test
+    @IR(counts = { IRNode.XOR_V_MASK, "= 1",
+                   IRNode.VECTOR_MASK_CMP, "= 1" },
+        applyIfCPUFeatureAnd = { "sve", "true" })
+    @IR(counts = { IRNode.XOR_V_MASK, "= 0",
+                   IRNode.XOR_V, "= 0",
+                   IRNode.VECTOR_MASK_CMP, "= 1" },
+        applyIfCPUFeatureAnd = { "asimd", "true", "sve", "false" })
+    @IR(counts = { IRNode.XOR_V_MASK, "= 0",
+                   IRNode.XOR_V, "= 0",
+                   IRNode.VECTOR_MASK_CMP, "= 1" },
+        applyIfCPUFeatureOr = { "avx2", "true", "rvv", "true" })
+    public static void testCompareGEMaskNotByteCast() {
+        testCompareMaskNotByte(ByteVector.SPECIES_64, VectorOperators.GE, (m) -> { return m.cast(ShortVector.SPECIES_128).not(); });
+        verifyResultsByte(ByteVector.SPECIES_64, VectorOperators.GE);
+    }
+
+    @Test
+    @IR(counts = { IRNode.XOR_V_MASK, "= 1",
+                   IRNode.VECTOR_MASK_CMP, "= 1" },
+        applyIfCPUFeatureAnd = { "sve", "true" })
+    @IR(counts = { IRNode.XOR_V_MASK, "= 0",
+                   IRNode.XOR_V, "= 0",
+                   IRNode.VECTOR_MASK_CMP, "= 1" },
+        applyIfCPUFeatureAnd = { "asimd", "true", "sve", "false" })
+    @IR(counts = { IRNode.XOR_V_MASK, "= 0",
+                   IRNode.XOR_V, "= 0",
+                   IRNode.VECTOR_MASK_CMP, "= 1" },
+        applyIfCPUFeatureOr = { "avx2", "true", "rvv", "true" })
+    public static void testCompareULTMaskNotByteCast() {
+        testCompareMaskNotByte(ByteVector.SPECIES_64, VectorOperators.ULT, (m) -> { return m.cast(ShortVector.SPECIES_128).not(); });
+        verifyResultsByte(ByteVector.SPECIES_64, VectorOperators.ULT);
+    }
+
+    @Test
+    @IR(counts = { IRNode.XOR_V_MASK, "= 1",
+                   IRNode.VECTOR_MASK_CMP, "= 1" },
+        applyIfCPUFeatureAnd = { "sve", "true" })
+    @IR(counts = { IRNode.XOR_V_MASK, "= 0",
+                   IRNode.XOR_V, "= 0",
+                   IRNode.VECTOR_MASK_CMP, "= 1" },
+        applyIfCPUFeatureAnd = { "asimd", "true", "sve", "false" })
+    @IR(counts = { IRNode.XOR_V_MASK, "= 0",
+                   IRNode.XOR_V, "= 0",
+                   IRNode.VECTOR_MASK_CMP, "= 1" },
+        applyIfCPUFeatureOr = { "avx2", "true", "rvv", "true" })
+    public static void testCompareUGTMaskNotByteCast() {
+        testCompareMaskNotByte(ByteVector.SPECIES_64, VectorOperators.UGT, (m) -> { return m.cast(ShortVector.SPECIES_128).not(); });
+        verifyResultsByte(ByteVector.SPECIES_64, VectorOperators.UGT);
+    }
+
+    @Test
+    @IR(counts = { IRNode.XOR_V_MASK, "= 1",
+                   IRNode.VECTOR_MASK_CMP, "= 1" },
+        applyIfCPUFeatureAnd = { "sve", "true" })
+    @IR(counts = { IRNode.XOR_V_MASK, "= 0",
+                   IRNode.XOR_V, "= 0",
+                   IRNode.VECTOR_MASK_CMP, "= 1" },
+        applyIfCPUFeatureAnd = { "asimd", "true", "sve", "false" })
+    @IR(counts = { IRNode.XOR_V_MASK, "= 0",
+                   IRNode.XOR_V, "= 0",
+                   IRNode.VECTOR_MASK_CMP, "= 1" },
+        applyIfCPUFeatureOr = { "avx2", "true", "rvv", "true" })
+    public static void testCompareULEMaskNotByteCast() {
+        testCompareMaskNotByte(ByteVector.SPECIES_64, VectorOperators.ULE, (m) -> { return m.cast(ShortVector.SPECIES_128).not(); });
+        verifyResultsByte(ByteVector.SPECIES_64, VectorOperators.ULE);
+    }
+
+    @Test
+    @IR(counts = { IRNode.XOR_V_MASK, "= 1",
+                   IRNode.VECTOR_MASK_CMP, "= 1" },
+        applyIfCPUFeatureAnd = { "sve", "true" })
+    @IR(counts = { IRNode.XOR_V_MASK, "= 0",
+                   IRNode.XOR_V, "= 0",
+                   IRNode.VECTOR_MASK_CMP, "= 1" },
+        applyIfCPUFeatureAnd = { "asimd", "true", "sve", "false" })
+    @IR(counts = { IRNode.XOR_V_MASK, "= 0",
+                   IRNode.XOR_V, "= 0",
+                   IRNode.VECTOR_MASK_CMP, "= 1" },
+        applyIfCPUFeatureOr = { "avx2", "true", "rvv", "true" })
+    public static void testCompareUGEMaskNotByteCast() {
+        testCompareMaskNotByte(ByteVector.SPECIES_64, VectorOperators.UGE, (m) -> { return m.cast(ShortVector.SPECIES_128).not(); });
+        verifyResultsByte(ByteVector.SPECIES_64, VectorOperators.UGE);
+    }
+
+    @Test
+    @IR(counts = { IRNode.XOR_V_MASK, "= 1",
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
+        applyIfCPUFeatureAnd = { "sve", "true" },
+        applyIf = { "MaxVectorSize", "= 16" })
+    @IR(counts = { IRNode.XOR_V_MASK, "= 0",
+                   IRNode.XOR_V, "= 0",
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
+        applyIfCPUFeatureAnd = { "asimd", "true", "sve", "false" })
+    @IR(counts = { IRNode.XOR_V_MASK, "= 0",
+                   IRNode.XOR_V, "= 0",
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
+        applyIfCPUFeatureOr = { "avx2", "true", "rvv", "true" })
+    public static void testCompareEQMaskNotShortCast() {
+        testCompareMaskNotShort(ShortVector.SPECIES_64, VectorOperators.EQ, (m) -> { return IntVector.SPECIES_128.maskAll(true).xor(m.cast(IntVector.SPECIES_128)); });
+        verifyResultsShort(ShortVector.SPECIES_64, VectorOperators.EQ);
+        testCompareMaskNotShort(ShortVector.SPECIES_128, VectorOperators.EQ, (m) -> { return m.cast(ByteVector.SPECIES_64).not(); });
+        verifyResultsShort(ShortVector.SPECIES_128, VectorOperators.EQ);
+    }
+
+    @Test
+    @IR(counts = { IRNode.XOR_V_MASK, "= 1",
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
+        applyIfCPUFeatureAnd = { "sve", "true" },
+        applyIf = { "MaxVectorSize", "= 16" })
+    @IR(counts = { IRNode.XOR_V_MASK, "= 0",
+                   IRNode.XOR_V, "= 0",
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
+        applyIfCPUFeatureAnd = { "asimd", "true", "sve", "false" })
+    @IR(counts = { IRNode.XOR_V_MASK, "= 0",
+                   IRNode.XOR_V, "= 0",
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
+        applyIfCPUFeatureOr = { "avx2", "true", "rvv", "true" })
+    public static void testCompareNEMaskNotShortCast() {
+        testCompareMaskNotShort(ShortVector.SPECIES_64, VectorOperators.NE, (m) -> { return IntVector.SPECIES_128.maskAll(true).xor(m.cast(IntVector.SPECIES_128)); });
+        verifyResultsShort(ShortVector.SPECIES_64, VectorOperators.NE);
+        testCompareMaskNotShort(ShortVector.SPECIES_128, VectorOperators.NE, (m) -> { return m.cast(ByteVector.SPECIES_64).not(); });
+        verifyResultsShort(ShortVector.SPECIES_128, VectorOperators.NE);
+    }
+
+    @Test
+    @IR(counts = { IRNode.XOR_V_MASK, "= 1",
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
+        applyIfCPUFeatureAnd = { "sve", "true" },
+        applyIf = { "MaxVectorSize", "= 16" })
+    @IR(counts = { IRNode.XOR_V_MASK, "= 0",
+                   IRNode.XOR_V, "= 0",
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
+        applyIfCPUFeatureAnd = { "asimd", "true", "sve", "false" })
+    @IR(counts = { IRNode.XOR_V_MASK, "= 0",
+                   IRNode.XOR_V, "= 0",
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
+        applyIfCPUFeatureOr = { "avx2", "true", "rvv", "true" })
+    public static void testCompareLTMaskNotShortCast() {
+        testCompareMaskNotShort(ShortVector.SPECIES_64, VectorOperators.LT, (m) -> { return IntVector.SPECIES_128.maskAll(true).xor(m.cast(IntVector.SPECIES_128)); });
+        verifyResultsShort(ShortVector.SPECIES_64, VectorOperators.LT);
+        testCompareMaskNotShort(ShortVector.SPECIES_128, VectorOperators.LT, (m) -> { return m.cast(ByteVector.SPECIES_64).not(); });
+        verifyResultsShort(ShortVector.SPECIES_128, VectorOperators.LT);
+    }
+
+    @Test
+    @IR(counts = { IRNode.XOR_V_MASK, "= 1",
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
+        applyIfCPUFeatureAnd = { "sve", "true" },
+        applyIf = { "MaxVectorSize", "= 16" })
+    @IR(counts = { IRNode.XOR_V_MASK, "= 0",
+                   IRNode.XOR_V, "= 0",
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
+        applyIfCPUFeatureAnd = { "asimd", "true", "sve", "false" })
+    @IR(counts = { IRNode.XOR_V_MASK, "= 0",
+                   IRNode.XOR_V, "= 0",
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
+        applyIfCPUFeatureOr = { "avx2", "true", "rvv", "true" })
+    public static void testCompareGTMaskNotShortCast() {
+        testCompareMaskNotShort(ShortVector.SPECIES_64, VectorOperators.GT, (m) -> { return IntVector.SPECIES_128.maskAll(true).xor(m.cast(IntVector.SPECIES_128)); });
+        verifyResultsShort(ShortVector.SPECIES_64, VectorOperators.GT);
+        testCompareMaskNotShort(ShortVector.SPECIES_128, VectorOperators.GT, (m) -> { return m.cast(ByteVector.SPECIES_64).not(); });
+        verifyResultsShort(ShortVector.SPECIES_128, VectorOperators.GT);
+    }
+
+    @Test
+    @IR(counts = { IRNode.XOR_V_MASK, "= 1",
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
+        applyIfCPUFeatureAnd = { "sve", "true" },
+        applyIf = { "MaxVectorSize", "= 16" })
+    @IR(counts = { IRNode.XOR_V_MASK, "= 0",
+                   IRNode.XOR_V, "= 0",
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
+        applyIfCPUFeatureAnd = { "asimd", "true", "sve", "false" })
+    @IR(counts = { IRNode.XOR_V_MASK, "= 0",
+                   IRNode.XOR_V, "= 0",
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
+        applyIfCPUFeatureOr = { "avx2", "true", "rvv", "true" })
+    public static void testCompareLEMaskNotShortCast() {
+        testCompareMaskNotShort(ShortVector.SPECIES_64, VectorOperators.LE, (m) -> { return IntVector.SPECIES_128.maskAll(true).xor(m.cast(IntVector.SPECIES_128)); });
+        verifyResultsShort(ShortVector.SPECIES_64, VectorOperators.LE);
+        testCompareMaskNotShort(ShortVector.SPECIES_128, VectorOperators.LE, (m) -> { return m.cast(ByteVector.SPECIES_64).not(); });
+        verifyResultsShort(ShortVector.SPECIES_128, VectorOperators.LE);
+    }
+
+    @Test
+    @IR(counts = { IRNode.XOR_V_MASK, "= 1",
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
+        applyIfCPUFeatureAnd = { "sve", "true" },
+        applyIf = { "MaxVectorSize", "= 16" })
+    @IR(counts = { IRNode.XOR_V_MASK, "= 0",
+                   IRNode.XOR_V, "= 0",
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
+        applyIfCPUFeatureAnd = { "asimd", "true", "sve", "false" })
+    @IR(counts = { IRNode.XOR_V_MASK, "= 0",
+                   IRNode.XOR_V, "= 0",
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
+        applyIfCPUFeatureOr = { "avx2", "true", "rvv", "true" })
+    public static void testCompareGEMaskNotShortCast() {
+        testCompareMaskNotShort(ShortVector.SPECIES_64, VectorOperators.GE, (m) -> { return IntVector.SPECIES_128.maskAll(true).xor(m.cast(IntVector.SPECIES_128)); });
+        verifyResultsShort(ShortVector.SPECIES_64, VectorOperators.GE);
+        testCompareMaskNotShort(ShortVector.SPECIES_128, VectorOperators.GE, (m) -> { return m.cast(ByteVector.SPECIES_64).not(); });
+        verifyResultsShort(ShortVector.SPECIES_128, VectorOperators.GE);
+    }
+
+    @Test
+    @IR(counts = { IRNode.XOR_V_MASK, "= 1",
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
+        applyIfCPUFeatureAnd = { "sve", "true" },
+        applyIf = { "MaxVectorSize", "= 16" })
+    @IR(counts = { IRNode.XOR_V_MASK, "= 0",
+                   IRNode.XOR_V, "= 0",
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
+        applyIfCPUFeatureAnd = { "asimd", "true", "sve", "false" })
+    @IR(counts = { IRNode.XOR_V_MASK, "= 0",
+                   IRNode.XOR_V, "= 0",
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
+        applyIfCPUFeatureOr = { "avx2", "true", "rvv", "true" })
+    public static void testCompareULTMaskNotShortCast() {
+        testCompareMaskNotShort(ShortVector.SPECIES_64, VectorOperators.ULT, (m) -> { return IntVector.SPECIES_128.maskAll(true).xor(m.cast(IntVector.SPECIES_128)); });
+        verifyResultsShort(ShortVector.SPECIES_64, VectorOperators.ULT);
+        testCompareMaskNotShort(ShortVector.SPECIES_128, VectorOperators.ULT, (m) -> { return m.cast(ByteVector.SPECIES_64).not(); });
+        verifyResultsShort(ShortVector.SPECIES_128, VectorOperators.ULT);
+    }
+
+    @Test
+    @IR(counts = { IRNode.XOR_V_MASK, "= 1",
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
+        applyIfCPUFeatureAnd = { "sve", "true" },
+        applyIf = { "MaxVectorSize", "= 16" })
+    @IR(counts = { IRNode.XOR_V_MASK, "= 0",
+                   IRNode.XOR_V, "= 0",
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
+        applyIfCPUFeatureAnd = { "asimd", "true", "sve", "false" })
+    @IR(counts = { IRNode.XOR_V_MASK, "= 0",
+                   IRNode.XOR_V, "= 0",
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
+        applyIfCPUFeatureOr = { "avx2", "true", "rvv", "true" })
+    public static void testCompareUGTMaskNotShortCast() {
+        testCompareMaskNotShort(ShortVector.SPECIES_64, VectorOperators.UGT, (m) -> { return IntVector.SPECIES_128.maskAll(true).xor(m.cast(IntVector.SPECIES_128)); });
+        verifyResultsShort(ShortVector.SPECIES_64, VectorOperators.UGT);
+        testCompareMaskNotShort(ShortVector.SPECIES_128, VectorOperators.UGT, (m) -> { return m.cast(ByteVector.SPECIES_64).not(); });
+        verifyResultsShort(ShortVector.SPECIES_128, VectorOperators.UGT);
+    }
+
+    @Test
+    @IR(counts = { IRNode.XOR_V_MASK, "= 1",
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
+        applyIfCPUFeatureAnd = { "sve", "true" },
+        applyIf = { "MaxVectorSize", "= 16" })
+    @IR(counts = { IRNode.XOR_V_MASK, "= 0",
+                   IRNode.XOR_V, "= 0",
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
+        applyIfCPUFeatureAnd = { "asimd", "true", "sve", "false" })
+    @IR(counts = { IRNode.XOR_V_MASK, "= 0",
+                   IRNode.XOR_V, "= 0",
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
+        applyIfCPUFeatureOr = { "avx2", "true", "rvv", "true" })
+    public static void testCompareULEMaskNotShortCast() {
+        testCompareMaskNotShort(ShortVector.SPECIES_64, VectorOperators.ULE, (m) -> { return IntVector.SPECIES_128.maskAll(true).xor(m.cast(IntVector.SPECIES_128)); });
+        verifyResultsShort(ShortVector.SPECIES_64, VectorOperators.ULE);
+        testCompareMaskNotShort(ShortVector.SPECIES_128, VectorOperators.ULE, (m) -> { return m.cast(ByteVector.SPECIES_64).not(); });
+        verifyResultsShort(ShortVector.SPECIES_128, VectorOperators.ULE);
+    }
+
+    @Test
+    @IR(counts = { IRNode.XOR_V_MASK, "= 1",
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
+        applyIfCPUFeatureAnd = { "sve", "true" },
+        applyIf = { "MaxVectorSize", "= 16" })
+    @IR(counts = { IRNode.XOR_V_MASK, "= 0",
+                   IRNode.XOR_V, "= 0",
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
+        applyIfCPUFeatureAnd = { "asimd", "true", "sve", "false" })
+    @IR(counts = { IRNode.XOR_V_MASK, "= 0",
+                   IRNode.XOR_V, "= 0",
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
+        applyIfCPUFeatureOr = { "avx2", "true", "rvv", "true" })
+    public static void testCompareUGEMaskNotShortCast() {
+        testCompareMaskNotShort(ShortVector.SPECIES_64, VectorOperators.UGE, (m) -> { return IntVector.SPECIES_128.maskAll(true).xor(m.cast(IntVector.SPECIES_128)); });
+        verifyResultsShort(ShortVector.SPECIES_64, VectorOperators.UGE);
+        testCompareMaskNotShort(ShortVector.SPECIES_128, VectorOperators.UGE, (m) -> { return m.cast(ByteVector.SPECIES_64).not(); });
+        verifyResultsShort(ShortVector.SPECIES_128, VectorOperators.UGE);
+    }
+
+    @Test
+    @IR(counts = { IRNode.XOR_V_MASK, "= 1",
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
+        applyIfCPUFeatureAnd = { "sve", "true" },
+        applyIf = { "MaxVectorSize", "= 16" })
+    @IR(counts = { IRNode.XOR_V_MASK, "= 0",
+                   IRNode.XOR_V, "= 0",
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
+        applyIfCPUFeatureAnd = { "asimd", "true", "sve", "false" })
+    @IR(counts = { IRNode.XOR_V_MASK, "= 0",
+                   IRNode.XOR_V, "= 0",
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
+        applyIfCPUFeatureOr = { "avx2", "true", "rvv", "true" })
+    public static void testCompareEQMaskNotIntCast() {
+        testCompareMaskNotInt(I_SPECIES_FOR_CAST, VectorOperators.EQ, (m) -> { return L_SPECIES_FOR_CAST.maskAll(true).xor(m.cast(L_SPECIES_FOR_CAST)); });
+        verifyResultsInt(I_SPECIES_FOR_CAST, VectorOperators.EQ);
+        testCompareMaskNotInt(IntVector.SPECIES_128, VectorOperators.EQ, (m) -> { return m.cast(ShortVector.SPECIES_64).not(); });
+        verifyResultsInt(IntVector.SPECIES_128, VectorOperators.EQ);
+    }
+
+    @Test
+    @IR(counts = { IRNode.XOR_V_MASK, "= 1",
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
+        applyIfCPUFeatureAnd = { "sve", "true" },
+        applyIf = { "MaxVectorSize", "= 16" })
+    @IR(counts = { IRNode.XOR_V_MASK, "= 0",
+                   IRNode.XOR_V, "= 0",
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
+        applyIfCPUFeatureAnd = { "asimd", "true", "sve", "false" })
+    @IR(counts = { IRNode.XOR_V_MASK, "= 0",
+                   IRNode.XOR_V, "= 0",
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
+        applyIfCPUFeatureOr = { "avx2", "true", "rvv", "true" })
+    public static void testCompareNEMaskNotIntCast() {
+        testCompareMaskNotInt(I_SPECIES_FOR_CAST, VectorOperators.NE, (m) -> { return L_SPECIES_FOR_CAST.maskAll(true).xor(m.cast(L_SPECIES_FOR_CAST)); });
+        verifyResultsInt(I_SPECIES_FOR_CAST, VectorOperators.NE);
+        testCompareMaskNotInt(IntVector.SPECIES_128, VectorOperators.NE, (m) -> { return m.cast(ShortVector.SPECIES_64).not(); });
+        verifyResultsInt(IntVector.SPECIES_128, VectorOperators.NE);
+    }
+
+    @Test
+    @IR(counts = { IRNode.XOR_V_MASK, "= 1",
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
+        applyIfCPUFeatureAnd = { "sve", "true" },
+        applyIf = { "MaxVectorSize", "= 16" })
+    @IR(counts = { IRNode.XOR_V_MASK, "= 0",
+                   IRNode.XOR_V, "= 0",
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
+        applyIfCPUFeatureAnd = { "asimd", "true", "sve", "false" })
+    @IR(counts = { IRNode.XOR_V_MASK, "= 0",
+                   IRNode.XOR_V, "= 0",
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
+        applyIfCPUFeatureOr = { "avx2", "true", "rvv", "true" })
+    public static void testCompareLTMaskNotIntCast() {
+        testCompareMaskNotInt(I_SPECIES_FOR_CAST, VectorOperators.LT, (m) -> { return L_SPECIES_FOR_CAST.maskAll(true).xor(m.cast(L_SPECIES_FOR_CAST)); });
+        verifyResultsInt(I_SPECIES_FOR_CAST, VectorOperators.LT);
+        testCompareMaskNotInt(IntVector.SPECIES_128, VectorOperators.LT, (m) -> { return m.cast(ShortVector.SPECIES_64).not(); });
+        verifyResultsInt(IntVector.SPECIES_128, VectorOperators.LT);
+    }
+
+    @Test
+    @IR(counts = { IRNode.XOR_V_MASK, "= 1",
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
+        applyIfCPUFeatureAnd = { "sve", "true" },
+        applyIf = { "MaxVectorSize", "= 16" })
+    @IR(counts = { IRNode.XOR_V_MASK, "= 0",
+                   IRNode.XOR_V, "= 0",
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
+        applyIfCPUFeatureAnd = { "asimd", "true", "sve", "false" })
+    @IR(counts = { IRNode.XOR_V_MASK, "= 0",
+                   IRNode.XOR_V, "= 0",
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
+        applyIfCPUFeatureOr = { "avx2", "true", "rvv", "true" })
+    public static void testCompareGTMaskNotIntCast() {
+        testCompareMaskNotInt(I_SPECIES_FOR_CAST, VectorOperators.GT, (m) -> { return L_SPECIES_FOR_CAST.maskAll(true).xor(m.cast(L_SPECIES_FOR_CAST)); });
+        verifyResultsInt(I_SPECIES_FOR_CAST, VectorOperators.GT);
+        testCompareMaskNotInt(IntVector.SPECIES_128, VectorOperators.GT, (m) -> { return m.cast(ShortVector.SPECIES_64).not(); });
+        verifyResultsInt(IntVector.SPECIES_128, VectorOperators.GT);
+    }
+
+    @Test
+    @IR(counts = { IRNode.XOR_V_MASK, "= 1",
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
+        applyIfCPUFeatureAnd = { "sve", "true" },
+        applyIf = { "MaxVectorSize", "= 16" })
+    @IR(counts = { IRNode.XOR_V_MASK, "= 0",
+                   IRNode.XOR_V, "= 0",
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
+        applyIfCPUFeatureAnd = { "asimd", "true", "sve", "false" })
+    @IR(counts = { IRNode.XOR_V_MASK, "= 0",
+                   IRNode.XOR_V, "= 0",
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
+        applyIfCPUFeatureOr = { "avx2", "true", "rvv", "true" })
+    public static void testCompareLEMaskNotIntCast() {
+        testCompareMaskNotInt(I_SPECIES_FOR_CAST, VectorOperators.LE, (m) -> { return L_SPECIES_FOR_CAST.maskAll(true).xor(m.cast(L_SPECIES_FOR_CAST)); });
+        verifyResultsInt(I_SPECIES_FOR_CAST, VectorOperators.LE);
+        testCompareMaskNotInt(IntVector.SPECIES_128, VectorOperators.LE, (m) -> { return m.cast(ShortVector.SPECIES_64).not(); });
+        verifyResultsInt(IntVector.SPECIES_128, VectorOperators.LE);
+    }
+
+    @Test
+    @IR(counts = { IRNode.XOR_V_MASK, "= 1",
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
+        applyIfCPUFeatureAnd = { "sve", "true" },
+        applyIf = { "MaxVectorSize", "= 16" })
+    @IR(counts = { IRNode.XOR_V_MASK, "= 0",
+                   IRNode.XOR_V, "= 0",
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
+        applyIfCPUFeatureAnd = { "asimd", "true", "sve", "false" })
+    @IR(counts = { IRNode.XOR_V_MASK, "= 0",
+                   IRNode.XOR_V, "= 0",
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
+        applyIfCPUFeatureOr = { "avx2", "true", "rvv", "true" })
+    public static void testCompareGEMaskNotIntCast() {
+        testCompareMaskNotInt(I_SPECIES_FOR_CAST, VectorOperators.GE, (m) -> { return L_SPECIES_FOR_CAST.maskAll(true).xor(m.cast(L_SPECIES_FOR_CAST)); });
+        verifyResultsInt(I_SPECIES_FOR_CAST, VectorOperators.GE);
+        testCompareMaskNotInt(IntVector.SPECIES_128, VectorOperators.GE, (m) -> { return m.cast(ShortVector.SPECIES_64).not(); });
+        verifyResultsInt(IntVector.SPECIES_128, VectorOperators.GE);
+    }
+
+    @Test
+    @IR(counts = { IRNode.XOR_V_MASK, "= 1",
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
+        applyIfCPUFeatureAnd = { "sve", "true" },
+        applyIf = { "MaxVectorSize", "= 16" })
+    @IR(counts = { IRNode.XOR_V_MASK, "= 0",
+                   IRNode.XOR_V, "= 0",
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
+        applyIfCPUFeatureAnd = { "asimd", "true", "sve", "false" })
+    @IR(counts = { IRNode.XOR_V_MASK, "= 0",
+                   IRNode.XOR_V, "= 0",
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
+        applyIfCPUFeatureOr = { "avx2", "true", "rvv", "true" })
+    public static void testCompareULTMaskNotIntCast() {
+        testCompareMaskNotInt(I_SPECIES_FOR_CAST, VectorOperators.ULT, (m) -> { return L_SPECIES_FOR_CAST.maskAll(true).xor(m.cast(L_SPECIES_FOR_CAST)); });
+        verifyResultsInt(I_SPECIES_FOR_CAST, VectorOperators.ULT);
+        testCompareMaskNotInt(IntVector.SPECIES_128, VectorOperators.ULT, (m) -> { return m.cast(ShortVector.SPECIES_64).not(); });
+        verifyResultsInt(IntVector.SPECIES_128, VectorOperators.ULT);
+    }
+
+    @Test
+    @IR(counts = { IRNode.XOR_V_MASK, "= 1",
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
+        applyIfCPUFeatureAnd = { "sve", "true" },
+        applyIf = { "MaxVectorSize", "= 16" })
+    @IR(counts = { IRNode.XOR_V_MASK, "= 0",
+                   IRNode.XOR_V, "= 0",
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
+        applyIfCPUFeatureAnd = { "asimd", "true", "sve", "false" })
+    @IR(counts = { IRNode.XOR_V_MASK, "= 0",
+                   IRNode.XOR_V, "= 0",
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
+        applyIfCPUFeatureOr = { "avx2", "true", "rvv", "true" })
+    public static void testCompareUGTMaskNotIntCast() {
+        testCompareMaskNotInt(I_SPECIES_FOR_CAST, VectorOperators.UGT, (m) -> { return L_SPECIES_FOR_CAST.maskAll(true).xor(m.cast(L_SPECIES_FOR_CAST)); });
+        verifyResultsInt(I_SPECIES_FOR_CAST, VectorOperators.UGT);
+        testCompareMaskNotInt(IntVector.SPECIES_128, VectorOperators.UGT, (m) -> { return m.cast(ShortVector.SPECIES_64).not(); });
+        verifyResultsInt(IntVector.SPECIES_128, VectorOperators.UGT);
+    }
+
+    @Test
+    @IR(counts = { IRNode.XOR_V_MASK, "= 1",
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
+        applyIfCPUFeatureAnd = { "sve", "true" },
+        applyIf = { "MaxVectorSize", "= 16" })
+    @IR(counts = { IRNode.XOR_V_MASK, "= 0",
+                   IRNode.XOR_V, "= 0",
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
+        applyIfCPUFeatureAnd = { "asimd", "true", "sve", "false" })
+    @IR(counts = { IRNode.XOR_V_MASK, "= 0",
+                   IRNode.XOR_V, "= 0",
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
+        applyIfCPUFeatureOr = { "avx2", "true", "rvv", "true" })
+    public static void testCompareULEMaskNotIntCast() {
+        testCompareMaskNotInt(I_SPECIES_FOR_CAST, VectorOperators.ULE, (m) -> { return L_SPECIES_FOR_CAST.maskAll(true).xor(m.cast(L_SPECIES_FOR_CAST)); });
+        verifyResultsInt(I_SPECIES_FOR_CAST, VectorOperators.ULE);
+        testCompareMaskNotInt(IntVector.SPECIES_128, VectorOperators.ULE, (m) -> { return m.cast(ShortVector.SPECIES_64).not(); });
+        verifyResultsInt(IntVector.SPECIES_128, VectorOperators.ULE);
+    }
+
+    @Test
+    @IR(counts = { IRNode.XOR_V_MASK, "= 1",
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
+        applyIfCPUFeatureAnd = { "sve", "true" },
+        applyIf = { "MaxVectorSize", "= 16" })
+    @IR(counts = { IRNode.XOR_V_MASK, "= 0",
+                   IRNode.XOR_V, "= 0",
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
+        applyIfCPUFeatureAnd = { "asimd", "true", "sve", "false" })
+    @IR(counts = { IRNode.XOR_V_MASK, "= 0",
+                   IRNode.XOR_V, "= 0",
+                   IRNode.VECTOR_MASK_CMP, "= 2" },
+        applyIfCPUFeatureOr = { "avx2", "true", "rvv", "true" })
+    public static void testCompareUGEMaskNotIntCast() {
+        testCompareMaskNotInt(I_SPECIES_FOR_CAST, VectorOperators.UGE, (m) -> { return L_SPECIES_FOR_CAST.maskAll(true).xor(m.cast(L_SPECIES_FOR_CAST)); });
+        verifyResultsInt(I_SPECIES_FOR_CAST, VectorOperators.UGE);
+        testCompareMaskNotInt(IntVector.SPECIES_128, VectorOperators.UGE, (m) -> { return m.cast(ShortVector.SPECIES_64).not(); });
+        verifyResultsInt(IntVector.SPECIES_128, VectorOperators.UGE);
+    }
+
+    @Test
+    @IR(counts = { IRNode.XOR_V_MASK, "= 1",
+                   IRNode.VECTOR_MASK_CMP, "= 1" },
+        applyIfCPUFeatureOr = { "sve", "true", "avx512", "true", "rvv", "true" })
+    @IR(counts = { IRNode.XOR_V, "= 1",
+                   IRNode.VECTOR_MASK_CMP, "= 1" },
+        applyIfCPUFeatureAnd = { "asimd", "true", "sve", "false" })
+    @IR(counts = { IRNode.XOR_V, "= 1",
+                   IRNode.VECTOR_MASK_CMP, "= 1" },
+        applyIfCPUFeatureAnd = { "avx2", "true", "avx512", "false" })
+    public static void testMaskedCompareMaskNotNegative() {
+        int expected = F_SPECIES.length();
+        var ones = FloatVector.broadcast(F_SPECIES, 1f);
+        // All true mask
+        VectorMask<Float> on = VectorMask.fromArray(F_SPECIES, mi, 0);
+        int got = ones.compare(VectorOperators.NE, TWO_FLOAT, on)
+                      .not()
+                      .firstTrue();
+        Asserts.assertEquals(expected, got);
+    }
+
     public static void main(String[] args) {
         TestFramework testFramework = new TestFramework();
-        testFramework.setDefaultWarmup(5000)
+        testFramework.setDefaultWarmup(10000)
                      .addFlags("--add-modules=jdk.incubator.vector")
                      .start();
     }
diff --git a/test/hotspot/jtreg/gc/g1/TestGCLogMessages.java b/test/hotspot/jtreg/gc/g1/TestGCLogMessages.java
index 36e38e6e2fb..68391893a32 100644
--- a/test/hotspot/jtreg/gc/g1/TestGCLogMessages.java
+++ b/test/hotspot/jtreg/gc/g1/TestGCLogMessages.java
@@ -86,7 +86,7 @@ public class TestGCLogMessages {
         }
 
         public boolean isAvailable() {
-            return Compiler.isC2Enabled();
+            return Compiler.isC2Included();
         }
     }
 
diff --git a/test/hotspot/jtreg/gc/shenandoah/TestSoftMaxHeapSizeAvailableCalc.java b/test/hotspot/jtreg/gc/shenandoah/TestSoftMaxHeapSizeAvailableCalc.java
index e70f2f0849f..100666fe859 100644
--- a/test/hotspot/jtreg/gc/shenandoah/TestSoftMaxHeapSizeAvailableCalc.java
+++ b/test/hotspot/jtreg/gc/shenandoah/TestSoftMaxHeapSizeAvailableCalc.java
@@ -35,6 +35,7 @@
  *      -XX:ShenandoahGCMode=satb
  *      -XX:+ShenandoahDegeneratedGC
  *      -XX:ShenandoahGCHeuristics=adaptive
+ *      -XX:ShenandoahLearningSteps=0
  *      TestSoftMaxHeapSizeAvailableCalc
  */
 
@@ -60,6 +61,7 @@
  *      -XX:+UseShenandoahGC -Xlog:gc=info
  *      -XX:ShenandoahGCMode=generational
  *      -XX:ShenandoahGCHeuristics=adaptive
+ *      -XX:ShenandoahLearningSteps=0
  *      TestSoftMaxHeapSizeAvailableCalc
  *
  */
@@ -87,12 +89,13 @@ public class TestSoftMaxHeapSizeAvailableCalc {
     // Soft max: 512M, ShenandoahMinFreeThreshold: 10 (default), ShenandoahEvacReserve: 5 (default)
     // Soft max for mutator: 512M * (100.0 - 5) / 100 = 486.4M
     // Threshold to trigger gc: 486.4M - 512 * 10 / 100.0 = 435.2M, just above (300 + 100)M.
-    // Expect gc count to be less than 1 / sec.
+    // Expect gc count to be less than 1 / sec, but to allow for other trigger conditions (like allocation rate),
+    // we bump the max allowed gc count to 35.
     public static class Allocate {
         static final List<byte[]> longLived = new ArrayList<>();
 
         public static void test() throws Exception {
-            final int expectedMaxGcCount = Integer.getInteger("expectedMaxGcCount", 30);
+            final int expectedMaxGcCount = Integer.getInteger("expectedMaxGcCount", 35);
             List<java.lang.management.GarbageCollectorMXBean> collectors = ManagementFactory.getGarbageCollectorMXBeans();
             java.lang.management.GarbageCollectorMXBean cycleCollector = null;
             for (java.lang.management.GarbageCollectorMXBean bean : collectors) {
diff --git a/test/hotspot/jtreg/runtime/NMT/MallocStressTest.java b/test/hotspot/jtreg/runtime/NMT/MallocStressTest.java
index 6af14268c7e..db65348246b 100644
--- a/test/hotspot/jtreg/runtime/NMT/MallocStressTest.java
+++ b/test/hotspot/jtreg/runtime/NMT/MallocStressTest.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2026, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2023, Red Hat, Inc. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
@@ -137,6 +137,27 @@ public class MallocStressTest {
         pb.command(new String[] { JDKToolFinder.getJDKTool("jcmd"), pid, "VM.native_memory", "statistics"});
         output = new OutputAnalyzer(pb.start());
         output.shouldNotContain("Tracking level has been downgraded due to lack of resources");
+
+        if (Platform.isDebugBuild()) {
+            String expectedCountString = output.firstMatch("\\s*Expected entry count: (\\d+)", 1);
+            String totalEntriesString = output.firstMatch("\\s*Total entries: (\\d+)", 1);
+
+            if (expectedCountString == null || totalEntriesString == null) {
+                throw new RuntimeException("Missing malloc site table entry counts in NMT statistics output");
+            }
+
+            int expectedCount = Integer.parseInt(expectedCountString);
+            int totalEntries = Integer.parseInt(totalEntriesString);
+
+            // The expected count is loaded after the total entries has accumulated (internal impl detail from hotspot).
+            // As this is a concurrent hashtable, we might have added more malloc sites.
+            // So, we're ok with the expectedCount being larger than totalEntries, but if it's *A lot* larger something
+            // really weird is going on.
+            if (expectedCount - totalEntries > 5) {
+                throw new RuntimeException("Malloc site table entry count mismatch: expected "
+                                           + expectedCount + ", walked " + totalEntries);
+            }
+        }
     }
 
     private static void sleep_wait(int n) {
diff --git a/test/hotspot/jtreg/serviceability/jvmti/vthread/SuspendResume4/SuspendResume4.java b/test/hotspot/jtreg/serviceability/jvmti/vthread/SuspendResume4/SuspendResume4.java
new file mode 100644
index 00000000000..db2eff4c1b5
--- /dev/null
+++ b/test/hotspot/jtreg/serviceability/jvmti/vthread/SuspendResume4/SuspendResume4.java
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/**
+ * @test
+ * @bug 8376621
+ * @summary Suspend virtual thread while it's inside disableSuspendAndPreempt region
+ * @requires vm.continuations
+ * @requires vm.jvmti
+ * @library /test/lib /test/hotspot/jtreg/testlibrary
+ * @run main/othervm SuspendResume4
+ */
+
+import jdk.test.lib.Utils;
+import jdk.test.lib.process.OutputAnalyzer;
+import jdk.test.lib.process.ProcessTools;
+
+import java.io.File;
+
+import jvmti.JVMTIUtils;
+
+public class SuspendResume4 {
+    native static void suspendThread(Thread thread);
+    native static void resumeThread(Thread thread);
+
+    public static void main(String[] args) throws Exception {
+        // Run test in child VM where Locale won't be initialized already by jtreg
+        ProcessBuilder pb = ProcessTools.createTestJavaProcessBuilder(
+            "-Djava.library.path=" + Utils.TEST_NATIVE_PATH,
+            "-agentpath:" + Utils.TEST_NATIVE_PATH + File.separator + System.mapLibraryName("SuspendResume4"),
+            "SuspendResume4$Test");
+        OutputAnalyzer output = ProcessTools.executeProcess(pb);
+        System.out.println(output.getStdout());
+        output.shouldHaveExitValue(0);
+    }
+
+    static class Test{
+        static String targetName;
+
+        private void runTest() throws Exception {
+            // start target vthread
+            Thread target = Thread.ofVirtual().name("target").start(() -> {
+                // Give time for reader to get suspended in
+                // disableSuspendAndPreempt region.
+                spinWaitMillis(100);
+                // Force unmounting. If reader was suspended inside
+                // disableSuspendAndPreempt region this will block
+                // in VirtualThread.unmount.
+                Thread.yield();
+            });
+
+            // start clinit contender
+            Thread contender = Thread.ofPlatform().name("contender").start(() -> {
+                "JAVA".toLowerCase(java.util.Locale.ROOT);
+            });
+
+            // start vthread that reads target's state
+            Thread reader = Thread.ofVirtual().name("reader").start(() -> {
+                targetName = "name: " + target;
+            });
+
+            // start suspend/resumer
+            Thread suspender = Thread.ofPlatform().name("suspender").start(() -> {
+                SuspendResume4.suspendThread(reader);
+                // Give target time for Thread.yield
+                spinWaitMillis(100);
+                SuspendResume4.resumeThread(reader);
+            });
+
+            target.join();
+            contender.join();
+            suspender.join();
+            reader.join();
+        }
+
+        public static void main(String[] args) throws Exception {
+            Test obj = new Test();
+            obj.runTest();
+        }
+
+        static void spinWaitMillis(long millis) {
+            long durationNanos = millis * 1_000_000L;
+            long start = System.nanoTime();
+            while (System.nanoTime() - start < durationNanos) {
+                Thread.onSpinWait();
+            }
+        }
+    }
+}
diff --git a/test/hotspot/jtreg/serviceability/jvmti/vthread/SuspendResume4/libSuspendResume4.cpp b/test/hotspot/jtreg/serviceability/jvmti/vthread/SuspendResume4/libSuspendResume4.cpp
new file mode 100644
index 00000000000..38e90889b7a
--- /dev/null
+++ b/test/hotspot/jtreg/serviceability/jvmti/vthread/SuspendResume4/libSuspendResume4.cpp
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+#include <jni.h>
+#include <jvmti.h>
+#include <stdio.h>
+#include <string.h>
+#include "jvmti_common.hpp"
+
+// set by Agent_OnLoad
+static jvmtiEnv* jvmti = nullptr;
+
+extern "C" {
+
+JNIEXPORT void JNICALL
+Java_SuspendResume4_suspendThread(JNIEnv* jni, jclass klass, jthread thread) {
+  jvmtiError err = jvmti->SuspendThread(thread);
+  if (err != JVMTI_ERROR_NONE && err != JVMTI_ERROR_THREAD_NOT_ALIVE) {
+    jni->FatalError("error in JVMTI SuspendThread");
+  }
+}
+
+JNIEXPORT void JNICALL
+Java_SuspendResume4_resumeThread(JNIEnv* jni, jclass klass, jthread thread) {
+  jvmtiError err = jvmti->ResumeThread(thread);
+  if (err != JVMTI_ERROR_NONE && err != JVMTI_ERROR_THREAD_NOT_ALIVE) {
+    jni->FatalError("error in JVMTI ResumeThread");
+  }
+}
+
+JNIEXPORT jint JNICALL
+Agent_OnLoad(JavaVM* jvm, char* options, void* reserved) {
+  jvmtiCapabilities caps;
+  jvmtiError err;
+
+  printf("Agent_OnLoad: started\n");
+  if (jvm->GetEnv((void **) (&jvmti), JVMTI_VERSION) != JNI_OK) {
+    LOG("Agent_OnLoad: error in GetEnv");
+    return JNI_ERR;
+  }
+
+  memset(&caps, 0, sizeof(caps));
+  caps.can_suspend = 1;
+  err = jvmti->AddCapabilities(&caps);
+  if (err != JVMTI_ERROR_NONE) {
+    LOG("Agent_OnLoad: error in JVMTI AddCapabilities: %d\n", err);
+  }
+
+  printf("Agent_OnLoad: finished\n");
+
+  return 0;
+}
+
+} // extern "C"
diff --git a/test/hotspot/jtreg/vmTestbase/nsk/stress/jni/GarbageGenerator.java b/test/hotspot/jtreg/vmTestbase/nsk/stress/jni/GarbageGenerator.java
index bb9c9afa394..bc3ada61136 100644
--- a/test/hotspot/jtreg/vmTestbase/nsk/stress/jni/GarbageGenerator.java
+++ b/test/hotspot/jtreg/vmTestbase/nsk/stress/jni/GarbageGenerator.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018, 2026, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -22,8 +22,9 @@
  */
 
 package nsk.stress.jni;
+import jdk.test.lib.thread.ThreadWrapper;
 
-class GarbageGenerator extends Thread {
+class GarbageGenerator extends ThreadWrapper {
     class Garbage {
         Garbage() {
             this(1024);
diff --git a/test/hotspot/jtreg/vmTestbase/nsk/stress/jni/jnistress001.java b/test/hotspot/jtreg/vmTestbase/nsk/stress/jni/jnistress001.java
index 0335942e775..16ccaa310a7 100644
--- a/test/hotspot/jtreg/vmTestbase/nsk/stress/jni/jnistress001.java
+++ b/test/hotspot/jtreg/vmTestbase/nsk/stress/jni/jnistress001.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2007, 2026, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -61,8 +61,9 @@ package nsk.stress.jni;
 import nsk.share.Consts;
 import nsk.share.Debug;
 import nsk.share.test.StressOptions;
+import jdk.test.lib.thread.ThreadWrapper;
 
-public class jnistress001 extends Thread {
+public class jnistress001 extends ThreadWrapper {
 
     /* Maximum number of iterations.    Ignored if <= 0L */
     static long numIteration = 0L;
@@ -256,8 +257,11 @@ public class jnistress001 extends Thread {
         garb = new GarbageGenerator[nGarb];
         for (i = 0; i < nJNI; i++)
             jniter[i] = new JNIter001(sync);
+        Thread[] jniterThreads = new Thread[nJNI];
+        for (i = 0; i < nJNI; i++)
+            jniterThreads[i] = jniter[i].getThread();
         for (i = 0; i < nInter; i++) {
-            irupt[i] = new Interrupter(jniter, sync);
+            irupt[i] = new Interrupter(jniterThreads, sync);
             irupt[i].setInterval(iruptInterval);
         }
         for (i = 0; i < nGarb; i++) {
@@ -372,7 +376,7 @@ public class jnistress001 extends Thread {
     final private static boolean DEBUG = false;
 }
 
-class JNIter001 extends Thread {
+class JNIter001 extends ThreadWrapper {
 
     // The native method for testing JNI UTF-8 calls
     public native String jnistress(String threadName, int nstr, int printPeriod);
diff --git a/test/hotspot/jtreg/vmTestbase/nsk/stress/jni/jnistress002.java b/test/hotspot/jtreg/vmTestbase/nsk/stress/jni/jnistress002.java
index 1a9e2999f20..5b6912b623d 100644
--- a/test/hotspot/jtreg/vmTestbase/nsk/stress/jni/jnistress002.java
+++ b/test/hotspot/jtreg/vmTestbase/nsk/stress/jni/jnistress002.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2007, 2026, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -60,12 +60,13 @@ package nsk.stress.jni;
 import nsk.share.Consts;
 import nsk.share.Debug;
 import nsk.share.test.StressOptions;
+import jdk.test.lib.thread.ThreadWrapper;
 import jdk.test.lib.Utils;
 
 import java.lang.reflect.Field;
 import java.util.Random;
 
-public class jnistress002 extends Thread {
+public class jnistress002 extends ThreadWrapper {
 
     /* Maximum number of iterations.    Ignored if <= 0L */
     static long numIteration = 0L;
@@ -246,8 +247,11 @@ public class jnistress002 extends Thread {
         garb = new GarbageGenerator[nGarb];
         for (i = 0; i < nJNI; i++)
             jniter[i] = new JNIter002(sync);
+        Thread[] jniterThreads = new Thread[nJNI];
+        for (i = 0; i < nJNI; i++)
+            jniterThreads[i] = jniter[i].getThread();
         for (i = 0; i < nInter; i++) {
-            irupt[i] = new Interrupter(jniter, sync);
+            irupt[i] = new Interrupter(jniterThreads, sync);
             irupt[i].setInterval(iruptInterval);
         }
         for (i = 0; i < nGarb; i++) {
@@ -423,7 +427,7 @@ class objectsJNI {
     }
 }
 
-class JNIter002 extends Thread {
+class JNIter002 extends ThreadWrapper {
 
     // The native method for testing JNI Object's calls
     public native objectsJNI[] jniobjects(String s, int i, long l,
diff --git a/test/hotspot/jtreg/vmTestbase/nsk/stress/jni/jnistress003.java b/test/hotspot/jtreg/vmTestbase/nsk/stress/jni/jnistress003.java
index e06f2e6279f..b1e2702011a 100644
--- a/test/hotspot/jtreg/vmTestbase/nsk/stress/jni/jnistress003.java
+++ b/test/hotspot/jtreg/vmTestbase/nsk/stress/jni/jnistress003.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2007, 2026, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -60,10 +60,11 @@ package nsk.stress.jni;
 import nsk.share.Consts;
 import nsk.share.Debug;
 import nsk.share.test.StressOptions;
+import jdk.test.lib.thread.ThreadWrapper;
 
 import java.lang.reflect.Array;
 
-public class jnistress003 extends Thread {
+public class jnistress003 extends ThreadWrapper {
 
     /* Maximum number of iterations.  Ignored if <= 0L */
     static long numIteration = 0L;
@@ -257,8 +258,11 @@ public class jnistress003 extends Thread {
         garb = new GarbageGenerator[nGarb];
         for (i = 0; i < nJNI; i++)
             jniter[i] = new JNIter003(sync);
+        Thread[] jniterThreads = new Thread[nJNI];
+        for (i = 0; i < nJNI; i++)
+            jniterThreads[i] = jniter[i].getThread();
         for (i = 0; i < nInter; i++) {
-            irupt[i] = new Interrupter(jniter, sync);
+            irupt[i] = new Interrupter(jniterThreads, sync);
             irupt[i].setInterval(iruptInterval);
         }
         for (i = 0; i < nGarb; i++) {
@@ -379,7 +383,7 @@ public class jnistress003 extends Thread {
     final private static boolean DEBUG = false;
 }
 
-class JNIter003 extends Thread {
+class JNIter003 extends ThreadWrapper {
 
     // The native methods for testing JNI Arrays calls
 
diff --git a/test/hotspot/jtreg/vmTestbase/nsk/stress/jni/jnistress004.java b/test/hotspot/jtreg/vmTestbase/nsk/stress/jni/jnistress004.java
index 6432b0ccc3c..705ddd8a5f1 100644
--- a/test/hotspot/jtreg/vmTestbase/nsk/stress/jni/jnistress004.java
+++ b/test/hotspot/jtreg/vmTestbase/nsk/stress/jni/jnistress004.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2007, 2026, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -60,8 +60,9 @@ package nsk.stress.jni;
 import nsk.share.Consts;
 import nsk.share.Debug;
 import nsk.share.test.StressOptions;
+import jdk.test.lib.thread.ThreadWrapper;
 
-public class jnistress004 extends Thread {
+public class jnistress004 extends ThreadWrapper {
 
     /* Maximum number of iterations.  Ignored if <= 0L */
     static long numIteration = 2L;
@@ -243,8 +244,11 @@ public class jnistress004 extends Thread {
         garb = new GarbageGenerator[nGarb];
         for (i = 0; i < nJNI; i++)
             jniter[i] = new JNIter004(sync);
+        Thread[] jniterThreads = new Thread[nJNI];
+        for (i = 0; i < nJNI; i++)
+            jniterThreads[i] = jniter[i].getThread();
         for (i = 0; i < nInter; i++) {
-            irupt[i] = new Interrupter(jniter, sync);
+            irupt[i] = new Interrupter(jniterThreads, sync);
             irupt[i].setInterval(iruptInterval);
         }
         for (i = 0; i < nGarb; i++) {
@@ -364,7 +368,7 @@ public class jnistress004 extends Thread {
     final private static boolean DEBUG = false;
 }
 
-class JNIter004 extends Thread {
+class JNIter004 extends ThreadWrapper {
 
     // The native methods for testing JNI critical calls
     public native char[] CheckSum(String str);
diff --git a/test/hotspot/jtreg/vmTestbase/nsk/stress/jni/jnistress005.java b/test/hotspot/jtreg/vmTestbase/nsk/stress/jni/jnistress005.java
index a841635462b..0144c116f1e 100644
--- a/test/hotspot/jtreg/vmTestbase/nsk/stress/jni/jnistress005.java
+++ b/test/hotspot/jtreg/vmTestbase/nsk/stress/jni/jnistress005.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2007, 2026, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -61,8 +61,9 @@ package nsk.stress.jni;
 import nsk.share.Consts;
 import nsk.share.Debug;
 import nsk.share.test.StressOptions;
+import jdk.test.lib.thread.ThreadWrapper;
 
-public class jnistress005 extends Thread {
+public class jnistress005 extends ThreadWrapper {
 
     /* Maximum number of iterations.  Ignored if <= 0L */
     static long numIteration = 0L;
@@ -244,8 +245,11 @@ public class jnistress005 extends Thread {
         garb = new GarbageGenerator[nGarb];
         for (i = 0; i < nJNI; i++)
             jniter[i] = new JNIter005(sync);
+        Thread[] jniterThreads = new Thread[nJNI];
+        for (i = 0; i < nJNI; i++)
+            jniterThreads[i] = jniter[i].getThread();
         for (i = 0; i < nInter; i++) {
-            irupt[i] = new Interrupter(jniter, sync);
+            irupt[i] = new Interrupter(jniterThreads, sync);
             irupt[i].setInterval(iruptInterval);
         }
         for (i = 0; i < nGarb; i++) {
@@ -366,7 +370,7 @@ public class jnistress005 extends Thread {
     final private static boolean DEBUG = false;
 }
 
-class JNIter005 extends Thread {
+class JNIter005 extends ThreadWrapper {
 
     // The native methods for testing JNI exception calls
     public native void except(Throwable tobj);
diff --git a/test/hotspot/jtreg/vmTestbase/nsk/stress/jni/jnistress006.java b/test/hotspot/jtreg/vmTestbase/nsk/stress/jni/jnistress006.java
index b6646bb45d4..80178bdef28 100644
--- a/test/hotspot/jtreg/vmTestbase/nsk/stress/jni/jnistress006.java
+++ b/test/hotspot/jtreg/vmTestbase/nsk/stress/jni/jnistress006.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2007, 2026, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -61,8 +61,9 @@ package nsk.stress.jni;
 import nsk.share.Consts;
 import nsk.share.Debug;
 import nsk.share.test.StressOptions;
+import jdk.test.lib.thread.ThreadWrapper;
 
-public class jnistress006 extends Thread {
+public class jnistress006 extends ThreadWrapper {
 
     /* Maximum number of iterations.  Ignored if <= 0L */
     static long numIteration = 0L;
@@ -245,8 +246,11 @@ public class jnistress006 extends Thread {
         garb = new GarbageGenerator[nGarb];
         for (i = 0; i < nJNI; i++)
             jniter[i] = new JNIter006(sync);
+        Thread[] jniterThreads = new Thread[nJNI];
+        for (i = 0; i < nJNI; i++)
+            jniterThreads[i] = jniter[i].getThread();
         for (i = 0; i < nInter; i++) {
-            irupt[i] = new Interrupter(jniter, sync);
+            irupt[i] = new Interrupter(jniterThreads, sync);
             irupt[i].setInterval(iruptInterval);
         }
         for (i = 0; i < nGarb; i++) {
@@ -366,7 +370,7 @@ public class jnistress006 extends Thread {
     final private static boolean DEBUG = false;
 }
 
-class JNIter006 extends Thread {
+class JNIter006 extends ThreadWrapper {
 
     // The native methods for testing JNI exception calls
     public native boolean refs(Object tobj, int jniStringAllocSize);
diff --git a/test/hotspot/jtreg/vmTestbase/nsk/stress/jni/jnistress007.java b/test/hotspot/jtreg/vmTestbase/nsk/stress/jni/jnistress007.java
index 7497660c9e0..727560fae30 100644
--- a/test/hotspot/jtreg/vmTestbase/nsk/stress/jni/jnistress007.java
+++ b/test/hotspot/jtreg/vmTestbase/nsk/stress/jni/jnistress007.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2007, 2026, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -61,8 +61,9 @@ package nsk.stress.jni;
 import nsk.share.Consts;
 import nsk.share.Debug;
 import nsk.share.test.StressOptions;
+import jdk.test.lib.thread.ThreadWrapper;
 
-public class jnistress007 extends Thread {
+public class jnistress007 extends ThreadWrapper {
 
     /* Maximum number of iterations.  Ignored if <= 0L */
     static long numIteration = 0L;
@@ -244,8 +245,11 @@ public class jnistress007 extends Thread {
         garb = new GarbageGenerator[nGarb];
         for (i = 0; i < nJNI; i++)
             jniter[i] = new JNIter007(sync);
+        Thread[] jniterThreads = new Thread[nJNI];
+        for (i = 0; i < nJNI; i++)
+            jniterThreads[i] = jniter[i].getThread();
         for (i = 0; i < nInter; i++) {
-            irupt[i] = new Interrupter(jniter, sync);
+            irupt[i] = new Interrupter(jniterThreads, sync);
             irupt[i].setInterval(iruptInterval);
         }
         for (i = 0; i < nGarb; i++) {
@@ -364,7 +368,7 @@ public class jnistress007 extends Thread {
     final private static boolean DEBUG = false;
 }
 
-class JNIter007 extends Thread {
+class JNIter007 extends ThreadWrapper {
 
     // The native methods for testing JNI monitors calls
     public native void incCount(String name);
diff --git a/test/jdk/java/io/File/GetCanonicalPath.java b/test/jdk/java/io/File/GetCanonicalPath.java
index 14ef90260fc..f1d517c2202 100644
--- a/test/jdk/java/io/File/GetCanonicalPath.java
+++ b/test/jdk/java/io/File/GetCanonicalPath.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2026, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -22,7 +22,7 @@
  */
 
 /* @test
- * @bug 4899022 8003887 8355342
+ * @bug 4899022 8003887 8355342 8383867
  * @summary Look for erroneous representation of drive letter
  * @run junit GetCanonicalPath
  */
@@ -148,7 +148,14 @@ public class GetCanonicalPath {
         Runtime rt = Runtime.getRuntime();
         String share =
             "\\\\localhost\\" + cwd.charAt(0) + "$" + cwd.substring(2);
+        String junctionName = "tmpDir";
         try {
+            // create directory junction
+            Path tmpDir = Files.createTempDirectory(junctionName);
+            String tmpDirLink = cwd + "\\" + junctionName;
+            Process pmklink = rt.exec(new String[] {"cmd", "/c", "mklink", "/J", tmpDirLink, tmpDir.toString()});
+            assertEquals(0, pmklink.waitFor());
+
             Process p = rt.exec(new String[] {"net", "use", drive + ":", share});
             assertEquals(0, p.waitFor());
         } catch (InterruptedException x) {
@@ -157,13 +164,26 @@ public class GetCanonicalPath {
 
         // check that the canonical path name and its content are as expected
         try {
-            final String filename = "file.txt";
-            final String text = "This is some text";
-            Files.writeString(Path.of(share, filename), text);
-            File file = new File(drive + ":\\" + filename);
-            String canonicalPath = file.getCanonicalPath();
-            assertEquals(drive + ":\\" + filename, canonicalPath);
-            assertEquals(text, Files.readString(Path.of(canonicalPath)));
+            // use drive letter
+            {
+                final String filename = "file.txt";
+                final String text = "This is some text";
+                Files.writeString(Path.of(share, filename), text);
+                File file = new File(drive + ":\\" + filename);
+                String canonicalPath = file.getCanonicalPath();
+                assertEquals(drive + ":\\" + filename, canonicalPath);
+                assertEquals(text, Files.readString(Path.of(canonicalPath)));
+            }
+            // use reparse point (directory junction)
+            {
+                final String filename = junctionName + "\\file.txt";
+                final String text = "This is some text";
+                Files.writeString(Path.of(share, filename), text);
+                File file = new File(drive + ":\\" + filename);
+                String canonicalPath = file.getCanonicalPath();
+                assertTrue(canonicalPath.startsWith("\\\\localhost\\"));
+                assertEquals(text, Files.readString(Path.of(canonicalPath)));
+            }
         } finally {
             try {
                 Process p = rt.exec(new String[] {"net", "use", drive + ":", "/Delete"});
diff --git a/test/jdk/java/io/File/ListRoots.java b/test/jdk/java/io/File/ListRoots.java
index 7b14b2f3b08..d8717b0246a 100644
--- a/test/jdk/java/io/File/ListRoots.java
+++ b/test/jdk/java/io/File/ListRoots.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2026, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -24,9 +24,11 @@
 /* @test
    @bug 4071322
    @summary Basic test for File.listRoots method
+   @run junit ListRoots
  */
 
 import java.io.File;
+import java.io.IOException;
 import java.nio.file.FileSystem;
 import java.nio.file.FileSystems;
 import java.nio.file.Path;
@@ -35,33 +37,57 @@ import java.util.stream.Collectors;
 import java.util.stream.Stream;
 import java.util.stream.StreamSupport;
 
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.condition.DisabledOnOs;
+import org.junit.jupiter.api.condition.EnabledOnOs;
+import org.junit.jupiter.api.condition.OS;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
 public class ListRoots {
 
-    public static void main(String[] args) throws Exception {
+    private static Set<File> expectedSet;
+    private static Set<File> actualSet;
+
+    @BeforeAll
+    public static void init() throws IOException {
         File[] rs = File.listRoots();
         for (int i = 0; i < rs.length; i++) {
-            System.out.println(i + ": " + rs[i]);
-        }
-
-        File f = new File(System.getProperty("test.src", "."), "ListRoots.java");
-        String cp = f.getCanonicalPath();
-        boolean found = Stream.of(rs)
-                .map(File::getPath)
-                .anyMatch(p -> cp.startsWith(p));
-        if (!found) {
-            throw new RuntimeException(cp + " does not have a recognized root");
+            System.err.println(i + ": " + rs[i]);
         }
 
         // the list of roots should match FileSystem::getRootDirectories
-        Set<File> roots1 = Stream.of(rs).collect(Collectors.toSet());
         FileSystem fs = FileSystems.getDefault();
-        Set<File> roots2 = StreamSupport.stream(fs.getRootDirectories().spliterator(), false)
-                .map(Path::toFile)
-                .collect(Collectors.toSet());
-        if (!roots1.equals(roots2)) {
-            System.out.println(roots2);
-            throw new RuntimeException("Does not match FileSystem::getRootDirectories");
-        }
+        expectedSet =
+            StreamSupport.stream(fs.getRootDirectories().spliterator(), false)
+                         .map(Path::toFile)
+                         .collect(Collectors.toSet());
+        actualSet = Stream.of(rs).collect(Collectors.toSet());
     }
 
+    @Test
+    public void checkRoot() throws IOException {
+        File f = new File(System.getProperty("user.dir"));
+        String cp = f.getCanonicalPath();
+        boolean found = Stream.of(File.listRoots())
+                .map(File::getPath)
+                .anyMatch(p -> cp.startsWith(p));
+        assertTrue(found, cp + " does not have a recognized root");
+    }
+
+    @Test
+    @DisabledOnOs(OS.WINDOWS)
+    public void listRootsUnix() throws IOException {
+        assertEquals(expectedSet, actualSet,
+                     "Does not equal FileSystem::getRootDirectories");
+    }
+
+    @Test
+    @EnabledOnOs(OS.WINDOWS)
+    public void listRootsWindows() throws IOException {
+        assertTrue(expectedSet.stream().anyMatch(actualSet::contains),
+                   "Does not intersect FileSystem::getRootDirectories");
+    }
 }
diff --git a/test/jdk/java/text/Format/ListFormat/TestListFormat.java b/test/jdk/java/text/Format/ListFormat/TestListFormat.java
index bcccc2873f5..1500a1b0818 100644
--- a/test/jdk/java/text/Format/ListFormat/TestListFormat.java
+++ b/test/jdk/java/text/Format/ListFormat/TestListFormat.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2023, 2026, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -23,7 +23,7 @@
 
 /*
  * @test
- * @bug 8041488 8316974 8318569 8306116
+ * @bug 8041488 8316974 8318569 8306116 8385736
  * @summary Tests for ListFormat class
  * @run junit TestListFormat
  */
@@ -66,6 +66,14 @@ public class TestListFormat {
             "",
             "",
     };
+    // Ensures regex metacharacters in custom patterns are treated as literals.
+    private static final String[] CUSTOM_PATTERNS_METACHAR = {
+            ". {0} * {1}",
+            "{0} + {1}",
+            "{0} | {1} [",
+            "",
+            "",
+    };
     private static final String[] CUSTOM_PATTERNS_IAE_START = {
             "{0}",
             "{0} mid {1}",
@@ -120,7 +128,7 @@ public class TestListFormat {
         assertEquals(ListFormat.getInstance(), ListFormat.getInstance(Locale.getDefault(Locale.Category.FORMAT), ListFormat.Type.STANDARD, ListFormat.Style.FULL));
     }
 
-    static Arguments[] getInstance_1Arg() {
+    private static Arguments[] getInstance_1Arg() {
         return new Arguments[] {
                 arguments(CUSTOM_PATTERNS_FULL, SAMPLE1, "foo"),
                 arguments(CUSTOM_PATTERNS_FULL, SAMPLE2, "twobef foo two bar twoaft"),
@@ -130,10 +138,14 @@ public class TestListFormat {
                 arguments(CUSTOM_PATTERNS_MINIMAL, SAMPLE2, "sbef foo ebet bar eaft"),
                 arguments(CUSTOM_PATTERNS_MINIMAL, SAMPLE3, "sbef foo sbet bar ebet baz eaft"),
                 arguments(CUSTOM_PATTERNS_MINIMAL, SAMPLE4, "sbef foo sbet bar mid baz ebet qux eaft"),
+                arguments(CUSTOM_PATTERNS_METACHAR, SAMPLE1, "foo"),
+                arguments(CUSTOM_PATTERNS_METACHAR, SAMPLE2, ". foo | bar ["),
+                arguments(CUSTOM_PATTERNS_METACHAR, SAMPLE3, ". foo * bar | baz ["),
+                arguments(CUSTOM_PATTERNS_METACHAR, SAMPLE4, ". foo * bar + baz | qux ["),
         };
     }
 
-    static Arguments[] getInstance_1Arg_IAE() {
+    private static Arguments[] getInstance_1Arg_IAE() {
         return new Arguments[] {
                 arguments(new String[1], "Pattern array length should be 5"),
                 arguments(new String[6], "Pattern array length should be 5"),
@@ -146,7 +158,7 @@ public class TestListFormat {
         };
     }
 
-    static Arguments[] getInstance_3Arg() {
+    private static Arguments[] getInstance_3Arg() {
         return new Arguments[] {
                 arguments(Locale.US, ListFormat.Type.STANDARD, ListFormat.Style.FULL,
                         "foo, bar, and baz", true),
@@ -188,7 +200,7 @@ public class TestListFormat {
         };
     }
 
-    static Arguments[] parseObject_parsePos() {
+    private static Arguments[] parseObject_parsePos() {
         return new Arguments[] {
                 arguments(CUSTOM_PATTERNS_FULL, SAMPLE1),
                 arguments(CUSTOM_PATTERNS_FULL, SAMPLE2),
@@ -201,7 +213,7 @@ public class TestListFormat {
         };
     }
 
-    static Arguments[] getInstance_3Arg_InheritPatterns() {
+    private static Arguments[] getInstance_3Arg_InheritPatterns() {
         return new Arguments[] {
                 arguments(ListFormat.Type.STANDARD, ListFormat.Style.FULL),
                 arguments(ListFormat.Type.STANDARD, ListFormat.Style.SHORT),
@@ -215,7 +227,7 @@ public class TestListFormat {
         };
     }
 
-    static Arguments[] getLocale_localeDependent() {
+    private static Arguments[] getLocale_localeDependent() {
         return new Arguments[] {
                 arguments(Locale.ROOT),
                 arguments(Locale.US),
@@ -225,6 +237,16 @@ public class TestListFormat {
         };
     }
 
+    private static Arguments[] getInstance_1Arg_InvalidLongPattern() {
+        return new Arguments[] {
+                arguments(0, "start pattern is incorrect:"),
+                arguments(1, "middle pattern is incorrect:"),
+                arguments(2, "end pattern is incorrect:"),
+                arguments(3, "pattern for two is incorrect:"),
+                arguments(4, "pattern for three is incorrect:"),
+        };
+    }
+
     @ParameterizedTest
     @MethodSource
     void getInstance_1Arg(String[] patterns, List<String> input, String expected) throws ParseException {
@@ -240,6 +262,25 @@ public class TestListFormat {
         assertEquals(errorMsg, ex.getMessage());
     }
 
+    @ParameterizedTest
+    @MethodSource
+    void getInstance_1Arg_InvalidLongPattern(int index, String expected) {
+        var patterns = new String[]{
+            "{0}, {1}",
+            "{0}, {1}",
+            "{0}, and {1}",
+            "{0} and {1}",
+            "{0} {1} {2}"
+        };
+        patterns[index] = "{0}".repeat(100_000);
+
+        // Ensures validation of invalid long patterns completes without timing out
+        var msg = assertThrows(IllegalArgumentException.class,
+                               () -> ListFormat.getInstance(patterns))
+            .getMessage();
+        assertEquals(expected, msg.substring(0, Math.min(msg.length(), expected.length())));
+    }
+
     @ParameterizedTest
     @MethodSource
     void getInstance_3Arg(Locale l, ListFormat.Type type, ListFormat.Style style, String expected, boolean roundTrip) throws ParseException {
@@ -348,6 +389,7 @@ public class TestListFormat {
         // should be inherited from parent locales.
         Locale.availableLocales().forEach(l -> ListFormat.getInstance(l, type, style));
     }
+
     @Test
     void getInstance_3Arg_InheritanceValidation() {
         // Tests if inheritance works as expected.
diff --git a/test/jdk/java/util/zip/GZIP/BasicGZIPInputStreamTest.java b/test/jdk/java/util/zip/GZIP/BasicGZIPInputStreamTest.java
index 1f52d44146e..642b0bafa66 100644
--- a/test/jdk/java/util/zip/GZIP/BasicGZIPInputStreamTest.java
+++ b/test/jdk/java/util/zip/GZIP/BasicGZIPInputStreamTest.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2024, 2026, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -22,15 +22,21 @@
  */
 
 import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 import java.util.stream.Stream;
 import java.util.zip.GZIPInputStream;
+import java.util.zip.GZIPOutputStream;
 
 import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
 import org.junit.jupiter.api.function.Executable;
 import org.junit.jupiter.params.ParameterizedTest;
 import org.junit.jupiter.params.provider.Arguments;
 import org.junit.jupiter.params.provider.MethodSource;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+import static org.junit.jupiter.api.Assertions.fail;
 
 /*
  * @test
@@ -51,7 +57,7 @@ public class BasicGZIPInputStreamTest {
     @ParameterizedTest
     @MethodSource("npeFromConstructors")
     public void testNPEFromConstructors(final Executable constructor) {
-        Assertions.assertThrows(NullPointerException.class, constructor,
+        assertThrows(NullPointerException.class, constructor,
                 "GZIPInputStream constructor did not throw NullPointerException");
     }
 
@@ -71,7 +77,7 @@ public class BasicGZIPInputStreamTest {
     @ParameterizedTest
     @MethodSource("iaeFromConstructors")
     public void testIAEFromConstructors(final Executable constructor) {
-        Assertions.assertThrows(IllegalArgumentException.class, constructor,
+        assertThrows(IllegalArgumentException.class, constructor,
                 "GZIPInputStream constructor did not throw IllegalArgumentException");
     }
 
@@ -89,7 +95,29 @@ public class BasicGZIPInputStreamTest {
     @ParameterizedTest
     @MethodSource("ioeFromConstructors")
     public void testIOEFromConstructors(final Executable constructor) {
-        Assertions.assertThrows(IOException.class, constructor,
+        assertThrows(IOException.class, constructor,
                 "GZIPInputStream constructor did not throw IOException");
     }
+
+    /*
+     * Verifies that GZIPInputStream.read() throws IOException when invoked on a closed
+     * stream
+     */
+    @Test
+    void testClosedStreamRead() throws Exception {
+        final ByteArrayOutputStream baos = new ByteArrayOutputStream();
+        try (GZIPOutputStream gzos = new GZIPOutputStream(baos)) {
+            gzos.write(new byte[] {0x42, 0x42}); // GZIP compress these input bytes
+        }
+        final byte[] gzipCompressed = baos.toByteArray();
+        // create the GZIPInputStream to test
+        final GZIPInputStream in = new GZIPInputStream(new ByteArrayInputStream(gzipCompressed));
+        in.close();
+        final IOException ioe = assertThrows(IOException.class, () -> in.read(new byte[1], 0, 1));
+        final String exMessage = ioe.getMessage();
+        if (exMessage == null || !exMessage.contains("Stream closed")) {
+            // unexpected exception message, propagate the original exception
+            throw ioe;
+        }
+    }
 }
diff --git a/test/jdk/java/util/zip/GZIP/GZIPInputStreamRead.java b/test/jdk/java/util/zip/GZIP/GZIPInputStreamRead.java
index 56bd58e1aaf..9cc13b29f34 100644
--- a/test/jdk/java/util/zip/GZIP/GZIPInputStreamRead.java
+++ b/test/jdk/java/util/zip/GZIP/GZIPInputStreamRead.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2010, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2010, 2026, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -21,81 +21,201 @@
  * questions.
  */
 
-/* @test
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.Arrays;
+import java.util.Random;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.zip.GZIPInputStream;
+import java.util.zip.GZIPOutputStream;
+
+import jdk.test.lib.RandomFactory;
+import org.junit.jupiter.api.Test;
+import static org.junit.jupiter.api.Assertions.assertArrayEquals;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+/*
+ * @test
  * @bug 4691425
  * @summary Test the read and write of GZIPInput/OutputStream, including
  *          concatenated .gz inputstream
  * @key randomness
+ * @library /test/lib
+ * @build jdk.test.lib.RandomFactory
+ * @run junit ${test.main.class}
  */
+class GZIPInputStreamRead {
 
-import java.io.*;
-import java.util.*;
-import java.util.zip.*;
+    private static final Random random = RandomFactory.getRandom();
 
-public class GZIPInputStreamRead {
-    public static void main(String[] args) throws Throwable {
-        Random rnd = new Random();
-        for (int i = 1; i < 100; i++) {
-            int members = rnd.nextInt(10) + 1;
+    /*
+     * Generates GZIP content containing multiple members and then verifies
+     * that using GZIPInputStream to decompress that content generates the correct
+     * expected decompressed data.
+     */
+    @Test
+    void testMultipleMembers() throws Exception {
+        final int numMembers = random.nextInt(10) + 1;
+        final ByteArrayOutputStream rawUncompressedBaos = new ByteArrayOutputStream();
+        final ByteArrayOutputStream gzipCompressedBaos = new ByteArrayOutputStream();
+        // generate GZIP content with multiple members
+        for (int j = 0; j < numMembers; j++) {
+            byte[] src = new byte[random.nextInt(8192) + 1];
+            random.nextBytes(src);
+            rawUncompressedBaos.write(src);
 
-            ByteArrayOutputStream srcBAOS = new ByteArrayOutputStream();
-            ByteArrayOutputStream dstBAOS = new ByteArrayOutputStream();
-            for (int j = 0; j < members; j++) {
-                byte[] src = new byte[rnd.nextInt(8192) + 1];
-                rnd.nextBytes(src);
-                srcBAOS.write(src);
-
-                try (GZIPOutputStream gzos = new GZIPOutputStream(dstBAOS)) {
-                    gzos.write(src);
-                }
-            }
-            byte[] srcBytes = srcBAOS.toByteArray();
-            byte[] dstBytes = dstBAOS.toByteArray();
-            // try different size of buffer to read the
-            // GZIPInputStream
-            /* just for fun when running manually
-            for (int j = 1; j < 10; j++) {
-                test(srcBytes, dstBytes, j);
-            }
-            */
-            for (int j = 0; j < 10; j++) {
-                int readBufSZ = rnd.nextInt(2048) + 1;
-                test(srcBytes,
-                     dstBytes,
-                     readBufSZ,
-                     512);    // the defualt buffer size
-                test(srcBytes,
-                     dstBytes,
-                     readBufSZ,
-                     rnd.nextInt(4096) + 1);
+            try (GZIPOutputStream gzos = new GZIPOutputStream(gzipCompressedBaos)) {
+                gzos.write(src);
             }
         }
+        final byte[] uncompressedRawBytes = rawUncompressedBaos.toByteArray();
+        final byte[] gzipCompressedBytes = gzipCompressedBaos.toByteArray();
+        // decompress using GZIPInputStream and verify the decompressed output.
+        // use different input buffer size for GZIPInputStream when running the verification.
+        for (int j = 0; j < 10; j++) {
+            final int readBufSZ = random.nextInt(2048) + 1;
+            verifyDecompressed(uncompressedRawBytes,
+                    gzipCompressedBytes,
+                    readBufSZ,
+                    512);    // the default input buffer size
+            verifyDecompressed(uncompressedRawBytes,
+                    gzipCompressedBytes,
+                    readBufSZ,
+                    random.nextInt(4096) + 1);
+        }
     }
 
-    private static void test(byte[] src, byte[] dst,
-                             int readBufSize, int gzisBufSize)
-        throws Throwable
-    {
-        try (ByteArrayInputStream bais = new ByteArrayInputStream(dst);
-             GZIPInputStream gzis = new GZIPInputStream(bais, gzisBufSize))
-        {
-            byte[] result = new byte[src.length + 10];
+    /*
+     * Generates GZIP content containing one member followed by some arbitrary non-member data.
+     * The test then verifies that using GZIPInputStream to decompress that content generates
+     * the correct expected decompressed data.
+     */
+    @Test
+    void testNonMemberAfterTrailer() throws Exception {
+        final byte[] rawUncompressed = new byte[random.nextInt(1234)];
+        random.nextBytes(rawUncompressed);
+        final ByteArrayOutputStream gzipCompressedPlusExtra = new ByteArrayOutputStream();
+        // generate a valid GZIP member
+        try (GZIPOutputStream gzos = new GZIPOutputStream(gzipCompressedPlusExtra)) {
+            gzos.write(rawUncompressed); // GZIP compress
+        }
+        final int numCompressedBytes = gzipCompressedPlusExtra.size();
+        // past the GZIP trailer, write some additional bytes that doesn't represent a GZIP member
+        final byte[] notGZIPMagic = ByteBuffer.allocate(Integer.BYTES).
+                putInt(GZIPInputStream.GZIP_MAGIC + 42)
+                .array();
+        gzipCompressedPlusExtra.write(notGZIPMagic);
+        assertEquals(numCompressedBytes + notGZIPMagic.length, gzipCompressedPlusExtra.size(),
+                "unexpected number of compressed + extra bytes");
+        // now use GZIPInputStream to decompress the compressed plus extra bytes and verify
+        // that the extra bytes don't cause unexpected decompressed output
+        final ByteArrayOutputStream decompressedBaos = new ByteArrayOutputStream();
+        int n = 0;
+        try (ByteArrayInputStream bais = new ByteArrayInputStream(gzipCompressedPlusExtra.toByteArray());
+             GZIPInputStream gzipIn = new GZIPInputStream(bais)) {
+
+            final byte[] tmpBuf = new byte[42];
+            while ((n = gzipIn.read(tmpBuf)) != -1) {
+                decompressedBaos.write(tmpBuf, 0, n);
+            }
+            final byte[] decompressed = decompressedBaos.toByteArray();
+            // verify the decompressed content
+            assertEquals(rawUncompressed.length, decompressed.length,
+                    "unexpected number of decompressed bytes");
+            assertArrayEquals(rawUncompressed, decompressed, "unexpected decompressed data");
+            // make sure additional calls to read still return EOF
+            assertEquals(-1, gzipIn.read(), "unexpected return from read(), expected EOF");
+            assertEquals(-1, gzipIn.read(new byte[10]), "unexpected return from read(), expected EOF");
+        }
+    }
+
+    /*
+     * Verifies that the InputStream.available() method is invoked on the underlying InputStream
+     * to determine presence of additional GZIP members in the stream.
+     */
+    @Test
+    void testInputStreamAvailableCalled() throws Exception {
+        final byte[] rawUncompressedMember1 = new byte[random.nextInt(111)];
+        random.nextBytes(rawUncompressedMember1);
+        System.err.println("GZIP member 1 has " + rawUncompressedMember1.length + " bytes");
+
+        final byte[] rawUncompressedMember2 = new byte[random.nextInt(33)];
+        random.nextBytes(rawUncompressedMember2);
+        System.err.println("GZIP member 2 has " + rawUncompressedMember2.length + " bytes");
+
+        final ByteArrayOutputStream twoMemberGzipCompressedBaos = new ByteArrayOutputStream();
+        // generate GZIP format data with 2 valid GZIP members
+        try (GZIPOutputStream gzos = new GZIPOutputStream(twoMemberGzipCompressedBaos)) {
+            gzos.write(rawUncompressedMember1); // GZIP compress
+            gzos.write(rawUncompressedMember2); // GZIP compress
+        }
+        final byte[] gzipCompressed = twoMemberGzipCompressedBaos.toByteArray();
+        final AtomicBoolean availableInvoked = new AtomicBoolean();
+        // an InputStream which tracks the calls to available()
+        final ByteArrayInputStream underlying = new ByteArrayInputStream(gzipCompressed) {
+            @Override
+            public int available() {
+                availableInvoked.set(true);
+                return super.available();
+            }
+        };
+        // now use GZIPInputStream to decompress the compressed data and expect the decompressed
+        // data to be correct and also expect the InputStream.available() to have been invoked
+        final ByteArrayOutputStream decompressedBaos = new ByteArrayOutputStream();
+        int n = 0;
+        try (GZIPInputStream gzipIn = new GZIPInputStream(underlying)) {
+
+            final byte[] tmpBuf = new byte[1024];
+            while ((n = gzipIn.read(tmpBuf)) != -1) {
+                decompressedBaos.write(tmpBuf, 0, n);
+            }
+            assertTrue(availableInvoked.get(), "InputStream.available() wasn't invoked");
+            final byte[] decompressed = decompressedBaos.toByteArray();
+            // verify the decompressed content, it should represent the two GZIP members
+            assertEquals(rawUncompressedMember1.length + rawUncompressedMember2.length,
+                    decompressed.length, "unexpected number of decompressed bytes");
+
+            assertArrayEquals(rawUncompressedMember1,
+                    Arrays.copyOfRange(decompressed, 0, rawUncompressedMember1.length),
+                    "unexpected decompressed data of first member");
+
+            assertArrayEquals(rawUncompressedMember2,
+                    Arrays.copyOfRange(decompressed, rawUncompressedMember1.length, decompressed.length),
+                    "unexpected decompressed data of second member");
+
+            // make sure additional calls to read still return EOF
+            assertEquals(-1, gzipIn.read(), "unexpected return from read(), expected EOF");
+            assertEquals(-1, gzipIn.read(new byte[42]), "unexpected return from read(), expected EOF");
+        }
+    }
+
+    // verify that decompressing the gzipCompressed data using GZIPInputStream
+    // generates the expected output
+    private static void verifyDecompressed(final byte[] rawUncompressed,
+                                           final byte[] gzipCompressed,
+                                           final int readBufSize, final int gzisBufSize)
+            throws IOException {
+        try (ByteArrayInputStream bais = new ByteArrayInputStream(gzipCompressed);
+             GZIPInputStream gzis = new GZIPInputStream(bais, gzisBufSize)) {
+
+            byte[] result = new byte[rawUncompressed.length + 10];
             byte[] buf = new byte[readBufSize];
             int n = 0;
-            int off = 0;
-
+            int numDecompressed = 0;
             while ((n = gzis.read(buf, 0, buf.length)) != -1) {
-                System.arraycopy(buf, 0, result, off, n);
-                off += n;
+                System.arraycopy(buf, 0, result, numDecompressed, n);
+                numDecompressed += n;
                 // no range check, if overflow, let it fail
             }
-            if (off != src.length || gzis.available() != 0 ||
-                !Arrays.equals(src, Arrays.copyOf(result, off))) {
-                throw new RuntimeException(
-                    "GZIPInputStream reading failed! " +
-                    ", src.len=" + src.length +
-                    ", read=" + off);
-            }
+            assertEquals(rawUncompressed.length, numDecompressed,
+                    "unexpected number of decompressed bytes");
+            assertEquals(0, gzis.available(),
+                    "unexpected additional bytes available in the GZIPInputStream");
+            assertArrayEquals(rawUncompressed, Arrays.copyOf(result, numDecompressed),
+                    "unexpected decompressed data");
         }
     }
 }
diff --git a/test/jdk/sun/security/ssl/SSLEngineImpl/KeyUpdateOnce.java b/test/jdk/sun/security/ssl/SSLEngineImpl/KeyUpdateOnce.java
new file mode 100644
index 00000000000..298ba86250a
--- /dev/null
+++ b/test/jdk/sun/security/ssl/SSLEngineImpl/KeyUpdateOnce.java
@@ -0,0 +1,546 @@
+/*
+ * Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 8329548
+ * @library ../../
+ *          /test/lib
+ *          /javax/net/ssl/templates
+ * @summary Verify KeyUpdate messages skipped after first one sent.
+ *
+ * @run main KeyUpdateOnce server TLS_AES_256_GCM_SHA384 200000
+ * @run main KeyUpdateOnce client TLS_AES_256_GCM_SHA384 200000
+ */
+
+/*
+ * This test runs in another process so we can monitor the debug
+ * results.  The OutputAnalyzer must see correct debug output to return a
+ * success.
+ */
+
+import jdk.test.lib.Utils;
+import jdk.test.lib.process.OutputAnalyzer;
+import jdk.test.lib.process.ProcessTools;
+
+import javax.net.ssl.SSLContext;
+import javax.net.ssl.SSLEngine;
+import javax.net.ssl.SSLEngineResult;
+
+import java.lang.reflect.Field;
+import java.nio.ByteBuffer;
+import java.util.Arrays;
+import java.util.List;
+
+/**
+ * This server/client TLS test will force side A to stop reading as it
+ * continuously writes out.  These write ops will trigger the side B to
+ * request a KeyUpdate.  With side A not reading, side B must skip
+ * sending more KeyUpdate messages.  Only one KeyUpdate message will be
+ * sent by side B.
+ *
+ * This test depends on debug messages string match.  Changing the KeyUpdate-
+ * related messages may cause a failure.
+ */
+
+public class KeyUpdateOnce extends SSLContextTemplate {
+
+    private static final int DATALEN = 10240;
+    private static final int BUF_DATALEN = 4 * DATALEN;
+    private static final int MAXLOOPS = 150;
+    private static final int COUNTDOWNLIMIT = 5;
+
+    private static final boolean DEBUG = true;
+
+    private static ByteBuffer cTos;
+    private static ByteBuffer sToc;
+    private static ByteBuffer outData;
+    private final ByteBuffer inData;
+
+    // thread flags
+    private static boolean ready = false;
+    private static boolean sc = true;
+    private static boolean readDone = false;
+    private static boolean serverWrites = true;
+
+    private static long newLimit;
+
+    // Reflection handle captured on read side once handshake completes
+    private static Object readSideInputRecord = null;
+
+    protected SSLEngine engine;
+    private final int delay = 1;
+    private int totalDataLen = 0;
+
+    KeyUpdateOnce() {
+        this.inData = ByteBuffer.allocate(BUF_DATALEN);
+    }
+
+    /**
+     * args should have:
+     *   server|client, cipherSuite, <limit size>
+     *
+     * Prepending 'p' is for internal use only (test harness relaunch).
+     */
+    public static void main(String[] args) throws Exception {
+        for (String arg : args) {
+            System.out.print(" " + arg);
+        }
+        System.out.println();
+
+        // Harness mode: relaunch self with 'p' to force add-opens + debugging flags
+        if (!"p".equals(args[0])) {
+            // args[]: 0 = client/server, 1 = cipher suite, 2 = newLimit
+            System.setProperty("test.java.opts",
+                System.getProperty("test.java.opts") +
+                    " -Dtest.src=" + System.getProperty("test.src") +
+                    " -Dtest.jdk=" + System.getProperty("test.jdk") +
+                    " -Djavax.net.debug=ssl,handshake" +
+                    " -Djavatest.maxOutputSize=99999999" +
+                    " --add-opens java.base/sun.security.ssl=ALL-UNNAMED");
+
+            System.out.println("test.java.opts: " +
+                System.getProperty("test.java.opts"));
+
+            ProcessBuilder pb = ProcessTools.createTestJavaProcessBuilder(
+                Utils.addTestJavaOpts("KeyUpdateOnce", "p", args[0],
+                    args[1], args[2]));
+
+            OutputAnalyzer output = ProcessTools.executeProcess(pb);
+            try {
+                output.shouldContain(String.format(
+                    "\"cipher suite\"        : \"%s", args[1]));
+                System.err.println("Output logs should show KeyUpdate has" +
+                    " been sent and skipped");
+                List<String> producedList = output.asLines().stream()
+                    .filter(s -> s.contains("Produced KeyUpdate"))
+                    .toList();
+                List<String> skippingList = output.asLines().stream()
+                    .filter(s -> s.contains("KeyUpdate already sent, skipping"))
+                    .toList();
+                producedList.forEach(System.err::println);
+                skippingList.forEach(System.err::println);
+                System.err.println("\"Produced KeyUpdate\" count = " + producedList.size());
+                System.err.println("\"KeyUpdate already send, skipping\" count = " + skippingList.size());
+
+                /*
+                 * Sometimes debug messages may not be consistent.  The below
+                 * checks verify that at least 1 of each message were received.
+                 */
+                // Ideally there should be 2 "Produced KeyUpdate"
+                if (producedList.isEmpty()) {
+                    throw new AssertionError("No \"Produced KeyUpdate\"");
+                }
+                // Ideally there should be 5 "KeyUpdate already send, skipping"
+                if (skippingList.isEmpty()) {
+                    throw new AssertionError("No \"KeyUpdate already send, skipping\"");
+                }
+
+            } finally {
+                System.out.println("-- BEGIN Stdout:");
+                System.out.println(output.getStdout());
+                System.out.println("-- END Stdout");
+                System.out.println("-- BEGIN Stderr:");
+                System.out.println(output.getStderr());
+                System.out.println("-- END Stderr");
+            }
+            return;
+        }
+
+        // Worker mode:
+        // args[]: 0 = p, 1 = client/server, 2 = cipher suite, 3 = newLimit
+        serverWrites = !"client".equals(args[1]);
+        newLimit = Long.parseLong(args[3]);
+
+        cTos = ByteBuffer.allocateDirect(BUF_DATALEN);
+        sToc = ByteBuffer.allocateDirect(BUF_DATALEN);
+        outData = ByteBuffer.allocateDirect(DATALEN);
+
+        byte[] data = new byte[DATALEN];
+        Arrays.fill(data, (byte) 0x0A);
+        outData.put(data).flip();
+
+        cTos.clear();
+        sToc.clear();
+
+        Thread peer = new Thread(serverWrites ? new Client() :
+            new Server(args[2]));
+        peer.start();
+
+        (serverWrites ? new Server(args[2]) : new Client()).run();
+
+        peer.interrupt();
+        peer.join();
+    }
+
+    private static void doTask(SSLEngineResult result, SSLEngine engine)
+        throws Exception {
+        if (result.getHandshakeStatus() ==
+            SSLEngineResult.HandshakeStatus.NEED_TASK) {
+            Runnable runnable;
+            while ((runnable = engine.getDelegatedTask()) != null) {
+                print("\trunning delegated task...");
+                runnable.run();
+            }
+            SSLEngineResult.HandshakeStatus hsStatus =
+                engine.getHandshakeStatus();
+            if (hsStatus == SSLEngineResult.HandshakeStatus.NEED_TASK) {
+                throw new Exception("handshake shouldn't need additional tasks");
+            }
+            print("\tnew HandshakeStatus: " + hsStatus);
+        }
+    }
+
+    private static void print(String s) {
+        if (DEBUG) {
+            System.err.println(s);
+        }
+    }
+
+    private static void log(String s, SSLEngineResult r) {
+        if (DEBUG) {
+            System.err.println(s + ": " +
+                r.getStatus() + "/" + r.getHandshakeStatus() + " " +
+                r.bytesConsumed() + "/" + r.bytesProduced());
+        }
+    }
+
+    private static void dumpBuffers(String aName, ByteBuffer a) {
+        if (DEBUG) {
+            System.err.println(aName + " pos=" + a.position() +
+                " rem=" + a.remaining() +
+                " lim=" + a.limit() + " cap=" + a.capacity());
+        }
+    }
+
+    void writeLoop() throws Exception {
+        int i = 0;
+        SSLEngineResult r;
+        int countdown = COUNTDOWNLIMIT;
+
+        while (!ready) {
+            Thread.sleep(delay);
+        }
+
+        print("Write-side begins");
+
+        while (i++ < MAXLOOPS) {
+            while (sc) {
+                if (readDone) {
+                    return;
+                }
+                Thread.sleep(delay);
+            }
+
+            outData.rewind();
+
+            while (true) {
+                r = engine.wrap(outData, getWriteBuf());
+                log("write wrap", r);
+
+                if (DEBUG && r.getStatus() != SSLEngineResult.Status.OK) {
+                    dumpBuffers("outData", outData);
+                    dumpBuffers("writeBuf", getWriteBuf());
+                }
+
+                if (r.getStatus() == SSLEngineResult.Status.OK &&
+                    r.getHandshakeStatus() ==
+                        SSLEngineResult.HandshakeStatus.NEED_WRAP) {
+                    continue;
+                }
+                break;
+            }
+
+            doTask(r, engine);
+
+            getWriteBuf().flip();
+            sc = true;
+
+            while (sc) {
+                if (readDone) {
+                    return;
+                }
+                Thread.sleep(delay);
+            }
+
+            long rlimit = Long.MAX_VALUE;
+            if (readSideInputRecord != null) {
+                rlimit = getReadLimit(readSideInputRecord);
+            }
+            if (rlimit <= 0) {
+                countdown--;
+            }
+            System.err.println("Write side readLimit = " + rlimit);
+
+            if (countdown == COUNTDOWNLIMIT || countdown <= 0) {
+                inData.clear();
+                r = engine.unwrap(getReadBuf(), inData);
+                log("write unwrap", r);
+
+                if (DEBUG && r.getStatus() != SSLEngineResult.Status.OK) {
+                    dumpBuffers("inData", inData);
+                    dumpBuffers("readBuf", getReadBuf());
+                }
+            } else {
+                print("write side unwrap skipped");
+            }
+
+            doTask(r, engine);
+            getReadBuf().compact();
+            dumpBuffers("compacted getReadBuf()", getReadBuf());
+            sc = true;
+        }
+    }
+
+    void readLoop() throws Exception {
+        byte b = 0x0B;
+        ByteBuffer buf = ByteBuffer.allocateDirect(DATALEN);
+
+        SSLEngineResult r = null;
+        boolean again = true;
+        boolean firstNotHandshake = false;
+
+        while (engine == null) {
+            Thread.sleep(delay);
+        }
+
+        try {
+            System.out.println("connected");
+            print("entering read loop");
+            ready = true;
+
+            while (true) {
+                while (!sc) {
+                    Thread.sleep(delay);
+                }
+
+                boolean exit = false;
+                while (!exit) {
+                    buf.put(b);
+                    buf.flip();
+
+                    r = engine.wrap(buf, getWriteBuf());
+                    log("read wrap", r);
+
+                    if (DEBUG) {
+                        dumpBuffers("buf", buf);
+                        dumpBuffers( "writeBuf", getWriteBuf());
+                    }
+
+                    if (again && r.getStatus() == SSLEngineResult.Status.OK &&
+                        r.getHandshakeStatus() ==
+                            SSLEngineResult.HandshakeStatus.NEED_WRAP) {
+                        buf.compact();
+                        again = false;
+                        continue;
+                    }
+                    exit = true;
+                }
+
+                doTask(r, engine);
+
+                buf.clear();
+                getWriteBuf().flip();
+                sc = false;
+
+                while (!sc) {
+                    Thread.sleep(delay);
+                }
+
+                while (true) {
+                    inData.clear();
+                    r = engine.unwrap(getReadBuf(), inData);
+                    log("read unwrap", r);
+
+                    if (DEBUG && r.getStatus() != SSLEngineResult.Status.OK) {
+                        dumpBuffers("inData", inData);
+                        dumpBuffers("readBuf", getReadBuf());
+
+                        doTask(r, engine);
+                    }
+
+                    if (again && r.getStatus() == SSLEngineResult.Status.OK &&
+                        r.getHandshakeStatus() ==
+                            SSLEngineResult.HandshakeStatus.NEED_UNWRAP) {
+                        inData.clear();
+                        print("again");
+                        again = false;
+                        continue;
+                    }
+                    break;
+                }
+
+                inData.clear();
+                getReadBuf().compact();
+
+                totalDataLen += r.bytesProduced();
+                sc = false;
+
+                if (!firstNotHandshake &&
+                    r.getHandshakeStatus() ==
+                        SSLEngineResult.HandshakeStatus.NOT_HANDSHAKING) {
+
+                    try {
+                        readSideInputRecord = getInputRecord(engine);
+                        setReadLimit(readSideInputRecord, newLimit);
+                    } catch (Exception e) {
+                        throw new RuntimeException(e);
+                    }
+                    System.err.println("Resetting readside");
+                    firstNotHandshake = true;
+                }
+            }
+        } catch (Exception e) {
+            sc = false;
+            readDone = true;
+
+            System.out.println(e.getMessage());
+            e.printStackTrace();
+            System.out.println("Total data read = " + totalDataLen);
+        }
+    }
+
+    // Overridden in Server/Client
+    ByteBuffer getReadBuf() {
+        return null;
+    }
+    ByteBuffer getWriteBuf() {
+        return null;
+    }
+
+    SSLContext initContext() throws Exception {
+        return createServerSSLContext();
+    }
+
+    @Override
+    protected SSLContextTemplate.ContextParameters getServerContextParameters() {
+        return new SSLContextTemplate.ContextParameters("TLSv1.3", "PKIX", "NewSunX509");
+    }
+
+    static Object getInputRecord(SSLEngine eng) throws Exception {
+        Class<?> engineImplCls = Class.forName("sun.security.ssl.SSLEngineImpl");
+        Object conContext = getPrivate(eng, engineImplCls, "conContext");
+
+        Class<?> transportCtxCls = Class.forName("sun.security.ssl.TransportContext");
+        return getPrivate(conContext, transportCtxCls, "inputRecord");
+    }
+
+    static void setReadLimit(Object inputRecord, long newCountdown) throws Exception {
+        Class<?> inputRecordCls = Class.forName("sun.security.ssl.InputRecord");
+        Object readCipher = getPrivate(inputRecord, inputRecordCls, "readCipher");
+        Class<?> sslReadCipher = readCipher.getClass().getSuperclass();
+
+        Field f = getField(sslReadCipher, "keyLimitCountdown");
+        f.setLong(readCipher, newCountdown);
+    }
+
+    static long getReadLimit(Object inputRecord) throws Exception {
+        Class<?> inputRecordCls = Class.forName("sun.security.ssl.InputRecord");
+        Object readCipher = getPrivate(inputRecord, inputRecordCls, "readCipher");
+        Class<?> sslReadCipher = readCipher.getClass().getSuperclass();
+
+        Field f = getField(sslReadCipher, "keyLimitCountdown");
+        return f.getLong(readCipher);
+    }
+
+    private static Field getField(Class<?> type, String name) throws Exception {
+        Field f = type.getDeclaredField(name);
+        f.setAccessible(true); // requires --add-opens for sun.security.ssl
+        return f;
+    }
+
+    private static Object getPrivate(Object target, Class<?> owner, String name) throws Exception {
+        return getField(owner, name).get(target);
+    }
+
+    static class Server extends KeyUpdateOnce implements Runnable {
+        Server(String cipherSuite) throws Exception {
+            super();
+            engine = initContext().createSSLEngine();
+            engine.setUseClientMode(false);
+            engine.setNeedClientAuth(true);
+
+            if (cipherSuite != null && !cipherSuite.isEmpty()) {
+                engine.setEnabledCipherSuites(new String[] { cipherSuite });
+            }
+        }
+
+        @Override
+        public void run() {
+            try {
+                if (serverWrites) {
+                    writeLoop();
+                } else {
+                    readLoop();
+                }
+            } catch (Exception e) {
+                System.out.println("server: " + e.getMessage());
+                e.printStackTrace();
+            }
+            System.out.println("Server closed");
+        }
+
+        @Override
+        ByteBuffer getWriteBuf() {
+            return sToc;
+        }
+
+        @Override
+        ByteBuffer getReadBuf() {
+            return cTos;
+        }
+    }
+
+    static class Client extends KeyUpdateOnce implements Runnable {
+        Client() throws Exception {
+            super();
+            engine = initContext().createSSLEngine();
+            engine.setUseClientMode(true);
+        }
+
+        @Override
+        public void run() {
+            try {
+                if (!serverWrites) {
+                    writeLoop();
+                } else {
+                    readLoop();
+                }
+            } catch (Exception e) {
+                System.out.println("client: " + e.getMessage());
+                e.printStackTrace();
+            }
+            System.out.println("Client closed");
+        }
+
+        @Override
+        ByteBuffer getWriteBuf() {
+            return cTos;
+        }
+
+        @Override
+        ByteBuffer getReadBuf() {
+            return sToc;
+        }
+    }
+}
diff --git a/test/jdk/tools/jpackage/junit/share/jdk.jpackage/jdk/jpackage/internal/cli/MainTest.java b/test/jdk/tools/jpackage/junit/share/jdk.jpackage/jdk/jpackage/internal/cli/MainTest.java
index 93d22df4d26..69db18ccb3a 100644
--- a/test/jdk/tools/jpackage/junit/share/jdk.jpackage/jdk/jpackage/internal/cli/MainTest.java
+++ b/test/jdk/tools/jpackage/junit/share/jdk.jpackage/jdk/jpackage/internal/cli/MainTest.java
@@ -50,6 +50,7 @@ import java.util.stream.Collectors;
 import java.util.stream.Stream;
 import jdk.internal.util.OperatingSystem;
 import jdk.jpackage.internal.Globals;
+import jdk.jpackage.internal.model.BundlingOperationDescriptor;
 import jdk.jpackage.internal.model.ConfigException;
 import jdk.jpackage.internal.model.ExecutableAttributesWithCapturedOutput;
 import jdk.jpackage.internal.model.JPackageException;
@@ -491,8 +492,35 @@ public class MainTest extends JUnitAdapter {
 
             var os = OperatingSystem.current();
             var exitCode = Main.run(os, () -> {
-                CliBundlingEnvironment bundlingEnv = JPackageMockUtils.createBundlingEnvironment(os);
-                return bundlingEnv;
+                return new CliBundlingEnvironment() {
+                    @Override
+                    public Optional<BundlingOperationDescriptor> defaultOperation() {
+                        switch (os) {
+                            case LINUX -> {
+                                return Optional.of(StandardBundlingOperation.CREATE_LINUX_DEB.descriptor());
+                            }
+                            case WINDOWS -> {
+                                return Optional.of(StandardBundlingOperation.CREATE_WIN_MSI.descriptor());
+                            }
+                            case MACOS -> {
+                                return Optional.of(StandardBundlingOperation.CREATE_MAC_PKG.descriptor());
+                            }
+                            default -> {
+                                throw new AssertionError();
+                            }
+                        }
+                    }
+
+                    @Override
+                    public void createBundle(BundlingOperationDescriptor op, Options cmdline) {
+                        if (StandardBundlingOperation.CREATE_APP_IMAGE.contains(StandardBundlingOperation.valueOf(op).orElseThrow())) {
+                            CliBundlingEnvironment bundlingEnv = JPackageMockUtils.createBundlingEnvironment(os);
+                            bundlingEnv.createBundle(op, cmdline);
+                        } else {
+                            throw new AssertionError();
+                        }
+                    }
+                };
             }, new PrintWriter(stdout), new PrintWriter(stderr), args);
 
             return new ExecutionResult(lines(stdout.toString()), lines(stderr.toString()), exitCode);
diff --git a/test/jdk/tools/jpackage/share/AsyncTest.java b/test/jdk/tools/jpackage/share/AsyncTest.java
index b7dd13f91fa..ae90e6315e8 100644
--- a/test/jdk/tools/jpackage/share/AsyncTest.java
+++ b/test/jdk/tools/jpackage/share/AsyncTest.java
@@ -113,7 +113,10 @@ public class AsyncTest {
         @Test
         @ParameterSupplier("ids")
         public void testNativeBundle(int id) throws Exception {
-            new PackageTest().addInitializer(AsyncTest::init).run(Action.CREATE_AND_UNPACK);
+            new PackageTest()
+                    .excludeTypes(PackageType.MAC_DMG) // See JDK-8384250
+                    .addInitializer(AsyncTest::init)
+                    .run(Action.CREATE_AND_UNPACK);
         }
 
         public static Collection<Object[]> ids() {
diff --git a/test/langtools/tools/javac/annotations/typeAnnotations/TypeAnnotationsOnVariables.java b/test/langtools/tools/javac/annotations/typeAnnotations/TypeAnnotationsOnVariables.java
index f89e3ff7398..9c61c3f6d4a 100644
--- a/test/langtools/tools/javac/annotations/typeAnnotations/TypeAnnotationsOnVariables.java
+++ b/test/langtools/tools/javac/annotations/typeAnnotations/TypeAnnotationsOnVariables.java
@@ -23,7 +23,7 @@
 
 /*
  * @test
- * @bug 8371155 8379550 8384843
+ * @bug 8371155 8379550 8381965 8384843
  * @summary Verify type annotations on local-like variables are propagated to
  *          their types at an appropriate time.
  * @library /tools/lib
@@ -559,6 +559,192 @@ public class TypeAnnotationsOnVariables {
                              "        Test$TypeAnno");
     }
 
+    @Test
+    void explicitLambdaHeader3() throws Exception {
+        Path src = base.resolve("src");
+        Path classes = base.resolve("classes");
+        tb.writeJavaFiles(src,
+                          """
+                          import java.lang.annotation.ElementType;
+                          import java.lang.annotation.Target;
+                          import java.util.function.Consumer;
+                          import java.util.List;
+
+                          class Test {
+                              @Target(ElementType.TYPE_USE)
+                              @interface TypeAnno { }
+
+                              static final Consumer<List<@TypeAnno String>> TEST =
+                                  id((List<@TypeAnno String> arg1) -> {});
+
+                              private static <T> Consumer<T> id(Consumer<T> t) { return t;}
+                              private void test() {
+                                  Object test =
+                                        id((List<@TypeAnno String> arg2) -> {});
+                              }
+                          }
+                          """);
+        Files.createDirectories(classes);
+        List<String> actual = new ArrayList<>();
+        new JavacTask(tb)
+                .options("-d", classes.toString())
+                .files(tb.findJavaFiles(src))
+                .callback(task -> {
+                    task.addTaskListener(new TaskListener() {
+                        @Override
+                        public void finished(TaskEvent e) {
+                            if (e.getKind() != TaskEvent.Kind.ANALYZE) {
+                                return ;
+                            }
+                            Trees trees = Trees.instance(task);
+                            new TreePathScanner<Void, Void>() {
+                                @Override
+                                public Void visitVariable(VariableTree node, Void p) {
+                                    actual.add(node.getName() + ": " + typeToString(trees.getTypeMirror(getCurrentPath())));
+                                    return super.visitVariable(node, p);
+                                }
+                                @Override
+                                public Void visitLambdaExpression(LambdaExpressionTree node, Void p) {
+                                    actual.add(treeToString(node)+ ": " + typeToString(trees.getTypeMirror(getCurrentPath())));
+                                    return super.visitLambdaExpression(node, p);
+                                }
+                            }.scan(e.getCompilationUnit(), null);
+                        }
+                    });
+                })
+                .run()
+                .writeAll();
+
+        List<String> expected = List.of(
+            "TEST: java.util.function.Consumer<java.util.List<java.lang.@Test.TypeAnno String>>",
+            "(List<@TypeAnno String> arg1)->{ }: java.util.function.Consumer<java.util.List<java.lang.String>>",
+            "arg1: java.util.List<java.lang.@Test.TypeAnno String>",
+            "t: java.util.function.Consumer<T>",
+            "test: java.lang.Object",
+            "(List<@TypeAnno String> arg2)->{ }: java.util.function.Consumer<java.util.List<java.lang.String>>",
+            "arg2: java.util.List<java.lang.@Test.TypeAnno String>"
+        );
+
+        actual.forEach(System.out::println);
+        if (!expected.equals(actual)) {
+            throw new AssertionError("Expected: " + expected + ", but got: " + actual);
+        }
+
+        Path testClass = classes.resolve("Test.class");
+        TestClassDesc testClassDesc = TestClassDesc.create(testClass);
+        MethodModel clInit = singletonValue(testClassDesc.name2Method().get("<clinit>"));
+        assertEmpty(getAnnotationsFromHeader(clInit));
+        assertEmpty(getAnnotationsFromCode(clInit));
+        MethodModel test = singletonValue(testClassDesc.name2Method().get("test"));
+        assertEmpty(getAnnotationsFromHeader(test));
+        assertEmpty(getAnnotationsFromCode(test));
+
+        checkTypeAnnotations(testClassDesc,
+                             "lambda$static$0",
+                             this::getAnnotationsFromHeader,
+                             "      0: LTest$TypeAnno;(): METHOD_FORMAL_PARAMETER, param_index=0, location=[TYPE_ARGUMENT(0)]",
+                             "        Test$TypeAnno");
+
+        checkTypeAnnotations(testClassDesc,
+                             "lambda$test$0",
+                             this::getAnnotationsFromHeader,
+                             "      0: LTest$TypeAnno;(): METHOD_FORMAL_PARAMETER, param_index=0, location=[TYPE_ARGUMENT(0)]",
+                             "        Test$TypeAnno");
+    }
+
+    @Test
+    void explicitLambdaHeader4() throws Exception {
+        Path src = base.resolve("src");
+        Path classes = base.resolve("classes");
+        tb.writeJavaFiles(src,
+                          """
+                          import java.lang.annotation.ElementType;
+                          import java.lang.annotation.Target;
+                          import java.util.function.Consumer;
+                          import java.util.List;
+
+                          class Test {
+                              @Target(ElementType.TYPE_USE)
+                              @interface TypeAnno { }
+
+                              static final Consumer<List<String>> TEST =
+                                  id((@TypeAnno List<String> arg) -> {});
+
+                              private static <T> Consumer<T> id(Consumer<T> t) { return t;}
+                              private void test() {
+                                  Object test =
+                                        id((@TypeAnno List<String> arg) -> {});
+                              }
+                          }
+                          """);
+        Files.createDirectories(classes);
+        List<String> actual = new ArrayList<>();
+        new JavacTask(tb)
+                .options("-d", classes.toString())
+                .files(tb.findJavaFiles(src))
+                .callback(task -> {
+                    task.addTaskListener(new TaskListener() {
+                        @Override
+                        public void finished(TaskEvent e) {
+                            if (e.getKind() != TaskEvent.Kind.ANALYZE) {
+                                return ;
+                            }
+                            Trees trees = Trees.instance(task);
+                            new TreePathScanner<Void, Void>() {
+                                @Override
+                                public Void visitVariable(VariableTree node, Void p) {
+                                    actual.add(node.getName() + ": " + typeToString(trees.getTypeMirror(getCurrentPath())));
+                                    return super.visitVariable(node, p);
+                                }
+                                @Override
+                                public Void visitLambdaExpression(LambdaExpressionTree node, Void p) {
+                                    actual.add(treeToString(node)+ ": " + typeToString(trees.getTypeMirror(getCurrentPath())));
+                                    return super.visitLambdaExpression(node, p);
+                                }
+                            }.scan(e.getCompilationUnit(), null);
+                        }
+                    });
+                })
+                .run()
+                .writeAll();
+
+        List<String> expected = List.of(
+            "TEST: java.util.function.Consumer<java.util.List<java.lang.String>>",
+            "(@TypeAnno List<String> arg)->{ }: java.util.function.Consumer<java.util.List<java.lang.String>>",
+            "arg: java.util.@Test.TypeAnno List<java.lang.String>",
+            "t: java.util.function.Consumer<T>",
+            "test: java.lang.Object",
+            "(@TypeAnno List<String> arg)->{ }: java.util.function.Consumer<java.util.List<java.lang.String>>",
+            "arg: java.util.@Test.TypeAnno List<java.lang.String>"
+        );
+
+        actual.forEach(System.out::println);
+        if (!expected.equals(actual)) {
+            throw new AssertionError("Expected: " + expected + ", but got: " + actual);
+        }
+
+        Path testClass = classes.resolve("Test.class");
+        TestClassDesc testClassDesc = TestClassDesc.create(testClass);
+        MethodModel clInit = singletonValue(testClassDesc.name2Method().get("<clinit>"));
+        assertEmpty(getAnnotationsFromHeader(clInit));
+        assertEmpty(getAnnotationsFromCode(clInit));
+        MethodModel test = singletonValue(testClassDesc.name2Method().get("test"));
+        assertEmpty(getAnnotationsFromHeader(test));
+        assertEmpty(getAnnotationsFromCode(test));
+
+        checkTypeAnnotations(testClassDesc,
+                             "lambda$static$0",
+                             this::getAnnotationsFromHeader,
+                             "      0: LTest$TypeAnno;(): METHOD_FORMAL_PARAMETER, param_index=0",
+                             "        Test$TypeAnno");
+
+        checkTypeAnnotations(testClassDesc,
+                             "lambda$test$0",
+                             this::getAnnotationsFromHeader,
+                             "      0: LTest$TypeAnno;(): METHOD_FORMAL_PARAMETER, param_index=0",
+                             "        Test$TypeAnno");
+    }
+
     private void checkTypeAnnotations(TestClassDesc testClassDesc,
                                       String lambdaMethodName,
                                       String... expectedEntries) throws IOException {
diff --git a/test/lib/jdk/test/whitebox/WhiteBox.java b/test/lib/jdk/test/whitebox/WhiteBox.java
index 311502a16e2..c4b2da9beb0 100644
--- a/test/lib/jdk/test/whitebox/WhiteBox.java
+++ b/test/lib/jdk/test/whitebox/WhiteBox.java
@@ -336,6 +336,8 @@ public class WhiteBox {
 
   // Compiler
 
+  public native boolean isC2Included();
+
   public native int     matchesMethod(Executable method, String pattern);
   public native int     matchesInline(Executable method, String pattern);
   public native boolean shouldPrintAssembly(Executable method, int comp_level);
diff --git a/test/lib/jdk/test/whitebox/code/Compiler.java b/test/lib/jdk/test/whitebox/code/Compiler.java
index 333ca42782c..2450fa81ee6 100644
--- a/test/lib/jdk/test/whitebox/code/Compiler.java
+++ b/test/lib/jdk/test/whitebox/code/Compiler.java
@@ -34,6 +34,15 @@ public class Compiler {
 
     private static final WhiteBox WB = WhiteBox.getWhiteBox();
 
+    /**
+     * Check if C2 was included in the VM build
+     *
+     * @return true if C2 was included in the VM build.
+     */
+    public static boolean isC2Included() {
+        return WB.isC2Included();
+    }
+
     /**
      * Check if C2 is used as JIT compiler.
      *