8349522: AArch64: Add backend implementation for new unsigned and saturating vector operations

Reviewed-by: epeter, haosun, bkilambi
2026-01-28 12:09:14 +00:00 · 2025-03-25 01:35:27 +00:00 · 2025-03-25 01:35:27 +00:00 · ba658a71ba
commit ba658a71ba
parent 5625b43ff4
8 changed files with 1146 additions and 477 deletions
--- a/src/hotspot/cpu/aarch64/aarch64_vector.ad
+++ b/src/hotspot/cpu/aarch64/aarch64_vector.ad
@ -1,5 +1,5 @@
 //
-// Copyright (c) 2020, 2024, Oracle and/or its affiliates. All rights reserved.
+// Copyright (c) 2020, 2025, Oracle and/or its affiliates. All rights reserved.
 // Copyright (c) 2020, 2024, Arm Limited. All rights reserved.
 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 //
@ -254,6 +254,13 @@ source %{
      case Op_CompressBitsV:
      case Op_ExpandBitsV:
        return false;
+      case Op_SaturatingAddV:
+      case Op_SaturatingSubV:
+        // Only SVE2 supports the predicated saturating instructions.
+        if (UseSVE < 2) {
+          return false;
+        }
+        break;
      // We use Op_LoadVectorMasked to implement the predicated Op_LoadVector.
      // Hence we turn to check whether Op_LoadVectorMasked is supported. The
      // same as vector store/gather/scatter.
@ -1539,6 +1546,142 @@ instruct vand_notL_masked(vReg dst_src1, vReg src2, immL_M1 m1, pRegGov pg) %{
  ins_pipe(pipe_slow);
 %}

+// ------------------------- Vector saturating add -----------------------------
+
+// Signed saturating add
+
+instruct vsqadd(vReg dst, vReg src1, vReg src2) %{
+  predicate(!n->as_SaturatingVector()->is_unsigned());
+  match(Set dst (SaturatingAddV src1 src2));
+  format %{ "vsqadd $dst, $src1, $src2" %}
+  ins_encode %{
+    uint length_in_bytes = Matcher::vector_length_in_bytes(this);
+    if (VM_Version::use_neon_for_vector(length_in_bytes)) {
+      __ sqaddv($dst$$FloatRegister, get_arrangement(this),
+                $src1$$FloatRegister, $src2$$FloatRegister);
+    } else {
+      assert(UseSVE > 0, "must be sve");
+      BasicType bt = Matcher::vector_element_basic_type(this);
+      __ sve_sqadd($dst$$FloatRegister, __ elemType_to_regVariant(bt),
+                   $src1$$FloatRegister, $src2$$FloatRegister);
+    }
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vsqadd_masked(vReg dst_src1, vReg src2, pRegGov pg) %{
+  predicate(UseSVE == 2 && !n->as_SaturatingVector()->is_unsigned());
+  match(Set dst_src1 (SaturatingAddV (Binary dst_src1 src2) pg));
+  format %{ "vsqadd_masked $dst_src1, $pg, $dst_src1, $src2" %}
+  ins_encode %{
+    BasicType bt = Matcher::vector_element_basic_type(this);
+    __ sve_sqadd($dst_src1$$FloatRegister, __ elemType_to_regVariant(bt),
+                 $pg$$PRegister, $src2$$FloatRegister);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// Unsigned saturating add
+
+instruct vuqadd(vReg dst, vReg src1, vReg src2) %{
+  predicate(n->as_SaturatingVector()->is_unsigned());
+  match(Set dst (SaturatingAddV src1 src2));
+  format %{ "vuqadd $dst, $src1, $src2" %}
+  ins_encode %{
+    uint length_in_bytes = Matcher::vector_length_in_bytes(this);
+    if (VM_Version::use_neon_for_vector(length_in_bytes)) {
+      __ uqaddv($dst$$FloatRegister, get_arrangement(this),
+                $src1$$FloatRegister, $src2$$FloatRegister);
+    } else {
+      assert(UseSVE > 0, "must be sve");
+      BasicType bt = Matcher::vector_element_basic_type(this);
+      __ sve_uqadd($dst$$FloatRegister, __ elemType_to_regVariant(bt),
+                   $src1$$FloatRegister, $src2$$FloatRegister);
+    }
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vuqadd_masked(vReg dst_src1, vReg src2, pRegGov pg) %{
+  predicate(UseSVE == 2 && n->as_SaturatingVector()->is_unsigned());
+  match(Set dst_src1 (SaturatingAddV (Binary dst_src1 src2) pg));
+  format %{ "vuqadd_masked $dst_src1, $pg, $dst_src1, $src2" %}
+  ins_encode %{
+    BasicType bt = Matcher::vector_element_basic_type(this);
+    __ sve_uqadd($dst_src1$$FloatRegister, __ elemType_to_regVariant(bt),
+                 $pg$$PRegister, $src2$$FloatRegister);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// ------------------------- Vector saturating sub -----------------------------
+
+// Signed saturating sub
+
+instruct vsqsub(vReg dst, vReg src1, vReg src2) %{
+  predicate(!n->as_SaturatingVector()->is_unsigned());
+  match(Set dst (SaturatingSubV src1 src2));
+  format %{ "vsqsub $dst, $src1, $src2" %}
+  ins_encode %{
+    uint length_in_bytes = Matcher::vector_length_in_bytes(this);
+    if (VM_Version::use_neon_for_vector(length_in_bytes)) {
+      __ sqsubv($dst$$FloatRegister, get_arrangement(this),
+                $src1$$FloatRegister, $src2$$FloatRegister);
+    } else {
+      assert(UseSVE > 0, "must be sve");
+      BasicType bt = Matcher::vector_element_basic_type(this);
+      __ sve_sqsub($dst$$FloatRegister, __ elemType_to_regVariant(bt),
+                   $src1$$FloatRegister, $src2$$FloatRegister);
+    }
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vsqsub_masked(vReg dst_src1, vReg src2, pRegGov pg) %{
+  predicate(UseSVE == 2 && !n->as_SaturatingVector()->is_unsigned());
+  match(Set dst_src1 (SaturatingSubV (Binary dst_src1 src2) pg));
+  format %{ "vsqsub_masked $dst_src1, $pg, $dst_src1, $src2" %}
+  ins_encode %{
+    BasicType bt = Matcher::vector_element_basic_type(this);
+    __ sve_sqsub($dst_src1$$FloatRegister, __ elemType_to_regVariant(bt),
+                 $pg$$PRegister, $src2$$FloatRegister);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// Unsigned saturating sub
+
+instruct vuqsub(vReg dst, vReg src1, vReg src2) %{
+  predicate(n->as_SaturatingVector()->is_unsigned());
+  match(Set dst (SaturatingSubV src1 src2));
+  format %{ "vuqsub $dst, $src1, $src2" %}
+  ins_encode %{
+    uint length_in_bytes = Matcher::vector_length_in_bytes(this);
+    if (VM_Version::use_neon_for_vector(length_in_bytes)) {
+      __ uqsubv($dst$$FloatRegister, get_arrangement(this),
+                $src1$$FloatRegister, $src2$$FloatRegister);
+    } else {
+      assert(UseSVE > 0, "must be sve");
+      BasicType bt = Matcher::vector_element_basic_type(this);
+      __ sve_uqsub($dst$$FloatRegister, __ elemType_to_regVariant(bt),
+                   $src1$$FloatRegister, $src2$$FloatRegister);
+    }
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vuqsub_masked(vReg dst_src1, vReg src2, pRegGov pg) %{
+  predicate(UseSVE == 2 && n->as_SaturatingVector()->is_unsigned());
+  match(Set dst_src1 (SaturatingSubV (Binary dst_src1 src2) pg));
+  format %{ "vuqsub_masked $dst_src1, $pg, $dst_src1, $src2" %}
+  ins_encode %{
+    BasicType bt = Matcher::vector_element_basic_type(this);
+    __ sve_uqsub($dst_src1$$FloatRegister, __ elemType_to_regVariant(bt),
+                 $pg$$PRegister, $src2$$FloatRegister);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
 // ------------------------------ Vector abs -----------------------------------

 // vector abs
@ -1993,6 +2136,76 @@ instruct vmin_masked(vReg dst_src1, vReg src2, pRegGov pg) %{
  ins_pipe(pipe_slow);
 %}

+// vector unsigned min - LONG
+
+instruct vuminL_neon(vReg dst, vReg src1, vReg src2) %{
+  predicate(UseSVE == 0 && Matcher::vector_element_basic_type(n) == T_LONG);
+  match(Set dst (UMinV src1 src2));
+  effect(TEMP_DEF dst);
+  format %{ "vuminL_neon $dst, $src1, $src2\t# 2L" %}
+  ins_encode %{
+    __ cm(Assembler::HI, $dst$$FloatRegister, __ T2D, $src1$$FloatRegister, $src2$$FloatRegister);
+    __ bsl($dst$$FloatRegister, __ T16B, $src2$$FloatRegister, $src1$$FloatRegister);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vuminL_sve(vReg dst_src1, vReg src2) %{
+  predicate(UseSVE > 0 && Matcher::vector_element_basic_type(n) == T_LONG);
+  match(Set dst_src1 (UMinV dst_src1 src2));
+  format %{ "vuminL_sve $dst_src1, $dst_src1, $src2" %}
+  ins_encode %{
+    __ sve_umin($dst_src1$$FloatRegister, __ D, ptrue, $src2$$FloatRegister);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// vector unsigned min - B/S/I
+
+instruct vumin_neon(vReg dst, vReg src1, vReg src2) %{
+  predicate(Matcher::vector_element_basic_type(n) != T_LONG &&
+            VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n)));
+  match(Set dst (UMinV src1 src2));
+  format %{ "vumin_neon $dst, $src1, $src2\t# B/S/I" %}
+  ins_encode %{
+    BasicType bt = Matcher::vector_element_basic_type(this);
+    assert(is_integral_type(bt) && bt != T_LONG, "unsupported type");
+    __ uminv($dst$$FloatRegister, get_arrangement(this),
+             $src1$$FloatRegister, $src2$$FloatRegister);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vumin_sve(vReg dst_src1, vReg src2) %{
+  predicate(Matcher::vector_element_basic_type(n) != T_LONG &&
+            !VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n)));
+  match(Set dst_src1 (UMinV dst_src1 src2));
+  format %{ "vumin_sve $dst_src1, $dst_src1, $src2\t# B/S/I" %}
+  ins_encode %{
+    assert(UseSVE > 0, "must be sve");
+    BasicType bt = Matcher::vector_element_basic_type(this);
+    assert(is_integral_type(bt) && bt != T_LONG, "unsupported type");
+    __ sve_umin($dst_src1$$FloatRegister, __ elemType_to_regVariant(bt),
+                ptrue, $src2$$FloatRegister);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// vector unsigned min - predicated
+
+instruct vumin_masked(vReg dst_src1, vReg src2, pRegGov pg) %{
+  predicate(UseSVE > 0);
+  match(Set dst_src1 (UMinV (Binary dst_src1 src2) pg));
+  format %{ "vumin_masked $dst_src1, $pg, $dst_src1, $src2" %}
+  ins_encode %{
+    BasicType bt = Matcher::vector_element_basic_type(this);
+    assert(is_integral_type(bt), "unsupported type");
+    __ sve_umin($dst_src1$$FloatRegister, __ elemType_to_regVariant(bt),
+                $pg$$PRegister, $src2$$FloatRegister);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
 // ------------------------------ Vector max -----------------------------------

 // vector max - LONG
@ -2080,6 +2293,76 @@ instruct vmax_masked(vReg dst_src1, vReg src2, pRegGov pg) %{
  ins_pipe(pipe_slow);
 %}

+// vector unsigned max - LONG
+
+instruct vumaxL_neon(vReg dst, vReg src1, vReg src2) %{
+  predicate(UseSVE == 0 && Matcher::vector_element_basic_type(n) == T_LONG);
+  match(Set dst (UMaxV src1 src2));
+  effect(TEMP_DEF dst);
+  format %{ "vumaxL_neon $dst, $src1, $src2\t# 2L" %}
+  ins_encode %{
+    __ cm(Assembler::HI, $dst$$FloatRegister, __ T2D, $src1$$FloatRegister, $src2$$FloatRegister);
+    __ bsl($dst$$FloatRegister, __ T16B, $src1$$FloatRegister, $src2$$FloatRegister);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vumaxL_sve(vReg dst_src1, vReg src2) %{
+  predicate(UseSVE > 0 && Matcher::vector_element_basic_type(n) == T_LONG);
+  match(Set dst_src1 (UMaxV dst_src1 src2));
+  format %{ "vumaxL_sve $dst_src1, $dst_src1, $src2" %}
+  ins_encode %{
+    __ sve_umax($dst_src1$$FloatRegister, __ D, ptrue, $src2$$FloatRegister);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// vector unsigned max - B/S/I
+
+instruct vumax_neon(vReg dst, vReg src1, vReg src2) %{
+  predicate(Matcher::vector_element_basic_type(n) != T_LONG &&
+            VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n)));
+  match(Set dst (UMaxV src1 src2));
+  format %{ "vumax_neon $dst, $src1, $src2\t# B/S/I" %}
+  ins_encode %{
+    BasicType bt = Matcher::vector_element_basic_type(this);
+    assert(is_integral_type(bt) && bt != T_LONG, "unsupported type");
+    __ umaxv($dst$$FloatRegister, get_arrangement(this),
+             $src1$$FloatRegister, $src2$$FloatRegister);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vumax_sve(vReg dst_src1, vReg src2) %{
+  predicate(Matcher::vector_element_basic_type(n) != T_LONG &&
+            !VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n)));
+  match(Set dst_src1 (UMaxV dst_src1 src2));
+  format %{ "vumax_sve $dst_src1, $dst_src1, $src2\t# B/S/I" %}
+  ins_encode %{
+    assert(UseSVE > 0, "must be sve");
+    BasicType bt = Matcher::vector_element_basic_type(this);
+    assert(is_integral_type(bt) && bt != T_LONG, "unsupported type");
+    __ sve_umax($dst_src1$$FloatRegister, __ elemType_to_regVariant(bt),
+                ptrue, $src2$$FloatRegister);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// vector unsigned max - predicated
+
+instruct vumax_masked(vReg dst_src1, vReg src2, pRegGov pg) %{
+  predicate(UseSVE > 0);
+  match(Set dst_src1 (UMaxV (Binary dst_src1 src2) pg));
+  format %{ "vumax_masked $dst_src1, $pg, $dst_src1, $src2" %}
+  ins_encode %{
+    BasicType bt = Matcher::vector_element_basic_type(this);
+    assert(is_integral_type(bt), "unsupported type");
+    __ sve_umax($dst_src1$$FloatRegister, __ elemType_to_regVariant(bt),
+                $pg$$PRegister, $src2$$FloatRegister);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
 // ------------------------------ MLA RELATED ----------------------------------

 // vector mla
--- a/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4
+++ b/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4
@ -1,5 +1,5 @@
 //
-// Copyright (c) 2020, 2024, Oracle and/or its affiliates. All rights reserved.
+// Copyright (c) 2020, 2025, Oracle and/or its affiliates. All rights reserved.
 // Copyright (c) 2020, 2024, Arm Limited. All rights reserved.
 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 //
@ -244,6 +244,13 @@ source %{
      case Op_CompressBitsV:
      case Op_ExpandBitsV:
        return false;
+      case Op_SaturatingAddV:
+      case Op_SaturatingSubV:
+        // Only SVE2 supports the predicated saturating instructions.
+        if (UseSVE < 2) {
+          return false;
+        }
+        break;
      // We use Op_LoadVectorMasked to implement the predicated Op_LoadVector.
      // Hence we turn to check whether Op_LoadVectorMasked is supported. The
      // same as vector store/gather/scatter.
@ -813,6 +820,65 @@ dnl
 VECTOR_AND_NOT_PREDICATE(I)
 VECTOR_AND_NOT_PREDICATE(L)

+dnl
+dnl VECTOR_SATURATING_OP($1,     $2, $3     )
+dnl VECTOR_SATURATING_OP(prefix, op, op_name)
+define(`VECTOR_SATURATING_OP', `
+instruct v$1$2(vReg dst, vReg src1, vReg src2) %{
+  predicate(ifelse($1, sq, `!',`')n->as_SaturatingVector()->is_unsigned());
+  match(Set dst ($3 src1 src2));
+  format %{ "v$1$2 $dst, $src1, $src2" %}
+  ins_encode %{
+    uint length_in_bytes = Matcher::vector_length_in_bytes(this);
+    if (VM_Version::use_neon_for_vector(length_in_bytes)) {
+      __ $1$2v($dst$$FloatRegister, get_arrangement(this),
+                $src1$$FloatRegister, $src2$$FloatRegister);
+    } else {
+      assert(UseSVE > 0, "must be sve");
+      BasicType bt = Matcher::vector_element_basic_type(this);
+      __ sve_$1$2($dst$$FloatRegister, __ elemType_to_regVariant(bt),
+                   $src1$$FloatRegister, $src2$$FloatRegister);
+    }
+  %}
+  ins_pipe(pipe_slow);
+%}')dnl
+dnl
+dnl
+dnl VECTOR_SATURATING_PREDICATE($1,     $2, $3     )
+dnl VECTOR_SATURATING_PREDICATE(prefix, op, op_name)
+define(`VECTOR_SATURATING_PREDICATE', `
+instruct v$1$2_masked(vReg dst_src1, vReg src2, pRegGov pg) %{
+  predicate(UseSVE == 2 && ifelse($1, sq, `!',`')n->as_SaturatingVector()->is_unsigned());
+  match(Set dst_src1 ($3 (Binary dst_src1 src2) pg));
+  format %{ "v$1$2_masked $dst_src1, $pg, $dst_src1, $src2" %}
+  ins_encode %{
+    BasicType bt = Matcher::vector_element_basic_type(this);
+    __ sve_$1$2($dst_src1$$FloatRegister, __ elemType_to_regVariant(bt),
+                 $pg$$PRegister, $src2$$FloatRegister);
+  %}
+  ins_pipe(pipe_slow);
+%}')dnl
+dnl
+// ------------------------- Vector saturating add -----------------------------
+
+// Signed saturating add
+VECTOR_SATURATING_OP(sq, add, SaturatingAddV)
+VECTOR_SATURATING_PREDICATE(sq, add, SaturatingAddV)
+
+// Unsigned saturating add
+VECTOR_SATURATING_OP(uq, add, SaturatingAddV)
+VECTOR_SATURATING_PREDICATE(uq, add, SaturatingAddV)
+
+// ------------------------- Vector saturating sub -----------------------------
+
+// Signed saturating sub
+VECTOR_SATURATING_OP(sq, sub, SaturatingSubV)
+VECTOR_SATURATING_PREDICATE(sq, sub, SaturatingSubV)
+
+// Unsigned saturating sub
+VECTOR_SATURATING_OP(uq, sub, SaturatingSubV)
+VECTOR_SATURATING_PREDICATE(uq, sub, SaturatingSubV)
+
 dnl
 dnl UNARY_OP($1,        $2,      $3,        $4,       $5  )
 dnl UNARY_OP(rule_name, op_name, insn_neon, insn_sve, size)
@ -964,17 +1030,17 @@ UNARY_OP_PREDICATE_WITH_SIZE(vsqrtF, SqrtVF, sve_fsqrt, S)
 UNARY_OP_PREDICATE_WITH_SIZE(vsqrtD, SqrtVD, sve_fsqrt, D)

 dnl
-dnl VMINMAX_L_NEON($1,   $2     )
-dnl VMINMAX_L_NEON(type, op_name)
+dnl VMINMAX_L_NEON($1,   $2,      $3  )
+dnl VMINMAX_L_NEON(type, op_name, sign)
 define(`VMINMAX_L_NEON', `
-instruct v$1L_neon(vReg dst, vReg src1, vReg src2) %{
+instruct v$3$1L_neon(vReg dst, vReg src1, vReg src2) %{
  predicate(UseSVE == 0 && Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst ($2 src1 src2));
  effect(TEMP_DEF dst);
-  format %{ "v$1L_neon $dst, $src1, $src2\t# 2L" %}
+  format %{ "v$3$1L_neon $dst, $src1, $src2\t# 2L" %}
  ins_encode %{
-    __ cm(Assembler::GT, $dst$$FloatRegister, __ T2D, $src1$$FloatRegister, $src2$$FloatRegister);
-    __ bsl($dst$$FloatRegister, __ T16B, ifelse(min, $1, $src2, $src1)$$FloatRegister, ifelse(min, $1, $src1, $src2)$$FloatRegister);
+    __ cm(Assembler::ifelse($3, u, HI, GT), $dst$$FloatRegister, __ T2D, $src1$$FloatRegister, $src2$$FloatRegister);
+    __ bsl($dst$$FloatRegister, __ T16B, ifelse($1, min, $src2, $src1)$$FloatRegister, ifelse(min, $1, $src1, $src2)$$FloatRegister);
  %}
  ins_pipe(pipe_slow);
 %}')dnl
@ -1058,6 +1124,57 @@ instruct v$1_masked(vReg dst_src1, vReg src2, pRegGov pg) %{
  ins_pipe(pipe_slow);
 %}')dnl
 dnl
+dnl VUMINMAX_NEON($1,   $2,      $3  )
+dnl VUMINMAX_NEON(type, op_name, insn)
+define(`VUMINMAX_NEON', `
+instruct v$1_neon(vReg dst, vReg src1, vReg src2) %{
+  predicate(Matcher::vector_element_basic_type(n) != T_LONG &&
+            VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n)));
+  match(Set dst ($2 src1 src2));
+  format %{ "v$1_neon $dst, $src1, $src2\t# B/S/I" %}
+  ins_encode %{
+    BasicType bt = Matcher::vector_element_basic_type(this);
+    assert(is_integral_type(bt) && bt != T_LONG, "unsupported type");
+    __ $3($dst$$FloatRegister, get_arrangement(this),
+             $src1$$FloatRegister, $src2$$FloatRegister);
+  %}
+  ins_pipe(pipe_slow);
+%}')dnl
+dnl
+dnl VUMINMAX_SVE($1,   $2,      $3  )
+dnl VUMINMAX_SVE(type, op_name, insn)
+define(`VUMINMAX_SVE', `
+instruct v$1_sve(vReg dst_src1, vReg src2) %{
+  predicate(Matcher::vector_element_basic_type(n) != T_LONG &&
+            !VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n)));
+  match(Set dst_src1 ($2 dst_src1 src2));
+  format %{ "v$1_sve $dst_src1, $dst_src1, $src2\t# B/S/I" %}
+  ins_encode %{
+    assert(UseSVE > 0, "must be sve");
+    BasicType bt = Matcher::vector_element_basic_type(this);
+    assert(is_integral_type(bt) && bt != T_LONG, "unsupported type");
+    __ $3($dst_src1$$FloatRegister, __ elemType_to_regVariant(bt),
+                ptrue, $src2$$FloatRegister);
+  %}
+  ins_pipe(pipe_slow);
+%}')dnl
+dnl
+dnl VUMINMAX_PREDICATE($1,   $2,      $3  )
+dnl VUMINMAX_PREDICATE(type, op_name, insn)
+define(`VUMINMAX_PREDICATE', `
+instruct v$1_masked(vReg dst_src1, vReg src2, pRegGov pg) %{
+  predicate(UseSVE > 0);
+  match(Set dst_src1 ($2 (Binary dst_src1 src2) pg));
+  format %{ "v$1_masked $dst_src1, $pg, $dst_src1, $src2" %}
+  ins_encode %{
+    BasicType bt = Matcher::vector_element_basic_type(this);
+    assert(is_integral_type(bt), "unsupported type");
+    __ $3($dst_src1$$FloatRegister, __ elemType_to_regVariant(bt),
+                $pg$$PRegister, $src2$$FloatRegister);
+  %}
+  ins_pipe(pipe_slow);
+%}')dnl
+dnl
 // ------------------------------ Vector min -----------------------------------

 // vector min - LONG
@ -1071,6 +1188,17 @@ VMINMAX_SVE(min, MinV, sve_fmin, sve_smin)
 // vector min - predicated
 VMINMAX_PREDICATE(min, MinV, sve_fmin, sve_smin)

+// vector unsigned min - LONG
+VMINMAX_L_NEON(min, UMinV, u)
+VMINMAX_L_SVE(umin, UMinV, sve_umin)
+
+// vector unsigned min - B/S/I
+VUMINMAX_NEON(umin, UMinV, uminv)
+VUMINMAX_SVE(umin, UMinV, sve_umin)
+
+// vector unsigned min - predicated
+VUMINMAX_PREDICATE(umin, UMinV, sve_umin)
+
 // ------------------------------ Vector max -----------------------------------

 // vector max - LONG
@ -1084,6 +1212,17 @@ VMINMAX_SVE(max, MaxV, sve_fmax, sve_smax)
 // vector max - predicated
 VMINMAX_PREDICATE(max, MaxV, sve_fmax, sve_smax)

+// vector unsigned max - LONG
+VMINMAX_L_NEON(max, UMaxV, u)
+VMINMAX_L_SVE(umax, UMaxV, sve_umax)
+
+// vector unsigned max - B/S/I
+VUMINMAX_NEON(umax, UMaxV, umaxv)
+VUMINMAX_SVE(umax, UMaxV, sve_umax)
+
+// vector unsigned max - predicated
+VUMINMAX_PREDICATE(umax, UMaxV, sve_umax)
+
 // ------------------------------ MLA RELATED ----------------------------------

 // vector mla
--- a/src/hotspot/cpu/aarch64/assembler_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/assembler_aarch64.hpp
@ -2600,6 +2600,9 @@ template<typename R, typename... Rx>

  INSN(addv,   0, 0b100001, true);  // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
  INSN(subv,   1, 0b100001, true);  // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
+  INSN(sqaddv, 0, 0b000011, true);  // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
+  INSN(sqsubv, 0, 0b001011, true);  // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
+  INSN(uqaddv, 1, 0b000011, true);  // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
  INSN(uqsubv, 1, 0b001011, true);  // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
  INSN(mulv,   0, 0b100111, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
  INSN(mlav,   0, 0b100101, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
@ -2613,6 +2616,8 @@ template<typename R, typename... Rx>
  INSN(umlalv, 1, 0b100000, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
  INSN(maxv,   0, 0b011001, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
  INSN(minv,   0, 0b011011, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
+  INSN(umaxv,  1, 0b011001, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
+  INSN(uminv,  1, 0b011011, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
  INSN(smaxp,  0, 0b101001, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
  INSN(sminp,  0, 0b101011, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
  INSN(sqdmulh,0, 0b101101, false); // accepted arrangements: T4H, T8H, T2S, T4S
@ -3323,8 +3328,12 @@ public:
    f(0b00000100, 31, 24), f(T, 23, 22), f(1, 21),                                     \
    rf(Zm, 16), f(0, 15, 13), f(opcode, 12, 10), rf(Zn, 5), rf(Zd, 0);                 \
  }
-  INSN(sve_add, 0b000);
-  INSN(sve_sub, 0b001);
+  INSN(sve_add,   0b000);
+  INSN(sve_sub,   0b001);
+  INSN(sve_sqadd, 0b100);
+  INSN(sve_sqsub, 0b110);
+  INSN(sve_uqadd, 0b101);
+  INSN(sve_uqsub, 0b111);
 #undef INSN

 // SVE integer add/subtract immediate (unpredicated)
@ -3435,6 +3444,8 @@ public:
  INSN(sve_sminv, 0b00000100, 0b001010001); // signed minimum reduction to scalar
  INSN(sve_sub,   0b00000100, 0b000001000); // vector sub
  INSN(sve_uaddv, 0b00000100, 0b000001001); // unsigned add reduction to scalar
+  INSN(sve_umax,  0b00000100, 0b001001000); // unsigned maximum vectors
+  INSN(sve_umin,  0b00000100, 0b001011000); // unsigned minimum vectors
 #undef INSN

 // SVE floating-point arithmetic - predicate
@ -4226,6 +4237,20 @@ public:
  INSN(sve_eor3, 0b001); // Bitwise exclusive OR of three vectors
 #undef INSN

+// SVE2 saturating operations - predicate
+#define INSN(NAME, op1, op2)                                                          \
+  void NAME(FloatRegister Zdn, SIMD_RegVariant T, PRegister Pg, FloatRegister Znm) {  \
+    assert(T != Q, "invalid register variant");                                       \
+    sve_predicate_reg_insn(op1, op2, Zdn, T, Pg, Znm);                                \
+  }
+
+  INSN(sve_sqadd, 0b01000100, 0b011000100); // signed saturating add
+  INSN(sve_sqsub, 0b01000100, 0b011010100); // signed saturating sub
+  INSN(sve_uqadd, 0b01000100, 0b011001100); // unsigned saturating add
+  INSN(sve_uqsub, 0b01000100, 0b011011100); // unsigned saturating sub
+
+#undef INSN
+
  Assembler(CodeBuffer* code) : AbstractAssembler(code) {
  }

--- a/test/hotspot/gtest/aarch64/aarch64-asmtest.py
+++ b/test/hotspot/gtest/aarch64/aarch64-asmtest.py
@ -1792,12 +1792,28 @@ generate(ThreeRegNEONOp,
          ["addv", "add", "4H"], ["addv", "add", "8H"],
          ["addv", "add", "2S"], ["addv", "add", "4S"],
          ["addv", "add", "2D"],
+          ["sqaddv", "sqadd", "8B"], ["sqaddv", "sqadd", "16B"],
+          ["sqaddv", "sqadd", "4H"], ["sqaddv", "sqadd", "8H"],
+          ["sqaddv", "sqadd", "2S"], ["sqaddv", "sqadd", "4S"],
+          ["sqaddv", "sqadd", "2D"],
+          ["uqaddv", "uqadd", "8B"], ["uqaddv", "uqadd", "16B"],
+          ["uqaddv", "uqadd", "4H"], ["uqaddv", "uqadd", "8H"],
+          ["uqaddv", "uqadd", "2S"], ["uqaddv", "uqadd", "4S"],
+          ["uqaddv", "uqadd", "2D"],
          ["fadd", "fadd", "2S"], ["fadd", "fadd", "4S"],
          ["fadd", "fadd", "2D"],
          ["subv", "sub", "8B"], ["subv", "sub", "16B"],
          ["subv", "sub", "4H"], ["subv", "sub", "8H"],
          ["subv", "sub", "2S"], ["subv", "sub", "4S"],
          ["subv", "sub", "2D"],
+          ["sqsubv", "sqsub", "8B"], ["sqsubv", "sqsub", "16B"],
+          ["sqsubv", "sqsub", "4H"], ["sqsubv", "sqsub", "8H"],
+          ["sqsubv", "sqsub", "2S"], ["sqsubv", "sqsub", "4S"],
+          ["sqsubv", "sqsub", "2D"],
+          ["uqsubv", "uqsub", "8B"], ["uqsubv", "uqsub", "16B"],
+          ["uqsubv", "uqsub", "4H"], ["uqsubv", "uqsub", "8H"],
+          ["uqsubv", "uqsub", "2S"], ["uqsubv", "uqsub", "4S"],
+          ["uqsubv", "uqsub", "2D"],
          ["fsub", "fsub", "2S"], ["fsub", "fsub", "4S"],
          ["fsub", "fsub", "2D"],
          ["mulv", "mul", "8B"], ["mulv", "mul", "16B"],
@ -1822,6 +1838,9 @@ generate(ThreeRegNEONOp,
          ["maxv", "smax", "8B"], ["maxv", "smax", "16B"],
          ["maxv", "smax", "4H"], ["maxv", "smax", "8H"],
          ["maxv", "smax", "2S"], ["maxv", "smax", "4S"],
+          ["umaxv", "umax", "8B"], ["umaxv", "umax", "16B"],
+          ["umaxv", "umax", "4H"], ["umaxv", "umax", "8H"],
+          ["umaxv", "umax", "2S"], ["umaxv", "umax", "4S"],
          ["smaxp", "smaxp", "8B"], ["smaxp", "smaxp", "16B"],
          ["smaxp", "smaxp", "4H"], ["smaxp", "smaxp", "8H"],
          ["smaxp", "smaxp", "2S"], ["smaxp", "smaxp", "4S"],
@ -1830,6 +1849,9 @@ generate(ThreeRegNEONOp,
          ["minv", "smin", "8B"], ["minv", "smin", "16B"],
          ["minv", "smin", "4H"], ["minv", "smin", "8H"],
          ["minv", "smin", "2S"], ["minv", "smin", "4S"],
+          ["uminv", "umin", "8B"], ["uminv", "umin", "16B"],
+          ["uminv", "umin", "4H"], ["uminv", "umin", "8H"],
+          ["uminv", "umin", "2S"], ["uminv", "umin", "4S"],
          ["sminp", "sminp", "8B"], ["sminp", "sminp", "16B"],
          ["sminp", "sminp", "4H"], ["sminp", "sminp", "8H"],
          ["sminp", "sminp", "2S"], ["sminp", "sminp", "4S"],
@ -2131,6 +2153,10 @@ generate(SVEVectorOp, [["add", "ZZZ"],
                       ["fadd", "ZZZ"],
                       ["fmul", "ZZZ"],
                       ["fsub", "ZZZ"],
+                       ["sqadd", "ZZZ"],
+                       ["sqsub", "ZZZ"],
+                       ["uqadd", "ZZZ"],
+                       ["uqsub", "ZZZ"],
                       ["abs", "ZPZ", "m"],
                       ["add", "ZPZ", "m", "dn"],
                       ["and", "ZPZ", "m", "dn"],
@ -2149,6 +2175,8 @@ generate(SVEVectorOp, [["add", "ZZZ"],
                       ["revb", "ZPZ", "m"],
                       ["smax", "ZPZ", "m", "dn"],
                       ["smin", "ZPZ", "m", "dn"],
+                       ["umax", "ZPZ", "m", "dn"],
+                       ["umin", "ZPZ", "m", "dn"],
                       ["sub", "ZPZ", "m", "dn"],
                       ["fabs", "ZPZ", "m"],
                       ["fadd", "ZPZ", "m", "dn"],
@ -2183,6 +2211,10 @@ generate(SVEVectorOp, [["add", "ZZZ"],
                       ["bext", "ZZZ"],
                       ["bdep", "ZZZ"],
                       ["eor3", "ZZZ"],
+                       ["sqadd", "ZPZ", "m", "dn"],
+                       ["sqsub", "ZPZ", "m", "dn"],
+                       ["uqadd", "ZPZ", "m", "dn"],
+                       ["uqsub", "ZPZ", "m", "dn"],
                      ])

 generate(SVEReductionOp, [["andv", 0], ["orv", 0], ["eorv", 0], ["smaxv", 0], ["sminv", 0],
--- a/test/hotspot/gtest/aarch64/asmtest.out.h
+++ b/test/hotspot/gtest/aarch64/asmtest.out.h
--- a/test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java
+++ b/test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java
@ -2016,6 +2016,21 @@ public class IRNode {
        vectorNode(VECTOR_BLEND_B, "VectorBlend", TYPE_BYTE);
    }

+    public static final String VECTOR_BLEND_S = VECTOR_PREFIX + "VECTOR_BLEND_S" + POSTFIX;
+    static {
+        vectorNode(VECTOR_BLEND_S, "VectorBlend", TYPE_SHORT);
+    }
+
+    public static final String VECTOR_BLEND_I = VECTOR_PREFIX + "VECTOR_BLEND_I" + POSTFIX;
+    static {
+        vectorNode(VECTOR_BLEND_I, "VectorBlend", TYPE_INT);
+    }
+
+    public static final String VECTOR_BLEND_L = VECTOR_PREFIX + "VECTOR_BLEND_L" + POSTFIX;
+    static {
+        vectorNode(VECTOR_BLEND_L, "VectorBlend", TYPE_LONG);
+    }
+
    public static final String VECTOR_BLEND_F = VECTOR_PREFIX + "VECTOR_BLEND_F" + POSTFIX;
    static {
        vectorNode(VECTOR_BLEND_F, "VectorBlend", TYPE_FLOAT);
--- a/test/hotspot/jtreg/compiler/lib/ir_framework/test/IREncodingPrinter.java
+++ b/test/hotspot/jtreg/compiler/lib/ir_framework/test/IREncodingPrinter.java
@ -108,6 +108,7 @@ public class IREncodingPrinter {
        "sha3",
        "asimd",
        "sve",
+        "sve2",
        // Riscv64
        "rvv",
        "zbkb",
--- a/test/hotspot/jtreg/compiler/vectorapi/VectorSaturatedOperationsTest.java
+++ b/test/hotspot/jtreg/compiler/vectorapi/VectorSaturatedOperationsTest.java
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2024, 2025, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -23,7 +23,7 @@

 /**
 * @test
-* @bug 8338021 8342677
+* @bug 8338021 8342677 8349522
 * @summary Add IR validation tests for newly added saturated vector add / sub operations
 * @modules jdk.incubator.vector
 * @library /test/lib /
@ -59,6 +59,8 @@ public class VectorSaturatedOperationsTest {
    private short[] short_out;
    private byte[]  byte_out;

+    private boolean[] mask;
+
    public static void main(String[] args) {
        TestFramework testFramework = new TestFramework();
        testFramework.setDefaultWarmup(5000)
@ -137,6 +139,7 @@ public class VectorSaturatedOperationsTest {
        short_in2 = new short[COUNT];
        int_in2   = new int[COUNT];
        long_in2  = new long[COUNT];
+        mask      = new boolean[COUNT];
        IntStream.range(0, COUNT-4).forEach(
            i -> {
                long_in1[i] = r.nextLong(Long.MIN_VALUE, Long.MAX_VALUE);
@ -147,6 +150,7 @@ public class VectorSaturatedOperationsTest {
                short_in2[i] = (short)r.nextInt(Short.MIN_VALUE, Short.MAX_VALUE);
                byte_in1[i] = (byte)r.nextInt(Byte.MIN_VALUE, Byte.MAX_VALUE);
                byte_in2[i] = (byte)r.nextInt(Byte.MIN_VALUE, Byte.MAX_VALUE);
+                mask[i] = r.nextBoolean();
            }
        );

@ -162,7 +166,7 @@ public class VectorSaturatedOperationsTest {
    }

    @Test
-    @IR(counts = {IRNode.SATURATING_ADD_VB, " >0 "}, applyIfCPUFeature = {"avx", "true"})
+    @IR(counts = {IRNode.SATURATING_ADD_VB, " >0 "}, applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
    @Warmup(value = 10000)
    public void sadd_byte() {
        for (int i = 0; i < COUNT; i += bspec.length()) {
@ -185,7 +189,7 @@ public class VectorSaturatedOperationsTest {
    }

    @Test
-    @IR(counts = {IRNode.SATURATING_ADD_VS, " >0 "}, applyIfCPUFeature = {"avx", "true"})
+    @IR(counts = {IRNode.SATURATING_ADD_VS, " >0 "}, applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
    @Warmup(value = 10000)
    public void sadd_short() {
        for (int i = 0; i < COUNT; i += sspec.length()) {
@ -208,7 +212,7 @@ public class VectorSaturatedOperationsTest {
    }

    @Test
-    @IR(counts = {IRNode.SATURATING_ADD_VI, " >0 "}, applyIfCPUFeature = {"avx", "true"})
+    @IR(counts = {IRNode.SATURATING_ADD_VI, " >0 "}, applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
    @Warmup(value = 10000)
    public void sadd_int() {
        for (int i = 0; i < COUNT; i += ispec.length()) {
@ -231,7 +235,7 @@ public class VectorSaturatedOperationsTest {
    }

    @Test
-    @IR(counts = {IRNode.SATURATING_ADD_VL, " >0 "}, applyIfCPUFeature = {"avx", "true"})
+    @IR(counts = {IRNode.SATURATING_ADD_VL, " >0 "}, applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
    @Warmup(value = 10000)
    public void sadd_long() {
        for (int i = 0; i < COUNT; i += lspec.length()) {
@ -256,7 +260,7 @@ public class VectorSaturatedOperationsTest {
    @Test
    @IR(counts = {IRNode.SATURATING_ADD_VB, " >0 " , "unsigned_vector_node", " >0 "},
        phase = {CompilePhase.BEFORE_MATCHING},
-        applyIfCPUFeature = {"avx", "true"})
+        applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
    @Warmup(value = 10000)
    public void suadd_byte() {
        for (int i = 0; i < COUNT; i += bspec.length()) {
@ -281,7 +285,7 @@ public class VectorSaturatedOperationsTest {
    @Test
    @IR(counts = {IRNode.SATURATING_ADD_VS, " >0 ", "unsigned_vector_node", " >0 "},
        phase = {CompilePhase.BEFORE_MATCHING},
-        applyIfCPUFeature = {"avx", "true"})
+        applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
    @Warmup(value = 10000)
    public void suadd_short() {
        for (int i = 0; i < COUNT; i += sspec.length()) {
@ -306,7 +310,7 @@ public class VectorSaturatedOperationsTest {
    @Test
    @IR(counts = {IRNode.SATURATING_ADD_VI, " >0 ", "unsigned_vector_node", " >0 "},
        phase = {CompilePhase.BEFORE_MATCHING},
-        applyIfCPUFeature = {"avx", "true"})
+        applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
    @Warmup(value = 10000)
    public void suadd_int() {
        for (int i = 0; i < COUNT; i += ispec.length()) {
@ -331,7 +335,7 @@ public class VectorSaturatedOperationsTest {
    @Test
    @IR(counts = {IRNode.SATURATING_ADD_VL, " >0 ", "unsigned_vector_node", " >0 "},
        phase = {CompilePhase.BEFORE_MATCHING},
-        applyIfCPUFeature = {"avx", "true"})
+        applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
    @Warmup(value = 10000)
    public void suadd_long() {
        for (int i = 0; i < COUNT; i += lspec.length()) {
@ -354,7 +358,7 @@ public class VectorSaturatedOperationsTest {
    }

    @Test
-    @IR(counts = {IRNode.SATURATING_SUB_VB, " >0 "}, applyIfCPUFeature = {"avx", "true"})
+    @IR(counts = {IRNode.SATURATING_SUB_VB, " >0 "}, applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
    @Warmup(value = 10000)
    public void ssub_byte() {
        for (int i = 0; i < COUNT; i += bspec.length()) {
@ -377,7 +381,7 @@ public class VectorSaturatedOperationsTest {
    }

    @Test
-    @IR(counts = {IRNode.SATURATING_SUB_VS, " >0 "}, applyIfCPUFeature = {"avx", "true"})
+    @IR(counts = {IRNode.SATURATING_SUB_VS, " >0 "}, applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
    @Warmup(value = 10000)
    public void ssub_short() {
        for (int i = 0; i < COUNT; i += sspec.length()) {
@ -400,7 +404,7 @@ public class VectorSaturatedOperationsTest {
    }

    @Test
-    @IR(counts = {IRNode.SATURATING_SUB_VI, " >0 "}, applyIfCPUFeature = {"avx", "true"})
+    @IR(counts = {IRNode.SATURATING_SUB_VI, " >0 "}, applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
    @Warmup(value = 10000)
    public void ssub_int() {
        for (int i = 0; i < COUNT; i += ispec.length()) {
@ -423,7 +427,7 @@ public class VectorSaturatedOperationsTest {
    }

    @Test
-    @IR(counts = {IRNode.SATURATING_SUB_VL, " >0 "}, applyIfCPUFeature = {"avx", "true"})
+    @IR(counts = {IRNode.SATURATING_SUB_VL, " >0 "}, applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
    @Warmup(value = 10000)
    public void ssub_long() {
        for (int i = 0; i < COUNT; i += lspec.length()) {
@ -448,7 +452,7 @@ public class VectorSaturatedOperationsTest {
    @Test
    @IR(counts = {IRNode.SATURATING_SUB_VB, " >0 " , "unsigned_vector_node", " >0 "},
        phase = {CompilePhase.BEFORE_MATCHING},
-        applyIfCPUFeature = {"avx", "true"})
+        applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
    @Warmup(value = 10000)
    public void susub_byte() {
        for (int i = 0; i < COUNT; i += bspec.length()) {
@ -473,7 +477,7 @@ public class VectorSaturatedOperationsTest {
    @Test
    @IR(counts = {IRNode.SATURATING_SUB_VS, " >0 ", "unsigned_vector_node", " >0 "},
        phase = {CompilePhase.BEFORE_MATCHING},
-        applyIfCPUFeature = {"avx", "true"})
+        applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
    @Warmup(value = 10000)
    public void susub_short() {
        for (int i = 0; i < COUNT; i += sspec.length()) {
@ -498,7 +502,7 @@ public class VectorSaturatedOperationsTest {
    @Test
    @IR(counts = {IRNode.SATURATING_SUB_VI, " >0 ", "unsigned_vector_node", " >0 "},
        phase = {CompilePhase.BEFORE_MATCHING},
-        applyIfCPUFeature = {"avx", "true"})
+        applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
    @Warmup(value = 10000)
    public void susub_int() {
        for (int i = 0; i < COUNT; i += ispec.length()) {
@ -523,7 +527,7 @@ public class VectorSaturatedOperationsTest {
    @Test
    @IR(counts = {IRNode.SATURATING_SUB_VL, " >0 ", "unsigned_vector_node", " >0 "},
        phase = {CompilePhase.BEFORE_MATCHING},
-        applyIfCPUFeature = {"avx", "true"})
+        applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
    @Warmup(value = 10000)
    public void susub_long() {
        for (int i = 0; i < COUNT; i += lspec.length()) {
@ -544,4 +548,112 @@ public class VectorSaturatedOperationsTest {
            }
        }
    }
+
+    @Test
+    @IR(counts = {IRNode.SATURATING_ADD_VB, " >0 "}, applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
+    @IR(counts = {IRNode.VECTOR_BLEND_B, " >0 "}, applyIfCPUFeatureAnd = {"asimd", "true", "sve2", "false"}) // asimd without sve2: a byte blend node must be present
+    @IR(failOn = IRNode.VECTOR_BLEND_B, applyIfCPUFeature = {"sve2", "true"}) // sve2: no byte blend node may appear (presumably the op itself is predicated there - confirm)
+    @Warmup(value = 10000)
+    public void sadd_masked() { // masked signed saturating add (SADD) of byte_in1 and byte_in2, written to byte_out
+        for (int i = 0; i < COUNT; i += bspec.length()) { // one species-sized chunk per iteration
+            VectorMask<Byte> m = VectorMask.fromArray(bspec, mask, i); // lane mask taken from the shared boolean array at the same offset
+            ByteVector.fromArray(bspec, byte_in1, i)
+                      .lanewise(VectorOperators.SADD,
+                                ByteVector.fromArray(bspec, byte_in2, i), m)
+                      .intoArray(byte_out, i);
+        }
+    }
+
+    /**
+     * Scalar cross-check for {@code sadd_masked}: each element of {@code byte_out} must equal
+     * {@code VectorMath.addSaturating} of the two byte inputs where {@code mask} is set, and the
+     * element of {@code byte_in1} where it is not.
+     *
+     * @throws AssertionError on the first element that differs from the scalar reference
+     */
+    @Check(test = "sadd_masked")
+    public void sadd_masked_verify() {
+        for (int idx = 0; idx < COUNT; idx++) {
+            byte got = byte_out[idx];
+            byte want;
+            if (mask[idx]) {
+                want = VectorMath.addSaturating(byte_in1[idx], byte_in2[idx]);
+            } else {
+                // Lanes that are switched off pass the first operand through.
+                want = byte_in1[idx];
+            }
+            if (got == want) {
+                continue;
+            }
+            throw new AssertionError("Result Mismatch : actual (" + got + ") !=  expected (" + want + ")");
+        }
+    }
+
+    @Test
+    @IR(counts = {IRNode.SATURATING_SUB_VS, " >0 "}, applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
+    @IR(counts = {IRNode.VECTOR_BLEND_S, " >0 "}, applyIfCPUFeatureAnd = {"asimd", "true", "sve2", "false"}) // asimd without sve2: a short blend node must be present
+    @IR(failOn = IRNode.VECTOR_BLEND_S, applyIfCPUFeature = {"sve2", "true"}) // sve2: no short blend node may appear (presumably the op itself is predicated there - confirm)
+    @Warmup(value = 10000)
+    public void ssub_masked() { // masked signed saturating subtract (SSUB) of short_in2 from short_in1, written to short_out
+        for (int i = 0; i < COUNT; i += sspec.length()) { // one species-sized chunk per iteration
+            VectorMask<Short> m = VectorMask.fromArray(sspec, mask, i); // lane mask taken from the shared boolean array at the same offset
+            ShortVector.fromArray(sspec, short_in1, i)
+                       .lanewise(VectorOperators.SSUB,
+                                 ShortVector.fromArray(sspec, short_in2, i), m)
+                       .intoArray(short_out, i);
+        }
+    }
+
+    /**
+     * Scalar cross-check for {@code ssub_masked}: each element of {@code short_out} must equal
+     * {@code VectorMath.subSaturating} of the two short inputs where {@code mask} is set, and the
+     * element of {@code short_in1} where it is not.
+     *
+     * @throws AssertionError on the first element that differs from the scalar reference
+     */
+    @Check(test = "ssub_masked")
+    public void ssub_masked_verify() {
+        for (int idx = 0; idx < COUNT; idx++) {
+            short got = short_out[idx];
+            short want;
+            if (mask[idx]) {
+                want = VectorMath.subSaturating(short_in1[idx], short_in2[idx]);
+            } else {
+                // Lanes that are switched off pass the first operand through.
+                want = short_in1[idx];
+            }
+            if (got == want) {
+                continue;
+            }
+            throw new AssertionError("Result Mismatch : actual (" + got + ") !=  expected (" + want + ")");
+        }
+    }
+
+    @Test
+    @IR(counts = {IRNode.SATURATING_ADD_VI, " >0 ", "unsigned_vector_node", " >0 "},
+        phase = {CompilePhase.BEFORE_MATCHING},
+        applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"}) // checked before matching: the add node plus the "unsigned_vector_node" text must both appear
+    @IR(counts = {IRNode.VECTOR_BLEND_I, " >0 "}, applyIfCPUFeatureAnd = {"asimd", "true", "sve2", "false"}) // asimd without sve2: an int blend node must be present
+    @IR(failOn = IRNode.VECTOR_BLEND_I, applyIfCPUFeature = {"sve2", "true"}) // sve2: no int blend node may appear (presumably the op itself is predicated there - confirm)
+    @Warmup(value = 10000)
+    public void suadd_masked() { // masked unsigned saturating add (SUADD) of int_in1 and int_in2, written to int_out
+        for (int i = 0; i < COUNT; i += ispec.length()) { // one species-sized chunk per iteration
+            VectorMask<Integer> m = VectorMask.fromArray(ispec, mask, i); // lane mask taken from the shared boolean array at the same offset
+            IntVector.fromArray(ispec, int_in1, i)
+                     .lanewise(VectorOperators.SUADD,
+                               IntVector.fromArray(ispec, int_in2, i), m)
+                     .intoArray(int_out, i);
+        }
+    }
+
+    /**
+     * Scalar cross-check for {@code suadd_masked}: each element of {@code int_out} must equal
+     * {@code VectorMath.addSaturatingUnsigned} of the two int inputs where {@code mask} is set,
+     * and the element of {@code int_in1} where it is not.
+     *
+     * @throws AssertionError on the first element that differs from the scalar reference
+     */
+    @Check(test = "suadd_masked")
+    public void suadd_masked_verify() {
+        for (int idx = 0; idx < COUNT; idx++) {
+            int got = int_out[idx];
+            int want;
+            if (mask[idx]) {
+                want = VectorMath.addSaturatingUnsigned(int_in1[idx], int_in2[idx]);
+            } else {
+                // Lanes that are switched off pass the first operand through.
+                want = int_in1[idx];
+            }
+            if (got == want) {
+                continue;
+            }
+            throw new AssertionError("Result Mismatch : actual (" + got + ") !=  expected (" + want + ")");
+        }
+    }
+
+    @Test
+    @IR(counts = {IRNode.SATURATING_SUB_VL, " >0 ", "unsigned_vector_node", " >0 "},
+        phase = {CompilePhase.BEFORE_MATCHING},
+        applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"}) // NOTE(review): gates on "avx2" while the other masked tests in this hunk gate on "avx" - confirm this is intended
+    @IR(counts = {IRNode.VECTOR_BLEND_L, " >0 "}, applyIfCPUFeatureAnd = {"asimd", "true", "sve2", "false"}) // asimd without sve2: a long blend node must be present
+    @IR(failOn = IRNode.VECTOR_BLEND_L, applyIfCPUFeature = {"sve2", "true"}) // sve2: no long blend node may appear (presumably the op itself is predicated there - confirm)
+    @Warmup(value = 10000)
+    public void susub_masked() { // masked unsigned saturating subtract (SUSUB) of long_in2 from long_in1, written to long_out
+        for (int i = 0; i < COUNT; i += lspec.length()) { // one species-sized chunk per iteration
+            VectorMask<Long> m = VectorMask.fromArray(lspec, mask, i); // lane mask taken from the shared boolean array at the same offset
+            LongVector.fromArray(lspec, long_in1, i)
+                      .lanewise(VectorOperators.SUSUB,
+                                LongVector.fromArray(lspec, long_in2, i), m)
+                      .intoArray(long_out, i);
+        }
+    }
+
+    /**
+     * Scalar cross-check for {@code susub_masked}: each element of {@code long_out} must equal
+     * {@code VectorMath.subSaturatingUnsigned} of the two long inputs where {@code mask} is set,
+     * and the element of {@code long_in1} where it is not.
+     *
+     * @throws AssertionError on the first element that differs from the scalar reference
+     */
+    @Check(test = "susub_masked")
+    public void susub_masked_verify() {
+        for (int idx = 0; idx < COUNT; idx++) {
+            long got = long_out[idx];
+            long want;
+            if (mask[idx]) {
+                want = VectorMath.subSaturatingUnsigned(long_in1[idx], long_in2[idx]);
+            } else {
+                // Lanes that are switched off pass the first operand through.
+                want = long_in1[idx];
+            }
+            if (got == want) {
+                continue;
+            }
+            throw new AssertionError("Result Mismatch : actual (" + got + ") !=  expected (" + want + ")");
+        }
+    }
 }