8342095: Add autovectorizer support for subword vector casts

Reviewed-by: epeter, qamai
2026-07-14 13:08:09 +00:00 · 2026-02-26 05:15:30 +00:00 · 2026-02-26 05:15:30 +00:00 · 074044c2f3
commit 074044c2f3
parent fd74232d5d
15 changed files with 782 additions and 106 deletions
--- a/src/hotspot/share/opto/superword.cpp
+++ b/src/hotspot/share/opto/superword.cpp
@ -2214,7 +2214,7 @@ bool SuperWord::is_vector_use(Node* use, int u_idx) const {
    return true;
  }

-  if (!is_velt_basic_type_compatible_use_def(use, def)) {
+  if (!is_velt_basic_type_compatible_use_def(use, def, d_pk->size())) {
    return false;
  }

@ -2280,7 +2280,7 @@ Node_List* PackSet::strided_pack_input_at_index_or_null(const Node_List* pack, c

 // Check if the output type of def is compatible with the input type of use, i.e. if the
 // types have the same size.
-bool SuperWord::is_velt_basic_type_compatible_use_def(Node* use, Node* def) const {
+bool SuperWord::is_velt_basic_type_compatible_use_def(Node* use, Node* def, const uint pack_size) const {
  assert(in_bb(def) && in_bb(use), "both use and def are in loop");

  // Conversions are trivially compatible.
@ -2306,8 +2306,17 @@ bool SuperWord::is_velt_basic_type_compatible_use_def(Node* use, Node* def) cons
           type2aelembytes(use_bt) == 4;
  }

-  // Default case: input size of use equals output size of def.
-  return type2aelembytes(use_bt) == type2aelembytes(def_bt);
+  // Input size of use equals output size of def
+  if (type2aelembytes(use_bt) == type2aelembytes(def_bt)) {
+    return true;
+  }
+
+  // Subword cast: Element sizes differ, but the platform supports a cast to change the def shape to the use shape.
+  if (VectorCastNode::is_supported_subword_cast(def_bt, use_bt, pack_size)) {
+    return true;
+  }
+
+  return false;
 }

 // Return nullptr if success, else failure message
--- a/src/hotspot/share/opto/superword.hpp
+++ b/src/hotspot/share/opto/superword.hpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2007, 2026, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -653,7 +653,7 @@ private:
  // Is use->in(u_idx) a vector use?
  bool is_vector_use(Node* use, int u_idx) const;

-  bool is_velt_basic_type_compatible_use_def(Node* use, Node* def) const;
+  bool is_velt_basic_type_compatible_use_def(Node* use, Node* def, const uint pack_size) const;

  bool do_vtransform() const;
 };
--- a/src/hotspot/share/opto/superwordVTransformBuilder.cpp
+++ b/src/hotspot/share/opto/superwordVTransformBuilder.cpp
@ -254,6 +254,20 @@ VTransformNode* SuperWordVTransformBuilder::get_or_make_vtnode_vector_input_at_i

  Node_List* pack_in = _packset.pack_input_at_index_or_null(pack, index);
  if (pack_in != nullptr) {
+    Node* in_p0 = pack_in->at(0);
+    BasicType def_bt = _vloop_analyzer.types().velt_basic_type(in_p0);
+    BasicType use_bt = _vloop_analyzer.types().velt_basic_type(p0);
+
+    // If the use and def element types differ, the use is not itself a conversion, and the subword cast is supported, emit a cast node
+    if (use_bt != def_bt && !p0->is_Convert() && VectorCastNode::is_supported_subword_cast(def_bt, use_bt, pack->size())) {
+      VTransformNode* in = get_vtnode(pack_in->at(0));
+      const VTransformVectorNodeProperties properties = VTransformVectorNodeProperties::make_from_pack(pack, _vloop_analyzer);
+      VTransformNode* cast = new (_vtransform.arena()) VTransformElementWiseVectorNode(_vtransform, 2, properties, VectorCastNode::opcode(-1, def_bt));
+      cast->set_req(1, in);
+
+      return cast;
+    }
+
    // Input is a matching pack -> vtnode already exists.
    assert(index != 2 || !VectorNode::is_shift(p0), "shift's count cannot be vector");
    return get_vtnode(pack_in->at(0));
--- a/src/hotspot/share/opto/vectornode.cpp
+++ b/src/hotspot/share/opto/vectornode.cpp
@ -1561,6 +1561,13 @@ bool VectorCastNode::implemented(int opc, uint vlen, BasicType src_type, BasicTy
  return false;
 }

+bool VectorCastNode::is_supported_subword_cast(BasicType def_bt, BasicType use_bt, const uint pack_size) {
+  assert(def_bt != use_bt, "use and def types must be different");
+
+  // Opcode is only required to disambiguate half float, so we pass -1 as it can't be encountered here.
+  return (is_subword_type(def_bt) || is_subword_type(use_bt)) && VectorCastNode::implemented(-1, pack_size, def_bt, use_bt);
+}
+
 Node* VectorCastNode::Identity(PhaseGVN* phase) {
  if (!in(1)->is_top()) {
    BasicType  in_bt = in(1)->bottom_type()->is_vect()->element_basic_type();
--- a/src/hotspot/share/opto/vectornode.hpp
+++ b/src/hotspot/share/opto/vectornode.hpp
@ -1846,6 +1846,7 @@ class VectorCastNode : public VectorNode {
  static VectorNode* make(int vopc, Node* n1, BasicType bt, uint vlen);
  static int  opcode(int opc, BasicType bt, bool is_signed = true);
  static bool implemented(int opc, uint vlen, BasicType src_type, BasicType dst_type);
+  static bool is_supported_subword_cast(BasicType def_bt, BasicType use_bt, const uint pack_size);

  virtual Node* Identity(PhaseGVN* phase);
 };
--- a/src/hotspot/share/opto/vtransform.hpp
+++ b/src/hotspot/share/opto/vtransform.hpp
@ -975,4 +975,5 @@ public:
  virtual VTransformApplyResult apply(VTransformApplyState& apply_state) const override;
  NOT_PRODUCT(virtual const char* name() const override { return "StoreVector"; };)
 };
+
 #endif // SHARE_OPTO_VTRANSFORM_HPP
--- a/test/hotspot/jtreg/compiler/c2/TestMinMaxSubword.java
+++ b/test/hotspot/jtreg/compiler/c2/TestMinMaxSubword.java
@ -1,5 +1,6 @@
 /*
 * Copyright (c) 2022, Arm Limited. All rights reserved.
+ * Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -30,7 +31,7 @@ import java.util.Random;

 /*
 * @test
- * @bug 8294816
+ * @bug 8294816 8342095
 * @key randomness
 * @summary Test Math.min/max vectorization miscompilation for integer subwords
 * @library /test/lib /
@ -58,11 +59,11 @@ public class TestMinMaxSubword {
        }
    }

-    // Ensure vector max/min instructions are not generated for integer subword types
-    // as Java APIs for Math.min/max do not support integer subword types and superword
-    // should not generate vectorized Min/Max nodes for them.
+    // Ensure that casts to/from subword types are emitted: the Java APIs for Math.min/max do not support integer subword
+    // types, so superword should generate the int versions and then cast between int and the subword type.
+
    @Test
-    @IR(failOn = {IRNode.MIN_VI, IRNode.MIN_VF, IRNode.MIN_VD})
+    @IR(applyIfCPUFeature = { "avx", "true" }, counts = { IRNode.VECTOR_CAST_I2S, IRNode.VECTOR_SIZE + "min(max_int, max_short)", ">0" })
    public static void testMinShort() {
        for (int i = 0; i < LENGTH; i++) {
           sb[i] = (short) Math.min(sa[i], val);
@ -78,7 +79,7 @@ public class TestMinMaxSubword {
    }

    @Test
-    @IR(failOn = {IRNode.MAX_VI, IRNode.MAX_VF, IRNode.MAX_VD})
+    @IR(applyIfCPUFeature = { "avx", "true" }, counts = { IRNode.VECTOR_CAST_I2S, IRNode.VECTOR_SIZE + "min(max_int, max_short)", ">0" })
    public static void testMaxShort() {
        for (int i = 0; i < LENGTH; i++) {
            sb[i] = (short) Math.max(sa[i], val);
@ -93,7 +94,7 @@ public class TestMinMaxSubword {
    }

    @Test
-    @IR(failOn = {IRNode.MIN_VI, IRNode.MIN_VF, IRNode.MIN_VD})
+    @IR(applyIfCPUFeature = { "avx", "true" }, counts = { IRNode.VECTOR_CAST_I2B, IRNode.VECTOR_SIZE + "min(max_int, max_byte)", ">0" })
    public static void testMinByte() {
        for (int i = 0; i < LENGTH; i++) {
           bb[i] = (byte) Math.min(ba[i], val);
@ -109,7 +110,7 @@ public class TestMinMaxSubword {
    }

    @Test
-    @IR(failOn = {IRNode.MAX_VI, IRNode.MAX_VF, IRNode.MAX_VD})
+    @IR(applyIfCPUFeature = { "avx", "true" }, counts = { IRNode.VECTOR_CAST_I2B, IRNode.VECTOR_SIZE + "min(max_int, max_byte)", ">0" })
    public static void testMaxByte() {
        for (int i = 0; i < LENGTH; i++) {
            bb[i] = (byte) Math.max(ba[i], val);
--- a/test/hotspot/jtreg/compiler/loopopts/superword/TestCompatibleUseDefTypeSize.java
+++ b/test/hotspot/jtreg/compiler/loopopts/superword/TestCompatibleUseDefTypeSize.java
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2024, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2024, 2026, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -34,7 +34,7 @@ import java.nio.ByteOrder;

 /*
 * @test
- * @bug 8325155
+ * @bug 8325155 8342095
 * @key randomness
 * @summary Test some cases that vectorize after the removal of the alignment boundaries code.
 *          Now, we instead check if use-def connections have compatible type size.
@ -106,6 +106,22 @@ public class TestCompatibleUseDefTypeSize {
        tests.put("test9",       () -> { return test9(aL.clone(), bD.clone()); });
        tests.put("test10",      () -> { return test10(aL.clone(), bD.clone()); });
        tests.put("test11",      () -> { return test11(aC.clone()); });
+        tests.put("testByteToInt",   () -> { return testByteToInt(aB.clone(), bI.clone()); });
+        tests.put("testByteToShort", () -> { return testByteToShort(aB.clone(), bS.clone()); });
+        tests.put("testByteToChar",  () -> { return testByteToChar(aB.clone(), bC.clone()); });
+        tests.put("testByteToLong",  () -> { return testByteToLong(aB.clone(), bL.clone()); });
+        tests.put("testShortToByte", () -> { return testShortToByte(aS.clone(), bB.clone()); });
+        tests.put("testShortToChar", () -> { return testShortToChar(aS.clone(), bC.clone()); });
+        tests.put("testShortToInt",  () -> { return testShortToInt(aS.clone(), bI.clone()); });
+        tests.put("testShortToLong", () -> { return testShortToLong(aS.clone(), bL.clone()); });
+        tests.put("testIntToShort",  () -> { return testIntToShort(aI.clone(), bS.clone()); });
+        tests.put("testIntToChar",   () -> { return testIntToChar(aI.clone(), bC.clone()); });
+        tests.put("testIntToByte",   () -> { return testIntToByte(aI.clone(), bB.clone()); });
+        tests.put("testIntToLong",   () -> { return testIntToLong(aI.clone(), bL.clone()); });
+        tests.put("testLongToByte",  () -> { return testLongToByte(aL.clone(), bB.clone()); });
+        tests.put("testLongToShort", () -> { return testLongToShort(aL.clone(), bS.clone()); });
+        tests.put("testLongToChar",  () -> { return testLongToChar(aL.clone(), bC.clone()); });
+        tests.put("testLongToInt",   () -> { return testLongToInt(aL.clone(), bI.clone()); });

        // Compute gold value for all test methods before compilation
        for (Map.Entry<String,TestFunction> entry : tests.entrySet()) {
@ -128,7 +144,23 @@ public class TestCompatibleUseDefTypeSize {
                 "test8",
                 "test9",
                 "test10",
-                 "test11"})
+                 "test11",
+                 "testByteToInt",
+                 "testByteToShort",
+                 "testByteToChar",
+                 "testByteToLong",
+                 "testShortToByte",
+                 "testShortToChar",
+                 "testShortToInt",
+                 "testShortToLong",
+                 "testIntToShort",
+                 "testIntToChar",
+                 "testIntToByte",
+                 "testIntToLong",
+                 "testLongToByte",
+                 "testLongToShort",
+                 "testLongToChar",
+                 "testLongToInt"})
    public void runTests() {
        for (Map.Entry<String,TestFunction> entry : tests.entrySet()) {
            String name = entry.getKey();
@ -328,12 +360,12 @@ public class TestCompatibleUseDefTypeSize {
    }

    @Test
-    @IR(counts = {IRNode.STORE_VECTOR, "= 0"},
+    @IR(counts = {IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
-        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
+        applyIf = {"AlignVector", "false"},
+        applyIfCPUFeatureOr = {"avx", "true", "asimd", "true", "rvv", "true"})
    // "inflate"  method: 1 byte -> 2 byte.
    // Java scalar code has no explicit conversion.
-    // Vector code would need a conversion. We may add this in the future.
    static Object[] test1(byte[] src, char[] dst) {
        for (int i = 0; i < src.length; i++) {
            dst[i] = (char)(src[i]);
@ -478,4 +510,201 @@ public class TestCompatibleUseDefTypeSize {
        }
        return new Object[]{ a, new char[] { m } };
    }
+
+    // Narrowing
+
+    @Test
+    @IR(applyIfCPUFeatureOr = { "avx", "true", "asimd", "true" },
+        applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"},
+        counts = { IRNode.VECTOR_CAST_I2S, IRNode.VECTOR_SIZE + "min(max_int, max_short)", ">0" })
+    public Object[] testIntToShort(int[] ints, short[] res) {
+        for (int i = 0; i < ints.length; i++) {
+            res[i] = (short) ints[i];
+        }
+
+        return new Object[] { ints, res };
+    }
+
+
+    @Test
+    @IR(applyIfCPUFeatureOr = { "avx", "true", "asimd", "true" },
+        applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"},
+        counts = { IRNode.VECTOR_CAST_I2S, IRNode.VECTOR_SIZE + "min(max_int, max_char)", ">0" })
+    public Object[] testIntToChar(int[] ints, char[] res) {
+        for (int i = 0; i < ints.length; i++) {
+            res[i] = (char) ints[i];
+        }
+
+        return new Object[] { ints, res };
+    }
+
+    @Test
+    @IR(applyIfCPUFeatureOr = { "avx", "true", "asimd", "true" },
+        applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"},
+        counts = { IRNode.VECTOR_CAST_I2B, IRNode.VECTOR_SIZE + "min(max_int, max_byte)", ">0" })
+    public Object[] testIntToByte(int[] ints, byte[] res) {
+        for (int i = 0; i < ints.length; i++) {
+            res[i] = (byte) ints[i];
+        }
+
+        return new Object[] { ints, res };
+    }
+
+    @Test
+    @IR(applyIfCPUFeatureOr = { "avx", "true", "asimd", "true" },
+        applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"},
+        counts = { IRNode.VECTOR_CAST_S2B, IRNode.VECTOR_SIZE + "min(max_short, max_byte)", ">0" })
+    public Object[] testShortToByte(short[] shorts, byte[] res) {
+        for (int i = 0; i < shorts.length; i++) {
+            res[i] = (byte) shorts[i];
+        }
+
+        return new Object[] { shorts, res };
+    }
+
+    @Test
+    @IR(applyIfCPUFeatureOr = { "avx2", "true" },
+        applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"},
+        counts = { IRNode.VECTOR_CAST_L2B, IRNode.VECTOR_SIZE + "min(max_long, max_byte)", ">0" })
+    public Object[] testLongToByte(long[] longs, byte[] res) {
+        for (int i = 0; i < longs.length; i++) {
+            res[i] = (byte) longs[i];
+        }
+
+        return new Object[] { longs, res };
+    }
+
+    @Test
+    @IR(applyIfCPUFeatureOr = { "avx", "true", "asimd", "true" },
+        applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"},
+        counts = { IRNode.VECTOR_CAST_L2S, IRNode.VECTOR_SIZE + "min(max_long, max_short)", ">0" })
+    public Object[] testLongToShort(long[] longs, short[] res) {
+        for (int i = 0; i < longs.length; i++) {
+            res[i] = (short) longs[i];
+        }
+
+        return new Object[] { longs, res };
+    }
+
+    @Test
+    @IR(applyIfCPUFeatureOr = { "avx", "true", "asimd", "true" },
+        applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"},
+        counts = { IRNode.VECTOR_CAST_L2S, IRNode.VECTOR_SIZE + "min(max_long, max_char)", ">0" })
+    public Object[] testLongToChar(long[] longs, char[] res) {
+        for (int i = 0; i < longs.length; i++) {
+            res[i] = (char) longs[i];
+        }
+
+        return new Object[] { longs, res };
+    }
+
+    @Test
+    @IR(applyIfCPUFeatureOr = { "avx", "true", "asimd", "true" },
+        applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"},
+        counts = { IRNode.VECTOR_CAST_L2I, IRNode.VECTOR_SIZE + "min(max_long, max_int)", ">0" })
+    public Object[] testLongToInt(long[] longs, int[] res) {
+        for (int i = 0; i < longs.length; i++) {
+            res[i] = (int) longs[i];
+        }
+
+        return new Object[] { longs, res };
+    }
+
+    @Test
+    @IR(applyIfCPUFeatureOr = { "avx", "true", "asimd", "true" },
+        applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"},
+        counts = { IRNode.STORE_VECTOR, ">0" })
+    public Object[] testShortToChar(short[] shorts, char[] res) {
+        for (int i = 0; i < shorts.length; i++) {
+            res[i] = (char) shorts[i];
+        }
+
+        return new Object[] { shorts, res };
+    }
+
+    // Widening
+
+    @Test
+    @IR(applyIfCPUFeatureOr = { "avx", "true", "asimd", "true" },
+        applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"},
+        counts = { IRNode.VECTOR_CAST_S2I, IRNode.VECTOR_SIZE + "min(max_short, max_int)", ">0" })
+    public Object[] testShortToInt(short[] shorts, int[] res) {
+        for (int i = 0; i < shorts.length; i++) {
+            res[i] = shorts[i];
+        }
+
+        return new Object[] { shorts, res };
+    }
+
+    @Test
+    @IR(applyIfCPUFeatureOr = { "avx", "true", "asimd", "true" },
+        applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"},
+        counts = { IRNode.VECTOR_CAST_B2I, IRNode.VECTOR_SIZE + "min(max_byte, max_int)", ">0" })
+    public Object[] testByteToInt(byte[] bytes, int[] res) {
+        for (int i = 0; i < bytes.length; i++) {
+            res[i] = bytes[i];
+        }
+
+        return new Object[] { bytes, res };
+    }
+
+    @Test
+    @IR(applyIfCPUFeatureOr = { "avx", "true", "asimd", "true" },
+        applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"},
+        counts = { IRNode.VECTOR_CAST_B2S, IRNode.VECTOR_SIZE + "min(max_byte, max_short)", ">0" })
+    public Object[] testByteToShort(byte[] bytes, short[] res) {
+        for (int i = 0; i < bytes.length; i++) {
+            res[i] = bytes[i];
+        }
+
+        return new Object[] { bytes, res };
+    }
+
+    @Test
+    @IR(applyIfCPUFeatureOr = { "avx", "true", "asimd", "true" },
+        applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"},
+        counts = { IRNode.VECTOR_CAST_B2S, IRNode.VECTOR_SIZE + "min(max_byte, max_char)", ">0" })
+    public Object[] testByteToChar(byte[] bytes, char[] res) {
+        for (int i = 0; i < bytes.length; i++) {
+            res[i] = (char) bytes[i];
+        }
+
+        return new Object[] { bytes, res };
+    }
+
+    @Test
+    @IR(applyIfCPUFeatureOr = { "avx2", "true" },
+        applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"},
+        counts = { IRNode.VECTOR_CAST_B2L, IRNode.VECTOR_SIZE + "min(max_byte, max_long)", ">0" })
+    public Object[] testByteToLong(byte[] bytes, long[] res) {
+        for (int i = 0; i < bytes.length; i++) {
+            res[i] = bytes[i];
+        }
+
+        return new Object[] { bytes, res };
+    }
+
+    @Test
+    @IR(applyIfCPUFeatureOr = { "avx", "true", "asimd", "true" },
+        applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"},
+        counts = { IRNode.VECTOR_CAST_S2L, IRNode.VECTOR_SIZE + "min(max_short, max_long)", ">0" })
+    public Object[] testShortToLong(short[] shorts, long[] res) {
+        for (int i = 0; i < shorts.length; i++) {
+            res[i] = shorts[i];
+        }
+
+        return new Object[] { shorts, res };
+    }
+
+    @Test
+    @IR(applyIfCPUFeatureOr = { "avx", "true", "asimd", "true" },
+        applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"},
+        counts = { IRNode.VECTOR_CAST_I2L, IRNode.VECTOR_SIZE + "min(max_int, max_long)", ">0" })
+    public Object[] testIntToLong(int[] ints, long[] res) {
+        for (int i = 0; i < ints.length; i++) {
+            res[i] = ints[i];
+        }
+
+        return new Object[] { ints, res };
+    }
 }
--- a/test/hotspot/jtreg/compiler/loopopts/superword/TestReductions.java
+++ b/test/hotspot/jtreg/compiler/loopopts/superword/TestReductions.java
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2024, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2024, 2026, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -23,7 +23,7 @@

 /*
 * @test id=no-vectorization
- * @bug 8340093
+ * @bug 8340093 8342095
 * @summary Test vectorization of reduction loops.
 * @library /test/lib /
 * @run driver compiler.loopopts.superword.TestReductions P0
@ -31,7 +31,7 @@

 /*
 * @test id=vanilla
- * @bug 8340093
+ * @bug 8340093 8342095
 * @summary Test vectorization of reduction loops.
 * @library /test/lib /
 * @run driver compiler.loopopts.superword.TestReductions P1
@ -39,7 +39,7 @@

 /*
 * @test id=force-vectorization
- * @bug 8340093
+ * @bug 8340093 8342095
 * @summary Test vectorization of reduction loops.
 * @library /test/lib /
 * @run driver compiler.loopopts.superword.TestReductions P2
@ -455,7 +455,13 @@ public class TestReductions {

    // ---------byte***Simple ------------------------------------------------------------
    @Test
-    @IR(failOn = IRNode.LOAD_VECTOR_B) // does not vectorize for now, might in the future.
+    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE + "min(max_int, max_byte)", "> 0",
+                  IRNode.AND_REDUCTION_V, "> 0",
+                  IRNode.AND_VI,          "> 0"},
+        applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
+        applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
+    @IR(failOn = IRNode.LOAD_VECTOR_B,
+        applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
    private static byte byteAndSimple() {
        byte acc = (byte)0xFF; // neutral element
        for (int i = 0; i < SIZE; i++) {
@ -466,7 +472,13 @@ public class TestReductions {
    }

    @Test
-    @IR(failOn = IRNode.LOAD_VECTOR_B) // does not vectorize for now, might in the future.
+    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE + "min(max_int, max_byte)", "> 0",
+                  IRNode.OR_REDUCTION_V, "> 0",
+                  IRNode.OR_VI,          "> 0"},
+        applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
+        applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
+    @IR(failOn = IRNode.LOAD_VECTOR_B,
+        applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
    private static byte byteOrSimple() {
        byte acc = 0; // neutral element
        for (int i = 0; i < SIZE; i++) {
@ -477,7 +489,13 @@ public class TestReductions {
    }

    @Test
-    @IR(failOn = IRNode.LOAD_VECTOR_B) // does not vectorize for now, might in the future.
+    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE + "min(max_int, max_byte)", "> 0",
+                  IRNode.XOR_REDUCTION_V, "> 0",
+                  IRNode.XOR_VI,          "> 0"},
+        applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
+        applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
+    @IR(failOn = IRNode.LOAD_VECTOR_B,
+        applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
    private static byte byteXorSimple() {
        byte acc = 0; // neutral element
        for (int i = 0; i < SIZE; i++) {
@ -510,7 +528,13 @@ public class TestReductions {
    }

    @Test
-    @IR(failOn = IRNode.LOAD_VECTOR_B) // does not vectorize for now, might in the future.
+    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE + "min(max_int, max_byte)", "> 0",
+                  IRNode.MIN_REDUCTION_V, "> 0",
+                  IRNode.MIN_VI,          "> 0"},
+        applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
+        applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
+    @IR(failOn = IRNode.LOAD_VECTOR_B,
+        applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
    private static byte byteMinSimple() {
        byte acc = Byte.MAX_VALUE; // neutral element
        for (int i = 0; i < SIZE; i++) {
@ -521,7 +545,13 @@ public class TestReductions {
    }

    @Test
-    @IR(failOn = IRNode.LOAD_VECTOR_B) // does not vectorize for now, might in the future.
+    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE + "min(max_int, max_byte)", "> 0",
+                  IRNode.MAX_REDUCTION_V, "> 0",
+                  IRNode.MAX_VI,          "> 0"},
+        applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
+        applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
+    @IR(failOn = IRNode.LOAD_VECTOR_B,
+        applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
    private static byte byteMaxSimple() {
        byte acc = Byte.MIN_VALUE; // neutral element
        for (int i = 0; i < SIZE; i++) {
@ -533,7 +563,13 @@ public class TestReductions {

    // ---------byte***DotProduct ------------------------------------------------------------
    @Test
-    @IR(failOn = IRNode.LOAD_VECTOR_B) // does not vectorize for now, might in the future.
+    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE + "min(max_int, max_byte)", "> 0",
+                  IRNode.AND_REDUCTION_V, "> 0",
+                  IRNode.AND_VI,          "> 0"},
+        applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
+        applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
+    @IR(failOn = IRNode.LOAD_VECTOR_B,
+        applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
    private static byte byteAndDotProduct() {
        byte acc = (byte)0xFF; // neutral element
        for (int i = 0; i < SIZE; i++) {
@ -544,7 +580,13 @@ public class TestReductions {
    }

    @Test
-    @IR(failOn = IRNode.LOAD_VECTOR_B) // does not vectorize for now, might in the future.
+    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE + "min(max_int, max_byte)", "> 0",
+                  IRNode.OR_REDUCTION_V, "> 0",
+                  IRNode.OR_VI,          "> 0"},
+        applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
+        applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
+    @IR(failOn = IRNode.LOAD_VECTOR_B,
+        applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
    private static byte byteOrDotProduct() {
        byte acc = 0; // neutral element
        for (int i = 0; i < SIZE; i++) {
@ -555,7 +597,13 @@ public class TestReductions {
    }

    @Test
-    @IR(failOn = IRNode.LOAD_VECTOR_B) // does not vectorize for now, might in the future.
+    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE + "min(max_int, max_byte)", "> 0",
+                  IRNode.XOR_REDUCTION_V, "> 0",
+                  IRNode.XOR_VI,          "> 0"},
+        applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
+        applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
+    @IR(failOn = IRNode.LOAD_VECTOR_B,
+        applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
    private static byte byteXorDotProduct() {
        byte acc = 0; // neutral element
        for (int i = 0; i < SIZE; i++) {
@ -588,7 +636,13 @@ public class TestReductions {
    }

    @Test
-    @IR(failOn = IRNode.LOAD_VECTOR_B) // does not vectorize for now, might in the future.
+    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE + "min(max_int, max_byte)", "> 0",
+                  IRNode.MIN_REDUCTION_V, "> 0",
+                  IRNode.MIN_VI,          "> 0"},
+        applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
+        applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
+    @IR(failOn = IRNode.LOAD_VECTOR_B,
+        applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
    private static byte byteMinDotProduct() {
        byte acc = Byte.MAX_VALUE; // neutral element
        for (int i = 0; i < SIZE; i++) {
@ -599,7 +653,13 @@ public class TestReductions {
    }

    @Test
-    @IR(failOn = IRNode.LOAD_VECTOR_B) // does not vectorize for now, might in the future.
+    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE + "min(max_int, max_byte)", "> 0",
+                  IRNode.MAX_REDUCTION_V, "> 0",
+                  IRNode.MAX_VI,          "> 0"},
+        applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
+        applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
+    @IR(failOn = IRNode.LOAD_VECTOR_B,
+        applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
    private static byte byteMaxDotProduct() {
        byte acc = Byte.MIN_VALUE; // neutral element
        for (int i = 0; i < SIZE; i++) {
@ -611,7 +671,13 @@ public class TestReductions {

    // ---------byte***Big ------------------------------------------------------------
    @Test
-    @IR(failOn = IRNode.LOAD_VECTOR_B) // does not vectorize for now, might in the future.
+    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE + "min(max_int, max_byte)", "> 0",
+                  IRNode.AND_REDUCTION_V, "> 0",
+                  IRNode.AND_VI,          "> 0"},
+        applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
+        applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
+    @IR(failOn = IRNode.LOAD_VECTOR_B,
+        applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
    private static byte byteAndBig() {
        byte acc = (byte)0xFF; // neutral element
        for (int i = 0; i < SIZE; i++) {
@ -622,7 +688,13 @@ public class TestReductions {
    }

    @Test
-    @IR(failOn = IRNode.LOAD_VECTOR_B) // does not vectorize for now, might in the future.
+    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE + "min(max_int, max_byte)", "> 0",
+                  IRNode.OR_REDUCTION_V, "> 0",
+                  IRNode.OR_VI,          "> 0"},
+        applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
+        applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
+    @IR(failOn = IRNode.LOAD_VECTOR_B,
+        applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
    private static byte byteOrBig() {
        byte acc = 0; // neutral element
        for (int i = 0; i < SIZE; i++) {
@ -633,7 +705,13 @@ public class TestReductions {
    }

    @Test
-    @IR(failOn = IRNode.LOAD_VECTOR_B) // does not vectorize for now, might in the future.
+    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE + "min(max_int, max_byte)", "> 0",
+                  IRNode.XOR_REDUCTION_V, "> 0",
+                  IRNode.XOR_VI,          "> 0"},
+        applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
+        applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
+    @IR(failOn = IRNode.LOAD_VECTOR_B,
+        applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
    private static byte byteXorBig() {
        byte acc = 0; // neutral element
        for (int i = 0; i < SIZE; i++) {
@ -666,7 +744,13 @@ public class TestReductions {
    }

    @Test
-    @IR(failOn = IRNode.LOAD_VECTOR_B) // does not vectorize for now, might in the future.
+    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE + "min(max_int, max_byte)", "> 0",
+                  IRNode.MIN_REDUCTION_V, "> 0",
+                  IRNode.MIN_VI,          "> 0"},
+        applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
+        applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
+    @IR(failOn = IRNode.LOAD_VECTOR_B,
+        applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
    private static byte byteMinBig() {
        byte acc = Byte.MAX_VALUE; // neutral element
        for (int i = 0; i < SIZE; i++) {
@ -677,7 +761,13 @@ public class TestReductions {
    }

    @Test
-    @IR(failOn = IRNode.LOAD_VECTOR_B) // does not vectorize for now, might in the future.
+    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE + "min(max_int, max_byte)", "> 0",
+                  IRNode.MAX_REDUCTION_V, "> 0",
+                  IRNode.MAX_VI,          "> 0"},
+        applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
+        applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
+    @IR(failOn = IRNode.LOAD_VECTOR_B,
+        applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
    private static byte byteMaxBig() {
        byte acc = Byte.MIN_VALUE; // neutral element
        for (int i = 0; i < SIZE; i++) {
@ -923,7 +1013,13 @@ public class TestReductions {

    // ---------short***Simple ------------------------------------------------------------
    @Test
-    @IR(failOn = IRNode.LOAD_VECTOR_S) // does not vectorize for now, might in the future.
+    @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE + "min(max_int, max_short)", "> 0",
+                  IRNode.AND_REDUCTION_V, "> 0",
+                  IRNode.AND_VI,          "> 0"},
+        applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
+        applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
+    @IR(failOn = IRNode.LOAD_VECTOR_S,
+        applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
    private static short shortAndSimple() {
        short acc = (short)0xFFFF; // neutral element
        for (int i = 0; i < SIZE; i++) {
@ -934,7 +1030,13 @@ public class TestReductions {
    }

    @Test
-    @IR(failOn = IRNode.LOAD_VECTOR_S) // does not vectorize for now, might in the future.
+    @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE + "min(max_int, max_short)", "> 0",
+                  IRNode.OR_REDUCTION_V, "> 0",
+                  IRNode.OR_VI,          "> 0"},
+        applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
+        applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
+    @IR(failOn = IRNode.LOAD_VECTOR_S,
+        applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
    private static short shortOrSimple() {
        short acc = 0; // neutral element
        for (int i = 0; i < SIZE; i++) {
@ -945,7 +1047,13 @@ public class TestReductions {
    }

    @Test
-    @IR(failOn = IRNode.LOAD_VECTOR_S) // does not vectorize for now, might in the future.
+    @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE + "min(max_int, max_short)", "> 0",
+                  IRNode.XOR_REDUCTION_V, "> 0",
+                  IRNode.XOR_VI,          "> 0"},
+        applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
+        applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
+    @IR(failOn = IRNode.LOAD_VECTOR_S,
+        applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
    private static short shortXorSimple() {
        short acc = 0; // neutral element
        for (int i = 0; i < SIZE; i++) {
@ -978,7 +1086,13 @@ public class TestReductions {
    }

    @Test
-    @IR(failOn = IRNode.LOAD_VECTOR_S) // does not vectorize for now, might in the future.
+    @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE + "min(max_int, max_short)", "> 0",
+                  IRNode.MIN_REDUCTION_V, "> 0",
+                  IRNode.MIN_VI,          "> 0"},
+        applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
+        applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
+    @IR(failOn = IRNode.LOAD_VECTOR_S,
+        applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
    private static short shortMinSimple() {
        short acc = Short.MAX_VALUE; // neutral element
        for (int i = 0; i < SIZE; i++) {
@ -989,7 +1103,13 @@ public class TestReductions {
    }

    @Test
-    @IR(failOn = IRNode.LOAD_VECTOR_S) // does not vectorize for now, might in the future.
+    @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE + "min(max_int, max_short)", "> 0",
+                  IRNode.MAX_REDUCTION_V, "> 0",
+                  IRNode.MAX_VI,          "> 0"},
+        applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
+        applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
+    @IR(failOn = IRNode.LOAD_VECTOR_S,
+        applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
    private static short shortMaxSimple() {
        short acc = Short.MIN_VALUE; // neutral element
        for (int i = 0; i < SIZE; i++) {
@ -1001,7 +1121,13 @@ public class TestReductions {

    // ---------short***DotProduct ------------------------------------------------------------
    @Test
-    @IR(failOn = IRNode.LOAD_VECTOR_S) // does not vectorize for now, might in the future.
+    @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE + "min(max_int, max_short)", "> 0",
+                  IRNode.AND_REDUCTION_V, "> 0",
+                  IRNode.AND_VI,          "> 0"},
+        applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
+        applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
+    @IR(failOn = IRNode.LOAD_VECTOR_S,
+        applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
    private static short shortAndDotProduct() {
        short acc = (short)0xFFFF; // neutral element
        for (int i = 0; i < SIZE; i++) {
@ -1012,7 +1138,13 @@ public class TestReductions {
    }

    @Test
-    @IR(failOn = IRNode.LOAD_VECTOR_S) // does not vectorize for now, might in the future.
+    @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE + "min(max_int, max_short)", "> 0",
+                  IRNode.OR_REDUCTION_V, "> 0",
+                  IRNode.OR_VI,          "> 0"},
+        applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
+        applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
+    @IR(failOn = IRNode.LOAD_VECTOR_S,
+        applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
    private static short shortOrDotProduct() {
        short acc = 0; // neutral element
        for (int i = 0; i < SIZE; i++) {
@ -1023,7 +1155,13 @@ public class TestReductions {
    }

    @Test
-    @IR(failOn = IRNode.LOAD_VECTOR_S) // does not vectorize for now, might in the future.
+    @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE + "min(max_int, max_short)", "> 0",
+                  IRNode.XOR_REDUCTION_V, "> 0",
+                  IRNode.XOR_VI,          "> 0"},
+        applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
+        applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
+    @IR(failOn = IRNode.LOAD_VECTOR_S,
+        applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
    private static short shortXorDotProduct() {
        short acc = 0; // neutral element
        for (int i = 0; i < SIZE; i++) {
@ -1056,7 +1194,13 @@ public class TestReductions {
    }

    @Test
-    @IR(failOn = IRNode.LOAD_VECTOR_S) // does not vectorize for now, might in the future.
+    @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE + "min(max_int, max_short)", "> 0",
+                  IRNode.MIN_REDUCTION_V, "> 0",
+                  IRNode.MIN_VI,          "> 0"},
+        applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
+        applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
+    @IR(failOn = IRNode.LOAD_VECTOR_S,
+        applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
    private static short shortMinDotProduct() {
        short acc = Short.MAX_VALUE; // neutral element
        for (int i = 0; i < SIZE; i++) {
@ -1067,7 +1211,13 @@ public class TestReductions {
    }

    @Test
-    @IR(failOn = IRNode.LOAD_VECTOR_S) // does not vectorize for now, might in the future.
+    @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE + "min(max_int, max_short)", "> 0",
+                  IRNode.MAX_REDUCTION_V, "> 0",
+                  IRNode.MAX_VI,          "> 0"},
+        applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
+        applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
+    @IR(failOn = IRNode.LOAD_VECTOR_S,
+        applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
    private static short shortMaxDotProduct() {
        short acc = Short.MIN_VALUE; // neutral element
        for (int i = 0; i < SIZE; i++) {
@ -1079,7 +1229,13 @@ public class TestReductions {

    // ---------short***Big ------------------------------------------------------------
    @Test
-    @IR(failOn = IRNode.LOAD_VECTOR_S) // does not vectorize for now, might in the future.
+    @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE + "min(max_int, max_short)", "> 0",
+                  IRNode.AND_REDUCTION_V, "> 0",
+                  IRNode.AND_VI,          "> 0"},
+        applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
+        applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
+    @IR(failOn = IRNode.LOAD_VECTOR_S,
+        applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
    private static short shortAndBig() {
        short acc = (short)0xFFFF; // neutral element
        for (int i = 0; i < SIZE; i++) {
@ -1090,7 +1246,13 @@ public class TestReductions {
    }

    @Test
-    @IR(failOn = IRNode.LOAD_VECTOR_S) // does not vectorize for now, might in the future.
+    @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE + "min(max_int, max_short)", "> 0",
+                  IRNode.OR_REDUCTION_V, "> 0",
+                  IRNode.OR_VI,          "> 0"},
+        applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
+        applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
+    @IR(failOn = IRNode.LOAD_VECTOR_S,
+        applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
    private static short shortOrBig() {
        short acc = 0; // neutral element
        for (int i = 0; i < SIZE; i++) {
@ -1101,7 +1263,13 @@ public class TestReductions {
    }

    @Test
-    @IR(failOn = IRNode.LOAD_VECTOR_S) // does not vectorize for now, might in the future.
+    @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE + "min(max_int, max_short)", "> 0",
+                  IRNode.XOR_REDUCTION_V, "> 0",
+                  IRNode.XOR_VI,          "> 0"},
+        applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
+        applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
+    @IR(failOn = IRNode.LOAD_VECTOR_S,
+        applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
    private static short shortXorBig() {
        short acc = 0; // neutral element
        for (int i = 0; i < SIZE; i++) {
@ -1134,7 +1302,13 @@ public class TestReductions {
    }

    @Test
-    @IR(failOn = IRNode.LOAD_VECTOR_S) // does not vectorize for now, might in the future.
+    @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE + "min(max_int, max_short)", "> 0",
+                  IRNode.MIN_REDUCTION_V, "> 0",
+                  IRNode.MIN_VI,          "> 0"},
+        applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
+        applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
+    @IR(failOn = IRNode.LOAD_VECTOR_S,
+        applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
    private static short shortMinBig() {
        short acc = Short.MAX_VALUE; // neutral element
        for (int i = 0; i < SIZE; i++) {
@ -1145,7 +1319,13 @@ public class TestReductions {
    }

    @Test
-    @IR(failOn = IRNode.LOAD_VECTOR_S) // does not vectorize for now, might in the future.
+    @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE + "min(max_int, max_short)", "> 0",
+                  IRNode.MAX_REDUCTION_V, "> 0",
+                  IRNode.MAX_VI,          "> 0"},
+        applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
+        applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
+    @IR(failOn = IRNode.LOAD_VECTOR_S,
+        applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
    private static short shortMaxBig() {
        short acc = Short.MIN_VALUE; // neutral element
        for (int i = 0; i < SIZE; i++) {
--- a/test/hotspot/jtreg/compiler/vectorization/TestRotateByteAndShortVector.java
+++ b/test/hotspot/jtreg/compiler/vectorization/TestRotateByteAndShortVector.java
@ -1,6 +1,7 @@
 /*
 * Copyright (c) 2022, 2025 Loongson Technology Co. Ltd. All rights reserved.
 * Copyright (c) 2025, Rivos Inc. All rights reserved.
+ * Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -24,7 +25,7 @@

 /**
 * @test
- * @bug 8286847 8353600
+ * @bug 8286847 8353600 8342095
 * @key randomness
 * @summary Test vectorization of rotate byte and short
 * @library /test/lib /
@ -116,11 +117,10 @@ public class TestRotateByteAndShortVector {
        }
    }

-    // NOTE: currently, there is no platform supporting RotateLeftV/RotateRightV intrinsic.
-    // If there is some implementation, it could probably in a wrong way which is different
-    // from what java language spec expects.
    @Test
-    @IR(failOn = { IRNode.ROTATE_LEFT_V })
+    @IR(counts = { IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE + "min(max_int, max_byte)", "> 0",
+                   IRNode.ROTATE_LEFT_V, "> 0" },
+        applyIfCPUFeature = {"avx512f", "true"})
    @IR(failOn = { IRNode.ROTATE_RIGHT_V })
    static void testRotateLeftByte(byte[] test, byte[] arr, int shift) {
        for (int i = 0; i < ARRLEN; i++) {
@ -130,7 +130,9 @@ public class TestRotateByteAndShortVector {

    @Test
    @IR(failOn = { IRNode.ROTATE_LEFT_V })
-    @IR(failOn = { IRNode.ROTATE_RIGHT_V })
+    @IR(counts = { IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE + "min(max_int, max_byte)", "> 0",
+                   IRNode.ROTATE_RIGHT_V, "> 0" },
+        applyIfCPUFeature = {"avx512f", "true"})
    static void testRotateRightByte(byte[] test, byte[] arr, int shift) {
        for (int i = 0; i < ARRLEN; i++) {
            test[i] = (byte) ((arr[i] >>> shift) | (arr[i] << -shift));
@ -138,7 +140,9 @@ public class TestRotateByteAndShortVector {
    }

    @Test
-    @IR(failOn = { IRNode.ROTATE_LEFT_V })
+    @IR(counts = { IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE + "min(max_int, max_short)", "> 0",
+                   IRNode.ROTATE_LEFT_V, "> 0" },
+        applyIfCPUFeature = {"avx512f", "true"})
    @IR(failOn = { IRNode.ROTATE_RIGHT_V })
    static void testRotateLeftShort(short[] test, short[] arr, int shift) {
        for (int i = 0; i < ARRLEN; i++) {
@ -148,7 +152,9 @@ public class TestRotateByteAndShortVector {

    @Test
    @IR(failOn = { IRNode.ROTATE_LEFT_V })
-    @IR(failOn = { IRNode.ROTATE_RIGHT_V })
+    @IR(counts = { IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE + "min(max_int, max_short)", "> 0",
+                   IRNode.ROTATE_RIGHT_V, "> 0" },
+        applyIfCPUFeature = {"avx512f", "true"})
    static void testRotateRightShort(short[] test, short[] arr, int shift) {
        for (int i = 0; i < ARRLEN; i++) {
            test[i] = (short) ((arr[i] >>> shift) | (arr[i] << -shift));
--- a/test/hotspot/jtreg/compiler/vectorization/TestSubwordTruncation.java
+++ b/test/hotspot/jtreg/compiler/vectorization/TestSubwordTruncation.java
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2025, 2026, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2025, 2026, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -29,7 +29,7 @@ import compiler.lib.generators.*;

 /*
 * @test
- * @bug 8350177 8362171 8369881
+ * @bug 8350177 8362171 8369881 8342095
 * @summary Ensure that truncation of subword vectors produces correct results
 * @library /test/lib /
 * @run driver compiler.vectorization.TestSubwordTruncation
@ -73,7 +73,8 @@ public class TestSubwordTruncation {
    // Shorts

    @Test
-    @IR(counts = { IRNode.STORE_VECTOR, "=0" })
+    @IR(counts = { IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE + "min(max_int, max_short)", "> 0" },
+        applyIfCPUFeatureOr = { "avx2", "true", "asimd", "true" })
    @Arguments(setup = "setupShortArray")
    public Object[] testShortLeadingZeros(short[] in) {
        short[] res = new short[SIZE];
@ -98,7 +99,8 @@ public class TestSubwordTruncation {
    }

    @Test
-    @IR(counts = { IRNode.STORE_VECTOR, "=0" })
+    @IR(counts = { IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE + "min(max_int, max_short)", "> 0" },
+        applyIfCPUFeatureOr = { "avx2", "true", "asimd", "true" })
    @Arguments(setup = "setupShortArray")
    public Object[] testShortTrailingZeros(short[] in) {
        short[] res = new short[SIZE];
@ -123,7 +125,8 @@ public class TestSubwordTruncation {
    }

    @Test
-    @IR(counts = { IRNode.STORE_VECTOR, "=0" })
+    @IR(counts = { IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE + "min(max_int, max_short)", "> 0" },
+        applyIfCPUFeatureOr = { "avx2", "true", "asimd", "true" })
    @Arguments(setup = "setupShortArray")
    public Object[] testShortReverse(short[] in) {
        short[] res = new short[SIZE];
@ -148,7 +151,8 @@ public class TestSubwordTruncation {
    }

    @Test
-    @IR(counts = { IRNode.STORE_VECTOR, "=0" })
+    @IR(counts = { IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE + "min(max_int, max_short)", "> 0" },
+        applyIfCPUFeatureOr = { "avx2", "true", "asimd", "true" })
    @Arguments(setup = "setupShortArray")
    public Object[] testShortBitCount(short[] in) {
        short[] res = new short[SIZE];
@ -277,7 +281,8 @@ public class TestSubwordTruncation {
    // Bytes

    @Test
-    @IR(counts = { IRNode.STORE_VECTOR, "=0" })
+    @IR(counts = { IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE + "min(max_int, max_byte)", "> 0" },
+        applyIfCPUFeatureOr = { "avx2", "true", "asimd", "true" })
    @Arguments(setup = "setupByteArray")
    public Object[] testByteLeadingZeros(byte[] in) {
        byte[] res = new byte[SIZE];
@ -302,7 +307,8 @@ public class TestSubwordTruncation {
    }

    @Test
-    @IR(counts = { IRNode.STORE_VECTOR, "=0" })
+    @IR(counts = { IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE + "min(max_int, max_byte)", "> 0" },
+        applyIfCPUFeatureOr = { "avx2", "true", "asimd", "true" })
    @Arguments(setup = "setupByteArray")
    public Object[] testByteTrailingZeros(byte[] in) {
        byte[] res = new byte[SIZE];
@ -327,7 +333,8 @@ public class TestSubwordTruncation {
    }

    @Test
-    @IR(counts = { IRNode.STORE_VECTOR, "=0" })
+    @IR(counts = { IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE + "min(max_int, max_byte)", "> 0" },
+        applyIfCPUFeatureOr = { "avx2", "true", "asimd", "true" })
    @Arguments(setup = "setupByteArray")
    public Object[] testByteReverse(byte[] in) {
        byte[] res = new byte[SIZE];
@ -403,7 +410,8 @@ public class TestSubwordTruncation {


    @Test
-    @IR(counts = { IRNode.STORE_VECTOR, "=0" })
+    @IR(counts = { IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE + "min(max_int, max_byte)", "> 0" },
+        applyIfCPUFeatureOr = { "avx2", "true", "asimd", "true" })
    @Arguments(setup = "setupByteArray")
    public Object[] testByteBitCount(byte[] in) {
        byte[] res = new byte[SIZE];
--- a/test/hotspot/jtreg/compiler/vectorization/runner/ArrayShiftOpTest.java
+++ b/test/hotspot/jtreg/compiler/vectorization/runner/ArrayShiftOpTest.java
@ -247,9 +247,8 @@ public class ArrayShiftOpTest extends VectorizationTestRunner {
    }

    @Test
-    // Note that right shift operations on subword expressions cannot be
-    // vectorized since precise type info about signedness is missing.
-    @IR(failOn = {IRNode.STORE_VECTOR})
+    @IR(applyIfCPUFeature = {"avx", "true"},
+            counts = {IRNode.RSHIFT_VI, ">0"})
    public short[] subwordExpressionRightShift() {
        short[] res = new short[SIZE];
        for (int i = 0; i < SIZE; i++) {
--- a/test/hotspot/jtreg/compiler/vectorization/runner/ArrayTypeConvertTest.java
+++ b/test/hotspot/jtreg/compiler/vectorization/runner/ArrayTypeConvertTest.java
@ -1,6 +1,6 @@
 /*
 * Copyright (c) 2022, 2023, Arm Limited. All rights reserved.
- * Copyright (c) 2024, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2024, 2026, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -24,6 +24,7 @@

 /*
 * @test
+ * @bug 8183390 8340010 8342095
 * @summary Vectorization test on array type conversions
 * @library /test/lib /
 *
@ -108,10 +109,9 @@ public class ArrayTypeConvertTest extends VectorizationTestRunner {

    // ---------------- Integer Extension ----------------
    @Test
-    @IR(failOn = {IRNode.STORE_VECTOR})
-    // Subword vector casts do not work currently, see JDK-8342095.
-    // Assert the vectorization failure so that we are reminded to update
-    // the test when this limitation is addressed in the future.
+    @IR(applyIfCPUFeature = { "avx", "true" },
+        applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"},
+        counts = { IRNode.VECTOR_CAST_S2I, IRNode.VECTOR_SIZE + "min(max_int, max_short)", ">0" })
    public int[] signExtension() {
        int[] res = new int[SIZE];
        for (int i = 0; i < SIZE; i++) {
@ -122,7 +122,7 @@ public class ArrayTypeConvertTest extends VectorizationTestRunner {

    @Test
    @IR(failOn = {IRNode.STORE_VECTOR})
-    // Subword vector casts do not work currently, see JDK-8342095.
+    // Subword vector casts with char do not work currently, see JDK-8349562.
    // Assert the vectorization failure so that we are reminded to update
    // the test when this limitation is addressed in the future.
    public int[] zeroExtension() {
@ -134,10 +134,9 @@ public class ArrayTypeConvertTest extends VectorizationTestRunner {
    }

    @Test
-    @IR(failOn = {IRNode.STORE_VECTOR})
-    // Subword vector casts do not work currently, see JDK-8342095.
-    // Assert the vectorization failure so that we are reminded to update
-    // the test when this limitation is addressed in the future.
+    @IR(applyIfCPUFeature = { "avx", "true" },
+        applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"},
+        counts = { IRNode.VECTOR_CAST_B2I, IRNode.VECTOR_SIZE + "min(max_int, max_byte)", ">0" })
    public int[] signExtensionFromByte() {
        int[] res = new int[SIZE];
        for (int i = 0; i < SIZE; i++) {
@ -146,12 +145,23 @@ public class ArrayTypeConvertTest extends VectorizationTestRunner {
        return res;
    }

+    @Test
+    @IR(applyIfCPUFeature = { "avx", "true" },
+        applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"},
+        counts = { IRNode.VECTOR_CAST_B2S, IRNode.VECTOR_SIZE + "min(max_short, max_byte)", ">0" })
+    public short[] signExtensionFromByteToShort() {
+        short[] res = new short[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = bytes[i]; // no explicit cast: relies on implicit widening to short (presumably bytes is a byte[] declared outside this hunk)
+        }
+        return res;
+    }
+
    // ---------------- Integer Narrow ----------------
    @Test
-    @IR(failOn = {IRNode.STORE_VECTOR})
-    // Subword vector casts do not work currently, see JDK-8342095.
-    // Assert the vectorization failure so that we are reminded to update
-    // the test when this limitation is addressed in the future.
+    @IR(applyIfCPUFeature = { "avx", "true" },
+        applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"},
+        counts = { IRNode.VECTOR_CAST_I2S, IRNode.VECTOR_SIZE + "min(max_int, max_short)", ">0" })
    public short[] narrowToSigned() {
        short[] res = new short[SIZE];
        for (int i = 0; i < SIZE; i++) {
@ -161,10 +171,9 @@ public class ArrayTypeConvertTest extends VectorizationTestRunner {
    }

    @Test
-    @IR(failOn = {IRNode.STORE_VECTOR})
-    // Subword vector casts do not work currently, see JDK-8342095.
-    // Assert the vectorization failure so that we are reminded to update
-    // the test when this limitation is addressed in the future.
+    @IR(applyIfCPUFeature = { "avx", "true" },
+        applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"},
+        counts = { IRNode.VECTOR_CAST_I2S, IRNode.VECTOR_SIZE + "min(max_int, max_char)", ">0" })
    public char[] narrowToUnsigned() {
        char[] res = new char[SIZE];
        for (int i = 0; i < SIZE; i++) {
@ -174,11 +183,10 @@ public class ArrayTypeConvertTest extends VectorizationTestRunner {
    }

    @Test
-    @IR(failOn = {IRNode.STORE_VECTOR})
-    // Subword vector casts do not work currently, see JDK-8342095.
-    // Assert the vectorization failure so that we are reminded to update
-    // the test when this limitation is addressed in the future.
-    public byte[] NarrowToByte() {
+    @IR(applyIfCPUFeature = { "avx", "true" },
+        applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"},
+        counts = { IRNode.VECTOR_CAST_I2B, IRNode.VECTOR_SIZE + "min(max_int, max_byte)", ">0" })
+    public byte[] narrowToByte() {
        byte[] res = new byte[SIZE];
        for (int i = 0; i < SIZE; i++) {
            res[i] = (byte) ints[i];
@ -186,6 +194,18 @@ public class ArrayTypeConvertTest extends VectorizationTestRunner {
        return res;
    }

+    @Test
+    @IR(applyIfCPUFeature = { "avx", "true" },
+        applyIfOr = {"AlignVector", "false", "UseCompactObjectHeaders", "false"},
+        counts = { IRNode.VECTOR_CAST_S2B, IRNode.VECTOR_SIZE + "min(max_short, max_byte)", ">0" })
+    public byte[] narrowShortToByte() {
+        byte[] res = new byte[SIZE];
+        for (int i = 0; i < SIZE; i++) {
+            res[i] = (byte) shorts[i]; // explicit narrowing cast: only the low 8 bits of each element are kept
+        }
+        return res;
+    }
+
    // ---------------- Convert I/L to F/D ----------------
    @Test
    @IR(applyIfCPUFeatureOr = {"asimd", "true", "avx", "true", "rvv", "true"},
@ -268,7 +288,7 @@ public class ArrayTypeConvertTest extends VectorizationTestRunner {

    @Test
    @IR(failOn = {IRNode.STORE_VECTOR})
-    // Subword vector casts do not work currently, see JDK-8342095.
+    // Subword vector casts with char do not work currently, see JDK-8349562.
    // Assert the vectorization failure so that we are reminded to update
    // the test when this limitation is addressed in the future.
    public float[] convertCharToFloat() {
@ -281,7 +301,7 @@ public class ArrayTypeConvertTest extends VectorizationTestRunner {

    @Test
    @IR(failOn = {IRNode.STORE_VECTOR})
-    // Subword vector casts do not work currently, see JDK-8342095.
+    // Subword vector casts with char do not work currently, see JDK-8349562.
    // Assert the vectorization failure so that we are reminded to update
    // the test when this limitation is addressed in the future.
    public double[] convertCharToDouble() {
--- a/test/hotspot/jtreg/compiler/vectorization/runner/BasicShortOpTest.java
+++ b/test/hotspot/jtreg/compiler/vectorization/runner/BasicShortOpTest.java
@ -1,5 +1,6 @@
 /*
 * Copyright (c) 2022, 2023, Arm Limited. All rights reserved.
+ * Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -23,6 +24,7 @@

 /*
 * @test
+ * @bug 8183390 8342095
 * @summary Vectorization test on basic short operations
 * @library /test/lib /
 *
@ -210,10 +212,10 @@ public class BasicShortOpTest extends VectorizationTestRunner {
        return res;
    }

+    // Min/Max vectorization requires a cast from subword to int and back to subword, to avoid losing the higher order bits.
+
    @Test
-    // Note that min operations on subword types cannot be vectorized
-    // because higher bits will be lost.
-    @IR(failOn = {IRNode.STORE_VECTOR})
+    @IR(applyIfCPUFeature = { "avx", "true" }, counts = { IRNode.VECTOR_CAST_I2S, IRNode.VECTOR_SIZE + "min(max_int, max_short)", ">0" })
    public short[] vectorMin() {
        short[] res = new short[SIZE];
        for (int i = 0; i < SIZE; i++) {
@ -223,9 +225,7 @@ public class BasicShortOpTest extends VectorizationTestRunner {
    }

    @Test
-    // Note that max operations on subword types cannot be vectorized
-    // because higher bits will be lost.
-    @IR(failOn = {IRNode.STORE_VECTOR})
+    @IR(applyIfCPUFeature = { "avx", "true" }, counts = { IRNode.VECTOR_CAST_I2S, IRNode.VECTOR_SIZE + "min(max_int, max_short)", ">0" })
    public short[] vectorMax() {
        short[] res = new short[SIZE];
        for (int i = 0; i < SIZE; i++) {
--- a/test/micro/org/openjdk/bench/vm/compiler/VectorSubword.java
+++ b/test/micro/org/openjdk/bench/vm/compiler/VectorSubword.java
@ -0,0 +1,201 @@
+/*
+ * Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package org.openjdk.bench.vm.compiler;
+
+import org.openjdk.jmh.annotations.*;
+import org.openjdk.jmh.infra.*;
+
+import java.util.concurrent.TimeUnit;
+import java.util.Random;
+
+@BenchmarkMode(Mode.AverageTime)
+@OutputTimeUnit(TimeUnit.NANOSECONDS)
+@State(Scope.Thread)
+@Warmup(iterations = 2, time = 1, timeUnit = TimeUnit.SECONDS)
+@Measurement(iterations = 3, time = 1, timeUnit = TimeUnit.SECONDS)
+@Fork(value = 3)
+public class VectorSubword { // Element-wise cast loops between primitive arrays, one benchmark per (source, destination) type pair.
+    @Param({"1024"})
+    public int SIZE; // Length of every array below and the trip count of every benchmark loop.
+
+    private byte[] bytes;
+    private short[] shorts;
+    private char[] chars;
+    private int[] ints;
+    private long[] longs;
+
+    @Setup
+    public void init() { // Only allocates the arrays; their contents stay at Java's default value of zero.
+        bytes = new byte[SIZE];
+        shorts = new short[SIZE];
+        chars = new char[SIZE];
+        ints = new int[SIZE];
+        longs = new long[SIZE];
+    }
+
+    // Narrowing: the destination element is no wider than the source (short <-> char keeps the width and only changes signedness).
+
+    @Benchmark
+    public void shortToByte() {
+        for (int i = 0; i < SIZE; i++) {
+            bytes[i] = (byte) shorts[i];
+        }
+    }
+
+    @Benchmark
+    public void shortToChar() {
+        for (int i = 0; i < SIZE; i++) {
+            chars[i] = (char) shorts[i];
+        }
+    }
+
+    @Benchmark
+    public void charToByte() {
+        for (int i = 0; i < SIZE; i++) {
+            bytes[i] = (byte) chars[i];
+        }
+    }
+
+    @Benchmark
+    public void charToShort() {
+        for (int i = 0; i < SIZE; i++) {
+            shorts[i] = (short) chars[i];
+        }
+    }
+
+    @Benchmark
+    public void intToByte() {
+        for (int i = 0; i < SIZE; i++) {
+            bytes[i] = (byte) ints[i];
+        }
+    }
+
+    @Benchmark
+    public void intToShort() {
+        for (int i = 0; i < SIZE; i++) {
+            shorts[i] = (short) ints[i];
+        }
+    }
+
+    @Benchmark
+    public void intToChar() {
+        for (int i = 0; i < SIZE; i++) {
+            chars[i] = (char) ints[i];
+        }
+    }
+
+    @Benchmark
+    public void longToByte() {
+        for (int i = 0; i < SIZE; i++) {
+            bytes[i] = (byte) longs[i];
+        }
+    }
+
+    @Benchmark
+    public void longToShort() {
+        for (int i = 0; i < SIZE; i++) {
+            shorts[i] = (short) longs[i];
+        }
+    }
+
+    @Benchmark
+    public void longToChar() {
+        for (int i = 0; i < SIZE; i++) {
+            chars[i] = (char) longs[i];
+        }
+    }
+
+    @Benchmark
+    public void longToInt() {
+        for (int i = 0; i < SIZE; i++) {
+            ints[i] = (int) longs[i];
+        }
+    }
+
+    // Widening: the destination element is strictly wider than the source.
+
+    @Benchmark
+    public void byteToShort() {
+        for (int i = 0; i < SIZE; i++) {
+            shorts[i] = bytes[i];
+        }
+    }
+
+    @Benchmark
+    public void byteToChar() {
+        for (int i = 0; i < SIZE; i++) {
+            chars[i] = (char) bytes[i];
+        }
+    }
+
+    @Benchmark
+    public void byteToInt() {
+        for (int i = 0; i < SIZE; i++) {
+            ints[i] = bytes[i];
+        }
+    }
+
+    @Benchmark
+    public void byteToLong() {
+        for (int i = 0; i < SIZE; i++) {
+            longs[i] = bytes[i];
+        }
+    }
+
+    @Benchmark
+    public void shortToInt() {
+        for (int i = 0; i < SIZE; i++) {
+            ints[i] = shorts[i];
+        }
+    }
+
+    @Benchmark
+    public void shortToLong() {
+        for (int i = 0; i < SIZE; i++) {
+            longs[i] = shorts[i];
+        }
+    }
+
+    @Benchmark
+    public void charToInt() {
+        for (int i = 0; i < SIZE; i++) {
+            ints[i] = chars[i];
+        }
+    }
+
+    @Benchmark
+    public void charToLong() {
+        for (int i = 0; i < SIZE; i++) {
+            longs[i] = chars[i];
+        }
+    }
+
+    @Benchmark
+    public void intToLong() {
+        for (int i = 0; i < SIZE; i++) {
+            longs[i] = ints[i];
+        }
+    }
+
+}