From 27c452ce9749ab96ff2940f85f855855ab0d247e Mon Sep 17 00:00:00 2001
From: Emanuel Peter <emanuel.peter@oracle.com>
Date: Fri, 16 Jan 2026 17:00:47 +0100
Subject: [PATCH] add hashCodeB test and benchmark

---
 .../vectorization/TestVectorAlgorithms.java   | 34 ++++++++++
 .../vectorization/VectorAlgorithmsImpl.java   | 65 +++++++++++++++++++
 .../bench/vm/compiler/VectorAlgorithms.java   | 20 ++++++
 .../vm/compiler/VectorAlgorithmsImpl.java     | 65 +++++++++++++++++++
 4 files changed, 184 insertions(+)

diff --git a/test/hotspot/jtreg/compiler/vectorization/TestVectorAlgorithms.java b/test/hotspot/jtreg/compiler/vectorization/TestVectorAlgorithms.java
index 16b03ae542c..3f179cdca3d 100644
--- a/test/hotspot/jtreg/compiler/vectorization/TestVectorAlgorithms.java
+++ b/test/hotspot/jtreg/compiler/vectorization/TestVectorAlgorithms.java
@@ -87,6 +87,8 @@ public class TestVectorAlgorithms {
     float[] aF;
     float[] bF;
 
+    byte[] aB;
+
     int[] oopsX4;
     int[] memX4;
 
@@ -141,6 +143,11 @@ public class TestVectorAlgorithms {
         testGroups.get("dotProductF").put("dotProductF_VectorAPI_naive",                () -> { return dotProductF_VectorAPI_naive(aF, bF); });
         testGroups.get("dotProductF").put("dotProductF_VectorAPI_reduction_after_loop", () -> { return dotProductF_VectorAPI_reduction_after_loop(aF, bF); });
 
+        testGroups.put("hashCodeB", new HashMap<String,TestFunction>());
+        testGroups.get("hashCodeB").put("hashCodeB_loop",         () -> { return hashCodeB_loop(aB); });
+        testGroups.get("hashCodeB").put("hashCodeB_Arrays",       () -> { return hashCodeB_Arrays(aB); });
+        testGroups.get("hashCodeB").put("hashCodeB_VectorAPI_v1", () -> { return hashCodeB_VectorAPI_v1(aB); });
+
         testGroups.put("scanAddI", new HashMap<String,TestFunction>());
         testGroups.get("scanAddI").put("scanAddI_loop",                      () -> { return scanAddI_loop(aI, rI1); });
         testGroups.get("scanAddI").put("scanAddI_loop_reassociate",          () -> { return scanAddI_loop_reassociate(aI, rI2); });
@@ -185,6 +192,9 @@ public class TestVectorAlgorithms {
                  "dotProductF_loop",
                  "dotProductF_VectorAPI_naive",
                  "dotProductF_VectorAPI_reduction_after_loop",
+                 "hashCodeB_loop",
+                 "hashCodeB_Arrays",
+                 "hashCodeB_VectorAPI_v1",
                  "scanAddI_loop",
                  "scanAddI_loop_reassociate",
                  "scanAddI_VectorAPI_permute_add",
@@ -236,6 +246,9 @@ public class TestVectorAlgorithms {
                 bF[i] = RANDOM.nextInt(32) - 16;
             }
 
+            aB = new byte[size];
+            RANDOM.nextBytes(aB);
+
             // Run all tests
             for (Map.Entry<String, Map<String,TestFunction>> group_entry : testGroups.entrySet()) {
                 String group_name = group_entry.getKey();
@@ -409,6 +422,27 @@ public class TestVectorAlgorithms {
         return VectorAlgorithmsImpl.dotProductF_VectorAPI_reduction_after_loop(a, b);
     }
 
+    @Test
+    public int hashCodeB_loop(byte[] a) { // test entry point for the scalar reference loop
+        return VectorAlgorithmsImpl.hashCodeB_loop(a); // forwards a unchanged and returns the int hash
+    }
+
+    @Test
+    public int hashCodeB_Arrays(byte[] a) { // test entry point for the Arrays.hashCode based version
+        return VectorAlgorithmsImpl.hashCodeB_Arrays(a); // forwards a unchanged and returns the int hash
+    }
+
+    @Test
+    @IR(counts = {IRNode.LOAD_VECTOR_B,    IRNode.VECTOR_SIZE_8, "> 0", // each rule requires at least one matching node
+                  IRNode.MUL_VI,           IRNode.VECTOR_SIZE_8, "> 0",
+                  IRNode.VECTOR_CAST_B2I,  IRNode.VECTOR_SIZE_8, "> 0",
+                  IRNode.ADD_VI,           IRNode.VECTOR_SIZE_8, "> 0",
+                  IRNode.ADD_REDUCTION_VI,                       "> 0"}, // no vector size constraint on the reduction
+        applyIfCPUFeature = {"avx2", "true"}) // rules are only applied when the CPU feature avx2 is present
+    public int hashCodeB_VectorAPI_v1(byte[] a) { // test entry point for the Vector API version
+        return VectorAlgorithmsImpl.hashCodeB_VectorAPI_v1(a); // forwards a unchanged and returns the int hash
+    }
+
     @Test
     @IR(counts = {IRNode.LOAD_VECTOR_I,    "> 0",
                   IRNode.ADD_REDUCTION_VI, "> 0",
diff --git a/test/hotspot/jtreg/compiler/vectorization/VectorAlgorithmsImpl.java b/test/hotspot/jtreg/compiler/vectorization/VectorAlgorithmsImpl.java
index ff36b07f0f9..c480cb54005 100644
--- a/test/hotspot/jtreg/compiler/vectorization/VectorAlgorithmsImpl.java
+++ b/test/hotspot/jtreg/compiler/vectorization/VectorAlgorithmsImpl.java
@@ -34,6 +34,8 @@ import jdk.incubator.vector.*;
 public class VectorAlgorithmsImpl {
     private static final VectorSpecies<Integer> SPECIES_I    = IntVector.SPECIES_PREFERRED;
     private static final VectorSpecies<Integer> SPECIES_I512 = IntVector.SPECIES_512;
+    private static final VectorSpecies<Integer> SPECIES_I256 = IntVector.SPECIES_256;
+    private static final VectorSpecies<Byte> SPECIES_B64     = ByteVector.SPECIES_64;
     private static final VectorSpecies<Float> SPECIES_F      = FloatVector.SPECIES_PREFERRED;
 
     public static Object fillI_loop(int[] r) {
@@ -216,6 +218,69 @@ public class VectorAlgorithmsImpl {
         return sum;
     }
 
+    public static int hashCodeB_loop(byte[] a) { // scalar reference: polynomial hash with base 31
+        int h = 1; // start value; a zero-length array therefore hashes to 1
+        for (int i = 0; i < a.length; i++) {
+            h = 31 * h + a[i]; // a[i] is sign-extended to int; int overflow wraps around
+        }
+        return h;
+    }
+
+    public static int hashCodeB_Arrays(byte[] a) { // library version of the same hash
+        return Arrays.hashCode(a); // delegates entirely to Arrays.hashCode(byte[])
+    }
+
+    // Simplified intrinsic code from C2_MacroAssembler::arrays_hashcode in c2_MacroAssembler_x86.cpp
+    //
+    // Ideas that may help understand the code:
+    //
+    // h(i) = 31 * h(i-1) + a[i]
+    // "unroll" by factor of L=8:
+    // h(i+8) = h(i) * 31^8 + a[i+1] * 31^7 + a[i+2] * 31^6 + ... + a[i+8] * 1
+    //          -----------   ------------------------------------------------
+    //          scalar        vector: notice the powers of 31 in reverse
+    //
+    // We notice that we can load a[i+1 .. i+8], then element-wise multiply with
+    // the vector of reversed powers-of-31, and then do reduceLanes(ADD).
+    // But we can do even better: By looking at multiple such 8-unrolled iterations.
+    // Instead of applying the "next" factor of "31^8" to the reduced scalar, we can
+    // already apply it element-wise. That allows us to move the reduction out
+    // of the loop.
+    //
+    // Note: the intrinsic additionally unrolls the loop by a factor of 4,
+    //       but we want to keep things simple for demonstration purposes.
+    //
+    private static int[] REVERSE_POWERS_OF_31 = new int[9];
+    static {
+        int p = 1;
+        for (int i = REVERSE_POWERS_OF_31.length - 1; i >= 0; i--) {
+            REVERSE_POWERS_OF_31[i] = p;
+            p *= 31;
+        }
+    }
+    public static int hashCodeB_VectorAPI_v1(byte[] a) {
+        int result = 1; // initialValue
+        var vresult = IntVector.zero(SPECIES_I256); // per-lane partial hashes, reduced only after the loop
+        int next = REVERSE_POWERS_OF_31[0]; // 31^L
+        var vnext = IntVector.broadcast(SPECIES_I256, next);
+        var vcoef = IntVector.fromArray(SPECIES_I256, REVERSE_POWERS_OF_31, 1); // powers of 31 in reverse: 31^7 .. 31^0
+        int i;
+        for (i = 0; i < SPECIES_B64.loopBound(a.length); i += SPECIES_B64.length()) { // L = 8 bytes per iteration
+            // scalar part: result *= 31^L
+            result *= next;
+            // vector part: element-wise apply the next factor and add in the new values.
+            var vb = ByteVector.fromArray(SPECIES_B64, a, i);
+            var vi = vb.castShape(SPECIES_I256, 0); // widen the 8 bytes to 8 ints (part 0 of the conversion)
+            vresult = vresult.mul(vnext).add(vi);
+        }
+        // reduce the partial hashes in the elements, using the reverse list of powers of 31.
+        result += vresult.mul(vcoef).reduceLanes(VectorOperators.ADD);
+        for (; i < a.length; i++) { // scalar tail for the remaining a.length % L elements
+            result = 31 * result + a[i];
+        }
+        return result;
+    }
+
     public static Object scanAddI_loop(int[] a, int[] r) {
         int sum = 0;
         for (int i = 0; i < a.length; i++) {
diff --git a/test/micro/org/openjdk/bench/vm/compiler/VectorAlgorithms.java b/test/micro/org/openjdk/bench/vm/compiler/VectorAlgorithms.java
index 26a02622897..879388de99a 100644
--- a/test/micro/org/openjdk/bench/vm/compiler/VectorAlgorithms.java
+++ b/test/micro/org/openjdk/bench/vm/compiler/VectorAlgorithms.java
@@ -76,6 +76,8 @@ public class VectorAlgorithms {
     public static float[] aF;
     public static float[] bF;
 
+    byte[] aB;
+
     @Setup
     public void init() {
         RANDOM = new Random(SEED);
@@ -90,6 +92,9 @@ public class VectorAlgorithms {
 
         aF = new float[SIZE];
         bF = new float[SIZE];
+
+        aB = new byte[SIZE];
+        RANDOM.nextBytes(aB);
     }
 
     @Setup(Level.Iteration)
@@ -206,6 +211,21 @@ public class VectorAlgorithms {
         return VectorAlgorithmsImpl.dotProductF_VectorAPI_reduction_after_loop(aF, bF);
     }
 
+    @Benchmark
+    public int hashCodeB_loop() { // benchmarks the scalar reference loop on the aB field
+        return VectorAlgorithmsImpl.hashCodeB_loop(aB); // the hash is returned to the caller rather than discarded
+    }
+
+    @Benchmark
+    public int hashCodeB_Arrays() { // benchmarks the Arrays.hashCode based version on the aB field
+        return VectorAlgorithmsImpl.hashCodeB_Arrays(aB); // the hash is returned to the caller rather than discarded
+    }
+
+    @Benchmark
+    public int hashCodeB_VectorAPI_v1() { // benchmarks the Vector API version on the aB field
+        return VectorAlgorithmsImpl.hashCodeB_VectorAPI_v1(aB); // the hash is returned to the caller rather than discarded
+    }
+
     @Benchmark
     public Object scanAddI_loop() {
         return VectorAlgorithmsImpl.scanAddI_loop(aI, rI);
diff --git a/test/micro/org/openjdk/bench/vm/compiler/VectorAlgorithmsImpl.java b/test/micro/org/openjdk/bench/vm/compiler/VectorAlgorithmsImpl.java
index fbeabef6fc0..aaec016d72d 100644
--- a/test/micro/org/openjdk/bench/vm/compiler/VectorAlgorithmsImpl.java
+++ b/test/micro/org/openjdk/bench/vm/compiler/VectorAlgorithmsImpl.java
@@ -34,6 +34,8 @@ import jdk.incubator.vector.*;
 public class VectorAlgorithmsImpl {
     private static final VectorSpecies<Integer> SPECIES_I    = IntVector.SPECIES_PREFERRED;
     private static final VectorSpecies<Integer> SPECIES_I512 = IntVector.SPECIES_512;
+    private static final VectorSpecies<Integer> SPECIES_I256 = IntVector.SPECIES_256;
+    private static final VectorSpecies<Byte> SPECIES_B64     = ByteVector.SPECIES_64;
     private static final VectorSpecies<Float> SPECIES_F      = FloatVector.SPECIES_PREFERRED;
 
     public static Object fillI_loop(int[] r) {
@@ -216,6 +218,69 @@ public class VectorAlgorithmsImpl {
         return sum;
     }
 
+    public static int hashCodeB_loop(byte[] a) { // scalar reference: polynomial hash with base 31
+        int h = 1; // start value; a zero-length array therefore hashes to 1
+        for (int i = 0; i < a.length; i++) {
+            h = 31 * h + a[i]; // a[i] is sign-extended to int; int overflow wraps around
+        }
+        return h;
+    }
+
+    public static int hashCodeB_Arrays(byte[] a) { // library version of the same hash
+        return Arrays.hashCode(a); // delegates entirely to Arrays.hashCode(byte[])
+    }
+
+    // Simplified intrinsic code from C2_MacroAssembler::arrays_hashcode in c2_MacroAssembler_x86.cpp
+    //
+    // Ideas that may help understand the code:
+    //
+    // h(i) = 31 * h(i-1) + a[i]
+    // "unroll" by factor of L=8:
+    // h(i+8) = h(i) * 31^8 + a[i+1] * 31^7 + a[i+2] * 31^6 + ... + a[i+8] * 1
+    //          -----------   ------------------------------------------------
+    //          scalar        vector: notice the powers of 31 in reverse
+    //
+    // We notice that we can load a[i+1 .. i+8], then element-wise multiply with
+    // the vector of reversed powers-of-31, and then do reduceLanes(ADD).
+    // But we can do even better: By looking at multiple such 8-unrolled iterations.
+    // Instead of applying the "next" factor of "31^8" to the reduced scalar, we can
+    // already apply it element-wise. That allows us to move the reduction out
+    // of the loop.
+    //
+    // Note: the intrinsic additionally unrolls the loop by a factor of 4,
+    //       but we want to keep things simple for demonstration purposes.
+    //
+    private static int[] REVERSE_POWERS_OF_31 = new int[9];
+    static {
+        int p = 1;
+        for (int i = REVERSE_POWERS_OF_31.length - 1; i >= 0; i--) {
+            REVERSE_POWERS_OF_31[i] = p;
+            p *= 31;
+        }
+    }
+    public static int hashCodeB_VectorAPI_v1(byte[] a) {
+        int result = 1; // initialValue
+        var vresult = IntVector.zero(SPECIES_I256); // per-lane partial hashes, reduced only after the loop
+        int next = REVERSE_POWERS_OF_31[0]; // 31^L
+        var vnext = IntVector.broadcast(SPECIES_I256, next);
+        var vcoef = IntVector.fromArray(SPECIES_I256, REVERSE_POWERS_OF_31, 1); // powers of 31 in reverse: 31^7 .. 31^0
+        int i;
+        for (i = 0; i < SPECIES_B64.loopBound(a.length); i += SPECIES_B64.length()) { // L = 8 bytes per iteration
+            // scalar part: result *= 31^L
+            result *= next;
+            // vector part: element-wise apply the next factor and add in the new values.
+            var vb = ByteVector.fromArray(SPECIES_B64, a, i);
+            var vi = vb.castShape(SPECIES_I256, 0); // widen the 8 bytes to 8 ints (part 0 of the conversion)
+            vresult = vresult.mul(vnext).add(vi);
+        }
+        // reduce the partial hashes in the elements, using the reverse list of powers of 31.
+        result += vresult.mul(vcoef).reduceLanes(VectorOperators.ADD);
+        for (; i < a.length; i++) { // scalar tail for the remaining a.length % L elements
+            result = 31 * result + a[i];
+        }
+        return result;
+    }
+
     public static Object scanAddI_loop(int[] a, int[] r) {
         int sum = 0;
         for (int i = 0; i < a.length; i++) {