add hashCodeB test and benchmark

2026-01-28 03:58:21 +00:00 · 2026-01-16 17:00:47 +01:00 · 2026-01-16 17:00:47 +01:00 · 27c452ce97
commit 27c452ce97
parent 4ecbfdd382
4 changed files with 184 additions and 0 deletions
--- a/test/hotspot/jtreg/compiler/vectorization/TestVectorAlgorithms.java
+++ b/test/hotspot/jtreg/compiler/vectorization/TestVectorAlgorithms.java
@ -87,6 +87,8 @@ public class TestVectorAlgorithms {
    float[] aF;
    float[] bF;

+    byte[] aB;
+
    int[] oopsX4;
    int[] memX4;

@ -141,6 +143,11 @@ public class TestVectorAlgorithms {
        testGroups.get("dotProductF").put("dotProductF_VectorAPI_naive",                () -> { return dotProductF_VectorAPI_naive(aF, bF); });
        testGroups.get("dotProductF").put("dotProductF_VectorAPI_reduction_after_loop", () -> { return dotProductF_VectorAPI_reduction_after_loop(aF, bF); });

+        testGroups.put("hashCodeB", new HashMap<String,TestFunction>());
+        testGroups.get("hashCodeB").put("hashCodeB_loop",         () -> { return hashCodeB_loop(aB); });
+        testGroups.get("hashCodeB").put("hashCodeB_Arrays",       () -> { return hashCodeB_Arrays(aB); });
+        testGroups.get("hashCodeB").put("hashCodeB_VectorAPI_v1", () -> { return hashCodeB_VectorAPI_v1(aB); });
+
        testGroups.put("scanAddI", new HashMap<String,TestFunction>());
        testGroups.get("scanAddI").put("scanAddI_loop",                      () -> { return scanAddI_loop(aI, rI1); });
        testGroups.get("scanAddI").put("scanAddI_loop_reassociate",          () -> { return scanAddI_loop_reassociate(aI, rI2); });
@ -185,6 +192,9 @@ public class TestVectorAlgorithms {
                 "dotProductF_loop",
                 "dotProductF_VectorAPI_naive",
                 "dotProductF_VectorAPI_reduction_after_loop",
+                 "hashCodeB_loop",
+                 "hashCodeB_Arrays",
+                 "hashCodeB_VectorAPI_v1",
                 "scanAddI_loop",
                 "scanAddI_loop_reassociate",
                 "scanAddI_VectorAPI_permute_add",
@ -236,6 +246,9 @@ public class TestVectorAlgorithms {
                bF[i] = RANDOM.nextInt(32) - 16;
            }

+            aB = new byte[size];
+            RANDOM.nextBytes(aB);
+
            // Run all tests
            for (Map.Entry<String, Map<String,TestFunction>> group_entry : testGroups.entrySet()) {
                String group_name = group_entry.getKey();
@ -409,6 +422,27 @@ public class TestVectorAlgorithms {
        return VectorAlgorithmsImpl.dotProductF_VectorAPI_reduction_after_loop(a, b);
    }

+    @Test
+    public int hashCodeB_loop(byte[] a) { // test wrapper: forwards to the scalar loop in VectorAlgorithmsImpl
+        return VectorAlgorithmsImpl.hashCodeB_loop(a);
+    }
+
+    @Test
+    public int hashCodeB_Arrays(byte[] a) { // test wrapper: forwards to the Arrays.hashCode-based variant
+        return VectorAlgorithmsImpl.hashCodeB_Arrays(a);
+    }
+
+    @Test
+    @IR(counts = {IRNode.LOAD_VECTOR_B,    IRNode.VECTOR_SIZE_8, "> 0", // expect at least one 8-element byte vector load
+                  IRNode.MUL_VI,           IRNode.VECTOR_SIZE_8, "> 0",
+                  IRNode.VECTOR_CAST_B2I,  IRNode.VECTOR_SIZE_8, "> 0", // byte -> int lane widening
+                  IRNode.ADD_VI,           IRNode.VECTOR_SIZE_8, "> 0",
+                  IRNode.ADD_REDUCTION_VI,                       "> 0"}, // no vector-size constraint given for the reduction
+        applyIfCPUFeature = {"avx2", "true"}) // rule is only applied when the avx2 CPU feature is present
+    public int hashCodeB_VectorAPI_v1(byte[] a) {
+        return VectorAlgorithmsImpl.hashCodeB_VectorAPI_v1(a);
+    }
+
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_I,    "> 0",
                  IRNode.ADD_REDUCTION_VI, "> 0",
--- a/test/hotspot/jtreg/compiler/vectorization/VectorAlgorithmsImpl.java
+++ b/test/hotspot/jtreg/compiler/vectorization/VectorAlgorithmsImpl.java
@ -34,6 +34,8 @@ import jdk.incubator.vector.*;
 public class VectorAlgorithmsImpl {
    private static final VectorSpecies<Integer> SPECIES_I    = IntVector.SPECIES_PREFERRED;
    private static final VectorSpecies<Integer> SPECIES_I512 = IntVector.SPECIES_512;
+    private static final VectorSpecies<Integer> SPECIES_I256 = IntVector.SPECIES_256;
+    private static final VectorSpecies<Byte> SPECIES_B64     = ByteVector.SPECIES_64;
    private static final VectorSpecies<Float> SPECIES_F      = FloatVector.SPECIES_PREFERRED;

    public static Object fillI_loop(int[] r) {
@ -216,6 +218,69 @@ public class VectorAlgorithmsImpl {
        return sum;
    }

+    public static int hashCodeB_loop(byte[] a) { // scalar reference: polynomial hash with multiplier 31
+        int h = 1; // initial value; an empty array therefore hashes to 1
+        for (int i = 0; i < a.length; i++) {
+            h = 31 * h + a[i]; // a[i] is sign-extended to int; int overflow wraps
+        }
+        return h;
+    }
+
+    public static int hashCodeB_Arrays(byte[] a) { // baseline: delegates to the JDK's Arrays.hashCode(byte[])
+        return Arrays.hashCode(a);
+    }
+
+    // Simplified intrinsic code from C2_MacroAssembler::arrays_hashcode in c2_MacroAssembler_x86.cpp
+    //
+    // Ideas that may help understand the code:
+    //
+    // h(i) = 31 * h(i-1) + a[i]
+    // "unroll" by factor of L=8:
+    // h(i+8) = h(i) * 31^8 + a[i+1] * 31^7 + a[i+2] * 31^6 + ... + a[i+8] * 1
+    //          -----------   ------------------------------------------------
+    //          scalar        vector: notice the powers of 31 in reverse
+    //
+    // We notice that we can load a[i+1 .. i+8], then element-wise multiply with
+    // the vector of reversed powers-of-31, and then do reduceLanes(ADD).
+    // But we can do even better: By looking at multiple such 8-unrolled iterations.
+    // Instead of applying the "next" factor of "31^8" to the reduced scalar, we can
+    // already apply it element-wise. That allows us to move the reduction out
+    // of the loop.
+    //
+    // Note: the intrinsic additionally unrolls the loop by a factor of 4,
+    //       but we want to keep things simple for demonstration purposes.
+    //
+    private static int[] REVERSE_POWERS_OF_31 = new int[9]; // holds 31^8, 31^7, ..., 31^0 (int arithmetic, wraps on overflow)
+    static { // NOTE(review): the field is not reassigned in the visible code; consider declaring it final
+        int p = 1; // 31^0
+        for (int i = REVERSE_POWERS_OF_31.length - 1; i >= 0; i--) { // fill from the last slot backwards
+            REVERSE_POWERS_OF_31[i] = p;
+            p *= 31;
+        }
+    }
+    public static int hashCodeB_VectorAPI_v1(byte[] a) {
+        int result = 1; // initialValue
+        var vresult = IntVector.zero(SPECIES_I256); // per-lane partial hashes, all lanes start at 0
+        int next = REVERSE_POWERS_OF_31[0]; // 31^L
+        var vnext = IntVector.broadcast(SPECIES_I256, next);
+        var vcoef = IntVector.fromArray(SPECIES_I256, REVERSE_POWERS_OF_31, 1); // powers of 31 in reverse: 31^7 .. 31^0
+        int i;
+        for (i = 0; i < SPECIES_B64.loopBound(a.length); i += SPECIES_B64.length()) {
+            // scalar part: result *= 31^L
+            result *= next;
+            // vector part: element-wise apply the next factor and add in the new values.
+            var vb = ByteVector.fromArray(SPECIES_B64, a, i);
+            var vi = vb.castShape(SPECIES_I256, 0); // widen the 64-bit byte vector to 256-bit int lanes
+            vresult = vresult.mul(vnext).add(vi);
+        }
+        // reduce the partial hashes in the elements, using the reverse list of powers of 31.
+        result += vresult.mul(vcoef).reduceLanes(VectorOperators.ADD);
+        for (; i < a.length; i++) { // scalar tail for the elements past the last full vector
+            result = 31 * result + a[i];
+        }
+        return result;
+    }
+
    public static Object scanAddI_loop(int[] a, int[] r) {
        int sum = 0;
        for (int i = 0; i < a.length; i++) {
--- a/test/micro/org/openjdk/bench/vm/compiler/VectorAlgorithms.java
+++ b/test/micro/org/openjdk/bench/vm/compiler/VectorAlgorithms.java
@ -76,6 +76,8 @@ public class VectorAlgorithms {
    public static float[] aF;
    public static float[] bF;

+    byte[] aB;
+
    @Setup
    public void init() {
        RANDOM = new Random(SEED);
@ -90,6 +92,9 @@ public class VectorAlgorithms {

        aF = new float[SIZE];
        bF = new float[SIZE];
+
+        aB = new byte[SIZE];
+        RANDOM.nextBytes(aB);
    }

    @Setup(Level.Iteration)
@ -206,6 +211,21 @@ public class VectorAlgorithms {
        return VectorAlgorithmsImpl.dotProductF_VectorAPI_reduction_after_loop(aF, bF);
    }

+    @Benchmark
+    public int hashCodeB_loop() { // benchmarks the scalar loop on the aB byte array
+        return VectorAlgorithmsImpl.hashCodeB_loop(aB);
+    }
+
+    @Benchmark
+    public int hashCodeB_Arrays() { // benchmarks the Arrays.hashCode-based variant on the aB byte array
+        return VectorAlgorithmsImpl.hashCodeB_Arrays(aB);
+    }
+
+    @Benchmark
+    public int hashCodeB_VectorAPI_v1() { // benchmarks the Vector API variant on the aB byte array
+        return VectorAlgorithmsImpl.hashCodeB_VectorAPI_v1(aB);
+    }
+
    @Benchmark
    public Object scanAddI_loop() {
        return VectorAlgorithmsImpl.scanAddI_loop(aI, rI);
--- a/test/micro/org/openjdk/bench/vm/compiler/VectorAlgorithmsImpl.java
+++ b/test/micro/org/openjdk/bench/vm/compiler/VectorAlgorithmsImpl.java
@ -34,6 +34,8 @@ import jdk.incubator.vector.*;
 public class VectorAlgorithmsImpl {
    private static final VectorSpecies<Integer> SPECIES_I    = IntVector.SPECIES_PREFERRED;
    private static final VectorSpecies<Integer> SPECIES_I512 = IntVector.SPECIES_512;
+    private static final VectorSpecies<Integer> SPECIES_I256 = IntVector.SPECIES_256;
+    private static final VectorSpecies<Byte> SPECIES_B64     = ByteVector.SPECIES_64;
    private static final VectorSpecies<Float> SPECIES_F      = FloatVector.SPECIES_PREFERRED;

    public static Object fillI_loop(int[] r) {
@ -216,6 +218,69 @@ public class VectorAlgorithmsImpl {
        return sum;
    }

+    public static int hashCodeB_loop(byte[] a) { // scalar reference: polynomial hash with multiplier 31
+        int h = 1; // initial value; an empty array therefore hashes to 1
+        for (int i = 0; i < a.length; i++) {
+            h = 31 * h + a[i]; // a[i] is sign-extended to int; int overflow wraps
+        }
+        return h;
+    }
+
+    public static int hashCodeB_Arrays(byte[] a) { // baseline: delegates to the JDK's Arrays.hashCode(byte[])
+        return Arrays.hashCode(a);
+    }
+
+    // Simplified intrinsic code from C2_MacroAssembler::arrays_hashcode in c2_MacroAssembler_x86.cpp
+    //
+    // Ideas that may help understand the code:
+    //
+    // h(i) = 31 * h(i-1) + a[i]
+    // "unroll" by factor of L=8:
+    // h(i+8) = h(i) * 31^8 + a[i+1] * 31^7 + a[i+2] * 31^6 + ... + a[i+8] * 1
+    //          -----------   ------------------------------------------------
+    //          scalar        vector: notice the powers of 31 in reverse
+    //
+    // We notice that we can load a[i+1 .. i+8], then element-wise multiply with
+    // the vector of reversed powers-of-31, and then do reduceLanes(ADD).
+    // But we can do even better: By looking at multiple such 8-unrolled iterations.
+    // Instead of applying the "next" factor of "31^8" to the reduced scalar, we can
+    // already apply it element-wise. That allows us to move the reduction out
+    // of the loop.
+    //
+    // Note: the intrinsic additionally unrolls the loop by a factor of 4,
+    //       but we want to keep things simple for demonstration purposes.
+    //
+    private static int[] REVERSE_POWERS_OF_31 = new int[9]; // holds 31^8, 31^7, ..., 31^0 (int arithmetic, wraps on overflow)
+    static { // NOTE(review): the field is not reassigned in the visible code; consider declaring it final
+        int p = 1; // 31^0
+        for (int i = REVERSE_POWERS_OF_31.length - 1; i >= 0; i--) { // fill from the last slot backwards
+            REVERSE_POWERS_OF_31[i] = p;
+            p *= 31;
+        }
+    }
+    public static int hashCodeB_VectorAPI_v1(byte[] a) {
+        int result = 1; // initialValue
+        var vresult = IntVector.zero(SPECIES_I256); // per-lane partial hashes, all lanes start at 0
+        int next = REVERSE_POWERS_OF_31[0]; // 31^L
+        var vnext = IntVector.broadcast(SPECIES_I256, next);
+        var vcoef = IntVector.fromArray(SPECIES_I256, REVERSE_POWERS_OF_31, 1); // powers of 31 in reverse: 31^7 .. 31^0
+        int i;
+        for (i = 0; i < SPECIES_B64.loopBound(a.length); i += SPECIES_B64.length()) {
+            // scalar part: result *= 31^L
+            result *= next;
+            // vector part: element-wise apply the next factor and add in the new values.
+            var vb = ByteVector.fromArray(SPECIES_B64, a, i);
+            var vi = vb.castShape(SPECIES_I256, 0); // widen the 64-bit byte vector to 256-bit int lanes
+            vresult = vresult.mul(vnext).add(vi);
+        }
+        // reduce the partial hashes in the elements, using the reverse list of powers of 31.
+        result += vresult.mul(vcoef).reduceLanes(VectorOperators.ADD);
+        for (; i < a.length; i++) { // scalar tail for the elements past the last full vector
+            result = 31 * result + a[i];
+        }
+        return result;
+    }
+
    public static Object scanAddI_loop(int[] a, int[] r) {
        int sum = 0;
        for (int i = 0; i < a.length; i++) {