From 27c452ce9749ab96ff2940f85f855855ab0d247e Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Fri, 16 Jan 2026 17:00:47 +0100 Subject: [PATCH] add hashCodeB test and benchmark --- .../vectorization/TestVectorAlgorithms.java | 34 ++++++++++ .../vectorization/VectorAlgorithmsImpl.java | 65 +++++++++++++++++++ .../bench/vm/compiler/VectorAlgorithms.java | 20 ++++++ .../vm/compiler/VectorAlgorithmsImpl.java | 65 +++++++++++++++++++ 4 files changed, 184 insertions(+) diff --git a/test/hotspot/jtreg/compiler/vectorization/TestVectorAlgorithms.java b/test/hotspot/jtreg/compiler/vectorization/TestVectorAlgorithms.java index 16b03ae542c..3f179cdca3d 100644 --- a/test/hotspot/jtreg/compiler/vectorization/TestVectorAlgorithms.java +++ b/test/hotspot/jtreg/compiler/vectorization/TestVectorAlgorithms.java @@ -87,6 +87,8 @@ public class TestVectorAlgorithms { float[] aF; float[] bF; + byte[] aB; + int[] oopsX4; int[] memX4; @@ -141,6 +143,11 @@ public class TestVectorAlgorithms { testGroups.get("dotProductF").put("dotProductF_VectorAPI_naive", () -> { return dotProductF_VectorAPI_naive(aF, bF); }); testGroups.get("dotProductF").put("dotProductF_VectorAPI_reduction_after_loop", () -> { return dotProductF_VectorAPI_reduction_after_loop(aF, bF); }); + testGroups.put("hashCodeB", new HashMap()); + testGroups.get("hashCodeB").put("hashCodeB_loop", () -> { return hashCodeB_loop(aB); }); + testGroups.get("hashCodeB").put("hashCodeB_Arrays", () -> { return hashCodeB_Arrays(aB); }); + testGroups.get("hashCodeB").put("hashCodeB_VectorAPI_v1", () -> { return hashCodeB_VectorAPI_v1(aB); }); + testGroups.put("scanAddI", new HashMap()); testGroups.get("scanAddI").put("scanAddI_loop", () -> { return scanAddI_loop(aI, rI1); }); testGroups.get("scanAddI").put("scanAddI_loop_reassociate", () -> { return scanAddI_loop_reassociate(aI, rI2); }); @@ -185,6 +192,9 @@ public class TestVectorAlgorithms { "dotProductF_loop", "dotProductF_VectorAPI_naive", "dotProductF_VectorAPI_reduction_after_loop", 
+ "hashCodeB_loop", + "hashCodeB_Arrays", + "hashCodeB_VectorAPI_v1", "scanAddI_loop", "scanAddI_loop_reassociate", "scanAddI_VectorAPI_permute_add", @@ -236,6 +246,9 @@ public class TestVectorAlgorithms { bF[i] = RANDOM.nextInt(32) - 16; } + aB = new byte[size]; + RANDOM.nextBytes(aB); + // Run all tests for (Map.Entry> group_entry : testGroups.entrySet()) { String group_name = group_entry.getKey(); @@ -409,6 +422,27 @@ public class TestVectorAlgorithms { return VectorAlgorithmsImpl.dotProductF_VectorAPI_reduction_after_loop(a, b); } + @Test + public int hashCodeB_loop(byte[] a) { + return VectorAlgorithmsImpl.hashCodeB_loop(a); + } + + @Test + public int hashCodeB_Arrays(byte[] a) { + return VectorAlgorithmsImpl.hashCodeB_Arrays(a); + } + + @Test + @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_8, "> 0", + IRNode.MUL_VI, IRNode.VECTOR_SIZE_8, "> 0", + IRNode.VECTOR_CAST_B2I, IRNode.VECTOR_SIZE_8, "> 0", + IRNode.ADD_VI, IRNode.VECTOR_SIZE_8, "> 0", + IRNode.ADD_REDUCTION_VI, "> 0"}, + applyIfCPUFeature = {"avx2", "true"}) + public int hashCodeB_VectorAPI_v1(byte[] a) { + return VectorAlgorithmsImpl.hashCodeB_VectorAPI_v1(a); + } + @Test @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.ADD_REDUCTION_VI, "> 0", diff --git a/test/hotspot/jtreg/compiler/vectorization/VectorAlgorithmsImpl.java b/test/hotspot/jtreg/compiler/vectorization/VectorAlgorithmsImpl.java index ff36b07f0f9..c480cb54005 100644 --- a/test/hotspot/jtreg/compiler/vectorization/VectorAlgorithmsImpl.java +++ b/test/hotspot/jtreg/compiler/vectorization/VectorAlgorithmsImpl.java @@ -34,6 +34,8 @@ import jdk.incubator.vector.*; public class VectorAlgorithmsImpl { private static final VectorSpecies SPECIES_I = IntVector.SPECIES_PREFERRED; private static final VectorSpecies SPECIES_I512 = IntVector.SPECIES_512; + private static final VectorSpecies SPECIES_I256 = IntVector.SPECIES_256; + private static final VectorSpecies SPECIES_B64 = ByteVector.SPECIES_64; private static final VectorSpecies 
SPECIES_F = FloatVector.SPECIES_PREFERRED; public static Object fillI_loop(int[] r) { @@ -216,6 +218,69 @@ public class VectorAlgorithmsImpl { return sum; } + public static int hashCodeB_loop(byte[] a) { + int h = 1; + for (int i = 0; i < a.length; i++) { + h = 31 * h + a[i]; + } + return h; + } + + public static int hashCodeB_Arrays(byte[] a) { + return Arrays.hashCode(a); + } + + // Simplified intrinsic code from C2_MacroAssembler::arrays_hashcode in c2_MacroAssembler_x86.cpp + // + // Ideas that may help understand the code: + // + // h(i) = 31 * h(i-1) + a[i] + // "unroll" by factor of L=8: + // h(i+8) = h(i) * 31^8 + a[i+1] * 31^7 + a[i+2] * 31^6 + ... + a[i+8] * 1 + // ----------- ------------------------------------------------ + // scalar vector: notice the powers of 31 in reverse + // + // We notice that we can load a[i+1 .. i+8], then element-wise multiply with + // the vector of reversed powers-of-31, and then do reduceLanes(ADD). + // But we can do even better: By looking at multiple such 8-unrolled iterations. + // Instead of applying the "next" factor of "31^8" to the reduced scalar, we can + // already apply it element-wise. That allows us to move the reduction out + // of the loop. + // + // Note: the intrinsic additionally unrolls the loop by a factor of 4, + // but we want to keep things simple for demonstration purposes. 
+ // + private static int[] REVERSE_POWERS_OF_31 = new int[9]; + static { + int p = 1; + for (int i = REVERSE_POWERS_OF_31.length - 1; i >= 0; i--) { + REVERSE_POWERS_OF_31[i] = p; + p *= 31; + } + } + public static int hashCodeB_VectorAPI_v1(byte[] a) { + int result = 1; // initialValue + var vresult = IntVector.zero(SPECIES_I256); + int next = REVERSE_POWERS_OF_31[0]; // 31^L + var vnext = IntVector.broadcast(SPECIES_I256, next); + var vcoef = IntVector.fromArray(SPECIES_I256, REVERSE_POWERS_OF_31, 1); // powers of 31 in reverse + int i; + for (i = 0; i < SPECIES_B64.loopBound(a.length); i += SPECIES_B64.length()) { + // scalar part: result *= 31^L + result *= next; + // vector part: element-wise apply the next factor and add in the new values. + var vb = ByteVector.fromArray(SPECIES_B64, a, i); + var vi = vb.castShape(SPECIES_I256, 0); + vresult = vresult.mul(vnext).add(vi); + } + // reduce the partial hashes in the elements, using the reverse list of powers of 31. + result += vresult.mul(vcoef).reduceLanes(VectorOperators.ADD); + for (; i < a.length; i++) { + result = 31 * result + a[i]; + } + return result; + } + public static Object scanAddI_loop(int[] a, int[] r) { int sum = 0; for (int i = 0; i < a.length; i++) { diff --git a/test/micro/org/openjdk/bench/vm/compiler/VectorAlgorithms.java b/test/micro/org/openjdk/bench/vm/compiler/VectorAlgorithms.java index 26a02622897..879388de99a 100644 --- a/test/micro/org/openjdk/bench/vm/compiler/VectorAlgorithms.java +++ b/test/micro/org/openjdk/bench/vm/compiler/VectorAlgorithms.java @@ -76,6 +76,8 @@ public class VectorAlgorithms { public static float[] aF; public static float[] bF; + byte[] aB; + @Setup public void init() { RANDOM = new Random(SEED); @@ -90,6 +92,9 @@ public class VectorAlgorithms { aF = new float[SIZE]; bF = new float[SIZE]; + + aB = new byte[SIZE]; + RANDOM.nextBytes(aB); } @Setup(Level.Iteration) @@ -206,6 +211,21 @@ public class VectorAlgorithms { return 
VectorAlgorithmsImpl.dotProductF_VectorAPI_reduction_after_loop(aF, bF); } + @Benchmark + public int hashCodeB_loop() { + return VectorAlgorithmsImpl.hashCodeB_loop(aB); + } + + @Benchmark + public int hashCodeB_Arrays() { + return VectorAlgorithmsImpl.hashCodeB_Arrays(aB); + } + + @Benchmark + public int hashCodeB_VectorAPI_v1() { + return VectorAlgorithmsImpl.hashCodeB_VectorAPI_v1(aB); + } + @Benchmark public Object scanAddI_loop() { return VectorAlgorithmsImpl.scanAddI_loop(aI, rI); diff --git a/test/micro/org/openjdk/bench/vm/compiler/VectorAlgorithmsImpl.java b/test/micro/org/openjdk/bench/vm/compiler/VectorAlgorithmsImpl.java index fbeabef6fc0..aaec016d72d 100644 --- a/test/micro/org/openjdk/bench/vm/compiler/VectorAlgorithmsImpl.java +++ b/test/micro/org/openjdk/bench/vm/compiler/VectorAlgorithmsImpl.java @@ -34,6 +34,8 @@ import jdk.incubator.vector.*; public class VectorAlgorithmsImpl { private static final VectorSpecies SPECIES_I = IntVector.SPECIES_PREFERRED; private static final VectorSpecies SPECIES_I512 = IntVector.SPECIES_512; + private static final VectorSpecies SPECIES_I256 = IntVector.SPECIES_256; + private static final VectorSpecies SPECIES_B64 = ByteVector.SPECIES_64; private static final VectorSpecies SPECIES_F = FloatVector.SPECIES_PREFERRED; public static Object fillI_loop(int[] r) { @@ -216,6 +218,69 @@ public class VectorAlgorithmsImpl { return sum; } + public static int hashCodeB_loop(byte[] a) { + int h = 1; + for (int i = 0; i < a.length; i++) { + h = 31 * h + a[i]; + } + return h; + } + + public static int hashCodeB_Arrays(byte[] a) { + return Arrays.hashCode(a); + } + + // Simplified intrinsic code from C2_MacroAssembler::arrays_hashcode in c2_MacroAssembler_x86.cpp + // + // Ideas that may help understand the code: + // + // h(i) = 31 * h(i-1) + a[i] + // "unroll" by factor of L=8: + // h(i+8) = h(i) * 31^8 + a[i+1] * 31^7 + a[i+2] * 31^6 + ... 
+ a[i+8] * 1 + // ----------- ------------------------------------------------ + // scalar vector: notice the powers of 31 in reverse + // + // We notice that we can load a[i+1 .. i+8], then element-wise multiply with + // the vector of reversed powers-of-31, and then do reduceLanes(ADD). + // But we can do even better: By looking at multiple such 8-unrolled iterations. + // Instead of applying the "next" factor of "31^8" to the reduced scalar, we can + // already apply it element-wise. That allows us to move the reduction out + // of the loop. + // + // Note: the intrinsic additionally unrolls the loop by a factor of 4, + // but we want to keep things simple for demonstration purposes. + // + private static int[] REVERSE_POWERS_OF_31 = new int[9]; + static { + int p = 1; + for (int i = REVERSE_POWERS_OF_31.length - 1; i >= 0; i--) { + REVERSE_POWERS_OF_31[i] = p; + p *= 31; + } + } + public static int hashCodeB_VectorAPI_v1(byte[] a) { + int result = 1; // initialValue + var vresult = IntVector.zero(SPECIES_I256); + int next = REVERSE_POWERS_OF_31[0]; // 31^L + var vnext = IntVector.broadcast(SPECIES_I256, next); + var vcoef = IntVector.fromArray(SPECIES_I256, REVERSE_POWERS_OF_31, 1); // powers of 31 in reverse + int i; + for (i = 0; i < SPECIES_B64.loopBound(a.length); i += SPECIES_B64.length()) { + // scalar part: result *= 31^L + result *= next; + // vector part: element-wise apply the next factor and add in the new values. + var vb = ByteVector.fromArray(SPECIES_B64, a, i); + var vi = vb.castShape(SPECIES_I256, 0); + vresult = vresult.mul(vnext).add(vi); + } + // reduce the partial hashes in the elements, using the reverse list of powers of 31. + result += vresult.mul(vcoef).reduceLanes(VectorOperators.ADD); + for (; i < a.length; i++) { + result = 31 * result + a[i]; + } + return result; + } + public static Object scanAddI_loop(int[] a, int[] r) { int sum = 0; for (int i = 0; i < a.length; i++) {