add hashCodeB test and benchmark

This commit is contained in:
Emanuel Peter 2026-01-16 17:00:47 +01:00
parent 4ecbfdd382
commit 27c452ce97
4 changed files with 184 additions and 0 deletions

View File

@ -87,6 +87,8 @@ public class TestVectorAlgorithms {
float[] aF;
float[] bF;
byte[] aB;
int[] oopsX4;
int[] memX4;
@ -141,6 +143,11 @@ public class TestVectorAlgorithms {
testGroups.get("dotProductF").put("dotProductF_VectorAPI_naive", () -> { return dotProductF_VectorAPI_naive(aF, bF); });
testGroups.get("dotProductF").put("dotProductF_VectorAPI_reduction_after_loop", () -> { return dotProductF_VectorAPI_reduction_after_loop(aF, bF); });
testGroups.put("hashCodeB", new HashMap<String,TestFunction>());
testGroups.get("hashCodeB").put("hashCodeB_loop", () -> { return hashCodeB_loop(aB); });
testGroups.get("hashCodeB").put("hashCodeB_Arrays", () -> { return hashCodeB_Arrays(aB); });
testGroups.get("hashCodeB").put("hashCodeB_VectorAPI_v1", () -> { return hashCodeB_VectorAPI_v1(aB); });
testGroups.put("scanAddI", new HashMap<String,TestFunction>());
testGroups.get("scanAddI").put("scanAddI_loop", () -> { return scanAddI_loop(aI, rI1); });
testGroups.get("scanAddI").put("scanAddI_loop_reassociate", () -> { return scanAddI_loop_reassociate(aI, rI2); });
@ -185,6 +192,9 @@ public class TestVectorAlgorithms {
"dotProductF_loop",
"dotProductF_VectorAPI_naive",
"dotProductF_VectorAPI_reduction_after_loop",
"hashCodeB_loop",
"hashCodeB_Arrays",
"hashCodeB_VectorAPI_v1",
"scanAddI_loop",
"scanAddI_loop_reassociate",
"scanAddI_VectorAPI_permute_add",
@ -236,6 +246,9 @@ public class TestVectorAlgorithms {
bF[i] = RANDOM.nextInt(32) - 16;
}
aB = new byte[size];
RANDOM.nextBytes(aB);
// Run all tests
for (Map.Entry<String, Map<String,TestFunction>> group_entry : testGroups.entrySet()) {
String group_name = group_entry.getKey();
@ -409,6 +422,27 @@ public class TestVectorAlgorithms {
return VectorAlgorithmsImpl.dotProductF_VectorAPI_reduction_after_loop(a, b);
}
// Test entry point for the scalar-loop variant: forwards the byte array to
// VectorAlgorithmsImpl.hashCodeB_loop and hands back its hash unchanged.
@Test
public int hashCodeB_loop(byte[] a) {
    final int hash = VectorAlgorithmsImpl.hashCodeB_loop(a);
    return hash;
}
// Test entry point for the Arrays.hashCode-based variant: forwards the byte
// array to VectorAlgorithmsImpl.hashCodeB_Arrays and hands back its hash unchanged.
@Test
public int hashCodeB_Arrays(byte[] a) {
    final int hash = VectorAlgorithmsImpl.hashCodeB_Arrays(a);
    return hash;
}
// Test entry point for the Vector API variant: forwards the byte array to
// VectorAlgorithmsImpl.hashCodeB_VectorAPI_v1 and returns its hash.
//
// The IR rule is only applied when the CPU feature "avx2" is reported as true.
// It then requires a count "> 0" for each listed node: LOAD_VECTOR_B, MUL_VI,
// VECTOR_CAST_B2I and ADD_VI (each qualified with VECTOR_SIZE_8), and
// ADD_REDUCTION_VI (no size qualifier).
@Test
@IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_8, "> 0",
IRNode.MUL_VI, IRNode.VECTOR_SIZE_8, "> 0",
IRNode.VECTOR_CAST_B2I, IRNode.VECTOR_SIZE_8, "> 0",
IRNode.ADD_VI, IRNode.VECTOR_SIZE_8, "> 0",
IRNode.ADD_REDUCTION_VI, "> 0"},
applyIfCPUFeature = {"avx2", "true"})
public int hashCodeB_VectorAPI_v1(byte[] a) {
return VectorAlgorithmsImpl.hashCodeB_VectorAPI_v1(a);
}
@Test
@IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
IRNode.ADD_REDUCTION_VI, "> 0",

View File

@ -34,6 +34,8 @@ import jdk.incubator.vector.*;
public class VectorAlgorithmsImpl {
private static final VectorSpecies<Integer> SPECIES_I = IntVector.SPECIES_PREFERRED;
private static final VectorSpecies<Integer> SPECIES_I512 = IntVector.SPECIES_512;
private static final VectorSpecies<Integer> SPECIES_I256 = IntVector.SPECIES_256;
private static final VectorSpecies<Byte> SPECIES_B64 = ByteVector.SPECIES_64;
private static final VectorSpecies<Float> SPECIES_F = FloatVector.SPECIES_PREFERRED;
public static Object fillI_loop(int[] r) {
@ -216,6 +218,69 @@ public class VectorAlgorithmsImpl {
return sum;
}
// Scalar reference implementation: starts from 1 and, for every byte from
// first to last, multiplies the running hash by 31 and adds the byte.
// Arithmetic is plain int arithmetic, so overflow wraps around.
public static int hashCodeB_loop(byte[] a) {
    int hash = 1;
    for (byte value : a) {
        hash = hash * 31 + value;
    }
    return hash;
}
// Baseline variant: lets the JDK compute the hash via Arrays.hashCode(byte[]).
public static int hashCodeB_Arrays(byte[] a) {
    final int hash = Arrays.hashCode(a);
    return hash;
}
// Simplified intrinsic code from C2_MacroAssembler::arrays_hashcode in c2_MacroAssembler_x86.cpp
//
// Ideas that may help understand the code:
//
// h(i) = 31 * h(i-1) + a[i]
// "unroll" by factor of L=8:
// h(i+8) = h(i) * 31^8 + a[i+1] * 31^7 + a[i+2] * 31^6 + ... + a[i+8] * 1
//          -----------   ------------------------------------------------
//          scalar        vector: notice the powers of 31 in reverse
//
// We notice that we can load a[i+1 .. i+8], then element-wise multiply with
// the vector of reversed powers-of-31, and then do reduceLanes(ADD).
// But we can do even better: By looking at multiple such 8-unrolled iterations.
// Instead of applying the "next" factor of "31^8" to the reduced scalar, we can
// already apply it element-wise. That allows us to move the reduction out
// of the loop.
//
// Note: the intrinsic additionally unrolls the loop by a factor of 4,
// but we want to keep things simple for demonstration purposes.
//
// Lookup table of powers of 31 in descending order: index k holds 31^(8-k),
// i.e. index 0 is 31^8 and the last index is 31^0 = 1. The products are
// computed with int arithmetic, so the larger powers wrap around; that matches
// the wrapping int arithmetic of the hash computation itself.
//
// The reference is final so the table can never be swapped out after class
// initialization; the contents are meant to be read-only.
private static final int[] REVERSE_POWERS_OF_31 = new int[9];
static {
    int p = 1;
    // Fill from the back so that the exponents decrease with increasing index.
    for (int i = REVERSE_POWERS_OF_31.length - 1; i >= 0; i--) {
        REVERSE_POWERS_OF_31[i] = p;
        p *= 31;
    }
}
// Vector API implementation of the 31-based polynomial hash over a byte array.
//
// Processes SPECIES_B64.length() bytes per main-loop iteration: the scalar
// "result" carries the contribution of the initial value 1, while each lane of
// "vresult" accumulates a partial hash of the bytes that landed in that lane.
// The lanes are combined only once, after the loop, and any bytes beyond the
// loop bound are folded in by the scalar tail loop.
public static int hashCodeB_VectorAPI_v1(byte[] a) {
int result = 1; // initialValue
var vresult = IntVector.zero(SPECIES_I256);
int next = REVERSE_POWERS_OF_31[0]; // 31^L
var vnext = IntVector.broadcast(SPECIES_I256, next);
// Load starts at offset 1, i.e. it skips the 31^L entry at index 0.
var vcoef = IntVector.fromArray(SPECIES_I256, REVERSE_POWERS_OF_31, 1); // powers of 31 in reverse
int i;
for (i = 0; i < SPECIES_B64.loopBound(a.length); i += SPECIES_B64.length()) {
// scalar part: result *= 31^L
result *= next;
// vector part: element-wise apply the next factor and add in the new values.
var vb = ByteVector.fromArray(SPECIES_B64, a, i);
// Convert the loaded byte lanes into int lanes of the 256-bit int species.
var vi = vb.castShape(SPECIES_I256, 0);
vresult = vresult.mul(vnext).add(vi);
}
// reduce the partial hashes in the elements, using the reverse list of powers of 31.
result += vresult.mul(vcoef).reduceLanes(VectorOperators.ADD);
// Scalar tail: handle the elements that the vector loop did not cover.
for (; i < a.length; i++) {
result = 31 * result + a[i];
}
return result;
}
public static Object scanAddI_loop(int[] a, int[] r) {
int sum = 0;
for (int i = 0; i < a.length; i++) {

View File

@ -76,6 +76,8 @@ public class VectorAlgorithms {
public static float[] aF;
public static float[] bF;
byte[] aB;
@Setup
public void init() {
RANDOM = new Random(SEED);
@ -90,6 +92,9 @@ public class VectorAlgorithms {
aF = new float[SIZE];
bF = new float[SIZE];
aB = new byte[SIZE];
RANDOM.nextBytes(aB);
}
@Setup(Level.Iteration)
@ -206,6 +211,21 @@ public class VectorAlgorithms {
return VectorAlgorithmsImpl.dotProductF_VectorAPI_reduction_after_loop(aF, bF);
}
// Benchmark: hashes the byte array aB with the scalar-loop implementation.
@Benchmark
public int hashCodeB_loop() {
    final int hash = VectorAlgorithmsImpl.hashCodeB_loop(aB);
    return hash;
}
// Benchmark: hashes the byte array aB with the Arrays.hashCode-based implementation.
@Benchmark
public int hashCodeB_Arrays() {
    final int hash = VectorAlgorithmsImpl.hashCodeB_Arrays(aB);
    return hash;
}
// Benchmark: hashes the byte array aB with the Vector API implementation.
@Benchmark
public int hashCodeB_VectorAPI_v1() {
    final int hash = VectorAlgorithmsImpl.hashCodeB_VectorAPI_v1(aB);
    return hash;
}
@Benchmark
public Object scanAddI_loop() {
return VectorAlgorithmsImpl.scanAddI_loop(aI, rI);

View File

@ -34,6 +34,8 @@ import jdk.incubator.vector.*;
public class VectorAlgorithmsImpl {
private static final VectorSpecies<Integer> SPECIES_I = IntVector.SPECIES_PREFERRED;
private static final VectorSpecies<Integer> SPECIES_I512 = IntVector.SPECIES_512;
private static final VectorSpecies<Integer> SPECIES_I256 = IntVector.SPECIES_256;
private static final VectorSpecies<Byte> SPECIES_B64 = ByteVector.SPECIES_64;
private static final VectorSpecies<Float> SPECIES_F = FloatVector.SPECIES_PREFERRED;
public static Object fillI_loop(int[] r) {
@ -216,6 +218,69 @@ public class VectorAlgorithmsImpl {
return sum;
}
// Scalar reference implementation: starts from 1 and, for every byte from
// first to last, multiplies the running hash by 31 and adds the byte.
// Arithmetic is plain int arithmetic, so overflow wraps around.
public static int hashCodeB_loop(byte[] a) {
    int hash = 1;
    for (byte value : a) {
        hash = hash * 31 + value;
    }
    return hash;
}
// Baseline variant: lets the JDK compute the hash via Arrays.hashCode(byte[]).
public static int hashCodeB_Arrays(byte[] a) {
    final int hash = Arrays.hashCode(a);
    return hash;
}
// Simplified intrinsic code from C2_MacroAssembler::arrays_hashcode in c2_MacroAssembler_x86.cpp
//
// Ideas that may help understand the code:
//
// h(i) = 31 * h(i-1) + a[i]
// "unroll" by factor of L=8:
// h(i+8) = h(i) * 31^8 + a[i+1] * 31^7 + a[i+2] * 31^6 + ... + a[i+8] * 1
//          -----------   ------------------------------------------------
//          scalar        vector: notice the powers of 31 in reverse
//
// We notice that we can load a[i+1 .. i+8], then element-wise multiply with
// the vector of reversed powers-of-31, and then do reduceLanes(ADD).
// But we can do even better: By looking at multiple such 8-unrolled iterations.
// Instead of applying the "next" factor of "31^8" to the reduced scalar, we can
// already apply it element-wise. That allows us to move the reduction out
// of the loop.
//
// Note: the intrinsic additionally unrolls the loop by a factor of 4,
// but we want to keep things simple for demonstration purposes.
//
// Lookup table of powers of 31 in descending order: index k holds 31^(8-k),
// i.e. index 0 is 31^8 and the last index is 31^0 = 1. The products are
// computed with int arithmetic, so the larger powers wrap around; that matches
// the wrapping int arithmetic of the hash computation itself.
//
// The reference is final so the table can never be swapped out after class
// initialization; the contents are meant to be read-only.
private static final int[] REVERSE_POWERS_OF_31 = new int[9];
static {
    int p = 1;
    // Fill from the back so that the exponents decrease with increasing index.
    for (int i = REVERSE_POWERS_OF_31.length - 1; i >= 0; i--) {
        REVERSE_POWERS_OF_31[i] = p;
        p *= 31;
    }
}
// Vector API implementation of the 31-based polynomial hash over a byte array.
//
// Processes SPECIES_B64.length() bytes per main-loop iteration: the scalar
// "result" carries the contribution of the initial value 1, while each lane of
// "vresult" accumulates a partial hash of the bytes that landed in that lane.
// The lanes are combined only once, after the loop, and any bytes beyond the
// loop bound are folded in by the scalar tail loop.
public static int hashCodeB_VectorAPI_v1(byte[] a) {
int result = 1; // initialValue
var vresult = IntVector.zero(SPECIES_I256);
int next = REVERSE_POWERS_OF_31[0]; // 31^L
var vnext = IntVector.broadcast(SPECIES_I256, next);
// Load starts at offset 1, i.e. it skips the 31^L entry at index 0.
var vcoef = IntVector.fromArray(SPECIES_I256, REVERSE_POWERS_OF_31, 1); // powers of 31 in reverse
int i;
for (i = 0; i < SPECIES_B64.loopBound(a.length); i += SPECIES_B64.length()) {
// scalar part: result *= 31^L
result *= next;
// vector part: element-wise apply the next factor and add in the new values.
var vb = ByteVector.fromArray(SPECIES_B64, a, i);
// Convert the loaded byte lanes into int lanes of the 256-bit int species.
var vi = vb.castShape(SPECIES_I256, 0);
vresult = vresult.mul(vnext).add(vi);
}
// reduce the partial hashes in the elements, using the reverse list of powers of 31.
result += vresult.mul(vcoef).reduceLanes(VectorOperators.ADD);
// Scalar tail: handle the elements that the vector loop did not cover.
for (; i < a.length; i++) {
result = 31 * result + a[i];
}
return result;
}
public static Object scanAddI_loop(int[] a, int[] r) {
int sum = 0;
for (int i = 0; i < a.length; i++) {