mirror of
https://github.com/openjdk/jdk.git
synced 2026-01-28 03:58:21 +00:00
add hashCodeB test and benchmark
This commit is contained in:
parent
4ecbfdd382
commit
27c452ce97
@ -87,6 +87,8 @@ public class TestVectorAlgorithms {
|
||||
float[] aF;
|
||||
float[] bF;
|
||||
|
||||
byte[] aB;
|
||||
|
||||
int[] oopsX4;
|
||||
int[] memX4;
|
||||
|
||||
@ -141,6 +143,11 @@ public class TestVectorAlgorithms {
|
||||
testGroups.get("dotProductF").put("dotProductF_VectorAPI_naive", () -> { return dotProductF_VectorAPI_naive(aF, bF); });
|
||||
testGroups.get("dotProductF").put("dotProductF_VectorAPI_reduction_after_loop", () -> { return dotProductF_VectorAPI_reduction_after_loop(aF, bF); });
|
||||
|
||||
testGroups.put("hashCodeB", new HashMap<String,TestFunction>());
|
||||
testGroups.get("hashCodeB").put("hashCodeB_loop", () -> { return hashCodeB_loop(aB); });
|
||||
testGroups.get("hashCodeB").put("hashCodeB_Arrays", () -> { return hashCodeB_Arrays(aB); });
|
||||
testGroups.get("hashCodeB").put("hashCodeB_VectorAPI_v1", () -> { return hashCodeB_VectorAPI_v1(aB); });
|
||||
|
||||
testGroups.put("scanAddI", new HashMap<String,TestFunction>());
|
||||
testGroups.get("scanAddI").put("scanAddI_loop", () -> { return scanAddI_loop(aI, rI1); });
|
||||
testGroups.get("scanAddI").put("scanAddI_loop_reassociate", () -> { return scanAddI_loop_reassociate(aI, rI2); });
|
||||
@ -185,6 +192,9 @@ public class TestVectorAlgorithms {
|
||||
"dotProductF_loop",
|
||||
"dotProductF_VectorAPI_naive",
|
||||
"dotProductF_VectorAPI_reduction_after_loop",
|
||||
"hashCodeB_loop",
|
||||
"hashCodeB_Arrays",
|
||||
"hashCodeB_VectorAPI_v1",
|
||||
"scanAddI_loop",
|
||||
"scanAddI_loop_reassociate",
|
||||
"scanAddI_VectorAPI_permute_add",
|
||||
@ -236,6 +246,9 @@ public class TestVectorAlgorithms {
|
||||
bF[i] = RANDOM.nextInt(32) - 16;
|
||||
}
|
||||
|
||||
aB = new byte[size];
|
||||
RANDOM.nextBytes(aB);
|
||||
|
||||
// Run all tests
|
||||
for (Map.Entry<String, Map<String,TestFunction>> group_entry : testGroups.entrySet()) {
|
||||
String group_name = group_entry.getKey();
|
||||
@ -409,6 +422,27 @@ public class TestVectorAlgorithms {
|
||||
return VectorAlgorithmsImpl.dotProductF_VectorAPI_reduction_after_loop(a, b);
|
||||
}
|
||||
|
||||
@Test
|
||||
public int hashCodeB_loop(byte[] a) {
|
||||
return VectorAlgorithmsImpl.hashCodeB_loop(a);
|
||||
}
|
||||
|
||||
@Test
|
||||
public int hashCodeB_Arrays(byte[] a) {
|
||||
return VectorAlgorithmsImpl.hashCodeB_Arrays(a);
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_8, "> 0",
|
||||
IRNode.MUL_VI, IRNode.VECTOR_SIZE_8, "> 0",
|
||||
IRNode.VECTOR_CAST_B2I, IRNode.VECTOR_SIZE_8, "> 0",
|
||||
IRNode.ADD_VI, IRNode.VECTOR_SIZE_8, "> 0",
|
||||
IRNode.ADD_REDUCTION_VI, "> 0"},
|
||||
applyIfCPUFeature = {"avx2", "true"})
|
||||
public int hashCodeB_VectorAPI_v1(byte[] a) {
|
||||
return VectorAlgorithmsImpl.hashCodeB_VectorAPI_v1(a);
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
|
||||
IRNode.ADD_REDUCTION_VI, "> 0",
|
||||
|
||||
@ -34,6 +34,8 @@ import jdk.incubator.vector.*;
|
||||
public class VectorAlgorithmsImpl {
|
||||
private static final VectorSpecies<Integer> SPECIES_I = IntVector.SPECIES_PREFERRED;
|
||||
private static final VectorSpecies<Integer> SPECIES_I512 = IntVector.SPECIES_512;
|
||||
private static final VectorSpecies<Integer> SPECIES_I256 = IntVector.SPECIES_256;
|
||||
private static final VectorSpecies<Byte> SPECIES_B64 = ByteVector.SPECIES_64;
|
||||
private static final VectorSpecies<Float> SPECIES_F = FloatVector.SPECIES_PREFERRED;
|
||||
|
||||
public static Object fillI_loop(int[] r) {
|
||||
@ -216,6 +218,69 @@ public class VectorAlgorithmsImpl {
|
||||
return sum;
|
||||
}
|
||||
|
||||
public static int hashCodeB_loop(byte[] a) {
|
||||
int h = 1;
|
||||
for (int i = 0; i < a.length; i++) {
|
||||
h = 31 * h + a[i];
|
||||
}
|
||||
return h;
|
||||
}
|
||||
|
||||
public static int hashCodeB_Arrays(byte[] a) {
|
||||
return Arrays.hashCode(a);
|
||||
}
|
||||
|
||||
// Simplified intrinsic code from C2_MacroAssembler::arrays_hashcode in c2_MacroAssembler_x86.cpp
|
||||
//
|
||||
// Ideas that may help understand the code:
|
||||
//
|
||||
// h(i) = 31 * h(i-1) + a[i]
|
||||
// "unroll" by factor of L=8:
|
||||
// h(i+8) = h(i) * 31^8 + a[i+1] * 31^7 + a[i+2] * 31^6 + ... + a[i+8] * 1
|
||||
// ----------- ------------------------------------------------
|
||||
// scalar vector: notice the powers of 31 in reverse
|
||||
//
|
||||
// We notice that we can load a[i+1 .. i+8], then element-wise multiply with
|
||||
// the vector of reversed powers-of-31, and then do reduceLanes(ADD).
|
||||
// But we can do even better: By looking at multiple such 8-unrolled iterations.
|
||||
// Instead of applying the "next" factor of "31^8" to the reduced scalar, we can
|
||||
// already apply it element-wise. That allows us to move the reduction out
|
||||
// of the loop.
|
||||
//
|
||||
// Note: the intrinsic additionally unrolls the loop by a factor of 4,
|
||||
//       but we want to keep things simple for demonstration purposes.
|
||||
//
|
||||
// Lookup table of powers of 31 in descending order:
//   REVERSE_POWERS_OF_31[k] == 31^(8 - k), i.e. {31^8, 31^7, ..., 31^1, 31^0}
// (int arithmetic, so large powers wrap). Index 0 is the per-iteration factor
// 31^L; indices 1..8 are the per-lane coefficients for the final reduction.
// The table is never reassigned, hence final.
private static final int[] REVERSE_POWERS_OF_31 = new int[9];
static {
    int p = 1;
    // Fill from the back so that the last slot holds 31^0 and the first 31^8.
    for (int i = REVERSE_POWERS_OF_31.length - 1; i >= 0; i--) {
        REVERSE_POWERS_OF_31[i] = p;
        p *= 31;
    }
}

// Vector API version of the polynomial byte hash
//   h = 31 * h + a[i], starting from h = 1.
// Bytes are consumed SPECIES_B64.length() at a time and widened to int lanes.
// Each lane keeps its own partial hash that is scaled by 31^L per iteration;
// the lanes are combined only once, after the loop. Remaining tail elements
// are processed with the scalar recurrence.
public static int hashCodeB_VectorAPI_v1(byte[] a) {
    int result = 1; // initialValue
    var vresult = IntVector.zero(SPECIES_I256);
    int next = REVERSE_POWERS_OF_31[0]; // 31^L
    var vnext = IntVector.broadcast(SPECIES_I256, next);
    // Per-lane coefficients: the powers of 31 in reverse, starting at 31^(L-1).
    var vcoef = IntVector.fromArray(SPECIES_I256, REVERSE_POWERS_OF_31, 1);
    int i;
    for (i = 0; i < SPECIES_B64.loopBound(a.length); i += SPECIES_B64.length()) {
        // scalar part: result *= 31^L
        result *= next;
        // vector part: element-wise apply the next factor and add in the new values.
        var vb = ByteVector.fromArray(SPECIES_B64, a, i);
        var vi = vb.castShape(SPECIES_I256, 0);
        vresult = vresult.mul(vnext).add(vi);
    }
    // reduce the partial hashes in the elements, using the reverse list of powers of 31.
    result += vresult.mul(vcoef).reduceLanes(VectorOperators.ADD);
    // scalar tail for the elements that did not fill a whole vector.
    for (; i < a.length; i++) {
        result = 31 * result + a[i];
    }
    return result;
}
|
||||
|
||||
public static Object scanAddI_loop(int[] a, int[] r) {
|
||||
int sum = 0;
|
||||
for (int i = 0; i < a.length; i++) {
|
||||
|
||||
@ -76,6 +76,8 @@ public class VectorAlgorithms {
|
||||
public static float[] aF;
|
||||
public static float[] bF;
|
||||
|
||||
byte[] aB;
|
||||
|
||||
@Setup
|
||||
public void init() {
|
||||
RANDOM = new Random(SEED);
|
||||
@ -90,6 +92,9 @@ public class VectorAlgorithms {
|
||||
|
||||
aF = new float[SIZE];
|
||||
bF = new float[SIZE];
|
||||
|
||||
aB = new byte[SIZE];
|
||||
RANDOM.nextBytes(aB);
|
||||
}
|
||||
|
||||
@Setup(Level.Iteration)
|
||||
@ -206,6 +211,21 @@ public class VectorAlgorithms {
|
||||
return VectorAlgorithmsImpl.dotProductF_VectorAPI_reduction_after_loop(aF, bF);
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public int hashCodeB_loop() {
|
||||
return VectorAlgorithmsImpl.hashCodeB_loop(aB);
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public int hashCodeB_Arrays() {
|
||||
return VectorAlgorithmsImpl.hashCodeB_Arrays(aB);
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public int hashCodeB_VectorAPI_v1() {
|
||||
return VectorAlgorithmsImpl.hashCodeB_VectorAPI_v1(aB);
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public Object scanAddI_loop() {
|
||||
return VectorAlgorithmsImpl.scanAddI_loop(aI, rI);
|
||||
|
||||
@ -34,6 +34,8 @@ import jdk.incubator.vector.*;
|
||||
public class VectorAlgorithmsImpl {
|
||||
private static final VectorSpecies<Integer> SPECIES_I = IntVector.SPECIES_PREFERRED;
|
||||
private static final VectorSpecies<Integer> SPECIES_I512 = IntVector.SPECIES_512;
|
||||
private static final VectorSpecies<Integer> SPECIES_I256 = IntVector.SPECIES_256;
|
||||
private static final VectorSpecies<Byte> SPECIES_B64 = ByteVector.SPECIES_64;
|
||||
private static final VectorSpecies<Float> SPECIES_F = FloatVector.SPECIES_PREFERRED;
|
||||
|
||||
public static Object fillI_loop(int[] r) {
|
||||
@ -216,6 +218,69 @@ public class VectorAlgorithmsImpl {
|
||||
return sum;
|
||||
}
|
||||
|
||||
public static int hashCodeB_loop(byte[] a) {
|
||||
int h = 1;
|
||||
for (int i = 0; i < a.length; i++) {
|
||||
h = 31 * h + a[i];
|
||||
}
|
||||
return h;
|
||||
}
|
||||
|
||||
public static int hashCodeB_Arrays(byte[] a) {
|
||||
return Arrays.hashCode(a);
|
||||
}
|
||||
|
||||
// Simplified intrinsic code from C2_MacroAssembler::arrays_hashcode in c2_MacroAssembler_x86.cpp
|
||||
//
|
||||
// Ideas that may help understand the code:
|
||||
//
|
||||
// h(i) = 31 * h(i-1) + a[i]
|
||||
// "unroll" by factor of L=8:
|
||||
// h(i+8) = h(i) * 31^8 + a[i+1] * 31^7 + a[i+2] * 31^6 + ... + a[i+8] * 1
|
||||
// ----------- ------------------------------------------------
|
||||
// scalar vector: notice the powers of 31 in reverse
|
||||
//
|
||||
// We notice that we can load a[i+1 .. i+8], then element-wise multiply with
|
||||
// the vector of reversed powers-of-31, and then do reduceLanes(ADD).
|
||||
// But we can do even better: By looking at multiple such 8-unrolled iterations.
|
||||
// Instead of applying the "next" factor of "31^8" to the reduced scalar, we can
|
||||
// already apply it element-wise. That allows us to move the reduction out
|
||||
// of the loop.
|
||||
//
|
||||
// Note: the intrinsic additionally unrolls the loop by a factor of 4,
|
||||
//       but we want to keep things simple for demonstration purposes.
|
||||
//
|
||||
// Lookup table of powers of 31 in descending order:
//   REVERSE_POWERS_OF_31[k] == 31^(8 - k), i.e. {31^8, 31^7, ..., 31^1, 31^0}
// (int arithmetic, so large powers wrap). Index 0 is the per-iteration factor
// 31^L; indices 1..8 are the per-lane coefficients for the final reduction.
// The table is never reassigned, hence final.
private static final int[] REVERSE_POWERS_OF_31 = new int[9];
static {
    int p = 1;
    // Fill from the back so that the last slot holds 31^0 and the first 31^8.
    for (int i = REVERSE_POWERS_OF_31.length - 1; i >= 0; i--) {
        REVERSE_POWERS_OF_31[i] = p;
        p *= 31;
    }
}

// Vector API version of the polynomial byte hash
//   h = 31 * h + a[i], starting from h = 1.
// Bytes are consumed SPECIES_B64.length() at a time and widened to int lanes.
// Each lane keeps its own partial hash that is scaled by 31^L per iteration;
// the lanes are combined only once, after the loop. Remaining tail elements
// are processed with the scalar recurrence.
public static int hashCodeB_VectorAPI_v1(byte[] a) {
    int result = 1; // initialValue
    var vresult = IntVector.zero(SPECIES_I256);
    int next = REVERSE_POWERS_OF_31[0]; // 31^L
    var vnext = IntVector.broadcast(SPECIES_I256, next);
    // Per-lane coefficients: the powers of 31 in reverse, starting at 31^(L-1).
    var vcoef = IntVector.fromArray(SPECIES_I256, REVERSE_POWERS_OF_31, 1);
    int i;
    for (i = 0; i < SPECIES_B64.loopBound(a.length); i += SPECIES_B64.length()) {
        // scalar part: result *= 31^L
        result *= next;
        // vector part: element-wise apply the next factor and add in the new values.
        var vb = ByteVector.fromArray(SPECIES_B64, a, i);
        var vi = vb.castShape(SPECIES_I256, 0);
        vresult = vresult.mul(vnext).add(vi);
    }
    // reduce the partial hashes in the elements, using the reverse list of powers of 31.
    result += vresult.mul(vcoef).reduceLanes(VectorOperators.ADD);
    // scalar tail for the elements that did not fill a whole vector.
    for (; i < a.length; i++) {
        result = 31 * result + a[i];
    }
    return result;
}
|
||||
|
||||
public static Object scanAddI_loop(int[] a, int[] r) {
|
||||
int sum = 0;
|
||||
for (int i = 0; i < a.length; i++) {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user