8379395: [VectorAlgorithms] new dot-product implementation using fma

Reviewed-by: mchevalier, chagedorn
This commit is contained in:
Emanuel Peter 2026-04-01 12:18:29 +00:00
parent 92b1d8237e
commit 7df06d1489
4 changed files with 47 additions and 0 deletions

View File

@ -122,6 +122,7 @@ public class TestVectorAlgorithms {
testGroups.get("dotProductF").put("dotProductF_loop", i -> { return dotProductF_loop(d.aF, d.bF); });
testGroups.get("dotProductF").put("dotProductF_VectorAPI_naive", i -> { return dotProductF_VectorAPI_naive(d.aF, d.bF); });
testGroups.get("dotProductF").put("dotProductF_VectorAPI_reduction_after_loop", i -> { return dotProductF_VectorAPI_reduction_after_loop(d.aF, d.bF); });
testGroups.get("dotProductF").put("dotProductF_VectorAPI_fma", i -> { return dotProductF_VectorAPI_fma(d.aF, d.bF); });
testGroups.put("hashCodeB", new HashMap<String,TestFunction>());
testGroups.get("hashCodeB").put("hashCodeB_loop", i -> { return hashCodeB_loop(d.aB); });
@ -192,6 +193,7 @@ public class TestVectorAlgorithms {
"dotProductF_loop",
"dotProductF_VectorAPI_naive",
"dotProductF_VectorAPI_reduction_after_loop",
"dotProductF_VectorAPI_fma",
"hashCodeB_loop",
"hashCodeB_Arrays",
"hashCodeB_VectorAPI_v1",
@ -409,6 +411,16 @@ public class TestVectorAlgorithms {
return VectorAlgorithmsImpl.dotProductF_VectorAPI_reduction_after_loop(a, b);
}
/**
 * Test entry point for the FMA-based Vector API dot product.
 *
 * Forwards both arrays unchanged to
 * {@code VectorAlgorithmsImpl.dotProductF_VectorAPI_fma} and returns its result.
 *
 * The {@code @IR} rule below requires more than zero occurrences of each of
 * {@code IRNode.LOAD_VECTOR_F}, {@code IRNode.ADD_REDUCTION_V} and
 * {@code IRNode.FMA_VF}. The rule is restricted to the CPU features listed in
 * {@code applyIfCPUFeatureOr} (avx2, asimd, rvv) and to runs where
 * {@code UseSuperWord} is {@code true}.
 */
@Test
@IR(counts = {IRNode.LOAD_VECTOR_F, "> 0",
IRNode.ADD_REDUCTION_V, "> 0",
IRNode.FMA_VF, "> 0"},
applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"},
applyIf = {"UseSuperWord", "true"})
public float dotProductF_VectorAPI_fma(float[] a, float[] b) {
// Pure delegation: the algorithm itself lives in VectorAlgorithmsImpl.
return VectorAlgorithmsImpl.dotProductF_VectorAPI_fma(a, b);
}
@Test
public int hashCodeB_loop(byte[] a) {
return VectorAlgorithmsImpl.hashCodeB_loop(a);

View File

@ -348,6 +348,21 @@ public class VectorAlgorithmsImpl {
return sum;
}
/**
 * Dot product of {@code a} and {@code b} using the Vector API with fused
 * multiply-add.
 *
 * The main loop keeps one partial sum per vector lane and updates all lanes
 * with a single {@code fma} per iteration; the lanes are added together only
 * once, after the loop. Elements past the last full vector are folded in with
 * scalar {@code Math.fma}.
 *
 * Only {@code a.length} drives the loops, so {@code b} is expected to be at
 * least as long as {@code a}.
 *
 * @param a first input array
 * @param b second input array
 * @return the accumulated sum of {@code a[k] * b[k]} over the indices of {@code a}
 */
public static float dotProductF_VectorAPI_fma(float[] a, float[] b) {
    // One running partial sum per lane, all starting at zero.
    FloatVector acc = FloatVector.broadcast(SPECIES_F, 0.0f);
    int idx = 0;

    // Vector part: whole chunks of SPECIES_F.length() elements.
    while (idx < SPECIES_F.loopBound(a.length)) {
        FloatVector lhs = FloatVector.fromArray(SPECIES_F, a, idx);
        FloatVector rhs = FloatVector.fromArray(SPECIES_F, b, idx);
        // Lane-wise lhs * rhs + acc.
        acc = lhs.fma(rhs, acc);
        idx += SPECIES_F.length();
    }

    // Collapse the per-lane partial sums into a single scalar.
    float result = acc.reduceLanes(VectorOperators.ADD);

    // Scalar tail: whatever did not fill a complete vector.
    while (idx < a.length) {
        result = Math.fma(a[idx], b[idx], result);
        idx++;
    }
    return result;
}
public static int hashCodeB_loop(byte[] a) {
int h = 1;
for (int i = 0; i < a.length; i++) {

View File

@ -165,6 +165,11 @@ public class VectorAlgorithms {
return VectorAlgorithmsImpl.dotProductF_VectorAPI_reduction_after_loop(d.aF, d.bF);
}
/**
 * Benchmark entry point for the FMA-based Vector API dot product.
 *
 * Runs {@code VectorAlgorithmsImpl.dotProductF_VectorAPI_fma} on the float
 * arrays {@code d.aF} and {@code d.bF} and returns the computed value.
 */
@Benchmark
public float dotProductF_VectorAPI_fma() {
// Pure delegation: the algorithm itself lives in VectorAlgorithmsImpl.
return VectorAlgorithmsImpl.dotProductF_VectorAPI_fma(d.aF, d.bF);
}
@Benchmark
public int hashCodeB_loop() {
return VectorAlgorithmsImpl.hashCodeB_loop(d.aB);

View File

@ -348,6 +348,21 @@ public class VectorAlgorithmsImpl {
return sum;
}
/**
 * Dot product of {@code a} and {@code b} using the Vector API with fused
 * multiply-add.
 *
 * The main loop keeps one partial sum per vector lane and updates all lanes
 * with a single {@code fma} per iteration; the lanes are added together only
 * once, after the loop. Elements past the last full vector are folded in with
 * scalar {@code Math.fma}.
 *
 * Only {@code a.length} drives the loops, so {@code b} is expected to be at
 * least as long as {@code a}.
 *
 * @param a first input array
 * @param b second input array
 * @return the accumulated sum of {@code a[k] * b[k]} over the indices of {@code a}
 */
public static float dotProductF_VectorAPI_fma(float[] a, float[] b) {
    // One running partial sum per lane, all starting at zero.
    FloatVector acc = FloatVector.broadcast(SPECIES_F, 0.0f);
    int idx = 0;

    // Vector part: whole chunks of SPECIES_F.length() elements.
    while (idx < SPECIES_F.loopBound(a.length)) {
        FloatVector lhs = FloatVector.fromArray(SPECIES_F, a, idx);
        FloatVector rhs = FloatVector.fromArray(SPECIES_F, b, idx);
        // Lane-wise lhs * rhs + acc.
        acc = lhs.fma(rhs, acc);
        idx += SPECIES_F.length();
    }

    // Collapse the per-lane partial sums into a single scalar.
    float result = acc.reduceLanes(VectorOperators.ADD);

    // Scalar tail: whatever did not fill a complete vector.
    while (idx < a.length) {
        result = Math.fma(a[idx], b[idx], result);
        idx++;
    }
    return result;
}
public static int hashCodeB_loop(byte[] a) {
int h = 1;
for (int i = 0; i < a.length; i++) {