add dotProductF

This commit is contained in:
Emanuel Peter 2026-01-15 17:17:48 +01:00
parent 42401e2d84
commit cf94436b26
4 changed files with 156 additions and 4 deletions

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -84,6 +84,9 @@ public class TestVectorAlgorithms {
int[] rI4;
int eI;
float[] aF;
float[] bF;
int[] oopsX4;
int[] memX4;
@ -133,6 +136,11 @@ public class TestVectorAlgorithms {
testGroups.get("reduceAddI").put("reduceAddI_VectorAPI_naive", () -> { return reduceAddI_VectorAPI_naive(aI); });
testGroups.get("reduceAddI").put("reduceAddI_VectorAPI_reduction_after_loop", () -> { return reduceAddI_VectorAPI_reduction_after_loop(aI); });
testGroups.put("dotProductF", new HashMap<String,TestFunction>());
testGroups.get("dotProductF").put("dotProductF_loop", () -> { return dotProductF_loop(aF, bF); });
testGroups.get("dotProductF").put("dotProductF_VectorAPI_naive", () -> { return dotProductF_VectorAPI_naive(aF, bF); });
testGroups.get("dotProductF").put("dotProductF_VectorAPI_reduction_after_loop", () -> { return dotProductF_VectorAPI_reduction_after_loop(aF, bF); });
testGroups.put("scanAddI", new HashMap<String,TestFunction>());
testGroups.get("scanAddI").put("scanAddI_loop", () -> { return scanAddI_loop(aI, rI1); });
testGroups.get("scanAddI").put("scanAddI_loop_reassociate", () -> { return scanAddI_loop_reassociate(aI, rI2); });
@ -174,6 +182,9 @@ public class TestVectorAlgorithms {
"reduceAddI_reassociate",
"reduceAddI_VectorAPI_naive",
"reduceAddI_VectorAPI_reduction_after_loop",
"dotProductF_loop",
"dotProductF_VectorAPI_naive",
"dotProductF_VectorAPI_reduction_after_loop",
"scanAddI_loop",
"scanAddI_loop_reassociate",
"scanAddI_VectorAPI_permute_add",
@ -217,6 +228,14 @@ public class TestVectorAlgorithms {
memX4[i] = RANDOM.nextInt();
}
// float inputs. To avoid rounding issues, only use small integers.
aF = new float[size];
bF = new float[size];
for (int i = 0; i < size; i++) {
aF[i] = RANDOM.nextInt(32) - 16;
bF[i] = RANDOM.nextInt(32) - 16;
}
// Run all tests
for (Map.Entry<String, Map<String,TestFunction>> group_entry : testGroups.entrySet()) {
String group_name = group_entry.getKey();
@ -359,6 +378,37 @@ public class TestVectorAlgorithms {
return VectorAlgorithmsImpl.reduceAddI_VectorAPI_naive(aI);
}
/**
 * Test entry point for the scalar-loop dot product of {@code a} and {@code b}.
 * The IR rule is applied only when the CPU feature sse4.1 is present and
 * UseSuperWord is true; it then requires at least one match each of
 * LOAD_VECTOR_F, ADD_REDUCTION_V and MUL_VF in the compiled method.
 *
 * @param a first input array
 * @param b second input array
 * @return the value computed by {@code VectorAlgorithmsImpl.dotProductF_loop(a, b)}
 */
@Test
@IR(counts = {IRNode.LOAD_VECTOR_F, "> 0",
IRNode.ADD_REDUCTION_V, "> 0",
IRNode.MUL_VF, "> 0"},
applyIfCPUFeature = {"sse4.1", "true"},
applyIf = {"UseSuperWord", "true"})
// See also TestReduction.floatAddDotProduct
public float dotProductF_loop(float[] a, float[] b) {
    // Pure delegation: the implementation lives in VectorAlgorithmsImpl.
    final float result = VectorAlgorithmsImpl.dotProductF_loop(a, b);
    return result;
}
/**
 * Test entry point for the Vector API dot product that reduces inside the loop.
 * The IR rule is applied only when the CPU feature sse4.1 is present and
 * UseSuperWord is true; it then requires at least one match each of
 * LOAD_VECTOR_F, ADD_REDUCTION_V and MUL_VF in the compiled method.
 *
 * @param a first input array
 * @param b second input array
 * @return the value computed by {@code VectorAlgorithmsImpl.dotProductF_VectorAPI_naive(a, b)}
 */
@Test
@IR(counts = {IRNode.LOAD_VECTOR_F, "> 0",
IRNode.ADD_REDUCTION_V, "> 0",
IRNode.MUL_VF, "> 0"},
applyIfCPUFeature = {"sse4.1", "true"},
applyIf = {"UseSuperWord", "true"})
public float dotProductF_VectorAPI_naive(float[] a, float[] b) {
    // Pure delegation: the implementation lives in VectorAlgorithmsImpl.
    final float result = VectorAlgorithmsImpl.dotProductF_VectorAPI_naive(a, b);
    return result;
}
/**
 * Test entry point for the Vector API dot product that reduces once after the loop.
 * The IR rule is applied only when the CPU feature sse4.1 is present and
 * UseSuperWord is true; it then requires at least one match each of
 * LOAD_VECTOR_F, ADD_REDUCTION_V and MUL_VF in the compiled method.
 *
 * @param a first input array
 * @param b second input array
 * @return the value computed by
 *         {@code VectorAlgorithmsImpl.dotProductF_VectorAPI_reduction_after_loop(a, b)}
 */
@Test
@IR(counts = {IRNode.LOAD_VECTOR_F, "> 0",
IRNode.ADD_REDUCTION_V, "> 0",
IRNode.MUL_VF, "> 0"},
applyIfCPUFeature = {"sse4.1", "true"},
applyIf = {"UseSuperWord", "true"})
public float dotProductF_VectorAPI_reduction_after_loop(float[] a, float[] b) {
    // Pure delegation: the implementation lives in VectorAlgorithmsImpl.
    final float result = VectorAlgorithmsImpl.dotProductF_VectorAPI_reduction_after_loop(a, b);
    return result;
}
@Test
@IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
IRNode.ADD_REDUCTION_VI, "> 0",

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -34,6 +34,7 @@ import jdk.incubator.vector.*;
public class VectorAlgorithmsImpl {
private static final VectorSpecies<Integer> SPECIES_I = IntVector.SPECIES_PREFERRED;
private static final VectorSpecies<Integer> SPECIES_I512 = IntVector.SPECIES_512;
private static final VectorSpecies<Float> SPECIES_F = FloatVector.SPECIES_PREFERRED;
public static Object fillI_loop(int[] r) {
for (int i = 0; i < r.length; i++) {
@ -161,6 +162,43 @@ public class VectorAlgorithmsImpl {
return sum;
}
/**
 * Scalar dot product: accumulates {@code a[i] * b[i]} for every index of {@code a},
 * in increasing index order.
 *
 * @param a first input array; its length determines the number of terms
 * @param b second input array; indexed with the same indices as {@code a}
 * @return the accumulated sum, or 0 when {@code a} is empty
 */
public static float dotProductF_loop(float[] a, float[] b) {
    float acc = 0;
    int idx = 0;
    while (idx < a.length) {
        float product = a[idx] * b[idx];
        acc += product;
        idx++;
    }
    return acc;
}
/**
 * Dot product using the Vector API, where every product vector is reduced to a
 * scalar inside the main loop and added to the running sum.
 *
 * @param a first input array; its length determines the number of terms
 * @param b second input array; read at the same indices as {@code a}
 * @return the accumulated sum
 */
public static float dotProductF_VectorAPI_naive(float[] a, float[] b) {
    float total = 0;
    final int vectorEnd = SPECIES_F.loopBound(a.length);
    final int lanes = SPECIES_F.length();
    int idx = 0;
    // Main loop: multiply one full vector of each input, reduce it right away.
    while (idx < vectorEnd) {
        var lhs = FloatVector.fromArray(SPECIES_F, a, idx);
        var rhs = FloatVector.fromArray(SPECIES_F, b, idx);
        total += lhs.mul(rhs).reduceLanes(VectorOperators.ADD);
        idx += lanes;
    }
    // Scalar tail: elements past the last full vector.
    while (idx < a.length) {
        total += a[idx] * b[idx];
        idx++;
    }
    return total;
}
/**
 * Dot product using the Vector API, where the main loop only accumulates
 * lane-wise partial sums and a single reduction to a scalar follows the loop.
 *
 * @param a first input array; its length determines the number of terms
 * @param b second input array; read at the same indices as {@code a}
 * @return the accumulated sum
 */
public static float dotProductF_VectorAPI_reduction_after_loop(float[] a, float[] b) {
    var partials = FloatVector.broadcast(SPECIES_F, 0.0f);
    final int vectorEnd = SPECIES_F.loopBound(a.length);
    final int lanes = SPECIES_F.length();
    int idx = 0;
    // Main loop: lane-wise multiply and add into the vector accumulator.
    while (idx < vectorEnd) {
        var lhs = FloatVector.fromArray(SPECIES_F, a, idx);
        var rhs = FloatVector.fromArray(SPECIES_F, b, idx);
        partials = partials.add(lhs.mul(rhs));
        idx += lanes;
    }
    // Collapse the vector accumulator once, then finish the tail in scalar code.
    float total = partials.reduceLanes(VectorOperators.ADD);
    while (idx < a.length) {
        total += a[idx] * b[idx];
        idx++;
    }
    return total;
}
public static int reduceAddI_VectorAPI_reduction_after_loop(int[] a) {
var acc = IntVector.broadcast(SPECIES_I, 0);
int i;

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -73,6 +73,9 @@ public class VectorAlgorithms {
public static int[] oopsX4;
public static int[] memX4;
public static float[] aF;
public static float[] bF;
@Setup
public void init() {
RANDOM = new Random(SEED);
@ -84,6 +87,9 @@ public class VectorAlgorithms {
oopsX4 = new int[SIZE];
memX4 = new int[NUM_X_OBJECTS * 4];
aF = new float[SIZE];
bF = new float[SIZE];
}
@Setup(Level.Iteration)
@ -104,6 +110,11 @@ public class VectorAlgorithms {
for (int i = 0; i < memX4.length; i++) {
memX4[i] = RANDOM.nextInt();
}
for (int i = 0; i < aF.length; i++) {
aF[i] = RANDOM.nextFloat();
bF[i] = RANDOM.nextFloat();
}
}
// ------------------------------------------------------------------------------------------
@ -180,6 +191,21 @@ public class VectorAlgorithms {
return VectorAlgorithmsImpl.reduceAddI_VectorAPI_reduction_after_loop(aI);
}
/**
 * Benchmark entry point: scalar-loop dot product over the static arrays
 * {@code aF} and {@code bF}.
 *
 * @return the value computed by {@code VectorAlgorithmsImpl.dotProductF_loop(aF, bF)}
 */
@Benchmark
public float dotProductF_loop() {
    final float result = VectorAlgorithmsImpl.dotProductF_loop(aF, bF);
    return result;
}
/**
 * Benchmark entry point: Vector API dot product with the reduction inside the
 * loop, over the static arrays {@code aF} and {@code bF}.
 *
 * @return the value computed by {@code VectorAlgorithmsImpl.dotProductF_VectorAPI_naive(aF, bF)}
 */
@Benchmark
public float dotProductF_VectorAPI_naive() {
    final float result = VectorAlgorithmsImpl.dotProductF_VectorAPI_naive(aF, bF);
    return result;
}
/**
 * Benchmark entry point: Vector API dot product with a single reduction after
 * the loop, over the static arrays {@code aF} and {@code bF}.
 *
 * @return the value computed by
 *         {@code VectorAlgorithmsImpl.dotProductF_VectorAPI_reduction_after_loop(aF, bF)}
 */
@Benchmark
public float dotProductF_VectorAPI_reduction_after_loop() {
    final float result = VectorAlgorithmsImpl.dotProductF_VectorAPI_reduction_after_loop(aF, bF);
    return result;
}
@Benchmark
public Object scanAddI_loop() {
return VectorAlgorithmsImpl.scanAddI_loop(aI, rI);

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -34,6 +34,7 @@ import jdk.incubator.vector.*;
public class VectorAlgorithmsImpl {
private static final VectorSpecies<Integer> SPECIES_I = IntVector.SPECIES_PREFERRED;
private static final VectorSpecies<Integer> SPECIES_I512 = IntVector.SPECIES_512;
private static final VectorSpecies<Float> SPECIES_F = FloatVector.SPECIES_PREFERRED;
public static Object fillI_loop(int[] r) {
for (int i = 0; i < r.length; i++) {
@ -178,6 +179,43 @@ public class VectorAlgorithmsImpl {
return sum;
}
/**
 * Scalar dot product: accumulates {@code a[i] * b[i]} for every index of {@code a},
 * in increasing index order.
 *
 * @param a first input array; its length determines the number of terms
 * @param b second input array; indexed with the same indices as {@code a}
 * @return the accumulated sum, or 0 when {@code a} is empty
 */
public static float dotProductF_loop(float[] a, float[] b) {
    float acc = 0;
    int idx = 0;
    while (idx < a.length) {
        float product = a[idx] * b[idx];
        acc += product;
        idx++;
    }
    return acc;
}
/**
 * Dot product using the Vector API, where every product vector is reduced to a
 * scalar inside the main loop and added to the running sum.
 *
 * @param a first input array; its length determines the number of terms
 * @param b second input array; read at the same indices as {@code a}
 * @return the accumulated sum
 */
public static float dotProductF_VectorAPI_naive(float[] a, float[] b) {
    float total = 0;
    final int vectorEnd = SPECIES_F.loopBound(a.length);
    final int lanes = SPECIES_F.length();
    int idx = 0;
    // Main loop: multiply one full vector of each input, reduce it right away.
    while (idx < vectorEnd) {
        var lhs = FloatVector.fromArray(SPECIES_F, a, idx);
        var rhs = FloatVector.fromArray(SPECIES_F, b, idx);
        total += lhs.mul(rhs).reduceLanes(VectorOperators.ADD);
        idx += lanes;
    }
    // Scalar tail: elements past the last full vector.
    while (idx < a.length) {
        total += a[idx] * b[idx];
        idx++;
    }
    return total;
}
/**
 * Dot product using the Vector API, where the main loop only accumulates
 * lane-wise partial sums and a single reduction to a scalar follows the loop.
 *
 * @param a first input array; its length determines the number of terms
 * @param b second input array; read at the same indices as {@code a}
 * @return the accumulated sum
 */
public static float dotProductF_VectorAPI_reduction_after_loop(float[] a, float[] b) {
    var partials = FloatVector.broadcast(SPECIES_F, 0.0f);
    final int vectorEnd = SPECIES_F.loopBound(a.length);
    final int lanes = SPECIES_F.length();
    int idx = 0;
    // Main loop: lane-wise multiply and add into the vector accumulator.
    while (idx < vectorEnd) {
        var lhs = FloatVector.fromArray(SPECIES_F, a, idx);
        var rhs = FloatVector.fromArray(SPECIES_F, b, idx);
        partials = partials.add(lhs.mul(rhs));
        idx += lanes;
    }
    // Collapse the vector accumulator once, then finish the tail in scalar code.
    float total = partials.reduceLanes(VectorOperators.ADD);
    while (idx < a.length) {
        total += a[idx] * b[idx];
        idx++;
    }
    return total;
}
public static Object scanAddI_loop(int[] a, int[] r) {
int sum = 0;
for (int i = 0; i < a.length; i++) {