diff --git a/test/hotspot/jtreg/compiler/vectorization/TestVectorAlgorithms.java b/test/hotspot/jtreg/compiler/vectorization/TestVectorAlgorithms.java index 2667ac59471..70dc9a4a0b4 100644 --- a/test/hotspot/jtreg/compiler/vectorization/TestVectorAlgorithms.java +++ b/test/hotspot/jtreg/compiler/vectorization/TestVectorAlgorithms.java @@ -122,6 +122,7 @@ public class TestVectorAlgorithms { testGroups.get("dotProductF").put("dotProductF_loop", i -> { return dotProductF_loop(d.aF, d.bF); }); testGroups.get("dotProductF").put("dotProductF_VectorAPI_naive", i -> { return dotProductF_VectorAPI_naive(d.aF, d.bF); }); testGroups.get("dotProductF").put("dotProductF_VectorAPI_reduction_after_loop", i -> { return dotProductF_VectorAPI_reduction_after_loop(d.aF, d.bF); }); + testGroups.get("dotProductF").put("dotProductF_VectorAPI_fma", i -> { return dotProductF_VectorAPI_fma(d.aF, d.bF); }); testGroups.put("hashCodeB", new HashMap()); testGroups.get("hashCodeB").put("hashCodeB_loop", i -> { return hashCodeB_loop(d.aB); }); @@ -192,6 +193,7 @@ public class TestVectorAlgorithms { "dotProductF_loop", "dotProductF_VectorAPI_naive", "dotProductF_VectorAPI_reduction_after_loop", + "dotProductF_VectorAPI_fma", "hashCodeB_loop", "hashCodeB_Arrays", "hashCodeB_VectorAPI_v1", @@ -409,6 +411,16 @@ public class TestVectorAlgorithms { return VectorAlgorithmsImpl.dotProductF_VectorAPI_reduction_after_loop(a, b); } + /* IR-checked wrapper around VectorAlgorithmsImpl.dotProductF_VectorAPI_fma. The IR rule below asks for more than zero LOAD_VECTOR_F, ADD_REDUCTION_V and FMA_VF nodes; it is applied only with UseSuperWord=true and when avx2, asimd or rvv is reported. */ @Test + @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", + IRNode.ADD_REDUCTION_V, "> 0", + IRNode.FMA_VF, "> 0"}, + applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"}, + applyIf = {"UseSuperWord", "true"}) + public float dotProductF_VectorAPI_fma(float[] a, float[] b) { + return VectorAlgorithmsImpl.dotProductF_VectorAPI_fma(a, b); + } + @Test public int hashCodeB_loop(byte[] a) { return VectorAlgorithmsImpl.hashCodeB_loop(a); diff --git a/test/hotspot/jtreg/compiler/vectorization/VectorAlgorithmsImpl.java
b/test/hotspot/jtreg/compiler/vectorization/VectorAlgorithmsImpl.java index 8276d90509f..e4c9eb74e7d 100644 --- a/test/hotspot/jtreg/compiler/vectorization/VectorAlgorithmsImpl.java +++ b/test/hotspot/jtreg/compiler/vectorization/VectorAlgorithmsImpl.java @@ -348,6 +348,21 @@ public class VectorAlgorithmsImpl { return sum; } + /** Dot product of a and b using the Vector API with fma accumulation. The vector loop walks indices from 0 up to SPECIES_F.loopBound(a.length) in steps of SPECIES_F.length(), the lane sums are reduced once after that loop, and the remaining elements are folded in with Math.fma. Only a.length bounds the loops, so b is assumed to be at least as long as a (not checked here). */ public static float dotProductF_VectorAPI_fma(float[] a, float[] b) { + var sums = FloatVector.broadcast(SPECIES_F, 0.0f); + int i; + for (i = 0; i < SPECIES_F.loopBound(a.length); i += SPECIES_F.length()) { + var va = FloatVector.fromArray(SPECIES_F, a, i); + var vb = FloatVector.fromArray(SPECIES_F, b, i); + sums = va.fma(vb, sums); /* per lane: va * vb added to the running sums */ + } + float sum = sums.reduceLanes(VectorOperators.ADD); /* single cross-lane ADD, done after the vector loop */ + for (; i < a.length; i++) { /* scalar tail starting where the vector loop stopped */ + sum = Math.fma(a[i], b[i], sum); + } + return sum; + } + public static int hashCodeB_loop(byte[] a) { int h = 1; for (int i = 0; i < a.length; i++) { diff --git a/test/micro/org/openjdk/bench/vm/compiler/VectorAlgorithms.java b/test/micro/org/openjdk/bench/vm/compiler/VectorAlgorithms.java index f60dfcb2d7c..0a6aa03586b 100644 --- a/test/micro/org/openjdk/bench/vm/compiler/VectorAlgorithms.java +++ b/test/micro/org/openjdk/bench/vm/compiler/VectorAlgorithms.java @@ -165,6 +165,11 @@ public class VectorAlgorithms { return VectorAlgorithmsImpl.dotProductF_VectorAPI_reduction_after_loop(d.aF, d.bF); } + /* Benchmark entry for the fma variant; feeds d.aF and d.bF to VectorAlgorithmsImpl.dotProductF_VectorAPI_fma. */ @Benchmark + public float dotProductF_VectorAPI_fma() { + return VectorAlgorithmsImpl.dotProductF_VectorAPI_fma(d.aF, d.bF); + } + @Benchmark public int hashCodeB_loop() { return VectorAlgorithmsImpl.hashCodeB_loop(d.aB); diff --git a/test/micro/org/openjdk/bench/vm/compiler/VectorAlgorithmsImpl.java b/test/micro/org/openjdk/bench/vm/compiler/VectorAlgorithmsImpl.java index 3ae4ed81634..0d33a109d5b 100644 --- a/test/micro/org/openjdk/bench/vm/compiler/VectorAlgorithmsImpl.java +++ b/test/micro/org/openjdk/bench/vm/compiler/VectorAlgorithmsImpl.java @@ -348,6 +348,21 @@ public class VectorAlgorithmsImpl { return sum; } + /** Dot product of a and b using the Vector API with fma accumulation. The vector loop walks indices from 0 up to SPECIES_F.loopBound(a.length) in steps of SPECIES_F.length(), the lane sums are reduced once after that loop, and the remaining elements are folded in with Math.fma. Only a.length bounds the loops, so b is assumed to be at least as long as a (not checked here). */ public
static float dotProductF_VectorAPI_fma(float[] a, float[] b) { + var sums = FloatVector.broadcast(SPECIES_F, 0.0f); + int i; + for (i = 0; i < SPECIES_F.loopBound(a.length); i += SPECIES_F.length()) { + var va = FloatVector.fromArray(SPECIES_F, a, i); + var vb = FloatVector.fromArray(SPECIES_F, b, i); + sums = va.fma(vb, sums); /* per lane: va * vb added to the running sums */ + } + float sum = sums.reduceLanes(VectorOperators.ADD); /* single cross-lane ADD, done after the vector loop */ + for (; i < a.length; i++) { /* scalar tail starting where the vector loop stopped */ + sum = Math.fma(a[i], b[i], sum); + } + return sum; + } + public static int hashCodeB_loop(byte[] a) { int h = 1; for (int i = 0; i < a.length; i++) {