diff --git a/test/hotspot/jtreg/compiler/vectorization/TestVectorAlgorithms.java b/test/hotspot/jtreg/compiler/vectorization/TestVectorAlgorithms.java
index 2b026b15385..16b03ae542c 100644
--- a/test/hotspot/jtreg/compiler/vectorization/TestVectorAlgorithms.java
+++ b/test/hotspot/jtreg/compiler/vectorization/TestVectorAlgorithms.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -84,6 +84,9 @@ public class TestVectorAlgorithms {
     int[] rI4;
     int eI;
 
+    float[] aF;
+    float[] bF;
+
     int[] oopsX4;
     int[] memX4;
 
@@ -133,6 +136,11 @@ public class TestVectorAlgorithms {
         testGroups.get("reduceAddI").put("reduceAddI_VectorAPI_naive", () -> { return reduceAddI_VectorAPI_naive(aI); });
         testGroups.get("reduceAddI").put("reduceAddI_VectorAPI_reduction_after_loop", () -> { return reduceAddI_VectorAPI_reduction_after_loop(aI); });
 
+        testGroups.put("dotProductF", new HashMap());
+        testGroups.get("dotProductF").put("dotProductF_loop", () -> { return dotProductF_loop(aF, bF); });
+        testGroups.get("dotProductF").put("dotProductF_VectorAPI_naive", () -> { return dotProductF_VectorAPI_naive(aF, bF); });
+        testGroups.get("dotProductF").put("dotProductF_VectorAPI_reduction_after_loop", () -> { return dotProductF_VectorAPI_reduction_after_loop(aF, bF); });
+
         testGroups.put("scanAddI", new HashMap());
         testGroups.get("scanAddI").put("scanAddI_loop", () -> { return scanAddI_loop(aI, rI1); });
         testGroups.get("scanAddI").put("scanAddI_loop_reassociate", () -> { return scanAddI_loop_reassociate(aI, rI2); });
@@ -174,6 +182,9 @@ public class TestVectorAlgorithms {
                  "reduceAddI_reassociate",
                  "reduceAddI_VectorAPI_naive",
                  "reduceAddI_VectorAPI_reduction_after_loop",
+                 "dotProductF_loop",
+                 "dotProductF_VectorAPI_naive",
+                 "dotProductF_VectorAPI_reduction_after_loop",
                  "scanAddI_loop",
                  "scanAddI_loop_reassociate",
                  "scanAddI_VectorAPI_permute_add",
@@ -217,6 +228,14 @@ public class TestVectorAlgorithms {
             memX4[i] = RANDOM.nextInt();
         }
 
+        // float inputs. To avoid rounding issues, only use small integers.
+        aF = new float[size];
+        bF = new float[size];
+        for (int i = 0; i < size; i++) {
+            aF[i] = RANDOM.nextInt(32) - 16;
+            bF[i] = RANDOM.nextInt(32) - 16;
+        }
+
         // Run all tests
         for (Map.Entry> group_entry : testGroups.entrySet()) {
             String group_name = group_entry.getKey();
@@ -359,6 +378,37 @@ public class TestVectorAlgorithms {
         return VectorAlgorithmsImpl.reduceAddI_VectorAPI_naive(aI);
     }
 
+    @Test
+    @IR(counts = {IRNode.LOAD_VECTOR_F,   "> 0",
+                  IRNode.ADD_REDUCTION_V, "> 0",
+                  IRNode.MUL_VF,          "> 0"},
+        applyIfCPUFeature = {"sse4.1", "true"},
+        applyIf = {"UseSuperWord", "true"})
+    // See also TestReduction.floatAddDotProduct
+    public float dotProductF_loop(float[] a, float[] b) {
+        return VectorAlgorithmsImpl.dotProductF_loop(a, b);
+    }
+
+    @Test
+    @IR(counts = {IRNode.LOAD_VECTOR_F,   "> 0",
+                  IRNode.ADD_REDUCTION_V, "> 0",
+                  IRNode.MUL_VF,          "> 0"},
+        applyIfCPUFeature = {"sse4.1", "true"},
+        applyIf = {"UseSuperWord", "true"})
+    public float dotProductF_VectorAPI_naive(float[] a, float[] b) {
+        return VectorAlgorithmsImpl.dotProductF_VectorAPI_naive(a, b);
+    }
+
+    @Test
+    @IR(counts = {IRNode.LOAD_VECTOR_F,   "> 0",
+                  IRNode.ADD_REDUCTION_V, "> 0",
+                  IRNode.MUL_VF,          "> 0"},
+        applyIfCPUFeature = {"sse4.1", "true"},
+        applyIf = {"UseSuperWord", "true"})
+    public float dotProductF_VectorAPI_reduction_after_loop(float[] a, float[] b) {
+        return VectorAlgorithmsImpl.dotProductF_VectorAPI_reduction_after_loop(a, b);
+    }
+
     @Test
     @IR(counts = {IRNode.LOAD_VECTOR_I,    "> 0",
                   IRNode.ADD_REDUCTION_VI, "> 0",
diff --git a/test/hotspot/jtreg/compiler/vectorization/VectorAlgorithmsImpl.java b/test/hotspot/jtreg/compiler/vectorization/VectorAlgorithmsImpl.java
index 78023011e9c..d7fe56d9c45 100644
--- a/test/hotspot/jtreg/compiler/vectorization/VectorAlgorithmsImpl.java
+++ b/test/hotspot/jtreg/compiler/vectorization/VectorAlgorithmsImpl.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -34,6 +34,7 @@ import jdk.incubator.vector.*;
 public class VectorAlgorithmsImpl {
     private static final VectorSpecies<Integer> SPECIES_I = IntVector.SPECIES_PREFERRED;
     private static final VectorSpecies<Integer> SPECIES_I512 = IntVector.SPECIES_512;
+    private static final VectorSpecies<Float> SPECIES_F = FloatVector.SPECIES_PREFERRED;
 
     public static Object fillI_loop(int[] r) {
         for (int i = 0; i < r.length; i++) {
@@ -161,6 +162,43 @@ public class VectorAlgorithmsImpl {
         return sum;
     }
 
+    public static float dotProductF_loop(float[] a, float[] b) {
+        float sum = 0;
+        for (int i = 0; i < a.length; i++) {
+            sum += a[i] * b[i];
+        }
+        return sum;
+    }
+
+    public static float dotProductF_VectorAPI_naive(float[] a, float[] b) {
+        float sum = 0;
+        int i;
+        for (i = 0; i < SPECIES_F.loopBound(a.length); i += SPECIES_F.length()) {
+            var va = FloatVector.fromArray(SPECIES_F, a, i);
+            var vb = FloatVector.fromArray(SPECIES_F, b, i);
+            sum += va.mul(vb).reduceLanes(VectorOperators.ADD);
+        }
+        for (; i < a.length; i++) {
+            sum += a[i] * b[i];
+        }
+        return sum;
+    }
+
+    public static float dotProductF_VectorAPI_reduction_after_loop(float[] a, float[] b) {
+        var sums = FloatVector.broadcast(SPECIES_F, 0.0f);
+        int i;
+        for (i = 0; i < SPECIES_F.loopBound(a.length); i += SPECIES_F.length()) {
+            var va = FloatVector.fromArray(SPECIES_F, a, i);
+            var vb = FloatVector.fromArray(SPECIES_F, b, i);
+            sums = sums.add(va.mul(vb));
+        }
+        float sum = sums.reduceLanes(VectorOperators.ADD);
+        for (; i < a.length; i++) {
+            sum += a[i] * b[i];
+        }
+        return sum;
+    }
+
     public static int reduceAddI_VectorAPI_reduction_after_loop(int[] a) {
         var acc = IntVector.broadcast(SPECIES_I, 0);
         int i;
diff --git a/test/micro/org/openjdk/bench/vm/compiler/VectorAlgorithms.java b/test/micro/org/openjdk/bench/vm/compiler/VectorAlgorithms.java
index 8b56bc8d9e2..af0a5781719 100644
--- a/test/micro/org/openjdk/bench/vm/compiler/VectorAlgorithms.java
+++ b/test/micro/org/openjdk/bench/vm/compiler/VectorAlgorithms.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -73,6 +73,9 @@ public class VectorAlgorithms {
     public static int[] oopsX4;
     public static int[] memX4;
 
+    public static float[] aF;
+    public static float[] bF;
+
     @Setup
     public void init() {
         RANDOM = new Random(SEED);
@@ -84,6 +87,9 @@ public class VectorAlgorithms {
 
         oopsX4 = new int[SIZE];
         memX4 = new int[NUM_X_OBJECTS * 4];
+
+        aF = new float[SIZE];
+        bF = new float[SIZE];
     }
 
     @Setup(Level.Iteration)
@@ -104,6 +110,11 @@ public class VectorAlgorithms {
         for (int i = 0; i < memX4.length; i++) {
             memX4[i] = RANDOM.nextInt();
         }
+
+        for (int i = 0; i < aF.length; i++) {
+            aF[i] = RANDOM.nextFloat();
+            bF[i] = RANDOM.nextFloat();
+        }
     }
 
     // ------------------------------------------------------------------------------------------
@@ -180,6 +191,21 @@ public class VectorAlgorithms {
         return VectorAlgorithmsImpl.reduceAddI_VectorAPI_reduction_after_loop(aI);
     }
 
+    @Benchmark
+    public float dotProductF_loop() {
+        return VectorAlgorithmsImpl.dotProductF_loop(aF, bF);
+    }
+
+    @Benchmark
+    public float dotProductF_VectorAPI_naive() {
+        return VectorAlgorithmsImpl.dotProductF_VectorAPI_naive(aF, bF);
+    }
+
+    @Benchmark
+    public float dotProductF_VectorAPI_reduction_after_loop() {
+        return VectorAlgorithmsImpl.dotProductF_VectorAPI_reduction_after_loop(aF, bF);
+    }
+
     @Benchmark
     public Object scanAddI_loop() {
         return VectorAlgorithmsImpl.scanAddI_loop(aI, rI);
diff --git a/test/micro/org/openjdk/bench/vm/compiler/VectorAlgorithmsImpl.java b/test/micro/org/openjdk/bench/vm/compiler/VectorAlgorithmsImpl.java
index 0117709a29f..fbeabef6fc0 100644
--- a/test/micro/org/openjdk/bench/vm/compiler/VectorAlgorithmsImpl.java
+++ b/test/micro/org/openjdk/bench/vm/compiler/VectorAlgorithmsImpl.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -34,6 +34,7 @@ import jdk.incubator.vector.*;
 public class VectorAlgorithmsImpl {
     private static final VectorSpecies<Integer> SPECIES_I = IntVector.SPECIES_PREFERRED;
     private static final VectorSpecies<Integer> SPECIES_I512 = IntVector.SPECIES_512;
+    private static final VectorSpecies<Float> SPECIES_F = FloatVector.SPECIES_PREFERRED;
 
     public static Object fillI_loop(int[] r) {
         for (int i = 0; i < r.length; i++) {
@@ -178,6 +179,43 @@ public class VectorAlgorithmsImpl {
         return sum;
     }
 
+    public static float dotProductF_loop(float[] a, float[] b) {
+        float sum = 0;
+        for (int i = 0; i < a.length; i++) {
+            sum += a[i] * b[i];
+        }
+        return sum;
+    }
+
+    public static float dotProductF_VectorAPI_naive(float[] a, float[] b) {
+        float sum = 0;
+        int i;
+        for (i = 0; i < SPECIES_F.loopBound(a.length); i += SPECIES_F.length()) {
+            var va = FloatVector.fromArray(SPECIES_F, a, i);
+            var vb = FloatVector.fromArray(SPECIES_F, b, i);
+            sum += va.mul(vb).reduceLanes(VectorOperators.ADD);
+        }
+        for (; i < a.length; i++) {
+            sum += a[i] * b[i];
+        }
+        return sum;
+    }
+
+    public static float dotProductF_VectorAPI_reduction_after_loop(float[] a, float[] b) {
+        var sums = FloatVector.broadcast(SPECIES_F, 0.0f);
+        int i;
+        for (i = 0; i < SPECIES_F.loopBound(a.length); i += SPECIES_F.length()) {
+            var va = FloatVector.fromArray(SPECIES_F, a, i);
+            var vb = FloatVector.fromArray(SPECIES_F, b, i);
+            sums = sums.add(va.mul(vb));
+        }
+        float sum = sums.reduceLanes(VectorOperators.ADD);
+        for (; i < a.length; i++) {
+            sum += a[i] * b[i];
+        }
+        return sum;
+    }
+
     public static Object scanAddI_loop(int[] a, int[] r) {
         int sum = 0;
         for (int i = 0; i < a.length; i++) {