From 57f0d5eb79b7d9ffe1b78be205c926d7020fcfa9 Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Thu, 4 Dec 2025 16:13:42 +0100 Subject: [PATCH] benchmark and test for scanAddI --- .../vectorization/TestVectorAlgorithms.java | 7 - .../vectorization/VectorAlgorithmsImpl.java | 30 +--- .../bench/vm/compiler/VectorAlgorithms.java | 17 +++ .../vm/compiler/VectorAlgorithmsImpl.java | 129 +++++++++--------- 4 files changed, 84 insertions(+), 99 deletions(-) diff --git a/test/hotspot/jtreg/compiler/vectorization/TestVectorAlgorithms.java b/test/hotspot/jtreg/compiler/vectorization/TestVectorAlgorithms.java index adb424900a8..a7876476439 100644 --- a/test/hotspot/jtreg/compiler/vectorization/TestVectorAlgorithms.java +++ b/test/hotspot/jtreg/compiler/vectorization/TestVectorAlgorithms.java @@ -90,7 +90,6 @@ public class TestVectorAlgorithms { testGroups.put("scanAddI", new HashMap()); testGroups.get("scanAddI").put("scanAddI_loop", () -> { return scanAddI_loop(aI, rI1); }); testGroups.get("scanAddI").put("scanAddI_loop_reassociate", () -> { return scanAddI_loop_reassociate(aI, rI2); }); - testGroups.get("scanAddI").put("scanAddI_VectorAPI_shift_blend_add", () -> { return scanAddI_VectorAPI_shift_blend_add(aI, rI3); }); testGroups.get("scanAddI").put("scanAddI_VectorAPI_permute_add", () -> { return scanAddI_VectorAPI_permute_add(aI, rI4); }); } @@ -101,7 +100,6 @@ public class TestVectorAlgorithms { "reduceAddI_VectorAPI_reduction_after_loop", "scanAddI_loop", "scanAddI_loop_reassociate", - "scanAddI_VectorAPI_shift_blend_add", "scanAddI_VectorAPI_permute_add"}) public void runTests(RunInfo info) { // Repeat many times, so that we also have multiple iterations for post-warmup to potentially recompile @@ -177,11 +175,6 @@ public class TestVectorAlgorithms { return VectorAlgorithmsImpl.scanAddI_loop_reassociate(a, r); } - @Test - public Object scanAddI_VectorAPI_shift_blend_add(int[] a, int[] r) { - return VectorAlgorithmsImpl.scanAddI_VectorAPI_shift_blend_add(a, r); - } - 
@Test public Object scanAddI_VectorAPI_permute_add(int[] a, int[] r) { return VectorAlgorithmsImpl.scanAddI_VectorAPI_permute_add(a, r); diff --git a/test/hotspot/jtreg/compiler/vectorization/VectorAlgorithmsImpl.java b/test/hotspot/jtreg/compiler/vectorization/VectorAlgorithmsImpl.java index 608a88ca6cc..5fd019ecac9 100644 --- a/test/hotspot/jtreg/compiler/vectorization/VectorAlgorithmsImpl.java +++ b/test/hotspot/jtreg/compiler/vectorization/VectorAlgorithmsImpl.java @@ -124,34 +124,14 @@ public class VectorAlgorithmsImpl { return r; } - public static Object scanAddI_VectorAPI_shift_blend_add(int[] a, int[] r) { - // Using Naive Parallel Algorithm: Hills and Steele - int sum = 0; - int i = 0; - //for (; i < SPECIES_I512.loopBound(a.length); i += SPECIES_I512.length()) { - // IntVector v = IntVector.fromArray(SPECIES_I512, a, i); - // v = v.add(v.lanewise(VectorOperators.LSHL, 1 ).blend(0, VectorMask.fromLong(SPECIES_I512, 0b1111111111111110))); - // v = v.add(v.lanewise(VectorOperators.LSHL, 2 ).blend(0, VectorMask.fromLong(SPECIES_I512, 0b1111111111111100))); - // v = v.add(v.lanewise(VectorOperators.LSHL, 4 ).blend(0, VectorMask.fromLong(SPECIES_I512, 0b1111111111110000))); - // v = v.add(v.lanewise(VectorOperators.LSHL, 8 ).blend(0, VectorMask.fromLong(SPECIES_I512, 0b1111111100000000))); - // v = v.add(sum); - // v.intoArray(r, i); - // sum = v.lane(SPECIES_I512.length() - 1); - //} - for (; i < a.length; i++) { - sum += a[i]; - r[i] = sum; - } - return r; - } - public static Object scanAddI_VectorAPI_permute_add(int[] a, int[] r) { // Using Naive Parallel Algorithm: Hills and Steele int sum = 0; - var shf1 = VectorShuffle.fromArray(SPECIES_I512, new int[]{-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14}, 0); - var shf2 = VectorShuffle.fromArray(SPECIES_I512, new int[]{-1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13}, 0); - var shf3 = VectorShuffle.fromArray(SPECIES_I512, new int[]{-1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}, 0); - var 
shf4 = VectorShuffle.fromArray(SPECIES_I512, new int[]{-1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7}, 0); + int xx = 0; // masked later anyway + var shf1 = VectorShuffle.fromArray(SPECIES_I512, new int[]{xx, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14}, 0); + var shf2 = VectorShuffle.fromArray(SPECIES_I512, new int[]{xx, xx, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13}, 0); + var shf3 = VectorShuffle.fromArray(SPECIES_I512, new int[]{xx, xx, xx, xx, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}, 0); + var shf4 = VectorShuffle.fromArray(SPECIES_I512, new int[]{xx, xx, xx, xx, xx, xx, xx, xx, 0, 1, 2, 3, 4, 5, 6, 7}, 0); var mask1 = VectorMask.fromLong(SPECIES_I512, 0b1111111111111110); var mask2 = VectorMask.fromLong(SPECIES_I512, 0b1111111111111100); var mask3 = VectorMask.fromLong(SPECIES_I512, 0b1111111111110000); diff --git a/test/micro/org/openjdk/bench/vm/compiler/VectorAlgorithms.java b/test/micro/org/openjdk/bench/vm/compiler/VectorAlgorithms.java index a827b894875..95617856914 100644 --- a/test/micro/org/openjdk/bench/vm/compiler/VectorAlgorithms.java +++ b/test/micro/org/openjdk/bench/vm/compiler/VectorAlgorithms.java @@ -51,10 +51,12 @@ public class VectorAlgorithms { public int SIZE; public static int[] aI; + public static int[] rI; @Setup public void init() { aI = new int[SIZE]; + rI = new int[SIZE]; } // ------------------------------------------------------------------------------------------ @@ -80,4 +82,19 @@ public class VectorAlgorithms { public int reduceAddI_VectorAPI_reduction_after_loop() { return VectorAlgorithmsImpl.reduceAddI_VectorAPI_reduction_after_loop(aI); } + + @Benchmark + public Object scanAddI_loop() { + return VectorAlgorithmsImpl.scanAddI_loop(aI, rI); + } + + @Benchmark + public Object scanAddI_loop_reassociate() { + return VectorAlgorithmsImpl.scanAddI_loop_reassociate(aI, rI); + } + + @Benchmark + public Object scanAddI_VectorAPI_permute_add() { + return VectorAlgorithmsImpl.scanAddI_VectorAPI_permute_add(aI, rI); + } 
} diff --git a/test/micro/org/openjdk/bench/vm/compiler/VectorAlgorithmsImpl.java b/test/micro/org/openjdk/bench/vm/compiler/VectorAlgorithmsImpl.java index 6fbf659d0f6..c9b8a7c4be4 100644 --- a/test/micro/org/openjdk/bench/vm/compiler/VectorAlgorithmsImpl.java +++ b/test/micro/org/openjdk/bench/vm/compiler/VectorAlgorithmsImpl.java @@ -89,73 +89,68 @@ public class VectorAlgorithmsImpl { return sum; } - //@Benchmark - //public void scanAddI_loop() { - // int sum = 0; - // for (int i = 0; i < AI.length; i++) { - // sum += AI[i]; - // RI[i] = sum; - // } - //} + public static Object scanAddI_loop(int[] a, int[] r) { + int sum = 0; + for (int i = 0; i < a.length; i++) { + sum += a[i]; + r[i] = sum; + } + return r; + } - //@Benchmark - //public void scanAddI_loop_reassociate() { - // int sum = 0; - // for (int i = 0; i < AI.length; i+=4) { - // // We cut the latency by a factor of 4, but increase the number of additions. - // int old_sum = sum; - // int v0 = AI[i + 0]; - // int v1 = AI[i + 1]; - // int v2 = AI[i + 2]; - // int v3 = AI[i + 3]; - // int v01 = v0 + v1; - // int v23 = v2 + v3; - // int v0123 = v01 + v23; - // sum += v0123; - // RI[i + 0] = old_sum + v0; - // RI[i + 1] = old_sum + v01; - // RI[i + 2] = old_sum + v01 + v2; - // RI[i + 3] = old_sum + v0123; - // } - //} + public static Object scanAddI_loop_reassociate(int[] a, int[] r) { + int sum = 0; + int i = 0; + for (; i < a.length - 3; i+=4) { + // We cut the latency by a factor of 4, but increase the number of additions. 
+ int old_sum = sum; + int v0 = a[i + 0]; + int v1 = a[i + 1]; + int v2 = a[i + 2]; + int v3 = a[i + 3]; + int v01 = v0 + v1; + int v23 = v2 + v3; + int v0123 = v01 + v23; + sum += v0123; + r[i + 0] = old_sum + v0; + r[i + 1] = old_sum + v01; + r[i + 2] = old_sum + v01 + v2; + r[i + 3] = old_sum + v0123; + } + for (; i < a.length; i++) { + sum += a[i]; + r[i] = sum; + } + return r; + } - //@Benchmark - //public void scanAddI_VectorAPI_shift_blend_add() { - // // Using Naive Parallel Algorithm: Hills and Steele - // int sum = 0; - // for (int i = 0; i < SPECIES_I512.loopBound(AI.length); i += SPECIES_I512.length()) { - // IntVector v = IntVector.fromArray(SPECIES_I512, AI, i); - // v = v.add(v.lanewise(VectorOperators.LSHL, 1 ).blend(0, VectorMask.fromLong(SPECIES_I512, 0b1111111111111110))); - // v = v.add(v.lanewise(VectorOperators.LSHL, 2 ).blend(0, VectorMask.fromLong(SPECIES_I512, 0b1111111111111100))); - // v = v.add(v.lanewise(VectorOperators.LSHL, 4 ).blend(0, VectorMask.fromLong(SPECIES_I512, 0b1111111111110000))); - // v = v.add(v.lanewise(VectorOperators.LSHL, 8 ).blend(0, VectorMask.fromLong(SPECIES_I512, 0b1111111100000000))); - // v = v.add(sum); - // v.intoArray(RI, i); - // sum = v.lane(SPECIES_I512.length() - 1); - // } - //} - - //@Benchmark - //public void scanAddI_VectorAPI_permute_add() { - // // Using Naive Parallel Algorithm: Hills and Steele - // int sum = 0; - // var shf1 = VectorShuffle.fromArray(SPECIES_I512, new int[]{-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14}, 0); - // var shf2 = VectorShuffle.fromArray(SPECIES_I512, new int[]{-1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13}, 0); - // var shf3 = VectorShuffle.fromArray(SPECIES_I512, new int[]{-1, -1, -1, -1, 0, 1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12}, 0); - // var shf4 = VectorShuffle.fromArray(SPECIES_I512, new int[]{-1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 6, 7, 8}, 0); - // var mask1 = VectorMask.fromLong(SPECIES_I512, 0b1111111111111110); - // var mask2 = 
VectorMask.fromLong(SPECIES_I512, 0b1111111111111100); - // var mask3 = VectorMask.fromLong(SPECIES_I512, 0b1111111111110000); - // var mask4 = VectorMask.fromLong(SPECIES_I512, 0b1111111100000000); - // for (int i = 0; i < SPECIES_I512.loopBound(AI.length); i += SPECIES_I512.length()) { - // IntVector v = IntVector.fromArray(SPECIES_I512, AI, i); - // v = v.add(v.rearrange(shf1), mask1); - // v = v.add(v.rearrange(shf2), mask2); - // v = v.add(v.rearrange(shf3), mask3); - // v = v.add(v.rearrange(shf4), mask4); - // v = v.add(sum); - // v.intoArray(RI, i); - // sum = v.lane(SPECIES_I512.length() - 1); - // } - //} + public static Object scanAddI_VectorAPI_permute_add(int[] a, int[] r) { + // Using Naive Parallel Algorithm: Hillis and Steele + int sum = 0; + int xx = 0; // masked later anyway + var shf1 = VectorShuffle.fromArray(SPECIES_I512, new int[]{xx, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14}, 0); + var shf2 = VectorShuffle.fromArray(SPECIES_I512, new int[]{xx, xx, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13}, 0); + var shf3 = VectorShuffle.fromArray(SPECIES_I512, new int[]{xx, xx, xx, xx, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}, 0); + var shf4 = VectorShuffle.fromArray(SPECIES_I512, new int[]{xx, xx, xx, xx, xx, xx, xx, xx, 0, 1, 2, 3, 4, 5, 6, 7}, 0); + var mask1 = VectorMask.fromLong(SPECIES_I512, 0b1111111111111110); + var mask2 = VectorMask.fromLong(SPECIES_I512, 0b1111111111111100); + var mask3 = VectorMask.fromLong(SPECIES_I512, 0b1111111111110000); + var mask4 = VectorMask.fromLong(SPECIES_I512, 0b1111111100000000); + int i = 0; + for (; i < SPECIES_I512.loopBound(a.length); i += SPECIES_I512.length()) { + IntVector v = IntVector.fromArray(SPECIES_I512, a, i); + v = v.add(v.rearrange(shf1), mask1); + v = v.add(v.rearrange(shf2), mask2); + v = v.add(v.rearrange(shf3), mask3); + v = v.add(v.rearrange(shf4), mask4); + v = v.add(sum); + v.intoArray(r, i); + sum = v.lane(SPECIES_I512.length() - 1); + } + for (; i < a.length; i++) { + sum += a[i]; + 
r[i] = sum; + } + return r; + } }