diff --git a/test/hotspot/jtreg/compiler/vectorization/TestVectorAlgorithms.java b/test/hotspot/jtreg/compiler/vectorization/TestVectorAlgorithms.java
index 192ad174eaf..dbfa293b1a4 100644
--- a/test/hotspot/jtreg/compiler/vectorization/TestVectorAlgorithms.java
+++ b/test/hotspot/jtreg/compiler/vectorization/TestVectorAlgorithms.java
@@ -115,7 +115,6 @@ public class TestVectorAlgorithms {
         int size = 100_000 + RANDOM.nextInt(10_000);
         aI = new int[size];
         G.fill(INT_GEN, aI);
-        for (int i = 0; i < aI.length; i++) { aI[i] = i; }
         rI1 = new int[size];
         rI2 = new int[size];
         rI3 = new int[size];
diff --git a/test/hotspot/jtreg/compiler/vectorization/VectorAlgorithmsImpl.java b/test/hotspot/jtreg/compiler/vectorization/VectorAlgorithmsImpl.java
index 79571d6a502..4e4ce4302af 100644
--- a/test/hotspot/jtreg/compiler/vectorization/VectorAlgorithmsImpl.java
+++ b/test/hotspot/jtreg/compiler/vectorization/VectorAlgorithmsImpl.java
@@ -168,25 +168,27 @@ public class VectorAlgorithmsImpl {
     }
 
     public static int findMinIndex_VectorAPI(int[] a) {
-        var mins = IntVector.broadcast(SPECIES_I, a[0]);
-        var idxs = IntVector.broadcast(SPECIES_I, 0);
-        var iota = IntVector.broadcast(SPECIES_I, 1).addIndex(1);
+        // Main approach: keep per-lane partial results. mins holds the smallest value
+        // found so far in each lane (seeded with a[0]), idxs the array index it came
+        // from (seeded with 0), and iota the array index of each lane of the current load.
+        var mins = IntVector.broadcast(SPECIES_I512, a[0]);
+        var idxs = IntVector.broadcast(SPECIES_I512, 0);
+        var iota = IntVector.broadcast(SPECIES_I512, 0).addIndex(1);
         int i = 0;
-        for (; i < SPECIES_I.loopBound(a.length); i += SPECIES_I.length()) {
-            IntVector v = IntVector.fromArray(SPECIES_I, a, i);
+        for (; i < SPECIES_I512.loopBound(a.length); i += SPECIES_I512.length()) {
+            IntVector v = IntVector.fromArray(SPECIES_I512, a, i);
             var mask = v.compare(VectorOperators.LT, mins);
             mins = mins.blend(v, mask);
             idxs = idxs.blend(iota, mask);
-            iota = iota.add(SPECIES_I.length());
-        }
-        int min = mins.lane(0);
-        int index = idxs.lane(0);
-        for (int j = 1; j < SPECIES_I.length(); j++) {
-            if (mins.lane(j) < min) {
-                min = mins.lane(j);
-                index = idxs.lane(j);
-            }
+            iota = iota.add(SPECIES_I512.length());
         }
+        // Reduce the vectors down: take the overall minimum, then among the lanes
+        // holding it pick the smallest index. Other lanes are replaced by a.length,
+        // which is larger than any valid index, so they cannot win the MIN reduction.
+        int min = mins.reduceLanes(VectorOperators.MIN);
+        var notMinMask = mins.compare(VectorOperators.NE, min);
+        int index = idxs.blend(a.length, notMinMask).reduceLanes(VectorOperators.MIN);
+        // Tail loop: scalar pass over the elements the vector loop did not cover.
         for (; i < a.length; i++) {
             int ai = a[i];
             if (ai < min) {
@@ -196,6 +198,4 @@ public class VectorAlgorithmsImpl {
         }
         return index;
     }
-
-
 }
diff --git a/test/micro/org/openjdk/bench/vm/compiler/VectorAlgorithms.java b/test/micro/org/openjdk/bench/vm/compiler/VectorAlgorithms.java
index 95617856914..60f8a34189c 100644
--- a/test/micro/org/openjdk/bench/vm/compiler/VectorAlgorithms.java
+++ b/test/micro/org/openjdk/bench/vm/compiler/VectorAlgorithms.java
@@ -97,4 +97,14 @@ public class VectorAlgorithms {
     public Object scanAddI_VectorAPI_permute_add() {
         return VectorAlgorithmsImpl.scanAddI_VectorAPI_permute_add(aI, rI);
     }
+
+    @Benchmark
+    public int findMinIndex_loop() {
+        return VectorAlgorithmsImpl.findMinIndex_loop(aI);
+    }
+
+    @Benchmark
+    public int findMinIndex_VectorAPI() {
+        return VectorAlgorithmsImpl.findMinIndex_VectorAPI(aI);
+    }
 }
diff --git a/test/micro/org/openjdk/bench/vm/compiler/VectorAlgorithmsImpl.java b/test/micro/org/openjdk/bench/vm/compiler/VectorAlgorithmsImpl.java
index c9b8a7c4be4..3d38030689f 100644
--- a/test/micro/org/openjdk/bench/vm/compiler/VectorAlgorithmsImpl.java
+++ b/test/micro/org/openjdk/bench/vm/compiler/VectorAlgorithmsImpl.java
@@ -153,4 +153,49 @@ public class VectorAlgorithmsImpl {
         }
         return r;
     }
+
+    public static int findMinIndex_loop(int[] a) {
+        int min = a[0];
+        int index = 0;
+        for (int i = 1; i < a.length; i++) {
+            int ai = a[i];
+            if (ai < min) {
+                min = ai;
+                index = i;
+            }
+        }
+        return index;
+    }
+
+    public static int findMinIndex_VectorAPI(int[] a) {
+        // Main approach: keep per-lane partial results. mins holds the smallest value
+        // found so far in each lane (seeded with a[0]), idxs the array index it came
+        // from (seeded with 0), and iota the array index of each lane of the current load.
+        var mins = IntVector.broadcast(SPECIES_I512, a[0]);
+        var idxs = IntVector.broadcast(SPECIES_I512, 0);
+        var iota = IntVector.broadcast(SPECIES_I512, 0).addIndex(1);
+        int i = 0;
+        for (; i < SPECIES_I512.loopBound(a.length); i += SPECIES_I512.length()) {
+            IntVector v = IntVector.fromArray(SPECIES_I512, a, i);
+            var mask = v.compare(VectorOperators.LT, mins);
+            mins = mins.blend(v, mask);
+            idxs = idxs.blend(iota, mask);
+            iota = iota.add(SPECIES_I512.length());
+        }
+        // Reduce the vectors down: take the overall minimum, then among the lanes
+        // holding it pick the smallest index. Other lanes are replaced by a.length,
+        // which is larger than any valid index, so they cannot win the MIN reduction.
+        int min = mins.reduceLanes(VectorOperators.MIN);
+        var notMinMask = mins.compare(VectorOperators.NE, min);
+        int index = idxs.blend(a.length, notMinMask).reduceLanes(VectorOperators.MIN);
+        // Tail loop: scalar pass over the elements the vector loop did not cover.
+        for (; i < a.length; i++) {
+            int ai = a[i];
+            if (ai < min) {
+                min = ai;
+                index = i;
+            }
+        }
+        return index;
+    }
 }