diff --git a/test/hotspot/jtreg/compiler/vectorization/TestVectorAlgorithms.java b/test/hotspot/jtreg/compiler/vectorization/TestVectorAlgorithms.java index 6eedac2233c..886bd38ae06 100644 --- a/test/hotspot/jtreg/compiler/vectorization/TestVectorAlgorithms.java +++ b/test/hotspot/jtreg/compiler/vectorization/TestVectorAlgorithms.java @@ -89,8 +89,6 @@ public class TestVectorAlgorithms { public static void main(String[] args) { TestFramework framework = new TestFramework(); - // TODO: run with and without SuperWord, and also some intrinsics should be disabled in a run. - // make sure that all those flags are also mentioned in the JMH. framework.addFlags("--add-modules=jdk.incubator.vector", "-XX:CompileCommand=inline,*VectorAlgorithmsImpl::*"); switch (args[0]) { case "vanilla" -> { /* no extra flags */ } @@ -278,8 +276,10 @@ public class TestVectorAlgorithms { @Test @IR(counts = {IRNode.POPULATE_INDEX, "> 0", IRNode.STORE_VECTOR, "> 0"}, - applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, + applyIfCPUFeatureOr = {"avx2", "true", "sve", "true"}, applyIf = {"UseSuperWord", "true"}) + // Note: the Vector API example below can also vectorize for AVX, + // because it does not use a PopulateIndex. 
public Object iotaI_loop(int[] r) { return VectorAlgorithmsImpl.iotaI_loop(r); } @@ -385,7 +385,8 @@ public class TestVectorAlgorithms { @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.REARRANGE_VI, "> 0", IRNode.AND_VI, "> 0", - IRNode.ADD_VI, "> 0"}, + IRNode.ADD_VI, "> 0", + IRNode.STORE_VECTOR, "> 0"}, applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, applyIf = {"MaxVectorSize", ">=64"}) public Object scanAddI_VectorAPI_permute_add(int[] a, int[] r) { @@ -400,6 +401,12 @@ public class TestVectorAlgorithms { } @Test + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", + IRNode.VECTOR_MASK_CMP, "> 0", + IRNode.VECTOR_BLEND_I, "> 0", + IRNode.MIN_REDUCTION_V, "> 0", + IRNode.ADD_VI, "> 0"}, + applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"}) public int findMinIndexI_VectorAPI(int[] a) { return VectorAlgorithmsImpl.findMinIndexI_VectorAPI(a); } @@ -412,6 +419,10 @@ public class TestVectorAlgorithms { } @Test + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", + IRNode.VECTOR_MASK_CMP, "> 0", + IRNode.VECTOR_TEST, "> 0"}, + applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"}) public int findI_VectorAPI(int[] a, int e) { return VectorAlgorithmsImpl.findI_VectorAPI(a, e); } @@ -425,6 +436,11 @@ public class TestVectorAlgorithms { } @Test + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", + IRNode.REARRANGE_VI, "> 0", + IRNode.AND_VI, "> 0", + IRNode.STORE_VECTOR, "> 0"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) public Object reverseI_VectorAPI(int[] a, int[] r) { return VectorAlgorithmsImpl.reverseI_VectorAPI(a, r); } @@ -437,6 +453,12 @@ public class TestVectorAlgorithms { } @Test + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", + IRNode.VECTOR_MASK_CMP, "> 0", + IRNode.VECTOR_TEST, "> 0", + IRNode.VECTOR_LONG_TO_MASK, "> 0", + IRNode.STORE_VECTOR_MASKED, "> 0"}, + applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"}) public Object filterI_VectorAPI(int[] a, int[] r, int threshold) { return VectorAlgorithmsImpl.filterI_VectorAPI(a, r, threshold); } 
@@ -449,6 +471,14 @@ public class TestVectorAlgorithms { } @Test + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", + IRNode.VECTOR_MASK_CMP, "> 0", + IRNode.VECTOR_TEST, "> 0", + IRNode.LOAD_VECTOR_GATHER_MASKED, "> 0", + IRNode.OR_V_MASK, "> 0", + IRNode.ADD_VI, "> 0", + IRNode.ADD_REDUCTION_VI, "> 0"}, + applyIfCPUFeatureOr = {"avx512", "true", "sve", "true"}) public int reduceAddIFieldsX4_VectorAPI(int[] oops, int[] mem) { return VectorAlgorithmsImpl.reduceAddIFieldsX4_VectorAPI(oops, mem); } diff --git a/test/hotspot/jtreg/compiler/vectorization/VectorAlgorithmsImpl.java b/test/hotspot/jtreg/compiler/vectorization/VectorAlgorithmsImpl.java index 278d9b8d9b7..57638bc350a 100644 --- a/test/hotspot/jtreg/compiler/vectorization/VectorAlgorithmsImpl.java +++ b/test/hotspot/jtreg/compiler/vectorization/VectorAlgorithmsImpl.java @@ -258,16 +258,16 @@ public class VectorAlgorithmsImpl { public static int findMinIndexI_VectorAPI(int[] a) { // Main approach: have partial results in mins and idxs. - var mins = IntVector.broadcast(SPECIES_I512, a[0]); - var idxs = IntVector.broadcast(SPECIES_I512, 0); - var iota = IntVector.broadcast(SPECIES_I512, 0).addIndex(1); + var mins = IntVector.broadcast(SPECIES_I, a[0]); + var idxs = IntVector.broadcast(SPECIES_I, 0); + var iota = IntVector.broadcast(SPECIES_I, 0).addIndex(1); int i = 0; - for (; i < SPECIES_I512.loopBound(a.length); i += SPECIES_I512.length()) { - IntVector v = IntVector.fromArray(SPECIES_I512, a, i); + for (; i < SPECIES_I.loopBound(a.length); i += SPECIES_I.length()) { + IntVector v = IntVector.fromArray(SPECIES_I, a, i); var mask = v.compare(VectorOperators.LT, mins); mins = mins.blend(v, mask); idxs = idxs.blend(iota, mask); - iota = iota.add(SPECIES_I512.length()); + iota = iota.add(SPECIES_I.length()); } // Reduce the vectors down int min = mins.reduceLanes(VectorOperators.MIN); diff --git a/test/micro/org/openjdk/bench/vm/compiler/VectorAlgorithms.java 
b/test/micro/org/openjdk/bench/vm/compiler/VectorAlgorithms.java index 43d3bdf6c18..d5916cbc4f6 100644 --- a/test/micro/org/openjdk/bench/vm/compiler/VectorAlgorithms.java +++ b/test/micro/org/openjdk/bench/vm/compiler/VectorAlgorithms.java @@ -41,6 +41,8 @@ import org.openjdk.jmh.annotations.*; * -XX:+UnlockDiagnosticVMOptions -XX:AutoVectorizationOverrideProfitability=0 * - Smaller vector size: * -XX:MaxVectorSize=16 + * - Disable fill loop detection, so that auto vectorization is used instead of the intrinsic: + * -XX:-OptimizeFill */ @BenchmarkMode(Mode.AverageTime) @OutputTimeUnit(TimeUnit.NANOSECONDS) diff --git a/test/micro/org/openjdk/bench/vm/compiler/VectorAlgorithmsImpl.java b/test/micro/org/openjdk/bench/vm/compiler/VectorAlgorithmsImpl.java index bab872d80c6..1739f2fe799 100644 --- a/test/micro/org/openjdk/bench/vm/compiler/VectorAlgorithmsImpl.java +++ b/test/micro/org/openjdk/bench/vm/compiler/VectorAlgorithmsImpl.java @@ -258,16 +258,16 @@ public class VectorAlgorithmsImpl { public static int findMinIndexI_VectorAPI(int[] a) { // Main approach: have partial results in mins and idxs. - var mins = IntVector.broadcast(SPECIES_I512, a[0]); - var idxs = IntVector.broadcast(SPECIES_I512, 0); - var iota = IntVector.broadcast(SPECIES_I512, 0).addIndex(1); + var mins = IntVector.broadcast(SPECIES_I, a[0]); + var idxs = IntVector.broadcast(SPECIES_I, 0); + var iota = IntVector.broadcast(SPECIES_I, 0).addIndex(1); int i = 0; - for (; i < SPECIES_I512.loopBound(a.length); i += SPECIES_I512.length()) { - IntVector v = IntVector.fromArray(SPECIES_I512, a, i); + for (; i < SPECIES_I.loopBound(a.length); i += SPECIES_I.length()) { + IntVector v = IntVector.fromArray(SPECIES_I, a, i); var mask = v.compare(VectorOperators.LT, mins); mins = mins.blend(v, mask); idxs = idxs.blend(iota, mask); - iota = iota.add(SPECIES_I512.length()); + iota = iota.add(SPECIES_I.length()); } // Reduce the vectors down int min = mins.reduceLanes(VectorOperators.MIN);