diff --git a/test/hotspot/jtreg/compiler/vectorization/TestVectorAlgorithms.java b/test/hotspot/jtreg/compiler/vectorization/TestVectorAlgorithms.java
new file mode 100644
index 00000000000..f0d08b95fc2
--- /dev/null
+++ b/test/hotspot/jtreg/compiler/vectorization/TestVectorAlgorithms.java
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 8373026
+ * @summary Test auto vectorization and Vector API with some vector
+ *          algorithms. Related benchmark: VectorAlgorithms.java
+ * @library /test/lib /
+ * @run driver ${test.main.class}
+ */
+
+package compiler.vectorization;
+
+import compiler.lib.ir_framework.*;
+import compiler.lib.verify.*;
+
+/**
+ * IR test counterpart of the JMH benchmark that shows the power of auto
+ * vectorization and the Vector API.
+ *
+ * Please only modify this test in synchronization with the JMH benchmark:
+ * micro/org/openjdk/bench/vm/compiler/VectorAlgorithms.java
+ */
+public class TestVectorAlgorithms {
+
+    public static void main(String[] args) { // Empty: this driver currently runs no checks.
+    }
+}
+
diff --git a/test/micro/org/openjdk/bench/vm/compiler/VectorAlgorithms.java b/test/micro/org/openjdk/bench/vm/compiler/VectorAlgorithms.java
new file mode 100644
index 00000000000..916efb0a55c
--- /dev/null
+++ b/test/micro/org/openjdk/bench/vm/compiler/VectorAlgorithms.java
@@ -0,0 +1,166 @@
+/*
+ * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+package org.openjdk.bench.vm.compiler;
+
+import jdk.incubator.vector.*;
+import java.util.concurrent.TimeUnit;
+import org.openjdk.jmh.annotations.*;
+
+/**
+ * The goal of this benchmark is to show the power of auto vectorization
+ * and the Vector API.
+ *
+ * Please only modify this benchmark in synchronization with the IR test:
+ * test/hotspot/jtreg/compiler/vectorization/TestVectorAlgorithms.java
+ */
+@BenchmarkMode(Mode.AverageTime)
+@OutputTimeUnit(TimeUnit.NANOSECONDS)
+@State(Scope.Thread)
+@Warmup(iterations = 2, time = 1)
+@Measurement(iterations = 3, time = 1)
+@Fork(value = 1, jvmArgs = {"--add-modules=jdk.incubator.vector"})
+public class VectorAlgorithms {
+    private static final VectorSpecies<Integer> SPECIES_I = IntVector.SPECIES_512; // 16 int lanes; the disabled scan kernels hard-code 16-lane masks/shuffles.
+
+    @Param({"640000"})
+    public int SIZE; // Number of int elements in each array.
+
+    public static int[] AI; // Input data; allocated (zero-filled) in init().
+    public static int[] RI; // Output buffer written by the disabled scanAddI_* kernels.
+
+    @Setup
+    public void init() {
+        AI = new int[SIZE];
+        RI = new int[SIZE];
+    }
+
+    //@Benchmark
+    //public int reduceAddI_loop() {
+    //    int sum = 0;
+    //    for (int i = 0; i < AI.length; i++) {
+    //        sum += AI[i];
+    //    }
+    //    return sum;
+    //}
+
+    //@Benchmark
+    //public int reduceAddI_loop_reassociate() {
+    //    int sum = 0;
+    //    for (int i = 0; i < AI.length; i+=4) {
+    //        sum += (AI[i + 0] + AI[i + 1]) + (AI[i + 2] + AI[i + 3]);
+    //    }
+    //    return sum;
+    //}
+
+    //@Benchmark
+    //public int reduceAddI_VectorAPI_naive() {
+    //    var sum = 0;
+    //    for (int i = 0; i < SPECIES_I.loopBound(AI.length); i += SPECIES_I.length()) {
+    //        IntVector v = IntVector.fromArray(SPECIES_I, AI, i);
+    //        sum += v.reduceLanes(VectorOperators.ADD);
+    //    }
+    //    return sum;
+    //}
+
+    //@Benchmark
+    //public int reduceAddI_VectorAPI_reduction_after_loop() {
+    //    var acc = IntVector.broadcast(SPECIES_I, 0);
+    //    for (int i = 0; i < SPECIES_I.loopBound(AI.length); i += SPECIES_I.length()) {
+    //        IntVector v = IntVector.fromArray(SPECIES_I, AI, i);
+    //        acc = acc.add(v);
+    //    }
+    //    return acc.reduceLanes(VectorOperators.ADD);
+    //}
+
+    //@Benchmark
+    //public void scanAddI_loop() {
+    //    int sum = 0;
+    //    for (int i = 0; i < AI.length; i++) {
+    //        sum += AI[i];
+    //        RI[i] = sum;
+    //    }
+    //}
+
+    //@Benchmark
+    //public void scanAddI_loop_reassociate() {
+    //    int sum = 0;
+    //    for (int i = 0; i < AI.length; i+=4) {
+    //        // We cut the latency by a factor of 4, but increase the number of additions.
+    //        int old_sum = sum;
+    //        int v0 = AI[i + 0];
+    //        int v1 = AI[i + 1];
+    //        int v2 = AI[i + 2];
+    //        int v3 = AI[i + 3];
+    //        int v01 = v0 + v1;
+    //        int v23 = v2 + v3;
+    //        int v0123 = v01 + v23;
+    //        sum += v0123;
+    //        RI[i + 0] = old_sum + v0;
+    //        RI[i + 1] = old_sum + v01;
+    //        RI[i + 2] = old_sum + v01 + v2;
+    //        RI[i + 3] = old_sum + v0123;
+    //    }
+    //}
+
+    //@Benchmark
+    //public void scanAddI_VectorAPI_shift_blend_add() {
+    //    // Using Naive Parallel Algorithm: Hillis and Steele. NOTE(review): LSHL is a per-lane bit shift, not a lane shift; verify this kernel before enabling.
+    //    int sum = 0;
+    //    for (int i = 0; i < SPECIES_I.loopBound(AI.length); i += SPECIES_I.length()) {
+    //        IntVector v = IntVector.fromArray(SPECIES_I, AI, i);
+    //        v = v.add(v.lanewise(VectorOperators.LSHL, 1 ).blend(0, VectorMask.fromLong(SPECIES_I, 0b1111111111111110)));
+    //        v = v.add(v.lanewise(VectorOperators.LSHL, 2 ).blend(0, VectorMask.fromLong(SPECIES_I, 0b1111111111111100)));
+    //        v = v.add(v.lanewise(VectorOperators.LSHL, 4 ).blend(0, VectorMask.fromLong(SPECIES_I, 0b1111111111110000)));
+    //        v = v.add(v.lanewise(VectorOperators.LSHL, 8 ).blend(0, VectorMask.fromLong(SPECIES_I, 0b1111111100000000)));
+    //        v = v.add(sum);
+    //        v.intoArray(RI, i);
+    //        sum = v.lane(SPECIES_I.length() - 1);
+    //    }
+    //}
+
+    //@Benchmark
+    //public void scanAddI_VectorAPI_permute_add() {
+    //    // Using Naive Parallel Algorithm: Hillis and Steele
+    //    int sum = 0;
+    //    var shf1 = VectorShuffle.fromArray(SPECIES_I, new int[]{-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14}, 0);
+    //    var shf2 = VectorShuffle.fromArray(SPECIES_I, new int[]{-1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13}, 0);
+    //    var shf3 = VectorShuffle.fromArray(SPECIES_I, new int[]{-1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}, 0);
+    //    var shf4 = VectorShuffle.fromArray(SPECIES_I, new int[]{-1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7}, 0);
+    //    var mask1 = VectorMask.fromLong(SPECIES_I, 0b1111111111111110);
+    //    var mask2 = VectorMask.fromLong(SPECIES_I, 0b1111111111111100);
+    //    var mask3 = VectorMask.fromLong(SPECIES_I, 0b1111111111110000);
+    //    var mask4 = VectorMask.fromLong(SPECIES_I, 0b1111111100000000);
+    //    for (int i = 0; i < SPECIES_I.loopBound(AI.length); i += SPECIES_I.length()) {
+    //        IntVector v = IntVector.fromArray(SPECIES_I, AI, i);
+    //        v = v.add(v.rearrange(shf1), mask1);
+    //        v = v.add(v.rearrange(shf2), mask2);
+    //        v = v.add(v.rearrange(shf3), mask3);
+    //        v = v.add(v.rearrange(shf4), mask4);
+    //        v = v.add(sum);
+    //        v.intoArray(RI, i);
+    //        sum = v.lane(SPECIES_I.length() - 1);
+    //    }
+    //}
+}