mirror of
https://github.com/openjdk/jdk.git
synced 2026-05-20 18:37:51 +00:00
2633 lines
106 KiB
Java
2633 lines
106 KiB
Java
/*
|
|
* Copyright (c) 2024, 2026, Oracle and/or its affiliates. All rights reserved.
|
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
*
|
|
* This code is free software; you can redistribute it and/or modify it
|
|
* under the terms of the GNU General Public License version 2 only, as
|
|
* published by the Free Software Foundation.
|
|
*
|
|
* This code is distributed in the hope that it will be useful, but WITHOUT
|
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
* version 2 for more details (a copy is included in the LICENSE file that
|
|
* accompanied this code).
|
|
*
|
|
* You should have received a copy of the GNU General Public License version
|
|
* 2 along with this work; if not, write to the Free Software Foundation,
|
|
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
*
|
|
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
* or visit www.oracle.com if you need additional information or have any
|
|
* questions.
|
|
*/
|
|
|
|
/*
 * @test id=no-vectorization
 * @bug 8340093 8342095
 * @summary Test vectorization of reduction loops.
 * @library /test/lib /
 * @run driver compiler.loopopts.superword.TestReductions P0
 */
|
|
|
|
/*
 * @test id=vanilla
 * @bug 8340093 8342095
 * @summary Test vectorization of reduction loops.
 * @library /test/lib /
 * @run driver compiler.loopopts.superword.TestReductions P1
 */
|
|
|
|
/*
 * @test id=force-vectorization
 * @bug 8340093 8342095
 * @summary Test vectorization of reduction loops.
 * @library /test/lib /
 * @run driver compiler.loopopts.superword.TestReductions P2
 */
|
|
|
|
package compiler.loopopts.superword;
|
|
|
|
import java.util.Map;
|
|
import java.util.HashMap;
|
|
|
|
import compiler.lib.ir_framework.*;
|
|
import compiler.lib.verify.*;
|
|
import static compiler.lib.generators.Generators.G;
|
|
import compiler.lib.generators.Generator;
|
|
|
|
/**
 * Note: there is a corresponding JMH benchmark:
 * test/micro/org/openjdk/bench/vm/compiler/VectorReduction2.java
 */
|
|
public class TestReductions {
|
|
private static int SIZE = 1024*8;
|
|
private static final Generator<Integer> GEN_I = G.ints();
|
|
private static final Generator<Long> GEN_L = G.longs();
|
|
private static final Generator<Float> GEN_F = G.floats();
|
|
private static final Generator<Double> GEN_D = G.doubles();
|
|
|
|
private static byte[] in1B = fillRandom(new byte[SIZE]);
|
|
private static byte[] in2B = fillRandom(new byte[SIZE]);
|
|
private static byte[] in3B = fillRandom(new byte[SIZE]);
|
|
private static char[] in1C = fillRandom(new char[SIZE]);
|
|
private static char[] in2C = fillRandom(new char[SIZE]);
|
|
private static char[] in3C = fillRandom(new char[SIZE]);
|
|
private static short[] in1S = fillRandom(new short[SIZE]);
|
|
private static short[] in2S = fillRandom(new short[SIZE]);
|
|
private static short[] in3S = fillRandom(new short[SIZE]);
|
|
|
|
private static int[] in1I = fillRandom(new int[SIZE]);
|
|
private static int[] in2I = fillRandom(new int[SIZE]);
|
|
private static int[] in3I = fillRandom(new int[SIZE]);
|
|
private static long[] in1L = fillRandom(new long[SIZE]);
|
|
private static long[] in2L = fillRandom(new long[SIZE]);
|
|
private static long[] in3L = fillRandom(new long[SIZE]);
|
|
|
|
private static float[] in1F = fillRandom(new float[SIZE]);
|
|
private static float[] in2F = fillRandom(new float[SIZE]);
|
|
private static float[] in3F = fillRandom(new float[SIZE]);
|
|
private static double[] in1D = fillRandom(new double[SIZE]);
|
|
private static double[] in2D = fillRandom(new double[SIZE]);
|
|
private static double[] in3D = fillRandom(new double[SIZE]);
|
|
|
|
/**
 * A zero-argument test kernel. The result is returned as {@code Object} (boxed for
 * primitive-returning kernels) so that results of different types can share one map.
 */
@FunctionalInterface
interface TestFunction {
    Object run();
}
|
|
|
|
// Map of test names to tests.
|
|
Map<String,TestFunction> tests = new HashMap<String,TestFunction>();
|
|
|
|
// Map of gold, the results from the first run (before compilation), one per tests entry.
|
|
Map<String,Object> golds = new HashMap<String,Object>();
|
|
|
|
public static void main(String[] args) {
|
|
TestFramework framework = new TestFramework(TestReductions.class);
|
|
switch (args[0]) {
|
|
case "P0" -> { framework.addFlags("-XX:+UnlockDiagnosticVMOptions", "-XX:AutoVectorizationOverrideProfitability=0"); }
|
|
case "P1" -> { framework.addFlags("-XX:+UnlockDiagnosticVMOptions", "-XX:AutoVectorizationOverrideProfitability=1"); }
|
|
// Note: increasing the node count limit also helps in some cases.
|
|
case "P2" -> { framework.addFlags("-XX:+UnlockDiagnosticVMOptions", "-XX:AutoVectorizationOverrideProfitability=2", "-XX:LoopUnrollLimit=1000"); }
|
|
default -> { throw new RuntimeException("Test argument not recognized: " + args[0]); }
|
|
};
|
|
framework.start();
|
|
}
|
|
|
|
public TestReductions() {
|
|
// Add all tests to list
|
|
tests.put("byteAndSimple", TestReductions::byteAndSimple);
|
|
tests.put("byteOrSimple", TestReductions::byteOrSimple);
|
|
tests.put("byteXorSimple", TestReductions::byteXorSimple);
|
|
tests.put("byteAddSimple", TestReductions::byteAddSimple);
|
|
tests.put("byteMulSimple", TestReductions::byteMulSimple);
|
|
tests.put("byteMinSimple", TestReductions::byteMinSimple);
|
|
tests.put("byteMaxSimple", TestReductions::byteMaxSimple);
|
|
tests.put("byteAndDotProduct", TestReductions::byteAndDotProduct);
|
|
tests.put("byteOrDotProduct", TestReductions::byteOrDotProduct);
|
|
tests.put("byteXorDotProduct", TestReductions::byteXorDotProduct);
|
|
tests.put("byteAddDotProduct", TestReductions::byteAddDotProduct);
|
|
tests.put("byteMulDotProduct", TestReductions::byteMulDotProduct);
|
|
tests.put("byteMinDotProduct", TestReductions::byteMinDotProduct);
|
|
tests.put("byteMaxDotProduct", TestReductions::byteMaxDotProduct);
|
|
tests.put("byteAndBig", TestReductions::byteAndBig);
|
|
tests.put("byteOrBig", TestReductions::byteOrBig);
|
|
tests.put("byteXorBig", TestReductions::byteXorBig);
|
|
tests.put("byteAddBig", TestReductions::byteAddBig);
|
|
tests.put("byteMulBig", TestReductions::byteMulBig);
|
|
tests.put("byteMinBig", TestReductions::byteMinBig);
|
|
tests.put("byteMaxBig", TestReductions::byteMaxBig);
|
|
|
|
tests.put("charAndSimple", TestReductions::charAndSimple);
|
|
tests.put("charOrSimple", TestReductions::charOrSimple);
|
|
tests.put("charXorSimple", TestReductions::charXorSimple);
|
|
tests.put("charAddSimple", TestReductions::charAddSimple);
|
|
tests.put("charMulSimple", TestReductions::charMulSimple);
|
|
tests.put("charMinSimple", TestReductions::charMinSimple);
|
|
tests.put("charMaxSimple", TestReductions::charMaxSimple);
|
|
tests.put("charAndDotProduct", TestReductions::charAndDotProduct);
|
|
tests.put("charOrDotProduct", TestReductions::charOrDotProduct);
|
|
tests.put("charXorDotProduct", TestReductions::charXorDotProduct);
|
|
tests.put("charAddDotProduct", TestReductions::charAddDotProduct);
|
|
tests.put("charMulDotProduct", TestReductions::charMulDotProduct);
|
|
tests.put("charMinDotProduct", TestReductions::charMinDotProduct);
|
|
tests.put("charMaxDotProduct", TestReductions::charMaxDotProduct);
|
|
tests.put("charAndBig", TestReductions::charAndBig);
|
|
tests.put("charOrBig", TestReductions::charOrBig);
|
|
tests.put("charXorBig", TestReductions::charXorBig);
|
|
tests.put("charAddBig", TestReductions::charAddBig);
|
|
tests.put("charMulBig", TestReductions::charMulBig);
|
|
tests.put("charMinBig", TestReductions::charMinBig);
|
|
tests.put("charMaxBig", TestReductions::charMaxBig);
|
|
|
|
tests.put("shortAndSimple", TestReductions::shortAndSimple);
|
|
tests.put("shortOrSimple", TestReductions::shortOrSimple);
|
|
tests.put("shortXorSimple", TestReductions::shortXorSimple);
|
|
tests.put("shortAddSimple", TestReductions::shortAddSimple);
|
|
tests.put("shortMulSimple", TestReductions::shortMulSimple);
|
|
tests.put("shortMinSimple", TestReductions::shortMinSimple);
|
|
tests.put("shortMaxSimple", TestReductions::shortMaxSimple);
|
|
tests.put("shortAndDotProduct", TestReductions::shortAndDotProduct);
|
|
tests.put("shortOrDotProduct", TestReductions::shortOrDotProduct);
|
|
tests.put("shortXorDotProduct", TestReductions::shortXorDotProduct);
|
|
tests.put("shortAddDotProduct", TestReductions::shortAddDotProduct);
|
|
tests.put("shortMulDotProduct", TestReductions::shortMulDotProduct);
|
|
tests.put("shortMinDotProduct", TestReductions::shortMinDotProduct);
|
|
tests.put("shortMaxDotProduct", TestReductions::shortMaxDotProduct);
|
|
tests.put("shortAndBig", TestReductions::shortAndBig);
|
|
tests.put("shortOrBig", TestReductions::shortOrBig);
|
|
tests.put("shortXorBig", TestReductions::shortXorBig);
|
|
tests.put("shortAddBig", TestReductions::shortAddBig);
|
|
tests.put("shortMulBig", TestReductions::shortMulBig);
|
|
tests.put("shortMinBig", TestReductions::shortMinBig);
|
|
tests.put("shortMaxBig", TestReductions::shortMaxBig);
|
|
|
|
tests.put("intAndSimple", TestReductions::intAndSimple);
|
|
tests.put("intOrSimple", TestReductions::intOrSimple);
|
|
tests.put("intXorSimple", TestReductions::intXorSimple);
|
|
tests.put("intAddSimple", TestReductions::intAddSimple);
|
|
tests.put("intMulSimple", TestReductions::intMulSimple);
|
|
tests.put("intMinSimple", TestReductions::intMinSimple);
|
|
tests.put("intMaxSimple", TestReductions::intMaxSimple);
|
|
tests.put("intAndDotProduct", TestReductions::intAndDotProduct);
|
|
tests.put("intOrDotProduct", TestReductions::intOrDotProduct);
|
|
tests.put("intXorDotProduct", TestReductions::intXorDotProduct);
|
|
tests.put("intAddDotProduct", TestReductions::intAddDotProduct);
|
|
tests.put("intMulDotProduct", TestReductions::intMulDotProduct);
|
|
tests.put("intMinDotProduct", TestReductions::intMinDotProduct);
|
|
tests.put("intMaxDotProduct", TestReductions::intMaxDotProduct);
|
|
tests.put("intAndBig", TestReductions::intAndBig);
|
|
tests.put("intOrBig", TestReductions::intOrBig);
|
|
tests.put("intXorBig", TestReductions::intXorBig);
|
|
tests.put("intAddBig", TestReductions::intAddBig);
|
|
tests.put("intMulBig", TestReductions::intMulBig);
|
|
tests.put("intMinBig", TestReductions::intMinBig);
|
|
tests.put("intMaxBig", TestReductions::intMaxBig);
|
|
|
|
tests.put("longAndSimple", TestReductions::longAndSimple);
|
|
tests.put("longOrSimple", TestReductions::longOrSimple);
|
|
tests.put("longXorSimple", TestReductions::longXorSimple);
|
|
tests.put("longAddSimple", TestReductions::longAddSimple);
|
|
tests.put("longMulSimple", TestReductions::longMulSimple);
|
|
tests.put("longMinSimple", TestReductions::longMinSimple);
|
|
tests.put("longMaxSimple", TestReductions::longMaxSimple);
|
|
tests.put("longAndDotProduct", TestReductions::longAndDotProduct);
|
|
tests.put("longOrDotProduct", TestReductions::longOrDotProduct);
|
|
tests.put("longXorDotProduct", TestReductions::longXorDotProduct);
|
|
tests.put("longAddDotProduct", TestReductions::longAddDotProduct);
|
|
tests.put("longMulDotProduct", TestReductions::longMulDotProduct);
|
|
tests.put("longMinDotProduct", TestReductions::longMinDotProduct);
|
|
tests.put("longMaxDotProduct", TestReductions::longMaxDotProduct);
|
|
tests.put("longAndBig", TestReductions::longAndBig);
|
|
tests.put("longOrBig", TestReductions::longOrBig);
|
|
tests.put("longXorBig", TestReductions::longXorBig);
|
|
tests.put("longAddBig", TestReductions::longAddBig);
|
|
tests.put("longMulBig", TestReductions::longMulBig);
|
|
tests.put("longMinBig", TestReductions::longMinBig);
|
|
tests.put("longMaxBig", TestReductions::longMaxBig);
|
|
|
|
tests.put("floatAddSimple", TestReductions::floatAddSimple);
|
|
tests.put("floatMulSimple", TestReductions::floatMulSimple);
|
|
tests.put("floatMinSimple", TestReductions::floatMinSimple);
|
|
tests.put("floatMaxSimple", TestReductions::floatMaxSimple);
|
|
tests.put("floatAddDotProduct", TestReductions::floatAddDotProduct);
|
|
tests.put("floatMulDotProduct", TestReductions::floatMulDotProduct);
|
|
tests.put("floatMinDotProduct", TestReductions::floatMinDotProduct);
|
|
tests.put("floatMaxDotProduct", TestReductions::floatMaxDotProduct);
|
|
tests.put("floatAddBig", TestReductions::floatAddBig);
|
|
tests.put("floatMulBig", TestReductions::floatMulBig);
|
|
tests.put("floatMinBig", TestReductions::floatMinBig);
|
|
tests.put("floatMaxBig", TestReductions::floatMaxBig);
|
|
|
|
tests.put("doubleAddSimple", TestReductions::doubleAddSimple);
|
|
tests.put("doubleMulSimple", TestReductions::doubleMulSimple);
|
|
tests.put("doubleMinSimple", TestReductions::doubleMinSimple);
|
|
tests.put("doubleMaxSimple", TestReductions::doubleMaxSimple);
|
|
tests.put("doubleAddDotProduct", TestReductions::doubleAddDotProduct);
|
|
tests.put("doubleMulDotProduct", TestReductions::doubleMulDotProduct);
|
|
tests.put("doubleMinDotProduct", TestReductions::doubleMinDotProduct);
|
|
tests.put("doubleMaxDotProduct", TestReductions::doubleMaxDotProduct);
|
|
tests.put("doubleAddBig", TestReductions::doubleAddBig);
|
|
tests.put("doubleMulBig", TestReductions::doubleMulBig);
|
|
tests.put("doubleMinBig", TestReductions::doubleMinBig);
|
|
tests.put("doubleMaxBig", TestReductions::doubleMaxBig);
|
|
|
|
// Compute gold value for all test methods before compilation
|
|
for (Map.Entry<String,TestFunction> entry : tests.entrySet()) {
|
|
String name = entry.getKey();
|
|
TestFunction test = entry.getValue();
|
|
Object gold = test.run();
|
|
golds.put(name, gold);
|
|
}
|
|
}
|
|
|
|
@Warmup(100)
|
|
@Run(test = {"byteAndSimple",
|
|
"byteOrSimple",
|
|
"byteXorSimple",
|
|
"byteAddSimple",
|
|
"byteMulSimple",
|
|
"byteMinSimple",
|
|
"byteMaxSimple",
|
|
"byteAndDotProduct",
|
|
"byteOrDotProduct",
|
|
"byteXorDotProduct",
|
|
"byteAddDotProduct",
|
|
"byteMulDotProduct",
|
|
"byteMinDotProduct",
|
|
"byteMaxDotProduct",
|
|
"byteAndBig",
|
|
"byteOrBig",
|
|
"byteXorBig",
|
|
"byteAddBig",
|
|
"byteMulBig",
|
|
"byteMinBig",
|
|
"byteMaxBig",
|
|
|
|
"charAndSimple",
|
|
"charOrSimple",
|
|
"charXorSimple",
|
|
"charAddSimple",
|
|
"charMulSimple",
|
|
"charMinSimple",
|
|
"charMaxSimple",
|
|
"charAndDotProduct",
|
|
"charOrDotProduct",
|
|
"charXorDotProduct",
|
|
"charAddDotProduct",
|
|
"charMulDotProduct",
|
|
"charMinDotProduct",
|
|
"charMaxDotProduct",
|
|
"charAndBig",
|
|
"charOrBig",
|
|
"charXorBig",
|
|
"charAddBig",
|
|
"charMulBig",
|
|
"charMinBig",
|
|
"charMaxBig",
|
|
|
|
"shortAndSimple",
|
|
"shortOrSimple",
|
|
"shortXorSimple",
|
|
"shortAddSimple",
|
|
"shortMulSimple",
|
|
"shortMinSimple",
|
|
"shortMaxSimple",
|
|
"shortAndDotProduct",
|
|
"shortOrDotProduct",
|
|
"shortXorDotProduct",
|
|
"shortAddDotProduct",
|
|
"shortMulDotProduct",
|
|
"shortMinDotProduct",
|
|
"shortMaxDotProduct",
|
|
"shortAndBig",
|
|
"shortOrBig",
|
|
"shortXorBig",
|
|
"shortAddBig",
|
|
"shortMulBig",
|
|
"shortMinBig",
|
|
"shortMaxBig",
|
|
|
|
"intAndSimple",
|
|
"intOrSimple",
|
|
"intXorSimple",
|
|
"intAddSimple",
|
|
"intMulSimple",
|
|
"intMinSimple",
|
|
"intMaxSimple",
|
|
"intAndDotProduct",
|
|
"intOrDotProduct",
|
|
"intXorDotProduct",
|
|
"intAddDotProduct",
|
|
"intMulDotProduct",
|
|
"intMinDotProduct",
|
|
"intMaxDotProduct",
|
|
"intAndBig",
|
|
"intOrBig",
|
|
"intXorBig",
|
|
"intAddBig",
|
|
"intMulBig",
|
|
"intMinBig",
|
|
"intMaxBig",
|
|
|
|
"longAndSimple",
|
|
"longOrSimple",
|
|
"longXorSimple",
|
|
"longAddSimple",
|
|
"longMulSimple",
|
|
"longMinSimple",
|
|
"longMaxSimple",
|
|
"longAndDotProduct",
|
|
"longOrDotProduct",
|
|
"longXorDotProduct",
|
|
"longAddDotProduct",
|
|
"longMulDotProduct",
|
|
"longMinDotProduct",
|
|
"longMaxDotProduct",
|
|
"longAndBig",
|
|
"longOrBig",
|
|
"longXorBig",
|
|
"longAddBig",
|
|
"longMulBig",
|
|
"longMinBig",
|
|
"longMaxBig",
|
|
|
|
"floatAddSimple",
|
|
"floatMulSimple",
|
|
"floatMinSimple",
|
|
"floatMaxSimple",
|
|
"floatAddDotProduct",
|
|
"floatMulDotProduct",
|
|
"floatMinDotProduct",
|
|
"floatMaxDotProduct",
|
|
"floatAddBig",
|
|
"floatMulBig",
|
|
"floatMinBig",
|
|
"floatMaxBig",
|
|
|
|
"doubleAddSimple",
|
|
"doubleMulSimple",
|
|
"doubleMinSimple",
|
|
"doubleMaxSimple",
|
|
"doubleAddDotProduct",
|
|
"doubleMulDotProduct",
|
|
"doubleMinDotProduct",
|
|
"doubleMaxDotProduct",
|
|
"doubleAddBig",
|
|
"doubleMulBig",
|
|
"doubleMinBig",
|
|
"doubleMaxBig"})
|
|
public void runTests() {
|
|
for (Map.Entry<String,TestFunction> entry : tests.entrySet()) {
|
|
String name = entry.getKey();
|
|
TestFunction test = entry.getValue();
|
|
// Recall gold value from before compilation
|
|
Object gold = golds.get(name);
|
|
// Compute new result
|
|
Object result = test.run();
|
|
// Compare gold and new result
|
|
try {
|
|
Verify.checkEQ(gold, result);
|
|
} catch (VerifyException e) {
|
|
throw new RuntimeException("Verify failed for " + name, e);
|
|
}
|
|
}
|
|
}
|
|
|
|
static byte[] fillRandom(byte[] a) {
|
|
for (int i = 0; i < a.length; i++) {
|
|
a[i] = (byte)(int)GEN_I.next();
|
|
}
|
|
return a;
|
|
}
|
|
|
|
static char[] fillRandom(char[] a) {
|
|
for (int i = 0; i < a.length; i++) {
|
|
a[i] = (char)(int)GEN_I.next();
|
|
}
|
|
return a;
|
|
}
|
|
|
|
static short[] fillRandom(short[] a) {
|
|
for (int i = 0; i < a.length; i++) {
|
|
a[i] = (short)(int)GEN_I.next();
|
|
}
|
|
return a;
|
|
}
|
|
|
|
static int[] fillRandom(int[] a) {
|
|
G.fill(GEN_I, a);
|
|
return a;
|
|
}
|
|
|
|
static long[] fillRandom(long[] a) {
|
|
G.fill(GEN_L, a);
|
|
return a;
|
|
}
|
|
|
|
static float[] fillRandom(float[] a) {
|
|
G.fill(GEN_F, a);
|
|
return a;
|
|
}
|
|
|
|
static double[] fillRandom(double[] a) {
|
|
G.fill(GEN_D, a);
|
|
return a;
|
|
}
|
|
|
|
// ---------byte***Simple ------------------------------------------------------------
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE + "min(max_int, max_byte)", "> 0",
|
|
IRNode.AND_REDUCTION_V, "> 0",
|
|
IRNode.AND_VI, "> 0"},
|
|
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true", "rvv", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_B,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static byte byteAndSimple() {
|
|
byte acc = (byte)0xFF; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
byte val = in1B[i];
|
|
acc &= val;
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE + "min(max_int, max_byte)", "> 0",
|
|
IRNode.OR_REDUCTION_V, "> 0",
|
|
IRNode.OR_VI, "> 0"},
|
|
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true", "rvv", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_B,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static byte byteOrSimple() {
|
|
byte acc = 0; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
byte val = in1B[i];
|
|
acc |= val;
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE + "min(max_int, max_byte)", "> 0",
|
|
IRNode.XOR_REDUCTION_V, "> 0",
|
|
IRNode.XOR_VI, "> 0"},
|
|
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true", "rvv", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_B,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static byte byteXorSimple() {
|
|
byte acc = 0; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
byte val = in1B[i];
|
|
acc ^= val;
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(failOn = IRNode.LOAD_VECTOR_B) // does not vectorize for now, might in the future.
|
|
private static byte byteAddSimple() {
|
|
byte acc = 0; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
byte val = in1B[i];
|
|
acc += val;
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(failOn = IRNode.LOAD_VECTOR_B) // does not vectorize for now, might in the future.
|
|
private static byte byteMulSimple() {
|
|
byte acc = 1; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
byte val = in1B[i];
|
|
acc *= val;
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE + "min(max_int, max_byte)", "> 0",
|
|
IRNode.MIN_REDUCTION_V, "> 0",
|
|
IRNode.MIN_VI, "> 0"},
|
|
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true", "rvv", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_B,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static byte byteMinSimple() {
|
|
byte acc = Byte.MAX_VALUE; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
byte val = in1B[i];
|
|
acc = (byte)Math.min(acc, val);
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE + "min(max_int, max_byte)", "> 0",
|
|
IRNode.MAX_REDUCTION_V, "> 0",
|
|
IRNode.MAX_VI, "> 0"},
|
|
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true", "rvv", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_B,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static byte byteMaxSimple() {
|
|
byte acc = Byte.MIN_VALUE; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
byte val = in1B[i];
|
|
acc = (byte)Math.max(acc, val);
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
// ---------byte***DotProduct ------------------------------------------------------------
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE + "min(max_int, max_byte)", "> 0",
|
|
IRNode.AND_REDUCTION_V, "> 0",
|
|
IRNode.AND_VI, "> 0"},
|
|
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true", "rvv", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_B,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static byte byteAndDotProduct() {
|
|
byte acc = (byte)0xFF; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
byte val = (byte)(in1B[i] * in2B[i]);
|
|
acc &= val;
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE + "min(max_int, max_byte)", "> 0",
|
|
IRNode.OR_REDUCTION_V, "> 0",
|
|
IRNode.OR_VI, "> 0"},
|
|
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true", "rvv", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_B,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static byte byteOrDotProduct() {
|
|
byte acc = 0; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
byte val = (byte)(in1B[i] * in2B[i]);
|
|
acc |= val;
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE + "min(max_int, max_byte)", "> 0",
|
|
IRNode.XOR_REDUCTION_V, "> 0",
|
|
IRNode.XOR_VI, "> 0"},
|
|
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true", "rvv", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_B,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static byte byteXorDotProduct() {
|
|
byte acc = 0; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
byte val = (byte)(in1B[i] * in2B[i]);
|
|
acc ^= val;
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(failOn = IRNode.LOAD_VECTOR_B) // does not vectorize for now, might in the future.
|
|
private static byte byteAddDotProduct() {
|
|
byte acc = 0; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
byte val = (byte)(in1B[i] * in2B[i]);
|
|
acc += val;
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(failOn = IRNode.LOAD_VECTOR_B) // does not vectorize for now, might in the future.
|
|
private static byte byteMulDotProduct() {
|
|
byte acc = 1; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
byte val = (byte)(in1B[i] * in2B[i]);
|
|
acc *= val;
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE + "min(max_int, max_byte)", "> 0",
|
|
IRNode.MIN_REDUCTION_V, "> 0",
|
|
IRNode.MIN_VI, "> 0"},
|
|
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true", "rvv", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_B,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static byte byteMinDotProduct() {
|
|
byte acc = Byte.MAX_VALUE; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
byte val = (byte)(in1B[i] * in2B[i]);
|
|
acc = (byte)Math.min(acc, val);
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE + "min(max_int, max_byte)", "> 0",
|
|
IRNode.MAX_REDUCTION_V, "> 0",
|
|
IRNode.MAX_VI, "> 0"},
|
|
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true", "rvv", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_B,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static byte byteMaxDotProduct() {
|
|
byte acc = Byte.MIN_VALUE; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
byte val = (byte)(in1B[i] * in2B[i]);
|
|
acc = (byte)Math.max(acc, val);
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
// ---------byte***Big ------------------------------------------------------------
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE + "min(max_int, max_byte)", "> 0",
|
|
IRNode.AND_REDUCTION_V, "> 0",
|
|
IRNode.AND_VI, "> 0"},
|
|
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true", "rvv", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_B,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static byte byteAndBig() {
|
|
byte acc = (byte)0xFF; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
byte val = (byte)((in1B[i] * in2B[i]) + (in1B[i] * in3B[i]) + (in2B[i] * in3B[i]));
|
|
acc &= val;
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE + "min(max_int, max_byte)", "> 0",
|
|
IRNode.OR_REDUCTION_V, "> 0",
|
|
IRNode.OR_VI, "> 0"},
|
|
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true", "rvv", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_B,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static byte byteOrBig() {
|
|
byte acc = 0; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
byte val = (byte)((in1B[i] * in2B[i]) + (in1B[i] * in3B[i]) + (in2B[i] * in3B[i]));
|
|
acc |= val;
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE + "min(max_int, max_byte)", "> 0",
|
|
IRNode.XOR_REDUCTION_V, "> 0",
|
|
IRNode.XOR_VI, "> 0"},
|
|
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true", "rvv", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_B,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static byte byteXorBig() {
|
|
byte acc = 0; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
byte val = (byte)((in1B[i] * in2B[i]) + (in1B[i] * in3B[i]) + (in2B[i] * in3B[i]));
|
|
acc ^= val;
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(failOn = IRNode.LOAD_VECTOR_B) // does not vectorize for now, might in the future.
|
|
private static byte byteAddBig() {
|
|
byte acc = 0; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
byte val = (byte)((in1B[i] * in2B[i]) + (in1B[i] * in3B[i]) + (in2B[i] * in3B[i]));
|
|
acc += val;
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(failOn = IRNode.LOAD_VECTOR_B) // does not vectorize for now, might in the future.
|
|
private static byte byteMulBig() {
|
|
byte acc = 1; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
byte val = (byte)((in1B[i] * in2B[i]) + (in1B[i] * in3B[i]) + (in2B[i] * in3B[i]));
|
|
acc *= val;
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE + "min(max_int, max_byte)", "> 0",
|
|
IRNode.MIN_REDUCTION_V, "> 0",
|
|
IRNode.MIN_VI, "> 0"},
|
|
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true", "rvv", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_B,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static byte byteMinBig() {
|
|
byte acc = Byte.MAX_VALUE; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
byte val = (byte)((in1B[i] * in2B[i]) + (in1B[i] * in3B[i]) + (in2B[i] * in3B[i]));
|
|
acc = (byte)Math.min(acc, val);
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE + "min(max_int, max_byte)", "> 0",
|
|
IRNode.MAX_REDUCTION_V, "> 0",
|
|
IRNode.MAX_VI, "> 0"},
|
|
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true", "rvv", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_B,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static byte byteMaxBig() {
|
|
byte acc = Byte.MIN_VALUE; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
byte val = (byte)((in1B[i] * in2B[i]) + (in1B[i] * in3B[i]) + (in2B[i] * in3B[i]));
|
|
acc = (byte)Math.max(acc, val);
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
// ---------char***Simple ------------------------------------------------------------
|
|
@Test
|
|
@IR(failOn = IRNode.LOAD_VECTOR_C) // does not vectorize for now, might in the future.
|
|
private static char charAndSimple() {
|
|
char acc = (char)0xFFFF; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
char val = in1C[i];
|
|
acc &= val;
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(failOn = IRNode.LOAD_VECTOR_C) // does not vectorize for now, might in the future.
|
|
private static char charOrSimple() {
|
|
char acc = 0; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
char val = in1C[i];
|
|
acc |= val;
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(failOn = IRNode.LOAD_VECTOR_C) // does not vectorize for now, might in the future.
|
|
private static char charXorSimple() {
|
|
char acc = 0; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
char val = in1C[i];
|
|
acc ^= val;
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(failOn = IRNode.LOAD_VECTOR_C) // does not vectorize for now, might in the future.
|
|
private static char charAddSimple() {
|
|
char acc = 0; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
char val = in1C[i];
|
|
acc += val;
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(failOn = IRNode.LOAD_VECTOR_C) // does not vectorize for now, might in the future.
|
|
private static char charMulSimple() {
|
|
char acc = 1; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
char val = in1C[i];
|
|
acc *= val;
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(failOn = IRNode.LOAD_VECTOR_C) // does not vectorize for now, might in the future.
|
|
private static char charMinSimple() {
|
|
char acc = Character.MAX_VALUE; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
char val = in1C[i];
|
|
acc = (char)Math.min(acc, val);
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(failOn = IRNode.LOAD_VECTOR_C) // does not vectorize for now, might in the future.
|
|
private static char charMaxSimple() {
|
|
char acc = Character.MIN_VALUE; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
char val = in1C[i];
|
|
acc = (char)Math.max(acc, val);
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
// ---------char***DotProduct ------------------------------------------------------------
|
|
@Test
|
|
@IR(failOn = IRNode.LOAD_VECTOR_C) // does not vectorize for now, might in the future.
|
|
private static char charAndDotProduct() {
|
|
char acc = (char)0xFFFF; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
char val = (char)(in1C[i] * in2C[i]);
|
|
acc &= val;
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(failOn = IRNode.LOAD_VECTOR_C) // does not vectorize for now, might in the future.
|
|
private static char charOrDotProduct() {
|
|
char acc = 0; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
char val = (char)(in1C[i] * in2C[i]);
|
|
acc |= val;
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(failOn = IRNode.LOAD_VECTOR_C) // does not vectorize for now, might in the future.
|
|
private static char charXorDotProduct() {
|
|
char acc = 0; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
char val = (char)(in1C[i] * in2C[i]);
|
|
acc ^= val;
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(failOn = IRNode.LOAD_VECTOR_C) // does not vectorize for now, might in the future.
|
|
private static char charAddDotProduct() {
|
|
char acc = 0; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
char val = (char)(in1C[i] * in2C[i]);
|
|
acc += val;
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(failOn = IRNode.LOAD_VECTOR_C) // does not vectorize for now, might in the future.
|
|
private static char charMulDotProduct() {
|
|
char acc = 1; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
char val = (char)(in1C[i] * in2C[i]);
|
|
acc *= val;
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(failOn = IRNode.LOAD_VECTOR_C) // does not vectorize for now, might in the future.
|
|
private static char charMinDotProduct() {
|
|
char acc = Character.MAX_VALUE; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
char val = (char)(in1C[i] * in2C[i]);
|
|
acc = (char)Math.min(acc, val);
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(failOn = IRNode.LOAD_VECTOR_C) // does not vectorize for now, might in the future.
|
|
private static char charMaxDotProduct() {
|
|
char acc = Character.MIN_VALUE; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
char val = (char)(in1C[i] * in2C[i]);
|
|
acc = (char)Math.max(acc, val);
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
// ---------char***Big ------------------------------------------------------------
|
|
@Test
|
|
@IR(failOn = IRNode.LOAD_VECTOR_C) // does not vectorize for now, might in the future.
|
|
private static char charAndBig() {
|
|
char acc = (char)0xFFFF; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
char val = (char)((in1C[i] * in2C[i]) + (in1C[i] * in3C[i]) + (in2C[i] * in3C[i]));
|
|
acc &= val;
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(failOn = IRNode.LOAD_VECTOR_C) // does not vectorize for now, might in the future.
|
|
private static char charOrBig() {
|
|
char acc = 0; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
char val = (char)((in1C[i] * in2C[i]) + (in1C[i] * in3C[i]) + (in2C[i] * in3C[i]));
|
|
acc |= val;
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(failOn = IRNode.LOAD_VECTOR_C) // does not vectorize for now, might in the future.
|
|
private static char charXorBig() {
|
|
char acc = 0; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
char val = (char)((in1C[i] * in2C[i]) + (in1C[i] * in3C[i]) + (in2C[i] * in3C[i]));
|
|
acc ^= val;
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(failOn = IRNode.LOAD_VECTOR_C) // does not vectorize for now, might in the future.
|
|
private static char charAddBig() {
|
|
char acc = 0; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
char val = (char)((in1C[i] * in2C[i]) + (in1C[i] * in3C[i]) + (in2C[i] * in3C[i]));
|
|
acc += val;
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(failOn = IRNode.LOAD_VECTOR_C) // does not vectorize for now, might in the future.
|
|
private static char charMulBig() {
|
|
char acc = 1; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
char val = (char)((in1C[i] * in2C[i]) + (in1C[i] * in3C[i]) + (in2C[i] * in3C[i]));
|
|
acc *= val;
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(failOn = IRNode.LOAD_VECTOR_C) // does not vectorize for now, might in the future.
|
|
private static char charMinBig() {
|
|
char acc = Character.MAX_VALUE; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
char val = (char)((in1C[i] * in2C[i]) + (in1C[i] * in3C[i]) + (in2C[i] * in3C[i]));
|
|
acc = (char)Math.min(acc, val);
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(failOn = IRNode.LOAD_VECTOR_C) // does not vectorize for now, might in the future.
|
|
private static char charMaxBig() {
|
|
char acc = Character.MIN_VALUE; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
char val = (char)((in1C[i] * in2C[i]) + (in1C[i] * in3C[i]) + (in2C[i] * in3C[i]));
|
|
acc = (char)Math.max(acc, val);
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
// ---------short***Simple ------------------------------------------------------------
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE + "min(max_int, max_short)", "> 0",
|
|
IRNode.AND_REDUCTION_V, "> 0",
|
|
IRNode.AND_VI, "> 0"},
|
|
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true", "rvv", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_S,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static short shortAndSimple() {
|
|
short acc = (short)0xFFFF; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
short val = in1S[i];
|
|
acc &= val;
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE + "min(max_int, max_short)", "> 0",
|
|
IRNode.OR_REDUCTION_V, "> 0",
|
|
IRNode.OR_VI, "> 0"},
|
|
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true", "rvv", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_S,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static short shortOrSimple() {
|
|
short acc = 0; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
short val = in1S[i];
|
|
acc |= val;
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE + "min(max_int, max_short)", "> 0",
|
|
IRNode.XOR_REDUCTION_V, "> 0",
|
|
IRNode.XOR_VI, "> 0"},
|
|
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true", "rvv", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_S,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static short shortXorSimple() {
|
|
short acc = 0; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
short val = in1S[i];
|
|
acc ^= val;
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(failOn = IRNode.LOAD_VECTOR_S) // does not vectorize for now, might in the future.
|
|
private static short shortAddSimple() {
|
|
short acc = 0; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
short val = in1S[i];
|
|
acc += val;
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(failOn = IRNode.LOAD_VECTOR_S) // does not vectorize for now, might in the future.
|
|
private static short shortMulSimple() {
|
|
short acc = 1; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
short val = in1S[i];
|
|
acc *= val;
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE + "min(max_int, max_short)", "> 0",
|
|
IRNode.MIN_REDUCTION_V, "> 0",
|
|
IRNode.MIN_VI, "> 0"},
|
|
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true", "rvv", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_S,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static short shortMinSimple() {
|
|
short acc = Short.MAX_VALUE; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
short val = in1S[i];
|
|
acc = (short)Math.min(acc, val);
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE + "min(max_int, max_short)", "> 0",
|
|
IRNode.MAX_REDUCTION_V, "> 0",
|
|
IRNode.MAX_VI, "> 0"},
|
|
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true", "rvv", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_S,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static short shortMaxSimple() {
|
|
short acc = Short.MIN_VALUE; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
short val = in1S[i];
|
|
acc = (short)Math.max(acc, val);
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
// ---------short***DotProduct ------------------------------------------------------------
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE + "min(max_int, max_short)", "> 0",
|
|
IRNode.AND_REDUCTION_V, "> 0",
|
|
IRNode.AND_VI, "> 0"},
|
|
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true", "rvv", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_S,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static short shortAndDotProduct() {
|
|
short acc = (short)0xFFFF; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
short val = (short)(in1S[i] * in2S[i]);
|
|
acc &= val;
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE + "min(max_int, max_short)", "> 0",
|
|
IRNode.OR_REDUCTION_V, "> 0",
|
|
IRNode.OR_VI, "> 0"},
|
|
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true", "rvv", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_S,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static short shortOrDotProduct() {
|
|
short acc = 0; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
short val = (short)(in1S[i] * in2S[i]);
|
|
acc |= val;
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE + "min(max_int, max_short)", "> 0",
|
|
IRNode.XOR_REDUCTION_V, "> 0",
|
|
IRNode.XOR_VI, "> 0"},
|
|
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true", "rvv", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_S,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static short shortXorDotProduct() {
|
|
short acc = 0; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
short val = (short)(in1S[i] * in2S[i]);
|
|
acc ^= val;
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(failOn = IRNode.LOAD_VECTOR_S) // does not vectorize for now, might in the future.
|
|
private static short shortAddDotProduct() {
|
|
short acc = 0; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
short val = (short)(in1S[i] * in2S[i]);
|
|
acc += val;
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(failOn = IRNode.LOAD_VECTOR_S) // does not vectorize for now, might in the future.
|
|
private static short shortMulDotProduct() {
|
|
short acc = 1; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
short val = (short)(in1S[i] * in2S[i]);
|
|
acc *= val;
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE + "min(max_int, max_short)", "> 0",
|
|
IRNode.MIN_REDUCTION_V, "> 0",
|
|
IRNode.MIN_VI, "> 0"},
|
|
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true", "rvv", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_S,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static short shortMinDotProduct() {
|
|
short acc = Short.MAX_VALUE; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
short val = (short)(in1S[i] * in2S[i]);
|
|
acc = (short)Math.min(acc, val);
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE + "min(max_int, max_short)", "> 0",
|
|
IRNode.MAX_REDUCTION_V, "> 0",
|
|
IRNode.MAX_VI, "> 0"},
|
|
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true", "rvv", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_S,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static short shortMaxDotProduct() {
|
|
short acc = Short.MIN_VALUE; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
short val = (short)(in1S[i] * in2S[i]);
|
|
acc = (short)Math.max(acc, val);
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
// ---------short***Big ------------------------------------------------------------
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE + "min(max_int, max_short)", "> 0",
|
|
IRNode.AND_REDUCTION_V, "> 0",
|
|
IRNode.AND_VI, "> 0"},
|
|
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true", "rvv", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_S,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static short shortAndBig() {
|
|
short acc = (short)0xFFFF; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
short val = (short)((in1S[i] * in2S[i]) + (in1S[i] * in3S[i]) + (in2S[i] * in3S[i]));
|
|
acc &= val;
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE + "min(max_int, max_short)", "> 0",
|
|
IRNode.OR_REDUCTION_V, "> 0",
|
|
IRNode.OR_VI, "> 0"},
|
|
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true", "rvv", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_S,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static short shortOrBig() {
|
|
short acc = 0; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
short val = (short)((in1S[i] * in2S[i]) + (in1S[i] * in3S[i]) + (in2S[i] * in3S[i]));
|
|
acc |= val;
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE + "min(max_int, max_short)", "> 0",
|
|
IRNode.XOR_REDUCTION_V, "> 0",
|
|
IRNode.XOR_VI, "> 0"},
|
|
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true", "rvv", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_S,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static short shortXorBig() {
|
|
short acc = 0; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
short val = (short)((in1S[i] * in2S[i]) + (in1S[i] * in3S[i]) + (in2S[i] * in3S[i]));
|
|
acc ^= val;
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(failOn = IRNode.LOAD_VECTOR_S) // does not vectorize for now, might in the future.
|
|
private static short shortAddBig() {
|
|
short acc = 0; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
short val = (short)((in1S[i] * in2S[i]) + (in1S[i] * in3S[i]) + (in2S[i] * in3S[i]));
|
|
acc += val;
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(failOn = IRNode.LOAD_VECTOR_S) // does not vectorize for now, might in the future.
|
|
private static short shortMulBig() {
|
|
short acc = 1; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
short val = (short)((in1S[i] * in2S[i]) + (in1S[i] * in3S[i]) + (in2S[i] * in3S[i]));
|
|
acc *= val;
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE + "min(max_int, max_short)", "> 0",
|
|
IRNode.MIN_REDUCTION_V, "> 0",
|
|
IRNode.MIN_VI, "> 0"},
|
|
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true", "rvv", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_S,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static short shortMinBig() {
|
|
short acc = Short.MAX_VALUE; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
short val = (short)((in1S[i] * in2S[i]) + (in1S[i] * in3S[i]) + (in2S[i] * in3S[i]));
|
|
acc = (short)Math.min(acc, val);
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE + "min(max_int, max_short)", "> 0",
|
|
IRNode.MAX_REDUCTION_V, "> 0",
|
|
IRNode.MAX_VI, "> 0"},
|
|
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true", "rvv", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_S,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static short shortMaxBig() {
|
|
short acc = Short.MIN_VALUE; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
short val = (short)((in1S[i] * in2S[i]) + (in1S[i] * in3S[i]) + (in2S[i] * in3S[i]));
|
|
acc = (short)Math.max(acc, val);
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
// ---------int***Simple ------------------------------------------------------------
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
|
|
IRNode.AND_REDUCTION_V, "> 0",
|
|
IRNode.AND_VI, "> 0"},
|
|
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_I,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static int intAndSimple() {
|
|
int acc = 0xFFFFFFFF; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
int val = in1I[i];
|
|
acc &= val;
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
|
|
IRNode.OR_REDUCTION_V, "> 0",
|
|
IRNode.OR_VI, "> 0"},
|
|
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_I,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static int intOrSimple() {
|
|
int acc = 0; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
int val = in1I[i];
|
|
acc |= val;
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
|
|
IRNode.XOR_REDUCTION_V, "> 0",
|
|
IRNode.XOR_VI, "> 0"},
|
|
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_I,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static int intXorSimple() {
|
|
int acc = 0; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
int val = in1I[i];
|
|
acc ^= val;
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
|
|
IRNode.ADD_REDUCTION_VI, "> 0",
|
|
IRNode.ADD_VI, "> 0"},
|
|
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_I,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static int intAddSimple() {
|
|
int acc = 0; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
int val = in1I[i];
|
|
acc += val;
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
|
|
IRNode.MUL_REDUCTION_VI, "> 0",
|
|
IRNode.MUL_VI, "> 0"},
|
|
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_I,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static int intMulSimple() {
|
|
int acc = 1; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
int val = in1I[i];
|
|
acc *= val;
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
|
|
IRNode.MIN_REDUCTION_V, "> 0",
|
|
IRNode.MIN_VI, "> 0"},
|
|
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_I,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static int intMinSimple() {
|
|
int acc = Integer.MAX_VALUE; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
int val = in1I[i];
|
|
acc = Math.min(acc, val);
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
|
|
IRNode.MAX_REDUCTION_V, "> 0",
|
|
IRNode.MAX_VI, "> 0"},
|
|
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_I,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static int intMaxSimple() {
|
|
int acc = Integer.MIN_VALUE; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
int val = in1I[i];
|
|
acc = Math.max(acc, val);
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
// ---------int***DotProduct ------------------------------------------------------------
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
|
|
IRNode.AND_REDUCTION_V, "> 0",
|
|
IRNode.AND_VI, "> 0"},
|
|
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_I,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static int intAndDotProduct() {
|
|
int acc = 0xFFFFFFFF; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
int val = in1I[i] * in2I[i];
|
|
acc &= val;
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
|
|
IRNode.OR_REDUCTION_V, "> 0",
|
|
IRNode.OR_VI, "> 0"},
|
|
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_I,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static int intOrDotProduct() {
|
|
int acc = 0; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
int val = in1I[i] * in2I[i];
|
|
acc |= val;
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
|
|
IRNode.XOR_REDUCTION_V, "> 0",
|
|
IRNode.XOR_VI, "> 0"},
|
|
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_I,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static int intXorDotProduct() {
|
|
int acc = 0; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
int val = in1I[i] * in2I[i];
|
|
acc ^= val;
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
|
|
IRNode.ADD_REDUCTION_VI, "> 0",
|
|
IRNode.ADD_VI, "> 0"},
|
|
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_I,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static int intAddDotProduct() {
|
|
int acc = 0; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
int val = in1I[i] * in2I[i];
|
|
acc += val;
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
|
|
IRNode.MUL_REDUCTION_VI, "> 0",
|
|
IRNode.MUL_VI, "> 0"},
|
|
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_I,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static int intMulDotProduct() {
|
|
int acc = 1; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
int val = in1I[i] * in2I[i];
|
|
acc *= val;
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
|
|
IRNode.MIN_REDUCTION_V, "> 0",
|
|
IRNode.MIN_VI, "> 0"},
|
|
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_I,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static int intMinDotProduct() {
|
|
int acc = Integer.MAX_VALUE; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
int val = in1I[i] * in2I[i];
|
|
acc = Math.min(acc, val);
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
|
|
IRNode.MAX_REDUCTION_V, "> 0",
|
|
IRNode.MAX_VI, "> 0"},
|
|
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_I,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static int intMaxDotProduct() {
|
|
int acc = Integer.MIN_VALUE; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
int val = in1I[i] * in2I[i];
|
|
acc = Math.max(acc, val);
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
// ---------int***Big ------------------------------------------------------------
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
|
|
IRNode.AND_REDUCTION_V, "> 0",
|
|
IRNode.AND_VI, "> 0"},
|
|
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_I,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static int intAndBig() {
|
|
int acc = 0xFFFFFFFF; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
int val = (in1I[i] * in2I[i]) + (in1I[i] * in3I[i]) + (in2I[i] * in3I[i]);
|
|
acc &= val;
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
|
|
IRNode.OR_REDUCTION_V, "> 0",
|
|
IRNode.OR_VI, "> 0"},
|
|
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_I,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static int intOrBig() {
|
|
int acc = 0; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
int val = (in1I[i] * in2I[i]) + (in1I[i] * in3I[i]) + (in2I[i] * in3I[i]);
|
|
acc |= val;
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
|
|
IRNode.XOR_REDUCTION_V, "> 0",
|
|
IRNode.XOR_VI, "> 0"},
|
|
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_I,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static int intXorBig() {
|
|
int acc = 0; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
int val = (in1I[i] * in2I[i]) + (in1I[i] * in3I[i]) + (in2I[i] * in3I[i]);
|
|
acc ^= val;
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
|
|
IRNode.ADD_REDUCTION_VI, "> 0",
|
|
IRNode.ADD_VI, "> 0"},
|
|
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_I,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static int intAddBig() {
|
|
int acc = 0; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
int val = (in1I[i] * in2I[i]) + (in1I[i] * in3I[i]) + (in2I[i] * in3I[i]);
|
|
acc += val;
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
|
|
IRNode.MUL_REDUCTION_VI, "> 0",
|
|
IRNode.MUL_VI, "> 0"},
|
|
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_I,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static int intMulBig() {
|
|
int acc = 1; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
int val = (in1I[i] * in2I[i]) + (in1I[i] * in3I[i]) + (in2I[i] * in3I[i]);
|
|
acc *= val;
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
|
|
IRNode.MIN_REDUCTION_V, "> 0",
|
|
IRNode.MIN_VI, "> 0"},
|
|
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_I,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static int intMinBig() {
|
|
int acc = Integer.MAX_VALUE; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
int val = (in1I[i] * in2I[i]) + (in1I[i] * in3I[i]) + (in2I[i] * in3I[i]);
|
|
acc = Math.min(acc, val);
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
|
|
IRNode.MAX_REDUCTION_V, "> 0",
|
|
IRNode.MAX_VI, "> 0"},
|
|
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_I,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static int intMaxBig() {
|
|
int acc = Integer.MIN_VALUE; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
int val = (in1I[i] * in2I[i]) + (in1I[i] * in3I[i]) + (in2I[i] * in3I[i]);
|
|
acc = Math.max(acc, val);
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
// ---------long***Simple ------------------------------------------------------------
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
|
|
IRNode.AND_REDUCTION_V, "> 0",
|
|
IRNode.AND_VL, "> 0"},
|
|
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_L,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static long longAndSimple() {
|
|
long acc = 0xFFFFFFFFFFFFFFFFL; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
long val = in1L[i];
|
|
acc &= val;
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
|
|
IRNode.OR_REDUCTION_V, "> 0",
|
|
IRNode.OR_VL, "> 0"},
|
|
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_L,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static long longOrSimple() {
|
|
long acc = 0; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
long val = in1L[i];
|
|
acc |= val;
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
|
|
IRNode.XOR_REDUCTION_V, "> 0",
|
|
IRNode.XOR_VL, "> 0"},
|
|
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_L,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static long longXorSimple() {
|
|
long acc = 0; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
long val = in1L[i];
|
|
acc ^= val;
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
|
|
IRNode.ADD_REDUCTION_VL, "> 0",
|
|
IRNode.ADD_VL, "> 0"},
|
|
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_L,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static long longAddSimple() {
|
|
long acc = 0; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
long val = in1L[i];
|
|
acc += val;
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
|
|
IRNode.MUL_REDUCTION_VL, "> 0",
|
|
IRNode.MUL_VL, "> 0"}, // vector accumulator
|
|
applyIfCPUFeature = {"avx512dq", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_L,
|
|
applyIfCPUFeatureAnd = {"avx512dq", "false", "sse4.1", "true"})
|
|
// I think this could vectorize, but currently does not. Filed: JDK-8370673
|
|
@IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
|
|
IRNode.MUL_REDUCTION_VL, "> 0",
|
|
IRNode.MUL_VL, "= 0"}, // Reduction NOT moved out of loop
|
|
applyIfCPUFeatureOr = {"asimd", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
// Note: NEON does not support MulVL for auto vectorization. There is
|
|
// a scalarized implementation, but that is not profitable for
|
|
// auto vectorization in almost all cases, and would not be
|
|
// profitable here at any rate.
|
|
// Hence, we have to keep the reduction inside the loop, and
|
|
// cannot use the MulVL as the vector accumulator.
|
|
@IR(failOn = IRNode.LOAD_VECTOR_L,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static long longMulSimple() {
|
|
long acc = 1; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
long val = in1L[i];
|
|
acc *= val;
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
|
|
IRNode.MIN_REDUCTION_V, "> 0",
|
|
IRNode.MIN_VL, "> 0"},
|
|
applyIfCPUFeatureOr = {"avx512", "true", "asimd", "true", "rvv", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_L,
|
|
applyIfCPUFeatureAnd = {"avx512", "false", "avx2", "true"})
|
|
// I think this could vectorize, but currently does not. Filed: JDK-8370671
|
|
@IR(failOn = IRNode.LOAD_VECTOR_L,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static long longMinSimple() {
|
|
long acc = Long.MAX_VALUE; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
long val = in1L[i];
|
|
acc = Math.min(acc, val);
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
|
|
IRNode.MAX_REDUCTION_V, "> 0",
|
|
IRNode.MAX_VL, "> 0"},
|
|
applyIfCPUFeatureOr = {"avx512", "true", "asimd", "true", "rvv", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_L,
|
|
applyIfCPUFeatureAnd = {"avx512", "false", "avx2", "true"})
|
|
// I think this could vectorize, but currently does not. Filed: JDK-8370671
|
|
@IR(failOn = IRNode.LOAD_VECTOR_L,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static long longMaxSimple() {
|
|
long acc = Long.MIN_VALUE; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
long val = in1L[i];
|
|
acc = Math.max(acc, val);
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
// ---------long***DotProduct ------------------------------------------------------------
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
|
|
IRNode.AND_REDUCTION_V, "> 0",
|
|
IRNode.AND_VL, "> 0"},
|
|
applyIfCPUFeature = {"sse4.1", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_L,
|
|
applyIfCPUFeatureAnd = {"asimd", "true"})
|
|
// While AndReductionV is implemented in NEON (see longAndSimple), MulVL is not.
|
|
// Filed: JDK-8370686
|
|
@IR(failOn = IRNode.LOAD_VECTOR_L,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static long longAndDotProduct() {
|
|
long acc = 0xFFFFFFFFFFFFFFFFL; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
long val = in1L[i] * in2L[i];
|
|
acc &= val;
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
|
|
IRNode.OR_REDUCTION_V, "> 0",
|
|
IRNode.OR_VL, "> 0"},
|
|
applyIfCPUFeature = {"sse4.1", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_L,
|
|
applyIfCPUFeatureAnd = {"asimd", "true"})
|
|
// While OrReductionV is implemented in NEON (see longOrSimple), MulVL is not.
|
|
// Filed: JDK-8370686
|
|
@IR(failOn = IRNode.LOAD_VECTOR_L,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static long longOrDotProduct() {
|
|
long acc = 0; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
long val = in1L[i] * in2L[i];
|
|
acc |= val;
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
|
|
IRNode.XOR_REDUCTION_V, "> 0",
|
|
IRNode.XOR_VL, "> 0"},
|
|
applyIfCPUFeature = {"sse4.1", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_L,
|
|
applyIfCPUFeatureAnd = {"asimd", "true"})
|
|
// While MaxReductionV is implemented in NEON (see longXorSimple), MulVL is not.
|
|
// Filed: JDK-8370686
|
|
@IR(failOn = IRNode.LOAD_VECTOR_L,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static long longXorDotProduct() {
|
|
long acc = 0; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
long val = in1L[i] * in2L[i];
|
|
acc ^= val;
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
|
|
IRNode.ADD_REDUCTION_VL, "> 0",
|
|
IRNode.ADD_VL, "> 0"},
|
|
applyIfCPUFeature = {"sse4.1", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_L,
|
|
applyIfCPUFeatureAnd = {"asimd", "true"})
|
|
// While MaxReductionV is implemented in NEON (see longAddSimple), MulVL is not.
|
|
// Filed: JDK-8370686
|
|
@IR(failOn = IRNode.LOAD_VECTOR_L,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static long longAddDotProduct() {
|
|
long acc = 0; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
long val = in1L[i] * in2L[i];
|
|
acc += val;
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
|
|
IRNode.MUL_REDUCTION_VL, "> 0",
|
|
IRNode.MUL_VL, "> 0"},
|
|
applyIfCPUFeature = {"avx512dq", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_L,
|
|
applyIfCPUFeatureAnd = {"avx512dq", "false", "sse4.1", "true"})
|
|
// I think this could vectorize, but currently does not. Filed: JDK-8370673
|
|
@IR(failOn = IRNode.LOAD_VECTOR_L,
|
|
applyIfCPUFeatureAnd = {"asimd", "true"})
|
|
// MulVL is not implemented on NEON, so we also not have the reduction.
|
|
// Filed: JDK-8370686
|
|
@IR(failOn = IRNode.LOAD_VECTOR_L,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static long longMulDotProduct() {
|
|
long acc = 1; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
long val = in1L[i] * in2L[i];
|
|
acc *= val;
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
|
|
IRNode.MIN_REDUCTION_V, "> 0",
|
|
IRNode.MIN_VL, "> 0"},
|
|
applyIfCPUFeature = {"avx512", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_L,
|
|
applyIfCPUFeatureAnd = {"avx512", "false", "avx2", "true"})
|
|
// I think this could vectorize, but currently does not. Filed: JDK-8370671
|
|
@IR(failOn = IRNode.LOAD_VECTOR_L,
|
|
applyIfCPUFeatureAnd = {"asimd", "true"})
|
|
// While MaxReductionV is implemented in NEON (see longMinSimple), MulVL is not.
|
|
// Filed: JDK-8370686
|
|
@IR(failOn = IRNode.LOAD_VECTOR_L,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static long longMinDotProduct() {
|
|
long acc = Long.MAX_VALUE; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
long val = in1L[i] * in2L[i];
|
|
acc = Math.min(acc, val);
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
|
|
IRNode.MAX_REDUCTION_V, "> 0",
|
|
IRNode.MAX_VL, "> 0"},
|
|
applyIfCPUFeature = {"avx512", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_L,
|
|
applyIfCPUFeatureAnd = {"avx512", "false", "avx2", "true"})
|
|
// I think this could vectorize, but currently does not. Filed: JDK-8370671
|
|
@IR(failOn = IRNode.LOAD_VECTOR_L,
|
|
applyIfCPUFeatureAnd = {"asimd", "true"})
|
|
// While MaxReductionV is implemented in NEON (see longMaxSimple), MulVL is not.
|
|
// Filed: JDK-8370686
|
|
@IR(failOn = IRNode.LOAD_VECTOR_L,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static long longMaxDotProduct() {
|
|
long acc = Long.MIN_VALUE; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
long val = in1L[i] * in2L[i];
|
|
acc = Math.max(acc, val);
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
// ---------long***Big ------------------------------------------------------------
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
|
|
IRNode.AND_REDUCTION_V, "> 0",
|
|
IRNode.AND_VL, "> 0"},
|
|
applyIfCPUFeature = {"sse4.1", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_L,
|
|
applyIfCPUFeatureAnd = {"asimd", "true"})
|
|
// While AndReductionV is implemented in NEON (see longAndSimple), MulVL is not.
|
|
// Filed: JDK-8370686
|
|
@IR(failOn = IRNode.LOAD_VECTOR_L,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static long longAndBig() {
|
|
long acc = 0xFFFFFFFFFFFFFFFFL; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
long val = (in1L[i] * in2L[i]) + (in1L[i] * in3L[i]) + (in2L[i] * in3L[i]);
|
|
acc &= val;
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
|
|
IRNode.OR_REDUCTION_V, "> 0",
|
|
IRNode.OR_VL, "> 0"},
|
|
applyIfCPUFeature = {"sse4.1", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_L,
|
|
applyIfCPUFeatureAnd = {"asimd", "true"})
|
|
// While OrReductionV is implemented in NEON (see longOrSimple), MulVL is not.
|
|
// Filed: JDK-8370686
|
|
@IR(failOn = IRNode.LOAD_VECTOR_L,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static long longOrBig() {
|
|
long acc = 0; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
long val = (in1L[i] * in2L[i]) + (in1L[i] * in3L[i]) + (in2L[i] * in3L[i]);
|
|
acc |= val;
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
|
|
IRNode.XOR_REDUCTION_V, "> 0",
|
|
IRNode.XOR_VL, "> 0"},
|
|
applyIfCPUFeature = {"sse4.1", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_L,
|
|
applyIfCPUFeatureAnd = {"asimd", "true"})
|
|
// While MaxReductionV is implemented in NEON (see longXorSimple), MulVL is not.
|
|
// Filed: JDK-8370686
|
|
@IR(failOn = IRNode.LOAD_VECTOR_L,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static long longXorBig() {
|
|
long acc = 0; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
long val = (in1L[i] * in2L[i]) + (in1L[i] * in3L[i]) + (in2L[i] * in3L[i]);
|
|
acc ^= val;
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
|
|
IRNode.ADD_REDUCTION_VL, "> 0",
|
|
IRNode.ADD_VL, "> 0"},
|
|
applyIfCPUFeature = {"sse4.1", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_L,
|
|
applyIfCPUFeatureAnd = {"asimd", "true"})
|
|
// While MaxReductionV is implemented in NEON (see longAddSimple), MulVL is not.
|
|
// Filed: JDK-8370686
|
|
@IR(failOn = IRNode.LOAD_VECTOR_L,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static long longAddBig() {
|
|
long acc = 0; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
long val = (in1L[i] * in2L[i]) + (in1L[i] * in3L[i]) + (in2L[i] * in3L[i]);
|
|
acc += val;
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
|
|
IRNode.MUL_REDUCTION_VL, "> 0",
|
|
IRNode.MUL_VL, "> 0"},
|
|
applyIfCPUFeature = {"avx512dq", "true"},
|
|
applyIfAnd = {"AutoVectorizationOverrideProfitability", "> 0",
|
|
"LoopUnrollLimit", ">= 1000"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_L,
|
|
applyIfCPUFeature = {"avx512dq", "true"},
|
|
applyIfAnd = {"AutoVectorizationOverrideProfitability", "> 0",
|
|
"LoopUnrollLimit", "< 1000"})
|
|
// Increasing the body limit seems to help. Filed for investigation: JDK-8370685
|
|
// If you can eliminate this exception for LoopUnrollLimit, please remove
|
|
// the flag completely from the test, also the "addFlags" at the top.
|
|
@IR(failOn = IRNode.LOAD_VECTOR_L,
|
|
applyIfCPUFeatureAnd = {"asimd", "true"})
|
|
// MulVL is not implemented on NEON, so we also not have the reduction.
|
|
// Filed: JDK-8370686
|
|
@IR(failOn = IRNode.LOAD_VECTOR_L,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static long longMulBig() {
|
|
long acc = 1; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
long val = (in1L[i] * in2L[i]) + (in1L[i] * in3L[i]) + (in2L[i] * in3L[i]);
|
|
acc *= val;
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
|
|
IRNode.MIN_REDUCTION_V, "> 0",
|
|
IRNode.MIN_VL, "> 0"},
|
|
applyIfCPUFeature = {"avx512", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_L,
|
|
applyIfCPUFeatureAnd = {"avx512", "false", "avx2", "true"})
|
|
// I think this could vectorize, but currently does not. Filed: JDK-8370671
|
|
@IR(failOn = IRNode.LOAD_VECTOR_L,
|
|
applyIfCPUFeatureAnd = {"asimd", "true"})
|
|
// While MaxReductionV is implemented in NEON (see longMinSimple), MulVL is not.
|
|
// Filed: JDK-8370686
|
|
@IR(failOn = IRNode.LOAD_VECTOR_L,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static long longMinBig() {
|
|
long acc = Long.MAX_VALUE; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
long val = (in1L[i] * in2L[i]) + (in1L[i] * in3L[i]) + (in2L[i] * in3L[i]);
|
|
acc = Math.min(acc, val);
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
|
|
IRNode.MAX_REDUCTION_V, "> 0",
|
|
IRNode.MAX_VL, "> 0"},
|
|
applyIfCPUFeature = {"avx512", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_L,
|
|
applyIfCPUFeatureAnd = {"avx512", "false", "avx2", "true"})
|
|
// I think this could vectorize, but currently does not. Filed: JDK-8370671
|
|
@IR(failOn = IRNode.LOAD_VECTOR_L,
|
|
applyIfCPUFeatureAnd = {"asimd", "true"})
|
|
// While MaxReductionV is implemented in NEON (see longMaxSimple), MulVL is not.
|
|
// Filed: JDK-8370686
|
|
@IR(failOn = IRNode.LOAD_VECTOR_L,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static long longMaxBig() {
|
|
long acc = Long.MIN_VALUE; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
long val = (in1L[i] * in2L[i]) + (in1L[i] * in3L[i]) + (in2L[i] * in3L[i]);
|
|
acc = Math.max(acc, val);
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
// ---------float***Simple ------------------------------------------------------------
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_F, "> 0",
|
|
IRNode.ADD_REDUCTION_V, "> 0",
|
|
IRNode.ADD_VF, "= 0"},
|
|
applyIfCPUFeature = {"sse4.1", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 2"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_F,
|
|
applyIfCPUFeatureAnd = {"asimd", "true"})
|
|
// I think this could vectorize, but currently does not. Filed: JDK-8370677
|
|
// But: it is not clear that it would be profitable, given the sequential reduction.
|
|
@IR(failOn = IRNode.LOAD_VECTOR_F,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "< 2"})
|
|
// Not considered profitable by cost model, but if forced we can vectorize.
|
|
// Scalar: n loads + n adds
|
|
// Vector: n loads + n adds + n extract (sequential order of reduction)
|
|
private static float floatAddSimple() {
|
|
float acc = 0; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
float val = in1F[i];
|
|
acc += val;
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_F, "> 0",
|
|
IRNode.MUL_REDUCTION_VF, "> 0",
|
|
IRNode.MUL_VF, "= 0"},
|
|
applyIfCPUFeature = {"sse4.1", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 2"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_F,
|
|
applyIfCPUFeatureAnd = {"asimd", "true"})
|
|
// I think this could vectorize, but currently does not. Filed: JDK-8370677
|
|
// But: it is not clear that it would be profitable, given the sequential reduction.
|
|
@IR(failOn = IRNode.LOAD_VECTOR_F,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "< 2"})
|
|
// Not considered profitable by cost model, but if forced we can vectorize.
|
|
// Scalar: n loads + n mul
|
|
// Vector: n loads + n mul + n extract (sequential order of reduction)
|
|
private static float floatMulSimple() {
|
|
float acc = 1; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
float val = in1F[i];
|
|
acc *= val;
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_F, "> 0",
|
|
IRNode.MIN_REDUCTION_V, "> 0",
|
|
IRNode.MIN_VF, "> 0"},
|
|
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true", "rvv", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_F,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static float floatMinSimple() {
|
|
float acc = Float.MAX_VALUE; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
float val = in1F[i];
|
|
acc = Math.min(acc, val);
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_F, "> 0",
|
|
IRNode.MAX_REDUCTION_V, "> 0",
|
|
IRNode.MAX_VF, "> 0"},
|
|
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true", "rvv", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_F,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static float floatMaxSimple() {
|
|
float acc = Float.MIN_VALUE; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
float val = in1F[i];
|
|
acc = Math.max(acc, val);
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
// ---------float***DotProduct ------------------------------------------------------------
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_F, "> 0",
|
|
IRNode.ADD_REDUCTION_V, "> 0",
|
|
IRNode.ADD_VF, "= 0"},
|
|
applyIfCPUFeature = {"sse4.1", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_F,
|
|
applyIfCPUFeatureAnd = {"asimd", "true"})
|
|
// I think this could vectorize, but currently does not. Filed: JDK-8370677
|
|
// But: it is not clear that it would be profitable, given the sequential reduction.
|
|
@IR(failOn = IRNode.LOAD_VECTOR_F,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static float floatAddDotProduct() {
|
|
float acc = 0; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
float val = in1F[i] * in2F[i];
|
|
acc += val;
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_F, "> 0",
|
|
IRNode.MUL_REDUCTION_VF, "> 0",
|
|
IRNode.MUL_VF, "> 0"},
|
|
applyIfCPUFeature = {"sse4.1", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_F,
|
|
applyIfCPUFeatureAnd = {"asimd", "true"})
|
|
// I think this could vectorize, but currently does not. Filed: JDK-8370677
|
|
// But: it is not clear that it would be profitable, given the sequential reduction.
|
|
@IR(failOn = IRNode.LOAD_VECTOR_F,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static float floatMulDotProduct() {
|
|
float acc = 1; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
float val = in1F[i] * in2F[i];
|
|
acc *= val;
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_F, "> 0",
|
|
IRNode.MIN_REDUCTION_V, "> 0",
|
|
IRNode.MIN_VF, "> 0"},
|
|
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true", "rvv", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_F,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static float floatMinDotProduct() {
|
|
float acc = Float.MAX_VALUE; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
float val = in1F[i] * in2F[i];
|
|
acc = Math.min(acc, val);
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_F, "> 0",
|
|
IRNode.MAX_REDUCTION_V, "> 0",
|
|
IRNode.MAX_VF, "> 0"},
|
|
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true", "rvv", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_F,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static float floatMaxDotProduct() {
|
|
float acc = Float.MIN_VALUE; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
float val = in1F[i] * in2F[i];
|
|
acc = Math.max(acc, val);
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
// ---------float***Big ------------------------------------------------------------
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_F, "> 0",
|
|
IRNode.ADD_REDUCTION_V, "> 0",
|
|
IRNode.ADD_VF, "> 0"},
|
|
applyIfCPUFeature = {"sse4.1", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_F,
|
|
applyIfCPUFeatureAnd = {"asimd", "true"})
|
|
// I think this could vectorize, but currently does not. Filed: JDK-8370677
|
|
// But: it is not clear that it would be profitable, given the sequential reduction.
|
|
@IR(failOn = IRNode.LOAD_VECTOR_F,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static float floatAddBig() {
|
|
float acc = 0; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
float val = (in1F[i] * in2F[i]) + (in1F[i] * in3F[i]) + (in2F[i] * in3F[i]);
|
|
acc += val;
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_F, "> 0",
|
|
IRNode.MUL_REDUCTION_VF, "> 0",
|
|
IRNode.MUL_VF, "> 0"},
|
|
applyIfCPUFeature = {"sse4.1", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_F,
|
|
applyIfCPUFeatureAnd = {"asimd", "true"})
|
|
// I think this could vectorize, but currently does not. Filed: JDK-8370677
|
|
// But: it is not clear that it would be profitable, given the sequential reduction.
|
|
@IR(failOn = IRNode.LOAD_VECTOR_F,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static float floatMulBig() {
|
|
float acc = 1; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
float val = (in1F[i] * in2F[i]) + (in1F[i] * in3F[i]) + (in2F[i] * in3F[i]);
|
|
acc *= val;
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_F, "> 0",
|
|
IRNode.MIN_REDUCTION_V, "> 0",
|
|
IRNode.MIN_VF, "> 0"},
|
|
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true", "rvv", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_F,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static float floatMinBig() {
|
|
float acc = Float.MAX_VALUE; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
float val = (in1F[i] * in2F[i]) + (in1F[i] * in3F[i]) + (in2F[i] * in3F[i]);
|
|
acc = Math.min(acc, val);
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_F, "> 0",
|
|
IRNode.MAX_REDUCTION_V, "> 0",
|
|
IRNode.MAX_VF, "> 0"},
|
|
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true", "rvv", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_F,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static float floatMaxBig() {
|
|
float acc = Float.MIN_VALUE; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
float val = (in1F[i] * in2F[i]) + (in1F[i] * in3F[i]) + (in2F[i] * in3F[i]);
|
|
acc = Math.max(acc, val);
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
// ---------double***Simple ------------------------------------------------------------
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_D, "> 0",
|
|
IRNode.ADD_REDUCTION_VD, "> 0",
|
|
IRNode.ADD_VD, "= 0"},
|
|
applyIfCPUFeature = {"sse4.1", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 2"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_D,
|
|
applyIfCPUFeatureAnd = {"asimd", "true"})
|
|
// I think this could vectorize, but currently does not. Filed: JDK-8370677
|
|
// But: it is not clear that it would be profitable, given the sequential reduction.
|
|
@IR(failOn = IRNode.LOAD_VECTOR_D,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "< 2"})
|
|
// Not considered profitable by cost model, but if forced we can vectorize.
|
|
// Scalar: n loads + n adds
|
|
// Vector: n loads + n adds + n extract (sequential order of reduction)
|
|
private static double doubleAddSimple() {
|
|
double acc = 0; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
double val = in1D[i];
|
|
acc += val;
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_D, "> 0",
|
|
IRNode.MUL_REDUCTION_VD, "> 0",
|
|
IRNode.MUL_VD, "= 0"},
|
|
applyIfCPUFeature = {"sse4.1", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 2"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_D,
|
|
applyIfCPUFeatureAnd = {"asimd", "true"})
|
|
// I think this could vectorize, but currently does not. Filed: JDK-8370677
|
|
// But: it is not clear that it would be profitable, given the sequential reduction.
|
|
@IR(failOn = IRNode.LOAD_VECTOR_D,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "< 2"})
|
|
// Not considered profitable by cost model, but if forced we can vectorize.
|
|
// Scalar: n loads + n mul
|
|
// Vector: n loads + n mul + n extract (sequential order of reduction)
|
|
private static double doubleMulSimple() {
|
|
double acc = 1; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
double val = in1D[i];
|
|
acc *= val;
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_D, "> 0",
|
|
IRNode.MIN_REDUCTION_V, "> 0",
|
|
IRNode.MIN_VD, "> 0"},
|
|
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true", "rvv", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_D,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static double doubleMinSimple() {
|
|
double acc = Double.MAX_VALUE; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
double val = in1D[i];
|
|
acc = Math.min(acc, val);
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_D, "> 0",
|
|
IRNode.MAX_REDUCTION_V, "> 0",
|
|
IRNode.MAX_VD, "> 0"},
|
|
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true", "rvv", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_D,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static double doubleMaxSimple() {
|
|
double acc = Double.MIN_VALUE; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
double val = in1D[i];
|
|
acc = Math.max(acc, val);
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
// ---------double***DotProduct ------------------------------------------------------------
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_D, "> 0",
|
|
IRNode.ADD_REDUCTION_V, "> 0",
|
|
IRNode.ADD_VD, "= 0"},
|
|
applyIfCPUFeature = {"sse4.1", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_D,
|
|
applyIfCPUFeatureAnd = {"asimd", "true"})
|
|
// I think this could vectorize, but currently does not. Filed: JDK-8370677
|
|
// But: it is not clear that it would be profitable, given the sequential reduction.
|
|
@IR(failOn = IRNode.LOAD_VECTOR_D,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static double doubleAddDotProduct() {
|
|
double acc = 0; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
double val = in1D[i] * in2D[i];
|
|
acc += val;
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_D, "> 0",
|
|
IRNode.MUL_REDUCTION_VD, "> 0",
|
|
IRNode.MUL_VD, "> 0"},
|
|
applyIfCPUFeature = {"sse4.1", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_D,
|
|
applyIfCPUFeatureAnd = {"asimd", "true"})
|
|
// I think this could vectorize, but currently does not. Filed: JDK-8370677
|
|
// But: it is not clear that it would be profitable, given the sequential reduction.
|
|
@IR(failOn = IRNode.LOAD_VECTOR_D,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static double doubleMulDotProduct() {
|
|
double acc = 1; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
double val = in1D[i] * in2D[i];
|
|
acc *= val;
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_D, "> 0",
|
|
IRNode.MIN_REDUCTION_V, "> 0",
|
|
IRNode.MIN_VD, "> 0"},
|
|
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true", "rvv", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_D,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static double doubleMinDotProduct() {
|
|
double acc = Double.MAX_VALUE; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
double val = in1D[i] * in2D[i];
|
|
acc = Math.min(acc, val);
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_D, "> 0",
|
|
IRNode.MAX_REDUCTION_V, "> 0",
|
|
IRNode.MAX_VD, "> 0"},
|
|
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true", "rvv", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_D,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static double doubleMaxDotProduct() {
|
|
double acc = Double.MIN_VALUE; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
double val = in1D[i] * in2D[i];
|
|
acc = Math.max(acc, val);
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
// ---------double***Big ------------------------------------------------------------
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_D, "> 0",
|
|
IRNode.ADD_REDUCTION_V, "> 0",
|
|
IRNode.ADD_VD, "> 0"},
|
|
applyIfCPUFeature = {"sse4.1", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_D,
|
|
applyIfCPUFeatureAnd = {"asimd", "true"})
|
|
// I think this could vectorize, but currently does not. Filed: JDK-8370677
|
|
// But: it is not clear that it would be profitable, given the sequential reduction.
|
|
@IR(failOn = IRNode.LOAD_VECTOR_D,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static double doubleAddBig() {
|
|
double acc = 0; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
double val = (in1D[i] * in2D[i]) + (in1D[i] * in3D[i]) + (in2D[i] * in3D[i]);
|
|
acc += val;
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_D, "> 0",
|
|
IRNode.MUL_REDUCTION_VD, "> 0",
|
|
IRNode.MUL_VD, "> 0"},
|
|
applyIfCPUFeature = {"sse4.1", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_D,
|
|
applyIfCPUFeatureAnd = {"asimd", "true"})
|
|
// I think this could vectorize, but currently does not. Filed: JDK-8370677
|
|
// But: it is not clear that it would be profitable, given the sequential reduction.
|
|
@IR(failOn = IRNode.LOAD_VECTOR_D,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static double doubleMulBig() {
|
|
double acc = 1; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
double val = (in1D[i] * in2D[i]) + (in1D[i] * in3D[i]) + (in2D[i] * in3D[i]);
|
|
acc *= val;
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_D, "> 0",
|
|
IRNode.MIN_REDUCTION_V, "> 0",
|
|
IRNode.MIN_VD, "> 0"},
|
|
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true", "rvv", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_D,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static double doubleMinBig() {
|
|
double acc = Double.MAX_VALUE; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
double val = (in1D[i] * in2D[i]) + (in1D[i] * in3D[i]) + (in2D[i] * in3D[i]);
|
|
acc = Math.min(acc, val);
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
@Test
|
|
@IR(counts = {IRNode.LOAD_VECTOR_D, "> 0",
|
|
IRNode.MAX_REDUCTION_V, "> 0",
|
|
IRNode.MAX_VD, "> 0"},
|
|
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true", "rvv", "true"},
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "> 0"})
|
|
@IR(failOn = IRNode.LOAD_VECTOR_D,
|
|
applyIf = {"AutoVectorizationOverrideProfitability", "= 0"})
|
|
private static double doubleMaxBig() {
|
|
double acc = Double.MIN_VALUE; // neutral element
|
|
for (int i = 0; i < SIZE; i++) {
|
|
double val = (in1D[i] * in2D[i]) + (in1D[i] * in3D[i]) + (in2D[i] * in3D[i]);
|
|
acc = Math.max(acc, val);
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
|
|
}
|