/* * Copyright (c) 2022, 2023, Arm Limited. All rights reserved. * Copyright (c) 2023, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. * * This code is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * version 2 for more details (a copy is included in the LICENSE file that * accompanied this code). * * You should have received a copy of the GNU General Public License version * 2 along with this work; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. * * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA * or visit www.oracle.com if you need additional information or have any * questions. */ /* * @test * @summary Vectorization test on combined operations * @library /test/lib / * * @build jdk.test.whitebox.WhiteBox * compiler.vectorization.runner.VectorizationTestRunner * * @requires vm.compiler2.enabled * * @run driver jdk.test.lib.helpers.ClassFileInstaller jdk.test.whitebox.WhiteBox * * @run main/othervm -Xbootclasspath/a:. * -XX:+UnlockDiagnosticVMOptions * -XX:+WhiteBoxAPI * compiler.vectorization.runner.LoopCombinedOpTest nCOH_nAV * * @run main/othervm -Xbootclasspath/a:. * -XX:+UnlockDiagnosticVMOptions * -XX:+WhiteBoxAPI * compiler.vectorization.runner.LoopCombinedOpTest nCOH_yAV * * @run main/othervm -Xbootclasspath/a:. * -XX:+UnlockDiagnosticVMOptions * -XX:+WhiteBoxAPI * compiler.vectorization.runner.LoopCombinedOpTest yCOH_nAV * * @run main/othervm -Xbootclasspath/a:. * -XX:+UnlockDiagnosticVMOptions * -XX:+WhiteBoxAPI * compiler.vectorization.runner.LoopCombinedOpTest yCOH_yAV */ package compiler.vectorization.runner; import compiler.lib.ir_framework.*; import java.util.Random; public class LoopCombinedOpTest extends VectorizationTestRunner { // We must pass the flags directly to the test-VM, and not the driver vm in the @run above. @Override protected String[] testVMFlags(String[] args) { return switch (args[0]) { case "nCOH_nAV" -> new String[]{"-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", "-XX:-AlignVector"}; case "nCOH_yAV" -> new String[]{"-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", "-XX:+AlignVector"}; case "yCOH_nAV" -> new String[]{"-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", "-XX:-AlignVector"}; case "yCOH_yAV" -> new String[]{"-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", "-XX:+AlignVector"}; default -> { throw new RuntimeException("Test argument not recognized: " + args[0]); } }; } private static final int SIZE = 543; private int[] a; private int[] b; private int[] c; private int[] d; private long[] l1; private long[] l2; private short[] s1; private short[] s2; private int intInv; public LoopCombinedOpTest() { a = new int[SIZE]; b = new int[SIZE]; c = new int[SIZE]; d = new int[SIZE]; l1 = new long[SIZE]; l2 = new long[SIZE]; s1 = new short[SIZE]; s2 = new short[SIZE]; for (int i = 0; i < SIZE; i++) { a[i] = -654321 * i; b[i] = 123456 * i; c[i] = -998877 * i; d[i] = 778899 * i; l1[i] = 5000000000L * i; l2[i] = -600000000L * i; s1[i] = (short) (3 * i); s2[i] = (short) (-2 * i); } Random ran = new Random(999); intInv = ran.nextInt(); } @Test @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"}, counts = {IRNode.STORE_VECTOR, ">0", IRNode.LOAD_VECTOR_I, "> 0"}) public int[] opWithConstant() { int[] res = new int[SIZE]; for (int i = 0; i < SIZE; i++) { res[i] = a[i] + 1234567890; } return res; } @Test @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse4.1", "true"}, counts = {IRNode.STORE_VECTOR, ">0", IRNode.LOAD_VECTOR_I, "> 0"}) public int[] opWithLoopInvariant() { int[] res = new int[SIZE]; for (int i = 0; i < SIZE; i++) { res[i] = b[i] * intInv; } return res; } @Test @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse4.1", "true"}, counts = {IRNode.STORE_VECTOR, ">0", IRNode.LOAD_VECTOR_I, "> 0"}) public int[] opWithConstantAndLoopInvariant() { int[] res = new int[SIZE]; for (int i = 0; i < SIZE; i++) { res[i] = c[i] * (intInv & 0xfff); } return res; } @Test @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"}, counts = {IRNode.STORE_VECTOR, ">0", IRNode.LOAD_VECTOR_I, "> 0"}) public int[] multipleOps() { int[] res = new int[SIZE]; for (int i = 0; i < SIZE; i++) { res[i] = a[i] & b[i] + c[i] & d[i]; } return res; } @Test @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse4.1", "true"}, counts = {IRNode.STORE_VECTOR, ">0", IRNode.LOAD_VECTOR_I, "> 0"}) public int[] multipleOpsWithMultipleConstants() { int[] res = new int[SIZE]; for (int i = 0; i < SIZE; i++) { res[i] = a[i] * 12345678 + 87654321 + b[i] & 0xffff - c[i] * d[i] * 2; } return res; } @Test @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse4.1", "true"}, counts = {IRNode.STORE_VECTOR, ">0", IRNode.LOAD_VECTOR_I, "> 0"}) // With sse2, the MulI does not vectorize. This means we have vectorized stores // to res1, but scalar loads from res1. The store-to-load-forwarding failure // detection catches this and rejects vectorization. public int[] multipleStores() { int[] res1 = new int[SIZE]; int[] res2 = new int[SIZE]; int[] res3 = new int[SIZE]; for (int i = 0; i < SIZE; i++) { res1[i] = a[i] & b[i]; res2[i] = c[i] | d[i]; res3[i] = res1[i] * res2[i]; } return res3; } @Test @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse4.1", "true"}, counts = {IRNode.STORE_VECTOR, ">0", IRNode.LOAD_VECTOR_I, "> 0"}) public int[] multipleStoresWithCommonSubExpression() { int[] res1 = new int[SIZE]; int[] res2 = new int[SIZE]; int[] res3 = new int[SIZE]; for (int i = 0; i < SIZE; i++) { res1[i] = a[i] * b[i]; res2[i] = c[i] * d[i]; res3[i] = res1[i] + res2[i]; } return res3; } @Test @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"}, applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false"}, counts = {IRNode.STORE_VECTOR, ">0", IRNode.LOAD_VECTOR_S, "> 0", IRNode.LOAD_VECTOR_I, "> 0"}) public int[] multipleOpsWith2DifferentTypes() { short[] res1 = new short[SIZE]; int[] res2 = new int[SIZE]; for (int i = 0; i < SIZE; i++) { res1[i] = (short) (s1[i] + s2[i]); res2[i] = a[i] + b[i]; // We have a mix of int and short loads/stores. // With UseCompactObjectHeaders and AlignVector, // we must 8-byte align all vector loads/stores. // // int: // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4*iter // = 16 (or 12 if UseCompactObjectHeaders=true) // If UseCompactObjectHeaders=false: iter % 2 = 0 // If UseCompactObjectHeaders=true: iter % 2 = 1 // // byte: // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 1*iter // = 16 (or 12 if UseCompactObjectHeaders=true) // If UseCompactObjectHeaders=false: iter % 8 = 0 // If UseCompactObjectHeaders=true: iter % 8 = 4 // // -> we cannot align both if UseCompactObjectHeaders=true. } return res2; } @Test @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"}, applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false"}, counts = {IRNode.STORE_VECTOR, ">0", IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_ANY, "> 0", IRNode.LOAD_VECTOR_L, "> 0"}) public long[] multipleOpsWith3DifferentTypes() { short[] res1 = new short[SIZE]; int[] res2 = new int[SIZE]; long[] res3 = new long[SIZE]; for (int i = 0; i < SIZE; i++) { res1[i] = (short) (s1[i] + s2[i]); res2[i] = a[i] + b[i]; res3[i] = l1[i] + l2[i]; // We have a mix of int and short loads/stores. // With UseCompactObjectHeaders and AlignVector, // we must 8-byte align all vector loads/stores. // // int: // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4*iter // = 16 (or 12 if UseCompactObjectHeaders=true) // If UseCompactObjectHeaders=false: iter % 2 = 0 // If UseCompactObjectHeaders=true: iter % 2 = 1 // // byte: // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 1*iter // = 16 (or 12 if UseCompactObjectHeaders=true) // If UseCompactObjectHeaders=false: iter % 8 = 0 // If UseCompactObjectHeaders=true: iter % 8 = 4 // // -> we cannot align both if UseCompactObjectHeaders=true. } return res3; } @Test @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"}, counts = {IRNode.STORE_VECTOR, ">0", IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_ANY, "> 0", IRNode.LOAD_VECTOR_L, "> 0"}) public long[] multipleOpsWith2NonAdjacentTypes() { short[] res1 = new short[SIZE]; long[] res2 = new long[SIZE]; for (int i = 0; i < SIZE; i++) { res1[i] = (short) (s1[i] + s2[i]); res2[i] = l1[i] + l2[i]; } return res2; } @Test @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"}, applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false"}, counts = {IRNode.STORE_VECTOR, ">0", IRNode.LOAD_VECTOR_S, "> 0", IRNode.LOAD_VECTOR_I, "> 0"}) public int[] multipleOpsWith2DifferentTypesAndConstant() { short[] res1 = new short[SIZE]; int[] res2 = new int[SIZE]; for (int i = 0; i < SIZE; i++) { res1[i] = (short) (s1[i] + s2[i]); res2[i] = a[i] + 88888888;; // We have a mix of int and short loads/stores. // With UseCompactObjectHeaders and AlignVector, // we must 8-byte align all vector loads/stores. // // int: // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4*iter // = 16 (or 12 if UseCompactObjectHeaders=true) // If UseCompactObjectHeaders=false: iter % 2 = 0 // If UseCompactObjectHeaders=true: iter % 2 = 1 // // byte: // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 1*iter // = 16 (or 12 if UseCompactObjectHeaders=true) // If UseCompactObjectHeaders=false: iter % 8 = 0 // If UseCompactObjectHeaders=true: iter % 8 = 4 // // -> we cannot align both if UseCompactObjectHeaders=true. } return res2; } @Test @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse4.1", "true"}, applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false"}, counts = {IRNode.STORE_VECTOR, ">0", IRNode.LOAD_VECTOR_S, "> 0", IRNode.LOAD_VECTOR_I, "> 0"}) public int[] multipleOpsWith2DifferentTypesAndInvariant() { short[] res1 = new short[SIZE]; int[] res2 = new int[SIZE]; for (int i = 0; i < SIZE; i++) { res1[i] = (short) (s1[i] + s2[i]); res2[i] = a[i] * intInv; // We have a mix of int and short loads/stores. // With UseCompactObjectHeaders and AlignVector, // we must 8-byte align all vector loads/stores. // // int: // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4*iter // = 16 (or 12 if UseCompactObjectHeaders=true) // If UseCompactObjectHeaders=false: iter % 2 = 0 // If UseCompactObjectHeaders=true: iter % 2 = 1 // // byte: // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 1*iter // = 16 (or 12 if UseCompactObjectHeaders=true) // If UseCompactObjectHeaders=false: iter % 8 = 0 // If UseCompactObjectHeaders=true: iter % 8 = 4 // // -> we cannot align both if UseCompactObjectHeaders=true. } return res2; } @Test @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse4.1", "true"}, applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false"}, counts = {IRNode.STORE_VECTOR, ">0", IRNode.LOAD_VECTOR_S, "> 0", IRNode.LOAD_VECTOR_I, "> 0"}) public int[] multipleOpsWith2DifferentTypesAndComplexExpression() { short[] res1 = new short[SIZE]; int[] res2 = new int[SIZE]; for (int i = 0; i < SIZE; i++) { res1[i] = (short) (s1[i] + s2[i]); res2[i] = a[i] * (b[i] + intInv * c[i] & 0xfffffa); // same argument as in multipleOpsWith2DifferentTypesAndInvariant. } return res2; } @Test @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse3", "true"}, applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false"}, counts = {IRNode.STORE_VECTOR, ">0", IRNode.LOAD_VECTOR_S, "> 0", IRNode.LOAD_VECTOR_I, "> 0"}) public int[] multipleOpsWith2DifferentTypesAndSharedOp() { int i = 0, sum = 0; int[] res1 = new int[SIZE]; short[] res2 = new short[SIZE]; while (++i < SIZE) { sum += (res1[i]--); res2[i]++; // We have a mix of int and short loads/stores. // With UseCompactObjectHeaders and AlignVector, // we must 8-byte align all vector loads/stores. // // int: // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4*iter // = 16 (or 12 if UseCompactObjectHeaders=true) // If UseCompactObjectHeaders=false: iter % 2 = 0 // If UseCompactObjectHeaders=true: iter % 2 = 1 // // byte: // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 1*iter // = 16 (or 12 if UseCompactObjectHeaders=true) // If UseCompactObjectHeaders=false: iter % 8 = 0 // If UseCompactObjectHeaders=true: iter % 8 = 4 // // -> we cannot align both if UseCompactObjectHeaders=true. } return res1; } @Test // POPULATE_INDEX seems to mess with vectorization, see JDK-8332878. public int[] fillIndexPlusStride() { int[] res = new int[SIZE]; for (int i = 0; i < SIZE; i++) { res[i] = i + 1; } return res; } @Test // POPULATE_INDEX seems to mess with vectorization, see JDK-8332878. public int[] addArrayWithIndex() { int[] res = new int[SIZE]; for (int i = 0; i < SIZE; i++) { res[i] = a[i] + i; } return res; } @Test // POPULATE_INDEX seems to mess with vectorization, see JDK-8332878. public short[] multiplyAddShortIndex() { short[] res = new short[SIZE]; for (int i = 0; i < SIZE; i++) { res[i] = (short) (i * i + i); } return res; } @Test // POPULATE_INDEX seems to mess with vectorization, see JDK-8332878. public int[] multiplyBySumOfIndexAndInvariant() { int[] res = new int[SIZE]; for (int i = 0; i < SIZE; i++) { res[i] = a[i] * (i + 10 + intInv); } return res; } @Test @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse4.1", "true"}, applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false"}, counts = {IRNode.STORE_VECTOR, ">0"}) public int[] manuallyUnrolledStride2() { int[] res = new int[SIZE]; for (int i = 0; i < SIZE - 1; i += 2) { res[i] = a[i] * b[i]; res[i + 1] = a[i + 1] * b[i + 1]; // Hand-unrolling can mess with alignment! // // With UseCompactObjectHeaders and AlignVector, // we must 8-byte align all vector loads/stores. // // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 8*iter // = 16 (or 12 if UseCompactObjectHeaders=true) // If UseCompactObjectHeaders=false: 16 divisible by 8 -> vectorize // If UseCompactObjectHeaders=true: 12 not divisibly by 8 -> not vectorize } return res; } @Test @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse4.1", "true"}, counts = {IRNode.STORE_VECTOR, ">0", IRNode.LOAD_VECTOR_I, "> 0"}) public int partialVectorizableLoop() { int[] res = new int[SIZE]; int k = 9; for (int i = 0; i < SIZE / 2; i++) { res[i] = a[i] * b[i]; k = 3 * k + 1; } return k; } }