diff --git a/src/hotspot/cpu/aarch64/aarch64_vector.ad b/src/hotspot/cpu/aarch64/aarch64_vector.ad
index 138eabb7553..ed6f042ade4 100644
--- a/src/hotspot/cpu/aarch64/aarch64_vector.ad
+++ b/src/hotspot/cpu/aarch64/aarch64_vector.ad
@@ -5512,6 +5512,37 @@ instruct vmask_truecount_sve(iRegINoSp dst, pReg src) %{
   ins_pipe(pipe_slow);
 %}
 
+// Combined rule for VectorMaskTrueCount (VectorStoreMask) when the vector element type is not T_BYTE.
+
+instruct vstoremask_truecount_neon(iRegINoSp dst, vReg src, immI_gt_1 size, vReg vtmp) %{
+  match(Set dst (VectorMaskTrueCount (VectorStoreMask src size)));
+  effect(TEMP vtmp);
+  format %{ "vstoremask_truecount_neon $dst, $src\t# KILL $vtmp" %}
+  ins_encode %{
+    // Input "src" is a vector mask represented as lanes with
+    // 0/-1 as element values.
+    uint esize = (uint)$size$$constant;
+    if (esize == 8) {
+      __ addpd($vtmp$$FloatRegister, $src$$FloatRegister);
+    } else {
+      uint length_in_bytes = Matcher::vector_length_in_bytes(this, $src);
+      Assembler::SIMD_Arrangement arrangement = Assembler::esize2arrangement(esize,
+                                                                             /* isQ */ length_in_bytes == 16);
+      if (arrangement == __ T2S) {
+        // ADDV has no 2S form; a pairwise add of src with itself leaves
+        // the sum of both lanes in lane 0.
+        __ addpv($vtmp$$FloatRegister, arrangement, $src$$FloatRegister, $src$$FloatRegister);
+      } else {
+        __ addv($vtmp$$FloatRegister, arrangement, $src$$FloatRegister);
+      }
+    }
+    // The sum of the 0/-1 lanes is the negated true count; sign-extend its low byte and negate.
+    __ smov($dst$$Register, $vtmp$$FloatRegister, __ B, 0);
+    __ neg($dst$$Register, $dst$$Register);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
 // first true
 
 instruct vmask_firsttrue_lt8e(iRegINoSp dst, vReg src, rFlagsReg cr) %{
diff --git a/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4 b/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4
index 1c91ed0d1c9..bca14a0a305 100644
--- a/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4
+++ b/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4
@@ -3822,6 +3822,37 @@ instruct vmask_truecount_sve(iRegINoSp dst, pReg src) %{
   ins_pipe(pipe_slow);
 %}
 
+// Combined rule for VectorMaskTrueCount (VectorStoreMask) when the vector element type is not T_BYTE.
+
+instruct vstoremask_truecount_neon(iRegINoSp dst, vReg src, immI_gt_1 size, vReg vtmp) %{
+  match(Set dst (VectorMaskTrueCount (VectorStoreMask src size)));
+  effect(TEMP vtmp);
+  format %{ "vstoremask_truecount_neon $dst, $src\t# KILL $vtmp" %}
+  ins_encode %{
+    // Input "src" is a vector mask represented as lanes with
+    // 0/-1 as element values.
+    uint esize = (uint)$size$$constant;
+    if (esize == 8) {
+      __ addpd($vtmp$$FloatRegister, $src$$FloatRegister);
+    } else {
+      uint length_in_bytes = Matcher::vector_length_in_bytes(this, $src);
+      Assembler::SIMD_Arrangement arrangement = Assembler::esize2arrangement(esize,
+                                                                             /* isQ */ length_in_bytes == 16);
+      if (arrangement == __ T2S) {
+        // ADDV has no 2S form; a pairwise add of src with itself leaves
+        // the sum of both lanes in lane 0.
+        __ addpv($vtmp$$FloatRegister, arrangement, $src$$FloatRegister, $src$$FloatRegister);
+      } else {
+        __ addv($vtmp$$FloatRegister, arrangement, $src$$FloatRegister);
+      }
+    }
+    // The sum of the 0/-1 lanes is the negated true count; sign-extend its low byte and negate.
+    __ smov($dst$$Register, $vtmp$$FloatRegister, __ B, 0);
+    __ neg($dst$$Register, $dst$$Register);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
 // first true
 
 instruct vmask_firsttrue_lt8e(iRegINoSp dst, vReg src, rFlagsReg cr) %{
diff --git a/test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java b/test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java
index 69baddba601..be62d09ca91 100644
--- a/test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java
+++ b/test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java
@@ -1463,6 +1463,11 @@ public class IRNode {
         machOnlyNameRegex(VNOT_L_MASKED, "vnotL_masked");
     }
 
+    public static final String VSTOREMASK_TRUECOUNT = PREFIX + "VSTOREMASK_TRUECOUNT" + POSTFIX;
+    static {
+        machOnlyNameRegex(VSTOREMASK_TRUECOUNT, "vstoremask_truecount_neon");
+    }
+
     public static final String XOR = PREFIX + "XOR" + POSTFIX;
     static {
         beforeMatchingNameRegex(XOR, "Xor(I|L)");
diff --git a/test/hotspot/jtreg/compiler/vectorapi/TestVectorMaskTrueCount.java b/test/hotspot/jtreg/compiler/vectorapi/TestVectorMaskTrueCount.java
new file mode 100644
index 00000000000..7df965a6906
--- /dev/null
+++ b/test/hotspot/jtreg/compiler/vectorapi/TestVectorMaskTrueCount.java
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2023, Arm Limited. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package compiler.vectorapi;
+
+import compiler.lib.ir_framework.*;
+import java.util.Random;
+import jdk.incubator.vector.*;
+import jdk.test.lib.Asserts;
+import jdk.test.lib.Utils;
+
+/**
+ * @test
+ * @bug 8307795
+ * @key randomness
+ * @library /test/lib /
+ * @requires os.arch=="aarch64"
+ * @summary AArch64: Optimize VectorMask.truecount() on Neon
+ * @modules jdk.incubator.vector
+ *
+ * @run driver compiler.vectorapi.TestVectorMaskTrueCount
+ */
+
+public class TestVectorMaskTrueCount {
+    private static final VectorSpecies<Double> SPECIES = DoubleVector.SPECIES_PREFERRED;
+    private static final int LENGTH = 1024;
+    // Seeded by the test library so that a failing run can be reproduced.
+    private static final Random RD = Utils.getRandomInstance();
+    private static boolean[] ba;
+    private static boolean[] bb;
+
+    static {
+        ba = new boolean[LENGTH];
+        bb = new boolean[LENGTH];
+        for (int i = 0; i < LENGTH; i++) {
+            ba[i] = RD.nextBoolean();
+            bb[i] = RD.nextBoolean();
+        }
+    }
+
+    // Scalar reference: counts the lanes of the vector starting at idx where a and b are both true.
+    static int maskAndTrueCount(boolean[] a, boolean[] b, int idx) {
+        int trueCount = 0;
+        boolean[] c = new boolean[SPECIES.length()];
+
+        for (int i = idx; i < idx + SPECIES.length(); i++) {
+            c[i - idx] = a[i] & b[i];
+        }
+
+        for (int i = 0; i < c.length; i++) {
+            trueCount += c[i] ? 1 : 0;
+        }
+
+        return trueCount;
+    }
+
+    // Compares each per-vector result in r with the scalar reference.
+    static void assertArrayEquals(int[] r, boolean[] a, boolean[] b) {
+        for (int i = 0; i < a.length; i += SPECIES.length()) {
+            Asserts.assertEquals(r[i], maskAndTrueCount(a, b, i));
+        }
+    }
+
+    @Test
+    @IR(counts = { IRNode.VSTOREMASK_TRUECOUNT, ">= 1" })
+    public static void test() {
+        int[] r = new int[LENGTH];
+        for (int i = 0; i < LENGTH; i += SPECIES.length()) {
+            VectorMask<Double> ma = VectorMask.fromArray(SPECIES, ba, i);
+            VectorMask<Double> mb = VectorMask.fromArray(SPECIES, bb, i);
+            r[i] = ma.and(mb).trueCount();
+        }
+
+        assertArrayEquals(r, ba, bb);
+    }
+
+    public static void main(String[] args) {
+        TestFramework testFramework = new TestFramework();
+        testFramework.setDefaultWarmup(10000)
+                     .addFlags("--add-modules=jdk.incubator.vector")
+                     .addFlags("-XX:UseSVE=0")
+                     .start();
+    }
+}
diff --git a/test/micro/org/openjdk/bench/jdk/incubator/vector/StoreMaskTrueCount.java b/test/micro/org/openjdk/bench/jdk/incubator/vector/StoreMaskTrueCount.java
new file mode 100644
index 00000000000..8857af260b8
--- /dev/null
+++ b/test/micro/org/openjdk/bench/jdk/incubator/vector/StoreMaskTrueCount.java
@@ -0,0 +1,81 @@
+//
+// Copyright (c) 2023, Arm Limited. All rights reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+//
+// This code is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License version 2 only, as
+// published by the Free Software Foundation.
+//
+// This code is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+// version 2 for more details (a copy is included in the LICENSE file that
+// accompanied this code).
+//
+// You should have received a copy of the GNU General Public License version
+// 2 along with this work; if not, write to the Free Software Foundation,
+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+//
+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+// or visit www.oracle.com if you need additional information or have any
+// questions.
+//
+//
+package org.openjdk.bench.jdk.incubator.vector;
+
+import java.util.concurrent.TimeUnit;
+import java.util.Random;
+import jdk.incubator.vector.*;
+import org.openjdk.jmh.annotations.*;
+
+@OutputTimeUnit(TimeUnit.MILLISECONDS)
+@State(Scope.Thread)
+@Fork(jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
+public class StoreMaskTrueCount {
+    private static final VectorSpecies<Short> S_SPECIES = ShortVector.SPECIES_PREFERRED;
+    private static final VectorSpecies<Integer> I_SPECIES = IntVector.SPECIES_PREFERRED;
+    private static final VectorSpecies<Long> L_SPECIES = LongVector.SPECIES_PREFERRED;
+    private static final int LENGTH = 128;
+    private static final Random RD = new Random();
+    private static boolean[] ba;
+
+    static {
+        ba = new boolean[LENGTH];
+        for (int i = 0; i < LENGTH; i++) {
+            ba[i] = RD.nextBoolean();
+        }
+    }
+
+    @Benchmark
+    public static int testShort() {
+        int res = 0;
+        for (int i = 0; i < LENGTH; i += S_SPECIES.length()) {
+            VectorMask<Short> m = VectorMask.fromArray(S_SPECIES, ba, i);
+            res += m.not().trueCount();
+        }
+
+        return res;
+    }
+
+    @Benchmark
+    public static int testInt() {
+        int res = 0;
+        for (int i = 0; i < LENGTH; i += I_SPECIES.length()) {
+            VectorMask<Integer> m = VectorMask.fromArray(I_SPECIES, ba, i);
+            res += m.not().trueCount();
+        }
+
+        return res;
+    }
+
+    @Benchmark
+    public static int testLong() {
+        int res = 0;
+        for (int i = 0; i < LENGTH; i += L_SPECIES.length()) {
+            VectorMask<Long> m = VectorMask.fromArray(L_SPECIES, ba, i);
+            res += m.not().trueCount();
+        }
+
+        return res;
+    }
+}