mirror of
https://github.com/openjdk/jdk.git
synced 2026-02-25 09:40:10 +00:00
8371603: C2: Missing Ideal optimizations for load and store vectors on SVE
Reviewed-by: epeter, erfang Backport-of: b6732d6048259de68a3dd5b4f66ac82f87270404
This commit is contained in:
parent
7dc8f786fe
commit
152a511de8
@ -346,8 +346,14 @@ source %{
|
||||
}
|
||||
|
||||
bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) {
|
||||
// Only SVE has partial vector operations
|
||||
if (UseSVE == 0) {
|
||||
// 1. Only SVE requires partial vector operations.
|
||||
// 2. The vector size in bytes must be smaller than MaxVectorSize.
|
||||
// 3. Predicated vectors have a mask input, which guarantees that
|
||||
// out-of-bounds lanes remain inactive.
|
||||
int length_in_bytes = vt->length_in_bytes();
|
||||
if (UseSVE == 0 ||
|
||||
length_in_bytes == MaxVectorSize ||
|
||||
node->is_predicated_vector()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -370,21 +376,22 @@ source %{
|
||||
return !node->in(1)->is_Con();
|
||||
case Op_LoadVector:
|
||||
case Op_StoreVector:
|
||||
// We use NEON load/store instructions if the vector length is <= 128 bits.
|
||||
return vt->length_in_bytes() > 16;
|
||||
case Op_AddReductionVI:
|
||||
case Op_AddReductionVL:
|
||||
// We may prefer using NEON instructions rather than SVE partial operations.
|
||||
return !VM_Version::use_neon_for_vector(vt->length_in_bytes());
|
||||
// For these ops, we prefer using NEON instructions rather than SVE
|
||||
// predicated instructions for better performance.
|
||||
return !VM_Version::use_neon_for_vector(length_in_bytes);
|
||||
case Op_MinReductionV:
|
||||
case Op_MaxReductionV:
|
||||
// For BYTE/SHORT/INT/FLOAT/DOUBLE types, we may prefer using NEON
|
||||
// instructions rather than SVE partial operations.
|
||||
// For BYTE/SHORT/INT/FLOAT/DOUBLE types, we prefer using NEON
|
||||
// instructions rather than SVE predicated instructions for
|
||||
// better performance.
|
||||
return vt->element_basic_type() == T_LONG ||
|
||||
!VM_Version::use_neon_for_vector(vt->length_in_bytes());
|
||||
!VM_Version::use_neon_for_vector(length_in_bytes);
|
||||
default:
|
||||
// For other ops whose vector size is smaller than the max vector size, a
|
||||
// full-sized unpredicated operation does not impact the final vector result.
|
||||
// For other ops whose vector size is smaller than the max vector
|
||||
// size, a full-sized unpredicated operation does not impact the
|
||||
// vector result.
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
@ -336,8 +336,14 @@ source %{
|
||||
}
|
||||
|
||||
bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) {
|
||||
// Only SVE has partial vector operations
|
||||
if (UseSVE == 0) {
|
||||
// 1. Only SVE requires partial vector operations.
|
||||
// 2. The vector size in bytes must be smaller than MaxVectorSize.
|
||||
// 3. Predicated vectors have a mask input, which guarantees that
|
||||
// out-of-bounds lanes remain inactive.
|
||||
int length_in_bytes = vt->length_in_bytes();
|
||||
if (UseSVE == 0 ||
|
||||
length_in_bytes == MaxVectorSize ||
|
||||
node->is_predicated_vector()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -360,21 +366,22 @@ source %{
|
||||
return !node->in(1)->is_Con();
|
||||
case Op_LoadVector:
|
||||
case Op_StoreVector:
|
||||
// We use NEON load/store instructions if the vector length is <= 128 bits.
|
||||
return vt->length_in_bytes() > 16;
|
||||
case Op_AddReductionVI:
|
||||
case Op_AddReductionVL:
|
||||
// We may prefer using NEON instructions rather than SVE partial operations.
|
||||
return !VM_Version::use_neon_for_vector(vt->length_in_bytes());
|
||||
// For these ops, we prefer using NEON instructions rather than SVE
|
||||
// predicated instructions for better performance.
|
||||
return !VM_Version::use_neon_for_vector(length_in_bytes);
|
||||
case Op_MinReductionV:
|
||||
case Op_MaxReductionV:
|
||||
// For BYTE/SHORT/INT/FLOAT/DOUBLE types, we may prefer using NEON
|
||||
// instructions rather than SVE partial operations.
|
||||
// For BYTE/SHORT/INT/FLOAT/DOUBLE types, we prefer using NEON
|
||||
// instructions rather than SVE predicated instructions for
|
||||
// better performance.
|
||||
return vt->element_basic_type() == T_LONG ||
|
||||
!VM_Version::use_neon_for_vector(vt->length_in_bytes());
|
||||
!VM_Version::use_neon_for_vector(length_in_bytes);
|
||||
default:
|
||||
// For other ops whose vector size is smaller than the max vector size, a
|
||||
// full-sized unpredicated operation does not impact the final vector result.
|
||||
// For other ops whose vector size is smaller than the max vector
|
||||
// size, a full-sized unpredicated operation does not impact the
|
||||
// vector result.
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
@ -329,6 +329,10 @@ public:
|
||||
|
||||
static bool match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt);
|
||||
|
||||
// Determines if a vector operation needs to be partially implemented with a mask
|
||||
// ensuring that only the lanes in range [0, vector_length) are processed. This applies
|
||||
// to operations whose vector length is less than the hardware-supported maximum
|
||||
// vector length. Returns true if the operation requires masking, false otherwise.
|
||||
static bool vector_needs_partial_operations(Node* node, const TypeVect* vt);
|
||||
|
||||
static bool vector_rearrange_requires_load_shuffle(BasicType elem_bt, int vlen);
|
||||
|
||||
@ -936,28 +936,26 @@ bool VectorNode::is_scalar_op_that_returns_int_but_vector_op_returns_long(int op
|
||||
}
|
||||
}
|
||||
|
||||
// Idealize vector operations whose vector size is less than the hardware supported
|
||||
// max vector size. Generate a vector mask for the operation. Lanes with indices
|
||||
// inside of the vector size are set to true, while the remaining lanes are set to
|
||||
// false. Returns the corresponding masked vector node.
|
||||
static Node* ideal_partial_operations(PhaseGVN* phase, Node* node, const TypeVect* vt) {
|
||||
if (!Matcher::vector_needs_partial_operations(node, vt)) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
Node* VectorNode::try_to_gen_masked_vector(PhaseGVN* gvn, Node* node, const TypeVect* vt) {
|
||||
int vopc = node->Opcode();
|
||||
uint vlen = vt->length();
|
||||
BasicType bt = vt->element_basic_type();
|
||||
assert(Matcher::match_rule_supported_vector_masked(vopc, vlen, bt),
|
||||
"The masked feature is required for the vector operation");
|
||||
assert(Matcher::match_rule_supported_vector(Op_VectorMaskGen, vlen, bt),
|
||||
"'VectorMaskGen' is required to generate a vector mask");
|
||||
|
||||
// Predicated vectors do not need to add another mask input
|
||||
if (node->is_predicated_vector() || !Matcher::has_predicated_vectors() ||
|
||||
!Matcher::match_rule_supported_vector_masked(vopc, vlen, bt) ||
|
||||
!Matcher::match_rule_supported_vector(Op_VectorMaskGen, vlen, bt)) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
Node* mask = nullptr;
|
||||
// Generate a vector mask for vector operation whose vector length is lower than the
|
||||
// hardware supported max vector length.
|
||||
if (vt->length_in_bytes() < (uint)MaxVectorSize) {
|
||||
Node* length = gvn->transform(new ConvI2LNode(gvn->makecon(TypeInt::make(vlen))));
|
||||
mask = gvn->transform(VectorMaskGenNode::make(length, bt, vlen));
|
||||
} else {
|
||||
return nullptr;
|
||||
}
|
||||
// Generate a vector mask, with lanes inside of the vector length set to true.
|
||||
Node* length = phase->transform(new ConvI2LNode(phase->makecon(TypeInt::make(vlen))));
|
||||
Node* mask = phase->transform(VectorMaskGenNode::make(length, bt, vlen));
|
||||
|
||||
// Generate the related masked op for vector load/store/load_gather/store_scatter.
|
||||
// Or append the mask to the vector op's input list by default.
|
||||
@ -1037,8 +1035,9 @@ bool VectorNode::should_swap_inputs_to_help_global_value_numbering() {
|
||||
}
|
||||
|
||||
Node* VectorNode::Ideal(PhaseGVN* phase, bool can_reshape) {
|
||||
if (Matcher::vector_needs_partial_operations(this, vect_type())) {
|
||||
return try_to_gen_masked_vector(phase, this, vect_type());
|
||||
Node* n = ideal_partial_operations(phase, this, vect_type());
|
||||
if (n != nullptr) {
|
||||
return n;
|
||||
}
|
||||
|
||||
// Sort inputs of commutative non-predicated vector operations to help value numbering.
|
||||
@ -1119,9 +1118,9 @@ LoadVectorNode* LoadVectorNode::make(int opc, Node* ctl, Node* mem,
|
||||
}
|
||||
|
||||
Node* LoadVectorNode::Ideal(PhaseGVN* phase, bool can_reshape) {
|
||||
const TypeVect* vt = vect_type();
|
||||
if (Matcher::vector_needs_partial_operations(this, vt)) {
|
||||
return VectorNode::try_to_gen_masked_vector(phase, this, vt);
|
||||
Node* n = ideal_partial_operations(phase, this, vect_type());
|
||||
if (n != nullptr) {
|
||||
return n;
|
||||
}
|
||||
return LoadNode::Ideal(phase, can_reshape);
|
||||
}
|
||||
@ -1133,9 +1132,9 @@ StoreVectorNode* StoreVectorNode::make(int opc, Node* ctl, Node* mem, Node* adr,
|
||||
}
|
||||
|
||||
Node* StoreVectorNode::Ideal(PhaseGVN* phase, bool can_reshape) {
|
||||
const TypeVect* vt = vect_type();
|
||||
if (Matcher::vector_needs_partial_operations(this, vt)) {
|
||||
return VectorNode::try_to_gen_masked_vector(phase, this, vt);
|
||||
Node* n = ideal_partial_operations(phase, this, vect_type());
|
||||
if (n != nullptr) {
|
||||
return n;
|
||||
}
|
||||
return StoreNode::Ideal(phase, can_reshape);
|
||||
}
|
||||
@ -1411,11 +1410,11 @@ ReductionNode* ReductionNode::make(int opc, Node* ctrl, Node* n1, Node* n2, Basi
|
||||
}
|
||||
|
||||
Node* ReductionNode::Ideal(PhaseGVN* phase, bool can_reshape) {
|
||||
const TypeVect* vt = vect_type();
|
||||
if (Matcher::vector_needs_partial_operations(this, vt)) {
|
||||
return VectorNode::try_to_gen_masked_vector(phase, this, vt);
|
||||
Node* n = ideal_partial_operations(phase, this, vect_type());
|
||||
if (n != nullptr) {
|
||||
return n;
|
||||
}
|
||||
return nullptr;
|
||||
return Node::Ideal(phase, can_reshape);
|
||||
}
|
||||
|
||||
// Convert fromLong to maskAll if the input sets or unsets all lanes.
|
||||
@ -1893,11 +1892,11 @@ Node* VectorMaskOpNode::make(Node* mask, const Type* ty, int mopc) {
|
||||
}
|
||||
|
||||
Node* VectorMaskOpNode::Ideal(PhaseGVN* phase, bool can_reshape) {
|
||||
const TypeVect* vt = vect_type();
|
||||
if (Matcher::vector_needs_partial_operations(this, vt)) {
|
||||
return VectorNode::try_to_gen_masked_vector(phase, this, vt);
|
||||
Node* n = ideal_partial_operations(phase, this, vect_type());
|
||||
if (n != nullptr) {
|
||||
return n;
|
||||
}
|
||||
return nullptr;
|
||||
return TypeNode::Ideal(phase, can_reshape);
|
||||
}
|
||||
|
||||
Node* VectorMaskCastNode::Identity(PhaseGVN* phase) {
|
||||
|
||||
@ -117,7 +117,6 @@ class VectorNode : public TypeNode {
|
||||
static bool is_vector_bitwise_not_pattern(Node* n);
|
||||
static Node* degenerate_vector_rotate(Node* n1, Node* n2, bool is_rotate_left, int vlen,
|
||||
BasicType bt, PhaseGVN* phase);
|
||||
static Node* try_to_gen_masked_vector(PhaseGVN* gvn, Node* node, const TypeVect* vt);
|
||||
|
||||
// [Start, end) half-open range defining which operands are vectors
|
||||
static void vector_operands(Node* n, uint* start, uint* end);
|
||||
|
||||
@ -1448,6 +1448,16 @@ public class IRNode {
|
||||
beforeMatchingNameRegex(VECTOR_MASK_LANE_IS_SET, "ExtractUB");
|
||||
}
|
||||
|
||||
public static final String VECTOR_MASK_GEN = PREFIX + "VECTOR_MASK_GEN" + POSTFIX;
|
||||
static {
|
||||
beforeMatchingNameRegex(VECTOR_MASK_GEN, "VectorMaskGen");
|
||||
}
|
||||
|
||||
public static final String VECTOR_MASK_FIRST_TRUE = PREFIX + "VECTOR_MASK_FIRST_TRUE" + POSTFIX;
|
||||
static {
|
||||
beforeMatchingNameRegex(VECTOR_MASK_FIRST_TRUE, "VectorMaskFirstTrue");
|
||||
}
|
||||
|
||||
// Can only be used if avx512_vnni is available.
|
||||
public static final String MUL_ADD_VS2VI_VNNI = PREFIX + "MUL_ADD_VS2VI_VNNI" + POSTFIX;
|
||||
static {
|
||||
|
||||
@ -0,0 +1,105 @@
|
||||
/*
|
||||
* Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
|
||||
package compiler.vectorapi;
|
||||
|
||||
import compiler.lib.generators.*;
|
||||
import compiler.lib.ir_framework.*;
|
||||
import jdk.incubator.vector.*;
|
||||
import jdk.test.lib.Asserts;
|
||||
|
||||
/**
|
||||
* @test 8371603
|
||||
* @key randomness
|
||||
* @library /test/lib /
|
||||
* @summary Test the missing optimization issues for vector load/store caused by JDK-8286941
|
||||
* @modules jdk.incubator.vector
|
||||
*
|
||||
* @run driver ${test.main.class}
|
||||
*/
|
||||
public class TestVectorLoadStoreOptimization {
|
||||
private static final int LENGTH = 1024;
|
||||
private static final Generators random = Generators.G;
|
||||
|
||||
private static final VectorSpecies<Integer> SPECIES = IntVector.SPECIES_PREFERRED;
|
||||
|
||||
private static int[] a;
|
||||
|
||||
static {
|
||||
a = new int[LENGTH];
|
||||
random.fill(random.ints(), a);
|
||||
}
|
||||
|
||||
// Test that "LoadVectorNode::Ideal()" calls "LoadNode::Ideal()" as expected,
|
||||
// which sees the previous stores that go to the same position independently,
|
||||
// and optimize out the load with matched store values.
|
||||
@Test
|
||||
@IR(counts = { IRNode.LOAD_VECTOR_I, "1" },
|
||||
applyIfCPUFeatureOr = {"asimd", "true", "avx", "true", "rvv", "true"})
|
||||
public static void testLoadVector() {
|
||||
IntVector v1 = IntVector.fromArray(SPECIES, a, 0);
|
||||
v1.intoArray(a, SPECIES.length());
|
||||
v1.intoArray(a, 2 * SPECIES.length());
|
||||
// The second load vector is equal to the first one and should be optimized
|
||||
// out by "LoadNode::Ideal()".
|
||||
IntVector v2 = IntVector.fromArray(SPECIES, a, SPECIES.length());
|
||||
v2.intoArray(a, 3 * SPECIES.length());
|
||||
}
|
||||
|
||||
@Check(test = "testLoadVector")
|
||||
public static void testLoadVectorVerify() {
|
||||
for (int i = SPECIES.length(); i < 4 * SPECIES.length(); i += SPECIES.length()) {
|
||||
for (int j = 0; j < SPECIES.length(); j++) {
|
||||
Asserts.assertEquals(a[i + j], a[j]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Test that "StoreVectorNode::Ideal()" calls "StoreNode::Ideal()" as expected,
|
||||
// which can get rid of previous stores that go to the same position.
|
||||
@Test
|
||||
@IR(counts = { IRNode.STORE_VECTOR, "1" },
|
||||
applyIfCPUFeatureOr = {"asimd", "true", "avx", "true", "rvv", "true"})
|
||||
public static void testStoreVector() {
|
||||
IntVector v1 = IntVector.fromArray(SPECIES, a, 0 * SPECIES.length());
|
||||
IntVector v2 = IntVector.fromArray(SPECIES, a, 1 * SPECIES.length());
|
||||
// Useless store to same position as below, which should be optimized out by
|
||||
// "StoreNode::Ideal()".
|
||||
v1.intoArray(a, 3 * SPECIES.length());
|
||||
v2.intoArray(a, 3 * SPECIES.length());
|
||||
}
|
||||
|
||||
@Check(test = "testStoreVector")
|
||||
public static void testStoreVectorVerify() {
|
||||
for (int i = 3 * SPECIES.length(); i < 4 * SPECIES.length(); i++) {
|
||||
Asserts.assertEquals(a[i], a[i - 2 * SPECIES.length()]);
|
||||
}
|
||||
}
|
||||
|
||||
public static void main(String[] args) {
|
||||
TestFramework testFramework = new TestFramework();
|
||||
testFramework.setDefaultWarmup(10000)
|
||||
.addFlags("--add-modules=jdk.incubator.vector")
|
||||
.start();
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,432 @@
|
||||
/*
|
||||
* Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
|
||||
package compiler.vectorapi;
|
||||
|
||||
import compiler.lib.generators.*;
|
||||
import compiler.lib.ir_framework.*;
|
||||
import jdk.incubator.vector.*;
|
||||
import jdk.test.lib.Asserts;
|
||||
|
||||
/**
|
||||
* @test 8371603
|
||||
* @key randomness
|
||||
* @library /test/lib /
|
||||
* @summary Test vector operations with vector size less than MaxVectorSize
|
||||
* @modules jdk.incubator.vector
|
||||
*
|
||||
* @run driver ${test.main.class}
|
||||
*/
|
||||
|
||||
public class TestVectorOperationsWithPartialSize {
|
||||
private static final int SIZE = 1024;
|
||||
private static final Generators random = Generators.G;
|
||||
|
||||
private static final VectorSpecies<Integer> ISPEC_128 = IntVector.SPECIES_128;
|
||||
private static final VectorSpecies<Long> LSPEC_128 = LongVector.SPECIES_128;
|
||||
private static final VectorSpecies<Float> FSPEC_128 = FloatVector.SPECIES_128;
|
||||
private static final VectorSpecies<Double> DSPEC_128 = DoubleVector.SPECIES_128;
|
||||
private static final VectorSpecies<Integer> ISPEC_256 = IntVector.SPECIES_256;
|
||||
private static final VectorSpecies<Long> LSPEC_256 = LongVector.SPECIES_256;
|
||||
|
||||
private static int[] ia;
|
||||
private static int[] ib;
|
||||
private static long[] la;
|
||||
private static long[] lb;
|
||||
private static float[] fa;
|
||||
private static float[] fb;
|
||||
private static double[] da;
|
||||
private static double[] db;
|
||||
private static boolean[] m;
|
||||
private static boolean[] mr;
|
||||
private static int[] indices;
|
||||
|
||||
static {
|
||||
ia = new int[SIZE];
|
||||
ib = new int[SIZE];
|
||||
la = new long[SIZE];
|
||||
lb = new long[SIZE];
|
||||
fa = new float[SIZE];
|
||||
fb = new float[SIZE];
|
||||
da = new double[SIZE];
|
||||
db = new double[SIZE];
|
||||
m = new boolean[SIZE];
|
||||
mr = new boolean[SIZE];
|
||||
indices = new int[SIZE];
|
||||
|
||||
random.fill(random.ints(), ia);
|
||||
random.fill(random.longs(), la);
|
||||
random.fill(random.floats(), fa);
|
||||
random.fill(random.doubles(), da);
|
||||
random.fill(random.uniformInts(0, ISPEC_128.length()), indices);
|
||||
for (int i = 0; i < SIZE; i++) {
|
||||
m[i] = i % 2 == 0;
|
||||
}
|
||||
}
|
||||
|
||||
// ================ Load/Store/Gather/Scatter Tests ==================
|
||||
|
||||
private static void verifyLoadStore(int[] expected, int[] actual, int vlen) {
|
||||
for (int i = 0; i < vlen; i++) {
|
||||
Asserts.assertEquals(expected[i], actual[i]);
|
||||
}
|
||||
}
|
||||
|
||||
private static void verifyLoadGatherStoreScatter(int[] expected, int[] actual, int[] indices, int vlen) {
|
||||
for (int i = 0; i < vlen; i++) {
|
||||
Asserts.assertEquals(expected[indices[i]], actual[indices[i]]);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.VECTOR_MASK_GEN, "0",
|
||||
IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "1",
|
||||
IRNode.STORE_VECTOR, "1"},
|
||||
applyIfCPUFeature = {"sve", "true"}, applyIf = {"MaxVectorSize", ">=32"})
|
||||
public void testLoadStore_128() {
|
||||
IntVector v = IntVector.fromArray(ISPEC_128, ia, 0);
|
||||
v.intoArray(ib, 0);
|
||||
verifyLoadStore(ia, ib, ISPEC_128.length());
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.VECTOR_MASK_GEN, "1",
|
||||
IRNode.LOAD_VECTOR_MASKED, "1",
|
||||
IRNode.STORE_VECTOR_MASKED, "1"},
|
||||
applyIfCPUFeature = {"sve", "true"}, applyIf = {"MaxVectorSize", ">=64"})
|
||||
public void testLoadStore_256() {
|
||||
IntVector v = IntVector.fromArray(ISPEC_256, ia, 0);
|
||||
v.intoArray(ib, 0);
|
||||
verifyLoadStore(ia, ib, ISPEC_256.length());
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.VECTOR_MASK_GEN, "1",
|
||||
IRNode.LOAD_VECTOR_GATHER_MASKED, "1",
|
||||
IRNode.STORE_VECTOR_SCATTER_MASKED, "1"},
|
||||
applyIfCPUFeature = {"sve", "true"}, applyIf = {"MaxVectorSize", ">=32"})
|
||||
public void testLoadGatherStoreScatter_128() {
|
||||
IntVector v = IntVector.fromArray(ISPEC_128, ia, 0, indices, 0);
|
||||
v.intoArray(ib, 0, indices, 0);
|
||||
verifyLoadGatherStoreScatter(ia, ib, indices, ISPEC_128.length());
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.VECTOR_MASK_GEN, "1",
|
||||
IRNode.LOAD_VECTOR_GATHER_MASKED, "1",
|
||||
IRNode.STORE_VECTOR_SCATTER_MASKED, "1"},
|
||||
applyIfCPUFeature = {"sve", "true"}, applyIf = {"MaxVectorSize", ">=64"})
|
||||
public void testLoadGatherStoreScatter_256() {
|
||||
IntVector v = IntVector.fromArray(ISPEC_256, ia, 0, indices, 0);
|
||||
v.intoArray(ib, 0, indices, 0);
|
||||
verifyLoadGatherStoreScatter(ia, ib, indices, ISPEC_256.length());
|
||||
}
|
||||
|
||||
// ===================== Reduction Tests - Add =====================
|
||||
|
||||
interface binOpInt {
|
||||
int apply(int a, int b);
|
||||
}
|
||||
|
||||
interface binOpLong {
|
||||
long apply(long a, long b);
|
||||
}
|
||||
|
||||
private static int reduceLanes(int init, int[] arr, int vlen, binOpInt f) {
|
||||
int result = init;
|
||||
for (int i = 0; i < vlen; i++) {
|
||||
result = f.apply(arr[i], result);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
private static long reduceLanes(long init, long[] arr, int vlen,binOpLong f) {
|
||||
long result = init;
|
||||
for (int i = 0; i < vlen; i++) {
|
||||
result = f.apply(arr[i], result);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
// Reduction add operations with integer types are implemented with NEON SIMD instructions
|
||||
// when the vector size is less than or equal to 128-bit.
|
||||
@Test
|
||||
@IR(counts = {IRNode.VECTOR_MASK_GEN, "0",
|
||||
IRNode.ADD_REDUCTION_VI, "1"},
|
||||
applyIfCPUFeature = {"sve", "true"}, applyIf = {"MaxVectorSize", ">=32"})
|
||||
public int testAddReductionInt_128() {
|
||||
IntVector v = IntVector.fromArray(ISPEC_128, ia, 0);
|
||||
int result = v.reduceLanes(VectorOperators.ADD);
|
||||
Asserts.assertEquals(reduceLanes(0, ia, ISPEC_128.length(), (a, b) -> (a + b)), result);
|
||||
return result;
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.VECTOR_MASK_GEN, "1",
|
||||
IRNode.ADD_REDUCTION_VI, "1"},
|
||||
applyIfCPUFeature = {"sve", "true"}, applyIf = {"MaxVectorSize", ">=64"})
|
||||
public int testAddReductionInt_256() {
|
||||
IntVector v = IntVector.fromArray(ISPEC_256, ia, 0);
|
||||
int result = v.reduceLanes(VectorOperators.ADD);
|
||||
Asserts.assertEquals(reduceLanes(0, ia, ISPEC_256.length(), (a, b) -> (a + b)), result);
|
||||
return result;
|
||||
}
|
||||
|
||||
// Reduction add operations with long types are implemented with NEON SIMD instructions
|
||||
// when the vector size is less than or equal to 128-bit.
|
||||
@Test
|
||||
@IR(counts = {IRNode.VECTOR_MASK_GEN, "0",
|
||||
IRNode.ADD_REDUCTION_VL, "1"},
|
||||
applyIfCPUFeature = {"sve", "true"}, applyIf = {"MaxVectorSize", ">=32"})
|
||||
public long testAddReductionLong_128() {
|
||||
LongVector v = LongVector.fromArray(LSPEC_128, la, 0);
|
||||
long result = v.reduceLanes(VectorOperators.ADD);
|
||||
Asserts.assertEquals(reduceLanes(0L, la, LSPEC_128.length(), (a, b) -> (a + b)), result);
|
||||
return result;
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.VECTOR_MASK_GEN, "1",
|
||||
IRNode.ADD_REDUCTION_VL, "1"},
|
||||
applyIfCPUFeature = {"sve", "true"}, applyIf = {"MaxVectorSize", ">=64"})
|
||||
public long testAddReductionLong_256() {
|
||||
LongVector v = LongVector.fromArray(LSPEC_256, la, 0);
|
||||
long result = v.reduceLanes(VectorOperators.ADD);
|
||||
Asserts.assertEquals(reduceLanes(0L, la, LSPEC_256.length(), (a, b) -> (a + b)), result);
|
||||
return result;
|
||||
}
|
||||
|
||||
private static void verifyAddReductionFloat(float actual, float[] arr, int vlen) {
|
||||
float expected = 0.0f;
|
||||
for (int i = 0; i < vlen; i++) {
|
||||
expected += arr[i];
|
||||
}
|
||||
// Floating point addition reduction ops may introduce rounding errors.
|
||||
float ROUNDING_ERROR_FACTOR_ADD = 10.0f;
|
||||
float tolerance = Math.ulp(expected) * ROUNDING_ERROR_FACTOR_ADD;
|
||||
if (Math.abs(expected - actual) > tolerance) {
|
||||
throw new RuntimeException(
|
||||
"assertEqualsWithTolerance" +
|
||||
": expected " + expected + " but was " + actual +
|
||||
" (tolerance: " + tolerance + ", diff: " + Math.abs(expected - actual) + ")"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
private static void verifyAddReductionDouble(double actual, double[] arr, int vlen) {
|
||||
double expected = 0.0;
|
||||
for (int i = 0; i < vlen; i++) {
|
||||
expected += arr[i];
|
||||
}
|
||||
// Floating point addition reduction ops may introduce rounding errors.
|
||||
double ROUNDING_ERROR_FACTOR_ADD = 10.0;
|
||||
double tolerance = Math.ulp(expected) * ROUNDING_ERROR_FACTOR_ADD;
|
||||
if (Math.abs(expected - actual) > tolerance) {
|
||||
throw new RuntimeException(
|
||||
"assertEqualsWithTolerance" +
|
||||
": expected " + expected + " but was " + actual +
|
||||
" (tolerance: " + tolerance + ", diff: " + Math.abs(expected - actual) + ")"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.VECTOR_MASK_GEN, "1",
|
||||
IRNode.ADD_REDUCTION_VF, "1"},
|
||||
applyIfCPUFeature = {"sve", "true"}, applyIf = {"MaxVectorSize", ">=32"})
|
||||
public float testAddReductionFloat() {
|
||||
FloatVector v = FloatVector.fromArray(FSPEC_128, fa, 0);
|
||||
float result = v.reduceLanes(VectorOperators.ADD);
|
||||
verifyAddReductionFloat(result, fa, FSPEC_128.length());
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.VECTOR_MASK_GEN, "1",
|
||||
IRNode.ADD_REDUCTION_VD, "1"},
|
||||
applyIfCPUFeature = {"sve", "true"}, applyIf = {"MaxVectorSize", ">=32"})
|
||||
public double testAddReductionDouble() {
|
||||
DoubleVector v = DoubleVector.fromArray(DSPEC_128, da, 0);
|
||||
double result = v.reduceLanes(VectorOperators.ADD);
|
||||
verifyAddReductionDouble(result, da, DSPEC_128.length());
|
||||
return result;
|
||||
}
|
||||
|
||||
// ============== Reduction Tests - Logical ==============
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.VECTOR_MASK_GEN, "1",
|
||||
IRNode.AND_REDUCTION_V, "1"},
|
||||
applyIfCPUFeature = {"sve", "true"}, applyIf = {"MaxVectorSize", ">=32"})
|
||||
public int testAndReduction() {
|
||||
IntVector v = IntVector.fromArray(ISPEC_128, ia, 0);
|
||||
int result = v.reduceLanes(VectorOperators.AND);
|
||||
Asserts.assertEquals(reduceLanes(-1, ia, ISPEC_128.length(), (a, b) -> (a & b)), result);
|
||||
return result;
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.VECTOR_MASK_GEN, "1",
|
||||
IRNode.OR_REDUCTION_V, "1"},
|
||||
applyIfCPUFeature = {"sve", "true"}, applyIf = {"MaxVectorSize", ">=32"})
|
||||
public int testOrReduction() {
|
||||
IntVector v = IntVector.fromArray(ISPEC_128, ia, 0);
|
||||
int result = v.reduceLanes(VectorOperators.OR);
|
||||
Asserts.assertEquals(reduceLanes(0, ia, ISPEC_128.length(), (a, b) -> (a | b)), result);
|
||||
return result;
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.VECTOR_MASK_GEN, "1",
|
||||
IRNode.XOR_REDUCTION_V, "1"},
|
||||
applyIfCPUFeature = {"sve", "true"}, applyIf = {"MaxVectorSize", ">=32"})
|
||||
public int testXorReduction() {
|
||||
IntVector v = IntVector.fromArray(ISPEC_128, ia, 0);
|
||||
int result = v.reduceLanes(VectorOperators.XOR);
|
||||
Asserts.assertEquals(reduceLanes(0, ia, ISPEC_128.length(), (a, b) -> (a ^ b)), result);
|
||||
return result;
|
||||
}
|
||||
|
||||
// ===================== Reduction Tests - Min/Max =====================
|
||||
|
||||
// Reduction min operations with non-long types are implemented with NEON SIMD instructions
|
||||
// when the vector size is less than or equal to 128-bit.
|
||||
@Test
|
||||
@IR(counts = {IRNode.VECTOR_MASK_GEN, "0",
|
||||
IRNode.MIN_REDUCTION_V, "1"},
|
||||
applyIfCPUFeature = {"sve", "true"}, applyIf = {"MaxVectorSize", ">= 32"})
|
||||
public int testMinReductionInt_128() {
|
||||
IntVector v = IntVector.fromArray(ISPEC_128, ia, 0);
|
||||
int result = v.reduceLanes(VectorOperators.MIN);
|
||||
Asserts.assertEquals(reduceLanes(Integer.MAX_VALUE, ia, ISPEC_128.length(), (a, b) -> Math.min(a, b)), result);
|
||||
return result;
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.VECTOR_MASK_GEN, "1",
|
||||
IRNode.MIN_REDUCTION_V, "1"},
|
||||
applyIfCPUFeature = {"sve", "true"}, applyIf = {"MaxVectorSize", ">= 64"})
|
||||
public int testMinReductionInt_256() {
|
||||
IntVector v = IntVector.fromArray(ISPEC_256, ia, 0);
|
||||
int result = v.reduceLanes(VectorOperators.MIN);
|
||||
Asserts.assertEquals(reduceLanes(Integer.MAX_VALUE, ia, ISPEC_256.length(), (a, b) -> Math.min(a, b)), result);
|
||||
return result;
|
||||
}
|
||||
|
||||
// Reduction max operations with non-long types are implemented with NEON SIMD instructions
|
||||
// when the vector size is less than or equal to 128-bit.
|
||||
@Test
|
||||
@IR(counts = {IRNode.VECTOR_MASK_GEN, "0",
|
||||
IRNode.MAX_REDUCTION_V, "1"},
|
||||
applyIfCPUFeature = {"sve", "true"}, applyIf = {"MaxVectorSize", ">= 32"})
|
||||
public int testMaxReductionInt_128() {
|
||||
IntVector v = IntVector.fromArray(ISPEC_128, ia, 0);
|
||||
int result = v.reduceLanes(VectorOperators.MAX);
|
||||
Asserts.assertEquals(reduceLanes(Integer.MIN_VALUE, ia, ISPEC_128.length(), (a, b) -> Math.max(a, b)), result);
|
||||
return result;
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.VECTOR_MASK_GEN, "1",
|
||||
IRNode.MAX_REDUCTION_V, "1"},
|
||||
applyIfCPUFeature = {"sve", "true"}, applyIf = {"MaxVectorSize", ">= 64"})
|
||||
public int testMaxReductionInt_256() {
|
||||
IntVector v = IntVector.fromArray(ISPEC_256, ia, 0);
|
||||
int result = v.reduceLanes(VectorOperators.MAX);
|
||||
Asserts.assertEquals(reduceLanes(Integer.MIN_VALUE, ia, ISPEC_256.length(), (a, b) -> Math.max(a, b)), result);
|
||||
return result;
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.VECTOR_MASK_GEN, "1",
|
||||
IRNode.MIN_REDUCTION_V, "1"},
|
||||
applyIfCPUFeature = {"sve", "true"}, applyIf = {"MaxVectorSize", ">= 32"})
|
||||
public static long testMinReductionLong() {
|
||||
LongVector v = LongVector.fromArray(LSPEC_128, la, 0);
|
||||
long result = v.reduceLanes(VectorOperators.MIN);
|
||||
Asserts.assertEquals(reduceLanes(Long.MAX_VALUE, la, LSPEC_128.length(), (a, b) -> Math.min(a, b)), result);
|
||||
return result;
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.VECTOR_MASK_GEN, "1",
|
||||
IRNode.MAX_REDUCTION_V, "1"},
|
||||
applyIfCPUFeature = {"sve", "true"}, applyIf = {"MaxVectorSize", ">= 32"})
|
||||
public static long testMaxReductionLong() {
|
||||
LongVector v = LongVector.fromArray(LSPEC_128, la, 0);
|
||||
long result = v.reduceLanes(VectorOperators.MAX);
|
||||
Asserts.assertEquals(reduceLanes(Long.MIN_VALUE, la, LSPEC_128.length(), (a, b) -> Math.max(a, b)), result);
|
||||
return result;
|
||||
}
|
||||
|
||||
// ====================== VectorMask Tests ======================
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.VECTOR_MASK_GEN, "1",
|
||||
IRNode.VECTOR_LOAD_MASK, "1"},
|
||||
applyIfCPUFeature = {"sve", "true"}, applyIf = {"MaxVectorSize", ">= 32"})
|
||||
public static void testLoadMask() {
|
||||
VectorMask<Integer> vm = VectorMask.fromArray(ISPEC_128, m, 0);
|
||||
vm.not().intoArray(mr, 0);
|
||||
// Verify that the mask is loaded correctly.
|
||||
for (int i = 0; i < ISPEC_128.length(); i++) {
|
||||
Asserts.assertEquals(!m[i], mr[i]);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.VECTOR_MASK_GEN, "1",
|
||||
IRNode.VECTOR_MASK_CMP, "1"},
|
||||
applyIfCPUFeature = {"sve", "true"}, applyIf = {"MaxVectorSize", ">= 32"})
|
||||
public static void testVectorMaskCmp() {
|
||||
IntVector v1 = IntVector.fromArray(ISPEC_128, ia, 0);
|
||||
IntVector v2 = IntVector.fromArray(ISPEC_128, ib, 0);
|
||||
VectorMask<Integer> vm = v1.compare(VectorOperators.LT, v2);
|
||||
vm.intoArray(mr, 0);
|
||||
// Verify that the mask is generated correctly.
|
||||
for (int i = 0; i < ISPEC_128.length(); i++) {
|
||||
Asserts.assertEquals(ia[i] < ib[i], mr[i]);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.VECTOR_MASK_GEN, "1",
|
||||
IRNode.VECTOR_MASK_FIRST_TRUE, "1"},
|
||||
applyIfCPUFeature = {"sve", "true"}, applyIf = {"MaxVectorSize", ">= 32"})
|
||||
public static int testFirstTrue() {
|
||||
VectorMask<Integer> vm = ISPEC_128.maskAll(false);
|
||||
int result = vm.firstTrue();
|
||||
// The result is the vector length if no lane is true.
|
||||
// This is the default behavior of the firstTrue method.
|
||||
Asserts.assertEquals(ISPEC_128.length(), result);
|
||||
return result;
|
||||
}
|
||||
|
||||
public static void main(String[] args) {
|
||||
TestFramework testFramework = new TestFramework();
|
||||
testFramework.setDefaultWarmup(10000)
|
||||
.addFlags("--add-modules=jdk.incubator.vector")
|
||||
.start();
|
||||
}
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user