diff --git a/src/hotspot/share/opto/subnode.hpp b/src/hotspot/share/opto/subnode.hpp index 387c1c46ba9..29ec25b41f8 100644 --- a/src/hotspot/share/opto/subnode.hpp +++ b/src/hotspot/share/opto/subnode.hpp @@ -520,7 +520,12 @@ class SqrtDNode : public Node { public: SqrtDNode(Compile* C, Node *c, Node *in1) : Node(c, in1) { init_flags(Flag_is_expensive); - C->add_expensive_node(this); + // Treat node only as expensive if a control input is set because it might + // be created from SqrtVDNode in VectorNode::push_through_replicate which + // does not have control input. + if (c != nullptr) { + C->add_expensive_node(this); + } } virtual int Opcode() const; const Type *bottom_type() const { return Type::DOUBLE; } diff --git a/src/hotspot/share/opto/vectornode.cpp b/src/hotspot/share/opto/vectornode.cpp index 651a27af9c7..a54fe6e3a73 100644 --- a/src/hotspot/share/opto/vectornode.cpp +++ b/src/hotspot/share/opto/vectornode.cpp @@ -22,10 +22,12 @@ */ #include "memory/allocation.inline.hpp" +#include "opto/addnode.hpp" #include "opto/c2_globals.hpp" #include "opto/compile.hpp" #include "opto/connode.hpp" #include "opto/convertnode.hpp" +#include "opto/divnode.hpp" #include "opto/mulnode.hpp" #include "opto/subnode.hpp" #include "opto/vectornode.hpp" @@ -290,7 +292,146 @@ int VectorNode::opcode(int sopc, BasicType bt) { assert(!VectorNode::is_convert_opcode(sopc), "Convert node %s should be processed by VectorCastNode::opcode()", NodeClassNames[sopc]); - return 0; // Unimplemented + return 0; // not handled + } +} + +// Return the scalar opcode for the specified vector opcode and basic type. +// Returns 0 if not handled. 
+int VectorNode::scalar_opcode(int vopc, BasicType bt) { + switch (vopc) { + case Op_AddVB: + case Op_AddVS: + case Op_AddVI: + return Op_AddI; + case Op_AddVL: + return Op_AddL; + case Op_AddVF: + return Op_AddF; + case Op_AddVD: + return Op_AddD; + + case Op_SubVB: + case Op_SubVS: + case Op_SubVI: + return Op_SubI; + case Op_SubVL: + return Op_SubL; + case Op_SubVF: + return Op_SubF; + case Op_SubVD: + return Op_SubD; + + case Op_MulVB: + case Op_MulVS: + case Op_MulVI: + return Op_MulI; + case Op_MulVL: + return Op_MulL; + case Op_MulVF: + return Op_MulF; + case Op_MulVD: + return Op_MulD; + + case Op_DivVF: + return Op_DivF; + case Op_DivVD: + return Op_DivD; + + case Op_AndV: + switch (bt) { + case T_BOOLEAN: + case T_CHAR: + case T_BYTE: + case T_SHORT: + case T_INT: + return Op_AndI; + case T_LONG: + return Op_AndL; + default: + return 0; + } + + case Op_OrV: + switch (bt) { + case T_BOOLEAN: + case T_CHAR: + case T_BYTE: + case T_SHORT: + case T_INT: + return Op_OrI; + case T_LONG: + return Op_OrL; + default: + return 0; + } + + case Op_XorV: + switch (bt) { + case T_BOOLEAN: + case T_CHAR: + case T_BYTE: + case T_SHORT: + case T_INT: + return Op_XorI; + case T_LONG: + return Op_XorL; + default: + return 0; + } + + case Op_MinV: + switch (bt) { + case T_BOOLEAN: + case T_CHAR: + // unsigned, not supported for Min + return 0; + case T_BYTE: + case T_SHORT: + case T_INT: + return Op_MinI; + case T_LONG: + return Op_MinL; + case T_FLOAT: + return Op_MinF; + case T_DOUBLE: + return Op_MinD; + default: + return 0; + } + + case Op_MaxV: + switch (bt) { + case T_BOOLEAN: + case T_CHAR: + // unsigned, not supported for Max + return 0; + case T_BYTE: + case T_SHORT: + case T_INT: + return Op_MaxI; + case T_LONG: + return Op_MaxL; + case T_FLOAT: + return Op_MaxF; + case T_DOUBLE: + return Op_MaxD; + default: + return 0; + } + + case Op_SqrtVD: + return Op_SqrtD; + case Op_SqrtVF: + return Op_SqrtF; + + case Op_FmaVF: + return Op_FmaF; + case Op_FmaVD: + return 
Op_FmaD; + + default: + return 0; // not handled } } @@ -984,17 +1125,9 @@ static Node* ideal_partial_operations(PhaseGVN* phase, Node* node, const TypeVec } } -bool VectorNode::should_swap_inputs_to_help_global_value_numbering() { - // Predicated vector operations are sensitive to ordering of inputs. - // When the mask corresponding to a vector lane is false then - // the result of the operation is corresponding lane of its first operand. - // i.e. RES = VEC1.lanewise(OPER, VEC2, MASK) is semantically equivalent to - // RES = BLEND(VEC1, VEC1.lanewise(OPER, VEC2), MASK) - if (is_predicated_vector()) { - return false; - } - - switch(Opcode()) { +// Check if the vector operation is commutative (assuming that it is not predicated/masked). +static bool is_commutative_vector_operation(int opcode) { + switch(opcode) { case Op_AddVB: case Op_AddVS: case Op_AddVI: @@ -1022,18 +1155,228 @@ bool VectorNode::should_swap_inputs_to_help_global_value_numbering() { case Op_XorVMask: case Op_SaturatingAddV: - assert(req() == 3, "Must be a binary operation"); - // For non-predicated commutative operations, sort the inputs in - // increasing order of node indices. - if (in(1)->_idx > in(2)->_idx) { - return true; - } - // fallthrough + return true; default: return false; } } +bool VectorNode::should_swap_inputs_to_help_global_value_numbering() { + // Predicated vector operations are sensitive to ordering of inputs. + // When the mask corresponding to a vector lane is false then + // the result of the operation is corresponding lane of its first operand. + // i.e. RES = VEC1.lanewise(OPER, VEC2, MASK) is semantically equivalent to + // RES = BLEND(VEC1, VEC1.lanewise(OPER, VEC2), MASK) + if (is_predicated_vector()) { + return false; + } + + if (is_commutative_vector_operation(Opcode())) { + assert(req() == 3, "Must be a binary operation"); + // For non-predicated commutative operations, sort the inputs in + // increasing order of node indices. 
+ if (in(1)->_idx > in(2)->_idx) { + return true; + } + } + + return false; +} + +// Check whether we can push this vector op through replicate (all inputs are Replicate). +bool VectorNode::can_push_through_replicate(BasicType bt) { + if (scalar_opcode(Opcode(), bt) == 0) { + return false; + } + + // Skip over predicated vector operations for now, for masked lanes we preserve + // destination/first source vector contents. + if (is_predicated_vector()) { + return false; + } + + for (uint i = 1; i < req(); i++) { + if (in(i)->Opcode() != Op_Replicate) { + return false; + } + } + return true; +} + +Node* VectorNode::make_scalar(Compile* c, int vopc, BasicType bt, Node* control, Node* in1, Node* in2, Node* in3) { + int sopc = scalar_opcode(vopc, bt); + assert(sopc != 0, "unhandled vector opcode %s", NodeClassNames[vopc]); + assert(opcode(sopc, bt) == vopc, "scalar_opcode and opcode must agree for %s", NodeClassNames[vopc]); + switch (sopc) { + case Op_AddI: + return new AddINode(in1, in2); + case Op_AddL: + return new AddLNode(in1, in2); + case Op_AddF: + return new AddFNode(in1, in2); + case Op_AddD: + return new AddDNode(in1, in2); + case Op_MulI: + return new MulINode(in1, in2); + case Op_MulL: + return new MulLNode(in1, in2); + case Op_MulF: + return new MulFNode(in1, in2); + case Op_MulD: + return new MulDNode(in1, in2); + case Op_AndI: + return new AndINode(in1, in2); + case Op_AndL: + return new AndLNode(in1, in2); + case Op_DivF: + return new DivFNode(control, in1, in2); + case Op_DivD: + return new DivDNode(control, in1, in2); + case Op_OrI: + return new OrINode(in1, in2); + case Op_OrL: + return new OrLNode(in1, in2); + case Op_XorI: + return new XorINode(in1, in2); + case Op_XorL: + return new XorLNode(in1, in2); + case Op_SubI: + return new SubINode(in1, in2); + case Op_SubL: + return new SubLNode(in1, in2); + case Op_SubF: + return new SubFNode(in1, in2); + case Op_SubD: + return new SubDNode(in1, in2); + case Op_MinI: + return new MinINode(in1, in2); + 
case Op_MinL: + return new MinLNode(c, in1, in2); + case Op_MinF: + return new MinFNode(in1, in2); + case Op_MinD: + return new MinDNode(in1, in2); + case Op_MaxI: + return new MaxINode(in1, in2); + case Op_MaxL: + return new MaxLNode(c, in1, in2); + case Op_MaxF: + return new MaxFNode(in1, in2); + case Op_MaxD: + return new MaxDNode(in1, in2); + case Op_SqrtF: + return new SqrtFNode(c, control, in1); + case Op_SqrtD: + return new SqrtDNode(c, control, in1); + case Op_FmaF: + return new FmaFNode(in1, in2, in3); + case Op_FmaD: + return new FmaDNode(in1, in2, in3); + default: + assert(false, "unexpected scalar opcode"); + return nullptr; + } +} + +// Re-wires and creates a new ideal graph pallet with following connectivity +// parent(child(cinput1, cinput2), pinput2) +Node* VectorNode::create_reassociated_node(Node* parent, Node* child, Node* cinput1, Node* cinput2, + Node* pinput2, PhaseGVN* phase) { + Node* cloned_child = child->clone(); + cloned_child->set_req(1, cinput1); + cloned_child->set_req(2, cinput2); + cloned_child = phase->transform(cloned_child); + Node* cloned_parent = parent->clone(); + cloned_parent->set_req(1, cloned_child); + cloned_parent->set_req(2, pinput2); + return cloned_parent; +} + +// Try to reassociate commutative vector operations using the following ideal transformation, +// this will facilitate strength reducing a vector operation with all replicated inputs to +// a scalar operation. +// +// VectorOp (Replicate INP1) (VectorOp (Replicate INP2) INP3) => +// VectorOp (VectorOp (Replicate INP1) (Replicate INP2)) INP3 +// +Node* VectorNode::reassociate_vector_operation(PhaseGVN* phase) { + // Enable re-association for integral vector operations. + if (!is_integral_type(vect_type()->element_basic_type())) { + return nullptr; + } + + // Enable re-association for commutative vector operations. 
+ if (!is_commutative_vector_operation(Opcode())) { + return nullptr; + } + + Node* in1 = in(1); + Node* in2 = in(2); + if (in2->Opcode() == Op_Replicate && in1->Opcode() == Opcode()) { + swap(in1, in2); + } + + if (in1->Opcode() != Op_Replicate || in2->Opcode() != Opcode()) { + return nullptr; + } + + // Skip predicated vector operations, mask semantics prevent reassociation. + if (is_predicated_vector() || in2->as_Vector()->is_predicated_vector()) { + return nullptr; + } + + Node* in2_1 = in2->in(1); + Node* in2_2 = in2->in(2); + if (in2_1->Opcode() == Op_Replicate) { + return create_reassociated_node(this, in2, in1, in2_1, in2_2, phase); + } else if (in2_2->Opcode() == Op_Replicate) { + return create_reassociated_node(this, in2, in1, in2_2, in2_1, phase); + } + + return nullptr; +} + +// Convert vector operation with all Replicate inputs to scalar operation using following +// ideal transformation. +// +// VectorOp (Replicate INP1, Replicate INP2) => +// Replicate (ScalarOp INP1, INP2) +// +Node* VectorNode::push_through_replicate(PhaseGVN* phase) { + BasicType bt = vect_type()->element_basic_type(); + if (!can_push_through_replicate(bt)) { + return nullptr; + } + + assert(req() >= 2 && req() <= 4, "unexpected req() %u for %s", req(), NodeClassNames[Opcode()]); + + Node* sinp1 = nullptr; + Node* sinp2 = nullptr; + Node* sinp3 = nullptr; + + assert(in(1)->Opcode() == Op_Replicate, ""); + sinp1 = in(1)->in(1); + + if (req() > 2) { + assert(in(2)->Opcode() == Op_Replicate, ""); + sinp2 = in(2)->in(1); + } + + if (req() > 3) { + assert(in(3)->Opcode() == Op_Replicate, ""); + sinp3 = in(3)->in(1); + } + + Node* sop = make_scalar(phase->C, Opcode(), bt, in(0), sinp1, sinp2, sinp3); + if (sop == nullptr) { + return nullptr; + } + + sop = phase->transform(sop); + + return new ReplicateNode(sop, vect_type()); +} + Node* VectorNode::Ideal(PhaseGVN* phase, bool can_reshape) { Node* n = ideal_partial_operations(phase, this, vect_type()); if (n != nullptr) { @@ -1044,7 
+1387,13 @@ Node* VectorNode::Ideal(PhaseGVN* phase, bool can_reshape) { if (should_swap_inputs_to_help_global_value_numbering()) { swap_edges(1, 2); } - return nullptr; + + n = push_through_replicate(phase); + if (n != nullptr) { + return n; + } + + return reassociate_vector_operation(phase); } // Traverses a chain of VectorMaskCast and returns the first non VectorMaskCast node. @@ -2094,7 +2443,7 @@ Node* FmaVNode::Ideal(PhaseGVN* phase, bool can_reshape) { swap_edges(1, 2); return this; } - return nullptr; + return VectorNode::Ideal(phase, can_reshape); } // Generate other vector nodes to implement the masked/non-masked vector negation. diff --git a/src/hotspot/share/opto/vectornode.hpp b/src/hotspot/share/opto/vectornode.hpp index 897cedd6a1b..6bcb7702d13 100644 --- a/src/hotspot/share/opto/vectornode.hpp +++ b/src/hotspot/share/opto/vectornode.hpp @@ -146,12 +146,20 @@ class VectorNode : public TypeNode { static bool is_minmax_opcode(int opc); bool should_swap_inputs_to_help_global_value_numbering(); + Node* reassociate_vector_operation(PhaseGVN* phase); + static Node* create_reassociated_node(Node* parent, Node* child, Node* cinput1, Node* cinput2, + Node* pinput2, PhaseGVN* phase); static bool is_vshift_cnt_opcode(int opc); static bool is_rotate_opcode(int opc); static int opcode(int sopc, BasicType bt); // scalar_opc -> vector_opc + static int scalar_opcode(int vopc, BasicType bt); // vector_opc -> scalar_opc, 0 if not handled + static Node* make_scalar(Compile* c, int vopc, BasicType bt, Node* control, Node* in1, Node* in2, Node* in3); + + bool can_push_through_replicate(BasicType bt); + Node* push_through_replicate(PhaseGVN* phase); static int shift_count_opcode(int opc); diff --git a/test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java b/test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java index 55d591acdb3..4f7869f444a 100644 --- a/test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java +++ 
b/test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java @@ -233,6 +233,11 @@ public class IRNode { beforeMatchingNameRegex(ADD_P, "AddP"); } + public static final String ADD_D = PREFIX + "ADD_D" + POSTFIX; + static { + beforeMatchingNameRegex(ADD_D, "AddD"); + } + public static final String ADD_VD = VECTOR_PREFIX + "ADD_VD" + POSTFIX; static { vectorNode(ADD_VD, "AddVD", TYPE_DOUBLE); @@ -763,11 +768,21 @@ public class IRNode { vectorNode(DIV_VHF, "DivVHF", TYPE_SHORT); } + public static final String DIV_F = PREFIX + "DIV_F" + POSTFIX; + static { + beforeMatchingNameRegex(DIV_F, "DivF"); + } + public static final String DIV_VF = VECTOR_PREFIX + "DIV_VF" + POSTFIX; static { vectorNode(DIV_VF, "DivVF", TYPE_FLOAT); } + public static final String DIV_D = PREFIX + "DIV_D" + POSTFIX; + static { + beforeMatchingNameRegex(DIV_D, "DivD"); + } + public static final String DIV_VD = VECTOR_PREFIX + "DIV_VD" + POSTFIX; static { vectorNode(DIV_VD, "DivVD", TYPE_DOUBLE); diff --git a/test/hotspot/jtreg/compiler/vectorapi/TestVectorBroadcastTransforms.java b/test/hotspot/jtreg/compiler/vectorapi/TestVectorBroadcastTransforms.java new file mode 100644 index 00000000000..c58a6710c86 --- /dev/null +++ b/test/hotspot/jtreg/compiler/vectorapi/TestVectorBroadcastTransforms.java @@ -0,0 +1,1100 @@ +/* + * Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/* + * @test + * @bug 8358521 + * @summary Optimize vector operations by reassociating broadcasted inputs + * @modules jdk.incubator.vector + * @library /test/lib / + * @run driver compiler.vectorapi.TestVectorBroadcastTransforms + */ + +package compiler.vectorapi; + +import compiler.lib.ir_framework.*; +import compiler.lib.verify.Verify; +import jdk.incubator.vector.*; + +import jdk.test.lib.Utils; +import java.util.Random; + +public class TestVectorBroadcastTransforms { + + public static void main(String[] args) { + TestFramework.runWithFlags("--add-modules=jdk.incubator.vector"); + } + + private static final Random R = Utils.getRandomInstance(); + + /* ======================= + * INT + * ======================= */ + + static final VectorSpecies ISP = IntVector.SPECIES_PREFERRED; + + @Test + @IR(failOn = IRNode.ADD_VI, + applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"}, + counts = { IRNode.ADD_I, ">= 1", IRNode.REPLICATE_I, IRNode.VECTOR_SIZE_ANY, ">= 1" }) + static int int_add(int ia, int ib) { + return IntVector.broadcast(ISP, ia) + .add(IntVector.broadcast(ISP, ib)) + .lane(0); + } + + @Run(test = "int_add") + static void run_int_add() { + int ia = R.nextInt(); + int ib = R.nextInt(); + int ir = int_add(ia, ib); + Verify.checkEQ(ir, ia + ib); + } + + @Test + @IR(failOn = IRNode.SUB_VI, + applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"}, + counts = { IRNode.SUB_I, ">= 1", IRNode.REPLICATE_I, IRNode.VECTOR_SIZE_ANY, ">= 1" }) + static int int_sub(int ia, int ib) { + return IntVector.broadcast(ISP, ia) + .sub(IntVector.broadcast(ISP, ib)) + .lane(0); + } + + 
@Run(test = "int_sub") + static void run_int_sub() { + int ia = R.nextInt(); + int ib = R.nextInt(); + int ir = int_sub(ia, ib); + Verify.checkEQ(ir, ia - ib); + } + + @Test + @IR(failOn = IRNode.MUL_VI, + applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"}, + counts = { IRNode.MUL_I, ">= 1", IRNode.REPLICATE_I, IRNode.VECTOR_SIZE_ANY, ">= 1" }) + static int int_mul(int ia, int ib) { + return IntVector.broadcast(ISP, ia) + .mul(IntVector.broadcast(ISP, ib)) + .lane(0); + } + + @Run(test = "int_mul") + static void run_int_mul() { + int ia = R.nextInt(); + int ib = R.nextInt(); + int ir = int_mul(ia, ib); + Verify.checkEQ(ir, ia * ib); + } + + @Test + @IR(failOn = IRNode.AND_VI, + applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"}, + counts = { IRNode.AND_I, ">= 1", IRNode.REPLICATE_I, IRNode.VECTOR_SIZE_ANY, ">= 1" }) + static int int_and(int ia, int ib) { + return IntVector.broadcast(ISP, ia) + .and(IntVector.broadcast(ISP, ib)) + .lane(0); + } + + @Run(test = "int_and") + static void run_int_and() { + int ia = R.nextInt(); + int ib = R.nextInt(); + int ir = int_and(ia, ib); + Verify.checkEQ(ir, ia & ib); + } + + @Test + @IR(failOn = IRNode.OR_VI, + applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"}, + counts = { IRNode.OR_I, ">= 1", IRNode.REPLICATE_I, IRNode.VECTOR_SIZE_ANY, ">= 1" }) + static int int_or(int ia, int ib) { + return IntVector.broadcast(ISP, ia) + .or(IntVector.broadcast(ISP, ib)) + .lane(0); + } + + @Run(test = "int_or") + static void run_int_or() { + int ia = R.nextInt(); + int ib = R.nextInt(); + int ir = int_or(ia, ib); + Verify.checkEQ(ir, ia | ib); + } + + @Test + @IR(failOn = IRNode.XOR_VI, + applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"}, + counts = { IRNode.XOR_I, ">= 1", IRNode.REPLICATE_I, IRNode.VECTOR_SIZE_ANY, ">= 1" }) + static int int_xor(int ia, int ib) { + return IntVector.broadcast(ISP, ia) + .lanewise(VectorOperators.XOR, IntVector.broadcast(ISP, ib)) + .lane(0); + } + + @Run(test = "int_xor") + static void 
run_int_xor() { + int ia = R.nextInt(); + int ib = R.nextInt(); + int ir = int_xor(ia, ib); + Verify.checkEQ(ir, ia ^ ib); + } + + @Test + @IR(failOn = IRNode.MIN_VI, + applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"}, + counts = { IRNode.MIN_I, ">= 1", IRNode.REPLICATE_I, IRNode.VECTOR_SIZE_ANY, ">= 1" }) + static int int_min(int ia, int ib) { + return IntVector.broadcast(ISP, ia) + .min(IntVector.broadcast(ISP, ib)) + .lane(0); + } + + @Run(test = "int_min") + static void run_int_min() { + int ia = R.nextInt(); + int ib = R.nextInt(); + int ir = int_min(ia, ib); + Verify.checkEQ(ir, Math.min(ia, ib)); + } + + @Test + @IR(failOn = IRNode.MAX_VI, + applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"}, + counts = { IRNode.MAX_I, ">= 1", IRNode.REPLICATE_I, IRNode.VECTOR_SIZE_ANY, ">= 1" }) + static int int_max(int ia, int ib) { + return IntVector.broadcast(ISP, ia) + .max(IntVector.broadcast(ISP, ib)) + .lane(0); + } + + @Run(test = "int_max") + static void run_int_max() { + int ia = R.nextInt(); + int ib = R.nextInt(); + int ir = int_max(ia, ib); + Verify.checkEQ(ir, Math.max(ia, ib)); + } + + /* ======================= + * LONG + * ======================= */ + + static final VectorSpecies LSP = LongVector.SPECIES_PREFERRED; + + @Test + @IR(failOn = IRNode.ADD_VL, + applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"}, + counts = { IRNode.ADD_L, ">= 1", IRNode.REPLICATE_L, IRNode.VECTOR_SIZE_ANY, ">= 1" }) + static long long_add(long la, long lb) { + return LongVector.broadcast(LSP, la) + .add(LongVector.broadcast(LSP, lb)) + .lane(0); + } + + @Run(test = "long_add") + static void run_long_add() { + long la = R.nextLong(); + long lb = R.nextLong(); + long lr = long_add(la, lb); + Verify.checkEQ(lr, la + lb); + } + + @Test + @IR(failOn = IRNode.SUB_VL, + applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"}, + counts = { IRNode.SUB_L, ">= 1", IRNode.REPLICATE_L, IRNode.VECTOR_SIZE_ANY, ">= 1" }) + static long long_sub(long la, long lb) { + return 
LongVector.broadcast(LSP, la) + .sub(LongVector.broadcast(LSP, lb)) + .lane(0); + } + + @Run(test = "long_sub") + static void run_long_sub() { + long la = R.nextLong(); + long lb = R.nextLong(); + long lr = long_sub(la, lb); + Verify.checkEQ(lr, la - lb); + } + + @Test + @IR(failOn = IRNode.MUL_VL, + applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"}, + counts = { IRNode.MUL_L, ">= 1", IRNode.REPLICATE_L, IRNode.VECTOR_SIZE_ANY, ">= 1" }) + static long long_mul(long la, long lb) { + return LongVector.broadcast(LSP, la) + .mul(LongVector.broadcast(LSP, lb)) + .lane(0); + } + + @Run(test = "long_mul") + static void run_long_mul() { + long la = R.nextLong(); + long lb = R.nextLong(); + long lr = long_mul(la, lb); + Verify.checkEQ(lr, la * lb); + } + + @Test + @IR(failOn = IRNode.AND_VL, + applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"}, + counts = { IRNode.AND_L, ">= 1", IRNode.REPLICATE_L, IRNode.VECTOR_SIZE_ANY, ">= 1" }) + static long long_and(long la, long lb) { + return LongVector.broadcast(LSP, la) + .and(LongVector.broadcast(LSP, lb)) + .lane(0); + } + + @Run(test = "long_and") + static void run_long_and() { + long la = R.nextLong(); + long lb = R.nextLong(); + long lr = long_and(la, lb); + Verify.checkEQ(lr, la & lb); + } + + @Test + @IR(failOn = IRNode.OR_VL, + applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"}, + counts = { IRNode.OR_L, ">= 1", IRNode.REPLICATE_L, IRNode.VECTOR_SIZE_ANY, ">= 1" }) + static long long_or(long la, long lb) { + return LongVector.broadcast(LSP, la) + .or(LongVector.broadcast(LSP, lb)) + .lane(0); + } + + @Run(test = "long_or") + static void run_long_or() { + long la = R.nextLong(); + long lb = R.nextLong(); + long lr = long_or(la, lb); + Verify.checkEQ(lr, la | lb); + } + + @Test + @IR(failOn = IRNode.XOR_VL, + applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"}, + counts = { IRNode.XOR_L, ">= 1", IRNode.REPLICATE_L, IRNode.VECTOR_SIZE_ANY, ">= 1" }) + static long long_xor(long la, long lb) { + return 
LongVector.broadcast(LSP, la) + .lanewise(VectorOperators.XOR, LongVector.broadcast(LSP, lb)) + .lane(0); + } + + @Run(test = "long_xor") + static void run_long_xor() { + long la = R.nextLong(); + long lb = R.nextLong(); + long lr = long_xor(la, lb); + Verify.checkEQ(lr, la ^ lb); + } + + @Test + @IR(failOn = IRNode.MIN_VL, + applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"}, + counts = {IRNode.REPLICATE_L, IRNode.VECTOR_SIZE_ANY, ">= 1" }) + static long long_min(long la, long lb) { + return LongVector.broadcast(LSP, la) + .min(LongVector.broadcast(LSP, lb)) + .lane(0); + } + + @Run(test = "long_min") + static void run_long_min() { + long la = R.nextLong(); + long lb = R.nextLong(); + long lr = long_min(la, lb); + Verify.checkEQ(lr, Math.min(la, lb)); + } + + @Test + @IR(failOn = IRNode.MAX_VL, + applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"}, + counts = {IRNode.REPLICATE_L, IRNode.VECTOR_SIZE_ANY, ">= 1" }) + static long long_max(long la, long lb) { + return LongVector.broadcast(LSP, la) + .max(LongVector.broadcast(LSP, lb)) + .lane(0); + } + + @Run(test = "long_max") + static void run_long_max() { + long la = R.nextLong(); + long lb = R.nextLong(); + long lr = long_max(la, lb); + Verify.checkEQ(lr, Math.max(la, lb)); + } + + /* ======================= + * FLOAT + * ======================= */ + + static final VectorSpecies FSP = FloatVector.SPECIES_PREFERRED; + + @Test + @IR(failOn = IRNode.ADD_VF, + applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"}, + counts = { IRNode.ADD_F, ">= 1", IRNode.REPLICATE_F, IRNode.VECTOR_SIZE_ANY, ">= 1" }) + static float float_add(float fa, float fb) { + return FloatVector.broadcast(FSP, fa) + .add(FloatVector.broadcast(FSP, fb)) + .lane(0); + } + + @Run(test = "float_add") + static void run_float_add() { + float fa = R.nextFloat(); + float fb = R.nextFloat(); + float fr = float_add(fa, fb); + Verify.checkEQ(fr, fa + fb); + } + + @Test + @IR(failOn = IRNode.SUB_VF, + applyIfCPUFeatureOr = {"avx", "true", "asimd", 
"true"}, + counts = { IRNode.SUB_F, ">= 1", IRNode.REPLICATE_F, IRNode.VECTOR_SIZE_ANY, ">= 1" }) + static float float_sub(float fa, float fb) { + return FloatVector.broadcast(FSP, fa) + .sub(FloatVector.broadcast(FSP, fb)) + .lane(0); + } + + @Run(test = "float_sub") + static void run_float_sub() { + float fa = R.nextFloat(); + float fb = R.nextFloat(); + float fr = float_sub(fa, fb); + Verify.checkEQ(fr, fa - fb); + } + + @Test + @IR(failOn = IRNode.MUL_VF, + applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"}, + counts = { IRNode.MUL_F, ">= 1", IRNode.REPLICATE_F, IRNode.VECTOR_SIZE_ANY, ">= 1" }) + static float float_mul(float fa, float fb) { + return FloatVector.broadcast(FSP, fa) + .mul(FloatVector.broadcast(FSP, fb)) + .lane(0); + } + + @Run(test = "float_mul") + static void run_float_mul() { + float fa = R.nextFloat(); + float fb = R.nextFloat(); + float fr = float_mul(fa, fb); + Verify.checkEQ(fr, fa * fb); + } + + @Test + @IR(failOn = IRNode.DIV_VF, + applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"}, + counts = { IRNode.DIV_F, ">= 1", IRNode.REPLICATE_F, IRNode.VECTOR_SIZE_ANY, ">= 1" }) + static float float_div(float fa, float fb) { + return FloatVector.broadcast(FSP, fa) + .div(FloatVector.broadcast(FSP, fb)) + .lane(0); + } + + @Run(test = "float_div") + static void run_float_div() { + float fa = R.nextFloat(); + float fb = R.nextFloat(); + if (fb == 0f) fb = 1f; + float fr = float_div(fa, fb); + Verify.checkEQ(fr, fa / fb); + } + + @Test + @IR(failOn = IRNode.MIN_VF, + applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"}, + counts = { IRNode.MIN_F, ">= 1", IRNode.REPLICATE_F, IRNode.VECTOR_SIZE_ANY, ">= 1" }) + static float float_min(float fa, float fb) { + return FloatVector.broadcast(FSP, fa) + .min(FloatVector.broadcast(FSP, fb)) + .lane(0); + } + + @Run(test = "float_min") + static void run_float_min() { + float fa = R.nextFloat(); + float fb = R.nextFloat(); + float fr = float_min(fa, fb); + Verify.checkEQ(fr, Math.min(fa, fb)); + } + 
+ @Test + @IR(failOn = IRNode.MAX_VF, + applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"}, + counts = { IRNode.MAX_F, ">= 1", IRNode.REPLICATE_F, IRNode.VECTOR_SIZE_ANY, ">= 1" }) + static float float_max(float fa, float fb) { + return FloatVector.broadcast(FSP, fa) + .max(FloatVector.broadcast(FSP, fb)) + .lane(0); + } + + @Run(test = "float_max") + static void run_float_max() { + float fa = R.nextFloat(); + float fb = R.nextFloat(); + float fr = float_max(fa, fb); + Verify.checkEQ(fr, Math.max(fa, fb)); + } + + @Test + @IR(failOn = IRNode.SQRT_VF, + applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"}, + counts = { IRNode.SQRT_F, ">= 1", IRNode.REPLICATE_F, IRNode.VECTOR_SIZE_ANY, ">= 1" }) + static float float_sqrt(float fa) { + return FloatVector.broadcast(FSP, fa) + .sqrt() + .lane(0); + } + + @Run(test = "float_sqrt") + static void run_float_sqrt() { + float fa = Math.abs(R.nextFloat()) + Float.MIN_VALUE; + float fr = float_sqrt(fa); + Verify.checkEQ(fr, (float) Math.sqrt(fa)); + } + + @Test + @IR(failOn = IRNode.FMA_VF, + applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"}, + counts = { IRNode.FMA_F, ">= 1", IRNode.REPLICATE_F, IRNode.VECTOR_SIZE_ANY, ">= 1" }) + static float float_fma(float fa, float fb, float fc) { + return FloatVector.broadcast(FSP, fa) + .fma(FloatVector.broadcast(FSP, fb), + FloatVector.broadcast(FSP, fc)) + .lane(0); + } + + @Run(test = "float_fma") + static void run_float_fma() { + float fa = R.nextFloat(); + float fb = R.nextFloat(); + float fc = R.nextFloat(); + float fr = float_fma(fa, fb, fc); + Verify.checkEQ(fr, Math.fma(fa, fb, fc)); + } + + /* ======================= + * DOUBLE + * ======================= */ + + static final VectorSpecies DSP = DoubleVector.SPECIES_PREFERRED; + + @Test + @IR(failOn = IRNode.ADD_VD, + applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"}, + counts = { IRNode.ADD_D, ">= 1", IRNode.REPLICATE_D, IRNode.VECTOR_SIZE_ANY, ">= 1" }) + static double double_add(double da, double db) { + 
return DoubleVector.broadcast(DSP, da) + .add(DoubleVector.broadcast(DSP, db)) + .lane(0); + } + + @Run(test = "double_add") + static void run_double_add() { + double da = R.nextDouble(); + double db = R.nextDouble(); + double dr = double_add(da, db); + Verify.checkEQ(dr, da + db); + } + + @Test + @IR(failOn = IRNode.SUB_VD, + applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"}, + counts = { IRNode.SUB_D, ">= 1", IRNode.REPLICATE_D, IRNode.VECTOR_SIZE_ANY, ">= 1" }) + static double double_sub(double da, double db) { + return DoubleVector.broadcast(DSP, da) + .sub(DoubleVector.broadcast(DSP, db)) + .lane(0); + } + + @Run(test = "double_sub") + static void run_double_sub() { + double da = R.nextDouble(); + double db = R.nextDouble(); + double dr = double_sub(da, db); + Verify.checkEQ(dr, da - db); + } + + @Test + @IR(failOn = IRNode.MUL_VD, + applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"}, + counts = { IRNode.MUL_D, ">= 1", IRNode.REPLICATE_D, IRNode.VECTOR_SIZE_ANY, ">= 1" }) + static double double_mul(double da, double db) { + return DoubleVector.broadcast(DSP, da) + .mul(DoubleVector.broadcast(DSP, db)) + .lane(0); + } + + @Run(test = "double_mul") + static void run_double_mul() { + double da = R.nextDouble(); + double db = R.nextDouble(); + double dr = double_mul(da, db); + Verify.checkEQ(dr, da * db); + } + + @Test + @IR(failOn = IRNode.DIV_VD, + applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"}, + counts = { IRNode.DIV_D, ">= 1", IRNode.REPLICATE_D, IRNode.VECTOR_SIZE_ANY, ">= 1" }) + static double double_div(double da, double db) { + return DoubleVector.broadcast(DSP, da) + .div(DoubleVector.broadcast(DSP, db)) + .lane(0); + } + + @Run(test = "double_div") + static void run_double_div() { + double da = R.nextDouble(); + double db = R.nextDouble(); + if (db == 0d) db = 1d; + double dr = double_div(da, db); + Verify.checkEQ(dr, da / db); + } + + @Test + @IR(failOn = IRNode.MIN_VD, + applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"}, + counts 
= { IRNode.MIN_D, ">= 1", IRNode.REPLICATE_D, IRNode.VECTOR_SIZE_ANY, ">= 1" }) + static double double_min(double da, double db) { + return DoubleVector.broadcast(DSP, da) + .min(DoubleVector.broadcast(DSP, db)) + .lane(0); + } + + @Run(test = "double_min") + static void run_double_min() { + double da = R.nextDouble(); + double db = R.nextDouble(); + double dr = double_min(da, db); + Verify.checkEQ(dr, Math.min(da, db)); + } + + @Test + @IR(failOn = IRNode.MAX_VD, + applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"}, + counts = { IRNode.MAX_D, ">= 1", IRNode.REPLICATE_D, IRNode.VECTOR_SIZE_ANY, ">= 1" }) + static double double_max(double da, double db) { + return DoubleVector.broadcast(DSP, da) + .max(DoubleVector.broadcast(DSP, db)) + .lane(0); + } + + @Run(test = "double_max") + static void run_double_max() { + double da = R.nextDouble(); + double db = R.nextDouble(); + double dr = double_max(da, db); + Verify.checkEQ(dr, Math.max(da, db)); + } + + @Test + @IR(failOn = IRNode.SQRT_VD, + applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"}, + counts = { IRNode.SQRT_D, ">= 1", IRNode.REPLICATE_D, IRNode.VECTOR_SIZE_ANY, ">= 1" }) + static double double_sqrt(double da) { + return DoubleVector.broadcast(DSP, da) + .sqrt() + .lane(0); + } + + @Run(test = "double_sqrt") + static void run_double_sqrt() { + double da = Math.abs(R.nextDouble()) + Double.MIN_VALUE; + double dr = double_sqrt(da); + Verify.checkEQ(dr, Math.sqrt(da)); + } + + @Test + @IR(failOn = IRNode.FMA_VD, + applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"}, + counts = { IRNode.FMA_D, ">= 1", IRNode.REPLICATE_D, IRNode.VECTOR_SIZE_ANY, ">= 1" }) + static double double_fma(double da, double db, double dc) { + return DoubleVector.broadcast(DSP, da) + .fma(DoubleVector.broadcast(DSP, db), + DoubleVector.broadcast(DSP, dc)) + .lane(0); + } + + @Run(test = "double_fma") + static void run_double_fma() { + double da = R.nextDouble(); + double db = R.nextDouble(); + double dc = R.nextDouble(); + 
double dr = double_fma(da, db, dc); + Verify.checkEQ(dr, Math.fma(da, db, dc)); + } + + /* ======================= + * BYTE + * ======================= */ + + static final VectorSpecies BSP = ByteVector.SPECIES_PREFERRED; + static byte B_MAX = Byte.MAX_VALUE, B_MIN = Byte.MIN_VALUE; + static byte B_ONE = (byte) 1, B_NEG_ONE = (byte) -1; + + @Test + @IR(failOn = IRNode.ADD_VB, + applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"}, + counts = { IRNode.ADD_I, ">= 1", + IRNode.REPLICATE_B, IRNode.VECTOR_SIZE_ANY, ">= 1" }) + static byte byte_add(byte ba, byte bb) { + return ByteVector.broadcast(BSP, ba) + .add(ByteVector.broadcast(BSP, bb)) + .lane(0); + } + + @Run(test = "byte_add") + static void run_byte_add() { + byte ba = (byte) R.nextInt(); + byte bb = (byte) R.nextInt(); + byte br = byte_add(ba, bb); + Verify.checkEQ(br, (byte) (ba + bb)); + } + + @Test + @IR(failOn = IRNode.SUB_VB, + applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"}, + counts = { IRNode.SUB_I, ">= 1", + IRNode.REPLICATE_B, IRNode.VECTOR_SIZE_ANY, ">= 1" }) + static byte byte_sub(byte ba, byte bb) { + return ByteVector.broadcast(BSP, ba) + .sub(ByteVector.broadcast(BSP, bb)) + .lane(0); + } + + @Run(test = "byte_sub") + static void run_byte_sub() { + byte ba = (byte) R.nextInt(); + byte bb = (byte) R.nextInt(); + byte br = byte_sub(ba, bb); + Verify.checkEQ(br, (byte) (ba - bb)); + } + + @Test + @IR(failOn = IRNode.ADD_VB, + applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"}, + counts = { IRNode.ADD_I, ">= 1", + IRNode.REPLICATE_B, IRNode.VECTOR_SIZE_ANY, ">= 1" }) + static byte byte_add_overflow() { + return ByteVector.broadcast(BSP, B_MAX) + .add(ByteVector.broadcast(BSP, B_ONE)) + .lane(0); + } + + @Run(test = "byte_add_overflow") + static void run_byte_add_overflow() { + byte br = byte_add_overflow(); + Verify.checkEQ(br, (byte) (B_MAX + B_ONE)); + } + + @Test + @IR(failOn = IRNode.ADD_VB, + applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"}, + counts = { IRNode.ADD_I, ">= 1", 
+ IRNode.REPLICATE_B, IRNode.VECTOR_SIZE_ANY, ">= 1" }) + static byte byte_add_underflow() { + return ByteVector.broadcast(BSP, B_MIN) + .add(ByteVector.broadcast(BSP, B_NEG_ONE)) + .lane(0); + } + + @Run(test = "byte_add_underflow") + static void run_byte_add_underflow() { + byte br = byte_add_underflow(); + Verify.checkEQ(br, (byte) (B_MIN + B_NEG_ONE)); + } + + @Test + @IR(failOn = IRNode.SUB_VB, + applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"}, + counts = { IRNode.SUB_I, ">= 1", + IRNode.REPLICATE_B, IRNode.VECTOR_SIZE_ANY, ">= 1" }) + static byte byte_sub_overflow() { + return ByteVector.broadcast(BSP, B_MAX) + .sub(ByteVector.broadcast(BSP, B_NEG_ONE)) + .lane(0); + } + + @Run(test = "byte_sub_overflow") + static void run_byte_sub_overflow() { + byte br = byte_sub_overflow(); + Verify.checkEQ(br, (byte) (B_MAX - B_NEG_ONE)); + } + + @Test + @IR(failOn = IRNode.SUB_VB, + applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"}, + counts = { IRNode.SUB_I, ">= 1", + IRNode.REPLICATE_B, IRNode.VECTOR_SIZE_ANY, ">= 1" }) + static byte byte_sub_underflow() { + return ByteVector.broadcast(BSP, B_MIN) + .sub(ByteVector.broadcast(BSP, B_ONE)) + .lane(0); + } + + @Run(test = "byte_sub_underflow") + static void run_byte_sub_underflow() { + byte br = byte_sub_underflow(); + Verify.checkEQ(br, (byte) (B_MIN - B_ONE)); + } + + @Test + @IR(failOn = IRNode.MUL_VB, + applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"}, + counts = { IRNode.MUL_I, ">= 1", + IRNode.REPLICATE_B, IRNode.VECTOR_SIZE_ANY, ">= 1" }) + static byte byte_mul(byte ba, byte bb) { + return ByteVector.broadcast(BSP, ba) + .mul(ByteVector.broadcast(BSP, bb)) + .lane(0); + } + + @Run(test = "byte_mul") + static void run_byte_mul() { + byte ba = (byte) R.nextInt(); + byte bb = (byte) R.nextInt(); + byte br = byte_mul(ba, bb); + Verify.checkEQ(br, (byte) (ba * bb)); + } + + @Test + @IR(failOn = IRNode.AND_VB, + applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"}, + counts = { IRNode.AND_I, ">= 1", 
IRNode.REPLICATE_B, IRNode.VECTOR_SIZE_ANY, ">= 1" }) + static byte byte_and(byte ba, byte bb) { + return ByteVector.broadcast(BSP, ba) + .and(ByteVector.broadcast(BSP, bb)) + .lane(0); + } + + @Run(test = "byte_and") + static void run_byte_and() { + byte ba = (byte) R.nextInt(); + byte bb = (byte) R.nextInt(); + byte br = byte_and(ba, bb); + Verify.checkEQ(br, (byte) (ba & bb)); + } + + @Test + @IR(failOn = IRNode.OR_VB, + applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"}, + counts = { IRNode.OR_I, ">= 1", IRNode.REPLICATE_B, IRNode.VECTOR_SIZE_ANY, ">= 1" }) + static byte byte_or(byte ba, byte bb) { + return ByteVector.broadcast(BSP, ba) + .or(ByteVector.broadcast(BSP, bb)) + .lane(0); + } + + @Run(test = "byte_or") + static void run_byte_or() { + byte ba = (byte) R.nextInt(); + byte bb = (byte) R.nextInt(); + byte br = byte_or(ba, bb); + Verify.checkEQ(br, (byte) (ba | bb)); + } + + @Test + @IR(failOn = IRNode.XOR_VB, + applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"}, + counts = { IRNode.XOR_I, ">= 1", IRNode.REPLICATE_B, IRNode.VECTOR_SIZE_ANY, ">= 1" }) + static byte byte_xor(byte ba, byte bb) { + return ByteVector.broadcast(BSP, ba) + .lanewise(VectorOperators.XOR, ByteVector.broadcast(BSP, bb)) + .lane(0); + } + + @Run(test = "byte_xor") + static void run_byte_xor() { + byte ba = (byte) R.nextInt(); + byte bb = (byte) R.nextInt(); + byte br = byte_xor(ba, bb); + Verify.checkEQ(br, (byte) (ba ^ bb)); + } + + @Test + @IR(failOn = IRNode.MIN_VB, + applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"}, + counts = { IRNode.MIN_I, ">= 1", IRNode.REPLICATE_B, IRNode.VECTOR_SIZE_ANY, ">= 1" }) + static byte byte_min(byte ba, byte bb) { + return ByteVector.broadcast(BSP, ba) + .min(ByteVector.broadcast(BSP, bb)) + .lane(0); + } + + @Run(test = "byte_min") + static void run_byte_min() { + byte ba = (byte) R.nextInt(); + byte bb = (byte) R.nextInt(); + byte br = byte_min(ba, bb); + Verify.checkEQ(br, (byte) Math.min(ba, bb)); + } + + @Test + @IR(failOn = 
IRNode.MAX_VB, + applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"}, + counts = { IRNode.MAX_I, ">= 1", IRNode.REPLICATE_B, IRNode.VECTOR_SIZE_ANY, ">= 1" }) + static byte byte_max(byte ba, byte bb) { + return ByteVector.broadcast(BSP, ba) + .max(ByteVector.broadcast(BSP, bb)) + .lane(0); + } + + @Run(test = "byte_max") + static void run_byte_max() { + byte ba = (byte) R.nextInt(); + byte bb = (byte) R.nextInt(); + byte br = byte_max(ba, bb); + Verify.checkEQ(br, (byte) Math.max(ba, bb)); + } + + /* ======================= + * SHORT + * ======================= */ + + static final VectorSpecies SSP = ShortVector.SPECIES_PREFERRED; + static short S_MAX = Short.MAX_VALUE, S_MIN = Short.MIN_VALUE; + static short S_ONE = (short) 1, S_NEG_ONE = (short) -1; + + @Test + @IR(failOn = IRNode.ADD_VS, + applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"}, + counts = { IRNode.ADD_I, ">= 1", + IRNode.REPLICATE_S, IRNode.VECTOR_SIZE_ANY, ">= 1" }) + static short short_add(short sa, short sb) { + return ShortVector.broadcast(SSP, sa) + .add(ShortVector.broadcast(SSP, sb)) + .lane(0); + } + + @Run(test = "short_add") + static void run_short_add() { + short sa = (short) R.nextInt(); + short sb = (short) R.nextInt(); + short sr = short_add(sa, sb); + Verify.checkEQ(sr, (short) (sa + sb)); + } + + @Test + @IR(failOn = IRNode.SUB_VS, + applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"}, + counts = { IRNode.SUB_I, ">= 1", + IRNode.REPLICATE_S, IRNode.VECTOR_SIZE_ANY, ">= 1" }) + static short short_sub(short sa, short sb) { + return ShortVector.broadcast(SSP, sa) + .sub(ShortVector.broadcast(SSP, sb)) + .lane(0); + } + + @Run(test = "short_sub") + static void run_short_sub() { + short sa = (short) R.nextInt(); + short sb = (short) R.nextInt(); + short sr = short_sub(sa, sb); + Verify.checkEQ(sr, (short) (sa - sb)); + } + + @Test + @IR(failOn = IRNode.ADD_VS, + applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"}, + counts = { IRNode.ADD_I, ">= 1", + IRNode.REPLICATE_S, 
IRNode.VECTOR_SIZE_ANY, ">= 1" }) + static short short_add_overflow() { + return ShortVector.broadcast(SSP, S_MAX) + .add(ShortVector.broadcast(SSP, S_ONE)) + .lane(0); + } + + @Run(test = "short_add_overflow") + static void run_short_add_overflow() { + short sr = short_add_overflow(); + Verify.checkEQ(sr, (short) (S_MAX + S_ONE)); + } + + @Test + @IR(failOn = IRNode.ADD_VS, + applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"}, + counts = { IRNode.ADD_I, ">= 1", + IRNode.REPLICATE_S, IRNode.VECTOR_SIZE_ANY, ">= 1" }) + static short short_add_underflow() { + return ShortVector.broadcast(SSP, S_MIN) + .add(ShortVector.broadcast(SSP, S_NEG_ONE)) + .lane(0); + } + + @Run(test = "short_add_underflow") + static void run_short_add_underflow() { + short sr = short_add_underflow(); + Verify.checkEQ(sr, (short) (S_MIN + S_NEG_ONE)); + } + + @Test + @IR(failOn = IRNode.SUB_VS, + applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"}, + counts = { IRNode.SUB_I, ">= 1", + IRNode.REPLICATE_S, IRNode.VECTOR_SIZE_ANY, ">= 1" }) + static short short_sub_overflow() { + return ShortVector.broadcast(SSP, S_MAX) + .sub(ShortVector.broadcast(SSP, S_NEG_ONE)) + .lane(0); + } + + @Run(test = "short_sub_overflow") + static void run_short_sub_overflow() { + short sr = short_sub_overflow(); + Verify.checkEQ(sr, (short) (S_MAX - S_NEG_ONE)); + } + + @Test + @IR(failOn = IRNode.SUB_VS, + applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"}, + counts = { IRNode.SUB_I, ">= 1", + IRNode.REPLICATE_S, IRNode.VECTOR_SIZE_ANY, ">= 1" }) + static short short_sub_underflow() { + return ShortVector.broadcast(SSP, S_MIN) + .sub(ShortVector.broadcast(SSP, S_ONE)) + .lane(0); + } + + @Run(test = "short_sub_underflow") + static void run_short_sub_underflow() { + short sr = short_sub_underflow(); + Verify.checkEQ(sr, (short) (S_MIN - S_ONE)); + } + + @Test + @IR(failOn = IRNode.MUL_VS, + applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"}, + counts = { IRNode.MUL_I, ">= 1", + IRNode.REPLICATE_S, 
IRNode.VECTOR_SIZE_ANY, ">= 1" }) + static short short_mul(short sa, short sb) { + return ShortVector.broadcast(SSP, sa) + .mul(ShortVector.broadcast(SSP, sb)) + .lane(0); + } + + @Run(test = "short_mul") + static void run_short_mul() { + short sa = (short) R.nextInt(); + short sb = (short) R.nextInt(); + short sr = short_mul(sa, sb); + Verify.checkEQ(sr, (short) (sa * sb)); + } + + @Test + @IR(failOn = IRNode.AND_VS, + applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"}, + counts = { IRNode.AND_I, ">= 1", IRNode.REPLICATE_S, IRNode.VECTOR_SIZE_ANY, ">= 1" }) + static short short_and(short sa, short sb) { + return ShortVector.broadcast(SSP, sa) + .and(ShortVector.broadcast(SSP, sb)) + .lane(0); + } + + @Run(test = "short_and") + static void run_short_and() { + short sa = (short) R.nextInt(); + short sb = (short) R.nextInt(); + short sr = short_and(sa, sb); + Verify.checkEQ(sr, (short) (sa & sb)); + } + + @Test + @IR(failOn = IRNode.OR_VS, + applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"}, + counts = { IRNode.OR_I, ">= 1", IRNode.REPLICATE_S, IRNode.VECTOR_SIZE_ANY, ">= 1" }) + static short short_or(short sa, short sb) { + return ShortVector.broadcast(SSP, sa) + .or(ShortVector.broadcast(SSP, sb)) + .lane(0); + } + + @Run(test = "short_or") + static void run_short_or() { + short sa = (short) R.nextInt(); + short sb = (short) R.nextInt(); + short sr = short_or(sa, sb); + Verify.checkEQ(sr, (short) (sa | sb)); + } + + @Test + @IR(failOn = IRNode.XOR_VS, + applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"}, + counts = { IRNode.XOR_I, ">= 1", IRNode.REPLICATE_S, IRNode.VECTOR_SIZE_ANY, ">= 1" }) + static short short_xor(short sa, short sb) { + return ShortVector.broadcast(SSP, sa) + .lanewise(VectorOperators.XOR, ShortVector.broadcast(SSP, sb)) + .lane(0); + } + + @Run(test = "short_xor") + static void run_short_xor() { + short sa = (short) R.nextInt(); + short sb = (short) R.nextInt(); + short sr = short_xor(sa, sb); + Verify.checkEQ(sr, (short) (sa ^ 
sb)); + } + + @Test + @IR(failOn = IRNode.MIN_VS, + applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"}, + counts = { IRNode.MIN_I, ">= 1", IRNode.REPLICATE_S, IRNode.VECTOR_SIZE_ANY, ">= 1" }) + static short short_min(short sa, short sb) { + return ShortVector.broadcast(SSP, sa) + .min(ShortVector.broadcast(SSP, sb)) + .lane(0); + } + + @Run(test = "short_min") + static void run_short_min() { + short sa = (short) R.nextInt(); + short sb = (short) R.nextInt(); + short sr = short_min(sa, sb); + Verify.checkEQ(sr, (short) Math.min(sa, sb)); + } + + @Test + @IR(failOn = IRNode.MAX_VS, + applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"}, + counts = { IRNode.MAX_I, ">= 1", IRNode.REPLICATE_S, IRNode.VECTOR_SIZE_ANY, ">= 1" }) + static short short_max(short sa, short sb) { + return ShortVector.broadcast(SSP, sa) + .max(ShortVector.broadcast(SSP, sb)) + .lane(0); + } + + @Run(test = "short_max") + static void run_short_max() { + short sa = (short) R.nextInt(); + short sb = (short) R.nextInt(); + short sr = short_max(sa, sb); + Verify.checkEQ(sr, (short) Math.max(sa, sb)); + } + +} diff --git a/test/hotspot/jtreg/compiler/vectorapi/TestVectorReassociations.java b/test/hotspot/jtreg/compiler/vectorapi/TestVectorReassociations.java new file mode 100644 index 00000000000..c6a11627215 --- /dev/null +++ b/test/hotspot/jtreg/compiler/vectorapi/TestVectorReassociations.java @@ -0,0 +1,605 @@ +/* + * Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 8358521
+ * @summary Test reassociation of broadcasted inputs across vector operations
+ * @modules jdk.incubator.vector
+ * @library /test/lib /
+ * @run driver compiler.vectorapi.TestVectorReassociations
+ */
+
+package compiler.vectorapi;
+
+import compiler.lib.ir_framework.*;
+import jdk.incubator.vector.*;
+import java.util.stream.IntStream;
+
+/**
+ * IR-only tests (the stored output values are never checked here) for the reassociation transform,
+ * written for ADD and MUL over int, long, short and byte, with four operand orders ("patterns") each:
+ *   VectorOp(broadcast(a), VectorOp(broadcast(b), array)) => VectorOp(broadcast(ScalarOp(a, b)), array)
+ */
+public class TestVectorReassociations {
+
+    public static void main(String[] args) {
+        TestFramework.runWithFlags("--add-modules=jdk.incubator.vector"); // the incubating Vector API module must be added explicitly
+    }
+
+    /* =======================
+     * INT
+     * ======================= */
+
+    static final VectorSpecies ISP = IntVector.SPECIES_PREFERRED; // species shared by all int tests
+    static int[] intIn = IntStream.range(0, IntVector.SPECIES_PREFERRED.length()).toArray(); // one vector of input: 0, 1, ..., length-1
+    static int[] intOut = new int[IntVector.SPECIES_PREFERRED.length()]; // target of intoArray; not read back in this class
+    static int ia = 17, ib = 9; // the two scalars that are broadcast
+
+    // --- INT ADD --- every rule below: ADD_VI with count " 1 ", scalar ADD_I ">= 1", REPLICATE_I ">= 1"
+
+    // Pattern 1: bcast(a) ADD (bcast(b) ADD array)
+    @Test
+    @IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
+        counts = { IRNode.ADD_VI, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.ADD_I, ">= 1",
+                   IRNode.REPLICATE_I, IRNode.VECTOR_SIZE_ANY, ">= 1" })
+    @Warmup(value = 10000)
+    static void test_int_add_reassociation_pattern1() {
+        IntVector.broadcast(ISP, ia)
+                 .lanewise(VectorOperators.ADD,
+                           IntVector.broadcast(ISP, ib)
+                                    .lanewise(VectorOperators.ADD,
+                                              IntVector.fromArray(ISP, intIn, 0)))
+                 .intoArray(intOut, 0);
+    }
+
+    // Pattern 2: bcast(a) ADD (array ADD bcast(b))
+    @Test
+    @IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
+        counts = { IRNode.ADD_VI, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.ADD_I, ">= 1",
+                   IRNode.REPLICATE_I, IRNode.VECTOR_SIZE_ANY, ">= 1" })
+    @Warmup(value = 10000)
+    static void test_int_add_reassociation_pattern2() {
+        IntVector.broadcast(ISP, ia)
+                 .lanewise(VectorOperators.ADD,
+                           IntVector.fromArray(ISP, intIn, 0)
+                                    .lanewise(VectorOperators.ADD,
+                                              IntVector.broadcast(ISP, ib)))
+                 .intoArray(intOut, 0);
+    }
+
+    // Pattern 3: (bcast(a) ADD array) ADD bcast(b)
+    @Test
+    @IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
+        counts = { IRNode.ADD_VI, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.ADD_I, ">= 1",
+                   IRNode.REPLICATE_I, IRNode.VECTOR_SIZE_ANY, ">= 1" })
+    @Warmup(value = 10000)
+    static void test_int_add_reassociation_pattern3() {
+        IntVector.broadcast(ISP, ia)
+                 .lanewise(VectorOperators.ADD,
+                           IntVector.fromArray(ISP, intIn, 0))
+                 .lanewise(VectorOperators.ADD,
+                           IntVector.broadcast(ISP, ib))
+                 .intoArray(intOut, 0);
+    }
+
+    // Pattern 4: (array ADD bcast(a)) ADD bcast(b)
+    @Test
+    @IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
+        counts = { IRNode.ADD_VI, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.ADD_I, ">= 1",
+                   IRNode.REPLICATE_I, IRNode.VECTOR_SIZE_ANY, ">= 1" })
+    @Warmup(value = 10000)
+    static void test_int_add_reassociation_pattern4() {
+        IntVector.fromArray(ISP, intIn, 0)
+                 .lanewise(VectorOperators.ADD,
+                           IntVector.broadcast(ISP, ia))
+                 .lanewise(VectorOperators.ADD,
+                           IntVector.broadcast(ISP, ib))
+                 .intoArray(intOut, 0);
+    }
+
+    // --- INT MUL --- every rule below: MUL_VI with count " 1 ", scalar MUL_I ">= 1", REPLICATE_I ">= 1"
+
+    // Pattern 1: bcast(a) MUL (bcast(b) MUL array)
+    @Test
+    @IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
+        counts = { IRNode.MUL_VI, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.MUL_I, ">= 1",
+                   IRNode.REPLICATE_I, IRNode.VECTOR_SIZE_ANY, ">= 1" })
+    @Warmup(value = 10000)
+    static void test_int_mul_reassociation_pattern1() {
+        IntVector.broadcast(ISP, ia)
+                 .lanewise(VectorOperators.MUL,
+                           IntVector.broadcast(ISP, ib)
+                                    .lanewise(VectorOperators.MUL,
+                                              IntVector.fromArray(ISP, intIn, 0)))
+                 .intoArray(intOut, 0);
+    }
+
+    // Pattern 2: bcast(a) MUL (array MUL bcast(b))
+    @Test
+    @IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
+        counts = { IRNode.MUL_VI, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.MUL_I, ">= 1",
+                   IRNode.REPLICATE_I, IRNode.VECTOR_SIZE_ANY, ">= 1" })
+    @Warmup(value = 10000)
+    static void test_int_mul_reassociation_pattern2() {
+        IntVector.broadcast(ISP, ia)
+                 .lanewise(VectorOperators.MUL,
+                           IntVector.fromArray(ISP, intIn, 0)
+                                    .lanewise(VectorOperators.MUL,
+                                              IntVector.broadcast(ISP, ib)))
+                 .intoArray(intOut, 0);
+    }
+
+    // Pattern 3: (bcast(a) MUL array) MUL bcast(b)
+    @Test
+    @IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
+        counts = { IRNode.MUL_VI, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.MUL_I, ">= 1",
+                   IRNode.REPLICATE_I, IRNode.VECTOR_SIZE_ANY, ">= 1" })
+    @Warmup(value = 10000)
+    static void test_int_mul_reassociation_pattern3() {
+        IntVector.broadcast(ISP, ia)
+                 .lanewise(VectorOperators.MUL,
+                           IntVector.fromArray(ISP, intIn, 0))
+                 .lanewise(VectorOperators.MUL,
+                           IntVector.broadcast(ISP, ib))
+                 .intoArray(intOut, 0);
+    }
+
+    // Pattern 4: (array MUL bcast(a)) MUL bcast(b)
+    @Test
+    @IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
+        counts = { IRNode.MUL_VI, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.MUL_I, ">= 1",
+                   IRNode.REPLICATE_I, IRNode.VECTOR_SIZE_ANY, ">= 1" })
+    @Warmup(value = 10000)
+    static void test_int_mul_reassociation_pattern4() {
+        IntVector.fromArray(ISP, intIn, 0)
+                 .lanewise(VectorOperators.MUL,
+                           IntVector.broadcast(ISP, ia))
+                 .lanewise(VectorOperators.MUL,
+                           IntVector.broadcast(ISP, ib))
+                 .intoArray(intOut, 0);
+    }
+
+    /* =======================
+     * LONG
+     * ======================= */
+
+    static final VectorSpecies LSP = LongVector.SPECIES_PREFERRED; // species shared by all long tests
+    static long[] longIn; // one vector of input, filled in the static block below
+    static long[] longOut; // target of intoArray; not read back in this class
+    static long la = 17L, lb = 9L; // the two scalars that are broadcast
+
+    static { // size both arrays to one LSP vector and fill longIn with 0, 1, ..., length-1
+        longIn = new long[LSP.length()];
+        longOut = new long[LSP.length()];
+        for (int i = 0; i < LSP.length(); i++) {
+            longIn[i] = (long) i;
+        }
+    }
+
+    // --- LONG ADD --- every rule below: ADD_VL with count " 1 ", scalar ADD_L ">= 1", REPLICATE_L ">= 1"
+
+    // Pattern 1: bcast(a) ADD (bcast(b) ADD array)
+    @Test
+    @IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
+        counts = { IRNode.ADD_VL, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.ADD_L, ">= 1",
+                   IRNode.REPLICATE_L, IRNode.VECTOR_SIZE_ANY, ">= 1" })
+    @Warmup(value = 10000)
+    static void test_long_add_reassociation_pattern1() {
+        LongVector.broadcast(LSP, la)
+                  .lanewise(VectorOperators.ADD,
+                            LongVector.broadcast(LSP, lb)
+                                      .lanewise(VectorOperators.ADD,
+                                                LongVector.fromArray(LSP, longIn, 0)))
+                  .intoArray(longOut, 0);
+    }
+
+    // Pattern 2: bcast(a) ADD (array ADD bcast(b))
+    @Test
+    @IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
+        counts = { IRNode.ADD_VL, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.ADD_L, ">= 1",
+                   IRNode.REPLICATE_L, IRNode.VECTOR_SIZE_ANY, ">= 1" })
+    @Warmup(value = 10000)
+    static void test_long_add_reassociation_pattern2() {
+        LongVector.broadcast(LSP, la)
+                  .lanewise(VectorOperators.ADD,
+                            LongVector.fromArray(LSP, longIn, 0)
+                                      .lanewise(VectorOperators.ADD,
+                                                LongVector.broadcast(LSP, lb)))
+                  .intoArray(longOut, 0);
+    }
+
+    // Pattern 3: (bcast(a) ADD array) ADD bcast(b)
+    @Test
+    @IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
+        counts = { IRNode.ADD_VL, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.ADD_L, ">= 1",
+                   IRNode.REPLICATE_L, IRNode.VECTOR_SIZE_ANY, ">= 1" })
+    @Warmup(value = 10000)
+    static void test_long_add_reassociation_pattern3() {
+        LongVector.broadcast(LSP, la)
+                  .lanewise(VectorOperators.ADD,
+                            LongVector.fromArray(LSP, longIn, 0))
+                  .lanewise(VectorOperators.ADD,
+                            LongVector.broadcast(LSP, lb))
+                  .intoArray(longOut, 0);
+    }
+
+    // Pattern 4: (array ADD bcast(a)) ADD bcast(b)
+    @Test
+    @IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
+        counts = { IRNode.ADD_VL, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.ADD_L, ">= 1",
+                   IRNode.REPLICATE_L, IRNode.VECTOR_SIZE_ANY, ">= 1" })
+    @Warmup(value = 10000)
+    static void test_long_add_reassociation_pattern4() {
+        LongVector.fromArray(LSP, longIn, 0)
+                  .lanewise(VectorOperators.ADD,
+                            LongVector.broadcast(LSP, la))
+                  .lanewise(VectorOperators.ADD,
+                            LongVector.broadcast(LSP, lb))
+                  .intoArray(longOut, 0);
+    }
+
+    // --- LONG MUL --- every rule below: MUL_VL with count " 1 ", scalar MUL_L ">= 1", REPLICATE_L ">= 1"
+
+    // Pattern 1: bcast(a) MUL (bcast(b) MUL array)
+    @Test
+    @IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
+        counts = { IRNode.MUL_VL, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.MUL_L, ">= 1",
+                   IRNode.REPLICATE_L, IRNode.VECTOR_SIZE_ANY, ">= 1" })
+    @Warmup(value = 10000)
+    static void test_long_mul_reassociation_pattern1() {
+        LongVector.broadcast(LSP, la)
+                  .lanewise(VectorOperators.MUL,
+                            LongVector.broadcast(LSP, lb)
+                                      .lanewise(VectorOperators.MUL,
+                                                LongVector.fromArray(LSP, longIn, 0)))
+                  .intoArray(longOut, 0);
+    }
+
+    // Pattern 2: bcast(a) MUL (array MUL bcast(b))
+    @Test
+    @IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
+        counts = { IRNode.MUL_VL, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.MUL_L, ">= 1",
+                   IRNode.REPLICATE_L, IRNode.VECTOR_SIZE_ANY, ">= 1" })
+    @Warmup(value = 10000)
+    static void test_long_mul_reassociation_pattern2() {
+        LongVector.broadcast(LSP, la)
+                  .lanewise(VectorOperators.MUL,
+                            LongVector.fromArray(LSP, longIn, 0)
+                                      .lanewise(VectorOperators.MUL,
+                                                LongVector.broadcast(LSP, lb)))
+                  .intoArray(longOut, 0);
+    }
+
+    // Pattern 3: (bcast(a) MUL array) MUL bcast(b)
+    @Test
+    @IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
+        counts = { IRNode.MUL_VL, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.MUL_L, ">= 1",
+                   IRNode.REPLICATE_L, IRNode.VECTOR_SIZE_ANY, ">= 1" })
+    @Warmup(value = 10000)
+    static void test_long_mul_reassociation_pattern3() {
+        LongVector.broadcast(LSP, la)
+                  .lanewise(VectorOperators.MUL,
+                            LongVector.fromArray(LSP, longIn, 0))
+                  .lanewise(VectorOperators.MUL,
+                            LongVector.broadcast(LSP, lb))
+                  .intoArray(longOut, 0);
+    }
+
+    // Pattern 4: (array MUL bcast(a)) MUL bcast(b)
+    @Test
+    @IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
+        counts = { IRNode.MUL_VL, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.MUL_L, ">= 1",
+                   IRNode.REPLICATE_L, IRNode.VECTOR_SIZE_ANY, ">= 1" })
+    @Warmup(value = 10000)
+    static void test_long_mul_reassociation_pattern4() {
+        LongVector.fromArray(LSP, longIn, 0)
+                  .lanewise(VectorOperators.MUL,
+                            LongVector.broadcast(LSP, la))
+                  .lanewise(VectorOperators.MUL,
+                            LongVector.broadcast(LSP, lb))
+                  .intoArray(longOut, 0);
+    }
+
+    /* =======================
+     * SHORT
+     * ======================= */
+
+    static final VectorSpecies SSP = ShortVector.SPECIES_PREFERRED; // species shared by all short tests
+    static short[] shortIn; // one vector of input, filled in the static block below
+    static short[] shortOut; // target of intoArray; not read back in this class
+    static short sa = 17, sb = 9; // the two scalars that are broadcast
+
+    static { // size both arrays to one SSP vector and fill shortIn with 0, 1, ..., length-1
+        shortIn = new short[SSP.length()];
+        shortOut = new short[SSP.length()];
+        for (int i = 0; i < SSP.length(); i++) {
+            shortIn[i] = (short) i;
+        }
+    }
+
+    // --- SHORT ADD --- every rule below: ADD_VS with count " 1 ", scalar ADD_I (int node) ">= 1", REPLICATE_S ">= 1"
+
+    // Pattern 1: bcast(a) ADD (bcast(b) ADD array)
+    @Test
+    @IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
+        counts = { IRNode.ADD_VS, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.ADD_I, ">= 1",
+                   IRNode.REPLICATE_S, IRNode.VECTOR_SIZE_ANY, ">= 1" })
+    @Warmup(value = 10000)
+    static void test_short_add_reassociation_pattern1() {
+        ShortVector.broadcast(SSP, sa)
+                   .lanewise(VectorOperators.ADD,
+                             ShortVector.broadcast(SSP, sb)
+                                        .lanewise(VectorOperators.ADD,
+                                                  ShortVector.fromArray(SSP, shortIn, 0)))
+                   .intoArray(shortOut, 0);
+    }
+
+    // Pattern 2: bcast(a) ADD (array ADD bcast(b))
+    @Test
+    @IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
+        counts = { IRNode.ADD_VS, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.ADD_I, ">= 1",
+                   IRNode.REPLICATE_S, IRNode.VECTOR_SIZE_ANY, ">= 1" })
+    @Warmup(value = 10000)
+    static void test_short_add_reassociation_pattern2() {
+        ShortVector.broadcast(SSP, sa)
+                   .lanewise(VectorOperators.ADD,
+                             ShortVector.fromArray(SSP, shortIn, 0)
+                                        .lanewise(VectorOperators.ADD,
+                                                  ShortVector.broadcast(SSP, sb)))
+                   .intoArray(shortOut, 0);
+    }
+
+    // Pattern 3: (bcast(a) ADD array) ADD bcast(b)
+    @Test
+    @IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
+        counts = { IRNode.ADD_VS, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.ADD_I, ">= 1",
+                   IRNode.REPLICATE_S, IRNode.VECTOR_SIZE_ANY, ">= 1" })
+    @Warmup(value = 10000)
+    static void test_short_add_reassociation_pattern3() {
+        ShortVector.broadcast(SSP, sa)
+                   .lanewise(VectorOperators.ADD,
+                             ShortVector.fromArray(SSP, shortIn, 0))
+                   .lanewise(VectorOperators.ADD,
+                             ShortVector.broadcast(SSP, sb))
+                   .intoArray(shortOut, 0);
+    }
+
+    // Pattern 4: (array ADD bcast(a)) ADD bcast(b)
+    @Test
+    @IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
+        counts = { IRNode.ADD_VS, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.ADD_I, ">= 1",
+                   IRNode.REPLICATE_S, IRNode.VECTOR_SIZE_ANY, ">= 1" })
+    @Warmup(value = 10000)
+    static void test_short_add_reassociation_pattern4() {
+        ShortVector.fromArray(SSP, shortIn, 0)
+                   .lanewise(VectorOperators.ADD,
+                             ShortVector.broadcast(SSP, sa))
+                   .lanewise(VectorOperators.ADD,
+                             ShortVector.broadcast(SSP, sb))
+                   .intoArray(shortOut, 0);
+    }
+
+    // --- SHORT MUL --- every rule below: MUL_VS with count " 1 ", scalar MUL_I (int node) ">= 1", REPLICATE_S ">= 1"
+
+    // Pattern 1: bcast(a) MUL (bcast(b) MUL array)
+    @Test
+    @IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
+        counts = { IRNode.MUL_VS, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.MUL_I, ">= 1",
+                   IRNode.REPLICATE_S, IRNode.VECTOR_SIZE_ANY, ">= 1" })
+    @Warmup(value = 10000)
+    static void test_short_mul_reassociation_pattern1() {
+        ShortVector.broadcast(SSP, sa)
+                   .lanewise(VectorOperators.MUL,
+                             ShortVector.broadcast(SSP, sb)
+                                        .lanewise(VectorOperators.MUL,
+                                                  ShortVector.fromArray(SSP, shortIn, 0)))
+                   .intoArray(shortOut, 0);
+    }
+
+    // Pattern 2: bcast(a) MUL (array MUL bcast(b))
+    @Test
+    @IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
+        counts = { IRNode.MUL_VS, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.MUL_I, ">= 1",
+                   IRNode.REPLICATE_S, IRNode.VECTOR_SIZE_ANY, ">= 1" })
+    @Warmup(value = 10000)
+    static void test_short_mul_reassociation_pattern2() {
+        ShortVector.broadcast(SSP, sa)
+                   .lanewise(VectorOperators.MUL,
+                             ShortVector.fromArray(SSP, shortIn, 0)
+                                        .lanewise(VectorOperators.MUL,
+                                                  ShortVector.broadcast(SSP, sb)))
+                   .intoArray(shortOut, 0);
+    }
+
+    // Pattern 3: (bcast(a) MUL array) MUL bcast(b)
+    @Test
+    @IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
+        counts = { IRNode.MUL_VS, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.MUL_I, ">= 1",
+                   IRNode.REPLICATE_S, IRNode.VECTOR_SIZE_ANY, ">= 1" })
+    @Warmup(value = 10000)
+    static void test_short_mul_reassociation_pattern3() {
+        ShortVector.broadcast(SSP, sa)
+                   .lanewise(VectorOperators.MUL,
+                             ShortVector.fromArray(SSP, shortIn, 0))
+                   .lanewise(VectorOperators.MUL,
+                             ShortVector.broadcast(SSP, sb))
+                   .intoArray(shortOut, 0);
+    }
+
+    // Pattern 4: (array MUL bcast(a)) MUL bcast(b)
+    @Test
+    @IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
+        counts = { IRNode.MUL_VS, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.MUL_I, ">= 1",
+                   IRNode.REPLICATE_S, IRNode.VECTOR_SIZE_ANY, ">= 1" })
+    @Warmup(value = 10000)
+    static void test_short_mul_reassociation_pattern4() {
+        ShortVector.fromArray(SSP, shortIn, 0)
+                   .lanewise(VectorOperators.MUL,
+                             ShortVector.broadcast(SSP, sa))
+                   .lanewise(VectorOperators.MUL,
+                             ShortVector.broadcast(SSP, sb))
+                   .intoArray(shortOut, 0);
+    }
+
+    /* =======================
+     * BYTE
+     * ======================= */
+
+    static final VectorSpecies BSP = ByteVector.SPECIES_PREFERRED; // species shared by all byte tests
+    static byte[] byteIn; // one vector of input, filled in the static block below
+    static byte[] byteOut; // target of intoArray; not read back in this class
+    static byte ba = 17, bb = 9; // the two scalars that are broadcast
+
+    static { // size both arrays to one BSP vector and fill byteIn with (byte) 0, 1, ..., length-1
+        byteIn = new byte[BSP.length()];
+        byteOut = new byte[BSP.length()];
+        for (int i = 0; i < BSP.length(); i++) {
+            byteIn[i] = (byte) i;
+        }
+    }
+
+    // --- BYTE ADD --- every rule below: ADD_VB with count " 1 ", scalar ADD_I (int node) ">= 1", REPLICATE_B ">= 1"
+
+    // Pattern 1: bcast(a) ADD (bcast(b) ADD array)
+    @Test
+    @IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
+        counts = { IRNode.ADD_VB, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.ADD_I, ">= 1",
+                   IRNode.REPLICATE_B, IRNode.VECTOR_SIZE_ANY, ">= 1" })
+    @Warmup(value = 10000)
+    static void test_byte_add_reassociation_pattern1() {
+        ByteVector.broadcast(BSP, ba)
+                  .lanewise(VectorOperators.ADD,
+                            ByteVector.broadcast(BSP, bb)
+                                      .lanewise(VectorOperators.ADD,
+                                                ByteVector.fromArray(BSP, byteIn, 0)))
+                  .intoArray(byteOut, 0);
+    }
+
+    // Pattern 2: bcast(a) ADD (array ADD bcast(b))
+    @Test
+    @IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
+        counts = { IRNode.ADD_VB, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.ADD_I, ">= 1",
+                   IRNode.REPLICATE_B, IRNode.VECTOR_SIZE_ANY, ">= 1" })
+    @Warmup(value = 10000)
+    static void test_byte_add_reassociation_pattern2() {
+        ByteVector.broadcast(BSP, ba)
+                  .lanewise(VectorOperators.ADD,
+                            ByteVector.fromArray(BSP, byteIn, 0)
+                                      .lanewise(VectorOperators.ADD,
+                                                ByteVector.broadcast(BSP, bb)))
+                  .intoArray(byteOut, 0);
+    }
+
+    // Pattern 3: (bcast(a) ADD array) ADD bcast(b)
+    @Test
+    @IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
+        counts = { IRNode.ADD_VB, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.ADD_I, ">= 1",
+                   IRNode.REPLICATE_B, IRNode.VECTOR_SIZE_ANY, ">= 1" })
+    @Warmup(value = 10000)
+    static void test_byte_add_reassociation_pattern3() {
+        ByteVector.broadcast(BSP, ba)
+                  .lanewise(VectorOperators.ADD,
+                            ByteVector.fromArray(BSP, byteIn, 0))
+                  .lanewise(VectorOperators.ADD,
+                            ByteVector.broadcast(BSP, bb))
+                  .intoArray(byteOut, 0);
+    }
+
+    // Pattern 4: (array ADD bcast(a)) ADD bcast(b)
+    @Test
+    @IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
+        counts = { IRNode.ADD_VB, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.ADD_I, ">= 1",
+                   IRNode.REPLICATE_B, IRNode.VECTOR_SIZE_ANY, ">= 1" })
+    @Warmup(value = 10000)
+    static void test_byte_add_reassociation_pattern4() {
+        ByteVector.fromArray(BSP, byteIn, 0)
+                  .lanewise(VectorOperators.ADD,
+                            ByteVector.broadcast(BSP, ba))
+                  .lanewise(VectorOperators.ADD,
+                            ByteVector.broadcast(BSP, bb))
+                  .intoArray(byteOut, 0);
+    }
+
+    // --- BYTE MUL --- every rule below: MUL_VB with count " 1 ", scalar MUL_I (int node) ">= 1", REPLICATE_B ">= 1"
+
+    // Pattern 1: bcast(a) MUL (bcast(b) MUL array)
+    @Test
+    @IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
+        counts = { IRNode.MUL_VB, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.MUL_I, ">= 1",
+                   IRNode.REPLICATE_B, IRNode.VECTOR_SIZE_ANY, ">= 1" })
+    @Warmup(value = 10000)
+    static void test_byte_mul_reassociation_pattern1() {
+        ByteVector.broadcast(BSP, ba)
+                  .lanewise(VectorOperators.MUL,
+                            ByteVector.broadcast(BSP, bb)
+                                      .lanewise(VectorOperators.MUL,
+                                                ByteVector.fromArray(BSP, byteIn, 0)))
+                  .intoArray(byteOut, 0);
+    }
+
+    // Pattern 2: bcast(a) MUL (array MUL bcast(b))
+    @Test
+    @IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
+        counts = { IRNode.MUL_VB, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.MUL_I, ">= 1",
+                   IRNode.REPLICATE_B, IRNode.VECTOR_SIZE_ANY, ">= 1" })
+    @Warmup(value = 10000)
+    static void test_byte_mul_reassociation_pattern2() {
+        ByteVector.broadcast(BSP, ba)
+                  .lanewise(VectorOperators.MUL,
+                            ByteVector.fromArray(BSP, byteIn, 0)
+                                      .lanewise(VectorOperators.MUL,
+                                                ByteVector.broadcast(BSP, bb)))
+                  .intoArray(byteOut, 0);
+    }
+
+    // Pattern 3: (bcast(a) MUL array) MUL bcast(b)
+    @Test
+    @IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
+        counts = { IRNode.MUL_VB, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.MUL_I, ">= 1",
+                   IRNode.REPLICATE_B, IRNode.VECTOR_SIZE_ANY, ">= 1" })
+    @Warmup(value = 10000)
+    static void test_byte_mul_reassociation_pattern3() {
+        ByteVector.broadcast(BSP, ba)
+                  .lanewise(VectorOperators.MUL,
+                            ByteVector.fromArray(BSP, byteIn, 0))
+                  .lanewise(VectorOperators.MUL,
+                            ByteVector.broadcast(BSP, bb))
+                  .intoArray(byteOut, 0);
+    }
+
+    // Pattern 4: (array MUL bcast(a)) MUL bcast(b)
+    @Test
+    @IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
+        counts = { IRNode.MUL_VB, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.MUL_I, ">= 1",
+                   IRNode.REPLICATE_B, IRNode.VECTOR_SIZE_ANY, ">= 1" })
+    @Warmup(value = 10000)
+    static void test_byte_mul_reassociation_pattern4() {
+        ByteVector.fromArray(BSP, byteIn, 0)
+                  .lanewise(VectorOperators.MUL,
+                            ByteVector.broadcast(BSP, ba))
+                  .lanewise(VectorOperators.MUL,
+                            ByteVector.broadcast(BSP, bb))
+                  .intoArray(byteOut, 0);
+    }
+}
diff --git a/test/micro/org/openjdk/bench/jdk/incubator/vector/VectorReassociateBenchmark.java
b/test/micro/org/openjdk/bench/jdk/incubator/vector/VectorReassociateBenchmark.java new file mode 100644 index 00000000000..cf95b2f7971 --- /dev/null +++ b/test/micro/org/openjdk/bench/jdk/incubator/vector/VectorReassociateBenchmark.java @@ -0,0 +1,239 @@ +/* + * Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ *
+ */
+
+package org.openjdk.bench.jdk.incubator.vector;
+
+import java.util.concurrent.TimeUnit;
+import java.util.Random;
+import jdk.incubator.vector.*;
+import org.openjdk.jmh.annotations.*;
+import org.openjdk.jmh.infra.Blackhole;
+
+/**
+ * JMH benchmarks for Vector API expressions in which some or all operands
+ * are broadcasts of scalar values.
+ *
+ * <p>Two shapes are covered:
+ * <ul>
+ *   <li>kernels whose operands are all broadcasts (FMA and SQRT), and</li>
+ *   <li>MUL chains that combine several broadcasts with one vector loaded
+ *       from an array, in a right-nested ("chained") and a two-sided
+ *       ("balanced") form, for int, long, short and byte lanes.</li>
+ * </ul>
+ *
+ * NOTE(review): presumably this measures the C2 broadcast push-through and
+ * reassociation transformations introduced together with this file; confirm
+ * against the accompanying compiler change.
+ */
+@OutputTimeUnit(TimeUnit.MILLISECONDS)
+@State(Scope.Thread)
+@Fork(jvmArgs = {"--add-modules=jdk.incubator.vector"})
+public class VectorReassociateBenchmark {
+    /** Number of elements in every input and output array. */
+    @Param({"1024", "2048"})
+    int size;
+
+    int [] intIn1;
+    int [] intOut;
+
+    long [] longIn1;
+    long [] longOut;
+
+    short [] shortIn1;
+    short [] shortOut;
+
+    byte [] byteIn1;
+    byte [] byteOut;
+
+    // Preferred species per lane type. The type arguments are spelled out so
+    // the broadcast/fromArray calls below need no unchecked conversion.
+    static final VectorSpecies<Float> fspecies = FloatVector.SPECIES_PREFERRED;
+    static final VectorSpecies<Double> dspecies = DoubleVector.SPECIES_PREFERRED;
+    static final VectorSpecies<Integer> ispecies = IntVector.SPECIES_PREFERRED;
+    static final VectorSpecies<Long> lspecies = LongVector.SPECIES_PREFERRED;
+    static final VectorSpecies<Short> sspecies = ShortVector.SPECIES_PREFERRED;
+    static final VectorSpecies<Byte> bspecies = ByteVector.SPECIES_PREFERRED;
+
+    /**
+     * Allocates the input and output arrays with {@code size} elements each
+     * and fills every input element from a fixed-seed {@link Random}, so the
+     * data is the same on every trial.
+     */
+    @Setup(Level.Trial)
+    public void BmSetup() {
+        Random r = new Random(2048);
+        intIn1 = new int[size];
+        intOut = new int[size];
+
+        longIn1 = new long[size];
+        longOut = new long[size];
+
+        shortIn1 = new short[size];
+        shortOut = new short[size];
+
+        byteIn1 = new byte[size];
+        byteOut = new byte[size];
+
+        // Start at index 0: every element must be initialized, otherwise the
+        // leading lanes of the first vector iteration would read default zeros.
+        for (int i = 0; i < size; i++) {
+            intIn1[i] = r.nextInt();
+            longIn1[i] = r.nextLong();
+            shortIn1[i] = (short) r.nextInt();
+            byteIn1[i] = (byte) r.nextInt();
+        }
+    }
+
+    /**
+     * Sums fma(bcast(i), bcast(i + 1), bcast(i + 2)) over {@code size}
+     * iterations; all three FMA operands are broadcasts of loop-varying
+     * scalars.
+     *
+     * @return lane 0 of the accumulated vector, so the computation is consumed
+     */
+    @Benchmark
+    public float pushBroadcastsAcrossVectorKernel1() {
+        FloatVector res = FloatVector.broadcast(fspecies, 0.0f);
+        for (int i = 0; i < size; i++) {
+            FloatVector vec1 = FloatVector.broadcast(fspecies, (float)i);
+            FloatVector vec2 = FloatVector.broadcast(fspecies, (float)i + 1);
+            FloatVector vec3 = FloatVector.broadcast(fspecies, (float)i + 2);
+            res = res.lanewise(VectorOperators.ADD, vec1.lanewise(VectorOperators.FMA, vec2, vec3));
+        }
+        return res.lane(0);
+    }
+
+    /**
+     * Sums sqrt(bcast(i)) over {@code size} iterations; the SQRT operand is a
+     * broadcast of a loop-varying scalar.
+     *
+     * @return lane 0 of the accumulated vector, so the computation is consumed
+     */
+    @Benchmark
+    public double pushBroadcastsAcrossVectorKernel2() {
+        DoubleVector res = DoubleVector.broadcast(dspecies, 0.0);
+        for (int i = 0; i < size; i++) {
+            DoubleVector vec1 = DoubleVector.broadcast(dspecies, (double)i);
+            res = res.lanewise(VectorOperators.ADD, vec1.lanewise(VectorOperators.SQRT));
+        }
+        return res.lane(0);
+    }
+
+    // int: bcast(a) MUL (bcast(b) MUL (bcast(c) MUL array))
+    // Right-nested chain: three broadcasts multiplied onto one array vector,
+    // result stored to intOut at the same index.
+    @Benchmark
+    public void reassociateIntMulChainedBroadcasts() {
+        for (int i = 0; i < ispecies.loopBound(size); i += ispecies.length()) {
+            IntVector.broadcast(ispecies, i)
+                     .lanewise(VectorOperators.MUL,
+                               IntVector.broadcast(ispecies, i + 1)
+                                        .lanewise(VectorOperators.MUL,
+                                                  IntVector.broadcast(ispecies, i + 2)
+                                                           .lanewise(VectorOperators.MUL,
+                                                                     IntVector.fromArray(ispecies, intIn1, i))))
+                     .intoArray(intOut, i);
+        }
+    }
+
+    // int: (bcast(a) MUL bcast(b)) MUL (bcast(c) MUL array)
+    // Balanced form: the left operand is a product of two broadcasts, the
+    // right operand a broadcast times the array vector.
+    @Benchmark
+    public void reassociateIntMulBalancedBroadcasts() {
+        for (int i = 0; i < ispecies.loopBound(size); i += ispecies.length()) {
+            IntVector left =
+                IntVector.broadcast(ispecies, i)
+                         .lanewise(VectorOperators.MUL,
+                                   IntVector.broadcast(ispecies, i + 1));
+
+            IntVector right =
+                IntVector.broadcast(ispecies, i + 2)
+                         .lanewise(VectorOperators.MUL,
+                                   IntVector.fromArray(ispecies, intIn1, i));
+
+            left.lanewise(VectorOperators.MUL, right)
+                .intoArray(intOut, i);
+        }
+    }
+
+    // long: bcast(a) MUL (bcast(b) MUL (bcast(c) MUL array))
+    // Same right-nested chain as the int variant, on long lanes.
+    @Benchmark
+    public void reassociateLongMulChainedBroadcasts() {
+        for (int i = 0; i < lspecies.loopBound(size); i += lspecies.length()) {
+            LongVector.broadcast(lspecies, (long) i)
+                      .lanewise(VectorOperators.MUL,
+                                LongVector.broadcast(lspecies, (long) (i + 1))
+                                          .lanewise(VectorOperators.MUL,
+                                                    LongVector.broadcast(lspecies, (long) (i + 2))
+                                                              .lanewise(VectorOperators.MUL,
+                                                                        LongVector.fromArray(lspecies, longIn1, i))))
+                      .intoArray(longOut, i);
+        }
+    }
+
+    // long: (bcast(a) MUL bcast(b)) MUL (bcast(c) MUL array)
+    // Same balanced form as the int variant, on long lanes.
+    @Benchmark
+    public void reassociateLongMulBalancedBroadcasts() {
+        for (int i = 0; i < lspecies.loopBound(size); i += lspecies.length()) {
+            LongVector left =
+                LongVector.broadcast(lspecies, (long) i)
+                          .lanewise(VectorOperators.MUL,
+                                    LongVector.broadcast(lspecies, (long) (i + 1)));
+
+            LongVector right =
+                LongVector.broadcast(lspecies, (long) (i + 2))
+                          .lanewise(VectorOperators.MUL,
+                                    LongVector.fromArray(lspecies, longIn1, i));
+
+            left.lanewise(VectorOperators.MUL, right)
+                .intoArray(longOut, i);
+        }
+    }
+
+    // short: bcast(a) MUL (bcast(b) MUL (bcast(c) MUL array))
+    // Same right-nested chain; the loop index is narrowed to short before
+    // each broadcast.
+    @Benchmark
+    public void reassociateShortMulChainedBroadcasts() {
+        for (int i = 0; i < sspecies.loopBound(size); i += sspecies.length()) {
+            ShortVector.broadcast(sspecies, (short) i)
+                       .lanewise(VectorOperators.MUL,
+                                 ShortVector.broadcast(sspecies, (short) (i + 1))
+                                            .lanewise(VectorOperators.MUL,
+                                                      ShortVector.broadcast(sspecies, (short) (i + 2))
+                                                                 .lanewise(VectorOperators.MUL,
+                                                                           ShortVector.fromArray(sspecies, shortIn1, i))))
+                       .intoArray(shortOut, i);
+        }
+    }
+
+    // short: (bcast(a) MUL bcast(b)) MUL (bcast(c) MUL array)
+    // Same balanced form, on short lanes.
+    @Benchmark
+    public void reassociateShortMulBalancedBroadcasts() {
+        for (int i = 0; i < sspecies.loopBound(size); i += sspecies.length()) {
+            ShortVector left =
+                ShortVector.broadcast(sspecies, (short) i)
+                           .lanewise(VectorOperators.MUL,
+                                     ShortVector.broadcast(sspecies, (short) (i + 1)));
+
+            ShortVector right =
+                ShortVector.broadcast(sspecies, (short) (i + 2))
+                           .lanewise(VectorOperators.MUL,
+                                     ShortVector.fromArray(sspecies, shortIn1, i));
+
+            left.lanewise(VectorOperators.MUL, right)
+                .intoArray(shortOut, i);
+        }
+    }
+
+    // byte: bcast(a) MUL (bcast(b) MUL (bcast(c) MUL array))
+    // Same right-nested chain; the loop index is narrowed to byte before
+    // each broadcast.
+    @Benchmark
+    public void reassociateByteMulChainedBroadcasts() {
+        for (int i = 0; i < bspecies.loopBound(size); i += bspecies.length()) {
+            ByteVector.broadcast(bspecies, (byte) i)
+                      .lanewise(VectorOperators.MUL,
+                                ByteVector.broadcast(bspecies, (byte) (i + 1))
+                                          .lanewise(VectorOperators.MUL,
+                                                    ByteVector.broadcast(bspecies, (byte) (i + 2))
+                                                              .lanewise(VectorOperators.MUL,
+                                                                        ByteVector.fromArray(bspecies, byteIn1, i))))
+                      .intoArray(byteOut, i);
+        }
+    }
+
+    // byte: (bcast(a) MUL bcast(b)) MUL (bcast(c) MUL array)
+    // Same balanced form, on byte lanes.
+    @Benchmark
+    public void reassociateByteMulBalancedBroadcasts() {
+        for (int i = 0; i < bspecies.loopBound(size); i += bspecies.length()) {
+            ByteVector left =
+                ByteVector.broadcast(bspecies, (byte) i)
+                          .lanewise(VectorOperators.MUL,
+                                    ByteVector.broadcast(bspecies, (byte) (i + 1)));
+
+            ByteVector right =
+                ByteVector.broadcast(bspecies, (byte) (i + 2))
+                          .lanewise(VectorOperators.MUL,
+                                    ByteVector.fromArray(bspecies, byteIn1, i));
+
+            left.lanewise(VectorOperators.MUL, right)
+                .intoArray(byteOut, i);
+        }
+    }
+}