mirror of
https://github.com/openjdk/jdk.git
synced 2026-05-17 00:50:48 +00:00
8358521: Optimize vector operations by reassociating broadcasted inputs
Reviewed-by: epeter, vlivanov, xgong
This commit is contained in:
parent
776bb729e8
commit
7ff7efd59d
@ -520,7 +520,12 @@ class SqrtDNode : public Node {
|
||||
public:
|
||||
SqrtDNode(Compile* C, Node *c, Node *in1) : Node(c, in1) {
|
||||
init_flags(Flag_is_expensive);
|
||||
C->add_expensive_node(this);
|
||||
// Treat node only as expensive if a control input is set because it might
|
||||
// be created from SqrtVDNode in VectorNode::push_through_replicate which
|
||||
// does not have control input.
|
||||
if (c != nullptr) {
|
||||
C->add_expensive_node(this);
|
||||
}
|
||||
}
|
||||
virtual int Opcode() const;
|
||||
const Type *bottom_type() const { return Type::DOUBLE; }
|
||||
|
||||
@ -22,10 +22,12 @@
|
||||
*/
|
||||
|
||||
#include "memory/allocation.inline.hpp"
|
||||
#include "opto/addnode.hpp"
|
||||
#include "opto/c2_globals.hpp"
|
||||
#include "opto/compile.hpp"
|
||||
#include "opto/connode.hpp"
|
||||
#include "opto/convertnode.hpp"
|
||||
#include "opto/divnode.hpp"
|
||||
#include "opto/mulnode.hpp"
|
||||
#include "opto/subnode.hpp"
|
||||
#include "opto/vectornode.hpp"
|
||||
@ -290,7 +292,146 @@ int VectorNode::opcode(int sopc, BasicType bt) {
|
||||
assert(!VectorNode::is_convert_opcode(sopc),
|
||||
"Convert node %s should be processed by VectorCastNode::opcode()",
|
||||
NodeClassNames[sopc]);
|
||||
return 0; // Unimplemented
|
||||
return 0; // not handled
|
||||
}
|
||||
}
|
||||
|
||||
// Return the scalar opcode for the specified vector opcode and basic type.
|
||||
// Returns 0 if not handled.
|
||||
int VectorNode::scalar_opcode(int vopc, BasicType bt) {
|
||||
switch (vopc) {
|
||||
case Op_AddVB:
|
||||
case Op_AddVS:
|
||||
case Op_AddVI:
|
||||
return Op_AddI;
|
||||
case Op_AddVL:
|
||||
return Op_AddL;
|
||||
case Op_AddVF:
|
||||
return Op_AddF;
|
||||
case Op_AddVD:
|
||||
return Op_AddD;
|
||||
|
||||
case Op_SubVB:
|
||||
case Op_SubVS:
|
||||
case Op_SubVI:
|
||||
return Op_SubI;
|
||||
case Op_SubVL:
|
||||
return Op_SubL;
|
||||
case Op_SubVF:
|
||||
return Op_SubF;
|
||||
case Op_SubVD:
|
||||
return Op_SubD;
|
||||
|
||||
case Op_MulVB:
|
||||
case Op_MulVS:
|
||||
case Op_MulVI:
|
||||
return Op_MulI;
|
||||
case Op_MulVL:
|
||||
return Op_MulL;
|
||||
case Op_MulVF:
|
||||
return Op_MulF;
|
||||
case Op_MulVD:
|
||||
return Op_MulD;
|
||||
|
||||
case Op_DivVF:
|
||||
return Op_DivF;
|
||||
case Op_DivVD:
|
||||
return Op_DivD;
|
||||
|
||||
case Op_AndV:
|
||||
switch (bt) {
|
||||
case T_BOOLEAN:
|
||||
case T_CHAR:
|
||||
case T_BYTE:
|
||||
case T_SHORT:
|
||||
case T_INT:
|
||||
return Op_AndI;
|
||||
case T_LONG:
|
||||
return Op_AndL;
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
|
||||
case Op_OrV:
|
||||
switch (bt) {
|
||||
case T_BOOLEAN:
|
||||
case T_CHAR:
|
||||
case T_BYTE:
|
||||
case T_SHORT:
|
||||
case T_INT:
|
||||
return Op_OrI;
|
||||
case T_LONG:
|
||||
return Op_OrL;
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
|
||||
case Op_XorV:
|
||||
switch (bt) {
|
||||
case T_BOOLEAN:
|
||||
case T_CHAR:
|
||||
case T_BYTE:
|
||||
case T_SHORT:
|
||||
case T_INT:
|
||||
return Op_XorI;
|
||||
case T_LONG:
|
||||
return Op_XorL;
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
|
||||
case Op_MinV:
|
||||
switch (bt) {
|
||||
case T_BOOLEAN:
|
||||
case T_CHAR:
|
||||
// unsigned, not supported for Min
|
||||
return 0;
|
||||
case T_BYTE:
|
||||
case T_SHORT:
|
||||
case T_INT:
|
||||
return Op_MinI;
|
||||
case T_LONG:
|
||||
return Op_MinL;
|
||||
case T_FLOAT:
|
||||
return Op_MinF;
|
||||
case T_DOUBLE:
|
||||
return Op_MinD;
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
|
||||
case Op_MaxV:
|
||||
switch (bt) {
|
||||
case T_BOOLEAN:
|
||||
case T_CHAR:
|
||||
// unsigned, not supported for Max
|
||||
return 0;
|
||||
case T_BYTE:
|
||||
case T_SHORT:
|
||||
case T_INT:
|
||||
return Op_MaxI;
|
||||
case T_LONG:
|
||||
return Op_MaxL;
|
||||
case T_FLOAT:
|
||||
return Op_MaxF;
|
||||
case T_DOUBLE:
|
||||
return Op_MaxD;
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
|
||||
case Op_SqrtVD:
|
||||
return Op_SqrtD;
|
||||
case Op_SqrtVF:
|
||||
return Op_SqrtF;
|
||||
|
||||
case Op_FmaVF:
|
||||
return Op_FmaF;
|
||||
case Op_FmaVD:
|
||||
return Op_FmaD;
|
||||
|
||||
default:
|
||||
return 0; // not handled
|
||||
}
|
||||
}
|
||||
|
||||
@ -984,17 +1125,9 @@ static Node* ideal_partial_operations(PhaseGVN* phase, Node* node, const TypeVec
|
||||
}
|
||||
}
|
||||
|
||||
bool VectorNode::should_swap_inputs_to_help_global_value_numbering() {
|
||||
// Predicated vector operations are sensitive to ordering of inputs.
|
||||
// When the mask corresponding to a vector lane is false then
|
||||
// the result of the operation is corresponding lane of its first operand.
|
||||
// i.e. RES = VEC1.lanewise(OPER, VEC2, MASK) is semantically equivalent to
|
||||
// RES = BLEND(VEC1, VEC1.lanewise(OPER, VEC2), MASK)
|
||||
if (is_predicated_vector()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
switch(Opcode()) {
|
||||
// Check if the vector operation is commutative (assuming that it is not predicated/masked).
|
||||
static bool is_commutative_vector_operation(int opcode) {
|
||||
switch(opcode) {
|
||||
case Op_AddVB:
|
||||
case Op_AddVS:
|
||||
case Op_AddVI:
|
||||
@ -1022,18 +1155,228 @@ bool VectorNode::should_swap_inputs_to_help_global_value_numbering() {
|
||||
case Op_XorVMask:
|
||||
|
||||
case Op_SaturatingAddV:
|
||||
assert(req() == 3, "Must be a binary operation");
|
||||
// For non-predicated commutative operations, sort the inputs in
|
||||
// increasing order of node indices.
|
||||
if (in(1)->_idx > in(2)->_idx) {
|
||||
return true;
|
||||
}
|
||||
// fallthrough
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
bool VectorNode::should_swap_inputs_to_help_global_value_numbering() {
|
||||
// Predicated vector operations are sensitive to ordering of inputs.
|
||||
// When the mask corresponding to a vector lane is false then
|
||||
// the result of the operation is corresponding lane of its first operand.
|
||||
// i.e. RES = VEC1.lanewise(OPER, VEC2, MASK) is semantically equivalent to
|
||||
// RES = BLEND(VEC1, VEC1.lanewise(OPER, VEC2), MASK)
|
||||
if (is_predicated_vector()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (is_commutative_vector_operation(Opcode())) {
|
||||
assert(req() == 3, "Must be a binary operation");
|
||||
// For non-predicated commutative operations, sort the inputs in
|
||||
// increasing order of node indices.
|
||||
if (in(1)->_idx > in(2)->_idx) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
// Check whether we can push this vector op through replicate (all inputs are Replicate).
|
||||
bool VectorNode::can_push_through_replicate(BasicType bt) {
|
||||
if (scalar_opcode(Opcode(), bt) == 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Skip over predicated vector operations for now, for masked lanes we preserve
|
||||
// destination/first source vector contents.
|
||||
if (is_predicated_vector()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
for (uint i = 1; i < req(); i++) {
|
||||
if (in(i)->Opcode() != Op_Replicate) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
Node* VectorNode::make_scalar(Compile* c, int vopc, BasicType bt, Node* control, Node* in1, Node* in2, Node* in3) {
|
||||
int sopc = scalar_opcode(vopc, bt);
|
||||
assert(sopc != 0, "unhandled vector opcode %s", NodeClassNames[vopc]);
|
||||
assert(opcode(sopc, bt) == vopc, "scalar_opcode and opcode must agree for %s", NodeClassNames[vopc]);
|
||||
switch (sopc) {
|
||||
case Op_AddI:
|
||||
return new AddINode(in1, in2);
|
||||
case Op_AddL:
|
||||
return new AddLNode(in1, in2);
|
||||
case Op_AddF:
|
||||
return new AddFNode(in1, in2);
|
||||
case Op_AddD:
|
||||
return new AddDNode(in1, in2);
|
||||
case Op_MulI:
|
||||
return new MulINode(in1, in2);
|
||||
case Op_MulL:
|
||||
return new MulLNode(in1, in2);
|
||||
case Op_MulF:
|
||||
return new MulFNode(in1, in2);
|
||||
case Op_MulD:
|
||||
return new MulDNode(in1, in2);
|
||||
case Op_AndI:
|
||||
return new AndINode(in1, in2);
|
||||
case Op_AndL:
|
||||
return new AndLNode(in1, in2);
|
||||
case Op_DivF:
|
||||
return new DivFNode(control, in1, in2);
|
||||
case Op_DivD:
|
||||
return new DivDNode(control, in1, in2);
|
||||
case Op_OrI:
|
||||
return new OrINode(in1, in2);
|
||||
case Op_OrL:
|
||||
return new OrLNode(in1, in2);
|
||||
case Op_XorI:
|
||||
return new XorINode(in1, in2);
|
||||
case Op_XorL:
|
||||
return new XorLNode(in1, in2);
|
||||
case Op_SubI:
|
||||
return new SubINode(in1, in2);
|
||||
case Op_SubL:
|
||||
return new SubLNode(in1, in2);
|
||||
case Op_SubF:
|
||||
return new SubFNode(in1, in2);
|
||||
case Op_SubD:
|
||||
return new SubDNode(in1, in2);
|
||||
case Op_MinI:
|
||||
return new MinINode(in1, in2);
|
||||
case Op_MinL:
|
||||
return new MinLNode(c, in1, in2);
|
||||
case Op_MinF:
|
||||
return new MinFNode(in1, in2);
|
||||
case Op_MinD:
|
||||
return new MinDNode(in1, in2);
|
||||
case Op_MaxI:
|
||||
return new MaxINode(in1, in2);
|
||||
case Op_MaxL:
|
||||
return new MaxLNode(c, in1, in2);
|
||||
case Op_MaxF:
|
||||
return new MaxFNode(in1, in2);
|
||||
case Op_MaxD:
|
||||
return new MaxDNode(in1, in2);
|
||||
case Op_SqrtF:
|
||||
return new SqrtFNode(c, control, in1);
|
||||
case Op_SqrtD:
|
||||
return new SqrtDNode(c, control, in1);
|
||||
case Op_FmaF:
|
||||
return new FmaFNode(in1, in2, in3);
|
||||
case Op_FmaD:
|
||||
return new FmaDNode(in1, in2, in3);
|
||||
default:
|
||||
assert(false, "unexpected scalar opcode");
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
// Re-wires and creates a new ideal graph pallet with following connectivity
|
||||
// parent(child(cinput1, cinput2), pinput2)
|
||||
Node* VectorNode::create_reassociated_node(Node* parent, Node* child, Node* cinput1, Node* cinput2,
|
||||
Node* pinput2, PhaseGVN* phase) {
|
||||
Node* cloned_child = child->clone();
|
||||
cloned_child->set_req(1, cinput1);
|
||||
cloned_child->set_req(2, cinput2);
|
||||
cloned_child = phase->transform(cloned_child);
|
||||
Node* cloned_parent = parent->clone();
|
||||
cloned_parent->set_req(1, cloned_child);
|
||||
cloned_parent->set_req(2, pinput2);
|
||||
return cloned_parent;
|
||||
}
|
||||
|
||||
// Try to reassociate commutative vector operations using the following ideal transformation,
|
||||
// this will facilitate strength reducing a vector operation with all replicated inputs to
|
||||
// a scalar operation.
|
||||
//
|
||||
// VectorOp (Replicate INP1) (VectorOp (Replicate INP2) INP3) =>
|
||||
// VectorOp (VectorOp (Replicate INP1) (Replicate INP2)) INP3
|
||||
//
|
||||
Node* VectorNode::reassociate_vector_operation(PhaseGVN* phase) {
|
||||
// Enable re-association for integral vector operations.
|
||||
if (!is_integral_type(vect_type()->element_basic_type())) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// Enable re-association for commutative vector operations.
|
||||
if (!is_commutative_vector_operation(Opcode())) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
Node* in1 = in(1);
|
||||
Node* in2 = in(2);
|
||||
if (in2->Opcode() == Op_Replicate && in1->Opcode() == Opcode()) {
|
||||
swap(in1, in2);
|
||||
}
|
||||
|
||||
if (in1->Opcode() != Op_Replicate || in2->Opcode() != Opcode()) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// Skip predicated vector operations, mask semantics prevent reassociation.
|
||||
if (is_predicated_vector() || in2->as_Vector()->is_predicated_vector()) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
Node* in2_1 = in2->in(1);
|
||||
Node* in2_2 = in2->in(2);
|
||||
if (in2_1->Opcode() == Op_Replicate) {
|
||||
return create_reassociated_node(this, in2, in1, in2_1, in2_2, phase);
|
||||
} else if (in2_2->Opcode() == Op_Replicate) {
|
||||
return create_reassociated_node(this, in2, in1, in2_2, in2_1, phase);
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// Convert vector operation with all Replicate inputs to scalar operation using following
|
||||
// ideal transformation.
|
||||
//
|
||||
// VectorOp (Replicate INP1, Replicate INP2) =>
|
||||
// Replicate (ScalarOp INP1, INP2)
|
||||
//
|
||||
Node* VectorNode::push_through_replicate(PhaseGVN* phase) {
|
||||
BasicType bt = vect_type()->element_basic_type();
|
||||
if (!can_push_through_replicate(bt)) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
assert(req() >= 2 && req() <= 4, "unexpected req() %u for %s", req(), NodeClassNames[Opcode()]);
|
||||
|
||||
Node* sinp1 = nullptr;
|
||||
Node* sinp2 = nullptr;
|
||||
Node* sinp3 = nullptr;
|
||||
|
||||
assert(in(1)->Opcode() == Op_Replicate, "");
|
||||
sinp1 = in(1)->in(1);
|
||||
|
||||
if (req() > 2) {
|
||||
assert(in(2)->Opcode() == Op_Replicate, "");
|
||||
sinp2 = in(2)->in(1);
|
||||
}
|
||||
|
||||
if (req() > 3) {
|
||||
assert(in(3)->Opcode() == Op_Replicate, "");
|
||||
sinp3 = in(3)->in(1);
|
||||
}
|
||||
|
||||
Node* sop = make_scalar(phase->C, Opcode(), bt, in(0), sinp1, sinp2, sinp3);
|
||||
if (sop == nullptr) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
sop = phase->transform(sop);
|
||||
|
||||
return new ReplicateNode(sop, vect_type());
|
||||
}
|
||||
|
||||
Node* VectorNode::Ideal(PhaseGVN* phase, bool can_reshape) {
|
||||
Node* n = ideal_partial_operations(phase, this, vect_type());
|
||||
if (n != nullptr) {
|
||||
@ -1044,7 +1387,13 @@ Node* VectorNode::Ideal(PhaseGVN* phase, bool can_reshape) {
|
||||
if (should_swap_inputs_to_help_global_value_numbering()) {
|
||||
swap_edges(1, 2);
|
||||
}
|
||||
return nullptr;
|
||||
|
||||
n = push_through_replicate(phase);
|
||||
if (n != nullptr) {
|
||||
return n;
|
||||
}
|
||||
|
||||
return reassociate_vector_operation(phase);
|
||||
}
|
||||
|
||||
// Traverses a chain of VectorMaskCast and returns the first non VectorMaskCast node.
|
||||
@ -2094,7 +2443,7 @@ Node* FmaVNode::Ideal(PhaseGVN* phase, bool can_reshape) {
|
||||
swap_edges(1, 2);
|
||||
return this;
|
||||
}
|
||||
return nullptr;
|
||||
return VectorNode::Ideal(phase, can_reshape);
|
||||
}
|
||||
|
||||
// Generate other vector nodes to implement the masked/non-masked vector negation.
|
||||
|
||||
@ -146,12 +146,20 @@ class VectorNode : public TypeNode {
|
||||
static bool is_minmax_opcode(int opc);
|
||||
|
||||
bool should_swap_inputs_to_help_global_value_numbering();
|
||||
Node* reassociate_vector_operation(PhaseGVN* phase);
|
||||
static Node* create_reassociated_node(Node* parent, Node* child, Node* cinput1, Node* cinput2,
|
||||
Node* pinput2, PhaseGVN* phase);
|
||||
|
||||
static bool is_vshift_cnt_opcode(int opc);
|
||||
|
||||
static bool is_rotate_opcode(int opc);
|
||||
|
||||
static int opcode(int sopc, BasicType bt); // scalar_opc -> vector_opc
|
||||
static int scalar_opcode(int vopc, BasicType bt); // vector_opc -> scalar_opc, 0 if not handled
|
||||
static Node* make_scalar(Compile* c, int vopc, BasicType bt, Node* control, Node* in1, Node* in2, Node* in3);
|
||||
|
||||
bool can_push_through_replicate(BasicType bt);
|
||||
Node* push_through_replicate(PhaseGVN* phase);
|
||||
|
||||
static int shift_count_opcode(int opc);
|
||||
|
||||
|
||||
@ -233,6 +233,11 @@ public class IRNode {
|
||||
beforeMatchingNameRegex(ADD_P, "AddP");
|
||||
}
|
||||
|
||||
public static final String ADD_D = PREFIX + "ADD_D" + POSTFIX;
|
||||
static {
|
||||
beforeMatchingNameRegex(ADD_D, "AddD");
|
||||
}
|
||||
|
||||
public static final String ADD_VD = VECTOR_PREFIX + "ADD_VD" + POSTFIX;
|
||||
static {
|
||||
vectorNode(ADD_VD, "AddVD", TYPE_DOUBLE);
|
||||
@ -763,11 +768,21 @@ public class IRNode {
|
||||
vectorNode(DIV_VHF, "DivVHF", TYPE_SHORT);
|
||||
}
|
||||
|
||||
public static final String DIV_F = PREFIX + "DIV_F" + POSTFIX;
|
||||
static {
|
||||
beforeMatchingNameRegex(DIV_F, "DivF");
|
||||
}
|
||||
|
||||
public static final String DIV_VF = VECTOR_PREFIX + "DIV_VF" + POSTFIX;
|
||||
static {
|
||||
vectorNode(DIV_VF, "DivVF", TYPE_FLOAT);
|
||||
}
|
||||
|
||||
public static final String DIV_D = PREFIX + "DIV_D" + POSTFIX;
|
||||
static {
|
||||
beforeMatchingNameRegex(DIV_D, "DivD");
|
||||
}
|
||||
|
||||
public static final String DIV_VD = VECTOR_PREFIX + "DIV_VD" + POSTFIX;
|
||||
static {
|
||||
vectorNode(DIV_VD, "DivVD", TYPE_DOUBLE);
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,605 @@
|
||||
/*
|
||||
* Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
|
||||
/*
|
||||
* @test
|
||||
* @bug 8358521
|
||||
* @summary Test reassociation of broadcasted inputs across vector operations
|
||||
* @modules jdk.incubator.vector
|
||||
* @library /test/lib /
|
||||
* @run driver compiler.vectorapi.TestVectorReassociations
|
||||
*/
|
||||
|
||||
package compiler.vectorapi;
|
||||
|
||||
import compiler.lib.ir_framework.*;
|
||||
import jdk.incubator.vector.*;
|
||||
import java.util.stream.IntStream;
|
||||
|
||||
/**
|
||||
* Tests for the reassociation transform:
|
||||
* VectorOp(broadcast(a), VectorOp(broadcast(b), array))
|
||||
* => VectorOp(broadcast(ScalarOp(a, b)), array)
|
||||
*/
|
||||
public class TestVectorReassociations {
|
||||
|
||||
public static void main(String[] args) {
|
||||
TestFramework.runWithFlags("--add-modules=jdk.incubator.vector");
|
||||
}
|
||||
|
||||
/* =======================
|
||||
* INT
|
||||
* ======================= */
|
||||
|
||||
static final VectorSpecies<Integer> ISP = IntVector.SPECIES_PREFERRED;
|
||||
static int[] intIn = IntStream.range(0, IntVector.SPECIES_PREFERRED.length()).toArray();
|
||||
static int[] intOut = new int[IntVector.SPECIES_PREFERRED.length()];
|
||||
static int ia = 17, ib = 9;
|
||||
|
||||
// --- INT ADD ---
|
||||
|
||||
// bcast(a) ADD (bcast(b) ADD array)
|
||||
@Test
|
||||
@IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
|
||||
counts = { IRNode.ADD_VI, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.ADD_I, ">= 1",
|
||||
IRNode.REPLICATE_I, IRNode.VECTOR_SIZE_ANY, ">= 1" })
|
||||
@Warmup(value = 10000)
|
||||
static void test_int_add_reassociation_pattern1() {
|
||||
IntVector.broadcast(ISP, ia)
|
||||
.lanewise(VectorOperators.ADD,
|
||||
IntVector.broadcast(ISP, ib)
|
||||
.lanewise(VectorOperators.ADD,
|
||||
IntVector.fromArray(ISP, intIn, 0)))
|
||||
.intoArray(intOut, 0);
|
||||
}
|
||||
|
||||
// bcast(a) ADD (array ADD bcast(b))
|
||||
@Test
|
||||
@IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
|
||||
counts = { IRNode.ADD_VI, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.ADD_I, ">= 1",
|
||||
IRNode.REPLICATE_I, IRNode.VECTOR_SIZE_ANY, ">= 1" })
|
||||
@Warmup(value = 10000)
|
||||
static void test_int_add_reassociation_pattern2() {
|
||||
IntVector.broadcast(ISP, ia)
|
||||
.lanewise(VectorOperators.ADD,
|
||||
IntVector.fromArray(ISP, intIn, 0)
|
||||
.lanewise(VectorOperators.ADD,
|
||||
IntVector.broadcast(ISP, ib)))
|
||||
.intoArray(intOut, 0);
|
||||
}
|
||||
|
||||
// (bcast(a) ADD array) ADD bcast(b)
|
||||
@Test
|
||||
@IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
|
||||
counts = { IRNode.ADD_VI, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.ADD_I, ">= 1",
|
||||
IRNode.REPLICATE_I, IRNode.VECTOR_SIZE_ANY, ">= 1" })
|
||||
@Warmup(value = 10000)
|
||||
static void test_int_add_reassociation_pattern3() {
|
||||
IntVector.broadcast(ISP, ia)
|
||||
.lanewise(VectorOperators.ADD,
|
||||
IntVector.fromArray(ISP, intIn, 0))
|
||||
.lanewise(VectorOperators.ADD,
|
||||
IntVector.broadcast(ISP, ib))
|
||||
.intoArray(intOut, 0);
|
||||
}
|
||||
|
||||
// (array ADD bcast(a)) ADD bcast(b)
|
||||
@Test
|
||||
@IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
|
||||
counts = { IRNode.ADD_VI, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.ADD_I, ">= 1",
|
||||
IRNode.REPLICATE_I, IRNode.VECTOR_SIZE_ANY, ">= 1" })
|
||||
@Warmup(value = 10000)
|
||||
static void test_int_add_reassociation_pattern4() {
|
||||
IntVector.fromArray(ISP, intIn, 0)
|
||||
.lanewise(VectorOperators.ADD,
|
||||
IntVector.broadcast(ISP, ia))
|
||||
.lanewise(VectorOperators.ADD,
|
||||
IntVector.broadcast(ISP, ib))
|
||||
.intoArray(intOut, 0);
|
||||
}
|
||||
|
||||
// --- INT MUL ---
|
||||
|
||||
// bcast(a) MUL (bcast(b) MUL array)
|
||||
@Test
|
||||
@IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
|
||||
counts = { IRNode.MUL_VI, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.MUL_I, ">= 1",
|
||||
IRNode.REPLICATE_I, IRNode.VECTOR_SIZE_ANY, ">= 1" })
|
||||
@Warmup(value = 10000)
|
||||
static void test_int_mul_reassociation_pattern1() {
|
||||
IntVector.broadcast(ISP, ia)
|
||||
.lanewise(VectorOperators.MUL,
|
||||
IntVector.broadcast(ISP, ib)
|
||||
.lanewise(VectorOperators.MUL,
|
||||
IntVector.fromArray(ISP, intIn, 0)))
|
||||
.intoArray(intOut, 0);
|
||||
}
|
||||
|
||||
// bcast(a) MUL (array MUL bcast(b))
|
||||
@Test
|
||||
@IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
|
||||
counts = { IRNode.MUL_VI, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.MUL_I, ">= 1",
|
||||
IRNode.REPLICATE_I, IRNode.VECTOR_SIZE_ANY, ">= 1" })
|
||||
@Warmup(value = 10000)
|
||||
static void test_int_mul_reassociation_pattern2() {
|
||||
IntVector.broadcast(ISP, ia)
|
||||
.lanewise(VectorOperators.MUL,
|
||||
IntVector.fromArray(ISP, intIn, 0)
|
||||
.lanewise(VectorOperators.MUL,
|
||||
IntVector.broadcast(ISP, ib)))
|
||||
.intoArray(intOut, 0);
|
||||
}
|
||||
|
||||
// (bcast(a) MUL array) MUL bcast(b)
|
||||
@Test
|
||||
@IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
|
||||
counts = { IRNode.MUL_VI, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.MUL_I, ">= 1",
|
||||
IRNode.REPLICATE_I, IRNode.VECTOR_SIZE_ANY, ">= 1" })
|
||||
@Warmup(value = 10000)
|
||||
static void test_int_mul_reassociation_pattern3() {
|
||||
IntVector.broadcast(ISP, ia)
|
||||
.lanewise(VectorOperators.MUL,
|
||||
IntVector.fromArray(ISP, intIn, 0))
|
||||
.lanewise(VectorOperators.MUL,
|
||||
IntVector.broadcast(ISP, ib))
|
||||
.intoArray(intOut, 0);
|
||||
}
|
||||
|
||||
// (array MUL bcast(a)) MUL bcast(b)
|
||||
@Test
|
||||
@IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
|
||||
counts = { IRNode.MUL_VI, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.MUL_I, ">= 1",
|
||||
IRNode.REPLICATE_I, IRNode.VECTOR_SIZE_ANY, ">= 1" })
|
||||
@Warmup(value = 10000)
|
||||
static void test_int_mul_reassociation_pattern4() {
|
||||
IntVector.fromArray(ISP, intIn, 0)
|
||||
.lanewise(VectorOperators.MUL,
|
||||
IntVector.broadcast(ISP, ia))
|
||||
.lanewise(VectorOperators.MUL,
|
||||
IntVector.broadcast(ISP, ib))
|
||||
.intoArray(intOut, 0);
|
||||
}
|
||||
|
||||
/* =======================
|
||||
* LONG
|
||||
* ======================= */
|
||||
|
||||
static final VectorSpecies<Long> LSP = LongVector.SPECIES_PREFERRED;
|
||||
static long[] longIn;
|
||||
static long[] longOut;
|
||||
static long la = 17L, lb = 9L;
|
||||
|
||||
static {
|
||||
longIn = new long[LSP.length()];
|
||||
longOut = new long[LSP.length()];
|
||||
for (int i = 0; i < LSP.length(); i++) {
|
||||
longIn[i] = (long) i;
|
||||
}
|
||||
}
|
||||
|
||||
// --- LONG ADD ---
|
||||
|
||||
// bcast(a) ADD (bcast(b) ADD array)
|
||||
@Test
|
||||
@IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
|
||||
counts = { IRNode.ADD_VL, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.ADD_L, ">= 1",
|
||||
IRNode.REPLICATE_L, IRNode.VECTOR_SIZE_ANY, ">= 1" })
|
||||
@Warmup(value = 10000)
|
||||
static void test_long_add_reassociation_pattern1() {
|
||||
LongVector.broadcast(LSP, la)
|
||||
.lanewise(VectorOperators.ADD,
|
||||
LongVector.broadcast(LSP, lb)
|
||||
.lanewise(VectorOperators.ADD,
|
||||
LongVector.fromArray(LSP, longIn, 0)))
|
||||
.intoArray(longOut, 0);
|
||||
}
|
||||
|
||||
// bcast(a) ADD (array ADD bcast(b))
|
||||
@Test
|
||||
@IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
|
||||
counts = { IRNode.ADD_VL, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.ADD_L, ">= 1",
|
||||
IRNode.REPLICATE_L, IRNode.VECTOR_SIZE_ANY, ">= 1" })
|
||||
@Warmup(value = 10000)
|
||||
static void test_long_add_reassociation_pattern2() {
|
||||
LongVector.broadcast(LSP, la)
|
||||
.lanewise(VectorOperators.ADD,
|
||||
LongVector.fromArray(LSP, longIn, 0)
|
||||
.lanewise(VectorOperators.ADD,
|
||||
LongVector.broadcast(LSP, lb)))
|
||||
.intoArray(longOut, 0);
|
||||
}
|
||||
|
||||
// (bcast(a) ADD array) ADD bcast(b)
|
||||
@Test
|
||||
@IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
|
||||
counts = { IRNode.ADD_VL, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.ADD_L, ">= 1",
|
||||
IRNode.REPLICATE_L, IRNode.VECTOR_SIZE_ANY, ">= 1" })
|
||||
@Warmup(value = 10000)
|
||||
static void test_long_add_reassociation_pattern3() {
|
||||
LongVector.broadcast(LSP, la)
|
||||
.lanewise(VectorOperators.ADD,
|
||||
LongVector.fromArray(LSP, longIn, 0))
|
||||
.lanewise(VectorOperators.ADD,
|
||||
LongVector.broadcast(LSP, lb))
|
||||
.intoArray(longOut, 0);
|
||||
}
|
||||
|
||||
// (array ADD bcast(a)) ADD bcast(b)
|
||||
@Test
|
||||
@IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
|
||||
counts = { IRNode.ADD_VL, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.ADD_L, ">= 1",
|
||||
IRNode.REPLICATE_L, IRNode.VECTOR_SIZE_ANY, ">= 1" })
|
||||
@Warmup(value = 10000)
|
||||
static void test_long_add_reassociation_pattern4() {
|
||||
LongVector.fromArray(LSP, longIn, 0)
|
||||
.lanewise(VectorOperators.ADD,
|
||||
LongVector.broadcast(LSP, la))
|
||||
.lanewise(VectorOperators.ADD,
|
||||
LongVector.broadcast(LSP, lb))
|
||||
.intoArray(longOut, 0);
|
||||
}
|
||||
|
||||
// --- LONG MUL ---
|
||||
|
||||
// bcast(a) MUL (bcast(b) MUL array)
|
||||
@Test
|
||||
@IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
|
||||
counts = { IRNode.MUL_VL, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.MUL_L, ">= 1",
|
||||
IRNode.REPLICATE_L, IRNode.VECTOR_SIZE_ANY, ">= 1" })
|
||||
@Warmup(value = 10000)
|
||||
static void test_long_mul_reassociation_pattern1() {
|
||||
LongVector.broadcast(LSP, la)
|
||||
.lanewise(VectorOperators.MUL,
|
||||
LongVector.broadcast(LSP, lb)
|
||||
.lanewise(VectorOperators.MUL,
|
||||
LongVector.fromArray(LSP, longIn, 0)))
|
||||
.intoArray(longOut, 0);
|
||||
}
|
||||
|
||||
// bcast(a) MUL (array MUL bcast(b))
|
||||
@Test
|
||||
@IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
|
||||
counts = { IRNode.MUL_VL, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.MUL_L, ">= 1",
|
||||
IRNode.REPLICATE_L, IRNode.VECTOR_SIZE_ANY, ">= 1" })
|
||||
@Warmup(value = 10000)
|
||||
static void test_long_mul_reassociation_pattern2() {
|
||||
LongVector.broadcast(LSP, la)
|
||||
.lanewise(VectorOperators.MUL,
|
||||
LongVector.fromArray(LSP, longIn, 0)
|
||||
.lanewise(VectorOperators.MUL,
|
||||
LongVector.broadcast(LSP, lb)))
|
||||
.intoArray(longOut, 0);
|
||||
}
|
||||
|
||||
// (bcast(a) MUL array) MUL bcast(b)
|
||||
@Test
|
||||
@IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
|
||||
counts = { IRNode.MUL_VL, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.MUL_L, ">= 1",
|
||||
IRNode.REPLICATE_L, IRNode.VECTOR_SIZE_ANY, ">= 1" })
|
||||
@Warmup(value = 10000)
|
||||
static void test_long_mul_reassociation_pattern3() {
|
||||
LongVector.broadcast(LSP, la)
|
||||
.lanewise(VectorOperators.MUL,
|
||||
LongVector.fromArray(LSP, longIn, 0))
|
||||
.lanewise(VectorOperators.MUL,
|
||||
LongVector.broadcast(LSP, lb))
|
||||
.intoArray(longOut, 0);
|
||||
}
|
||||
|
||||
// (array MUL bcast(a)) MUL bcast(b)
|
||||
@Test
|
||||
@IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
|
||||
counts = { IRNode.MUL_VL, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.MUL_L, ">= 1",
|
||||
IRNode.REPLICATE_L, IRNode.VECTOR_SIZE_ANY, ">= 1" })
|
||||
@Warmup(value = 10000)
|
||||
static void test_long_mul_reassociation_pattern4() {
|
||||
LongVector.fromArray(LSP, longIn, 0)
|
||||
.lanewise(VectorOperators.MUL,
|
||||
LongVector.broadcast(LSP, la))
|
||||
.lanewise(VectorOperators.MUL,
|
||||
LongVector.broadcast(LSP, lb))
|
||||
.intoArray(longOut, 0);
|
||||
}
|
||||
|
||||
/* =======================
|
||||
* SHORT
|
||||
* ======================= */
|
||||
|
||||
static final VectorSpecies<Short> SSP = ShortVector.SPECIES_PREFERRED;
|
||||
static short[] shortIn;
|
||||
static short[] shortOut;
|
||||
static short sa = 17, sb = 9;
|
||||
|
||||
static {
|
||||
shortIn = new short[SSP.length()];
|
||||
shortOut = new short[SSP.length()];
|
||||
for (int i = 0; i < SSP.length(); i++) {
|
||||
shortIn[i] = (short) i;
|
||||
}
|
||||
}
|
||||
|
||||
// --- SHORT ADD ---
|
||||
|
||||
// bcast(a) ADD (bcast(b) ADD array)
|
||||
@Test
|
||||
@IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
|
||||
counts = { IRNode.ADD_VS, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.ADD_I, ">= 1",
|
||||
IRNode.REPLICATE_S, IRNode.VECTOR_SIZE_ANY, ">= 1" })
|
||||
@Warmup(value = 10000)
|
||||
static void test_short_add_reassociation_pattern1() {
|
||||
ShortVector.broadcast(SSP, sa)
|
||||
.lanewise(VectorOperators.ADD,
|
||||
ShortVector.broadcast(SSP, sb)
|
||||
.lanewise(VectorOperators.ADD,
|
||||
ShortVector.fromArray(SSP, shortIn, 0)))
|
||||
.intoArray(shortOut, 0);
|
||||
}
|
||||
|
||||
// bcast(a) ADD (array ADD bcast(b))
|
||||
@Test
|
||||
@IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
|
||||
counts = { IRNode.ADD_VS, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.ADD_I, ">= 1",
|
||||
IRNode.REPLICATE_S, IRNode.VECTOR_SIZE_ANY, ">= 1" })
|
||||
@Warmup(value = 10000)
|
||||
static void test_short_add_reassociation_pattern2() {
|
||||
ShortVector.broadcast(SSP, sa)
|
||||
.lanewise(VectorOperators.ADD,
|
||||
ShortVector.fromArray(SSP, shortIn, 0)
|
||||
.lanewise(VectorOperators.ADD,
|
||||
ShortVector.broadcast(SSP, sb)))
|
||||
.intoArray(shortOut, 0);
|
||||
}
|
||||
|
||||
// (bcast(a) ADD array) ADD bcast(b)
|
||||
@Test
|
||||
@IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
|
||||
counts = { IRNode.ADD_VS, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.ADD_I, ">= 1",
|
||||
IRNode.REPLICATE_S, IRNode.VECTOR_SIZE_ANY, ">= 1" })
|
||||
@Warmup(value = 10000)
|
||||
static void test_short_add_reassociation_pattern3() {
|
||||
ShortVector.broadcast(SSP, sa)
|
||||
.lanewise(VectorOperators.ADD,
|
||||
ShortVector.fromArray(SSP, shortIn, 0))
|
||||
.lanewise(VectorOperators.ADD,
|
||||
ShortVector.broadcast(SSP, sb))
|
||||
.intoArray(shortOut, 0);
|
||||
}
|
||||
|
||||
// (array ADD bcast(a)) ADD bcast(b)
|
||||
@Test
|
||||
@IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
|
||||
counts = { IRNode.ADD_VS, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.ADD_I, ">= 1",
|
||||
IRNode.REPLICATE_S, IRNode.VECTOR_SIZE_ANY, ">= 1" })
|
||||
@Warmup(value = 10000)
|
||||
static void test_short_add_reassociation_pattern4() {
|
||||
ShortVector.fromArray(SSP, shortIn, 0)
|
||||
.lanewise(VectorOperators.ADD,
|
||||
ShortVector.broadcast(SSP, sa))
|
||||
.lanewise(VectorOperators.ADD,
|
||||
ShortVector.broadcast(SSP, sb))
|
||||
.intoArray(shortOut, 0);
|
||||
}
|
||||
|
||||
// --- SHORT MUL ---
|
||||
|
||||
// bcast(a) MUL (bcast(b) MUL array)
|
||||
@Test
|
||||
@IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
|
||||
counts = { IRNode.MUL_VS, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.MUL_I, ">= 1",
|
||||
IRNode.REPLICATE_S, IRNode.VECTOR_SIZE_ANY, ">= 1" })
|
||||
@Warmup(value = 10000)
|
||||
static void test_short_mul_reassociation_pattern1() {
|
||||
ShortVector.broadcast(SSP, sa)
|
||||
.lanewise(VectorOperators.MUL,
|
||||
ShortVector.broadcast(SSP, sb)
|
||||
.lanewise(VectorOperators.MUL,
|
||||
ShortVector.fromArray(SSP, shortIn, 0)))
|
||||
.intoArray(shortOut, 0);
|
||||
}
|
||||
|
||||
// bcast(a) MUL (array MUL bcast(b))
|
||||
@Test
|
||||
@IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
|
||||
counts = { IRNode.MUL_VS, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.MUL_I, ">= 1",
|
||||
IRNode.REPLICATE_S, IRNode.VECTOR_SIZE_ANY, ">= 1" })
|
||||
@Warmup(value = 10000)
|
||||
static void test_short_mul_reassociation_pattern2() {
|
||||
ShortVector.broadcast(SSP, sa)
|
||||
.lanewise(VectorOperators.MUL,
|
||||
ShortVector.fromArray(SSP, shortIn, 0)
|
||||
.lanewise(VectorOperators.MUL,
|
||||
ShortVector.broadcast(SSP, sb)))
|
||||
.intoArray(shortOut, 0);
|
||||
}
|
||||
|
||||
// (bcast(a) MUL array) MUL bcast(b)
|
||||
@Test
|
||||
@IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
|
||||
counts = { IRNode.MUL_VS, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.MUL_I, ">= 1",
|
||||
IRNode.REPLICATE_S, IRNode.VECTOR_SIZE_ANY, ">= 1" })
|
||||
@Warmup(value = 10000)
|
||||
static void test_short_mul_reassociation_pattern3() {
|
||||
ShortVector.broadcast(SSP, sa)
|
||||
.lanewise(VectorOperators.MUL,
|
||||
ShortVector.fromArray(SSP, shortIn, 0))
|
||||
.lanewise(VectorOperators.MUL,
|
||||
ShortVector.broadcast(SSP, sb))
|
||||
.intoArray(shortOut, 0);
|
||||
}
|
||||
|
||||
// (array MUL bcast(a)) MUL bcast(b)
|
||||
@Test
|
||||
@IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
|
||||
counts = { IRNode.MUL_VS, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.MUL_I, ">= 1",
|
||||
IRNode.REPLICATE_S, IRNode.VECTOR_SIZE_ANY, ">= 1" })
|
||||
@Warmup(value = 10000)
|
||||
static void test_short_mul_reassociation_pattern4() {
|
||||
ShortVector.fromArray(SSP, shortIn, 0)
|
||||
.lanewise(VectorOperators.MUL,
|
||||
ShortVector.broadcast(SSP, sa))
|
||||
.lanewise(VectorOperators.MUL,
|
||||
ShortVector.broadcast(SSP, sb))
|
||||
.intoArray(shortOut, 0);
|
||||
}
|
||||
|
||||
/* =======================
 * BYTE
 * ======================= */

// Preferred byte species plus the shared in/out buffers and scalar
// broadcast operands used by all byte reassociation tests below.
static final VectorSpecies<Byte> BSP = ByteVector.SPECIES_PREFERRED;
static byte[] byteIn;
static byte[] byteOut;
static byte ba = 17;
static byte bb = 9;

static {
    byteIn = new byte[BSP.length()];
    byteOut = new byte[BSP.length()];
    // Fill the input with distinct lane values 0, 1, 2, ...
    for (int lane = 0; lane < byteIn.length; lane++) {
        byteIn[lane] = (byte) lane;
    }
}
|
||||
|
||||
// --- BYTE ADD ---
|
||||
|
||||
// bcast(a) ADD (bcast(b) ADD array)
|
||||
@Test
|
||||
@IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
|
||||
counts = { IRNode.ADD_VB, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.ADD_I, ">= 1",
|
||||
IRNode.REPLICATE_B, IRNode.VECTOR_SIZE_ANY, ">= 1" })
|
||||
@Warmup(value = 10000)
|
||||
static void test_byte_add_reassociation_pattern1() {
|
||||
ByteVector.broadcast(BSP, ba)
|
||||
.lanewise(VectorOperators.ADD,
|
||||
ByteVector.broadcast(BSP, bb)
|
||||
.lanewise(VectorOperators.ADD,
|
||||
ByteVector.fromArray(BSP, byteIn, 0)))
|
||||
.intoArray(byteOut, 0);
|
||||
}
|
||||
|
||||
// bcast(a) ADD (array ADD bcast(b))
|
||||
@Test
|
||||
@IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
|
||||
counts = { IRNode.ADD_VB, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.ADD_I, ">= 1",
|
||||
IRNode.REPLICATE_B, IRNode.VECTOR_SIZE_ANY, ">= 1" })
|
||||
@Warmup(value = 10000)
|
||||
static void test_byte_add_reassociation_pattern2() {
|
||||
ByteVector.broadcast(BSP, ba)
|
||||
.lanewise(VectorOperators.ADD,
|
||||
ByteVector.fromArray(BSP, byteIn, 0)
|
||||
.lanewise(VectorOperators.ADD,
|
||||
ByteVector.broadcast(BSP, bb)))
|
||||
.intoArray(byteOut, 0);
|
||||
}
|
||||
|
||||
// (bcast(a) ADD array) ADD bcast(b)
|
||||
@Test
|
||||
@IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
|
||||
counts = { IRNode.ADD_VB, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.ADD_I, ">= 1",
|
||||
IRNode.REPLICATE_B, IRNode.VECTOR_SIZE_ANY, ">= 1" })
|
||||
@Warmup(value = 10000)
|
||||
static void test_byte_add_reassociation_pattern3() {
|
||||
ByteVector.broadcast(BSP, ba)
|
||||
.lanewise(VectorOperators.ADD,
|
||||
ByteVector.fromArray(BSP, byteIn, 0))
|
||||
.lanewise(VectorOperators.ADD,
|
||||
ByteVector.broadcast(BSP, bb))
|
||||
.intoArray(byteOut, 0);
|
||||
}
|
||||
|
||||
// (array ADD bcast(a)) ADD bcast(b)
|
||||
@Test
|
||||
@IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
|
||||
counts = { IRNode.ADD_VB, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.ADD_I, ">= 1",
|
||||
IRNode.REPLICATE_B, IRNode.VECTOR_SIZE_ANY, ">= 1" })
|
||||
@Warmup(value = 10000)
|
||||
static void test_byte_add_reassociation_pattern4() {
|
||||
ByteVector.fromArray(BSP, byteIn, 0)
|
||||
.lanewise(VectorOperators.ADD,
|
||||
ByteVector.broadcast(BSP, ba))
|
||||
.lanewise(VectorOperators.ADD,
|
||||
ByteVector.broadcast(BSP, bb))
|
||||
.intoArray(byteOut, 0);
|
||||
}
|
||||
|
||||
// --- BYTE MUL ---
|
||||
|
||||
// bcast(a) MUL (bcast(b) MUL array)
|
||||
@Test
|
||||
@IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
|
||||
counts = { IRNode.MUL_VB, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.MUL_I, ">= 1",
|
||||
IRNode.REPLICATE_B, IRNode.VECTOR_SIZE_ANY, ">= 1" })
|
||||
@Warmup(value = 10000)
|
||||
static void test_byte_mul_reassociation_pattern1() {
|
||||
ByteVector.broadcast(BSP, ba)
|
||||
.lanewise(VectorOperators.MUL,
|
||||
ByteVector.broadcast(BSP, bb)
|
||||
.lanewise(VectorOperators.MUL,
|
||||
ByteVector.fromArray(BSP, byteIn, 0)))
|
||||
.intoArray(byteOut, 0);
|
||||
}
|
||||
|
||||
// bcast(a) MUL (array MUL bcast(b))
|
||||
@Test
|
||||
@IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
|
||||
counts = { IRNode.MUL_VB, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.MUL_I, ">= 1",
|
||||
IRNode.REPLICATE_B, IRNode.VECTOR_SIZE_ANY, ">= 1" })
|
||||
@Warmup(value = 10000)
|
||||
static void test_byte_mul_reassociation_pattern2() {
|
||||
ByteVector.broadcast(BSP, ba)
|
||||
.lanewise(VectorOperators.MUL,
|
||||
ByteVector.fromArray(BSP, byteIn, 0)
|
||||
.lanewise(VectorOperators.MUL,
|
||||
ByteVector.broadcast(BSP, bb)))
|
||||
.intoArray(byteOut, 0);
|
||||
}
|
||||
|
||||
// (bcast(a) MUL array) MUL bcast(b)
|
||||
@Test
|
||||
@IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
|
||||
counts = { IRNode.MUL_VB, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.MUL_I, ">= 1",
|
||||
IRNode.REPLICATE_B, IRNode.VECTOR_SIZE_ANY, ">= 1" })
|
||||
@Warmup(value = 10000)
|
||||
static void test_byte_mul_reassociation_pattern3() {
|
||||
ByteVector.broadcast(BSP, ba)
|
||||
.lanewise(VectorOperators.MUL,
|
||||
ByteVector.fromArray(BSP, byteIn, 0))
|
||||
.lanewise(VectorOperators.MUL,
|
||||
ByteVector.broadcast(BSP, bb))
|
||||
.intoArray(byteOut, 0);
|
||||
}
|
||||
|
||||
// (array MUL bcast(a)) MUL bcast(b)
|
||||
@Test
|
||||
@IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
|
||||
counts = { IRNode.MUL_VB, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.MUL_I, ">= 1",
|
||||
IRNode.REPLICATE_B, IRNode.VECTOR_SIZE_ANY, ">= 1" })
|
||||
@Warmup(value = 10000)
|
||||
static void test_byte_mul_reassociation_pattern4() {
|
||||
ByteVector.fromArray(BSP, byteIn, 0)
|
||||
.lanewise(VectorOperators.MUL,
|
||||
ByteVector.broadcast(BSP, ba))
|
||||
.lanewise(VectorOperators.MUL,
|
||||
ByteVector.broadcast(BSP, bb))
|
||||
.intoArray(byteOut, 0);
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,239 @@
|
||||
/*
|
||||
* Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*
|
||||
*/
|
||||
|
||||
package org.openjdk.bench.jdk.incubator.vector;
|
||||
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.Random;
|
||||
import jdk.incubator.vector.*;
|
||||
import org.openjdk.jmh.annotations.*;
|
||||
import org.openjdk.jmh.infra.Blackhole;
|
||||
|
||||
@OutputTimeUnit(TimeUnit.MILLISECONDS)
|
||||
@State(Scope.Thread)
|
||||
@Fork(jvmArgs = {"--add-modules=jdk.incubator.vector"})
|
||||
public class VectorReassociateBenchmark {
|
||||
@Param({"1024", "2048"})
|
||||
int size;
|
||||
|
||||
int [] intIn1;
|
||||
int [] intOut;
|
||||
|
||||
long [] longIn1;
|
||||
long [] longOut;
|
||||
|
||||
short [] shortIn1;
|
||||
short [] shortOut;
|
||||
|
||||
byte [] byteIn1;
|
||||
byte [] byteOut;
|
||||
|
||||
static final VectorSpecies<Float> fspecies = FloatVector.SPECIES_PREFERRED;
|
||||
static final VectorSpecies<Double> dspecies = DoubleVector.SPECIES_PREFERRED;
|
||||
static final VectorSpecies<Integer> ispecies = IntVector.SPECIES_PREFERRED;
|
||||
static final VectorSpecies<Long> lspecies = LongVector.SPECIES_PREFERRED;
|
||||
static final VectorSpecies<Short> sspecies = ShortVector.SPECIES_PREFERRED;
|
||||
static final VectorSpecies<Byte> bspecies = ByteVector.SPECIES_PREFERRED;
|
||||
|
||||
@Setup(Level.Trial)
|
||||
public void BmSetup() {
|
||||
Random r = new Random(2048);
|
||||
intIn1 = new int[size];
|
||||
intOut = new int[size];
|
||||
|
||||
longIn1 = new long[size];
|
||||
longOut = new long[size];
|
||||
|
||||
shortIn1 = new short[size];
|
||||
shortOut = new short[size];
|
||||
|
||||
byteIn1 = new byte[size];
|
||||
byteOut = new byte[size];
|
||||
|
||||
for (int i = 4; i < size; i++) {
|
||||
intIn1[i] = r.nextInt();
|
||||
longIn1[i] = r.nextLong();
|
||||
shortIn1[i] = (short) r.nextInt();
|
||||
byteIn1[i] = (byte) r.nextInt();
|
||||
}
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public float pushBroadcastsAcrossVectorKernel1() {
|
||||
FloatVector res = FloatVector.broadcast(fspecies, 0.0f);
|
||||
for (int i = 0; i < size; i++) {
|
||||
FloatVector vec1 = FloatVector.broadcast(fspecies, (float)i);
|
||||
FloatVector vec2 = FloatVector.broadcast(fspecies, (float)i + 1);
|
||||
FloatVector vec3 = FloatVector.broadcast(fspecies, (float)i + 2);
|
||||
res = res.lanewise(VectorOperators.ADD, vec1.lanewise(VectorOperators.FMA, vec2, vec3));
|
||||
}
|
||||
return res.lane(0);
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public double pushBroadcastsAcrossVectorKernel2() {
|
||||
DoubleVector res = DoubleVector.broadcast(dspecies, 0.0f);
|
||||
for (int i = 0; i < size; i++) {
|
||||
DoubleVector vec1 = DoubleVector.broadcast(dspecies, (double)i);
|
||||
res = res.lanewise(VectorOperators.ADD, vec1.lanewise(VectorOperators.SQRT));
|
||||
}
|
||||
return res.lane(0);
|
||||
}
|
||||
|
||||
// int: bcast(a) MUL (bcast(b) MUL (bcast(c) MUL array))
|
||||
@Benchmark
|
||||
public void reassociateIntMulChainedBroadcasts() {
|
||||
for (int i = 0; i < ispecies.loopBound(size); i += ispecies.length()) {
|
||||
IntVector.broadcast(ispecies, i)
|
||||
.lanewise(VectorOperators.MUL,
|
||||
IntVector.broadcast(ispecies, i + 1)
|
||||
.lanewise(VectorOperators.MUL,
|
||||
IntVector.broadcast(ispecies, i + 2)
|
||||
.lanewise(VectorOperators.MUL,
|
||||
IntVector.fromArray(ispecies, intIn1, i))))
|
||||
.intoArray(intOut, i);
|
||||
}
|
||||
}
|
||||
|
||||
// int: (bcast(a) MUL bcast(b)) MUL (bcast(c) MUL array)
|
||||
@Benchmark
|
||||
public void reassociateIntMulBalancedBroadcasts() {
|
||||
for (int i = 0; i < ispecies.loopBound(size); i += ispecies.length()) {
|
||||
IntVector left =
|
||||
IntVector.broadcast(ispecies, i)
|
||||
.lanewise(VectorOperators.MUL,
|
||||
IntVector.broadcast(ispecies, i + 1));
|
||||
|
||||
IntVector right =
|
||||
IntVector.broadcast(ispecies, i + 2)
|
||||
.lanewise(VectorOperators.MUL,
|
||||
IntVector.fromArray(ispecies, intIn1, i));
|
||||
|
||||
left.lanewise(VectorOperators.MUL, right)
|
||||
.intoArray(intOut, i);
|
||||
}
|
||||
}
|
||||
|
||||
// long: bcast(a) MUL (bcast(b) MUL (bcast(c) MUL array))
|
||||
@Benchmark
|
||||
public void reassociateLongMulChainedBroadcasts() {
|
||||
for (int i = 0; i < lspecies.loopBound(size); i += lspecies.length()) {
|
||||
LongVector.broadcast(lspecies, (long) i)
|
||||
.lanewise(VectorOperators.MUL,
|
||||
LongVector.broadcast(lspecies, (long) (i + 1))
|
||||
.lanewise(VectorOperators.MUL,
|
||||
LongVector.broadcast(lspecies, (long) (i + 2))
|
||||
.lanewise(VectorOperators.MUL,
|
||||
LongVector.fromArray(lspecies, longIn1, i))))
|
||||
.intoArray(longOut, i);
|
||||
}
|
||||
}
|
||||
|
||||
// long: (bcast(a) MUL bcast(b)) MUL (bcast(c) MUL array)
|
||||
@Benchmark
|
||||
public void reassociateLongMulBalancedBroadcasts() {
|
||||
for (int i = 0; i < lspecies.loopBound(size); i += lspecies.length()) {
|
||||
LongVector left =
|
||||
LongVector.broadcast(lspecies, (long) i)
|
||||
.lanewise(VectorOperators.MUL,
|
||||
LongVector.broadcast(lspecies, (long) (i + 1)));
|
||||
|
||||
LongVector right =
|
||||
LongVector.broadcast(lspecies, (long) (i + 2))
|
||||
.lanewise(VectorOperators.MUL,
|
||||
LongVector.fromArray(lspecies, longIn1, i));
|
||||
|
||||
left.lanewise(VectorOperators.MUL, right)
|
||||
.intoArray(longOut, i);
|
||||
}
|
||||
}
|
||||
|
||||
// short: bcast(a) MUL (bcast(b) MUL (bcast(c) MUL array))
|
||||
@Benchmark
|
||||
public void reassociateShortMulChainedBroadcasts() {
|
||||
for (int i = 0; i < sspecies.loopBound(size); i += sspecies.length()) {
|
||||
ShortVector.broadcast(sspecies, (short) i)
|
||||
.lanewise(VectorOperators.MUL,
|
||||
ShortVector.broadcast(sspecies, (short) (i + 1))
|
||||
.lanewise(VectorOperators.MUL,
|
||||
ShortVector.broadcast(sspecies, (short) (i + 2))
|
||||
.lanewise(VectorOperators.MUL,
|
||||
ShortVector.fromArray(sspecies, shortIn1, i))))
|
||||
.intoArray(shortOut, i);
|
||||
}
|
||||
}
|
||||
|
||||
// short: (bcast(a) MUL bcast(b)) MUL (bcast(c) MUL array)
|
||||
@Benchmark
|
||||
public void reassociateShortMulBalancedBroadcasts() {
|
||||
for (int i = 0; i < sspecies.loopBound(size); i += sspecies.length()) {
|
||||
ShortVector left =
|
||||
ShortVector.broadcast(sspecies, (short) i)
|
||||
.lanewise(VectorOperators.MUL,
|
||||
ShortVector.broadcast(sspecies, (short) (i + 1)));
|
||||
|
||||
ShortVector right =
|
||||
ShortVector.broadcast(sspecies, (short) (i + 2))
|
||||
.lanewise(VectorOperators.MUL,
|
||||
ShortVector.fromArray(sspecies, shortIn1, i));
|
||||
|
||||
left.lanewise(VectorOperators.MUL, right)
|
||||
.intoArray(shortOut, i);
|
||||
}
|
||||
}
|
||||
|
||||
// byte: bcast(a) MUL (bcast(b) MUL (bcast(c) MUL array))
|
||||
@Benchmark
|
||||
public void reassociateByteMulChainedBroadcasts() {
|
||||
for (int i = 0; i < bspecies.loopBound(size); i += bspecies.length()) {
|
||||
ByteVector.broadcast(bspecies, (byte) i)
|
||||
.lanewise(VectorOperators.MUL,
|
||||
ByteVector.broadcast(bspecies, (byte) (i + 1))
|
||||
.lanewise(VectorOperators.MUL,
|
||||
ByteVector.broadcast(bspecies, (byte) (i + 2))
|
||||
.lanewise(VectorOperators.MUL,
|
||||
ByteVector.fromArray(bspecies, byteIn1, i))))
|
||||
.intoArray(byteOut, i);
|
||||
}
|
||||
}
|
||||
|
||||
// byte: (bcast(a) MUL bcast(b)) MUL (bcast(c) MUL array)
|
||||
@Benchmark
|
||||
public void reassociateByteMulBalancedBroadcasts() {
|
||||
for (int i = 0; i < bspecies.loopBound(size); i += bspecies.length()) {
|
||||
ByteVector left =
|
||||
ByteVector.broadcast(bspecies, (byte) i)
|
||||
.lanewise(VectorOperators.MUL,
|
||||
ByteVector.broadcast(bspecies, (byte) (i + 1)));
|
||||
|
||||
ByteVector right =
|
||||
ByteVector.broadcast(bspecies, (byte) (i + 2))
|
||||
.lanewise(VectorOperators.MUL,
|
||||
ByteVector.fromArray(bspecies, byteIn1, i));
|
||||
|
||||
left.lanewise(VectorOperators.MUL, right)
|
||||
.intoArray(byteOut, i);
|
||||
}
|
||||
}
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user