8358521: Optimize vector operations by reassociating broadcasted inputs

Reviewed-by: epeter, vlivanov, xgong
This commit is contained in:
Jatin Bhateja 2026-05-12 06:18:37 +00:00
parent 776bb729e8
commit 7ff7efd59d
7 changed files with 2343 additions and 22 deletions


@@ -520,7 +520,12 @@ class SqrtDNode : public Node {
public:
SqrtDNode(Compile* C, Node *c, Node *in1) : Node(c, in1) {
init_flags(Flag_is_expensive);
C->add_expensive_node(this);
// Only treat the node as expensive if a control input is set, because it
// might be created from a SqrtVDNode in VectorNode::push_through_replicate,
// which has no control input.
if (c != nullptr) {
C->add_expensive_node(this);
}
}
virtual int Opcode() const;
const Type *bottom_type() const { return Type::DOUBLE; }
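For context, here is a minimal Java sketch (class and method names are illustrative) of a source shape that can now create a SqrtD without a control input: push_through_replicate rewrites the square root of a broadcast into a broadcast of a scalar square root.

import jdk.incubator.vector.*;

class SqrtBroadcastSketch {
    static final VectorSpecies<Double> SP = DoubleVector.SPECIES_PREFERRED;

    // SqrtVD(Replicate(x)) may be rewritten to Replicate(SqrtD(x)); the new
    // SqrtD node has no control input, so it is not registered as expensive.
    static DoubleVector sqrtOfBroadcast(double x) {
        return DoubleVector.broadcast(SP, x).lanewise(VectorOperators.SQRT);
    }
}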


@@ -22,10 +22,12 @@
*/
#include "memory/allocation.inline.hpp"
#include "opto/addnode.hpp"
#include "opto/c2_globals.hpp"
#include "opto/compile.hpp"
#include "opto/connode.hpp"
#include "opto/convertnode.hpp"
#include "opto/divnode.hpp"
#include "opto/mulnode.hpp"
#include "opto/subnode.hpp"
#include "opto/vectornode.hpp"
@@ -290,7 +292,146 @@ int VectorNode::opcode(int sopc, BasicType bt) {
assert(!VectorNode::is_convert_opcode(sopc),
"Convert node %s should be processed by VectorCastNode::opcode()",
NodeClassNames[sopc]);
return 0; // Unimplemented
return 0; // not handled
}
}
// Return the scalar opcode for the specified vector opcode and basic type.
// Returns 0 if not handled.
int VectorNode::scalar_opcode(int vopc, BasicType bt) {
switch (vopc) {
case Op_AddVB:
case Op_AddVS:
case Op_AddVI:
return Op_AddI;
case Op_AddVL:
return Op_AddL;
case Op_AddVF:
return Op_AddF;
case Op_AddVD:
return Op_AddD;
case Op_SubVB:
case Op_SubVS:
case Op_SubVI:
return Op_SubI;
case Op_SubVL:
return Op_SubL;
case Op_SubVF:
return Op_SubF;
case Op_SubVD:
return Op_SubD;
case Op_MulVB:
case Op_MulVS:
case Op_MulVI:
return Op_MulI;
case Op_MulVL:
return Op_MulL;
case Op_MulVF:
return Op_MulF;
case Op_MulVD:
return Op_MulD;
case Op_DivVF:
return Op_DivF;
case Op_DivVD:
return Op_DivD;
case Op_AndV:
switch (bt) {
case T_BOOLEAN:
case T_CHAR:
case T_BYTE:
case T_SHORT:
case T_INT:
return Op_AndI;
case T_LONG:
return Op_AndL;
default:
return 0;
}
case Op_OrV:
switch (bt) {
case T_BOOLEAN:
case T_CHAR:
case T_BYTE:
case T_SHORT:
case T_INT:
return Op_OrI;
case T_LONG:
return Op_OrL;
default:
return 0;
}
case Op_XorV:
switch (bt) {
case T_BOOLEAN:
case T_CHAR:
case T_BYTE:
case T_SHORT:
case T_INT:
return Op_XorI;
case T_LONG:
return Op_XorL;
default:
return 0;
}
case Op_MinV:
switch (bt) {
case T_BOOLEAN:
case T_CHAR:
// unsigned, not supported for Min
return 0;
case T_BYTE:
case T_SHORT:
case T_INT:
return Op_MinI;
case T_LONG:
return Op_MinL;
case T_FLOAT:
return Op_MinF;
case T_DOUBLE:
return Op_MinD;
default:
return 0;
}
case Op_MaxV:
switch (bt) {
case T_BOOLEAN:
case T_CHAR:
// unsigned, not supported for Max
return 0;
case T_BYTE:
case T_SHORT:
case T_INT:
return Op_MaxI;
case T_LONG:
return Op_MaxL;
case T_FLOAT:
return Op_MaxF;
case T_DOUBLE:
return Op_MaxD;
default:
return 0;
}
case Op_SqrtVD:
return Op_SqrtD;
case Op_SqrtVF:
return Op_SqrtF;
case Op_FmaVF:
return Op_FmaF;
case Op_FmaVD:
return Op_FmaD;
default:
return 0; // not handled
}
}
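As a hedged illustration of this mapping (the rewrite happens inside C2, not in Java source): an AndV over two broadcast long vectors corresponds to Op_AndL, so the whole operation can be strength-reduced to one scalar AND plus one broadcast.

import jdk.incubator.vector.*;

class ScalarOpcodeSketch {
    static final VectorSpecies<Long> SP = LongVector.SPECIES_PREFERRED;

    // AndV(Replicate(a), Replicate(b)) with element type T_LONG maps to
    // Op_AndL, so C2 may emit Replicate(a & b) instead of a vector AND.
    static LongVector andOfBroadcasts(long a, long b) {
        return LongVector.broadcast(SP, a)
                         .lanewise(VectorOperators.AND, LongVector.broadcast(SP, b));
    }
}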
@@ -984,17 +1125,9 @@ static Node* ideal_partial_operations(PhaseGVN* phase, Node* node, const TypeVec
}
}
bool VectorNode::should_swap_inputs_to_help_global_value_numbering() {
// Predicated vector operations are sensitive to the ordering of their inputs.
// When the mask corresponding to a vector lane is false, the result of the
// operation is the corresponding lane of its first operand, i.e.
// RES = VEC1.lanewise(OPER, VEC2, MASK) is semantically equivalent to
// RES = BLEND(VEC1, VEC1.lanewise(OPER, VEC2), MASK)
if (is_predicated_vector()) {
return false;
}
switch(Opcode()) {
// Check if the vector operation is commutative (assuming that it is not predicated/masked).
static bool is_commutative_vector_operation(int opcode) {
switch(opcode) {
case Op_AddVB:
case Op_AddVS:
case Op_AddVI:
@@ -1022,18 +1155,228 @@ bool VectorNode::should_swap_inputs_to_help_global_value_numbering() {
case Op_XorVMask:
case Op_SaturatingAddV:
assert(req() == 3, "Must be a binary operation");
// For non-predicated commutative operations, sort the inputs in
// increasing order of node indices.
if (in(1)->_idx > in(2)->_idx) {
return true;
}
// fallthrough
return true;
default:
return false;
}
}
bool VectorNode::should_swap_inputs_to_help_global_value_numbering() {
// Predicated vector operations are sensitive to the ordering of their inputs.
// When the mask corresponding to a vector lane is false, the result of the
// operation is the corresponding lane of its first operand, i.e.
// RES = VEC1.lanewise(OPER, VEC2, MASK) is semantically equivalent to
// RES = BLEND(VEC1, VEC1.lanewise(OPER, VEC2), MASK)
if (is_predicated_vector()) {
return false;
}
if (is_commutative_vector_operation(Opcode())) {
assert(req() == 3, "Must be a binary operation");
// For non-predicated commutative operations, sort the inputs in
// increasing order of node indices.
if (in(1)->_idx > in(2)->_idx) {
return true;
}
}
return false;
}
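A hedged Java sketch of why a canonical operand order helps value numbering (whether the two adds actually commonize depends on the surrounding IR):

import jdk.incubator.vector.*;

class GvnOrderSketch {
    static final VectorSpecies<Integer> SP = IntVector.SPECIES_PREFERRED;

    // AddVI(v, w) and AddVI(w, v) differ only in input order; after sorting
    // inputs by node index, the two nodes hash identically and GVN can keep
    // a single add.
    static IntVector sumBothOrders(IntVector v, IntVector w) {
        IntVector x = v.lanewise(VectorOperators.ADD, w);
        IntVector y = w.lanewise(VectorOperators.ADD, v);
        return x.lanewise(VectorOperators.XOR, y);
    }
}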
// Check whether we can push this vector op through replicate (all inputs are Replicate).
bool VectorNode::can_push_through_replicate(BasicType bt) {
if (scalar_opcode(Opcode(), bt) == 0) {
return false;
}
// Skip predicated vector operations for now; for masked lanes we must
// preserve the destination/first-source vector contents.
if (is_predicated_vector()) {
return false;
}
for (uint i = 1; i < req(); i++) {
if (in(i)->Opcode() != Op_Replicate) {
return false;
}
}
return true;
}
Node* VectorNode::make_scalar(Compile* c, int vopc, BasicType bt, Node* control, Node* in1, Node* in2, Node* in3) {
int sopc = scalar_opcode(vopc, bt);
assert(sopc != 0, "unhandled vector opcode %s", NodeClassNames[vopc]);
assert(opcode(sopc, bt) == vopc, "scalar_opcode and opcode must agree for %s", NodeClassNames[vopc]);
switch (sopc) {
case Op_AddI:
return new AddINode(in1, in2);
case Op_AddL:
return new AddLNode(in1, in2);
case Op_AddF:
return new AddFNode(in1, in2);
case Op_AddD:
return new AddDNode(in1, in2);
case Op_MulI:
return new MulINode(in1, in2);
case Op_MulL:
return new MulLNode(in1, in2);
case Op_MulF:
return new MulFNode(in1, in2);
case Op_MulD:
return new MulDNode(in1, in2);
case Op_AndI:
return new AndINode(in1, in2);
case Op_AndL:
return new AndLNode(in1, in2);
case Op_DivF:
return new DivFNode(control, in1, in2);
case Op_DivD:
return new DivDNode(control, in1, in2);
case Op_OrI:
return new OrINode(in1, in2);
case Op_OrL:
return new OrLNode(in1, in2);
case Op_XorI:
return new XorINode(in1, in2);
case Op_XorL:
return new XorLNode(in1, in2);
case Op_SubI:
return new SubINode(in1, in2);
case Op_SubL:
return new SubLNode(in1, in2);
case Op_SubF:
return new SubFNode(in1, in2);
case Op_SubD:
return new SubDNode(in1, in2);
case Op_MinI:
return new MinINode(in1, in2);
case Op_MinL:
return new MinLNode(c, in1, in2);
case Op_MinF:
return new MinFNode(in1, in2);
case Op_MinD:
return new MinDNode(in1, in2);
case Op_MaxI:
return new MaxINode(in1, in2);
case Op_MaxL:
return new MaxLNode(c, in1, in2);
case Op_MaxF:
return new MaxFNode(in1, in2);
case Op_MaxD:
return new MaxDNode(in1, in2);
case Op_SqrtF:
return new SqrtFNode(c, control, in1);
case Op_SqrtD:
return new SqrtDNode(c, control, in1);
case Op_FmaF:
return new FmaFNode(in1, in2, in3);
case Op_FmaD:
return new FmaDNode(in1, in2, in3);
default:
assert(false, "unexpected scalar opcode");
return nullptr;
}
}
// Rewires and creates a new ideal subgraph with the following connectivity:
// parent(child(cinput1, cinput2), pinput2)
Node* VectorNode::create_reassociated_node(Node* parent, Node* child, Node* cinput1, Node* cinput2,
Node* pinput2, PhaseGVN* phase) {
Node* cloned_child = child->clone();
cloned_child->set_req(1, cinput1);
cloned_child->set_req(2, cinput2);
cloned_child = phase->transform(cloned_child);
Node* cloned_parent = parent->clone();
cloned_parent->set_req(1, cloned_child);
cloned_parent->set_req(2, pinput2);
return cloned_parent;
}
// Try to reassociate commutative vector operations using the following ideal
// transformation. This facilitates strength-reducing a vector operation whose
// inputs are all replicated into a scalar operation.
//
// VectorOp (Replicate INP1) (VectorOp (Replicate INP2) INP3) =>
// VectorOp (VectorOp (Replicate INP1) (Replicate INP2)) INP3
//
Node* VectorNode::reassociate_vector_operation(PhaseGVN* phase) {
// Only reassociate integral vector operations; floating-point reassociation
// is not value-preserving.
if (!is_integral_type(vect_type()->element_basic_type())) {
return nullptr;
}
// Only reassociate commutative vector operations.
if (!is_commutative_vector_operation(Opcode())) {
return nullptr;
}
Node* in1 = in(1);
Node* in2 = in(2);
if (in2->Opcode() == Op_Replicate && in1->Opcode() == Opcode()) {
swap(in1, in2);
}
if (in1->Opcode() != Op_Replicate || in2->Opcode() != Opcode()) {
return nullptr;
}
// Skip predicated vector operations; mask semantics prevent reassociation.
if (is_predicated_vector() || in2->as_Vector()->is_predicated_vector()) {
return nullptr;
}
Node* in2_1 = in2->in(1);
Node* in2_2 = in2->in(2);
if (in2_1->Opcode() == Op_Replicate) {
return create_reassociated_node(this, in2, in1, in2_1, in2_2, phase);
} else if (in2_2->Opcode() == Op_Replicate) {
return create_reassociated_node(this, in2, in1, in2_2, in2_1, phase);
}
return nullptr;
}
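A hedged sketch of a source shape this transform targets, mirroring the IR tests added below (the commented rewrite steps are illustrative):

import jdk.incubator.vector.*;

class ReassociateSketch {
    static final VectorSpecies<Integer> SP = IntVector.SPECIES_PREFERRED;

    // MulVI(Replicate(a), MulVI(Replicate(b), vec))
    //   => MulVI(MulVI(Replicate(a), Replicate(b)), vec)  (this transform)
    //   => MulVI(Replicate(MulI(a, b)), vec)              (push_through_replicate)
    static IntVector scaleTwice(int a, int b, int[] arr) {
        return IntVector.broadcast(SP, a)
                        .lanewise(VectorOperators.MUL,
                                  IntVector.broadcast(SP, b)
                                           .lanewise(VectorOperators.MUL,
                                                     IntVector.fromArray(SP, arr, 0)));
    }
}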
// Convert a vector operation whose inputs are all Replicate nodes into a scalar
// operation using the following ideal transformation.
//
// VectorOp (Replicate INP1, Replicate INP2) =>
// Replicate (ScalarOp INP1, INP2)
//
Node* VectorNode::push_through_replicate(PhaseGVN* phase) {
BasicType bt = vect_type()->element_basic_type();
if (!can_push_through_replicate(bt)) {
return nullptr;
}
assert(req() >= 2 && req() <= 4, "unexpected req() %u for %s", req(), NodeClassNames[Opcode()]);
Node* sinp1 = nullptr;
Node* sinp2 = nullptr;
Node* sinp3 = nullptr;
assert(in(1)->Opcode() == Op_Replicate, "");
sinp1 = in(1)->in(1);
if (req() > 2) {
assert(in(2)->Opcode() == Op_Replicate, "");
sinp2 = in(2)->in(1);
}
if (req() > 3) {
assert(in(3)->Opcode() == Op_Replicate, "");
sinp3 = in(3)->in(1);
}
Node* sop = make_scalar(phase->C, Opcode(), bt, in(0), sinp1, sinp2, sinp3);
if (sop == nullptr) {
return nullptr;
}
sop = phase->transform(sop);
return new ReplicateNode(sop, vect_type());
}
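A hedged sketch of the simplest shape this handles: a vector operation whose inputs are all broadcasts of scalars becomes one scalar operation and one broadcast.

import jdk.incubator.vector.*;

class PushThroughReplicateSketch {
    static final VectorSpecies<Integer> SP = IntVector.SPECIES_PREFERRED;

    // AddVI(Replicate(a), Replicate(b)) => Replicate(AddI(a, b)): one scalar
    // add plus one broadcast replaces two broadcasts and a vector add.
    static IntVector addBroadcasts(int a, int b) {
        return IntVector.broadcast(SP, a)
                        .lanewise(VectorOperators.ADD, IntVector.broadcast(SP, b));
    }
}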
Node* VectorNode::Ideal(PhaseGVN* phase, bool can_reshape) {
Node* n = ideal_partial_operations(phase, this, vect_type());
if (n != nullptr) {
@@ -1044,7 +1387,13 @@ Node* VectorNode::Ideal(PhaseGVN* phase, bool can_reshape) {
if (should_swap_inputs_to_help_global_value_numbering()) {
swap_edges(1, 2);
}
return nullptr;
n = push_through_replicate(phase);
if (n != nullptr) {
return n;
}
return reassociate_vector_operation(phase);
}
// Traverses a chain of VectorMaskCast nodes and returns the first non-VectorMaskCast node.
@@ -2094,7 +2443,7 @@ Node* FmaVNode::Ideal(PhaseGVN* phase, bool can_reshape) {
swap_edges(1, 2);
return this;
}
return nullptr;
return VectorNode::Ideal(phase, can_reshape);
}
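With FmaVNode::Ideal now delegating to VectorNode::Ideal, an FMA whose inputs are all broadcasts can also be strength-reduced; a hedged Java sketch:

import jdk.incubator.vector.*;

class FmaBroadcastSketch {
    static final VectorSpecies<Float> SP = FloatVector.SPECIES_PREFERRED;

    // FmaVF(Replicate(a), Replicate(b), Replicate(c))
    //   => Replicate(FmaF(a, b, c)) via push_through_replicate.
    static FloatVector fmaOfBroadcasts(float a, float b, float c) {
        return FloatVector.broadcast(SP, a)
                          .lanewise(VectorOperators.FMA,
                                    FloatVector.broadcast(SP, b),
                                    FloatVector.broadcast(SP, c));
    }
}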
// Generate other vector nodes to implement the masked/non-masked vector negation.


@@ -146,12 +146,20 @@ class VectorNode : public TypeNode {
static bool is_minmax_opcode(int opc);
bool should_swap_inputs_to_help_global_value_numbering();
Node* reassociate_vector_operation(PhaseGVN* phase);
static Node* create_reassociated_node(Node* parent, Node* child, Node* cinput1, Node* cinput2,
Node* pinput2, PhaseGVN* phase);
static bool is_vshift_cnt_opcode(int opc);
static bool is_rotate_opcode(int opc);
static int opcode(int sopc, BasicType bt); // scalar_opc -> vector_opc
static int scalar_opcode(int vopc, BasicType bt); // vector_opc -> scalar_opc, 0 if not handled
static Node* make_scalar(Compile* c, int vopc, BasicType bt, Node* control, Node* in1, Node* in2, Node* in3);
bool can_push_through_replicate(BasicType bt);
Node* push_through_replicate(PhaseGVN* phase);
static int shift_count_opcode(int opc);


@@ -233,6 +233,11 @@ public class IRNode {
beforeMatchingNameRegex(ADD_P, "AddP");
}
public static final String ADD_D = PREFIX + "ADD_D" + POSTFIX;
static {
beforeMatchingNameRegex(ADD_D, "AddD");
}
public static final String ADD_VD = VECTOR_PREFIX + "ADD_VD" + POSTFIX;
static {
vectorNode(ADD_VD, "AddVD", TYPE_DOUBLE);
@@ -763,11 +768,21 @@ public class IRNode {
vectorNode(DIV_VHF, "DivVHF", TYPE_SHORT);
}
public static final String DIV_F = PREFIX + "DIV_F" + POSTFIX;
static {
beforeMatchingNameRegex(DIV_F, "DivF");
}
public static final String DIV_VF = VECTOR_PREFIX + "DIV_VF" + POSTFIX;
static {
vectorNode(DIV_VF, "DivVF", TYPE_FLOAT);
}
public static final String DIV_D = PREFIX + "DIV_D" + POSTFIX;
static {
beforeMatchingNameRegex(DIV_D, "DivD");
}
public static final String DIV_VD = VECTOR_PREFIX + "DIV_VD" + POSTFIX;
static {
vectorNode(DIV_VD, "DivVD", TYPE_DOUBLE);

File diff suppressed because it is too large.


@@ -0,0 +1,605 @@
/*
* Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
/*
* @test
* @bug 8358521
* @summary Test reassociation of broadcasted inputs across vector operations
* @modules jdk.incubator.vector
* @library /test/lib /
* @run driver compiler.vectorapi.TestVectorReassociations
*/
package compiler.vectorapi;
import compiler.lib.ir_framework.*;
import jdk.incubator.vector.*;
import java.util.stream.IntStream;
/**
* Tests for the reassociation transform:
* VectorOp(broadcast(a), VectorOp(broadcast(b), array))
* => VectorOp(broadcast(ScalarOp(a, b)), array)
*/
public class TestVectorReassociations {
public static void main(String[] args) {
TestFramework.runWithFlags("--add-modules=jdk.incubator.vector");
}
/* =======================
* INT
* ======================= */
static final VectorSpecies<Integer> ISP = IntVector.SPECIES_PREFERRED;
static int[] intIn = IntStream.range(0, IntVector.SPECIES_PREFERRED.length()).toArray();
static int[] intOut = new int[IntVector.SPECIES_PREFERRED.length()];
static int ia = 17, ib = 9;
// --- INT ADD ---
// bcast(a) ADD (bcast(b) ADD array)
@Test
@IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
counts = { IRNode.ADD_VI, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.ADD_I, ">= 1",
IRNode.REPLICATE_I, IRNode.VECTOR_SIZE_ANY, ">= 1" })
@Warmup(value = 10000)
static void test_int_add_reassociation_pattern1() {
IntVector.broadcast(ISP, ia)
.lanewise(VectorOperators.ADD,
IntVector.broadcast(ISP, ib)
.lanewise(VectorOperators.ADD,
IntVector.fromArray(ISP, intIn, 0)))
.intoArray(intOut, 0);
}
// bcast(a) ADD (array ADD bcast(b))
@Test
@IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
counts = { IRNode.ADD_VI, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.ADD_I, ">= 1",
IRNode.REPLICATE_I, IRNode.VECTOR_SIZE_ANY, ">= 1" })
@Warmup(value = 10000)
static void test_int_add_reassociation_pattern2() {
IntVector.broadcast(ISP, ia)
.lanewise(VectorOperators.ADD,
IntVector.fromArray(ISP, intIn, 0)
.lanewise(VectorOperators.ADD,
IntVector.broadcast(ISP, ib)))
.intoArray(intOut, 0);
}
// (bcast(a) ADD array) ADD bcast(b)
@Test
@IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
counts = { IRNode.ADD_VI, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.ADD_I, ">= 1",
IRNode.REPLICATE_I, IRNode.VECTOR_SIZE_ANY, ">= 1" })
@Warmup(value = 10000)
static void test_int_add_reassociation_pattern3() {
IntVector.broadcast(ISP, ia)
.lanewise(VectorOperators.ADD,
IntVector.fromArray(ISP, intIn, 0))
.lanewise(VectorOperators.ADD,
IntVector.broadcast(ISP, ib))
.intoArray(intOut, 0);
}
// (array ADD bcast(a)) ADD bcast(b)
@Test
@IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
counts = { IRNode.ADD_VI, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.ADD_I, ">= 1",
IRNode.REPLICATE_I, IRNode.VECTOR_SIZE_ANY, ">= 1" })
@Warmup(value = 10000)
static void test_int_add_reassociation_pattern4() {
IntVector.fromArray(ISP, intIn, 0)
.lanewise(VectorOperators.ADD,
IntVector.broadcast(ISP, ia))
.lanewise(VectorOperators.ADD,
IntVector.broadcast(ISP, ib))
.intoArray(intOut, 0);
}
// --- INT MUL ---
// bcast(a) MUL (bcast(b) MUL array)
@Test
@IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
counts = { IRNode.MUL_VI, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.MUL_I, ">= 1",
IRNode.REPLICATE_I, IRNode.VECTOR_SIZE_ANY, ">= 1" })
@Warmup(value = 10000)
static void test_int_mul_reassociation_pattern1() {
IntVector.broadcast(ISP, ia)
.lanewise(VectorOperators.MUL,
IntVector.broadcast(ISP, ib)
.lanewise(VectorOperators.MUL,
IntVector.fromArray(ISP, intIn, 0)))
.intoArray(intOut, 0);
}
// bcast(a) MUL (array MUL bcast(b))
@Test
@IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
counts = { IRNode.MUL_VI, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.MUL_I, ">= 1",
IRNode.REPLICATE_I, IRNode.VECTOR_SIZE_ANY, ">= 1" })
@Warmup(value = 10000)
static void test_int_mul_reassociation_pattern2() {
IntVector.broadcast(ISP, ia)
.lanewise(VectorOperators.MUL,
IntVector.fromArray(ISP, intIn, 0)
.lanewise(VectorOperators.MUL,
IntVector.broadcast(ISP, ib)))
.intoArray(intOut, 0);
}
// (bcast(a) MUL array) MUL bcast(b)
@Test
@IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
counts = { IRNode.MUL_VI, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.MUL_I, ">= 1",
IRNode.REPLICATE_I, IRNode.VECTOR_SIZE_ANY, ">= 1" })
@Warmup(value = 10000)
static void test_int_mul_reassociation_pattern3() {
IntVector.broadcast(ISP, ia)
.lanewise(VectorOperators.MUL,
IntVector.fromArray(ISP, intIn, 0))
.lanewise(VectorOperators.MUL,
IntVector.broadcast(ISP, ib))
.intoArray(intOut, 0);
}
// (array MUL bcast(a)) MUL bcast(b)
@Test
@IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
counts = { IRNode.MUL_VI, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.MUL_I, ">= 1",
IRNode.REPLICATE_I, IRNode.VECTOR_SIZE_ANY, ">= 1" })
@Warmup(value = 10000)
static void test_int_mul_reassociation_pattern4() {
IntVector.fromArray(ISP, intIn, 0)
.lanewise(VectorOperators.MUL,
IntVector.broadcast(ISP, ia))
.lanewise(VectorOperators.MUL,
IntVector.broadcast(ISP, ib))
.intoArray(intOut, 0);
}
/* =======================
* LONG
* ======================= */
static final VectorSpecies<Long> LSP = LongVector.SPECIES_PREFERRED;
static long[] longIn;
static long[] longOut;
static long la = 17L, lb = 9L;
static {
longIn = new long[LSP.length()];
longOut = new long[LSP.length()];
for (int i = 0; i < LSP.length(); i++) {
longIn[i] = (long) i;
}
}
// --- LONG ADD ---
// bcast(a) ADD (bcast(b) ADD array)
@Test
@IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
counts = { IRNode.ADD_VL, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.ADD_L, ">= 1",
IRNode.REPLICATE_L, IRNode.VECTOR_SIZE_ANY, ">= 1" })
@Warmup(value = 10000)
static void test_long_add_reassociation_pattern1() {
LongVector.broadcast(LSP, la)
.lanewise(VectorOperators.ADD,
LongVector.broadcast(LSP, lb)
.lanewise(VectorOperators.ADD,
LongVector.fromArray(LSP, longIn, 0)))
.intoArray(longOut, 0);
}
// bcast(a) ADD (array ADD bcast(b))
@Test
@IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
counts = { IRNode.ADD_VL, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.ADD_L, ">= 1",
IRNode.REPLICATE_L, IRNode.VECTOR_SIZE_ANY, ">= 1" })
@Warmup(value = 10000)
static void test_long_add_reassociation_pattern2() {
LongVector.broadcast(LSP, la)
.lanewise(VectorOperators.ADD,
LongVector.fromArray(LSP, longIn, 0)
.lanewise(VectorOperators.ADD,
LongVector.broadcast(LSP, lb)))
.intoArray(longOut, 0);
}
// (bcast(a) ADD array) ADD bcast(b)
@Test
@IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
counts = { IRNode.ADD_VL, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.ADD_L, ">= 1",
IRNode.REPLICATE_L, IRNode.VECTOR_SIZE_ANY, ">= 1" })
@Warmup(value = 10000)
static void test_long_add_reassociation_pattern3() {
LongVector.broadcast(LSP, la)
.lanewise(VectorOperators.ADD,
LongVector.fromArray(LSP, longIn, 0))
.lanewise(VectorOperators.ADD,
LongVector.broadcast(LSP, lb))
.intoArray(longOut, 0);
}
// (array ADD bcast(a)) ADD bcast(b)
@Test
@IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
counts = { IRNode.ADD_VL, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.ADD_L, ">= 1",
IRNode.REPLICATE_L, IRNode.VECTOR_SIZE_ANY, ">= 1" })
@Warmup(value = 10000)
static void test_long_add_reassociation_pattern4() {
LongVector.fromArray(LSP, longIn, 0)
.lanewise(VectorOperators.ADD,
LongVector.broadcast(LSP, la))
.lanewise(VectorOperators.ADD,
LongVector.broadcast(LSP, lb))
.intoArray(longOut, 0);
}
// --- LONG MUL ---
// bcast(a) MUL (bcast(b) MUL array)
@Test
@IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
counts = { IRNode.MUL_VL, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.MUL_L, ">= 1",
IRNode.REPLICATE_L, IRNode.VECTOR_SIZE_ANY, ">= 1" })
@Warmup(value = 10000)
static void test_long_mul_reassociation_pattern1() {
LongVector.broadcast(LSP, la)
.lanewise(VectorOperators.MUL,
LongVector.broadcast(LSP, lb)
.lanewise(VectorOperators.MUL,
LongVector.fromArray(LSP, longIn, 0)))
.intoArray(longOut, 0);
}
// bcast(a) MUL (array MUL bcast(b))
@Test
@IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
counts = { IRNode.MUL_VL, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.MUL_L, ">= 1",
IRNode.REPLICATE_L, IRNode.VECTOR_SIZE_ANY, ">= 1" })
@Warmup(value = 10000)
static void test_long_mul_reassociation_pattern2() {
LongVector.broadcast(LSP, la)
.lanewise(VectorOperators.MUL,
LongVector.fromArray(LSP, longIn, 0)
.lanewise(VectorOperators.MUL,
LongVector.broadcast(LSP, lb)))
.intoArray(longOut, 0);
}
// (bcast(a) MUL array) MUL bcast(b)
@Test
@IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
counts = { IRNode.MUL_VL, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.MUL_L, ">= 1",
IRNode.REPLICATE_L, IRNode.VECTOR_SIZE_ANY, ">= 1" })
@Warmup(value = 10000)
static void test_long_mul_reassociation_pattern3() {
LongVector.broadcast(LSP, la)
.lanewise(VectorOperators.MUL,
LongVector.fromArray(LSP, longIn, 0))
.lanewise(VectorOperators.MUL,
LongVector.broadcast(LSP, lb))
.intoArray(longOut, 0);
}
// (array MUL bcast(a)) MUL bcast(b)
@Test
@IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
counts = { IRNode.MUL_VL, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.MUL_L, ">= 1",
IRNode.REPLICATE_L, IRNode.VECTOR_SIZE_ANY, ">= 1" })
@Warmup(value = 10000)
static void test_long_mul_reassociation_pattern4() {
LongVector.fromArray(LSP, longIn, 0)
.lanewise(VectorOperators.MUL,
LongVector.broadcast(LSP, la))
.lanewise(VectorOperators.MUL,
LongVector.broadcast(LSP, lb))
.intoArray(longOut, 0);
}
/* =======================
* SHORT
* ======================= */
static final VectorSpecies<Short> SSP = ShortVector.SPECIES_PREFERRED;
static short[] shortIn;
static short[] shortOut;
static short sa = 17, sb = 9;
static {
shortIn = new short[SSP.length()];
shortOut = new short[SSP.length()];
for (int i = 0; i < SSP.length(); i++) {
shortIn[i] = (short) i;
}
}
// --- SHORT ADD ---
// bcast(a) ADD (bcast(b) ADD array)
@Test
@IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
counts = { IRNode.ADD_VS, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.ADD_I, ">= 1",
IRNode.REPLICATE_S, IRNode.VECTOR_SIZE_ANY, ">= 1" })
@Warmup(value = 10000)
static void test_short_add_reassociation_pattern1() {
ShortVector.broadcast(SSP, sa)
.lanewise(VectorOperators.ADD,
ShortVector.broadcast(SSP, sb)
.lanewise(VectorOperators.ADD,
ShortVector.fromArray(SSP, shortIn, 0)))
.intoArray(shortOut, 0);
}
// bcast(a) ADD (array ADD bcast(b))
@Test
@IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
counts = { IRNode.ADD_VS, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.ADD_I, ">= 1",
IRNode.REPLICATE_S, IRNode.VECTOR_SIZE_ANY, ">= 1" })
@Warmup(value = 10000)
static void test_short_add_reassociation_pattern2() {
ShortVector.broadcast(SSP, sa)
.lanewise(VectorOperators.ADD,
ShortVector.fromArray(SSP, shortIn, 0)
.lanewise(VectorOperators.ADD,
ShortVector.broadcast(SSP, sb)))
.intoArray(shortOut, 0);
}
// (bcast(a) ADD array) ADD bcast(b)
@Test
@IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
counts = { IRNode.ADD_VS, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.ADD_I, ">= 1",
IRNode.REPLICATE_S, IRNode.VECTOR_SIZE_ANY, ">= 1" })
@Warmup(value = 10000)
static void test_short_add_reassociation_pattern3() {
ShortVector.broadcast(SSP, sa)
.lanewise(VectorOperators.ADD,
ShortVector.fromArray(SSP, shortIn, 0))
.lanewise(VectorOperators.ADD,
ShortVector.broadcast(SSP, sb))
.intoArray(shortOut, 0);
}
// (array ADD bcast(a)) ADD bcast(b)
@Test
@IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
counts = { IRNode.ADD_VS, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.ADD_I, ">= 1",
IRNode.REPLICATE_S, IRNode.VECTOR_SIZE_ANY, ">= 1" })
@Warmup(value = 10000)
static void test_short_add_reassociation_pattern4() {
ShortVector.fromArray(SSP, shortIn, 0)
.lanewise(VectorOperators.ADD,
ShortVector.broadcast(SSP, sa))
.lanewise(VectorOperators.ADD,
ShortVector.broadcast(SSP, sb))
.intoArray(shortOut, 0);
}
// --- SHORT MUL ---
// bcast(a) MUL (bcast(b) MUL array)
@Test
@IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
counts = { IRNode.MUL_VS, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.MUL_I, ">= 1",
IRNode.REPLICATE_S, IRNode.VECTOR_SIZE_ANY, ">= 1" })
@Warmup(value = 10000)
static void test_short_mul_reassociation_pattern1() {
ShortVector.broadcast(SSP, sa)
.lanewise(VectorOperators.MUL,
ShortVector.broadcast(SSP, sb)
.lanewise(VectorOperators.MUL,
ShortVector.fromArray(SSP, shortIn, 0)))
.intoArray(shortOut, 0);
}
// bcast(a) MUL (array MUL bcast(b))
@Test
@IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
counts = { IRNode.MUL_VS, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.MUL_I, ">= 1",
IRNode.REPLICATE_S, IRNode.VECTOR_SIZE_ANY, ">= 1" })
@Warmup(value = 10000)
static void test_short_mul_reassociation_pattern2() {
ShortVector.broadcast(SSP, sa)
.lanewise(VectorOperators.MUL,
ShortVector.fromArray(SSP, shortIn, 0)
.lanewise(VectorOperators.MUL,
ShortVector.broadcast(SSP, sb)))
.intoArray(shortOut, 0);
}
// (bcast(a) MUL array) MUL bcast(b)
@Test
@IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
counts = { IRNode.MUL_VS, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.MUL_I, ">= 1",
IRNode.REPLICATE_S, IRNode.VECTOR_SIZE_ANY, ">= 1" })
@Warmup(value = 10000)
static void test_short_mul_reassociation_pattern3() {
ShortVector.broadcast(SSP, sa)
.lanewise(VectorOperators.MUL,
ShortVector.fromArray(SSP, shortIn, 0))
.lanewise(VectorOperators.MUL,
ShortVector.broadcast(SSP, sb))
.intoArray(shortOut, 0);
}
// (array MUL bcast(a)) MUL bcast(b)
@Test
@IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
counts = { IRNode.MUL_VS, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.MUL_I, ">= 1",
IRNode.REPLICATE_S, IRNode.VECTOR_SIZE_ANY, ">= 1" })
@Warmup(value = 10000)
static void test_short_mul_reassociation_pattern4() {
ShortVector.fromArray(SSP, shortIn, 0)
.lanewise(VectorOperators.MUL,
ShortVector.broadcast(SSP, sa))
.lanewise(VectorOperators.MUL,
ShortVector.broadcast(SSP, sb))
.intoArray(shortOut, 0);
}
/* =======================
* BYTE
* ======================= */
static final VectorSpecies<Byte> BSP = ByteVector.SPECIES_PREFERRED;
static byte[] byteIn;
static byte[] byteOut;
static byte ba = 17, bb = 9;
static {
byteIn = new byte[BSP.length()];
byteOut = new byte[BSP.length()];
for (int i = 0; i < BSP.length(); i++) {
byteIn[i] = (byte) i;
}
}
// --- BYTE ADD ---
// bcast(a) ADD (bcast(b) ADD array)
@Test
@IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
counts = { IRNode.ADD_VB, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.ADD_I, ">= 1",
IRNode.REPLICATE_B, IRNode.VECTOR_SIZE_ANY, ">= 1" })
@Warmup(value = 10000)
static void test_byte_add_reassociation_pattern1() {
ByteVector.broadcast(BSP, ba)
.lanewise(VectorOperators.ADD,
ByteVector.broadcast(BSP, bb)
.lanewise(VectorOperators.ADD,
ByteVector.fromArray(BSP, byteIn, 0)))
.intoArray(byteOut, 0);
}
// bcast(a) ADD (array ADD bcast(b))
@Test
@IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
counts = { IRNode.ADD_VB, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.ADD_I, ">= 1",
IRNode.REPLICATE_B, IRNode.VECTOR_SIZE_ANY, ">= 1" })
@Warmup(value = 10000)
static void test_byte_add_reassociation_pattern2() {
ByteVector.broadcast(BSP, ba)
.lanewise(VectorOperators.ADD,
ByteVector.fromArray(BSP, byteIn, 0)
.lanewise(VectorOperators.ADD,
ByteVector.broadcast(BSP, bb)))
.intoArray(byteOut, 0);
}
// (bcast(a) ADD array) ADD bcast(b)
@Test
@IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
counts = { IRNode.ADD_VB, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.ADD_I, ">= 1",
IRNode.REPLICATE_B, IRNode.VECTOR_SIZE_ANY, ">= 1" })
@Warmup(value = 10000)
static void test_byte_add_reassociation_pattern3() {
ByteVector.broadcast(BSP, ba)
.lanewise(VectorOperators.ADD,
ByteVector.fromArray(BSP, byteIn, 0))
.lanewise(VectorOperators.ADD,
ByteVector.broadcast(BSP, bb))
.intoArray(byteOut, 0);
}
// (array ADD bcast(a)) ADD bcast(b)
@Test
@IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
counts = { IRNode.ADD_VB, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.ADD_I, ">= 1",
IRNode.REPLICATE_B, IRNode.VECTOR_SIZE_ANY, ">= 1" })
@Warmup(value = 10000)
static void test_byte_add_reassociation_pattern4() {
ByteVector.fromArray(BSP, byteIn, 0)
.lanewise(VectorOperators.ADD,
ByteVector.broadcast(BSP, ba))
.lanewise(VectorOperators.ADD,
ByteVector.broadcast(BSP, bb))
.intoArray(byteOut, 0);
}
// --- BYTE MUL ---
// bcast(a) MUL (bcast(b) MUL array)
@Test
@IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
counts = { IRNode.MUL_VB, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.MUL_I, ">= 1",
IRNode.REPLICATE_B, IRNode.VECTOR_SIZE_ANY, ">= 1" })
@Warmup(value = 10000)
static void test_byte_mul_reassociation_pattern1() {
ByteVector.broadcast(BSP, ba)
.lanewise(VectorOperators.MUL,
ByteVector.broadcast(BSP, bb)
.lanewise(VectorOperators.MUL,
ByteVector.fromArray(BSP, byteIn, 0)))
.intoArray(byteOut, 0);
}
// bcast(a) MUL (array MUL bcast(b))
@Test
@IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
counts = { IRNode.MUL_VB, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.MUL_I, ">= 1",
IRNode.REPLICATE_B, IRNode.VECTOR_SIZE_ANY, ">= 1" })
@Warmup(value = 10000)
static void test_byte_mul_reassociation_pattern2() {
ByteVector.broadcast(BSP, ba)
.lanewise(VectorOperators.MUL,
ByteVector.fromArray(BSP, byteIn, 0)
.lanewise(VectorOperators.MUL,
ByteVector.broadcast(BSP, bb)))
.intoArray(byteOut, 0);
}
// (bcast(a) MUL array) MUL bcast(b)
@Test
@IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
counts = { IRNode.MUL_VB, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.MUL_I, ">= 1",
IRNode.REPLICATE_B, IRNode.VECTOR_SIZE_ANY, ">= 1" })
@Warmup(value = 10000)
static void test_byte_mul_reassociation_pattern3() {
ByteVector.broadcast(BSP, ba)
.lanewise(VectorOperators.MUL,
ByteVector.fromArray(BSP, byteIn, 0))
.lanewise(VectorOperators.MUL,
ByteVector.broadcast(BSP, bb))
.intoArray(byteOut, 0);
}
// (array MUL bcast(a)) MUL bcast(b)
@Test
@IR(applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"},
counts = { IRNode.MUL_VB, IRNode.VECTOR_SIZE_ANY, " 1 ", IRNode.MUL_I, ">= 1",
IRNode.REPLICATE_B, IRNode.VECTOR_SIZE_ANY, ">= 1" })
@Warmup(value = 10000)
static void test_byte_mul_reassociation_pattern4() {
ByteVector.fromArray(BSP, byteIn, 0)
.lanewise(VectorOperators.MUL,
ByteVector.broadcast(BSP, ba))
.lanewise(VectorOperators.MUL,
ByteVector.broadcast(BSP, bb))
.intoArray(byteOut, 0);
}
}


@@ -0,0 +1,239 @@
/*
* Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
package org.openjdk.bench.jdk.incubator.vector;
import java.util.concurrent.TimeUnit;
import java.util.Random;
import jdk.incubator.vector.*;
import org.openjdk.jmh.annotations.*;
import org.openjdk.jmh.infra.Blackhole;
@OutputTimeUnit(TimeUnit.MILLISECONDS)
@State(Scope.Thread)
@Fork(jvmArgs = {"--add-modules=jdk.incubator.vector"})
public class VectorReassociateBenchmark {
@Param({"1024", "2048"})
int size;
int [] intIn1;
int [] intOut;
long [] longIn1;
long [] longOut;
short [] shortIn1;
short [] shortOut;
byte [] byteIn1;
byte [] byteOut;
static final VectorSpecies<Float> fspecies = FloatVector.SPECIES_PREFERRED;
static final VectorSpecies<Double> dspecies = DoubleVector.SPECIES_PREFERRED;
static final VectorSpecies<Integer> ispecies = IntVector.SPECIES_PREFERRED;
static final VectorSpecies<Long> lspecies = LongVector.SPECIES_PREFERRED;
static final VectorSpecies<Short> sspecies = ShortVector.SPECIES_PREFERRED;
static final VectorSpecies<Byte> bspecies = ByteVector.SPECIES_PREFERRED;
@Setup(Level.Trial)
public void BmSetup() {
Random r = new Random(2048);
intIn1 = new int[size];
intOut = new int[size];
longIn1 = new long[size];
longOut = new long[size];
shortIn1 = new short[size];
shortOut = new short[size];
byteIn1 = new byte[size];
byteOut = new byte[size];
for (int i = 4; i < size; i++) {
intIn1[i] = r.nextInt();
longIn1[i] = r.nextLong();
shortIn1[i] = (short) r.nextInt();
byteIn1[i] = (byte) r.nextInt();
}
}
@Benchmark
public float pushBroadcastsAcrossVectorKernel1() {
FloatVector res = FloatVector.broadcast(fspecies, 0.0f);
for (int i = 0; i < size; i++) {
FloatVector vec1 = FloatVector.broadcast(fspecies, (float)i);
FloatVector vec2 = FloatVector.broadcast(fspecies, (float)i + 1);
FloatVector vec3 = FloatVector.broadcast(fspecies, (float)i + 2);
res = res.lanewise(VectorOperators.ADD, vec1.lanewise(VectorOperators.FMA, vec2, vec3));
}
return res.lane(0);
}
@Benchmark
public double pushBroadcastsAcrossVectorKernel2() {
DoubleVector res = DoubleVector.broadcast(dspecies, 0.0);
for (int i = 0; i < size; i++) {
DoubleVector vec1 = DoubleVector.broadcast(dspecies, (double)i);
res = res.lanewise(VectorOperators.ADD, vec1.lanewise(VectorOperators.SQRT));
}
return res.lane(0);
}
// int: bcast(a) MUL (bcast(b) MUL (bcast(c) MUL array))
@Benchmark
public void reassociateIntMulChainedBroadcasts() {
for (int i = 0; i < ispecies.loopBound(size); i += ispecies.length()) {
IntVector.broadcast(ispecies, i)
.lanewise(VectorOperators.MUL,
IntVector.broadcast(ispecies, i + 1)
.lanewise(VectorOperators.MUL,
IntVector.broadcast(ispecies, i + 2)
.lanewise(VectorOperators.MUL,
IntVector.fromArray(ispecies, intIn1, i))))
.intoArray(intOut, i);
}
}
// int: (bcast(a) MUL bcast(b)) MUL (bcast(c) MUL array)
@Benchmark
public void reassociateIntMulBalancedBroadcasts() {
for (int i = 0; i < ispecies.loopBound(size); i += ispecies.length()) {
IntVector left =
IntVector.broadcast(ispecies, i)
.lanewise(VectorOperators.MUL,
IntVector.broadcast(ispecies, i + 1));
IntVector right =
IntVector.broadcast(ispecies, i + 2)
.lanewise(VectorOperators.MUL,
IntVector.fromArray(ispecies, intIn1, i));
left.lanewise(VectorOperators.MUL, right)
.intoArray(intOut, i);
}
}
// long: bcast(a) MUL (bcast(b) MUL (bcast(c) MUL array))
@Benchmark
public void reassociateLongMulChainedBroadcasts() {
for (int i = 0; i < lspecies.loopBound(size); i += lspecies.length()) {
LongVector.broadcast(lspecies, (long) i)
.lanewise(VectorOperators.MUL,
LongVector.broadcast(lspecies, (long) (i + 1))
.lanewise(VectorOperators.MUL,
LongVector.broadcast(lspecies, (long) (i + 2))
.lanewise(VectorOperators.MUL,
LongVector.fromArray(lspecies, longIn1, i))))
.intoArray(longOut, i);
}
}
// long: (bcast(a) MUL bcast(b)) MUL (bcast(c) MUL array)
@Benchmark
public void reassociateLongMulBalancedBroadcasts() {
for (int i = 0; i < lspecies.loopBound(size); i += lspecies.length()) {
LongVector left =
LongVector.broadcast(lspecies, (long) i)
.lanewise(VectorOperators.MUL,
LongVector.broadcast(lspecies, (long) (i + 1)));
LongVector right =
LongVector.broadcast(lspecies, (long) (i + 2))
.lanewise(VectorOperators.MUL,
LongVector.fromArray(lspecies, longIn1, i));
left.lanewise(VectorOperators.MUL, right)
.intoArray(longOut, i);
}
}
// short: bcast(a) MUL (bcast(b) MUL (bcast(c) MUL array))
@Benchmark
public void reassociateShortMulChainedBroadcasts() {
for (int i = 0; i < sspecies.loopBound(size); i += sspecies.length()) {
ShortVector.broadcast(sspecies, (short) i)
.lanewise(VectorOperators.MUL,
ShortVector.broadcast(sspecies, (short) (i + 1))
.lanewise(VectorOperators.MUL,
ShortVector.broadcast(sspecies, (short) (i + 2))
.lanewise(VectorOperators.MUL,
ShortVector.fromArray(sspecies, shortIn1, i))))
.intoArray(shortOut, i);
}
}
// short: (bcast(a) MUL bcast(b)) MUL (bcast(c) MUL array)
@Benchmark
public void reassociateShortMulBalancedBroadcasts() {
for (int i = 0; i < sspecies.loopBound(size); i += sspecies.length()) {
ShortVector left =
ShortVector.broadcast(sspecies, (short) i)
.lanewise(VectorOperators.MUL,
ShortVector.broadcast(sspecies, (short) (i + 1)));
ShortVector right =
ShortVector.broadcast(sspecies, (short) (i + 2))
.lanewise(VectorOperators.MUL,
ShortVector.fromArray(sspecies, shortIn1, i));
left.lanewise(VectorOperators.MUL, right)
.intoArray(shortOut, i);
}
}
// byte: bcast(a) MUL (bcast(b) MUL (bcast(c) MUL array))
@Benchmark
public void reassociateByteMulChainedBroadcasts() {
for (int i = 0; i < bspecies.loopBound(size); i += bspecies.length()) {
ByteVector.broadcast(bspecies, (byte) i)
.lanewise(VectorOperators.MUL,
ByteVector.broadcast(bspecies, (byte) (i + 1))
.lanewise(VectorOperators.MUL,
ByteVector.broadcast(bspecies, (byte) (i + 2))
.lanewise(VectorOperators.MUL,
ByteVector.fromArray(bspecies, byteIn1, i))))
.intoArray(byteOut, i);
}
}
// byte: (bcast(a) MUL bcast(b)) MUL (bcast(c) MUL array)
@Benchmark
public void reassociateByteMulBalancedBroadcasts() {
for (int i = 0; i < bspecies.loopBound(size); i += bspecies.length()) {
ByteVector left =
ByteVector.broadcast(bspecies, (byte) i)
.lanewise(VectorOperators.MUL,
ByteVector.broadcast(bspecies, (byte) (i + 1)));
ByteVector right =
ByteVector.broadcast(bspecies, (byte) (i + 2))
.lanewise(VectorOperators.MUL,
ByteVector.fromArray(bspecies, byteIn1, i));
left.lanewise(VectorOperators.MUL, right)
.intoArray(byteOut, i);
}
}
}