mirror of
https://github.com/openjdk/jdk.git
synced 2026-07-02 15:20:27 +00:00
8382052: VectorAPI: Optimize the lanewise BITWISE_BLEND for AArch64
Reviewed-by: xgong, epeter, aph
This commit is contained in:
parent
2d65ea61d9
commit
5fbce068bd
@ -317,6 +317,13 @@ source %{
|
||||
return false; // NEON only, since SLI/USHR are not available in SVE
|
||||
}
|
||||
break;
|
||||
case Op_VectorBitwiseBlend:
|
||||
// Use NEON BSL when UseSVE < 2; SVE1 has no BSL so larger vectors are
|
||||
// not supported on UseSVE == 1 machines.
|
||||
if (UseSVE < 2 && length_in_bytes > 16) {
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
@ -340,6 +347,7 @@ source %{
|
||||
case Op_MulReductionVL:
|
||||
case Op_CompressBitsV:
|
||||
case Op_ExpandBitsV:
|
||||
case Op_VectorBitwiseBlend:
|
||||
return false;
|
||||
case Op_SaturatingAddV:
|
||||
case Op_SaturatingSubV:
|
||||
@ -7051,6 +7059,31 @@ instruct vblend_sve(vReg dst, vReg src1, vReg src2, pReg pg) %{
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// ------------------------------ Vector bitwise blend -------------------------
|
||||
|
||||
// NEON bit-select for VectorBitwiseBlend, used when SVE2 is not available.
// The matcher packs the node inputs as (Binary vec_false vec_true) sel, so
// src1 is the value taken where a selector bit is 0, src2 is the value taken
// where it is 1, and dst_src3 holds the selector on entry and the result on
// exit.
instruct vbitwise_blend_neon_sve1(vReg src1, vReg src2, vReg dst_src3) %{
  predicate(UseSVE < 2 &&
            VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n)));
  match(Set dst_src3 (VectorBitwiseBlend (Binary src1 src2) dst_src3));
  format %{ "vbitwise_blend_neon_sve1 $src1, $src2, $dst_src3" %}
  ins_encode %{
    // Pick the byte arrangement: full 128-bit register or the 64-bit half.
    const bool is_128_bit = Matcher::vector_length_in_bytes(this) == 16;
    Assembler::SIMD_Arrangement arrangement = __ T8B;
    if (is_128_bit) {
      arrangement = __ T16B;
    }
    // BSL Vd, Vn, Vm computes Vd = (Vd & Vn) | (~Vd & Vm): the destination
    // is the selector, Vn supplies the 1-bits (src2) and Vm the 0-bits (src1).
    __ bsl($dst_src3$$FloatRegister, arrangement, $src2$$FloatRegister, $src1$$FloatRegister);
  %}
  ins_pipe(pipe_slow);
%}
|
||||
|
||||
// SVE2 bit-select for VectorBitwiseBlend. The matcher packs the node inputs
// as (Binary vec_false vec_true) sel, so src1 is the value taken where a
// selector bit is 0, dst_src2 is the value taken where it is 1 (and receives
// the result), and src3 is the selector.
instruct vbitwise_blend_sve2(vReg src1, vReg dst_src2, vReg src3) %{
  predicate(UseSVE == 2);
  match(Set dst_src2 (VectorBitwiseBlend (Binary src1 dst_src2) src3));
  format %{ "vbitwise_blend_sve2 $src1, $dst_src2, $src3" %}
  ins_encode %{
    // BSL Zdn, Zdn, Zm, Zk computes Zdn = (Zdn & Zk) | (Zm & ~Zk).
    FloatRegister vec_true_dst = $dst_src2$$FloatRegister;
    FloatRegister vec_false    = $src1$$FloatRegister;
    FloatRegister selector     = $src3$$FloatRegister;
    __ sve_bsl(vec_true_dst, vec_false, selector);
  %}
  ins_pipe(pipe_slow);
%}
|
||||
|
||||
// ------------------------------ Vector round ---------------------------------
|
||||
|
||||
// vector Math.round
|
||||
|
||||
@ -307,6 +307,13 @@ source %{
|
||||
return false; // NEON only, since SLI/USHR are not available in SVE
|
||||
}
|
||||
break;
|
||||
case Op_VectorBitwiseBlend:
|
||||
// Use NEON BSL when UseSVE < 2; SVE1 has no BSL so larger vectors are
|
||||
// not supported on UseSVE == 1 machines.
|
||||
if (UseSVE < 2 && length_in_bytes > 16) {
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
@ -330,6 +337,7 @@ source %{
|
||||
case Op_MulReductionVL:
|
||||
case Op_CompressBitsV:
|
||||
case Op_ExpandBitsV:
|
||||
case Op_VectorBitwiseBlend:
|
||||
return false;
|
||||
case Op_SaturatingAddV:
|
||||
case Op_SaturatingSubV:
|
||||
@ -4754,6 +4762,31 @@ instruct vblend_sve(vReg dst, vReg src1, vReg src2, pReg pg) %{
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// ------------------------------ Vector bitwise blend -------------------------
|
||||
|
||||
// NEON bit-select for VectorBitwiseBlend, used when SVE2 is not available.
// The matcher packs the node inputs as (Binary vec_false vec_true) sel, so
// src1 is the value taken where a selector bit is 0, src2 is the value taken
// where it is 1, and dst_src3 holds the selector on entry and the result on
// exit.
instruct vbitwise_blend_neon_sve1(vReg src1, vReg src2, vReg dst_src3) %{
  predicate(UseSVE < 2 &&
            VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n)));
  match(Set dst_src3 (VectorBitwiseBlend (Binary src1 src2) dst_src3));
  format %{ "vbitwise_blend_neon_sve1 $src1, $src2, $dst_src3" %}
  ins_encode %{
    // Pick the byte arrangement: full 128-bit register or the 64-bit half.
    const bool is_128_bit = Matcher::vector_length_in_bytes(this) == 16;
    Assembler::SIMD_Arrangement arrangement = __ T8B;
    if (is_128_bit) {
      arrangement = __ T16B;
    }
    // BSL Vd, Vn, Vm computes Vd = (Vd & Vn) | (~Vd & Vm): the destination
    // is the selector, Vn supplies the 1-bits (src2) and Vm the 0-bits (src1).
    __ bsl($dst_src3$$FloatRegister, arrangement, $src2$$FloatRegister, $src1$$FloatRegister);
  %}
  ins_pipe(pipe_slow);
%}
|
||||
|
||||
// SVE2 bit-select for VectorBitwiseBlend. The matcher packs the node inputs
// as (Binary vec_false vec_true) sel, so src1 is the value taken where a
// selector bit is 0, dst_src2 is the value taken where it is 1 (and receives
// the result), and src3 is the selector.
instruct vbitwise_blend_sve2(vReg src1, vReg dst_src2, vReg src3) %{
  predicate(UseSVE == 2);
  match(Set dst_src2 (VectorBitwiseBlend (Binary src1 dst_src2) src3));
  format %{ "vbitwise_blend_sve2 $src1, $dst_src2, $src3" %}
  ins_encode %{
    // BSL Zdn, Zdn, Zm, Zk computes Zdn = (Zdn & Zk) | (Zm & ~Zk).
    FloatRegister vec_true_dst = $dst_src2$$FloatRegister;
    FloatRegister vec_false    = $src1$$FloatRegister;
    FloatRegister selector     = $src3$$FloatRegister;
    __ sve_bsl(vec_true_dst, vec_false, selector);
  %}
  ins_pipe(pipe_slow);
%}
|
||||
|
||||
// ------------------------------ Vector round ---------------------------------
|
||||
|
||||
// vector Math.round
|
||||
|
||||
@ -4292,14 +4292,15 @@ public:
|
||||
#undef INSN
|
||||
|
||||
// SVE2 bitwise ternary operations
|
||||
#define INSN(NAME, opc) \
|
||||
void NAME(FloatRegister Zdn, FloatRegister Zm, FloatRegister Zk) { \
|
||||
starti; \
|
||||
f(0b00000100, 31, 24), f(opc, 23, 21), rf(Zm, 16); \
|
||||
f(0b001110, 15, 10), rf(Zk, 5), rf(Zdn, 0); \
|
||||
#define INSN(NAME, op1, op2) \
|
||||
void NAME(FloatRegister Zdn, FloatRegister Zm, FloatRegister Zk) { \
|
||||
starti; \
|
||||
f(0b00000100, 31, 24), f(op1, 23, 21), rf(Zm, 16); \
|
||||
f(0b00111, 15, 11), f(op2, 10), rf(Zk, 5), rf(Zdn, 0); \
|
||||
}
|
||||
|
||||
INSN(sve_eor3, 0b001); // Bitwise exclusive OR of three vectors
|
||||
INSN(sve_eor3, 0b001, 0b0); // Bitwise exclusive OR of three vectors
|
||||
INSN(sve_bsl, 0b001, 0b1); // Bitwise select
|
||||
#undef INSN
|
||||
|
||||
// SVE2 saturating operations - predicate
|
||||
|
||||
@ -1843,13 +1843,19 @@ public:
|
||||
|
||||
#undef SVE_DESTRUCTIVE_TERNARY_INS
|
||||
|
||||
using Assembler::sve_eor3;
|
||||
void sve_eor3(FloatRegister Zd, FloatRegister Zm, FloatRegister Zk) {
|
||||
if (Zd != Zm && Zd != Zk) {
|
||||
try_to_replace_prev_vector_copy_with_movprfx(Zd);
|
||||
}
|
||||
Assembler::sve_eor3(Zd, Zm, Zk);
|
||||
#define SVE_DESTRUCTIVE_TERNARY_UNPRED_INS(NAME) \
|
||||
using Assembler::NAME; \
|
||||
void NAME(FloatRegister Zd, FloatRegister Zm, FloatRegister Zk) { \
|
||||
if (Zd != Zm && Zd != Zk) { \
|
||||
try_to_replace_prev_vector_copy_with_movprfx(Zd); \
|
||||
} \
|
||||
Assembler::NAME(Zd, Zm, Zk); \
|
||||
}
|
||||
|
||||
SVE_DESTRUCTIVE_TERNARY_UNPRED_INS(sve_bsl);
|
||||
SVE_DESTRUCTIVE_TERNARY_UNPRED_INS(sve_eor3);
|
||||
|
||||
#undef SVE_DESTRUCTIVE_TERNARY_UNPRED_INS
|
||||
};
|
||||
|
||||
#ifdef ASSERT
|
||||
|
||||
@ -512,6 +512,7 @@ macro(VectorMaskWrapper)
|
||||
macro(VectorMaskCmp)
|
||||
macro(VectorMaskCast)
|
||||
macro(VectorTest)
|
||||
macro(VectorBitwiseBlend)
|
||||
macro(VectorBlend)
|
||||
macro(VectorRearrange)
|
||||
macro(VectorLoadMask)
|
||||
|
||||
@ -2385,7 +2385,8 @@ void Matcher::find_shared_post_visit(Node* n, uint opcode) {
|
||||
break;
|
||||
}
|
||||
case Op_VectorBlend:
|
||||
case Op_VectorInsert: {
|
||||
case Op_VectorInsert:
|
||||
case Op_VectorBitwiseBlend: {
|
||||
Node* pair = new BinaryNode(n->in(1), n->in(2));
|
||||
n->set_req(1, pair);
|
||||
n->set_req(2, n->in(3));
|
||||
|
||||
@ -874,6 +874,7 @@ VectorNode* VectorNode::make(int vopc, Node* n1, Node* n2, Node* n3, const TypeV
|
||||
case Op_SignumVD: return new SignumVDNode(n1, n2, n3, vt);
|
||||
case Op_SignumVF: return new SignumVFNode(n1, n2, n3, vt);
|
||||
case Op_VectorBlend: return new VectorBlendNode(n1, n2, n3);
|
||||
case Op_VectorBitwiseBlend: return new VectorBitwiseBlendNode(n1, n2, n3, vt);
|
||||
default:
|
||||
fatal("Missed vector creation for '%s'", NodeClassNames[vopc]);
|
||||
return nullptr;
|
||||
@ -2768,6 +2769,70 @@ Node* XorVNode::Ideal_XorV_VectorMaskCmp(PhaseGVN* phase, bool can_reshape) {
|
||||
return res;
|
||||
}
|
||||
|
||||
// XorV(a, AndV(sel, XorV(a, b))) => VectorBitwiseBlend(a, b, sel)
|
||||
// XorV(a, AndV(sel, XorV(a, b)), mask) =>
|
||||
// VectorBlend(a, VectorBitwiseBlend(a, b, sel), mask)
|
||||
Node* XorVNode::Ideal_XorV_to_VectorBitwiseBlend(PhaseGVN* phase, bool can_reshape) {
|
||||
const TypeVect* vt = vect_type();
|
||||
BasicType bt = vt->element_basic_type();
|
||||
uint vlen = vt->length();
|
||||
if (!Matcher::match_rule_supported_vector(Op_VectorBitwiseBlend, vlen, bt)) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
bool is_masked = is_predicated_vector();
|
||||
if (is_masked &&
|
||||
!Matcher::match_rule_supported_vector(Op_VectorBlend, vlen, bt)) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// For the predicated case in(1) is fixed as the merge source. Otherwise the
|
||||
// outer XorV is commutative.
|
||||
Node* a = nullptr;
|
||||
Node* andv = nullptr;
|
||||
if (is_masked || in(2)->Opcode() == Op_AndV) {
|
||||
andv = in(2);
|
||||
a = in(1);
|
||||
} else {
|
||||
andv = in(1);
|
||||
a = in(2);
|
||||
}
|
||||
if (andv->Opcode() != Op_AndV || andv->is_predicated_vector()) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
Node* sel = nullptr;
|
||||
Node* inner_xor = nullptr;
|
||||
if (andv->in(2)->Opcode() == Op_XorV) {
|
||||
inner_xor = andv->in(2);
|
||||
sel = andv->in(1);
|
||||
} else if (andv->in(1)->Opcode() == Op_XorV) {
|
||||
inner_xor = andv->in(1);
|
||||
sel = andv->in(2);
|
||||
} else {
|
||||
return nullptr;
|
||||
}
|
||||
if (inner_xor->is_predicated_vector()) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
Node* b = nullptr;
|
||||
if (inner_xor->in(1) == a) {
|
||||
b = inner_xor->in(2);
|
||||
} else if (inner_xor->in(2) == a) {
|
||||
b = inner_xor->in(1);
|
||||
} else {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
Node* blend = new VectorBitwiseBlendNode(a, b, sel, vt);
|
||||
if (!is_masked) {
|
||||
return blend;
|
||||
}
|
||||
blend = phase->transform(blend);
|
||||
return new VectorBlendNode(a, blend, in(3));
|
||||
}
|
||||
|
||||
Node* XorVNode::Ideal(PhaseGVN* phase, bool can_reshape) {
|
||||
// (XorV src src) => (Replicate zero)
|
||||
// (XorVMask src src) => (MaskAll zero)
|
||||
@ -2786,6 +2851,11 @@ Node* XorVNode::Ideal(PhaseGVN* phase, bool can_reshape) {
|
||||
if (res != nullptr) {
|
||||
return res;
|
||||
}
|
||||
|
||||
res = Ideal_XorV_to_VectorBitwiseBlend(phase, can_reshape);
|
||||
if (res != nullptr) {
|
||||
return res;
|
||||
}
|
||||
return VectorNode::Ideal(phase, can_reshape);
|
||||
}
|
||||
|
||||
|
||||
@ -1075,6 +1075,7 @@ class XorVNode : public VectorNode {
|
||||
virtual int Opcode() const;
|
||||
virtual Node* Ideal(PhaseGVN* phase, bool can_reshape);
|
||||
Node* Ideal_XorV_VectorMaskCmp(PhaseGVN* phase, bool can_reshape);
|
||||
Node* Ideal_XorV_to_VectorBitwiseBlend(PhaseGVN* phase, bool can_reshape);
|
||||
};
|
||||
|
||||
// Vector xor byte, short, int, long as a reduction
|
||||
@ -1802,6 +1803,24 @@ class VectorBlendNode : public VectorNode {
|
||||
Node* vec_mask() const { return in(3); }
|
||||
};
|
||||
|
||||
// Vector bitwise blend (bit-select): (sel & vec_true) | (~sel & vec_false).
|
||||
class VectorBitwiseBlendNode : public VectorNode {
|
||||
public:
|
||||
VectorBitwiseBlendNode(Node* vec_false, Node* vec_true, Node* sel, const TypeVect* vt)
|
||||
: VectorNode(vec_false, vec_true, sel, vt) {
|
||||
assert(vec_false->bottom_type()->isa_vect() != nullptr &&
|
||||
vec_true->bottom_type()->isa_vect() != nullptr &&
|
||||
sel->bottom_type()->isa_vect() != nullptr,
|
||||
"inputs must all be vectors");
|
||||
uint vlen = vt->length();
|
||||
assert(vec_false->bottom_type()->is_vect()->length() == vlen &&
|
||||
vec_true->bottom_type()->is_vect()->length() == vlen &&
|
||||
sel->bottom_type()->is_vect()->length() == vlen,
|
||||
"mismatched vector length");
|
||||
}
|
||||
virtual int Opcode() const;
|
||||
};
|
||||
|
||||
// Rearrange lane elements from a source vector under the control of a shuffle
|
||||
// (indexes) vector. Each lane in the shuffle vector specifies which lane from
|
||||
// the source vector to select for the corresponding output lane. All indexes
|
||||
|
||||
@ -1121,7 +1121,7 @@ class SVEVectorOp(Instruction):
|
||||
self._bitwiseop = False
|
||||
if name[0] == 'f':
|
||||
self._width = RegVariant(2, 3)
|
||||
elif not self._isPredicated and (name in ["and", "eor", "orr", "bic", "eor3"]):
|
||||
elif not self._isPredicated and (name in ["and", "bic", "bsl", "eor", "eor3", "orr"]):
|
||||
self._width = RegVariant(3, 3)
|
||||
self._bitwiseop = True
|
||||
elif name == "revb":
|
||||
@ -1150,7 +1150,7 @@ class SVEVectorOp(Instruction):
|
||||
width +
|
||||
[str(self.reg[i]) for i in range(1, self.numRegs)]))
|
||||
def astr(self):
|
||||
firstArg = 0 if self._name == "eor3" else 1
|
||||
firstArg = 0 if self._name in ["bsl", "eor3"] else 1
|
||||
formatStr = "%s%s" + ''.join([", %s" for i in range(firstArg, self.numRegs)])
|
||||
if self._dnm == 'dn':
|
||||
formatStr += ", %s"
|
||||
@ -2258,6 +2258,7 @@ generate(SVEVectorOp, [["add", "ZZZ"],
|
||||
# SVE2 instructions
|
||||
["bext", "ZZZ"],
|
||||
["bdep", "ZZZ"],
|
||||
["bsl", "ZZZ"],
|
||||
["eor3", "ZZZ"],
|
||||
["sqadd", "ZPZ", "m", "dn"],
|
||||
["sqsub", "ZPZ", "m", "dn"],
|
||||
|
||||
@ -1419,38 +1419,39 @@
|
||||
__ sve_fabd(z14, __ S, p5, z22); // fabd z14.s, p5/m, z14.s, z22.s
|
||||
__ sve_bext(z5, __ H, z18, z0); // bext z5.h, z18.h, z0.h
|
||||
__ sve_bdep(z9, __ D, z2, z3); // bdep z9.d, z2.d, z3.d
|
||||
__ sve_eor3(z14, z4, z29); // eor3 z14.d, z14.d, z4.d, z29.d
|
||||
__ sve_sqadd(z14, __ D, p5, z4); // sqadd z14.d, p5/m, z14.d, z4.d
|
||||
__ sve_sqsub(z27, __ S, p3, z22); // sqsub z27.s, p3/m, z27.s, z22.s
|
||||
__ sve_uqadd(z31, __ S, p6, z11); // uqadd z31.s, p6/m, z31.s, z11.s
|
||||
__ sve_uqsub(z12, __ B, p4, z28); // uqsub z12.b, p4/m, z12.b, z28.b
|
||||
__ sve_bsl(z14, z4, z29); // bsl z14.d, z14.d, z4.d, z29.d
|
||||
__ sve_eor3(z14, z22, z4); // eor3 z14.d, z14.d, z22.d, z4.d
|
||||
__ sve_sqadd(z27, __ S, p3, z22); // sqadd z27.s, p3/m, z27.s, z22.s
|
||||
__ sve_sqsub(z31, __ S, p6, z11); // sqsub z31.s, p6/m, z31.s, z11.s
|
||||
__ sve_uqadd(z12, __ B, p4, z28); // uqadd z12.b, p4/m, z12.b, z28.b
|
||||
__ sve_uqsub(z28, __ D, p4, z4); // uqsub z28.d, p4/m, z28.d, z4.d
|
||||
|
||||
// SVEReductionOp
|
||||
__ sve_andv(v28, __ D, p4, z4); // andv d28, p4, z4.d
|
||||
__ sve_orv(v6, __ S, p0, z15); // orv s6, p0, z15.s
|
||||
__ sve_eorv(v1, __ S, p5, z18); // eorv s1, p5, z18.s
|
||||
__ sve_smaxv(v2, __ H, p2, z4); // smaxv h2, p2, z4.h
|
||||
__ sve_sminv(v11, __ S, p2, z28); // sminv s11, p2, z28.s
|
||||
__ sve_umaxv(v3, __ H, p5, z31); // umaxv h3, p5, z31.h
|
||||
__ sve_uminv(v24, __ H, p5, z15); // uminv h24, p5, z15.h
|
||||
__ sve_fminv(v6, __ S, p3, z8); // fminv s6, p3, z8.s
|
||||
__ sve_fmaxv(v21, __ D, p7, z4); // fmaxv d21, p7, z4.d
|
||||
__ sve_fadda(v24, __ S, p5, z6); // fadda s24, p5, s24, z6.s
|
||||
__ sve_uaddv(v4, __ D, p2, z9); // uaddv d4, p2, z9.d
|
||||
__ sve_andv(v6, __ S, p0, z15); // andv s6, p0, z15.s
|
||||
__ sve_orv(v1, __ S, p5, z18); // orv s1, p5, z18.s
|
||||
__ sve_eorv(v2, __ H, p2, z4); // eorv h2, p2, z4.h
|
||||
__ sve_smaxv(v11, __ S, p2, z28); // smaxv s11, p2, z28.s
|
||||
__ sve_sminv(v3, __ H, p5, z31); // sminv h3, p5, z31.h
|
||||
__ sve_umaxv(v24, __ H, p5, z15); // umaxv h24, p5, z15.h
|
||||
__ sve_uminv(v6, __ H, p3, z8); // uminv h6, p3, z8.h
|
||||
__ sve_fminv(v21, __ D, p7, z4); // fminv d21, p7, z4.d
|
||||
__ sve_fmaxv(v24, __ S, p5, z6); // fmaxv s24, p5, z6.s
|
||||
__ sve_fadda(v4, __ D, p2, z9); // fadda d4, p2, d4, z9.d
|
||||
__ sve_uaddv(v10, __ S, p1, z31); // uaddv d10, p1, z31.s
|
||||
|
||||
// AddWideNEONOp
|
||||
__ saddwv(v10, v11, __ T8H, v12, __ T8B); // saddw v10.8H, v11.8H, v12.8B
|
||||
__ saddwv2(v5, v6, __ T8H, v7, __ T16B); // saddw2 v5.8H, v6.8H, v7.16B
|
||||
__ saddwv(v31, v0, __ T4S, v1, __ T4H); // saddw v31.4S, v0.4S, v1.4H
|
||||
__ saddwv2(v22, v23, __ T4S, v24, __ T8H); // saddw2 v22.4S, v23.4S, v24.8H
|
||||
__ saddwv(v25, v26, __ T2D, v27, __ T2S); // saddw v25.2D, v26.2D, v27.2S
|
||||
__ saddwv2(v15, v16, __ T2D, v17, __ T4S); // saddw2 v15.2D, v16.2D, v17.4S
|
||||
__ uaddwv(v3, v4, __ T8H, v5, __ T8B); // uaddw v3.8H, v4.8H, v5.8B
|
||||
__ uaddwv2(v18, v19, __ T8H, v20, __ T16B); // uaddw2 v18.8H, v19.8H, v20.16B
|
||||
__ uaddwv(v14, v15, __ T4S, v16, __ T4H); // uaddw v14.4S, v15.4S, v16.4H
|
||||
__ uaddwv2(v10, v11, __ T4S, v12, __ T8H); // uaddw2 v10.4S, v11.4S, v12.8H
|
||||
__ uaddwv(v2, v3, __ T2D, v4, __ T2S); // uaddw v2.2D, v3.2D, v4.2S
|
||||
__ uaddwv2(v10, v11, __ T2D, v12, __ T4S); // uaddw2 v10.2D, v11.2D, v12.4S
|
||||
__ saddwv(v25, v26, __ T8H, v27, __ T8B); // saddw v25.8H, v26.8H, v27.8B
|
||||
__ saddwv2(v15, v16, __ T8H, v17, __ T16B); // saddw2 v15.8H, v16.8H, v17.16B
|
||||
__ saddwv(v3, v4, __ T4S, v5, __ T4H); // saddw v3.4S, v4.4S, v5.4H
|
||||
__ saddwv2(v18, v19, __ T4S, v20, __ T8H); // saddw2 v18.4S, v19.4S, v20.8H
|
||||
__ saddwv(v14, v15, __ T2D, v16, __ T2S); // saddw v14.2D, v15.2D, v16.2S
|
||||
__ saddwv2(v10, v11, __ T2D, v12, __ T4S); // saddw2 v10.2D, v11.2D, v12.4S
|
||||
__ uaddwv(v2, v3, __ T8H, v4, __ T8B); // uaddw v2.8H, v3.8H, v4.8B
|
||||
__ uaddwv2(v10, v11, __ T8H, v12, __ T16B); // uaddw2 v10.8H, v11.8H, v12.16B
|
||||
__ uaddwv(v8, v9, __ T4S, v10, __ T4H); // uaddw v8.4S, v9.4S, v10.4H
|
||||
__ uaddwv2(v11, v12, __ T4S, v13, __ T8H); // uaddw2 v11.4S, v12.4S, v13.8H
|
||||
__ uaddwv(v22, v23, __ T2D, v24, __ T2S); // uaddw v22.2D, v23.2D, v24.2S
|
||||
__ uaddwv2(v3, v4, __ T2D, v5, __ T4S); // uaddw2 v3.2D, v4.2D, v5.4S
|
||||
|
||||
__ bind(forth);
|
||||
|
||||
@ -1469,30 +1470,30 @@
|
||||
0x9101a1a0, 0xb10a5cc8, 0xd10810aa, 0xf10fd061,
|
||||
0x120cb166, 0x321764bc, 0x52174681, 0x720c0227,
|
||||
0x9241018e, 0xb25a2969, 0xd278b411, 0xf26aad01,
|
||||
0x14000000, 0x17ffffd7, 0x140004cb, 0x94000000,
|
||||
0x97ffffd4, 0x940004c8, 0x3400000a, 0x34fffa2a,
|
||||
0x340098aa, 0x35000008, 0x35fff9c8, 0x35009848,
|
||||
0xb400000b, 0xb4fff96b, 0xb40097eb, 0xb500001d,
|
||||
0xb5fff91d, 0xb500979d, 0x10000013, 0x10fff8b3,
|
||||
0x10009733, 0x90000013, 0x36300016, 0x3637f836,
|
||||
0x363096b6, 0x3758000c, 0x375ff7cc, 0x3758964c,
|
||||
0x14000000, 0x17ffffd7, 0x140004cc, 0x94000000,
|
||||
0x97ffffd4, 0x940004c9, 0x3400000a, 0x34fffa2a,
|
||||
0x340098ca, 0x35000008, 0x35fff9c8, 0x35009868,
|
||||
0xb400000b, 0xb4fff96b, 0xb400980b, 0xb500001d,
|
||||
0xb5fff91d, 0xb50097bd, 0x10000013, 0x10fff8b3,
|
||||
0x10009753, 0x90000013, 0x36300016, 0x3637f836,
|
||||
0x363096d6, 0x3758000c, 0x375ff7cc, 0x3758966c,
|
||||
0x128313a0, 0x528a32c7, 0x7289173b, 0x92ab3acc,
|
||||
0xd2a0bf94, 0xf2c285e8, 0x9358722f, 0x330e652f,
|
||||
0x53067f3b, 0x93577c53, 0xb34a1aac, 0xd35a4016,
|
||||
0x13946c63, 0x93c3dbc8, 0x54000000, 0x54fff5a0,
|
||||
0x54009420, 0x54000001, 0x54fff541, 0x540093c1,
|
||||
0x54000002, 0x54fff4e2, 0x54009362, 0x54000002,
|
||||
0x54fff482, 0x54009302, 0x54000003, 0x54fff423,
|
||||
0x540092a3, 0x54000003, 0x54fff3c3, 0x54009243,
|
||||
0x54000004, 0x54fff364, 0x540091e4, 0x54000005,
|
||||
0x54fff305, 0x54009185, 0x54000006, 0x54fff2a6,
|
||||
0x54009126, 0x54000007, 0x54fff247, 0x540090c7,
|
||||
0x54000008, 0x54fff1e8, 0x54009068, 0x54000009,
|
||||
0x54fff189, 0x54009009, 0x5400000a, 0x54fff12a,
|
||||
0x54008faa, 0x5400000b, 0x54fff0cb, 0x54008f4b,
|
||||
0x5400000c, 0x54fff06c, 0x54008eec, 0x5400000d,
|
||||
0x54fff00d, 0x54008e8d, 0x5400000e, 0x54ffefae,
|
||||
0x54008e2e, 0x5400000f, 0x54ffef4f, 0x54008dcf,
|
||||
0x54009440, 0x54000001, 0x54fff541, 0x540093e1,
|
||||
0x54000002, 0x54fff4e2, 0x54009382, 0x54000002,
|
||||
0x54fff482, 0x54009322, 0x54000003, 0x54fff423,
|
||||
0x540092c3, 0x54000003, 0x54fff3c3, 0x54009263,
|
||||
0x54000004, 0x54fff364, 0x54009204, 0x54000005,
|
||||
0x54fff305, 0x540091a5, 0x54000006, 0x54fff2a6,
|
||||
0x54009146, 0x54000007, 0x54fff247, 0x540090e7,
|
||||
0x54000008, 0x54fff1e8, 0x54009088, 0x54000009,
|
||||
0x54fff189, 0x54009029, 0x5400000a, 0x54fff12a,
|
||||
0x54008fca, 0x5400000b, 0x54fff0cb, 0x54008f6b,
|
||||
0x5400000c, 0x54fff06c, 0x54008f0c, 0x5400000d,
|
||||
0x54fff00d, 0x54008ead, 0x5400000e, 0x54ffefae,
|
||||
0x54008e4e, 0x5400000f, 0x54ffef4f, 0x54008def,
|
||||
0xd40658e1, 0xd4014d22, 0xd4046543, 0xd4273f60,
|
||||
0xd44cad80, 0xd503201f, 0xd503203f, 0xd503205f,
|
||||
0xd503209f, 0xd50320bf, 0xd503219f, 0xd50323bf,
|
||||
@ -1535,7 +1536,7 @@
|
||||
0x39598921, 0x795d3077, 0x399d0675, 0x7998d8f3,
|
||||
0x79dbd02a, 0xb99d068a, 0xfd5d11a0, 0xbd58d76b,
|
||||
0xfd1ac72d, 0xbd1d9c14, 0x5800001a, 0x18ffda33,
|
||||
0xf8991100, 0xd8007880, 0xf8a758e0, 0xf9989d80,
|
||||
0xf8991100, 0xd80078a0, 0xf8a758e0, 0xf9989d80,
|
||||
0x1a0b0298, 0x3a1c01a0, 0x5a0400ea, 0x7a02020f,
|
||||
0x9a1d028c, 0xba0e01ad, 0xda140186, 0xfa19022c,
|
||||
0x0b2b877e, 0x2b21c8ee, 0xcb3ba47d, 0x6b3ae9a0,
|
||||
@ -1769,13 +1770,13 @@
|
||||
0x65b45aff, 0x65e07fa2, 0x04454097, 0x044d6e3c,
|
||||
0x04283148, 0x04bd3013, 0x047731b0, 0x04ed33d7,
|
||||
0x05606ad9, 0x056b6fd9, 0x658896ce, 0x4540b245,
|
||||
0x45c3b449, 0x04243bae, 0x44d8948e, 0x449a8edb,
|
||||
0x4499997f, 0x441b938c, 0x04da309c, 0x049821e6,
|
||||
0x04993641, 0x04482882, 0x048a2b8b, 0x044937e3,
|
||||
0x044b35f8, 0x65872d06, 0x65c63c95, 0x659834d8,
|
||||
0x04c12924, 0x0e2c116a, 0x4e2710c5, 0x0e61101f,
|
||||
0x4e7812f6, 0x0ebb1359, 0x4eb1120f, 0x2e251083,
|
||||
0x6e341272, 0x2e7011ee, 0x6e6c116a, 0x2ea41062,
|
||||
0x6eac116a,
|
||||
0x45c3b449, 0x04243fae, 0x0436388e, 0x44988edb,
|
||||
0x449a997f, 0x4419938c, 0x44db909c, 0x049a21e6,
|
||||
0x04983641, 0x04592882, 0x04882b8b, 0x044a37e3,
|
||||
0x044935f8, 0x044b2d06, 0x65c73c95, 0x658634d8,
|
||||
0x65d82924, 0x048127ea, 0x0e3b1359, 0x4e31120f,
|
||||
0x0e651083, 0x4e741272, 0x0eb011ee, 0x4eac116a,
|
||||
0x2e241062, 0x6e2c116a, 0x2e6a1128, 0x6e6d118b,
|
||||
0x2eb812f6, 0x6ea51083,
|
||||
};
|
||||
// END Generated code -- do not edit
|
||||
|
||||
@ -2442,6 +2442,12 @@ public class IRNode {
|
||||
vectorNode(VECTOR_BLEND_D, "VectorBlend", TYPE_DOUBLE);
|
||||
}
|
||||
|
||||
// IR node placeholder for the C2 VectorBitwiseBlend node. It is registered
// without an element-type suffix, so one constant covers all lane types.
public static final String VECTOR_BITWISE_BLEND = PREFIX + "VECTOR_BITWISE_BLEND" + POSTFIX;
static {
    afterBarrierExpansionToBeforeMatching(VECTOR_BITWISE_BLEND,
                                          START + "VectorBitwiseBlend" + MID + END);
}
|
||||
|
||||
public static final String VECTOR_MASK_CMP_I = VECTOR_PREFIX + "VECTOR_MASK_CMP_I" + POSTFIX;
|
||||
static {
|
||||
vectorNode(VECTOR_MASK_CMP_I, "VectorMaskCmp", TYPE_INT);
|
||||
|
||||
@ -0,0 +1,217 @@
|
||||
/*
|
||||
* Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
|
||||
/*
|
||||
* @test
|
||||
* @bug 8382052
|
||||
* @key randomness
|
||||
* @library /test/lib /
|
||||
* @summary IR tests for Vector BITWISE_BLEND optimization
|
||||
* @modules jdk.incubator.vector
|
||||
*
|
||||
* @run driver ${test.main.class}
|
||||
*/
|
||||
|
||||
package compiler.vectorapi;
|
||||
|
||||
import compiler.lib.generators.*;
|
||||
import compiler.lib.ir_framework.*;
|
||||
import jdk.incubator.vector.*;
|
||||
|
||||
public class VectorBitwiseBlendTest {
|
||||
|
||||
private static final Generators RD = Generators.G;
|
||||
|
||||
private static final VectorSpecies<Byte> B_SPECIES = ByteVector.SPECIES_MAX;
|
||||
private static final VectorSpecies<Short> S_SPECIES = ShortVector.SPECIES_MAX;
|
||||
private static final VectorSpecies<Integer> I_SPECIES = IntVector.SPECIES_MAX;
|
||||
private static final VectorSpecies<Long> L_SPECIES = LongVector.SPECIES_MAX;
|
||||
|
||||
private static final int BUF_LEN = 256;
|
||||
|
||||
private static final byte[] ba = new byte[BUF_LEN];
|
||||
private static final byte[] bb = new byte[BUF_LEN];
|
||||
private static final byte[] bc = new byte[BUF_LEN];
|
||||
private static final byte[] br = new byte[BUF_LEN];
|
||||
|
||||
private static final short[] sa = new short[BUF_LEN];
|
||||
private static final short[] sb = new short[BUF_LEN];
|
||||
private static final short[] sc = new short[BUF_LEN];
|
||||
private static final short[] sr = new short[BUF_LEN];
|
||||
|
||||
private static final int[] ia = new int[BUF_LEN];
|
||||
private static final int[] ib = new int[BUF_LEN];
|
||||
private static final int[] ic = new int[BUF_LEN];
|
||||
private static final int[] ir = new int[BUF_LEN];
|
||||
|
||||
private static final long[] la = new long[BUF_LEN];
|
||||
private static final long[] lb = new long[BUF_LEN];
|
||||
private static final long[] lc = new long[BUF_LEN];
|
||||
private static final long[] lr = new long[BUF_LEN];
|
||||
|
||||
private static final boolean[] mask_arr = new boolean[BUF_LEN];
|
||||
|
||||
static {
|
||||
Generator<Integer> iGen = RD.ints();
|
||||
Generator<Long> lGen = RD.longs();
|
||||
|
||||
for (int i = 0; i < BUF_LEN; i++) {
|
||||
mask_arr[i] = (i & 1) != 0;
|
||||
ba[i] = iGen.next().byteValue();
|
||||
bb[i] = iGen.next().byteValue();
|
||||
bc[i] = iGen.next().byteValue();
|
||||
sa[i] = iGen.next().shortValue();
|
||||
sb[i] = iGen.next().shortValue();
|
||||
sc[i] = iGen.next().shortValue();
|
||||
}
|
||||
RD.fill(iGen, ia);
|
||||
RD.fill(iGen, ib);
|
||||
RD.fill(iGen, ic);
|
||||
RD.fill(lGen, la);
|
||||
RD.fill(lGen, lb);
|
||||
RD.fill(lGen, lc);
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = { IRNode.VECTOR_BITWISE_BLEND, "= 1" },
|
||||
applyIfCPUFeatureAnd = { "asimd", "true", "sve2", "false" },
|
||||
applyIf = { "MaxVectorSize", "<= 16" })
|
||||
@IR(counts = { IRNode.VECTOR_BITWISE_BLEND, "= 1" },
|
||||
applyIfCPUFeature = { "sve2", "true" })
|
||||
public static void testUnmaskedBlendByte() {
|
||||
ByteVector va = ByteVector.fromArray(B_SPECIES, ba, 0);
|
||||
ByteVector vb = ByteVector.fromArray(B_SPECIES, bb, 0);
|
||||
ByteVector vc = ByteVector.fromArray(B_SPECIES, bc, 0);
|
||||
va.lanewise(VectorOperators.BITWISE_BLEND, vb, vc).intoArray(br, 0);
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = { IRNode.VECTOR_BITWISE_BLEND, "= 1" },
|
||||
applyIfCPUFeatureAnd = { "asimd", "true", "sve2", "false" },
|
||||
applyIf = { "MaxVectorSize", "<= 16" })
|
||||
@IR(counts = { IRNode.VECTOR_BITWISE_BLEND, "= 1" },
|
||||
applyIfCPUFeature = { "sve2", "true" })
|
||||
public static void testUnmaskedBlendShort() {
|
||||
ShortVector va = ShortVector.fromArray(S_SPECIES, sa, 0);
|
||||
ShortVector vb = ShortVector.fromArray(S_SPECIES, sb, 0);
|
||||
ShortVector vc = ShortVector.fromArray(S_SPECIES, sc, 0);
|
||||
va.lanewise(VectorOperators.BITWISE_BLEND, vb, vc).intoArray(sr, 0);
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = { IRNode.VECTOR_BITWISE_BLEND, "= 1" },
|
||||
applyIfCPUFeatureAnd = { "asimd", "true", "sve2", "false" },
|
||||
applyIf = { "MaxVectorSize", "<= 16" })
|
||||
@IR(counts = { IRNode.VECTOR_BITWISE_BLEND, "= 1" },
|
||||
applyIfCPUFeature = { "sve2", "true" })
|
||||
public static void testUnmaskedBlendInt() {
|
||||
IntVector va = IntVector.fromArray(I_SPECIES, ia, 0);
|
||||
IntVector vb = IntVector.fromArray(I_SPECIES, ib, 0);
|
||||
IntVector vc = IntVector.fromArray(I_SPECIES, ic, 0);
|
||||
va.lanewise(VectorOperators.BITWISE_BLEND, vb, vc).intoArray(ir, 0);
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = { IRNode.VECTOR_BITWISE_BLEND, "= 1" },
|
||||
applyIfCPUFeatureAnd = { "asimd", "true", "sve2", "false" },
|
||||
applyIf = { "MaxVectorSize", "<= 16" })
|
||||
@IR(counts = { IRNode.VECTOR_BITWISE_BLEND, "= 1" },
|
||||
applyIfCPUFeature = { "sve2", "true" })
|
||||
public static void testUnmaskedBlendLong() {
|
||||
LongVector va = LongVector.fromArray(L_SPECIES, la, 0);
|
||||
LongVector vb = LongVector.fromArray(L_SPECIES, lb, 0);
|
||||
LongVector vc = LongVector.fromArray(L_SPECIES, lc, 0);
|
||||
va.lanewise(VectorOperators.BITWISE_BLEND, vb, vc).intoArray(lr, 0);
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = { IRNode.VECTOR_BLEND_B, "= 1",
|
||||
IRNode.VECTOR_BITWISE_BLEND, "= 1" },
|
||||
applyIfCPUFeatureAnd = { "asimd", "true", "sve2", "false" },
|
||||
applyIf = { "MaxVectorSize", "<= 16" })
|
||||
@IR(counts = { IRNode.VECTOR_BLEND_B, "= 1",
|
||||
IRNode.VECTOR_BITWISE_BLEND, "= 1" },
|
||||
applyIfCPUFeature = { "sve2", "true" })
|
||||
public static void testMaskedBlendByte() {
|
||||
VectorMask<Byte> mask = VectorMask.fromArray(B_SPECIES, mask_arr, 0);
|
||||
ByteVector va = ByteVector.fromArray(B_SPECIES, ba, 0);
|
||||
ByteVector vb = ByteVector.fromArray(B_SPECIES, bb, 0);
|
||||
ByteVector vc = ByteVector.fromArray(B_SPECIES, bc, 0);
|
||||
va.lanewise(VectorOperators.BITWISE_BLEND, vb, vc, mask).intoArray(br, 0);
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = { IRNode.VECTOR_BLEND_S, "= 1",
|
||||
IRNode.VECTOR_BITWISE_BLEND, "= 1" },
|
||||
applyIfCPUFeatureAnd = { "asimd", "true", "sve2", "false" },
|
||||
applyIf = { "MaxVectorSize", "<= 16" })
|
||||
@IR(counts = { IRNode.VECTOR_BLEND_S, "= 1",
|
||||
IRNode.VECTOR_BITWISE_BLEND, "= 1" },
|
||||
applyIfCPUFeature = { "sve2", "true" })
|
||||
public static void testMaskedBlendShort() {
|
||||
VectorMask<Short> mask = VectorMask.fromArray(S_SPECIES, mask_arr, 0);
|
||||
ShortVector va = ShortVector.fromArray(S_SPECIES, sa, 0);
|
||||
ShortVector vb = ShortVector.fromArray(S_SPECIES, sb, 0);
|
||||
ShortVector vc = ShortVector.fromArray(S_SPECIES, sc, 0);
|
||||
va.lanewise(VectorOperators.BITWISE_BLEND, vb, vc, mask).intoArray(sr, 0);
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = { IRNode.VECTOR_BLEND_I, "= 1",
|
||||
IRNode.VECTOR_BITWISE_BLEND, "= 1" },
|
||||
applyIfCPUFeatureAnd = { "asimd", "true", "sve2", "false" },
|
||||
applyIf = { "MaxVectorSize", "<= 16" })
|
||||
@IR(counts = { IRNode.VECTOR_BLEND_I, "= 1",
|
||||
IRNode.VECTOR_BITWISE_BLEND, "= 1" },
|
||||
applyIfCPUFeature = { "sve2", "true" })
|
||||
public static void testMaskedBlendInt() {
|
||||
VectorMask<Integer> mask = VectorMask.fromArray(I_SPECIES, mask_arr, 0);
|
||||
IntVector va = IntVector.fromArray(I_SPECIES, ia, 0);
|
||||
IntVector vb = IntVector.fromArray(I_SPECIES, ib, 0);
|
||||
IntVector vc = IntVector.fromArray(I_SPECIES, ic, 0);
|
||||
va.lanewise(VectorOperators.BITWISE_BLEND, vb, vc, mask).intoArray(ir, 0);
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = { IRNode.VECTOR_BLEND_L, "= 1",
|
||||
IRNode.VECTOR_BITWISE_BLEND, "= 1" },
|
||||
applyIfCPUFeatureAnd = { "asimd", "true", "sve2", "false" },
|
||||
applyIf = { "MaxVectorSize", "<= 16" })
|
||||
@IR(counts = { IRNode.VECTOR_BLEND_L, "= 1",
|
||||
IRNode.VECTOR_BITWISE_BLEND, "= 1" },
|
||||
applyIfCPUFeature = { "sve2", "true" })
|
||||
public static void testMaskedBlendLong() {
|
||||
VectorMask<Long> mask = VectorMask.fromArray(L_SPECIES, mask_arr, 0);
|
||||
LongVector va = LongVector.fromArray(L_SPECIES, la, 0);
|
||||
LongVector vb = LongVector.fromArray(L_SPECIES, lb, 0);
|
||||
LongVector vc = LongVector.fromArray(L_SPECIES, lc, 0);
|
||||
va.lanewise(VectorOperators.BITWISE_BLEND, vb, vc, mask).intoArray(lr, 0);
|
||||
}
|
||||
|
||||
public static void main(String[] args) {
|
||||
TestFramework testFramework = new TestFramework();
|
||||
testFramework.setDefaultWarmup(10000)
|
||||
.addFlags("--add-modules=jdk.incubator.vector")
|
||||
.start();
|
||||
}
|
||||
}
|
||||
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2022, 2025, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2022, 2026, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -55,8 +55,9 @@ public class MaskedLogicOpts {
|
||||
int int512_arr_idx;
|
||||
int int256_arr_idx;
|
||||
int int128_arr_idx;
|
||||
int long256_arr_idx;
|
||||
int long512_arr_idx;
|
||||
int long256_arr_idx;
|
||||
int long128_arr_idx;
|
||||
|
||||
private Random r = new Random(1024);
|
||||
|
||||
@ -65,8 +66,9 @@ public class MaskedLogicOpts {
|
||||
int512_arr_idx = -16;
|
||||
int256_arr_idx = -8;
|
||||
int128_arr_idx = -4;
|
||||
long256_arr_idx = -4;
|
||||
long512_arr_idx = -8;
|
||||
long256_arr_idx = -4;
|
||||
long128_arr_idx = -2;
|
||||
|
||||
mask_arr = new boolean[ARRAYLEN];
|
||||
i1 = new int[ARRAYLEN];
|
||||
@ -106,6 +108,7 @@ public class MaskedLogicOpts {
|
||||
int128_arr_idx = (((ARRAYLEN & ~3) - int128_arr_idx) <= 4) ? 0 : int128_arr_idx + 4;
|
||||
long512_arr_idx = (((ARRAYLEN & ~7) - long512_arr_idx) <= 8) ? 0 : long512_arr_idx + 8;
|
||||
long256_arr_idx = (((ARRAYLEN & ~3) - long256_arr_idx) <= 4) ? 0 : long256_arr_idx + 4;
|
||||
long128_arr_idx = (((ARRAYLEN & ~1) - long128_arr_idx) <= 2) ? 0 : long128_arr_idx + 2;
|
||||
}
|
||||
|
||||
@CompilerControl(CompilerControl.Mode.INLINE)
|
||||
@ -278,6 +281,11 @@ public class MaskedLogicOpts {
|
||||
partiallyMaskedLogicOperationsLongKernel(LongVector.SPECIES_256, long256_arr_idx);
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void partiallyMaskedLogicOperationsLong128() {
|
||||
partiallyMaskedLogicOperationsLongKernel(LongVector.SPECIES_128, long128_arr_idx);
|
||||
}
|
||||
|
||||
@CompilerControl(CompilerControl.Mode.INLINE)
|
||||
public void bitwiseBlendOperationLongKernel(VectorSpecies<Long> SPECIES, int index) {
|
||||
VectorMask<Long> lmask = VectorMask.fromArray(SPECIES, mask_arr, index);
|
||||
@ -305,4 +313,9 @@ public class MaskedLogicOpts {
|
||||
public void bitwiseBlendOperationLong256() {
|
||||
bitwiseBlendOperationLongKernel(LongVector.SPECIES_256, long256_arr_idx);
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void bitwiseBlendOperationLong128() {
|
||||
bitwiseBlendOperationLongKernel(LongVector.SPECIES_128, long128_arr_idx);
|
||||
}
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user