From 4cc75be80e6a89e0ed293e2f8bbb6d0f94189468 Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Thu, 11 Sep 2025 05:03:21 +0000 Subject: [PATCH] 8366702: C2 SuperWord: refactor VTransform vector nodes Reviewed-by: chagedorn, galder --- .../share/opto/superwordVTransformBuilder.cpp | 110 +++++---- .../share/opto/superwordVTransformBuilder.hpp | 4 +- src/hotspot/share/opto/vtransform.cpp | 211 +++++++++--------- src/hotspot/share/opto/vtransform.hpp | 139 ++++++++++-- 4 files changed, 286 insertions(+), 178 deletions(-) diff --git a/src/hotspot/share/opto/superwordVTransformBuilder.cpp b/src/hotspot/share/opto/superwordVTransformBuilder.cpp index b31f2eda9c0..dbc96c234a9 100644 --- a/src/hotspot/share/opto/superwordVTransformBuilder.cpp +++ b/src/hotspot/share/opto/superwordVTransformBuilder.cpp @@ -80,11 +80,13 @@ void SuperWordVTransformBuilder::build_inputs_for_vector_vtnodes(VectorSet& vtn_ vtn_memory_dependencies.clear(); // Add every memory dependency only once per vtn. if (p0->is_Load()) { + init_req_with_scalar(p0, vtn, MemNode::Control); init_req_with_scalar(p0, vtn, MemNode::Address); for (uint k = 0; k < pack->size(); k++) { add_memory_dependencies_of_node_to_vtnode(pack->at(k), vtn, vtn_memory_dependencies); } } else if (p0->is_Store()) { + init_req_with_scalar(p0, vtn, MemNode::Control); init_req_with_scalar(p0, vtn, MemNode::Address); init_req_with_vector(pack, vtn, MemNode::ValueIn); for (uint k = 0; k < pack->size(); k++) { @@ -93,26 +95,27 @@ void SuperWordVTransformBuilder::build_inputs_for_vector_vtnodes(VectorSet& vtn_ } else if (vtn->isa_ReductionVector() != nullptr) { init_req_with_scalar(p0, vtn, 1); // scalar init init_req_with_vector(pack, vtn, 2); // vector - } else { - assert(vtn->isa_ElementWiseVector() != nullptr, "all other vtnodes are handled above"); - if (VectorNode::is_scalar_rotate(p0) && - p0->in(2)->is_Con() && - Matcher::supports_vector_constant_rotates(p0->in(2)->get_int())) { - init_req_with_vector(pack, vtn, 1); - 
init_req_with_scalar(p0, vtn, 2); // constant rotation - } else if (VectorNode::is_roundopD(p0)) { - init_req_with_vector(pack, vtn, 1); - init_req_with_scalar(p0, vtn, 2); // constant rounding mode - } else if (p0->is_CMove()) { - // Cmp + Bool + CMove -> VectorMaskCmp + VectorBlend. - set_all_req_with_vectors(pack, vtn); - VTransformBoolVectorNode* vtn_mask_cmp = vtn->in_req(1)->isa_BoolVector(); - if (vtn_mask_cmp->test()._is_negated) { - vtn->swap_req(2, 3); // swap if test was negated. - } - } else { - set_all_req_with_vectors(pack, vtn); + } else if (VectorNode::is_scalar_rotate(p0) && + p0->in(2)->is_Con() && + Matcher::supports_vector_constant_rotates(p0->in(2)->get_int())) { + init_req_with_vector(pack, vtn, 1); + init_req_with_scalar(p0, vtn, 2); // constant rotation + } else if (VectorNode::is_roundopD(p0)) { + init_req_with_vector(pack, vtn, 1); + init_req_with_scalar(p0, vtn, 2); // constant rounding mode + } else if (p0->is_CMove()) { + // Cmp + Bool + CMove -> VectorMaskCmp + VectorBlend. + init_all_req_with_vectors(pack, vtn); + // Inputs must be permuted from (mask, blend1, blend2) -> (blend1, blend2, mask) + vtn->swap_req(1, 2); + vtn->swap_req(2, 3); + // If the test was negated: (blend1, blend2, mask) -> (blend2, blend1, mask) + VTransformBoolVectorNode* vtn_mask_cmp = vtn->in_req(3)->isa_BoolVector(); + if (vtn_mask_cmp->test()._is_negated) { + vtn->swap_req(1, 2); // swap if test was negated. } + } else { + init_all_req_with_vectors(pack, vtn); } } } @@ -139,51 +142,72 @@ void SuperWordVTransformBuilder::build_inputs_for_scalar_vtnodes(VectorSet& vtn_ init_req_with_scalar(n, vtn, 0); continue; } else { - set_all_req_with_scalars(n, vtn); + init_all_req_with_scalars(n, vtn); } } } // Create a vtnode for each pack. No in/out edges set yet. 
VTransformVectorNode* SuperWordVTransformBuilder::make_vector_vtnode_for_pack(const Node_List* pack) const { - uint pack_size = pack->size(); Node* p0 = pack->at(0); - int opc = p0->Opcode(); - VTransformVectorNode* vtn = nullptr; + const VTransformVectorNodeProperties properties = VTransformVectorNodeProperties::make_from_pack(pack, _vloop_analyzer); + const int sopc = properties.scalar_opcode(); + const uint vlen = properties.vector_length(); + const BasicType bt = properties.element_basic_type(); + VTransformVectorNode* vtn = nullptr; if (p0->is_Load()) { const VPointer& scalar_p = _vloop_analyzer.vpointers().vpointer(p0->as_Load()); - const VPointer vector_p(scalar_p.make_with_size(scalar_p.size() * pack_size)); - vtn = new (_vtransform.arena()) VTransformLoadVectorNode(_vtransform, pack_size, vector_p); + const VPointer vector_p(scalar_p.make_with_size(scalar_p.size() * vlen)); + vtn = new (_vtransform.arena()) VTransformLoadVectorNode(_vtransform, properties, vector_p, p0->adr_type()); } else if (p0->is_Store()) { const VPointer& scalar_p = _vloop_analyzer.vpointers().vpointer(p0->as_Store()); - const VPointer vector_p(scalar_p.make_with_size(scalar_p.size() * pack_size)); - vtn = new (_vtransform.arena()) VTransformStoreVectorNode(_vtransform, pack_size, vector_p); + const VPointer vector_p(scalar_p.make_with_size(scalar_p.size() * vlen)); + vtn = new (_vtransform.arena()) VTransformStoreVectorNode(_vtransform, properties, vector_p, p0->adr_type()); + } else if (p0->is_Cmp()) { + vtn = new (_vtransform.arena()) VTransformCmpVectorNode(_vtransform, properties); } else if (p0->is_Bool()) { VTransformBoolTest kind = _packset.get_bool_test(pack); - vtn = new (_vtransform.arena()) VTransformBoolVectorNode(_vtransform, pack_size, kind); + vtn = new (_vtransform.arena()) VTransformBoolVectorNode(_vtransform, properties, kind); + } else if (p0->is_CMove()) { + vtn = new (_vtransform.arena()) VTransformElementWiseVectorNode(_vtransform, p0->req(), properties, 
Op_VectorBlend); } else if (_vloop_analyzer.reductions().is_marked_reduction(p0)) { - vtn = new (_vtransform.arena()) VTransformReductionVectorNode(_vtransform, pack_size); + vtn = new (_vtransform.arena()) VTransformReductionVectorNode(_vtransform, properties); } else if (VectorNode::is_muladds2i(p0)) { // A special kind of binary element-wise vector op: the inputs are "ints" a and b, // but reinterpreted as two "shorts" [a0, a1] and [b0, b1]: // v = MulAddS2I(a, b) = a0 * b0 + a1 + b1 assert(p0->req() == 5, "MulAddS2I should have 4 operands"); - vtn = new (_vtransform.arena()) VTransformElementWiseVectorNode(_vtransform, 3, pack_size); + int vopc = VectorNode::opcode(sopc, bt); + vtn = new (_vtransform.arena()) VTransformElementWiseVectorNode(_vtransform, 3, properties, vopc); + } else if (VectorNode::is_convert_opcode(sopc)) { + assert(p0->req() == 2, "convert should have 2 operands"); + BasicType def_bt = _vloop_analyzer.types().velt_basic_type(p0->in(1)); + int vopc = VectorCastNode::opcode(sopc, def_bt); + vtn = new (_vtransform.arena()) VTransformElementWiseVectorNode(_vtransform, p0->req(), properties, vopc); + } else if (VectorNode::is_reinterpret_opcode(sopc)) { + assert(p0->req() == 2, "reinterpret should have 2 operands"); + BasicType src_bt = _vloop_analyzer.types().velt_basic_type(p0->in(1)); + vtn = new (_vtransform.arena()) VTransformReinterpretVectorNode(_vtransform, properties, src_bt); + } else if (VectorNode::can_use_RShiftI_instead_of_URShiftI(p0, bt)) { + int vopc = VectorNode::opcode(Op_RShiftI, bt); + vtn = new (_vtransform.arena()) VTransformElementWiseVectorNode(_vtransform, p0->req(), properties, vopc); + } else if (VectorNode::is_scalar_op_that_returns_int_but_vector_op_returns_long(sopc)) { + vtn = new (_vtransform.arena()) VTransformElementWiseLongOpWithCastToIntVectorNode(_vtransform, properties); } else { assert(p0->req() == 3 || - p0->is_CMove() || - VectorNode::is_scalar_op_that_returns_int_but_vector_op_returns_long(opc) || - 
VectorNode::is_convert_opcode(opc) || - VectorNode::is_reinterpret_opcode(opc) || - VectorNode::is_scalar_unary_op_with_equal_input_and_output_types(opc) || - opc == Op_FmaD || - opc == Op_FmaF || - opc == Op_FmaHF || - opc == Op_SignumF || - opc == Op_SignumD, + VectorNode::is_scalar_op_that_returns_int_but_vector_op_returns_long(sopc) || + VectorNode::is_reinterpret_opcode(sopc) || + VectorNode::is_scalar_unary_op_with_equal_input_and_output_types(sopc) || + sopc == Op_FmaD || + sopc == Op_FmaF || + sopc == Op_FmaHF || + sopc == Op_SignumF || + sopc == Op_SignumD, "pack type must be in this list"); - vtn = new (_vtransform.arena()) VTransformElementWiseVectorNode(_vtransform, p0->req(), pack_size); + assert(!VectorNode::is_roundopD(p0) || p0->in(2)->is_Con(), "rounding mode must be constant"); + int vopc = VectorNode::opcode(sopc, bt); + vtn = new (_vtransform.arena()) VTransformElementWiseVectorNode(_vtransform, p0->req(), properties, vopc); } vtn->set_nodes(pack); return vtn; @@ -291,7 +315,7 @@ void SuperWordVTransformBuilder::init_req_with_vector(const Node_List* pack, VTr vtn->init_req(j, req); } -void SuperWordVTransformBuilder::set_all_req_with_scalars(Node* n, VTransformNode* vtn) { +void SuperWordVTransformBuilder::init_all_req_with_scalars(Node* n, VTransformNode* vtn) { assert(vtn->req() == n->req(), "scalars must have same number of reqs"); for (uint j = 0; j < n->req(); j++) { Node* def = n->in(j); @@ -300,7 +324,7 @@ void SuperWordVTransformBuilder::set_all_req_with_scalars(Node* n, VTransformNod } } -void SuperWordVTransformBuilder::set_all_req_with_vectors(const Node_List* pack, VTransformNode* vtn) { +void SuperWordVTransformBuilder::init_all_req_with_vectors(const Node_List* pack, VTransformNode* vtn) { Node* p0 = pack->at(0); assert(vtn->req() <= p0->req(), "must have at at most as many reqs"); // Vectors have no ctrl, so ignore it. 
diff --git a/src/hotspot/share/opto/superwordVTransformBuilder.hpp b/src/hotspot/share/opto/superwordVTransformBuilder.hpp index ea93bb60ffb..6ed8480209a 100644 --- a/src/hotspot/share/opto/superwordVTransformBuilder.hpp +++ b/src/hotspot/share/opto/superwordVTransformBuilder.hpp @@ -79,8 +79,8 @@ private: VTransformNode* get_vtnode_or_wrap_as_outer(Node* n); void init_req_with_scalar(Node* n, VTransformNode* vtn, const int index); void init_req_with_vector(const Node_List* pack, VTransformNode* vtn, const int index); - void set_all_req_with_scalars(Node* n, VTransformNode* vtn); - void set_all_req_with_vectors(const Node_List* pack, VTransformNode* vtn); + void init_all_req_with_scalars(Node* n, VTransformNode* vtn); + void init_all_req_with_vectors(const Node_List* pack, VTransformNode* vtn); void add_memory_dependencies_of_node_to_vtnode(Node* n, VTransformNode* vtn, VectorSet& vtn_memory_dependencies); }; diff --git a/src/hotspot/share/opto/vtransform.cpp b/src/hotspot/share/opto/vtransform.cpp index 2f77c1c2e37..8c1210a5a09 100644 --- a/src/hotspot/share/opto/vtransform.cpp +++ b/src/hotspot/share/opto/vtransform.cpp @@ -104,7 +104,7 @@ bool VTransformGraph::schedule() { } #ifndef PRODUCT - if (_trace._verbose) { + if (_trace._info) { print_schedule(); } #endif @@ -158,11 +158,9 @@ void VTransform::apply_speculative_alignment_runtime_checks() { const GrowableArray& vtnodes = _graph.vtnodes(); for (int i = 0; i < vtnodes.length(); i++) { - VTransformVectorNode* vtn = vtnodes.at(i)->isa_Vector(); + VTransformMemVectorNode* vtn = vtnodes.at(i)->isa_MemVector(); if (vtn == nullptr) { continue; } - MemNode* p0 = vtn->nodes().at(0)->isa_Mem(); - if (p0 == nullptr) { continue; } - const VPointer& vp = vpointer(p0); + const VPointer& vp = vtn->vpointer(); if (vp.mem_pointer().base().is_object()) { continue; } assert(vp.mem_pointer().base().is_native(), "VPointer base must be object or native"); @@ -720,41 +718,41 @@ Node* VTransformApplyState::transformed_node(const 
VTransformNode* vtn) const { } VTransformApplyResult VTransformMemopScalarNode::apply(VTransformApplyState& apply_state) const { - // This was just wrapped. Now we simply unwap without touching the inputs. + // This was just wrapped. Now we simply unwrap without touching the inputs. return VTransformApplyResult::make_scalar(_node); } VTransformApplyResult VTransformDataScalarNode::apply(VTransformApplyState& apply_state) const { - // This was just wrapped. Now we simply unwap without touching the inputs. + // This was just wrapped. Now we simply unwrap without touching the inputs. return VTransformApplyResult::make_scalar(_node); } VTransformApplyResult VTransformLoopPhiNode::apply(VTransformApplyState& apply_state) const { - // This was just wrapped. Now we simply unwap without touching the inputs. + // This was just wrapped. Now we simply unwrap without touching the inputs. return VTransformApplyResult::make_scalar(_node); } VTransformApplyResult VTransformCFGNode::apply(VTransformApplyState& apply_state) const { - // This was just wrapped. Now we simply unwap without touching the inputs. + // This was just wrapped. Now we simply unwrap without touching the inputs. return VTransformApplyResult::make_scalar(_node); } VTransformApplyResult VTransformOuterNode::apply(VTransformApplyState& apply_state) const { - // This was just wrapped. Now we simply unwap without touching the inputs. + // This was just wrapped. Now we simply unwrap without touching the inputs. 
return VTransformApplyResult::make_scalar(_node); } VTransformApplyResult VTransformReplicateNode::apply(VTransformApplyState& apply_state) const { Node* val = apply_state.transformed_node(in_req(1)); VectorNode* vn = VectorNode::scalar2vector(val, _vlen, _element_type); - register_new_node_from_vectorization(apply_state, vn, val); - return VTransformApplyResult::make_vector(vn, _vlen, vn->length_in_bytes()); + register_new_node_from_vectorization(apply_state, vn); + return VTransformApplyResult::make_vector(vn); } VTransformApplyResult VTransformConvI2LNode::apply(VTransformApplyState& apply_state) const { Node* val = apply_state.transformed_node(in_req(1)); Node* n = new ConvI2LNode(val); - register_new_node_from_vectorization(apply_state, n, val); + register_new_node_from_vectorization(apply_state, n); return VTransformApplyResult::make_scalar(n); } @@ -766,11 +764,11 @@ VTransformApplyResult VTransformShiftCountNode::apply(VTransformApplyState& appl // bits in a scalar shift operation. But vector shift does not truncate, so // we must apply the mask now. Node* shift_count_masked = new AndINode(shift_count_in, phase->intcon(_mask)); - register_new_node_from_vectorization(apply_state, shift_count_masked, shift_count_in); + register_new_node_from_vectorization(apply_state, shift_count_masked); // Now that masked value is "boadcast" (some platforms only set the lowest element). 
VectorNode* vn = VectorNode::shift_count(_shift_opcode, shift_count_masked, _vlen, _element_bt); - register_new_node_from_vectorization(apply_state, vn, shift_count_in); - return VTransformApplyResult::make_vector(vn, _vlen, vn->length_in_bytes()); + register_new_node_from_vectorization(apply_state, vn); + return VTransformApplyResult::make_vector(vn); } @@ -781,77 +779,62 @@ VTransformApplyResult VTransformPopulateIndexNode::apply(VTransformApplyState& a assert(VectorNode::is_populate_index_supported(_element_bt), "should support"); const TypeVect* vt = TypeVect::make(_element_bt, _vlen); VectorNode* vn = new PopulateIndexNode(val, phase->intcon(1), vt); - register_new_node_from_vectorization(apply_state, vn, val); - return VTransformApplyResult::make_vector(vn, _vlen, vn->length_in_bytes()); + register_new_node_from_vectorization(apply_state, vn); + return VTransformApplyResult::make_vector(vn); } VTransformApplyResult VTransformElementWiseVectorNode::apply(VTransformApplyState& apply_state) const { - Node* first = nodes().at(0); - uint vlen = nodes().length(); - int opc = first->Opcode(); - BasicType bt = apply_state.vloop_analyzer().types().velt_basic_type(first); - - if (first->is_Cmp()) { - // Cmp + Bool -> VectorMaskCmp - // Handled by Bool / VTransformBoolVectorNode, so we do not generate any nodes here. - return VTransformApplyResult::make_empty(); - } - assert(2 <= req() && req() <= 4, "Must have 1-3 inputs"); - VectorNode* vn = nullptr; + const TypeVect* vt = TypeVect::make(element_basic_type(), vector_length()); Node* in1 = apply_state.transformed_node(in_req(1)); Node* in2 = (req() >= 3) ? apply_state.transformed_node(in_req(2)) : nullptr; - Node* in3 = (req() >= 4) ? 
apply_state.transformed_node(in_req(3)) : nullptr; - if (first->is_CMove()) { - assert(req() == 4, "three inputs expected: mask, blend1, blend2"); - vn = new VectorBlendNode(/* blend1 */ in2, /* blend2 */ in3, /* mask */ in1); - } else if (VectorNode::is_convert_opcode(opc)) { - assert(first->req() == 2 && req() == 2, "only one input expected"); - int vopc = VectorCastNode::opcode(opc, in1->bottom_type()->is_vect()->element_basic_type()); - vn = VectorCastNode::make(vopc, in1, bt, vlen); - } else if (VectorNode::is_reinterpret_opcode(opc)) { - assert(first->req() == 2 && req() == 2, "only one input expected"); - const TypeVect* vt = TypeVect::make(bt, vlen); - vn = new VectorReinterpretNode(in1, in1->bottom_type()->is_vect(), vt); - } else if (VectorNode::can_use_RShiftI_instead_of_URShiftI(first, bt)) { - opc = Op_RShiftI; - vn = VectorNode::make(opc, in1, in2, vlen, bt); - } else if (VectorNode::is_scalar_op_that_returns_int_but_vector_op_returns_long(opc)) { - // The scalar operation was a long -> int operation. - // However, the vector operation is long -> long. - VectorNode* long_vn = VectorNode::make(opc, in1, nullptr, vlen, T_LONG); - register_new_node_from_vectorization(apply_state, long_vn, first); - // Cast long -> int, to mimic the scalar long -> int operation. 
- vn = VectorCastNode::make(Op_VectorCastL2X, long_vn, T_INT, vlen); - } else if (req() == 3 || - VectorNode::is_scalar_unary_op_with_equal_input_and_output_types(opc)) { - assert(!VectorNode::is_roundopD(first) || in2->is_Con(), "rounding mode must be constant"); - vn = VectorNode::make(opc, in1, in2, vlen, bt); // unary and binary + VectorNode* vn = nullptr; + if (req() <= 3) { + vn = VectorNode::make(_vector_opcode, in1, in2, vt); // unary and binary } else { - assert(req() == 4, "three inputs expected"); - assert(opc == Op_FmaD || - opc == Op_FmaF || - opc == Op_FmaHF || - opc == Op_SignumF || - opc == Op_SignumD, - "element wise operation must be from this list"); - vn = VectorNode::make(opc, in1, in2, in3, vlen, bt); // ternary + Node* in3 = apply_state.transformed_node(in_req(3)); + vn = VectorNode::make(_vector_opcode, in1, in2, in3, vt); // ternary } register_new_node_from_vectorization_and_replace_scalar_nodes(apply_state, vn); - return VTransformApplyResult::make_vector(vn, vlen, vn->length_in_bytes()); + return VTransformApplyResult::make_vector(vn); +} + +VTransformApplyResult VTransformElementWiseLongOpWithCastToIntVectorNode::apply(VTransformApplyState& apply_state) const { + uint vlen = vector_length(); + int sopc = scalar_opcode(); + Node* in1 = apply_state.transformed_node(in_req(1)); + + // The scalar operation was a long -> int operation. + // However, the vector operation is long -> long. + VectorNode* long_vn = VectorNode::make(sopc, in1, nullptr, vlen, T_LONG); + register_new_node_from_vectorization(apply_state, long_vn); + // Cast long -> int, to mimic the scalar long -> int operation. 
+ VectorNode* vn = VectorCastNode::make(Op_VectorCastL2X, long_vn, T_INT, vlen); + register_new_node_from_vectorization_and_replace_scalar_nodes(apply_state, vn); + return VTransformApplyResult::make_vector(vn); +} + +VTransformApplyResult VTransformReinterpretVectorNode::apply(VTransformApplyState& apply_state) const { + const TypeVect* dst_vt = TypeVect::make(element_basic_type(), vector_length()); + const TypeVect* src_vt = TypeVect::make(_src_bt, vector_length()); + assert(VectorNode::is_reinterpret_opcode(scalar_opcode()), "scalar opcode must be reinterpret"); + + Node* in1 = apply_state.transformed_node(in_req(1)); + VectorNode* vn = new VectorReinterpretNode(in1, src_vt, dst_vt); + + register_new_node_from_vectorization_and_replace_scalar_nodes(apply_state, vn); + return VTransformApplyResult::make_vector(vn); } VTransformApplyResult VTransformBoolVectorNode::apply(VTransformApplyState& apply_state) const { - BoolNode* first = nodes().at(0)->as_Bool(); - uint vlen = nodes().length(); - BasicType bt = apply_state.vloop_analyzer().types().velt_basic_type(first); + const TypeVect* vt = TypeVect::make(element_basic_type(), vector_length()); + assert(scalar_opcode() == Op_Bool, ""); // Cmp + Bool -> VectorMaskCmp - VTransformElementWiseVectorNode* vtn_cmp = in_req(1)->isa_ElementWiseVector(); - assert(vtn_cmp != nullptr && vtn_cmp->nodes().at(0)->is_Cmp(), - "bool vtn expects cmp vtn as input"); + VTransformCmpVectorNode* vtn_cmp = in_req(1)->isa_CmpVector(); + assert(vtn_cmp != nullptr, "bool vtn expects cmp vtn as input"); Node* cmp_in1 = apply_state.transformed_node(vtn_cmp->in_req(1)); Node* cmp_in2 = apply_state.transformed_node(vtn_cmp->in_req(2)); @@ -859,35 +842,30 @@ VTransformApplyResult VTransformBoolVectorNode::apply(VTransformApplyState& appl PhaseIdealLoop* phase = apply_state.phase(); ConINode* mask_node = phase->intcon((int)mask); - const TypeVect* vt = TypeVect::make(bt, vlen); VectorNode* vn = new VectorMaskCmpNode(mask, cmp_in1, cmp_in2, 
mask_node, vt); register_new_node_from_vectorization_and_replace_scalar_nodes(apply_state, vn); - return VTransformApplyResult::make_vector(vn, vlen, vn->vect_type()->length_in_bytes()); + return VTransformApplyResult::make_vector(vn); } VTransformApplyResult VTransformReductionVectorNode::apply(VTransformApplyState& apply_state) const { - Node* first = nodes().at(0); - uint vlen = nodes().length(); - int opc = first->Opcode(); - BasicType bt = first->bottom_type()->basic_type(); - Node* init = apply_state.transformed_node(in_req(1)); Node* vec = apply_state.transformed_node(in_req(2)); - ReductionNode* vn = ReductionNode::make(opc, nullptr, init, vec, bt); + ReductionNode* vn = ReductionNode::make(scalar_opcode(), nullptr, init, vec, element_basic_type()); register_new_node_from_vectorization_and_replace_scalar_nodes(apply_state, vn); - return VTransformApplyResult::make_vector(vn, vlen, vn->vect_type()->length_in_bytes()); + return VTransformApplyResult::make_vector(vn, vn->vect_type()); } VTransformApplyResult VTransformLoadVectorNode::apply(VTransformApplyState& apply_state) const { + int sopc = scalar_opcode(); + uint vlen = vector_length(); + BasicType bt = element_basic_type(); + LoadNode* first = nodes().at(0)->as_Load(); - uint vlen = nodes().length(); - Node* ctrl = first->in(MemNode::Control); + Node* ctrl = apply_state.transformed_node(in_req(MemNode::Control)); + // first has the correct memory state, determined by VTransformGraph::apply_memops_reordering_with_schedule Node* mem = first->in(MemNode::Memory); - Node* adr = first->in(MemNode::Address); - int opc = first->Opcode(); - const TypePtr* adr_type = first->adr_type(); - BasicType bt = apply_state.vloop_analyzer().types().velt_basic_type(first); + Node* adr = apply_state.transformed_node(in_req(MemNode::Address)); // Set the memory dependency of the LoadVector as early as possible. 
// Walk up the memory chain, and ignore any StoreVector that provably @@ -902,34 +880,33 @@ VTransformApplyResult VTransformLoadVectorNode::apply(VTransformApplyState& appl } } - LoadVectorNode* vn = LoadVectorNode::make(opc, ctrl, mem, adr, adr_type, vlen, bt, + LoadVectorNode* vn = LoadVectorNode::make(sopc, ctrl, mem, adr, _adr_type, vlen, bt, control_dependency()); DEBUG_ONLY( if (VerifyAlignVector) { vn->set_must_verify_alignment(); } ) register_new_node_from_vectorization_and_replace_scalar_nodes(apply_state, vn); - return VTransformApplyResult::make_vector(vn, vlen, vn->memory_size()); + return VTransformApplyResult::make_vector(vn, vn->vect_type()); } VTransformApplyResult VTransformStoreVectorNode::apply(VTransformApplyState& apply_state) const { + int sopc = scalar_opcode(); + uint vlen = vector_length(); + StoreNode* first = nodes().at(0)->as_Store(); - uint vlen = nodes().length(); - Node* ctrl = first->in(MemNode::Control); + Node* ctrl = apply_state.transformed_node(in_req(MemNode::Control)); + // first has the correct memory state, determined by VTransformGraph::apply_memops_reordering_with_schedule Node* mem = first->in(MemNode::Memory); - Node* adr = first->in(MemNode::Address); - int opc = first->Opcode(); - const TypePtr* adr_type = first->adr_type(); + Node* adr = apply_state.transformed_node(in_req(MemNode::Address)); Node* value = apply_state.transformed_node(in_req(MemNode::ValueIn)); - StoreVectorNode* vn = StoreVectorNode::make(opc, ctrl, mem, adr, adr_type, value, vlen); + StoreVectorNode* vn = StoreVectorNode::make(sopc, ctrl, mem, adr, _adr_type, value, vlen); DEBUG_ONLY( if (VerifyAlignVector) { vn->set_must_verify_alignment(); } ) register_new_node_from_vectorization_and_replace_scalar_nodes(apply_state, vn); - return VTransformApplyResult::make_vector(vn, vlen, vn->memory_size()); + return VTransformApplyResult::make_vector(vn, vn->vect_type()); } void 
VTransformVectorNode::register_new_node_from_vectorization_and_replace_scalar_nodes(VTransformApplyState& apply_state, Node* vn) const { PhaseIdealLoop* phase = apply_state.phase(); - Node* first = nodes().at(0); - - register_new_node_from_vectorization(apply_state, vn, first); + register_new_node_from_vectorization(apply_state, vn); for (int i = 0; i < _nodes.length(); i++) { Node* n = _nodes.at(i); @@ -937,9 +914,11 @@ void VTransformVectorNode::register_new_node_from_vectorization_and_replace_scal } } -void VTransformNode::register_new_node_from_vectorization(VTransformApplyState& apply_state, Node* vn, Node* old_node) const { +void VTransformNode::register_new_node_from_vectorization(VTransformApplyState& apply_state, Node* vn) const { PhaseIdealLoop* phase = apply_state.phase(); - phase->register_new_node_with_ctrl_of(vn, old_node); + // Using the cl is sometimes not the most accurate, but still correct. We do not have to be + // perfectly accurate, because we will set major_progress anyway. 
+ phase->register_new_node(vn, apply_state.vloop().cl()); phase->igvn()._worklist.push(vn); VectorNode::trace_new_vector(vn, "AutoVectorization"); } @@ -1050,18 +1029,32 @@ void VTransformPopulateIndexNode::print_spec() const { } void VTransformVectorNode::print_spec() const { - tty->print("%d-pack[", _nodes.length()); - for (int i = 0; i < _nodes.length(); i++) { - Node* n = _nodes.at(i); - if (i > 0) { - tty->print(", "); - } - tty->print("%d %s", n->_idx, n->Name()); - } - tty->print("]"); + tty->print("Properties[orig=[%d %s] sopc=%s vlen=%d element_bt=%s]", + approximate_origin()->_idx, + approximate_origin()->Name(), + NodeClassNames[scalar_opcode()], + vector_length(), + type2name(element_basic_type())); if (is_load_or_store_in_loop()) { tty->print(" "); vpointer().print_on(tty, false); } } + +void VTransformElementWiseVectorNode::print_spec() const { + VTransformVectorNode::print_spec(); + tty->print(" vopc=%s", NodeClassNames[_vector_opcode]); +} + +void VTransformReinterpretVectorNode::print_spec() const { + VTransformVectorNode::print_spec(); + tty->print(" src_bt=%s", type2name(_src_bt)); +} + +void VTransformBoolVectorNode::print_spec() const { + VTransformVectorNode::print_spec(); + const BoolTest bt(_test._mask); + tty->print(" test="); + bt.dump_on(tty); +} #endif diff --git a/src/hotspot/share/opto/vtransform.hpp b/src/hotspot/share/opto/vtransform.hpp index 60b0b5d4f9d..9a4e4de01a2 100644 --- a/src/hotspot/share/opto/vtransform.hpp +++ b/src/hotspot/share/opto/vtransform.hpp @@ -26,6 +26,7 @@ #include "opto/node.hpp" #include "opto/vectorization.hpp" +#include "opto/vectornode.hpp" // VTransform: // - Models the transformation of the scalar loop to vectorized loop: @@ -67,6 +68,7 @@ class VTransformCFGNode; class VTransformOuterNode; class VTransformVectorNode; class VTransformElementWiseVectorNode; +class VTransformCmpVectorNode; class VTransformBoolVectorNode; class VTransformReductionVectorNode; class VTransformMemVectorNode; @@ -90,9 +92,12 @@ 
public: return VTransformApplyResult(n, 0, 0); } - static VTransformApplyResult make_vector(Node* n, uint vector_length, uint vector_width) { - assert(vector_length > 0 && vector_width > 0, "must have nonzero size"); - return VTransformApplyResult(n, vector_length, vector_width); + static VTransformApplyResult make_vector(VectorNode* vn) { + return VTransformApplyResult(vn, vn->length(), vn->length_in_bytes()); + } + + static VTransformApplyResult make_vector(Node* n, const TypeVect* vt) { + return VTransformApplyResult(n, vt->length(), vt->length_in_bytes()); } static VTransformApplyResult make_empty() { @@ -431,6 +436,7 @@ public: virtual VTransformOuterNode* isa_Outer() { return nullptr; } virtual VTransformVectorNode* isa_Vector() { return nullptr; } virtual VTransformElementWiseVectorNode* isa_ElementWiseVector() { return nullptr; } + virtual VTransformCmpVectorNode* isa_CmpVector() { return nullptr; } virtual VTransformBoolVectorNode* isa_BoolVector() { return nullptr; } virtual VTransformReductionVectorNode* isa_ReductionVector() { return nullptr; } virtual VTransformMemVectorNode* isa_MemVector() { return nullptr; } @@ -445,7 +451,7 @@ public: Node* find_transformed_input(int i, const GrowableArray& vnode_idx_to_transformed_node) const; - void register_new_node_from_vectorization(VTransformApplyState& apply_state, Node* vn, Node* old_node) const; + void register_new_node_from_vectorization(VTransformApplyState& apply_state, Node* vn) const; NOT_PRODUCT(virtual const char* name() const = 0;) NOT_PRODUCT(void print() const;) @@ -590,13 +596,52 @@ public: NOT_PRODUCT(virtual void print_spec() const override;) }; -// Base class for all vector vtnodes. +// Bundle the information needed for vector nodes. 
+class VTransformVectorNodeProperties : public StackObj { +private: + Node* _approximate_origin; // for proper propagation of node notes + const int _scalar_opcode; + const uint _vector_length; + const BasicType _element_basic_type; + + VTransformVectorNodeProperties(Node* approximate_origin, + int scalar_opcode, + uint vector_length, + BasicType element_basic_type) : + _approximate_origin(approximate_origin), + _scalar_opcode(scalar_opcode), + _vector_length(vector_length), + _element_basic_type(element_basic_type) {} + +public: + static VTransformVectorNodeProperties make_from_pack(const Node_List* pack, const VLoopAnalyzer& vloop_analyzer) { + Node* first = pack->at(0); + int opc = first->Opcode(); + int vlen = pack->size(); + BasicType bt = vloop_analyzer.types().velt_basic_type(first); + return VTransformVectorNodeProperties(first, opc, vlen, bt); + } + + Node* approximate_origin() const { return _approximate_origin; } + int scalar_opcode() const { return _scalar_opcode; } + uint vector_length() const { return _vector_length; } + BasicType element_basic_type() const { return _element_basic_type; } +}; + +// Abstract base class for all vector vtnodes. 
class VTransformVectorNode : public VTransformNode { private: + const VTransformVectorNodeProperties _properties; +protected: GrowableArray _nodes; public: - VTransformVectorNode(VTransform& vtransform, const uint req, const uint number_of_nodes) : - VTransformNode(vtransform, req), _nodes(vtransform.arena(), number_of_nodes, number_of_nodes, nullptr) {} + VTransformVectorNode(VTransform& vtransform, const uint req, const VTransformVectorNodeProperties properties) : + VTransformNode(vtransform, req), + _properties(properties), + _nodes(vtransform.arena(), + properties.vector_length(), + properties.vector_length(), + nullptr) {} void set_nodes(const Node_List* pack) { for (uint k = 0; k < pack->size(); k++) { @@ -604,20 +649,50 @@ public: } } - const GrowableArray& nodes() const { return _nodes; } virtual VTransformVectorNode* isa_Vector() override { return this; } void register_new_node_from_vectorization_and_replace_scalar_nodes(VTransformApplyState& apply_state, Node* vn) const; NOT_PRODUCT(virtual void print_spec() const override;) + +protected: + Node* approximate_origin() const { return _properties.approximate_origin(); } + int scalar_opcode() const { return _properties.scalar_opcode(); } + uint vector_length() const { return _properties.vector_length(); } + BasicType element_basic_type() const { return _properties.element_basic_type(); } }; // Catch all for all element-wise vector operations. 
class VTransformElementWiseVectorNode : public VTransformVectorNode { +private: + const int _vector_opcode; public: - VTransformElementWiseVectorNode(VTransform& vtransform, uint req, uint number_of_nodes) : - VTransformVectorNode(vtransform, req, number_of_nodes) {} + VTransformElementWiseVectorNode(VTransform& vtransform, uint req, const VTransformVectorNodeProperties properties, const int vector_opcode) : + VTransformVectorNode(vtransform, req, properties), _vector_opcode(vector_opcode) {} virtual VTransformElementWiseVectorNode* isa_ElementWiseVector() override { return this; } virtual VTransformApplyResult apply(VTransformApplyState& apply_state) const override; NOT_PRODUCT(virtual const char* name() const override { return "ElementWiseVector"; };) + NOT_PRODUCT(virtual void print_spec() const override;) +}; + +// The scalar operation was a long -> int operation. +// However, the vector operation is long -> long. +// Hence, we vectorize it as: long --long_op--> long --cast--> int +class VTransformElementWiseLongOpWithCastToIntVectorNode : public VTransformVectorNode { +public: + VTransformElementWiseLongOpWithCastToIntVectorNode(VTransform& vtransform, const VTransformVectorNodeProperties properties) : + VTransformVectorNode(vtransform, 2, properties) {} + virtual VTransformApplyResult apply(VTransformApplyState& apply_state) const override; + NOT_PRODUCT(virtual const char* name() const override { return "ElementWiseLongOpWithCastToIntVector"; };) +}; + +class VTransformReinterpretVectorNode : public VTransformVectorNode { +private: + const BasicType _src_bt; +public: + VTransformReinterpretVectorNode(VTransform& vtransform, const VTransformVectorNodeProperties properties, const BasicType src_bt) : + VTransformVectorNode(vtransform, 2, properties), _src_bt(src_bt) {} + virtual VTransformApplyResult apply(VTransformApplyState& apply_state) const override; + NOT_PRODUCT(virtual const char* name() const override { return "ReinterpretVector"; };) + 
NOT_PRODUCT(virtual void print_spec() const override;) }; struct VTransformBoolTest { @@ -628,23 +703,35 @@ struct VTransformBoolTest { _mask(mask), _is_negated(is_negated) {} }; -class VTransformBoolVectorNode : public VTransformElementWiseVectorNode { +// Cmp + Bool -> VectorMaskCmp +// The Bool node takes care of "apply". +class VTransformCmpVectorNode : public VTransformVectorNode { +public: + VTransformCmpVectorNode(VTransform& vtransform, const VTransformVectorNodeProperties properties) : + VTransformVectorNode(vtransform, 3, properties) {} + virtual VTransformCmpVectorNode* isa_CmpVector() override { return this; } + virtual VTransformApplyResult apply(VTransformApplyState& apply_state) const override { return VTransformApplyResult::make_empty(); } + NOT_PRODUCT(virtual const char* name() const override { return "CmpVector"; };) +}; + +class VTransformBoolVectorNode : public VTransformVectorNode { private: const VTransformBoolTest _test; public: - VTransformBoolVectorNode(VTransform& vtransform, uint number_of_nodes, VTransformBoolTest test) : - VTransformElementWiseVectorNode(vtransform, 2, number_of_nodes), _test(test) {} + VTransformBoolVectorNode(VTransform& vtransform, const VTransformVectorNodeProperties properties, VTransformBoolTest test) : + VTransformVectorNode(vtransform, 2, properties), _test(test) {} VTransformBoolTest test() const { return _test; } virtual VTransformBoolVectorNode* isa_BoolVector() override { return this; } virtual VTransformApplyResult apply(VTransformApplyState& apply_state) const override; NOT_PRODUCT(virtual const char* name() const override { return "BoolVector"; };) + NOT_PRODUCT(virtual void print_spec() const override;) }; class VTransformReductionVectorNode : public VTransformVectorNode { public: // req = 3 -> [ctrl, scalar init, vector] - VTransformReductionVectorNode(VTransform& vtransform, uint number_of_nodes) : - VTransformVectorNode(vtransform, 3, number_of_nodes) {} + VTransformReductionVectorNode(VTransform& 
vtransform, const VTransformVectorNodeProperties properties) : + VTransformVectorNode(vtransform, 3, properties) {} virtual VTransformReductionVectorNode* isa_ReductionVector() override { return this; } virtual VTransformApplyResult apply(VTransformApplyState& apply_state) const override; NOT_PRODUCT(virtual const char* name() const override { return "ReductionVector"; };) @@ -653,12 +740,16 @@ public: class VTransformMemVectorNode : public VTransformVectorNode { private: const VPointer _vpointer; // with size of the vector +protected: + const TypePtr* _adr_type; public: - VTransformMemVectorNode(VTransform& vtransform, const uint req, uint number_of_nodes, const VPointer& vpointer) : - VTransformVectorNode(vtransform, req, number_of_nodes), - _vpointer(vpointer) {} + VTransformMemVectorNode(VTransform& vtransform, const uint req, const VTransformVectorNodeProperties properties, const VPointer& vpointer, const TypePtr* adr_type) : + VTransformVectorNode(vtransform, req, properties), + _vpointer(vpointer), + _adr_type(adr_type) {} + const GrowableArray<Node*>& nodes() const { return _nodes; } virtual VTransformMemVectorNode* isa_MemVector() override { return this; } virtual bool is_load_or_store_in_loop() const override { return true; } virtual const VPointer& vpointer() const override { return _vpointer; } @@ -667,8 +758,8 @@ public: class VTransformLoadVectorNode : public VTransformMemVectorNode { public: // req = 3 -> [ctrl, mem, adr] - VTransformLoadVectorNode(VTransform& vtransform, uint number_of_nodes, const VPointer& vpointer) : - VTransformMemVectorNode(vtransform, 3, number_of_nodes, vpointer) {} + VTransformLoadVectorNode(VTransform& vtransform, const VTransformVectorNodeProperties properties, const VPointer& vpointer, const TypePtr* adr_type) : + VTransformMemVectorNode(vtransform, 3, properties, vpointer, adr_type) {} LoadNode::ControlDependency control_dependency() const; virtual VTransformLoadVectorNode* isa_LoadVector() override { return this; } virtual 
bool is_load_in_loop() const override { return true; } @@ -679,8 +770,8 @@ public: class VTransformStoreVectorNode : public VTransformMemVectorNode { public: // req = 4 -> [ctrl, mem, adr, val] - VTransformStoreVectorNode(VTransform& vtransform, uint number_of_nodes, const VPointer& vpointer) : - VTransformMemVectorNode(vtransform, 4, number_of_nodes, vpointer) {} + VTransformStoreVectorNode(VTransform& vtransform, const VTransformVectorNodeProperties properties, const VPointer& vpointer, const TypePtr* adr_type) : + VTransformMemVectorNode(vtransform, 4, properties, vpointer, adr_type) {} virtual VTransformStoreVectorNode* isa_StoreVector() override { return this; } virtual bool is_load_in_loop() const override { return false; } virtual VTransformApplyResult apply(VTransformApplyState& apply_state) const override; @@ -703,8 +794,8 @@ void VTransformGraph::for_each_memop_in_schedule(Callback callback) const { callback(scalar->node()); } - VTransformVectorNode* vector = vtn->isa_Vector(); - if (vector != nullptr && vector->nodes().at(0)->is_Mem()) { + VTransformMemVectorNode* vector = vtn->isa_MemVector(); + if (vector != nullptr) { for (int j = 0; j < vector->nodes().length(); j++) { callback(vector->nodes().at(j)->as_Mem()); }