8366427: C2 SuperWord: refactor VTransform scalar nodes

Reviewed-by: mhaessig, chagedorn, kvn
This commit is contained in:
Emanuel Peter 2025-09-01 13:48:25 +00:00
parent 5110d54d93
commit 99223eea03
4 changed files with 157 additions and 43 deletions

View File

@ -53,7 +53,19 @@ void SuperWordVTransformBuilder::build_scalar_vtnodes_for_non_packed_nodes() {
for (int i = 0; i < _vloop_analyzer.body().body().length(); i++) {
Node* n = _vloop_analyzer.body().body().at(i);
if (_packset.get_pack(n) != nullptr) { continue; }
VTransformScalarNode* vtn = new (_vtransform.arena()) VTransformScalarNode(_vtransform, n);
VTransformNode* vtn = nullptr;
if (n->is_Load() || n->is_Store()) {
MemNode* mem = n->as_Mem();
const VPointer& mem_p = _vloop_analyzer.vpointers().vpointer(mem);
vtn = new (_vtransform.arena()) VTransformMemopScalarNode(_vtransform, mem, mem_p);
} else if (n->is_Phi()) {
vtn = new (_vtransform.arena()) VTransformLoopPhiNode(_vtransform, n->as_Phi());
} else if (n->is_CFG()) {
vtn = new (_vtransform.arena()) VTransformCFGNode(_vtransform, n);
} else {
vtn = new (_vtransform.arena()) VTransformDataScalarNode(_vtransform, n);
}
map_node_to_vtnode(n, vtn);
}
}
@ -108,8 +120,8 @@ void SuperWordVTransformBuilder::build_inputs_for_vector_vtnodes(VectorSet& vtn_
void SuperWordVTransformBuilder::build_inputs_for_scalar_vtnodes(VectorSet& vtn_memory_dependencies) {
for (int i = 0; i < _vloop_analyzer.body().body().length(); i++) {
Node* n = _vloop_analyzer.body().body().at(i);
VTransformScalarNode* vtn = get_vtnode(n)->isa_Scalar();
if (vtn == nullptr) { continue; }
VTransformNode* vtn = get_vtnode(n);
if (vtn->isa_Vector() != nullptr) { continue; }
vtn_memory_dependencies.clear(); // Add every dependency only once per vtn.
if (n->is_Load()) {
@ -178,7 +190,7 @@ VTransformVectorNode* SuperWordVTransformBuilder::make_vector_vtnode_for_pack(co
}
void SuperWordVTransformBuilder::init_req_with_scalar(Node* n, VTransformNode* vtn, const int index) {
VTransformNode* req = get_vtnode_or_wrap_as_input_scalar(n->in(index));
VTransformNode* req = get_vtnode_or_wrap_as_outer(n->in(index));
vtn->init_req(index, req);
}
@ -210,7 +222,7 @@ VTransformNode* SuperWordVTransformBuilder::get_or_make_vtnode_vector_input_at_i
Node* same_input = _packset.same_inputs_at_index_or_null(pack, index);
if (same_input == nullptr && p0->in(index) == _vloop.iv()) {
// PopulateIndex: [iv+0, iv+1, iv+2, ...]
VTransformNode* iv_vtn = get_vtnode_or_wrap_as_input_scalar(_vloop.iv());
VTransformNode* iv_vtn = get_vtnode_or_wrap_as_outer(_vloop.iv());
BasicType p0_bt = _vloop_analyzer.types().velt_basic_type(p0);
// If we have subword type, take that type directly. If p0 is some ConvI2L/F/D,
// then the p0_bt can also be L/F/D but we need to produce ints for the input of
@ -222,7 +234,7 @@ VTransformNode* SuperWordVTransformBuilder::get_or_make_vtnode_vector_input_at_i
}
if (same_input != nullptr) {
VTransformNode* same_input_vtn = get_vtnode_or_wrap_as_input_scalar(same_input);
VTransformNode* same_input_vtn = get_vtnode_or_wrap_as_outer(same_input);
if (index == 2 && VectorNode::is_shift(p0)) {
// Scalar shift count for vector shift operation: vec2 = shiftV(vec1, scalar_count)
// Scalar shift operations masks the shift count, but the vector shift does not, so
@ -264,12 +276,12 @@ VTransformNode* SuperWordVTransformBuilder::get_or_make_vtnode_vector_input_at_i
ShouldNotReachHere();
}
VTransformNode* SuperWordVTransformBuilder::get_vtnode_or_wrap_as_input_scalar(Node* n) {
VTransformNode* SuperWordVTransformBuilder::get_vtnode_or_wrap_as_outer(Node* n) {
VTransformNode* vtn = get_vtnode_or_null(n);
if (vtn != nullptr) { return vtn; }
assert(!_vloop.in_bb(n), "only nodes outside the loop can be input nodes to the loop");
vtn = new (_vtransform.arena()) VTransformInputScalarNode(_vtransform, n);
vtn = new (_vtransform.arena()) VTransformOuterNode(_vtransform, n);
map_node_to_vtnode(n, vtn);
return vtn;
}

View File

@ -76,7 +76,7 @@ private:
VTransformVectorNode* make_vector_vtnode_for_pack(const Node_List* pack) const;
VTransformNode* get_or_make_vtnode_vector_input_at_index(const Node_List* pack, const int index);
VTransformNode* get_vtnode_or_wrap_as_input_scalar(Node* n);
VTransformNode* get_vtnode_or_wrap_as_outer(Node* n);
void init_req_with_scalar(Node* n, VTransformNode* vtn, const int index);
void init_req_with_vector(const Node_List* pack, VTransformNode* vtn, const int index);
void set_all_req_with_scalars(Node* n, VTransformNode* vtn);

View File

@ -282,8 +282,8 @@ void VTransform::apply_speculative_aliasing_runtime_checks() {
if (visited.test(use->_idx)) {
// The use node was already visited, i.e. is higher up in the schedule.
// The "out" edge thus points backward, i.e. it is violated.
const VPointer& vp1 = vtn->vpointer(_vloop_analyzer);
const VPointer& vp2 = use->vpointer(_vloop_analyzer);
const VPointer& vp1 = vtn->vpointer();
const VPointer& vp2 = use->vpointer();
#ifdef ASSERT
if (_trace._speculative_aliasing_analysis || _trace._speculative_runtime_checks) {
tty->print_cr("\nViolated Weak Edge:");
@ -630,7 +630,7 @@ bool VTransformGraph::has_store_to_load_forwarding_failure(const VLoopAnalyzer&
for (int i = 0; i < _schedule.length(); i++) {
VTransformNode* vtn = _schedule.at(i);
if (vtn->is_load_or_store_in_loop()) {
const VPointer& p = vtn->vpointer(vloop_analyzer);
const VPointer& p = vtn->vpointer();
if (p.is_valid()) {
VTransformVectorNode* vector = vtn->isa_Vector();
bool is_load = vtn->is_load_in_loop();
@ -708,7 +708,27 @@ Node* VTransformApplyState::transformed_node(const VTransformNode* vtn) const {
return n;
}
VTransformApplyResult VTransformScalarNode::apply(VTransformApplyState& apply_state) const {
// Identity apply for a wrapped scalar load/store: produces a scalar result that
// refers to the very same node that was wrapped. apply_state is not used here.
VTransformApplyResult VTransformMemopScalarNode::apply(VTransformApplyState& apply_state) const {
// This was just wrapped. Now we simply unwrap without touching the inputs.
return VTransformApplyResult::make_scalar(_node);
}
// Identity apply for a wrapped scalar data node: produces a scalar result that
// refers to the very same node that was wrapped. apply_state is not used here.
VTransformApplyResult VTransformDataScalarNode::apply(VTransformApplyState& apply_state) const {
// This was just wrapped. Now we simply unwrap without touching the inputs.
return VTransformApplyResult::make_scalar(_node);
}
// Identity apply for a wrapped loop phi: produces a scalar result that refers
// to the very same phi that was wrapped. apply_state is not used here.
VTransformApplyResult VTransformLoopPhiNode::apply(VTransformApplyState& apply_state) const {
// This was just wrapped. Now we simply unwrap without touching the inputs.
return VTransformApplyResult::make_scalar(_node);
}
// Identity apply for a wrapped CFG node: produces a scalar result that refers
// to the very same node that was wrapped. apply_state is not used here.
VTransformApplyResult VTransformCFGNode::apply(VTransformApplyState& apply_state) const {
// This was just wrapped. Now we simply unwrap without touching the inputs.
return VTransformApplyResult::make_scalar(_node);
}
// Identity apply for a wrapped node from outside the loop: produces a scalar
// result that refers to the very same node that was wrapped. apply_state is
// not used here.
VTransformApplyResult VTransformOuterNode::apply(VTransformApplyState& apply_state) const {
// This was just wrapped. Now we simply unwrap without touching the inputs.
return VTransformApplyResult::make_scalar(_node);
}
@ -861,7 +881,7 @@ VTransformApplyResult VTransformLoadVectorNode::apply(VTransformApplyState& appl
// Set the memory dependency of the LoadVector as early as possible.
// Walk up the memory chain, and ignore any StoreVector that provably
// does not have any memory dependency.
const VPointer& load_p = vpointer(apply_state.vloop_analyzer());
const VPointer& load_p = vpointer();
while (mem->is_StoreVector()) {
VPointer store_p(mem->as_Mem(), apply_state.vloop());
if (store_p.never_overlaps_with(load_p)) {
@ -983,7 +1003,24 @@ void VTransformNode::print_node_idx(const VTransformNode* vtn) {
}
}
void VTransformScalarNode::print_spec() const {
// Prints "node[<idx> <Name>] " for the wrapped memop to tty, followed by the
// stored VPointer.
void VTransformMemopScalarNode::print_spec() const {
// Note the trailing space in the format: it separates the node from the vpointer text.
tty->print("node[%d %s] ", _node->_idx, _node->Name());
// NOTE(review): the meaning of the `false` argument is not visible here -
// presumably it suppresses a trailing newline; confirm against VPointer::print_on.
_vpointer.print_on(tty, false);
}
// Prints "node[<idx> <Name>]" for the wrapped data node to tty (no trailing newline).
void VTransformDataScalarNode::print_spec() const {
tty->print("node[%d %s]", _node->_idx, _node->Name());
}
// Prints "node[<idx> <Name>]" for the wrapped phi to tty (no trailing newline).
void VTransformLoopPhiNode::print_spec() const {
tty->print("node[%d %s]", _node->_idx, _node->Name());
}
// Prints "node[<idx> <Name>]" for the wrapped CFG node to tty (no trailing newline).
void VTransformCFGNode::print_spec() const {
tty->print("node[%d %s]", _node->_idx, _node->Name());
}
// Prints "node[<idx> <Name>]" for the wrapped outer node to tty (no trailing newline).
void VTransformOuterNode::print_spec() const {
tty->print("node[%d %s]", _node->_idx, _node->Name());
}
@ -1011,5 +1048,9 @@ void VTransformVectorNode::print_spec() const {
tty->print("%d %s", n->_idx, n->Name());
}
tty->print("]");
if (is_load_or_store_in_loop()) {
tty->print(" ");
vpointer().print_on(tty, false);
}
}
#endif

View File

@ -60,8 +60,11 @@
typedef int VTransformNodeIDX;
class VTransformNode;
class VTransformScalarNode;
class VTransformInputScalarNode;
class VTransformMemopScalarNode;
class VTransformDataScalarNode;
class VTransformLoopPhiNode;
class VTransformCFGNode;
class VTransformOuterNode;
class VTransformVectorNode;
class VTransformElementWiseVectorNode;
class VTransformBoolVectorNode;
@ -422,8 +425,8 @@ public:
return false;
}
virtual VTransformScalarNode* isa_Scalar() { return nullptr; }
virtual VTransformInputScalarNode* isa_InputScalar() { return nullptr; }
virtual VTransformMemopScalarNode* isa_MemopScalar() { return nullptr; }
virtual VTransformOuterNode* isa_Outer() { return nullptr; }
virtual VTransformVectorNode* isa_Vector() { return nullptr; }
virtual VTransformElementWiseVectorNode* isa_ElementWiseVector() { return nullptr; }
virtual VTransformBoolVectorNode* isa_BoolVector() { return nullptr; }
@ -434,7 +437,7 @@ public:
virtual bool is_load_in_loop() const { return false; }
virtual bool is_load_or_store_in_loop() const { return false; }
virtual const VPointer& vpointer(const VLoopAnalyzer& vloop_analyzer) const { ShouldNotReachHere(); }
virtual const VPointer& vpointer() const { ShouldNotReachHere(); }
virtual VTransformApplyResult apply(VTransformApplyState& apply_state) const = 0;
@ -448,34 +451,92 @@ public:
NOT_PRODUCT(static void print_node_idx(const VTransformNode* vtn);)
};
// Identity transform for scalar nodes.
class VTransformScalarNode : public VTransformNode {
// Identity transform for scalar loads and stores.
// Wraps a single Load/Store MemNode together with its own copy of a VPointer,
// so that vpointer() can be answered from the vtnode itself, without any
// further arguments.
class VTransformMemopScalarNode : public VTransformNode {
private:
// The wrapped load or store (checked by the assert in the constructor).
MemNode* _node;
// Held by value: copy-initialized from the constructor's vpointer argument.
const VPointer _vpointer;
public:
// n:        the load or store to wrap; n->req() is forwarded to the VTransformNode base.
// vpointer: the pointer description for n; it is copied into this node.
VTransformMemopScalarNode(VTransform& vtransform, MemNode* n, const VPointer& vpointer) :
VTransformNode(vtransform, n->req()), _node(n), _vpointer(vpointer)
{
assert(node()->is_Load() || node()->is_Store(), "must be memop");
}
MemNode* node() const { return _node; }
virtual VTransformMemopScalarNode* isa_MemopScalar() override { return this; }
// True exactly when the wrapped node is a Load.
virtual bool is_load_in_loop() const override { return _node->is_Load(); }
// Unconditionally true: the constructor assert only admits loads and stores.
virtual bool is_load_or_store_in_loop() const override { return true; }
// Returns a reference to the copy stored in this node.
virtual const VPointer& vpointer() const override { return _vpointer; }
virtual VTransformApplyResult apply(VTransformApplyState& apply_state) const override;
NOT_PRODUCT(virtual const char* name() const override { return "MemopScalar"; };)
NOT_PRODUCT(virtual void print_spec() const override;)
};
// Identity transform for scalar data nodes.
class VTransformDataScalarNode : public VTransformNode {
private:
Node* _node;
public:
VTransformScalarNode(VTransform& vtransform, Node* n) :
VTransformNode(vtransform, n->req()), _node(n) {}
Node* node() const { return _node; }
virtual VTransformScalarNode* isa_Scalar() override { return this; }
virtual bool is_load_in_loop() const override { return _node->is_Load(); }
virtual bool is_load_or_store_in_loop() const override { return _node->is_Load() || _node->is_Store(); }
virtual const VPointer& vpointer(const VLoopAnalyzer& vloop_analyzer) const override { return vloop_analyzer.vpointers().vpointer(node()->as_Mem()); }
VTransformDataScalarNode(VTransform& vtransform, Node* n) :
VTransformNode(vtransform, n->req()), _node(n)
{
assert(!_node->is_Mem() && !_node->is_Phi() && !_node->is_CFG(), "must be data node: %s", _node->Name());
}
virtual VTransformApplyResult apply(VTransformApplyState& apply_state) const override;
NOT_PRODUCT(virtual const char* name() const override { return "Scalar"; };)
NOT_PRODUCT(virtual const char* name() const override { return "DataScalar"; };)
NOT_PRODUCT(virtual void print_spec() const override;)
};
// Identity transform for loop head phi nodes.
// Wraps a PhiNode whose control input (in(0)) must be a Loop node; this is
// checked by the assert in the constructor.
class VTransformLoopPhiNode : public VTransformNode {
private:
// The wrapped phi.
PhiNode* _node;
public:
// n: the phi to wrap; n->req() is forwarded to the VTransformNode base.
VTransformLoopPhiNode(VTransform& vtransform, PhiNode* n) :
VTransformNode(vtransform, n->req()), _node(n)
{
assert(_node->in(0)->is_Loop(), "phi ctrl must be Loop: %s", _node->in(0)->Name());
}
virtual VTransformApplyResult apply(VTransformApplyState& apply_state) const override;
NOT_PRODUCT(virtual const char* name() const override { return "LoopPhi"; };)
NOT_PRODUCT(virtual void print_spec() const override;)
};
// Identity transform for CFG nodes.
// Wraps a node for which is_CFG() holds; this is checked by the assert in the
// constructor.
class VTransformCFGNode : public VTransformNode {
private:
// The wrapped CFG node.
Node* _node;
public:
// n: the CFG node to wrap; n->req() is forwarded to the VTransformNode base.
VTransformCFGNode(VTransform& vtransform, Node* n) :
VTransformNode(vtransform, n->req()), _node(n)
{
assert(_node->is_CFG(), "must be CFG node: %s", _node->Name());
}
virtual VTransformApplyResult apply(VTransformApplyState& apply_state) const override;
NOT_PRODUCT(virtual const char* name() const override { return "CFG"; };)
NOT_PRODUCT(virtual void print_spec() const override;)
};
// Wrapper node for nodes outside the loop that are inputs to nodes in the loop.
// Since we want the loop-internal nodes to be able to reference all inputs as vtnodes,
// we must wrap the inputs that are outside the loop into special vtnodes, too.
class VTransformInputScalarNode : public VTransformScalarNode {
class VTransformOuterNode : public VTransformNode {
private:
Node* _node;
public:
VTransformInputScalarNode(VTransform& vtransform, Node* n) :
VTransformScalarNode(vtransform, n) {}
virtual VTransformInputScalarNode* isa_InputScalar() override { return this; }
virtual bool is_load_in_loop() const override { return false; }
virtual bool is_load_or_store_in_loop() const override { return false; }
NOT_PRODUCT(virtual const char* name() const override { return "InputScalar"; };)
VTransformOuterNode(VTransform& vtransform, Node* n) :
VTransformNode(vtransform, n->req()), _node(n) {}
virtual VTransformOuterNode* isa_Outer() override { return this; }
virtual VTransformApplyResult apply(VTransformApplyState& apply_state) const override;
NOT_PRODUCT(virtual const char* name() const override { return "Outer"; };)
NOT_PRODUCT(virtual void print_spec() const override;)
};
// Transform produces a ReplicateNode, replicating the input to all vector lanes.
@ -598,7 +659,7 @@ public:
virtual VTransformMemVectorNode* isa_MemVector() override { return this; }
virtual bool is_load_or_store_in_loop() const override { return true; }
virtual const VPointer& vpointer(const VLoopAnalyzer& vloop_analyzer) const override { return _vpointer; }
virtual const VPointer& vpointer() const override { return _vpointer; }
};
class VTransformLoadVectorNode : public VTransformMemVectorNode {
@ -632,12 +693,12 @@ void VTransformGraph::for_each_memop_in_schedule(Callback callback) const {
for (int i = 0; i < _schedule.length(); i++) {
VTransformNode* vtn = _schedule.at(i);
// We can ignore input nodes, they are outside the loop.
if (vtn->isa_InputScalar() != nullptr) { continue; }
// We must ignore nodes outside the loop.
if (vtn->isa_Outer() != nullptr) { continue; }
VTransformScalarNode* scalar = vtn->isa_Scalar();
if (scalar != nullptr && scalar->node()->is_Mem()) {
callback(scalar->node()->as_Mem());
VTransformMemopScalarNode* scalar = vtn->isa_MemopScalar();
if (scalar != nullptr) {
callback(scalar->node());
}
VTransformVectorNode* vector = vtn->isa_Vector();