8354477: C2 SuperWord: make use of memory edges more explicit

Reviewed-by: kvn, roland
This commit is contained in:
Emanuel Peter 2025-04-21 11:41:45 +00:00
parent 128f2d1cad
commit 4dd64b4971
5 changed files with 62 additions and 55 deletions

View File

@ -34,9 +34,9 @@ void SuperWordVTransformBuilder::build() {
// Connect all vtnodes with their inputs. Possibly create vtnodes for input
// nodes that are outside the loop.
VectorSet vtn_dependencies; // Shared, but cleared for every vtnode.
build_inputs_for_vector_vtnodes(vtn_dependencies);
build_inputs_for_scalar_vtnodes(vtn_dependencies);
VectorSet vtn_memory_dependencies; // Shared, but cleared for every vtnode.
build_inputs_for_vector_vtnodes(vtn_memory_dependencies);
build_inputs_for_scalar_vtnodes(vtn_memory_dependencies);
}
void SuperWordVTransformBuilder::build_vector_vtnodes_for_packed_nodes() {
@ -58,75 +58,77 @@ void SuperWordVTransformBuilder::build_scalar_vtnodes_for_non_packed_nodes() {
}
}
// Wires up the inputs of every vector vtnode, one per pack in _packset.
// The kind of the first pack member (p0) decides which inputs are set:
//   - Load:      scalar address, plus memory dependencies of every pack member.
//   - Store:     scalar address, vector value, plus memory dependencies of every
//                pack member.
//   - Reduction: scalar init at input 1, vector at input 2.
//   - Otherwise: element-wise vector; see the special cases in the last branch.
// The VectorSet argument is shared scratch state and is cleared once per pack.
// NOTE(review): this text is a diff rendering whose +/- markers were stripped, so
// old and new lines are interleaved. Lines that use vtn_dependencies (including
// the first signature line and the trailing add_dependencies_of_node_to_vtnode
// loop) appear to be the removed side -- confirm against the repository.
void SuperWordVTransformBuilder::build_inputs_for_vector_vtnodes(VectorSet& vtn_dependencies) {
void SuperWordVTransformBuilder::build_inputs_for_vector_vtnodes(VectorSet& vtn_memory_dependencies) {
for (int i = 0; i < _packset.length(); i++) {
Node_List* pack = _packset.at(i);
Node* p0 = pack->at(0);
// The vtnode is looked up through the first node of the pack.
VTransformVectorNode* vtn = get_vtnode(p0)->isa_Vector();
assert(vtn != nullptr, "all packs must have vector vtnodes");
vtn_dependencies.clear(); // Add every dependency only once per vtn.
vtn_memory_dependencies.clear(); // Add every memory dependency only once per vtn.
if (p0->is_Load()) {
set_req_with_scalar(p0, vtn, vtn_dependencies, MemNode::Address);
set_req_with_scalar(p0, vtn, MemNode::Address);
// Collect the memory dependencies of all pack members on the one vector vtnode.
for (uint k = 0; k < pack->size(); k++) {
add_memory_dependencies_of_node_to_vtnode(pack->at(k), vtn, vtn_memory_dependencies);
}
} else if (p0->is_Store()) {
set_req_with_scalar(p0, vtn, vtn_dependencies, MemNode::Address);
set_req_with_vector(pack, vtn, vtn_dependencies, MemNode::ValueIn);
set_req_with_scalar(p0, vtn, MemNode::Address);
set_req_with_vector(pack, vtn, MemNode::ValueIn);
// Collect the memory dependencies of all pack members on the one vector vtnode.
for (uint k = 0; k < pack->size(); k++) {
add_memory_dependencies_of_node_to_vtnode(pack->at(k), vtn, vtn_memory_dependencies);
}
} else if (vtn->isa_ReductionVector() != nullptr) {
set_req_with_scalar(p0, vtn, vtn_dependencies, 1); // scalar init
set_req_with_vector(pack, vtn, vtn_dependencies, 2); // vector
set_req_with_scalar(p0, vtn, 1); // scalar init
set_req_with_vector(pack, vtn, 2); // vector
} else {
assert(vtn->isa_ElementWiseVector() != nullptr, "all other vtnodes are handled above");
// Rotate by a constant that the matcher supports: input 2 stays scalar.
if (VectorNode::is_scalar_rotate(p0) &&
p0->in(2)->is_Con() &&
Matcher::supports_vector_constant_rotates(p0->in(2)->get_int())) {
set_req_with_vector(pack, vtn, vtn_dependencies, 1);
set_req_with_scalar(p0, vtn, vtn_dependencies, 2); // constant rotation
set_req_with_vector(pack, vtn, 1);
set_req_with_scalar(p0, vtn, 2); // constant rotation
} else if (VectorNode::is_roundopD(p0)) {
set_req_with_vector(pack, vtn, vtn_dependencies, 1);
set_req_with_scalar(p0, vtn, vtn_dependencies, 2); // constant rounding mode
set_req_with_vector(pack, vtn, 1);
set_req_with_scalar(p0, vtn, 2); // constant rounding mode
} else if (p0->is_CMove()) {
// Cmp + Bool + CMove -> VectorMaskCmp + VectorBlend.
set_all_req_with_vectors(pack, vtn, vtn_dependencies);
set_all_req_with_vectors(pack, vtn);
// Input 1 is read back as the bool-vector (mask compare) vtnode.
VTransformBoolVectorNode* vtn_mask_cmp = vtn->in(1)->isa_BoolVector();
if (vtn_mask_cmp->test()._is_negated) {
vtn->swap_req(2, 3); // swap if test was negated.
}
} else {
set_all_req_with_vectors(pack, vtn, vtn_dependencies);
set_all_req_with_vectors(pack, vtn);
}
}
for (uint k = 0; k < pack->size(); k++) {
add_dependencies_of_node_to_vtnode(pack->at(k), vtn, vtn_dependencies);
}
}
}
// Wires up the inputs of every scalar vtnode. Walks all nodes of the loop body
// and skips those whose vtnode is not a scalar vtnode.
//   - Load:        scalar address, plus memory dependencies.
//   - Store:       scalar address and scalar value, plus memory dependencies.
//   - CountedLoop: no inputs are set.
//   - Phi:         only input 0 is set.
//   - Otherwise:   all non-null inputs are set as scalars.
// The VectorSet argument is shared scratch state and is cleared once per vtnode.
// NOTE(review): this text is a diff rendering whose +/- markers were stripped, so
// old and new lines are interleaved. Lines that use vtn_dependencies (including
// the first signature line and the trailing add_dependencies_of_node_to_vtnode
// call) appear to be the removed side -- confirm against the repository.
void SuperWordVTransformBuilder::build_inputs_for_scalar_vtnodes(VectorSet& vtn_dependencies) {
void SuperWordVTransformBuilder::build_inputs_for_scalar_vtnodes(VectorSet& vtn_memory_dependencies) {
for (int i = 0; i < _vloop_analyzer.body().body().length(); i++) {
Node* n = _vloop_analyzer.body().body().at(i);
VTransformScalarNode* vtn = get_vtnode(n)->isa_Scalar();
// Not a scalar vtnode: nothing to do in this pass.
if (vtn == nullptr) { continue; }
vtn_dependencies.clear(); // Add every dependency only once per vtn.
vtn_memory_dependencies.clear(); // Add every memory dependency only once per vtn.
if (n->is_Load()) {
set_req_with_scalar(n, vtn, vtn_dependencies, MemNode::Address);
set_req_with_scalar(n, vtn, MemNode::Address);
add_memory_dependencies_of_node_to_vtnode(n, vtn, vtn_memory_dependencies);
} else if (n->is_Store()) {
set_req_with_scalar(n, vtn, vtn_dependencies, MemNode::Address);
set_req_with_scalar(n, vtn, vtn_dependencies, MemNode::ValueIn);
set_req_with_scalar(n, vtn, MemNode::Address);
set_req_with_scalar(n, vtn, MemNode::ValueIn);
add_memory_dependencies_of_node_to_vtnode(n, vtn, vtn_memory_dependencies);
} else if (n->is_CountedLoop()) {
continue; // Is "root", has no dependency.
} else if (n->is_Phi()) {
// CountedLoop Phi's: ignore backedge (and entry value).
assert(n->in(0) == _vloop.cl(), "only Phi's from the CountedLoop allowed");
set_req_with_scalar(n, vtn, vtn_dependencies, 0);
set_req_with_scalar(n, vtn, 0);
continue;
} else {
set_all_req_with_scalars(n, vtn, vtn_dependencies);
set_all_req_with_scalars(n, vtn);
}
add_dependencies_of_node_to_vtnode(n, vtn, vtn_dependencies);
}
}
@ -175,10 +177,9 @@ VTransformVectorNode* SuperWordVTransformBuilder::make_vector_vtnode_for_pack(co
return vtn;
}
// Sets input `index` of vtn to the vtnode obtained for n->in(index) via
// get_vtnode_or_wrap_as_input_scalar.
// NOTE(review): diff rendering with +/- markers stripped -- the signature that
// takes vtn_dependencies and the vtn_dependencies.set(...) line appear to be the
// removed side; confirm against the repository.
void SuperWordVTransformBuilder::set_req_with_scalar(Node* n, VTransformNode* vtn, VectorSet& vtn_dependencies, const int index) {
void SuperWordVTransformBuilder::set_req_with_scalar(Node* n, VTransformNode* vtn, const int index) {
VTransformNode* req = get_vtnode_or_wrap_as_input_scalar(n->in(index));
vtn->set_req(index, req);
vtn_dependencies.set(req->_idx);
}
// Either get the existing vtnode vector input (when input is a pack), or else make a
@ -273,46 +274,42 @@ VTransformNode* SuperWordVTransformBuilder::get_vtnode_or_wrap_as_input_scalar(N
return vtn;
}
// Sets input `j` of vtn to the vector input vtnode obtained for the pack at
// index j via get_or_make_vtnode_vector_input_at_index.
// NOTE(review): diff rendering with +/- markers stripped -- the signature that
// takes vtn_dependencies and the vtn_dependencies.set(...) line appear to be the
// removed side; confirm against the repository.
void SuperWordVTransformBuilder::set_req_with_vector(const Node_List* pack, VTransformNode* vtn, VectorSet& vtn_dependencies, int j) {
void SuperWordVTransformBuilder::set_req_with_vector(const Node_List* pack, VTransformNode* vtn, int j) {
VTransformNode* req = get_or_make_vtnode_vector_input_at_index(pack, j);
vtn->set_req(j, req);
vtn_dependencies.set(req->_idx);
}
// Sets every non-null input of n (indices 0 .. n->req()-1) on vtn as a scalar
// input, using set_req_with_scalar. vtn must have the same number of reqs as n.
// NOTE(review): diff rendering with +/- markers stripped -- lines that pass
// vtn_dependencies appear to be the removed side; confirm against the repository.
void SuperWordVTransformBuilder::set_all_req_with_scalars(Node* n, VTransformNode* vtn, VectorSet& vtn_dependencies) {
void SuperWordVTransformBuilder::set_all_req_with_scalars(Node* n, VTransformNode* vtn) {
assert(vtn->req() == n->req(), "scalars must have same number of reqs");
for (uint j = 0; j < n->req(); j++) {
Node* def = n->in(j);
// Inputs that are absent on the node stay unset on the vtnode.
if (def == nullptr) { continue; }
set_req_with_scalar(n, vtn, vtn_dependencies, j);
set_req_with_scalar(n, vtn, j);
}
}
// Sets inputs 1 .. vtn->req()-1 of vtn as vector inputs, using
// set_req_with_vector. Indices whose input is null on the first pack member
// (p0) are skipped. vtn may have fewer reqs than p0, never more.
// NOTE(review): diff rendering with +/- markers stripped -- lines that pass
// vtn_dependencies appear to be the removed side; confirm against the repository.
void SuperWordVTransformBuilder::set_all_req_with_vectors(const Node_List* pack, VTransformNode* vtn, VectorSet& vtn_dependencies) {
void SuperWordVTransformBuilder::set_all_req_with_vectors(const Node_List* pack, VTransformNode* vtn) {
Node* p0 = pack->at(0);
assert(vtn->req() <= p0->req(), "must have at at most as many reqs");
// Vectors have no ctrl, so ignore it.
for (uint j = 1; j < vtn->req(); j++) {
Node* def = p0->in(j);
// Only p0 is inspected to decide whether index j has an input.
if (def == nullptr) { continue; }
set_req_with_vector(pack, vtn, vtn_dependencies, j);
set_req_with_vector(pack, vtn, j);
}
}
// Adds the memory dependencies of node n to vtn. Iterates the predecessors of n
// in the dependency graph, skips predecessors for which _vloop.in_bb(pred) is
// false and predecessors that were not reached via a memory edge, and adds the
// vtnode of each remaining predecessor to vtn at most once. The VectorSet
// records (by vtnode _idx) which dependencies were already added; the caller is
// expected to clear it per vtnode.
// NOTE(review): this text is a diff rendering whose +/- markers were stripped, so
// old and new lines are interleaved. The first signature line, the
// "n->is_Mem() && !pred->is_Mem()" filter, the reduction self-cycle check and the
// vtn_dependencies / add_dependency lines appear to be the removed side --
// confirm against the repository.
void SuperWordVTransformBuilder::add_dependencies_of_node_to_vtnode(Node*n, VTransformNode* vtn, VectorSet& vtn_dependencies) {
void SuperWordVTransformBuilder::add_memory_dependencies_of_node_to_vtnode(Node*n, VTransformNode* vtn, VectorSet& vtn_memory_dependencies) {
for (VLoopDependencyGraph::PredsIterator preds(_vloop_analyzer.dependency_graph(), n); !preds.done(); preds.next()) {
Node* pred = preds.current();
if (!_vloop.in_bb(pred)) { continue; }
// The iterator also yields the node's regular inputs; those are not memory edges.
if (!preds.is_current_memory_edge()) { continue; }
// Only add memory dependencies to memory nodes. All others are taken care of with the req.
if (n->is_Mem() && !pred->is_Mem()) { continue; }
// Only track every memory edge once.
VTransformNode* dependency = get_vtnode(pred);
if (vtn_memory_dependencies.test_set(dependency->_idx)) { continue; }
// Reduction self-cycle?
if (vtn == dependency && _vloop_analyzer.reductions().is_marked_reduction(n)) { continue; }
if (vtn_dependencies.test_set(dependency->_idx)) { continue; }
vtn->add_dependency(dependency); // Add every dependency only once per vtn.
assert(n->is_Mem() && pred->is_Mem(), "only memory edges");
vtn->add_memory_dependency(dependency); // Add every memory dependency only once per vtn.
}
}

View File

@ -54,8 +54,8 @@ private:
void build();
void build_vector_vtnodes_for_packed_nodes();
void build_scalar_vtnodes_for_non_packed_nodes();
void build_inputs_for_vector_vtnodes(VectorSet& vtn_dependencies);
void build_inputs_for_scalar_vtnodes(VectorSet& vtn_dependencies);
void build_inputs_for_vector_vtnodes(VectorSet& vtn_memory_dependencies);
void build_inputs_for_scalar_vtnodes(VectorSet& vtn_memory_dependencies);
// Helper methods for building VTransform.
VTransformNode* get_vtnode_or_null(Node* n) const {
@ -77,11 +77,11 @@ private:
VTransformVectorNode* make_vector_vtnode_for_pack(const Node_List* pack) const;
VTransformNode* get_or_make_vtnode_vector_input_at_index(const Node_List* pack, const int index);
VTransformNode* get_vtnode_or_wrap_as_input_scalar(Node* n);
void set_req_with_scalar(Node* n, VTransformNode* vtn, VectorSet& vtn_dependencies, const int index);
void set_req_with_vector(const Node_List* pack, VTransformNode* vtn, VectorSet& vtn_dependencies, const int index);
void set_all_req_with_scalars(Node* n, VTransformNode* vtn, VectorSet& vtn_dependencies);
void set_all_req_with_vectors(const Node_List* pack, VTransformNode* vtn, VectorSet& vtn_dependencies);
void add_dependencies_of_node_to_vtnode(Node* n, VTransformNode* vtn, VectorSet& vtn_dependencies);
void set_req_with_scalar(Node* n, VTransformNode* vtn, const int index);
void set_req_with_vector(const Node_List* pack, VTransformNode* vtn, const int index);
void set_all_req_with_scalars(Node* n, VTransformNode* vtn);
void set_all_req_with_vectors(const Node_List* pack, VTransformNode* vtn);
void add_memory_dependencies_of_node_to_vtnode(Node* n, VTransformNode* vtn, VectorSet& vtn_memory_dependencies);
};
#endif // SHARE_OPTO_SUPERWORD_VTRANSFORM_BUILDER_HPP

View File

@ -399,6 +399,7 @@ VLoopDependencyGraph::PredsIterator::PredsIterator(const VLoopDependencyGraph& d
_node(node),
_dependency_node(dependency_graph.dependency_node(node)),
_current(nullptr),
_is_current_memory_edge(false),
_next_pred(0),
_end_pred(node->req()),
_next_memory_pred(0),
@ -418,11 +419,14 @@ VLoopDependencyGraph::PredsIterator::PredsIterator(const VLoopDependencyGraph& d
// Advances the iterator by one predecessor and records whether that predecessor
// was reached through a memory edge.
//   1. While input indices remain (_next_pred < _end_pred), yield _node->in(i).
//   2. Then, while memory predecessor edges remain, yield the body node that the
//      dependency node's memory_pred_edge index refers to.
//   3. Otherwise mark the iteration as done by setting _current to nullptr.
void VLoopDependencyGraph::PredsIterator::next() {
  // Phase 1: regular inputs of the node.
  if (_next_pred < _end_pred) {
    _is_current_memory_edge = false;
    _current = _node->in(_next_pred++);
    return;
  }

  // Phase 2: memory predecessors, stored as indices into the loop body.
  if (_next_memory_pred < _end_memory_pred) {
    const int body_idx = _dependency_node->memory_pred_edge(_next_memory_pred++);
    _is_current_memory_edge = true;
    _current = _dependency_graph._body.body().at(body_idx);
    return;
  }

  // Exhausted: nothing left to yield.
  _is_current_memory_edge = false;
  _current = nullptr; // done
}

View File

@ -648,6 +648,7 @@ public:
const DependencyNode* _dependency_node;
Node* _current;
bool _is_current_memory_edge;
// Iterate in node->in(i)
int _next_pred;
@ -665,6 +666,10 @@ public:
assert(!done(), "not done yet");
return _current;
}
// Returns the _is_current_memory_edge flag, i.e. whether the predecessor that
// current() yields right now was reached through a memory edge.
// Must not be called once the iterator is done.
bool is_current_memory_edge() const {
assert(!done(), "not done yet");
return _is_current_memory_edge;
}
};
};

View File

@ -264,7 +264,8 @@ public:
const VTransformNodeIDX _idx;
private:
// _in is split into required inputs (_req), and additional dependencies.
// _in is split into required inputs (_req, i.e. all data dependencies),
// and memory dependencies.
const uint _req;
GrowableArray<VTransformNode*> _in;
GrowableArray<VTransformNode*> _out;
@ -294,7 +295,7 @@ public:
_in.at_put(j, tmp);
}
void add_dependency(VTransformNode* n) {
void add_memory_dependency(VTransformNode* n) {
assert(n != nullptr, "no need to add nullptr");
_in.push(n);
n->add_out(this);