diff --git a/src/hotspot/share/opto/loopTransform.cpp b/src/hotspot/share/opto/loopTransform.cpp index 1d670253bbf..2ebc06a4bdf 100644 --- a/src/hotspot/share/opto/loopTransform.cpp +++ b/src/hotspot/share/opto/loopTransform.cpp @@ -1314,22 +1314,30 @@ bool IdealLoopTree::policy_peel_only(PhaseIdealLoop *phase) const { return true; } -//------------------------------clone_up_backedge_goo-------------------------- -// Return the appropriate node to use on the new loop's data-flow path derived from 'n'. -// If no suitable existing node can be reused, create a private clone controlled by -// the 'preheader_ctrl'. +//------------------------------resolve_value_for_preheader-------------------------- +// Determine the appropriate node to use on the data-flow path of a newly created +// loop, starting from node 'n'. If an existing node cannot be safely reused, +// recursively create a private clone whose control is anchored in 'preheader_ctrl'. // -// If 'back_ctrl' is not null: -// - Clone a private version of node 'n' in 'preheader_ctrl' if it resides in the 'back_ctrl' block. -// - Otherwise, return 'n' unchanged. +// The behavior depends on the selected ResolveMode: // -// If 'back_ctrl' is null: (Specially for pre-loop exit in resolve_input_for_drain_or_post()) -// - Clone 'n' into 'preheader_ctrl' if its block does not strictly dominate 'preheader_ctrl'. -// - Otherwise, return 'n'. -Node *PhaseIdealLoop::clone_up_backedge_goo(Node *back_ctrl, Node *preheader_ctrl, Node *n, VectorSet &visited, Node_Stack &clones) { - bool is_from_preloop_exit = (back_ctrl == nullptr); +// FromBackedge: +// - If 'n' is controlled by 'back_ctrl', clone a private copy of 'n' and +// anchor it in 'preheader_ctrl'. +// - Otherwise, reuse 'n' unchanged. +// +// FromPreLoopExit: +// - If the control of 'n' does not strictly dominate 'preheader_ctrl', +// clone 'n' and anchor the clone in 'preheader_ctrl'. +// - Otherwise, reuse 'n' unchanged. 
+// +// The method ensures that each node is visited at most once, reusing previously +// created clones when available. If cloning is required, all non-control inputs +// are recursively resolved to preserve correctness of the new data-flow. +Node *PhaseIdealLoop::resolve_value_for_preheader(ResolveMode mode, Node* back_ctrl, Node* preheader_ctrl, + Node* n, VectorSet& visited, Node_Stack& clones) { bool requires_clone_from_preloop_exit = !is_dominator(get_ctrl(n), preheader_ctrl); - if (is_from_preloop_exit) { + if (mode == FromPreLoopExit) { // Specially for pre-loop exit in resolve_input_for_drain_or_post() if (!requires_clone_from_preloop_exit) { return n; } } else { @@ -1345,7 +1353,7 @@ Node *PhaseIdealLoop::clone_up_backedge_goo(Node *back_ctrl, Node *preheader_ctr Node *x = nullptr; // If required, a clone of 'n' // Check for 'n' being pinned in the backedge. if (n->in(0) && (n->in(0) == back_ctrl || - (is_from_preloop_exit && requires_clone_from_preloop_exit))) { + (mode == FromPreLoopExit && requires_clone_from_preloop_exit))) { assert(clones.find(n->_idx) == nullptr, "dead loop"); x = n->clone(); // Clone a copy of 'n' to preheader clones.push(x, n->_idx); @@ -1356,7 +1364,7 @@ Node *PhaseIdealLoop::clone_up_backedge_goo(Node *back_ctrl, Node *preheader_ctr // If there are no changes we can just return 'n', otherwise // we need to clone a private copy and change it. 
for (uint i = 1; i < n->req(); i++) { - Node *g = clone_up_backedge_goo(back_ctrl, preheader_ctrl, n->in(i), visited, clones); + Node* g = resolve_value_for_preheader(mode, back_ctrl, preheader_ctrl, n->in(i), visited, clones); if (g != n->in(i)) { if (!x) { assert(clones.find(n->_idx) == nullptr, "dead loop"); @@ -1369,7 +1377,7 @@ Node *PhaseIdealLoop::clone_up_backedge_goo(Node *back_ctrl, Node *preheader_ctr if (x) { // x can legally float to pre-header location register_new_node(x, preheader_ctrl); return x; - } else if (!is_from_preloop_exit) { // raise n to cover LCA of uses + } else if (mode != FromPreLoopExit) { // raise n to cover LCA of uses set_ctrl(n, find_non_split_ctrl(back_ctrl->in(0))); } return n; @@ -1398,19 +1406,21 @@ Node* PhaseIdealLoop::resolve_input_for_drain_or_post(Node* post_head_ctrl, Vect Node* main_phi, CloneLoopMode mode) { CountedLoopNode* main_head = main_phi->in(0)->as_CountedLoop(); Node* main_backedge_ctrl = main_head->back_control(); - // For the post loop, we call clone_up_backedge_goo() to obtain the fall-out values + // For the post loop, we call resolve_value_for_preheader() to obtain the fall-out values // from the main loop, which serve as the fall-in values for the post loop. if (mode == ControlAroundStripMined) { - return clone_up_backedge_goo(main_backedge_ctrl, - post_head_ctrl, - main_phi->in(LoopNode::LoopBackControl), - visited, clones); + return resolve_value_for_preheader(FromBackedge, main_backedge_ctrl, + post_head_ctrl, + main_phi->in(LoopNode::LoopBackControl), + visited, clones); } + assert(mode == InsertVectorizedDrain, "We don't support other modes"); + // For drain loop, after inserting zero trip guard for the vectorized drain loop, // we now need to make the fall-in values to the vectorized drain // loop come from phis merging exit values from the pre loop and - // the main loop. + // the main loop, see "drain_input". // (new edges are marked with "*/*" or "*\*".) 
// // pre loop exit 'pre_incr' @@ -1423,7 +1433,7 @@ Node* PhaseIdealLoop::resolve_input_for_drain_or_post(Node* post_head_ctrl, Vect // | | | \ / // | IfTrue | ----> PhiNode('main_phi') // | | v | | - // | -------loop end ---- addI('iv_after_main') + // | -------loop end ---- addI('main_exit_value') // \ | | // \ IfFalse | // \ / 'pre_incr' | @@ -1451,45 +1461,32 @@ Node* PhaseIdealLoop::resolve_input_for_drain_or_post(Node* post_head_ctrl, Vect // post zero-trip guard // ... // - // We look for an existing Phi node 'drain_input' among the uses of 'iv_after_main'. + // We look for an existing Phi node 'drain_input' among the uses of 'main_exit_value'. // // If no valid Phi is found, we create a new Phi that merges output data edges // from both the pre-loop and main loop. The example here is test5() added in // TestVectorizedDrainLoop.java. Node* drain_input = nullptr; - Node* iv_after_main = main_phi->in(LoopNode::LoopBackControl); - if (get_ctrl(iv_after_main) != main_backedge_ctrl) { - // We try to look up target phi from all uses of node 'iv_after_main'. - drain_input = find_merge_phi_for_vectorized_drain(iv_after_main, main_merge_region); - } + Node* main_backedge = main_phi->in(LoopNode::LoopBackControl); + drain_input = find_merge_phi_for_vectorized_drain(main_backedge, main_merge_region); if (drain_input == nullptr) { // Make the fall-in values to the vectorized drain-loop come from a phi node // merging the data from the vector main-loop and the pre-loop. - // If Node 'iv_after_main' lives in the 'main_backedge_ctrl' block, we clone a - // private version of 'iv_after_main' in 'main_exit' block and return that, - // otherwise return 'iv_after_main'. 
- iv_after_main = clone_up_backedge_goo(main_backedge_ctrl, main_merge_region->in(2), - iv_after_main, visited, clones); - drain_input = PhiNode::make(main_merge_region, iv_after_main); - Node* pre_incr = main_phi->in(LoopNode::EntryControl); - if (has_ctrl(pre_incr) && !is_dominator(get_ctrl(pre_incr), main_merge_region->in(1))) { + Node* main_exit_value = resolve_value_for_preheader(FromBackedge, main_backedge_ctrl, main_merge_region->in(2), + main_backedge, visited, clones); + drain_input = PhiNode::make(main_merge_region, main_exit_value); + Node* main_entry = main_phi->in(LoopNode::EntryControl); + Node* pre_exit_value = main_entry; + if (has_ctrl(main_entry) && !is_dominator(get_ctrl(main_entry), main_merge_region->in(1))) { // If the entry input of the main_phi is not directly from pre-loop but has been preprocessed // by some nodes floating below the zero-trip guard of main-loop, we need to clone a private // version of these nodes for vectorized drain loop. - pre_incr = clone_up_backedge_goo(nullptr, main_merge_region->in(1), pre_incr, visited, clones); - } - drain_input->set_req(1, pre_incr); - // If inserting a new Phi, check for prior hits - Node* hit = _igvn.hash_find_insert(drain_input); - if (hit == nullptr) { - // Register new phi - _igvn.register_new_node_with_optimizer(drain_input); - } else { - // Remove the new phi from the graph and use the hit - _igvn.remove_dead_node(drain_input); - drain_input = hit; + pre_exit_value = resolve_value_for_preheader(FromPreLoopExit, nullptr, main_merge_region->in(1), + main_entry, visited, clones); } + drain_input->set_req(1, pre_exit_value); + _igvn.register_new_node_with_optimizer(drain_input); set_ctrl(drain_input, main_merge_region); } return drain_input; @@ -1635,10 +1632,10 @@ void PhaseIdealLoop::insert_pre_post_loops(IdealLoopTree *loop, Node_List &old_n Node* main_phi = main_head->out(i2); if (main_phi->is_Phi() && main_phi->in(0) == main_head && main_phi->outcnt() > 0) { Node* pre_phi = 
old_new[main_phi->_idx]; - Node* fallpre = clone_up_backedge_goo(pre_head->back_control(), - main_head->skip_strip_mined()->in(LoopNode::EntryControl), - pre_phi->in(LoopNode::LoopBackControl), - visited, clones); + Node* fallpre = resolve_value_for_preheader(FromBackedge, pre_head->back_control(), + main_head->skip_strip_mined()->in(LoopNode::EntryControl), + pre_phi->in(LoopNode::LoopBackControl), + visited, clones); _igvn.hash_delete(main_phi); main_phi->set_req(LoopNode::EntryControl, fallpre); } @@ -1828,6 +1825,9 @@ void PhaseIdealLoop::rewire_ctrl_for_drain_loop_nodes(CountedLoopNode* main_head for (uint i = 0; i < start->outcnt(); i++) { Node* loop_node = start->raw_out(i); if (loop_node->in(0) == start && loop_node->_idx >= new_counter) { + assert(start == main_head->skip_strip_mined()->in(LoopNode::EntryControl) || + start == main_head->skip_assertion_predicates_with_halt(), + "initialize_assertion_predicates_for_post_loop() must be extended to avoid sinking assertion predicates"); _igvn.replace_input_of(loop_node, 0, new_start); --i; } @@ -1941,10 +1941,10 @@ Node* PhaseIdealLoop::insert_post_or_drain_loop(IdealLoopTree* loop, Node_List& post_end->_prob = PROB_FAIR; // Step 2: Find some key nodes which control the execution paths of the zero trip guard. - // Step 2.1: Find 'zero_ctrl' which will be the control input of the zero trip guard. + // Step 2.1: Get 'zero_ctrl' which will be the control input of the zero trip guard. Node* zero_ctrl = nullptr; if (mode == InsertVectorizedDrain) { - // For vectorized drain loop, 'zero_ctrl' should be the node merges exits + // For vectorized drain loop, 'zero_ctrl' should be the node that merges exits // from the main loop and the pre loop. 
zero_ctrl = main_exit->unique_ctrl_out_or_null(); assert(zero_ctrl != nullptr && zero_ctrl->is_Region(), @@ -2255,10 +2255,10 @@ void PhaseIdealLoop::create_assertion_predicates_at_main_or_post_loop(CountedLoo } // Rewire any control dependent nodes on the old target loop entry before adding Assertion Predicate related nodes. -// These have been added by PhaseIdealLoop::clone_up_backedge_goo() and assume to be ending up at the target loop entry +// These have been added by PhaseIdealLoop::resolve_value_for_preheader() and assume to be ending up at the target loop entry // which is no longer the case when adding additional Assertion Predicates. Fix this by rewiring these nodes to the new // target loop entry which corresponds to the tail of the last Assertion Predicate before the target loop. This is safe -// to do because these control dependent nodes on the old target loop entry created by clone_up_backedge_goo() were +// to do because these control dependent nodes on the old target loop entry created by resolve_value_for_preheader() were // pinned on the loop backedge before. The Assertion Predicates are not control dependent on these nodes in any way. void PhaseIdealLoop::rewire_old_target_loop_entry_dependency_to_new_entry( CountedLoopNode* target_loop_head, const Node* old_target_loop_entry, diff --git a/src/hotspot/share/opto/loopnode.hpp b/src/hotspot/share/opto/loopnode.hpp index d7d051eb774..84de319dedd 100644 --- a/src/hotspot/share/opto/loopnode.hpp +++ b/src/hotspot/share/opto/loopnode.hpp @@ -1429,13 +1429,13 @@ public: enum CloneLoopMode { IgnoreStripMined = 0, // Only clone inner strip mined loop CloneIncludesStripMined = 1, // clone both inner and outer strip mined loops - ControlAroundStripMined = 2, // Only clone inner strip mined loop, - // result control flow branches - // either to inner clone or outer - // strip mined loop. 
- InsertVectorizedDrain = 3 // Only clone inner strip mined vector loop, - // result control flow branches to inner clone or - // scalar post loop + ControlAroundStripMined = 2, // Only clone the inner strip-mined loop and insert + // control flow around it. Exit control flow + // branches either to inner clone or to the outer + // strip-mined loop. + InsertVectorizedDrain = 3 // Only clone the inner strip-mined vector loop and + // insert control flow that branches either to the + // cloned inner loop or to the scalar post loop. }; void clone_loop( IdealLoopTree *loop, Node_List &old_new, int dom_depth, CloneLoopMode mode, Node* side_by_side_idom = nullptr); @@ -1489,18 +1489,12 @@ public: // Add a vectorized drain loop between the main loop and the current post loop. void insert_vectorized_drain_loop(IdealLoopTree* loop, Node_List& old_new); - // Return the appropriate node to use on the new loop's data-flow path derived from 'n'. - // If no suitable existing node can be reused, create a private clone controlled by the - // 'preheader_ctrl'. - // - // If 'back_ctrl' is not null: - // - Clone a private version of node 'n' in 'preheader_ctrl' if it resides in the 'back_ctrl' block. - // - Otherwise, return 'n' unchanged. - // - // If 'back_ctrl' is null: (Specially for pre-loop exit in resolve_input_for_drain_or_post()) - // - Clone 'n' into 'preheader_ctrl' if its block does not strictly dominate 'preheader_ctrl'. - // - Otherwise, return 'n'. 
- Node *clone_up_backedge_goo( Node *back_ctrl, Node *preheader_ctrl, Node *n, VectorSet &visited, Node_Stack &clones ); + enum ResolveMode { + FromPreLoopExit, // Resolve the correct value coming from the pre-loop exit + FromBackedge, // Resolve the correct value coming from the loop backedge 'back_ctrl' + }; + Node* resolve_value_for_preheader(ResolveMode mode, Node* back_ctrl, Node* preheader_ctrl, + Node* n, VectorSet& visited, Node_Stack& clones ); // Determine and obtain the correct fall-in values for either the drain loop or the post loop. Node* resolve_input_for_drain_or_post(Node* post_head_ctrl, VectorSet& visited, diff --git a/src/hotspot/share/opto/loopopts.cpp b/src/hotspot/share/opto/loopopts.cpp index 65193bc4ffa..e17329177ef 100644 --- a/src/hotspot/share/opto/loopopts.cpp +++ b/src/hotspot/share/opto/loopopts.cpp @@ -2373,7 +2373,7 @@ void PhaseIdealLoop::replace_input_with_new_clone(Node* use, Node* old_in, } if (make_change && use->_idx >= new_counter) { Node* hit = _igvn.hash_find_insert(use); - if (hit) + if (hit != nullptr) _igvn.replace_node(use, hit); } } @@ -2387,7 +2387,7 @@ void PhaseIdealLoop::replace_input_with_new_clone(Node* use, Node* old_in, // | | \ / // IfTrue | -----> PhiNode // | v | | -// loop end ------ addI('pre_incr') +// loop end ------ addI('pre_exit_value') // | / // IfFalse / // | / @@ -2395,22 +2395,22 @@ void PhaseIdealLoop::replace_input_with_new_clone(Node* use, Node* old_in, // main zero-trip guard // / \ // IfFalse IfTrue -// / | \_____________________________ -// / | \ -// | ---> main loop head ----> vectorized drain loop head -// | | | \ 'pre_incr' | | \ 'pre_incr' -// | | | \ / | | \ / -// | IfTrue | -----> PhiNode IfTrue | -----> PhiNode -// | | v | | | v | | -// | loop end ------ addI loop end ----- addI('drain_incr') -// | | | | -// \ IfFalse | IfFalse -// \ | | / -// \ / 'pre_incr' | / -// 'main_merge_region' | | / -// \ \ | | / -// \ 'main_merge_phi' / -// \ / +// / | \________________________________ +// / | \ +// 
| ---> main loop head ----> vectorized drain loop head +// | | | \'pre_exit_value' | | \ 'pre_exit_value' +// | | | \ / | | \ / +// | IfTrue | -----> PhiNode IfTrue | -----> PhiNode +// | | v | | | v | | +// | loop end -------- addI loop end ----- addI('drain_exit_value') +// | | | | +// \ IfFalse | IfFalse +// \ | | / +// \ / 'pre_exit_value' | / +// 'main_merge_region' | | / +// \ \ | | / +// \ 'main_merge_phi' / +// \ / // RegionNode('drain_merge_region') // | // | 'main_merge_phi' @@ -2428,15 +2428,15 @@ void PhaseIdealLoop::replace_input_with_new_clone(Node* use, Node* old_in, // ... // // The increment feeding the trip-counter phi in the new loop body -// is 'drain_incr', which is dangling. We create a new 'drain_merge_phi' -// node which will merge 'drain_incr' and 'main_merge_phi' and take +// is 'drain_exit_value', which is dangling. We create a new 'drain_merge_phi' +// node which will merge 'drain_exit_value' and 'main_merge_phi' and take // the RegionNode, 'new_prev', as the control input which was created in // fix_ctrl_uses_for_vectorized_drain(). This new node 'drain_merge_phi' // will replace all other uses of 'main_merge_phi'. // The data uses will become: // (new edges are marked with "*/*" or "*\*".) 
-// pre loop exit 'pre_incr' +// pre loop exit 'pre_exit_value' // | / // main zero-trip guard // / \ @@ -2444,17 +2444,18 @@ void PhaseIdealLoop::replace_input_with_new_clone(Node* use, Node* old_in, // / | \______________________________ // / | \ // | ---> main loop head ----> vectorized drain loop head -// | | | \ 'pre_incr' | | \ 'pre_incr' +// | | | \ 'pre_exit_value' | | \ 'pre_exit_value' // | | | \ / | | \ / // | IfTrue | ----> PhiNode IfTrue | ------> PhiNode // | | v | | | v | | -// | loop end ------ addI('main_old') loop end -------- addI('drain_incr') -// | | | | */* -// \ IfFalse | IfFalse('drain_exit') */* -// \ | | */* -// \ | 'pre_incr' | */* -// 'main_merge_region' | | */* -// \ \ | | */* +// | loop end ------ addI loop end -------- addI('drain_exit_value') +// | | ('main_exit_value') | */* +// | | | | */* +// \ IfFalse | IfFalse('drain_exit') */* +// \ | | */* +// \ | 'pre_exit_value' | */* +// 'main_merge_region' | | */* +// \ \ | | */* // \ PhiNode('main_merge_phi') */* // \ *|* */* // \ 'drain_exit' *|* */* @@ -2474,24 +2475,27 @@ void PhaseIdealLoop::replace_input_with_new_clone(Node* use, Node* old_in, // loop end ------ addI // | // ... -void PhaseIdealLoop::fix_data_uses_for_vectorized_drain(Node* main_old, Node_List &old_new, +void PhaseIdealLoop::fix_data_uses_for_vectorized_drain(Node* main_exit_value, Node_List& old_new, IdealLoopTree* loop, IdealLoopTree* outer_loop, Node_List& worklist, uint new_counter) { + assert(worklist.size() == 0, "worklist should be empty before we reach this point."); - for (DUIterator_Fast jmax, j = main_old->fast_outs(jmax); j < jmax; j++) { - worklist.push(main_old->fast_out(j)); + for (DUIterator_Fast jmax, j = main_exit_value->fast_outs(jmax); j < jmax; j++) { + worklist.push(main_exit_value->fast_out(j)); } Node_List visit_list; Node_List phi_list; + // Walk all non-loop-local uses of "main_exit_value" to clone exit-path data-flow and rebuild the + // corresponding `drain_merge_phi`s. 
while (worklist.size() != 0) { Node* use = worklist.pop(); if (!has_node(use)) continue; // Ignore dead nodes if (use->in(0) == C->top()) continue; IdealLoopTree* ctrl_or_self = get_loop(has_ctrl(use) ? get_ctrl(use) : use); if (!loop->is_member(ctrl_or_self) && !outer_loop->is_member(ctrl_or_self) && - (!main_old->is_CFG() || !use->is_CFG())) { + (!main_exit_value->is_CFG() || !use->is_CFG())) { // Find the phi node merging the data from pre-loop and vector main-loop. visit_list.clear(); @@ -2545,12 +2549,12 @@ void PhaseIdealLoop::fix_data_uses_for_vectorized_drain(Node* main_old, Node_Lis } } - // 'drain_incr' is now dangling. In the following while loop, for each + // 'drain_exit_value' is now dangling. In the following while loop, for each // 'main_merge_phi', we create a corresponding 'drain_merge_phi', // as illustrated below: // - // main_merge_phi = Phi(pre_incr, main_old) - // drain_merge_phi = Phi(drain_incr, main_merge_phi) + // main_merge_phi = Phi(pre_exit_value, main_exit_value) + // drain_merge_phi = Phi(drain_exit_value, main_merge_phi) // // The 'drain_merge_phi' takes the RegionNode 'drain_merge_region' as its // control input. 
This newly created 'drain_merge_phi' replaces all other @@ -2585,8 +2589,8 @@ void PhaseIdealLoop::fix_data_uses_for_vectorized_drain(Node* main_old, Node_Lis Node* drain_merge_region = old_new[main_merge_region->_idx]; assert(drain_merge_region != nullptr, "just made this in step 3"); // Make a new Phi merging data values properly - Node* drain_incr = old_new[last_in->_idx]; - Node* drain_merge_phi = PhiNode::make(drain_merge_region, drain_incr); + Node* drain_exit_value = old_new[last_in->_idx]; + Node* drain_merge_phi = PhiNode::make(drain_merge_region, drain_exit_value); drain_merge_phi->set_req(2, main_merge_phi); // If inserting a new Phi, check for prior hits Node* hit = _igvn.hash_find_insert(drain_merge_phi); diff --git a/src/hotspot/share/opto/predicates.hpp b/src/hotspot/share/opto/predicates.hpp index cd0832cc062..dce73884bd0 100644 --- a/src/hotspot/share/opto/predicates.hpp +++ b/src/hotspot/share/opto/predicates.hpp @@ -1080,8 +1080,8 @@ class NodeInOriginalLoopBody : public NodeInLoopBody { }; // This class checks whether a node is in the main loop body and not the pre loop body. We cannot use the -// NodeInOriginalLoopBody class because PhaseIdealLoop::clone_up_backedge_goo() could clone additional nodes that -// should be pinned at the main loop body entry. The check in NodeInOriginalLoopBody will ignore these. +// NodeInOriginalLoopBody class because PhaseIdealLoop::resolve_value_for_preheader() could clone additional nodes +// that should be pinned at the main loop body entry. The check in NodeInOriginalLoopBody will ignore these. class NodeInMainLoopBody : public NodeInLoopBody { const uint _first_node_index_in_pre_loop_body; const uint _last_node_index_in_pre_loop_body; @@ -1100,7 +1100,7 @@ class NodeInMainLoopBody : public NodeInLoopBody { // Check if 'node' is not a cloned node (i.e. "< _first_node_index_in_cloned_loop_body") and if we've created a // clone from 'node' (i.e. _old_new entry is non-null). 
Then we know that 'node' belongs to the original loop body. // Additionally check if a node was cloned after the pre loop was created. This indicates that it was created by - // PhaseIdealLoop::clone_up_backedge_goo(). These nodes should also be pinned at the main loop entry. + // PhaseIdealLoop::resolve_value_for_preheader(). These nodes should also be pinned at the main loop entry. bool check_node_in_loop_body(Node* node) const override { if (node->_idx < _first_node_index_in_pre_loop_body) { Node* cloned_node = _old_new[node->_idx]; @@ -1110,10 +1110,10 @@ class NodeInMainLoopBody : public NodeInLoopBody { "clone must be part of pre loop body"); return cloned_node_in_pre_loop_body; } - // Created in PhaseIdealLoop::clone_up_backedge_goo()? + // Created in PhaseIdealLoop::resolve_value_for_preheader()? bool node_created_by_backedge_goo = node->_idx > _last_node_index_in_pre_loop_body; assert(!node_created_by_backedge_goo || node->_idx <= _last_node_index_from_backedge_goo, - "cloned node must have been created in PhaseIdealLoop::clone_up_backedge_goo()"); + "cloned node must have been created in PhaseIdealLoop::resolve_value_for_preheader()"); return node_created_by_backedge_goo; } }; diff --git a/test/hotspot/jtreg/compiler/predicates/assertion/TestLoadPinnedAboveAssertionPredicatesAndUsingStore.java b/test/hotspot/jtreg/compiler/predicates/assertion/TestLoadPinnedAboveAssertionPredicatesAndUsingStore.java index 0eb5c27cd82..80cc7f1b564 100644 --- a/test/hotspot/jtreg/compiler/predicates/assertion/TestLoadPinnedAboveAssertionPredicatesAndUsingStore.java +++ b/test/hotspot/jtreg/compiler/predicates/assertion/TestLoadPinnedAboveAssertionPredicatesAndUsingStore.java @@ -25,8 +25,8 @@ /* * @test * @bug 8347018 - * @summary Test that stores cloned with clone_up_backedge_goo() are not pinned above Assertion Predicates on which a - * load node is pinned at which will later fail in scheduling. 
+ * @summary Test that stores cloned with resolve_value_for_preheader() are not pinned above Assertion + * Predicates at which a load node is pinned, which will later fail in scheduling. * @run main/othervm -Xbatch -XX:CompileCommand=compileonly,*TestLoadPinnedAboveAssertionPredicatesAndUsingStore::test * compiler.predicates.assertion.TestLoadPinnedAboveAssertionPredicatesAndUsingStore */ diff --git a/test/micro/org/openjdk/bench/vm/compiler/VectorThroughputForIterationCount.java b/test/micro/org/openjdk/bench/vm/compiler/VectorThroughputForIterationCount.java index a3cc14cd062..062ba1d58df 100644 --- a/test/micro/org/openjdk/bench/vm/compiler/VectorThroughputForIterationCount.java +++ b/test/micro/org/openjdk/bench/vm/compiler/VectorThroughputForIterationCount.java @@ -128,10 +128,33 @@ public abstract class VectorThroughputForIterationCount { private int seed; private Random r = new Random(seed); - // Enable a warm-up phase with a large iteration count to force - // auto-vectorization and unrolling. Without this, C2 may optimize - // away small fixed-trip loops based on profiling, making the - // effects of this patch unobservable. + // When enabled, run an additional warm-up phase using a large loop iteration + // count to encourage C2 to generate vectorized and unrolled loop bodies. + // + // Rationale: + // Some benchmarks in this suite use small, fixed trip-count loops. During + // early profiling, C2 may treat such loops as trivial, avoid vectorization, + // or optimize them away entirely. In those cases, changes that affect loop + // vectorization behavior, such as the improvement introduced by JDK-8307084, + // may not be observable in the generated code. + // + // As a result, this benchmark suite contains two main classes of + // microbenchmarks: + // 1) bench_xx_computeBound / bench_xx_memoryBound + // These measure the performance of C2-generated code for the given + // workload without relying on a special warm-up phase. 
+ // 2) bench03xx_staticTripCount / bench03xx_dynamicTripCount + // These benchmarks are sensitive to early profiling. Enabling a + // large-loop warm-up forces the optimizer to observe the loop at scale, + // making vectorized code generation more likely and allowing such + // effects to be measured. + // + // Usage guidance: + // - Enable for microbenchmarks that rely on observing vectorization or + // unrolling effects, especially when loop trip counts are small or + // constant (e.g., bench03xx_staticTripCount and bench03xx_dynamicTripCount, + // introduced by JDK-8307084). + // - Disable for general regression testing and for other microbenchmarks. @Param({"true", "false"}) public static boolean ENABLE_LARGE_LOOP_WARMUP; @@ -270,12 +293,12 @@ public abstract class VectorThroughputForIterationCount { } @Benchmark - public void bench031B_drain_memoryBound() { + public void bench031B_staticTripCount() { byteadd(aB, bB, rB, START_IDX, ITERATION_COUNT); } @Benchmark - public void bench031B_drain_dynamic() { + public void bench031B_dynamicTripCount() { for (int r = 0; r < REPETITIONS; r++) { byteadd(aB, bB, rB, START_IDX, offsets[r]+ITERATION_COUNT); } @@ -316,12 +339,12 @@ public abstract class VectorThroughputForIterationCount { // } @Benchmark - public void bench032S_drain_memoryBound() { + public void bench032S_staticTripCount() { shortadd(aS, bS, rS, START_IDX, ITERATION_COUNT); } @Benchmark - public void bench032S_drain_dynamic() { + public void bench032S_dynamicTripCount() { for (int r = 0; r < REPETITIONS; r++) { shortadd(aS, bS, rS, START_IDX, offsets[r]+ITERATION_COUNT); } @@ -397,12 +420,12 @@ public abstract class VectorThroughputForIterationCount { } @Benchmark - public void bench034I_drain_memoryBound() { + public void bench034I_staticTripCount() { intadd(aI, bI, rI, START_IDX, ITERATION_COUNT); } @Benchmark - public void bench034I_drain_dynamic() { + public void bench034I_dynamicTripCount() { for (int r = 0; r < REPETITIONS; r++) { intadd(aI, bI, rI, 
START_IDX, offsets[r]+ITERATION_COUNT); } @@ -443,12 +466,12 @@ public abstract class VectorThroughputForIterationCount { } @Benchmark - public void bench035L_drain_memoryBound() { + public void bench035L_staticTripCount() { longadd(aL, bL, rL, START_IDX, ITERATION_COUNT); } @Benchmark - public void bench035L_drain_dynamic() { + public void bench035L_dynamicTripCount() { for (int r = 0; r < REPETITIONS; r++) { longadd(aL, bL, rL, START_IDX, offsets[r]+ITERATION_COUNT); }