mirror of
https://github.com/openjdk/jdk.git
synced 2026-01-28 12:09:14 +00:00
8350756: C2 SuperWord Multiversioning: remove useless slow loop when the fast loop disappears
Reviewed-by: kvn, chagedorn
This commit is contained in:
parent
3626ac35b3
commit
e82031ec1a
@ -41,6 +41,7 @@
|
||||
#include "opto/movenode.hpp"
|
||||
#include "opto/mulnode.hpp"
|
||||
#include "opto/opaquenode.hpp"
|
||||
#include "opto/opcodes.hpp"
|
||||
#include "opto/predicates.hpp"
|
||||
#include "opto/rootnode.hpp"
|
||||
#include "opto/runtime.hpp"
|
||||
@ -2731,6 +2732,20 @@ Node* CountedLoopNode::match_incr_with_optional_truncation(Node* expr, Node** tr
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
IfNode* CountedLoopNode::find_multiversion_if_from_multiversion_fast_main_loop() {
|
||||
assert(is_main_loop() && is_multiversion_fast_loop(), "must be multiversion fast main loop");
|
||||
CountedLoopEndNode* pre_end = find_pre_loop_end();
|
||||
if (pre_end == nullptr) { return nullptr; }
|
||||
Node* pre_entry = pre_end->loopnode()->in(LoopNode::EntryControl);
|
||||
const Predicates predicates(pre_entry);
|
||||
IfTrueNode* before_predicates = predicates.entry()->isa_IfTrue();
|
||||
if (before_predicates != nullptr &&
|
||||
before_predicates->in(0)->in(1)->is_OpaqueMultiversioning()) {
|
||||
return before_predicates->in(0)->as_If();
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
LoopNode* CountedLoopNode::skip_strip_mined(int expect_skeleton) {
|
||||
if (is_strip_mined() && in(EntryControl) != nullptr && in(EntryControl)->is_OuterStripMinedLoop()) {
|
||||
verify_strip_mined(expect_skeleton);
|
||||
@ -4536,6 +4551,49 @@ void PhaseIdealLoop::eliminate_useless_zero_trip_guard() {
|
||||
}
|
||||
}
|
||||
|
||||
void PhaseIdealLoop::eliminate_useless_multiversion_if() {
|
||||
if (_multiversion_opaque_nodes.size() == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
ResourceMark rm;
|
||||
Unique_Node_List useful_multiversioning_opaque_nodes;
|
||||
|
||||
// The OpaqueMultiversioning is only used from the fast main loop in AutoVectorization, to add
|
||||
// speculative runtime-checks to the multiversion_if. Thus, a OpaqueMultiversioning is only
|
||||
// useful if it can be found from a fast main loop. If it can not be found from a fast main loop,
|
||||
// then we cannot ever use that multiversion_if to add more speculative runtime-checks, and hence
|
||||
// it is useless. If it is still in delayed mode, i.e. has not yet had any runtime-checks added,
|
||||
// then we can let it constant fold towards the fast loop.
|
||||
for (LoopTreeIterator iter(_ltree_root); !iter.done(); iter.next()) {
|
||||
IdealLoopTree* lpt = iter.current();
|
||||
if (lpt->_child == nullptr && lpt->is_counted()) {
|
||||
CountedLoopNode* head = lpt->_head->as_CountedLoop();
|
||||
if (head->is_main_loop() && head->is_multiversion_fast_loop()) {
|
||||
// There are fast_loop pre/main/post loops, but the finding traversal starts at the main
|
||||
// loop, and traverses via the fast pre loop to the multiversion_if.
|
||||
IfNode* multiversion_if = head->find_multiversion_if_from_multiversion_fast_main_loop();
|
||||
if (multiversion_if != nullptr) {
|
||||
useful_multiversioning_opaque_nodes.push(multiversion_if->in(1)->as_OpaqueMultiversioning());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (uint i = 0; i < _multiversion_opaque_nodes.size(); i++) {
|
||||
OpaqueMultiversioningNode* opaque = _multiversion_opaque_nodes.at(i)->as_OpaqueMultiversioning();
|
||||
if (!useful_multiversioning_opaque_nodes.member(opaque)) {
|
||||
if (opaque->is_delayed_slow_loop()) {
|
||||
// We cannot hack the node directly, otherwise the slow_loop will complain that it cannot
|
||||
// find the multiversioning opaque node. Instead, we mark the opaque node as useless, and
|
||||
// it can be constant folded during IGVN.
|
||||
opaque->mark_useless();
|
||||
_igvn._worklist.push(opaque);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//------------------------process_expensive_nodes-----------------------------
|
||||
// Expensive nodes have their control input set to prevent the GVN
|
||||
// from commoning them and as a result forcing the resulting node to
|
||||
@ -4805,6 +4863,7 @@ void PhaseIdealLoop::build_and_optimize() {
|
||||
}
|
||||
|
||||
eliminate_useless_zero_trip_guard();
|
||||
eliminate_useless_multiversion_if();
|
||||
|
||||
if (stop_early) {
|
||||
assert(do_expensive_nodes, "why are we here?");
|
||||
@ -6596,6 +6655,9 @@ void PhaseIdealLoop::build_loop_late_post_work(Node *n, bool pinned) {
|
||||
_zero_trip_guard_opaque_nodes.push(n);
|
||||
}
|
||||
|
||||
if (!_verify_only && n->Opcode() == Op_OpaqueMultiversioning) {
|
||||
_multiversion_opaque_nodes.push(n);
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef ASSERT
|
||||
|
||||
@ -290,6 +290,8 @@ public:
|
||||
bool has_atomic_post_loop () const { return (_loop_flags & HasAtomicPostLoop) == HasAtomicPostLoop; }
|
||||
void set_main_no_pre_loop() { _loop_flags |= MainHasNoPreLoop; }
|
||||
|
||||
IfNode* find_multiversion_if_from_multiversion_fast_main_loop();
|
||||
|
||||
int main_idx() const { return _main_idx; }
|
||||
|
||||
|
||||
@ -932,6 +934,7 @@ private:
|
||||
// clear out dead code after build_loop_late
|
||||
Node_List _deadlist;
|
||||
Node_List _zero_trip_guard_opaque_nodes;
|
||||
Node_List _multiversion_opaque_nodes;
|
||||
|
||||
// Support for faster execution of get_late_ctrl()/dom_lca()
|
||||
// when a node has many uses and dominator depth is deep.
|
||||
@ -1453,6 +1456,7 @@ public:
|
||||
void eliminate_useless_template_assertion_predicates(Unique_Node_List& useful_predicates);
|
||||
|
||||
void eliminate_useless_zero_trip_guard();
|
||||
void eliminate_useless_multiversion_if();
|
||||
|
||||
public:
|
||||
// Change the control input of expensive nodes to allow commoning by
|
||||
|
||||
@ -791,7 +791,10 @@ Node *PhaseIdealLoop::conditional_move( Node *region ) {
|
||||
// Ignore Template Assertion Predicates with OpaqueTemplateAssertionPredicate nodes.
|
||||
return nullptr;
|
||||
}
|
||||
assert(bol->Opcode() == Op_Bool, "Unexpected node");
|
||||
if (!bol->is_Bool()) {
|
||||
assert(false, "Expected Bool, but got %s", NodeClassNames[bol->Opcode()]);
|
||||
return nullptr;
|
||||
}
|
||||
int cmp_op = bol->in(1)->Opcode();
|
||||
if (cmp_op == Op_SubTypeCheck) { // SubTypeCheck expansion expects an IfNode
|
||||
return nullptr;
|
||||
|
||||
@ -82,6 +82,26 @@ IfNode* OpaqueZeroTripGuardNode::if_node() const {
|
||||
return iff->as_If();
|
||||
}
|
||||
|
||||
Node* OpaqueMultiversioningNode::Identity(PhaseGVN* phase) {
|
||||
// Constant fold the multiversion_if. Since the slow_loop is still delayed,
|
||||
// i.e. we have not yet added any possibly failing condition, we can just
|
||||
// take the true branch in all cases.
|
||||
if (_useless) {
|
||||
assert(_is_delayed_slow_loop, "the slow_loop should still be delayed");
|
||||
return in(1);
|
||||
}
|
||||
return Opaque1Node::Identity(phase);
|
||||
}
|
||||
|
||||
#ifndef PRODUCT
|
||||
void OpaqueMultiversioningNode::dump_spec(outputStream *st) const {
|
||||
Opaque1Node::dump_spec(st);
|
||||
if (_useless) {
|
||||
st->print(" #useless");
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
const Type* OpaqueNotNullNode::Value(PhaseGVN* phase) const {
|
||||
return phase->type(in(1));
|
||||
}
|
||||
|
||||
@ -101,17 +101,30 @@ public:
|
||||
class OpaqueMultiversioningNode : public Opaque1Node {
|
||||
private:
|
||||
bool _is_delayed_slow_loop;
|
||||
bool _useless;
|
||||
|
||||
public:
|
||||
OpaqueMultiversioningNode(Compile* C, Node* n) :
|
||||
Opaque1Node(C, n), _is_delayed_slow_loop(true)
|
||||
Opaque1Node(C, n), _is_delayed_slow_loop(true), _useless(false)
|
||||
{
|
||||
init_class_id(Class_OpaqueMultiversioning);
|
||||
}
|
||||
virtual int Opcode() const;
|
||||
virtual const Type* bottom_type() const { return TypeInt::BOOL; }
|
||||
bool is_delayed_slow_loop() const { return _is_delayed_slow_loop; }
|
||||
void notify_slow_loop_that_it_can_resume_optimizations() { _is_delayed_slow_loop = false; }
|
||||
|
||||
void notify_slow_loop_that_it_can_resume_optimizations() {
|
||||
assert(!_useless, "must still be useful");
|
||||
_is_delayed_slow_loop = false;
|
||||
}
|
||||
|
||||
void mark_useless() {
|
||||
assert(_is_delayed_slow_loop, "must still be delayed");
|
||||
_useless = true;
|
||||
}
|
||||
|
||||
virtual Node* Identity(PhaseGVN* phase);
|
||||
NOT_PRODUCT(virtual void dump_spec(outputStream* st) const;)
|
||||
};
|
||||
|
||||
// This node is used in the context of intrinsics. We sometimes implicitly know that an object is non-null even though
|
||||
|
||||
@ -329,6 +329,11 @@ public class IRNode {
|
||||
superWordNodes(ADD_REDUCTION_VL, "AddReductionVL");
|
||||
}
|
||||
|
||||
public static final String OPAQUE_MULTIVERSIONING = PREFIX + "OPAQUE_MULTIVERSIONING" + POSTFIX;
|
||||
static {
|
||||
beforeMatchingNameRegex(OPAQUE_MULTIVERSIONING, "OpaqueMultiversioning");
|
||||
}
|
||||
|
||||
public static final String ADD_P_OF = COMPOSITE_PREFIX + "ADD_P_OF" + POSTFIX;
|
||||
static {
|
||||
String regex = START + "addP_" + IS_REPLACED + MID + ".*" + END;
|
||||
|
||||
@ -0,0 +1,124 @@
|
||||
/*
|
||||
* Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
|
||||
package compiler.loopopts.superword;
|
||||
|
||||
import compiler.lib.ir_framework.*;
|
||||
|
||||
/*
|
||||
* @test
|
||||
* @bug 8350756
|
||||
* @summary Test case where the multiversion fast_loop disappears, and we should
|
||||
* constant fold the multiversion_if, to remove the slow_loop.
|
||||
* @library /test/lib /
|
||||
* @run driver compiler.loopopts.superword.TestMultiversionRemoveUselessSlowLoop
|
||||
*/
|
||||
|
||||
public class TestMultiversionRemoveUselessSlowLoop {
|
||||
|
||||
public static void main(String[] args) {
|
||||
TestFramework framework = new TestFramework(TestMultiversionRemoveUselessSlowLoop.class);
|
||||
// No traps means we cannot use the predicates version for SuperWord / AutoVectorization,
|
||||
// and instead use multiversioning directly.
|
||||
framework.addFlags("-XX:-TieredCompilation", "-XX:PerMethodTrapLimit=0");
|
||||
framework.setDefaultWarmup(0); // simulates Xcomp
|
||||
framework.start();
|
||||
}
|
||||
|
||||
public static final int SIZE = 20;
|
||||
public static final int[] a = new int[SIZE];
|
||||
public static final int[] b = new int[SIZE];
|
||||
public static final int SIZE2 = 10_000;
|
||||
public static final int[] a2 = new int[SIZE2];
|
||||
public static final int[] b2 = new int[SIZE2];
|
||||
|
||||
@Test
|
||||
@IR(counts = {"pre .* multiversion_fast", "= 2", // regular pre-main-post for both loops
|
||||
"main .* multiversion_fast", "= 2",
|
||||
"post .* multiversion_fast", "= 2",
|
||||
"multiversion_delayed_slow", "= 2", // both have the delayed slow_loop
|
||||
"multiversion", "= 8", // nothing unexpected
|
||||
IRNode.OPAQUE_MULTIVERSIONING, "= 2"}, // Both multiversion_if are still here
|
||||
applyIfPlatform = {"64-bit", "true"},
|
||||
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
|
||||
phase = CompilePhase.PHASEIDEALLOOP1)
|
||||
@IR(counts = {"pre .* multiversion_fast", "= 2",
|
||||
"main .* multiversion_fast", "= 1", // The first main loop is fully unrolled
|
||||
"post .* multiversion_fast", "= 3", // the second loop is vectorized, and has a vectorized post loop
|
||||
"multiversion_delayed_slow", "= 1", // As a consequence of the first main loop being removed, we constant fold the multiversion_if
|
||||
"multiversion", "= 7", // nothing unexpected
|
||||
IRNode.OPAQUE_MULTIVERSIONING, "= 1"}, // The multiversion_if of the first loop was constant folded, because the main loop disappeared.
|
||||
applyIfPlatform = {"64-bit", "true"},
|
||||
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
|
||||
phase = CompilePhase.PHASEIDEALLOOP_ITERATIONS)
|
||||
@IR(counts = {"pre .* multiversion_fast.*", ">= 1", // In some cases, the pre loop of the first loop also disappears because it only has a single iteration
|
||||
"pre .* multiversion_fast.*", "<= 2", // but not in other cases the pre loop of the first loop remains.
|
||||
"main .* multiversion_fast", "= 1",
|
||||
"post .* multiversion_fast", "= 3",
|
||||
"multiversion_delayed_slow", "= 0", // The second loop's multiversion_if was also not used, so it is constant folded after loop opts.
|
||||
"multiversion", ">= 5", // nothing unexpected
|
||||
"multiversion", "<= 6", // nothing unexpected
|
||||
IRNode.OPAQUE_MULTIVERSIONING, "= 0"}, // After loop-opts, we also constant fold the multiversion_if of the second loop, as it is unused.
|
||||
applyIfPlatform = {"64-bit", "true"},
|
||||
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
|
||||
phase = CompilePhase.PRINT_IDEAL)
|
||||
public static void testIR() {
|
||||
// This loop is short, and the multiversion_fast main loop eventuall is fully unrolled.
|
||||
for (int i = 0; i < SIZE; i++) {
|
||||
a[i] = b[i];
|
||||
}
|
||||
// We take this second loop with a larger limit so that loop opts keeps going once the loop
|
||||
// above is fully optimized. It also gives us a reference where the main loop of the
|
||||
// multiverion fast_loop does not disappear.
|
||||
for (int i = 0; i < SIZE2; i++) {
|
||||
a2[i] = b2[i];
|
||||
}
|
||||
}
|
||||
|
||||
static long instanceCount;
|
||||
static int iFld;
|
||||
static int iFld1;
|
||||
|
||||
// The inner loop is Multiversioned, then PreMainPost and Unroll.
|
||||
// Eventually, both the fast and slow loops (pre main and post) disappear,
|
||||
// and leave us with a simple if-diamond using the multiversion_if.
|
||||
//
|
||||
// Verification code in PhaseIdealLoop::conditional_move finds this diamond
|
||||
// and expects a Bool but gets an OpaqueMultiversioning instead.
|
||||
//
|
||||
// If we let the multiversion_if constant fold soon after the main fast loop
|
||||
// disappears, then this issue does not occur any more.
|
||||
@Test
|
||||
public static void testCrash() {
|
||||
boolean b2 = true;
|
||||
for (int i = 0; i < 1000; i++) {
|
||||
for (int i21 = 82; i21 > 9; --i21) {
|
||||
if (b2)
|
||||
break;
|
||||
iFld1 = iFld;
|
||||
b2 = true;
|
||||
}
|
||||
instanceCount = iFld1;
|
||||
}
|
||||
}
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user