diff --git a/src/hotspot/share/opto/loopTransform.cpp b/src/hotspot/share/opto/loopTransform.cpp index d68135f694a..adff221d505 100644 --- a/src/hotspot/share/opto/loopTransform.cpp +++ b/src/hotspot/share/opto/loopTransform.cpp @@ -1042,7 +1042,7 @@ bool IdealLoopTree::policy_unroll(PhaseIdealLoop *phase) { } // Only attempt slp analysis when user controls do not prohibit it - if (!cl->range_checks_present() && (LoopMaxUnroll > _local_loop_unroll_factor)) { + if (!range_checks_present() && (LoopMaxUnroll > _local_loop_unroll_factor)) { // Once policy_slp_analysis succeeds, mark the loop with the // maximal unroll factor so that we minimize analysis passes if (future_unroll_cnt >= _local_loop_unroll_factor) { @@ -1916,7 +1916,7 @@ void PhaseIdealLoop::insert_scalar_rced_post_loop(IdealLoopTree *loop, Node_List CountedLoopNode *cl = loop->_head->as_CountedLoop(); // only process RCE'd main loops - if (!cl->is_main_loop() || cl->range_checks_present()) return; + if (!cl->is_main_loop() || loop->range_checks_present()) return; #ifndef PRODUCT if (TraceLoopOpts) { @@ -3003,7 +3003,7 @@ Node* PhaseIdealLoop::add_range_check_predicate(IdealLoopTree* loop, CountedLoop //------------------------------do_range_check--------------------------------- // Eliminate range-checks and other trip-counter vs loop-invariant tests. -int PhaseIdealLoop::do_range_check(IdealLoopTree *loop, Node_List &old_new) { +void PhaseIdealLoop::do_range_check(IdealLoopTree *loop, Node_List &old_new) { #ifndef PRODUCT if (PrintOpto && VerifyLoopOptimizations) { tty->print("Range Check Elimination "); @@ -3016,12 +3016,10 @@ int PhaseIdealLoop::do_range_check(IdealLoopTree *loop, Node_List &old_new) { assert(RangeCheckElimination, ""); CountedLoopNode *cl = loop->_head->as_CountedLoop(); - // If we fail before trying to eliminate range checks, set multiversion state - int closed_range_checks = 1; // protect against stride not being a constant if (!cl->stride_is_con()) { - return closed_range_checks; + return; } // Find the trip counter; we are iteration splitting based on it Node *trip_counter = cl->phi(); @@ -3033,7 +3031,7 @@ int PhaseIdealLoop::do_range_check(IdealLoopTree *loop, Node_List &old_new) { // Opaque1 node is optimized away and then another round // of loop opts attempted. if (cl->is_canonical_loop_entry() == NULL) { - return closed_range_checks; + return; } // Need to find the main-loop zero-trip guard @@ -3047,7 +3045,7 @@ int PhaseIdealLoop::do_range_check(IdealLoopTree *loop, Node_List &old_new) { Node *p_f = iffm->in(0); // pre loop may have been optimized out if (p_f->Opcode() != Op_IfFalse) { - return closed_range_checks; + return; } CountedLoopEndNode *pre_end = p_f->in(0)->as_CountedLoopEnd(); assert(pre_end->loopnode()->is_pre_loop(), ""); @@ -3056,7 +3054,7 @@ int PhaseIdealLoop::do_range_check(IdealLoopTree *loop, Node_List &old_new) { // optimized away and then another round of loop opts attempted. // We can not optimize this particular loop in that case. if (pre_opaq1->Opcode() != Op_Opaque1) { - return closed_range_checks; + return; } Opaque1Node *pre_opaq = (Opaque1Node*)pre_opaq1; Node *pre_limit = pre_opaq->in(1); @@ -3068,7 +3066,7 @@ int PhaseIdealLoop::do_range_check(IdealLoopTree *loop, Node_List &old_new) { // pre-loop Opaque1 node. Node *orig_limit = pre_opaq->original_loop_limit(); if (orig_limit == NULL || _igvn.type(orig_limit) == Type::TOP) { - return closed_range_checks; + return; } // Must know if its a count-up or count-down loop @@ -3081,10 +3079,6 @@ int PhaseIdealLoop::do_range_check(IdealLoopTree *loop, Node_List &old_new) { set_ctrl(one, C->root()); set_ctrl(mini, C->root()); - // Count number of range checks and reduce by load range limits, if zero, - // the loop is in canonical form to multiversion. - closed_range_checks = 0; - Node* predicate_proj = cl->skip_strip_mined()->in(LoopNode::EntryControl); assert(predicate_proj->is_Proj() && predicate_proj->in(0)->is_If(), "if projection only"); @@ -3095,7 +3089,6 @@ int PhaseIdealLoop::do_range_check(IdealLoopTree *loop, Node_List &old_new) { iff->Opcode() == Op_RangeCheck) { // Test? // Test is an IfNode, has 2 projections. If BOTH are in the loop // we need loop unswitching instead of iteration splitting. - closed_range_checks++; Node *exit = loop->is_loop_exit(iff); if (!exit) continue; int flip = (exit->Opcode() == Op_IfTrue) ? 1 : 0; @@ -3264,9 +3257,6 @@ int PhaseIdealLoop::do_range_check(IdealLoopTree *loop, Node_List &old_new) { --imax; } } - if (int_limit->Opcode() == Op_LoadRange) { - closed_range_checks--; - } } // End of is IF } if (predicate_proj != cl->skip_strip_mined()->in(LoopNode::EntryControl)) { @@ -3316,32 +3306,19 @@ int PhaseIdealLoop::do_range_check(IdealLoopTree *loop, Node_List &old_new) { assert(opqzm->outcnt() == 1, "cannot hack shared node"); _igvn.replace_input_of(opqzm, 1, main_limit); - return closed_range_checks; + return; } -//------------------------------has_range_checks------------------------------- -// Check to see if RCE cleaned the current loop of range-checks. -void PhaseIdealLoop::has_range_checks(IdealLoopTree *loop) { - assert(RangeCheckElimination, ""); - - // skip if not a counted loop - if (!loop->is_counted()) return; - - CountedLoopNode *cl = loop->_head->as_CountedLoop(); - - // skip this loop if it is already checked - if (cl->has_been_range_checked()) return; - - // Now check for existence of range checks - for (uint i = 0; i < loop->_body.size(); i++) { - Node *iff = loop->_body[i]; +bool IdealLoopTree::compute_has_range_checks() const { + assert(_head->is_CountedLoop(), ""); + for (uint i = 0; i < _body.size(); i++) { + Node *iff = _body[i]; int iff_opc = iff->Opcode(); if (iff_opc == Op_If || iff_opc == Op_RangeCheck) { - cl->mark_has_range_checks(); - break; + return true; } } - cl->set_has_been_range_checked(); + return false; } //-------------------------multi_version_post_loops---------------------------- @@ -4007,13 +3984,7 @@ bool IdealLoopTree::iteration_split_impl(PhaseIdealLoop *phase, Node_List &old_n // with full checks, but the main-loop with no checks. Remove said checks // from the main body. if (should_rce) { - if (phase->do_range_check(this, old_new) != 0) { - cl->mark_has_range_checks(); - } else { - cl->clear_has_range_checks(); - } - } else if (PostLoopMultiversioning) { - phase->has_range_checks(this); + phase->do_range_check(this, old_new); } if (should_unroll && !should_peel && PostLoopMultiversioning && diff --git a/src/hotspot/share/opto/loopnode.cpp b/src/hotspot/share/opto/loopnode.cpp index d2444363c52..9c1ea36818d 100644 --- a/src/hotspot/share/opto/loopnode.cpp +++ b/src/hotspot/share/opto/loopnode.cpp @@ -3942,7 +3942,7 @@ uint IdealLoopTree::est_loop_flow_merge_sz() const { #ifndef PRODUCT //------------------------------dump_head-------------------------------------- // Dump 1 liner for loop header info -void IdealLoopTree::dump_head() const { +void IdealLoopTree::dump_head() { tty->sp(2 * _nest); tty->print("Loop: N%d/N%d ", _head->_idx, _tail->_idx); if (_irreducible) tty->print(" IRREDUCIBLE"); @@ -3990,7 +3990,7 @@ void IdealLoopTree::dump_head() const { if (cl->is_post_loop()) tty->print(" post"); if (cl->is_reduction_loop()) tty->print(" reduction"); if (cl->is_vectorized_loop()) tty->print(" vector"); - if (cl->range_checks_present()) tty->print(" rc "); + if (range_checks_present()) tty->print(" rc "); if (cl->is_multiversioned()) tty->print(" multi "); } if (_has_call) tty->print(" has_call"); @@ -4013,7 +4013,7 @@ void IdealLoopTree::dump_head() const { //------------------------------dump------------------------------------------- // Dump loops by loop tree -void IdealLoopTree::dump() const { +void IdealLoopTree::dump() { dump_head(); if (_child) _child->dump(); if (_next) _next ->dump(); @@ -4600,8 +4600,7 @@ void PhaseIdealLoop::build_and_optimize() { IdealLoopTree *lpt_next = lpt->_next; if (lpt_next && lpt_next->is_counted()) { CountedLoopNode *cl = lpt_next->_head->as_CountedLoop(); - has_range_checks(lpt_next); - if (cl->is_post_loop() && cl->range_checks_present()) { + if (cl->is_post_loop() && lpt_next->range_checks_present()) { if (!cl->is_multiversioned()) { if (multi_version_post_loops(lpt, lpt_next) == false) { // Cause the rce loop to be optimized away if we fail diff --git a/src/hotspot/share/opto/loopnode.hpp b/src/hotspot/share/opto/loopnode.hpp index 086b7c79514..b998c5c461d 100644 --- a/src/hotspot/share/opto/loopnode.hpp +++ b/src/hotspot/share/opto/loopnode.hpp @@ -72,17 +72,16 @@ protected: DoUnrollOnly = 1<<10, VectorizedLoop = 1<<11, HasAtomicPostLoop = 1<<12, - HasRangeChecks = 1<<13, - IsMultiversioned = 1<<14, - StripMined = 1<<15, - SubwordLoop = 1<<16, - ProfileTripFailed = 1<<17, - LoopNestInnerLoop = 1 << 18, - LoopNestLongOuterLoop = 1 << 19}; + IsMultiversioned = 1<<13, + StripMined = 1<<14, + SubwordLoop = 1<<15, + ProfileTripFailed = 1<<16, + LoopNestInnerLoop = 1 << 17, + LoopNestLongOuterLoop = 1 << 18}; char _unswitch_count; enum { _unswitch_max=3 }; char _postloop_flags; - enum { LoopNotRCEChecked = 0, LoopRCEChecked = 1, RCEPostLoop = 2 }; + enum { RCEPostLoop = 1 }; // Expected trip count from profile data float _profile_trip_cnt; @@ -94,7 +93,6 @@ public: bool is_inner_loop() const { return _loop_flags & InnerLoop; } void set_inner_loop() { _loop_flags |= InnerLoop; } - bool range_checks_present() const { return _loop_flags & HasRangeChecks; } bool is_multiversioned() const { return _loop_flags & IsMultiversioned; } bool is_vectorized_loop() const { return _loop_flags & VectorizedLoop; } bool is_partial_peel_loop() const { return _loop_flags & PartialPeelLoop; } @@ -113,8 +111,6 @@ public: void mark_do_unroll_only() { _loop_flags |= DoUnrollOnly; } void mark_loop_vectorized() { _loop_flags |= VectorizedLoop; } void mark_has_atomic_post_loop() { _loop_flags |= HasAtomicPostLoop; } - void mark_has_range_checks() { _loop_flags |= HasRangeChecks; } - void clear_has_range_checks() { _loop_flags &= ~HasRangeChecks; } void mark_is_multiversioned() { _loop_flags |= IsMultiversioned; } void mark_strip_mined() { _loop_flags |= StripMined; } void clear_strip_mined() { _loop_flags &= ~StripMined; } @@ -126,8 +122,6 @@ public: int unswitch_max() { return _unswitch_max; } int unswitch_count() { return _unswitch_count; } - int has_been_range_checked() const { return _postloop_flags & LoopRCEChecked; } - void set_has_been_range_checked() { _postloop_flags |= LoopRCEChecked; } int is_rce_post_loop() const { return _postloop_flags & RCEPostLoop; } void set_is_rce_post_loop() { _postloop_flags |= RCEPostLoop; } @@ -621,7 +615,9 @@ public: uint8_t _irreducible:1, // True if irreducible _has_call:1, // True if has call safepoint _has_sfpt:1, // True if has non-call safepoint - _rce_candidate:1; // True if candidate for range check elimination + _rce_candidate:1, // True if candidate for range check elimination + _has_range_checks:1, + _has_range_checks_computed:1; Node_List* _safepts; // List of safepoints in this loop Node_List* _required_safept; // A inner loop cannot delete these safepts; @@ -633,6 +629,7 @@ public: _phase(phase), _local_loop_unroll_limit(0), _local_loop_unroll_factor(0), _nest(0), _irreducible(0), _has_call(0), _has_sfpt(0), _rce_candidate(0), + _has_range_checks(0), _has_range_checks_computed(0), _safepts(NULL), _required_safept(NULL), _allow_optimizations(true) @@ -780,9 +777,20 @@ public: void remove_main_post_loops(CountedLoopNode *cl, PhaseIdealLoop *phase); + bool compute_has_range_checks() const; + bool range_checks_present() { + if (!_has_range_checks_computed) { + if (compute_has_range_checks()) { + _has_range_checks = 1; + } + _has_range_checks_computed = 1; + } + return _has_range_checks; + } + #ifndef PRODUCT - void dump_head() const; // Dump loop head only - void dump() const; // Dump this loop recursively + void dump_head(); // Dump loop head only + void dump(); // Dump this loop recursively void verify_tree(IdealLoopTree *loop, const IdealLoopTree *parent) const; #endif @@ -1424,10 +1432,7 @@ public: } // Eliminate range-checks and other trip-counter vs loop-invariant tests. - int do_range_check( IdealLoopTree *loop, Node_List &old_new ); - - // Check to see if do_range_check(...) cleaned the main loop of range-checks - void has_range_checks(IdealLoopTree *loop); + void do_range_check(IdealLoopTree *loop, Node_List &old_new); // Process post loops which have range checks and try to build a multi-version // guard to safely determine if we can execute the post loop which was RCE'd. diff --git a/src/hotspot/share/opto/superword.cpp b/src/hotspot/share/opto/superword.cpp index 4df7b4219e8..e0b23fbb692 100644 --- a/src/hotspot/share/opto/superword.cpp +++ b/src/hotspot/share/opto/superword.cpp @@ -180,11 +180,10 @@ bool SuperWord::transform_loop(IdealLoopTree* lpt, bool do_optimization) { if (cl->is_vectorized_loop() && cl->is_main_loop() && !cl->is_reduction_loop()) { IdealLoopTree *lpt_next = cl->is_strip_mined() ? lpt->_parent->_next : lpt->_next; CountedLoopNode *cl_next = lpt_next->_head->as_CountedLoop(); - _phase->has_range_checks(lpt_next); // Main loop SLP works well for manually unrolled loops. But post loop // vectorization doesn't work for these. To bail out the optimization // earlier, we have range check and loop stride conditions below. - if (cl_next->is_post_loop() && !cl_next->range_checks_present() && + if (cl_next->is_post_loop() && !lpt_next->range_checks_present() && cl_next->stride_is_con() && abs(cl_next->stride_con()) == 1) { if (!cl_next->is_vectorized_loop()) { // Propagate some main loop attributes to its corresponding scalar diff --git a/test/hotspot/jtreg/compiler/c2/irTests/TestVectorizationNotRun.java b/test/hotspot/jtreg/compiler/c2/irTests/TestVectorizationNotRun.java new file mode 100644 index 00000000000..26a4550c5c8 --- /dev/null +++ b/test/hotspot/jtreg/compiler/c2/irTests/TestVectorizationNotRun.java @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2023, Red Hat, Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +package compiler.c2.irTests; + +import compiler.lib.ir_framework.*; +import jdk.test.lib.Utils; +import jdk.internal.misc.Unsafe; +import java.util.Objects; +import java.util.Random; + +/* + * @test + * @bug 8300256 + * @requires (os.simpleArch == "x64") | (os.simpleArch == "aarch64") + * @modules java.base/jdk.internal.misc + * @library /test/lib / + * @run driver compiler.c2.irTests.TestVectorizationNotRun + */ + +public class TestVectorizationNotRun { + private static final Unsafe UNSAFE = Unsafe.getUnsafe(); + + public static void main(String[] args) { + TestFramework.runWithFlags("--add-modules", "java.base", "--add-exports", "java.base/jdk.internal.misc=ALL-UNNAMED"); + } + + static int size = 1024; + static int sizeBytes = 8 * size; + static byte[] byteArray = new byte[sizeBytes]; + static long[] longArray = new long[size]; + + @Test + @IR(counts = { IRNode.LOAD_VECTOR, ">=1", IRNode.STORE_VECTOR, ">=1" }) + public static void test(byte[] dest, long[] src) { + for (int i = 0; i < src.length; i++) { + if ((i < 0) || (8 > sizeBytes - i)) { + throw new IndexOutOfBoundsException(); + } + UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + i * 8, src[i]); + } + } + + @Run(test = "test") + public static void test_runner() { + test(byteArray, longArray); + } + +}