diff --git a/src/hotspot/share/jvmci/vmStructs_jvmci.cpp b/src/hotspot/share/jvmci/vmStructs_jvmci.cpp index e792ed209b8..32ef3eb3e14 100644 --- a/src/hotspot/share/jvmci/vmStructs_jvmci.cpp +++ b/src/hotspot/share/jvmci/vmStructs_jvmci.cpp @@ -763,6 +763,7 @@ declare_constant(Deoptimization::Reason_constraint) \ declare_constant(Deoptimization::Reason_div0_check) \ declare_constant(Deoptimization::Reason_loop_limit_check) \ + declare_constant(Deoptimization::Reason_short_running_long_loop) \ declare_constant(Deoptimization::Reason_auto_vectorization_check) \ declare_constant(Deoptimization::Reason_type_checked_inlining) \ declare_constant(Deoptimization::Reason_optimized_type_check) \ diff --git a/src/hotspot/share/opto/c2_globals.hpp b/src/hotspot/share/opto/c2_globals.hpp index 789f906a3af..540b6600a0f 100644 --- a/src/hotspot/share/opto/c2_globals.hpp +++ b/src/hotspot/share/opto/c2_globals.hpp @@ -872,6 +872,15 @@ "could corrupt the graph in rare cases and should be used with " \ "care.") \ \ + product(bool, ShortRunningLongLoop, true, DIAGNOSTIC, \ + "long counted loop/long range checks: don't create loop nest if " \ + "loop runs for small enough number of iterations. 
Long loop is " \ + "converted to a single int loop.") \ + \ + develop(bool, StressShortRunningLongLoop, false, \ + "Speculate all long counted loops are short running when bounds " \ + "are unknown even if profile data doesn't say so.") \ + \ develop(bool, StressLoopPeeling, false, \ "Randomize loop peeling decision") \ diff --git a/src/hotspot/share/opto/castnode.cpp b/src/hotspot/share/opto/castnode.cpp index 96f5ba7e693..6d899c1f950 100644 --- a/src/hotspot/share/opto/castnode.cpp +++ b/src/hotspot/share/opto/castnode.cpp @@ -26,7 +26,9 @@ #include "opto/addnode.hpp" #include "opto/callnode.hpp" #include "opto/castnode.hpp" +#include "opto/cfgnode.hpp" #include "opto/connode.hpp" +#include "opto/loopnode.hpp" #include "opto/matcher.hpp" #include "opto/phaseX.hpp" #include "opto/subnode.hpp" @@ -323,6 +325,67 @@ const Type* CastLLNode::Value(PhaseGVN* phase) const { return widen_type(phase, res, T_LONG); } +bool CastLLNode::is_inner_loop_backedge(ProjNode* proj) { + if (proj != nullptr) { + Node* ctrl_use = proj->unique_ctrl_out_or_null(); + if (ctrl_use != nullptr && ctrl_use->Opcode() == Op_Loop && + ctrl_use->in(2) == proj && + ctrl_use->as_Loop()->is_loop_nest_inner_loop()) { + return true; + } + } + return false; +} + +bool CastLLNode::cmp_used_at_inner_loop_exit_test(CmpNode* cmp) { + for (DUIterator_Fast imax, i = cmp->fast_outs(imax); i < imax; i++) { + Node* bol = cmp->fast_out(i); + if (bol->Opcode() == Op_Bool) { + for (DUIterator_Fast jmax, j = bol->fast_outs(jmax); j < jmax; j++) { + Node* iff = bol->fast_out(j); + if (iff->Opcode() == Op_If) { + ProjNode* true_proj = iff->as_If()->proj_out_or_null(true); + ProjNode* false_proj = iff->as_If()->proj_out_or_null(false); + if (is_inner_loop_backedge(true_proj) || is_inner_loop_backedge(false_proj)) { + return true; + } + } + } + } + } + return false; +} + +// Find if this is a cast node added by PhaseIdealLoop::create_loop_nest() to narrow the number of iterations of the +// inner loop +bool 
CastLLNode::used_at_inner_loop_exit_test() const { + for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) { + Node* convl2i = fast_out(i); + if (convl2i->Opcode() == Op_ConvL2I) { + for (DUIterator_Fast jmax, j = convl2i->fast_outs(jmax); j < jmax; j++) { + Node* cmp_or_sub = convl2i->fast_out(j); + if (cmp_or_sub->Opcode() == Op_CmpI) { + if (cmp_used_at_inner_loop_exit_test(cmp_or_sub->as_Cmp())) { + // (Loop .. .. (IfProj (If (Bool (CmpI (ConvL2I (CastLL ))))))) + return true; + } + } else if (cmp_or_sub->Opcode() == Op_SubI && cmp_or_sub->in(1)->find_int_con(-1) == 0) { + for (DUIterator_Fast kmax, k = cmp_or_sub->fast_outs(kmax); k < kmax; k++) { + Node* cmp = cmp_or_sub->fast_out(k); + if (cmp->Opcode() == Op_CmpI) { + if (cmp_used_at_inner_loop_exit_test(cmp->as_Cmp())) { + // (Loop .. .. (IfProj (If (Bool (CmpI (SubI 0 (ConvL2I (CastLL )))))))) + return true; + } + } + } + } + } + } + } + return false; +} + Node* CastLLNode::Ideal(PhaseGVN* phase, bool can_reshape) { Node* progress = ConstraintCastNode::Ideal(phase, can_reshape); if (progress != nullptr) { @@ -352,7 +415,12 @@ Node* CastLLNode::Ideal(PhaseGVN* phase, bool can_reshape) { } } } - return optimize_integer_cast(phase, T_LONG); + // If it's a cast created by PhaseIdealLoop::try_make_short_running_loop(), don't transform it until the counted loop is created + // in the next loop opts pass + if (!can_reshape || !used_at_inner_loop_exit_test()) { + return optimize_integer_cast(phase, T_LONG); + } + return nullptr; } //------------------------------Value------------------------------------------ diff --git a/src/hotspot/share/opto/castnode.hpp b/src/hotspot/share/opto/castnode.hpp index 1b848e5efdf..3c6ade64aa8 100644 --- a/src/hotspot/share/opto/castnode.hpp +++ b/src/hotspot/share/opto/castnode.hpp @@ -138,6 +138,12 @@ public: } virtual const Type* Value(PhaseGVN* phase) const; + + static bool is_inner_loop_backedge(ProjNode* proj); + + static bool cmp_used_at_inner_loop_exit_test(CmpNode* cmp); + 
bool used_at_inner_loop_exit_test() const; + virtual Node* Ideal(PhaseGVN* phase, bool can_reshape); virtual int Opcode() const; virtual uint ideal_reg() const { return Op_RegL; } diff --git a/src/hotspot/share/opto/graphKit.cpp b/src/hotspot/share/opto/graphKit.cpp index 482189f7ec8..c58f7824c11 100644 --- a/src/hotspot/share/opto/graphKit.cpp +++ b/src/hotspot/share/opto/graphKit.cpp @@ -4050,6 +4050,11 @@ void GraphKit::add_parse_predicate(Deoptimization::DeoptReason reason, const int // Add Parse Predicates which serve as placeholders to create new Runtime Predicates above them. All // Runtime Predicates inside a Runtime Predicate block share the same uncommon trap as the Parse Predicate. void GraphKit::add_parse_predicates(int nargs) { + if (ShortRunningLongLoop) { + // Will narrow the limit down with a cast node. Predicates added later may depend on the cast so should be last when + // walking up from the loop. + add_parse_predicate(Deoptimization::Reason_short_running_long_loop, nargs); + } if (UseLoopPredicate) { add_parse_predicate(Deoptimization::Reason_predicate, nargs); if (UseProfiledLoopPredicate) { diff --git a/src/hotspot/share/opto/ifnode.cpp b/src/hotspot/share/opto/ifnode.cpp index 9daf2e6741e..b397c2c5852 100644 --- a/src/hotspot/share/opto/ifnode.cpp +++ b/src/hotspot/share/opto/ifnode.cpp @@ -2178,6 +2178,7 @@ ParsePredicateNode::ParsePredicateNode(Node* control, Deoptimization::DeoptReaso case Deoptimization::Reason_profile_predicate: case Deoptimization::Reason_auto_vectorization_check: case Deoptimization::Reason_loop_limit_check: + case Deoptimization::Reason_short_running_long_loop: break; default: assert(false, "unsupported deoptimization reason for Parse Predicate"); @@ -2226,6 +2227,9 @@ void ParsePredicateNode::dump_spec(outputStream* st) const { case Deoptimization::DeoptReason::Reason_loop_limit_check: st->print("Loop_Limit_Check "); break; + case Deoptimization::DeoptReason::Reason_short_running_long_loop: + 
st->print("Short_Running_Long_Loop "); + break; default: fatal("unknown kind"); } diff --git a/src/hotspot/share/opto/loopPredicate.cpp b/src/hotspot/share/opto/loopPredicate.cpp index 477ea48d419..561f3ce75cb 100644 --- a/src/hotspot/share/opto/loopPredicate.cpp +++ b/src/hotspot/share/opto/loopPredicate.cpp @@ -1054,7 +1054,7 @@ bool PhaseIdealLoop::loop_predication_impl_helper(IdealLoopTree* loop, IfProjNod #ifdef ASSERT const bool exact_trip_count = cl->has_exact_trip_count(); const uint trip_count = cl->trip_count(); - loop->compute_trip_count(this); + loop->compute_trip_count(this, T_INT); assert(exact_trip_count == cl->has_exact_trip_count() && trip_count == cl->trip_count(), "should have computed trip count on Loop Predication entry"); #endif @@ -1171,7 +1171,7 @@ bool PhaseIdealLoop::loop_predication_impl(IdealLoopTree* loop) { // Do nothing for iteration-splitted loops return false; } - loop->compute_trip_count(this); + loop->compute_trip_count(this, T_INT); if (cl->trip_count() == 1) { // Not worth to hoist checks out of a loop that is only run for one iteration since the checks are only going to // be executed once anyway. diff --git a/src/hotspot/share/opto/loopTransform.cpp b/src/hotspot/share/opto/loopTransform.cpp index cc680f66b62..5f5e0520e7e 100644 --- a/src/hotspot/share/opto/loopTransform.cpp +++ b/src/hotspot/share/opto/loopTransform.cpp @@ -96,11 +96,11 @@ void IdealLoopTree::record_for_igvn() { //------------------------------compute_exact_trip_count----------------------- // Compute loop trip count if possible. Do not recalculate trip count for // split loops (pre-main-post) which have their limits and inits behind Opaque node. 
-void IdealLoopTree::compute_trip_count(PhaseIdealLoop* phase) { - if (!_head->as_Loop()->is_valid_counted_loop(T_INT)) { +void IdealLoopTree::compute_trip_count(PhaseIdealLoop* phase, BasicType loop_bt) { + if (!_head->as_Loop()->is_valid_counted_loop(loop_bt)) { return; } - CountedLoopNode* cl = _head->as_CountedLoop(); + BaseCountedLoopNode* cl = _head->as_BaseCountedLoop(); // Trip count may become nonexact for iteration split loops since // RCE modifies limits. Note, _trip_count value is not reset since // it is used to limit unrolling of main loop. @@ -119,24 +119,62 @@ void IdealLoopTree::compute_trip_count(PhaseIdealLoop* phase) { Node* init_n = cl->init_trip(); Node* limit_n = cl->limit(); if (init_n != nullptr && limit_n != nullptr) { - // Use longs to avoid integer overflow. - int stride_con = cl->stride_con(); - const TypeInt* init_type = phase->_igvn.type(init_n)->is_int(); - const TypeInt* limit_type = phase->_igvn.type(limit_n)->is_int(); - jlong init_con = (stride_con > 0) ? init_type->_lo : init_type->_hi; - jlong limit_con = (stride_con > 0) ? limit_type->_hi : limit_type->_lo; - int stride_m = stride_con - (stride_con > 0 ? 1 : -1); - jlong trip_count = (limit_con - init_con + stride_m)/stride_con; + jlong stride_con = cl->stride_con(); + const TypeInteger* init_type = phase->_igvn.type(init_n)->is_integer(loop_bt); + const TypeInteger* limit_type = phase->_igvn.type(limit_n)->is_integer(loop_bt); + + // compute trip count + // It used to be computed as: + // max(1, limit_con - init_con + stride_m) / stride_con + // with stride_m = stride_con - (stride_con > 0 ? 1 : -1) + // for int counted loops only and by promoting all values to long to avoid overflow + // This implements the computation for int and long counted loops in a way that promotion to the next larger integer + // type is not needed to protect against overflow. 
+ // + // Use unsigned longs to avoid overflow: number of iterations is a positive number but can be really large for + // instance if init_con = min_jint, limit_con = max_jint + jlong init_con = (stride_con > 0) ? init_type->lo_as_long() : init_type->hi_as_long(); + julong uinit_con = init_con; + jlong limit_con = (stride_con > 0) ? limit_type->hi_as_long() : limit_type->lo_as_long(); + julong ulimit_con = limit_con; // The loop body is always executed at least once even if init >= limit (for stride_con > 0) or // init <= limit (for stride_con < 0). - trip_count = MAX2(trip_count, (jlong)1); - if (trip_count < (jlong)max_juint) { + julong udiff = 1; + if (stride_con > 0 && limit_con > init_con) { + udiff = ulimit_con - uinit_con; + } else if (stride_con < 0 && limit_con < init_con) { + udiff = uinit_con - ulimit_con; + } + // The loop runs for one more iteration if the limit is (stride > 0 in this example): + // init + k * stride + small_value, 0 < small_value < stride + julong utrip_count = udiff / ABS(stride_con); + if (utrip_count * ABS(stride_con) != udiff) { + // Guaranteed to not overflow because it can only happen for ABS(stride) > 1 in which case, utrip_count can't be + // max_juint/max_julong + utrip_count++; + } + +#ifdef ASSERT + if (loop_bt == T_INT) { + // Use longs to avoid integer overflow. + jlong init_con = (stride_con > 0) ? init_type->is_int()->_lo : init_type->is_int()->_hi; + jlong limit_con = (stride_con > 0) ? limit_type->is_int()->_hi : limit_type->is_int()->_lo; + int stride_m = stride_con - (stride_con > 0 ? 1 : -1); + jlong trip_count = (limit_con - init_con + stride_m) / stride_con; + // The loop body is always executed at least once even if init >= limit (for stride_con > 0) or + // init <= limit (for stride_con < 0). 
+ trip_count = MAX2(trip_count, (jlong)1); + assert(checked_cast<juint>(trip_count) == checked_cast<juint>(utrip_count), "incorrect trip count computation"); + } +#endif + + if (utrip_count < max_unsigned_integer(loop_bt)) { if (init_n->is_Con() && limit_n->is_Con()) { // Set exact trip count. - cl->set_exact_trip_count((uint)trip_count); - } else if (cl->unrolled_count() == 1) { + cl->set_exact_trip_count(utrip_count); + } else if (loop_bt == T_LONG || cl->as_CountedLoop()->unrolled_count() == 1) { // Set maximum trip count before unrolling. - cl->set_trip_count((uint)trip_count); + cl->set_trip_count(utrip_count); } } } @@ -1851,7 +1889,7 @@ void PhaseIdealLoop::do_unroll(IdealLoopTree *loop, Node_List &old_new, bool adj #ifndef PRODUCT if (TraceLoopOpts) { if (loop_head->trip_count() < (uint)LoopUnrollLimit) { - tty->print("Unroll %d(%2d) ", loop_head->unrolled_count()*2, loop_head->trip_count()); + tty->print("Unroll %d(" JULONG_FORMAT_W(2) ") ", loop_head->unrolled_count()*2, loop_head->trip_count()); } else { tty->print("Unroll %d ", loop_head->unrolled_count()*2); } @@ -2104,7 +2142,7 @@ void PhaseIdealLoop::do_maximally_unroll(IdealLoopTree *loop, Node_List &old_new assert(cl->trip_count() > 0, ""); #ifndef PRODUCT if (TraceLoopOpts) { - tty->print("MaxUnroll %d ", cl->trip_count()); + tty->print("MaxUnroll " JULONG_FORMAT " ", cl->trip_count()); loop->dump_head(); } #endif @@ -3359,7 +3397,7 @@ bool IdealLoopTree::iteration_split_impl(PhaseIdealLoop *phase, Node_List &old_n return false; } // Compute loop trip count if possible. - compute_trip_count(phase); + compute_trip_count(phase, T_INT); // Convert one-iteration loop into normal code. 
if (do_one_iteration_loop(phase)) { diff --git a/src/hotspot/share/opto/loopnode.cpp b/src/hotspot/share/opto/loopnode.cpp index e5efdb2a202..2c604e6d478 100644 --- a/src/hotspot/share/opto/loopnode.cpp +++ b/src/hotspot/share/opto/loopnode.cpp @@ -601,7 +601,6 @@ void PhaseIdealLoop::add_parse_predicate(Deoptimization::DeoptReason reason, Nod int trap_request = Deoptimization::make_trap_request(reason, Deoptimization::Action_maybe_recompile); address call_addr = OptoRuntime::uncommon_trap_blob()->entry_point(); const TypePtr* no_memory_effects = nullptr; - JVMState* jvms = sfpt->jvms(); CallNode* unc = new CallStaticJavaNode(OptoRuntime::uncommon_trap_Type(), call_addr, "uncommon_trap", no_memory_effects); @@ -856,8 +855,9 @@ bool PhaseIdealLoop::create_loop_nest(IdealLoopTree* loop, Node_List &old_new) { return false; } + assert(iters_limit > 0, "can't be negative"); + PhiNode* phi = head->phi()->as_Phi(); - Node* incr = head->incr(); Node* back_control = head->in(LoopNode::LoopBackControl); @@ -888,7 +888,7 @@ bool PhaseIdealLoop::create_loop_nest(IdealLoopTree* loop, Node_List &old_new) { // Take what we know about the number of iterations of the long counted loop into account when computing the limit of // the inner loop. - const Node* init = head->init_trip(); + Node* init = head->init_trip(); const TypeInteger* lo = _igvn.type(init)->is_integer(bt); const TypeInteger* hi = _igvn.type(limit)->is_integer(bt); if (stride_con < 0) { @@ -907,7 +907,7 @@ bool PhaseIdealLoop::create_loop_nest(IdealLoopTree* loop, Node_List &old_new) { // going to execute as many range checks once transformed with range checks eliminated (1 peeled iteration with // range checks + 2 predicates per range checks) as it would have not transformed. It also has to pay for the extra // logic on loop entry and for the outer loop. 
- loop->compute_trip_count(this); + loop->compute_trip_count(this, bt); if (head->is_CountedLoop() && head->as_CountedLoop()->has_exact_trip_count()) { if (head->as_CountedLoop()->trip_count() <= 3) { return false; @@ -920,6 +920,11 @@ bool PhaseIdealLoop::create_loop_nest(IdealLoopTree* loop, Node_List &old_new) { } } + if (try_make_short_running_loop(loop, stride_con, range_checks, iters_limit)) { + C->set_major_progress(); + return true; + } + julong orig_iters = (julong)hi->hi_as_long() - lo->lo_as_long(); iters_limit = checked_cast<int>(MIN2((julong)iters_limit, orig_iters)); @@ -1118,6 +1123,9 @@ bool PhaseIdealLoop::create_loop_nest(IdealLoopTree* loop, Node_List &old_new) { if (safepoint != nullptr) { SafePointNode* cloned_sfpt = old_new[safepoint->_idx]->as_SafePoint(); + if (ShortRunningLongLoop) { + add_parse_predicate(Deoptimization::Reason_short_running_long_loop, inner_head, outer_ilt, cloned_sfpt); + } if (UseLoopPredicate) { add_parse_predicate(Deoptimization::Reason_predicate, inner_head, outer_ilt, cloned_sfpt); if (UseProfiledLoopPredicate) { @@ -1147,6 +1155,215 @@ bool PhaseIdealLoop::create_loop_nest(IdealLoopTree* loop, Node_List &old_new) { return true; } +// Make a copy of Parse/Template Assertion predicates below existing predicates at the loop passed as argument +class CloneShortLoopPredicateVisitor : public PredicateVisitor { + ClonePredicateToTargetLoop _clone_predicate_to_loop; + PhaseIdealLoop* const _phase; + +public: + CloneShortLoopPredicateVisitor(LoopNode* target_loop_head, + const NodeInSingleLoopBody &node_in_loop_body, + PhaseIdealLoop* phase) + : _clone_predicate_to_loop(target_loop_head, node_in_loop_body, phase), + _phase(phase) { + } + NONCOPYABLE(CloneShortLoopPredicateVisitor); + + using PredicateVisitor::visit; + + void visit(const ParsePredicate& parse_predicate) override { + _clone_predicate_to_loop.clone_parse_predicate(parse_predicate, true); + parse_predicate.kill(_phase->igvn()); + } + + void visit(const 
TemplateAssertionPredicate& template_assertion_predicate) override { + _clone_predicate_to_loop.clone_template_assertion_predicate(template_assertion_predicate); + template_assertion_predicate.kill(_phase->igvn()); + } +}; + +// If the loop is either statically known to run for a small enough number of iterations or if profile data indicates +// that, we don't want an outer loop because the overhead of having an outer loop whose backedge is never taken, has a +// measurable cost. Furthermore, creating the loop nest usually causes one iteration of the loop to be peeled so +// predicates can be set up. If the loop is short running, then it's an extra iteration that's run with range checks +// (compared to an int counted loop with int range checks). +// +// In the short running case, turn the loop into a regular loop again and transform the long range checks: +// - LongCountedLoop: Create LoopNode but keep the loop limit type with a CastLL node to avoid that we later try to +// create a Loop Limit Check when turning the LoopNode into a CountedLoopNode. +// - CountedLoop: Can be reused. +bool PhaseIdealLoop::try_make_short_running_loop(IdealLoopTree* loop, jint stride_con, const Node_List &range_checks, + const uint iters_limit) { + if (!ShortRunningLongLoop) { + return false; + } + BaseCountedLoopNode* head = loop->_head->as_BaseCountedLoop(); + BasicType bt = head->bt(); + Node* entry_control = head->skip_strip_mined()->in(LoopNode::EntryControl); + + loop->compute_trip_count(this, bt); + // Loop must run for no more than iters_limit as it guarantees no overflow of scale * iv in long range checks (see + // comment above PhaseIdealLoop::transform_long_range_checks()). + // iters_limit / ABS(stride_con) is the largest trip count for which we know it's correct to not create a loop nest: + // it's always beneficial to have a single loop rather than a loop nest, so we try to apply this transformation as + // often as possible. 
+ bool known_short_running_loop = head->trip_count() <= iters_limit / ABS(stride_con); + bool profile_short_running_loop = false; + if (!known_short_running_loop) { + loop->compute_profile_trip_cnt(this); + if (StressShortRunningLongLoop) { + profile_short_running_loop = true; + } else { + profile_short_running_loop = !head->is_profile_trip_failed() && head->profile_trip_cnt() <= iters_limit / ABS(stride_con); + } + } + + if (!known_short_running_loop && !profile_short_running_loop) { + return false; + } + + Node* limit = head->limit(); + Node* init = head->init_trip(); + + Node* new_limit; + if (stride_con > 0) { + new_limit = SubNode::make(limit, init, bt); + } else { + new_limit = SubNode::make(init, limit, bt); + } + register_new_node(new_limit, entry_control); + + PhiNode* phi = head->phi()->as_Phi(); + if (profile_short_running_loop) { + // Add a Short Running Long Loop Predicate. It's the first predicate in the predicate chain before entering a loop + // because a cast that's control dependent on the Short Running Long Loop Predicate is added to narrow the limit and + // future predicates may be dependent on the new limit (so have to be between the loop and Short Running Long Loop + // Predicate). The current limit could, itself, be dependent on an existing predicate. Clone parse and template + // assertion predicates below existing predicates to get proper ordering of predicates when walking from the loop + // up: future predicates, Short Running Long Loop Predicate, existing predicates. 
+ // + // Existing Hoisted + // Check Predicates + // | + // New Short Running Long + // Loop Predicate + // | + // Cloned Parse Predicates and + // Template Assertion Predicates + // (future predicates added here) + // | + // Loop + const Predicates predicates_before_cloning(entry_control); + const PredicateBlock* short_running_long_loop_predicate_block = predicates_before_cloning.short_running_long_loop_predicate_block(); + if (!short_running_long_loop_predicate_block->has_parse_predicate()) { // already trapped + return false; + } + PredicateIterator predicate_iterator(entry_control); + NodeInSingleLoopBody node_in_short_loop_body(this, loop); + CloneShortLoopPredicateVisitor clone_short_loop_predicates_visitor(head, node_in_short_loop_body, this); + predicate_iterator.for_each(clone_short_loop_predicates_visitor); + + entry_control = head->skip_strip_mined()->in(LoopNode::EntryControl); + + const Predicates predicates_after_cloning(entry_control); + + ParsePredicateSuccessProj* short_running_loop_predicate_proj = predicates_after_cloning. 
+ short_running_long_loop_predicate_block()-> + parse_predicate_success_proj(); + assert(short_running_loop_predicate_proj->in(0)->is_ParsePredicate(), "must be parse predicate"); + + const jlong iters_limit_long = iters_limit; + Node* cmp_limit = CmpNode::make(new_limit, _igvn.integercon(iters_limit_long, bt), bt); + Node* bol = new BoolNode(cmp_limit, BoolTest::le); + Node* new_predicate_proj = create_new_if_for_predicate(short_running_loop_predicate_proj, + nullptr, + Deoptimization::Reason_short_running_long_loop, + Op_If); + Node* iff = new_predicate_proj->in(0); + _igvn.replace_input_of(iff, 1, bol); + register_new_node(cmp_limit, iff->in(0)); + register_new_node(bol, iff->in(0)); + new_limit = ConstraintCastNode::make_cast_for_basic_type(new_predicate_proj, new_limit, + TypeInteger::make(1, iters_limit_long, Type::WidenMin, bt), + ConstraintCastNode::UnconditionalDependency, bt); + register_new_node(new_limit, new_predicate_proj); + +#ifndef PRODUCT + if (TraceLoopLimitCheck) { + tty->print_cr("Short Long Loop Check Predicate generated:"); + DEBUG_ONLY(bol->dump(2);) + } +#endif + entry_control = head->skip_strip_mined()->in(LoopNode::EntryControl); + } else if (bt == T_LONG) { + // We're turning a long counted loop into a regular loop that will be converted into an int counted loop. That loop + // won't need loop limit check predicates (iters_limit guarantees that). Add a cast to make sure that, whatever + // transformation happens by the time the counted loop is created (in a subsequent pass of loop opts), C2 knows + // enough about the loop's limit that it doesn't try to add loop limit check predicates. 
+ const Predicates predicates(entry_control); + const TypeLong* new_limit_t = new_limit->Value(&_igvn)->is_long(); + new_limit = ConstraintCastNode::make_cast_for_basic_type(predicates.entry(), new_limit, + TypeLong::make(0, new_limit_t->_hi, new_limit_t->_widen), + ConstraintCastNode::UnconditionalDependency, bt); + register_new_node(new_limit, predicates.entry()); + } else { + assert(bt == T_INT && known_short_running_loop, "only CountedLoop statically known to be short running"); + } + IfNode* exit_test = head->loopexit(); + + if (bt == T_LONG) { + // The loop is short running so new_limit fits into an int: either we determined that statically or added a guard + new_limit = new ConvL2INode(new_limit); + register_new_node(new_limit, entry_control); + } + + Node* int_zero = intcon(0); + if (stride_con < 0) { + new_limit = new SubINode(int_zero, new_limit); + register_new_node(new_limit, entry_control); + } + + // Clone the iv data nodes as an integer iv + Node* int_stride = intcon(stride_con); + Node* inner_phi = new PhiNode(head, TypeInt::INT); + Node* inner_incr = new AddINode(inner_phi, int_stride); + Node* inner_cmp = new CmpINode(inner_incr, new_limit); + Node* inner_bol = new BoolNode(inner_cmp, exit_test->in(1)->as_Bool()->_test._test); + inner_phi->set_req(LoopNode::EntryControl, int_zero); + inner_phi->set_req(LoopNode::LoopBackControl, inner_incr); + register_new_node(inner_phi, head); + register_new_node(inner_incr, head); + register_new_node(inner_cmp, head); + register_new_node(inner_bol, head); + + _igvn.replace_input_of(exit_test, 1, inner_bol); + + // Replace inner loop long iv phi as inner loop int iv phi + outer + // loop iv phi + Node* iv_add = loop_nest_replace_iv(phi, inner_phi, init, head, bt); + + LoopNode* inner_head = head; + if (bt == T_LONG) { + // Turn the loop back to a counted loop + inner_head = create_inner_head(loop, head, exit_test); + } else { + // Use existing counted loop + revert_to_normal_loop(head); + } + + if (bt == T_INT) 
{ + init = new ConvI2LNode(init); + register_new_node(init, entry_control); + } + + transform_long_range_checks(stride_con, range_checks, init, new_limit, + inner_phi, iv_add, inner_head); + + inner_head->mark_loop_nest_inner_loop(); + + return true; +} + int PhaseIdealLoop::extract_long_range_checks(const IdealLoopTree* loop, jint stride_con, int iters_limit, PhiNode* phi, Node_List& range_checks) { const jlong min_iters = 2; @@ -1318,7 +1535,6 @@ void PhaseIdealLoop::transform_long_range_checks(int stride_con, const Node_List for (uint i = 0; i < range_checks.size(); i++) { ProjNode* proj = range_checks.at(i)->as_Proj(); - ProjNode* unc_proj = proj->other_if_proj(); RangeCheckNode* rc = proj->in(0)->as_RangeCheck(); jlong scale = 0; Node* offset = nullptr; @@ -4415,6 +4631,9 @@ void IdealLoopTree::dump_head() { if (predicates.loop_limit_check_predicate_block()->is_non_empty()) { tty->print(" limit_check"); } + if (predicates.short_running_long_loop_predicate_block()->is_non_empty()) { + tty->print(" short_running"); + } if (UseLoopPredicate) { if (UseProfiledLoopPredicate && predicates.profiled_loop_predicate_block()->is_non_empty()) { tty->print(" profile_predicated"); @@ -4922,7 +5141,7 @@ void PhaseIdealLoop::build_and_optimize() { for (LoopTreeIterator iter(_ltree_root); !iter.done(); iter.next()) { IdealLoopTree* lpt = iter.current(); if (lpt->is_innermost() && lpt->_allow_optimizations && !lpt->_has_call && lpt->is_counted()) { - lpt->compute_trip_count(this); + lpt->compute_trip_count(this, T_INT); if (!lpt->do_one_iteration_loop(this) && !lpt->do_remove_empty_loop(this)) { AutoNodeBudget node_budget(this); diff --git a/src/hotspot/share/opto/loopnode.hpp b/src/hotspot/share/opto/loopnode.hpp index 15206c1a351..27e397790d4 100644 --- a/src/hotspot/share/opto/loopnode.hpp +++ b/src/hotspot/share/opto/loopnode.hpp @@ -218,6 +218,18 @@ public: jlong stride_con() const; static BaseCountedLoopNode* make(Node* entry, Node* backedge, BasicType bt); + + virtual 
void set_trip_count(julong tc) = 0; + virtual julong trip_count() const = 0; + + bool has_exact_trip_count() const { return (_loop_flags & HasExactTripCount) != 0; } + void set_exact_trip_count(julong tc) { + set_trip_count(tc); + _loop_flags |= HasExactTripCount; + } + void set_nonexact_trip_count() { + _loop_flags &= ~HasExactTripCount; + } }; @@ -298,26 +310,17 @@ public: int main_idx() const { return _main_idx; } + void set_trip_count(julong tc) { + assert(tc < max_juint, "Cannot set trip count to max_juint"); + _trip_count = checked_cast<uint>(tc); + } + julong trip_count() const { return _trip_count; } void set_pre_loop (CountedLoopNode *main) { assert(is_normal_loop(),""); _loop_flags |= Pre ; _main_idx = main->_idx; } void set_main_loop ( ) { assert(is_normal_loop(),""); _loop_flags |= Main; } void set_post_loop (CountedLoopNode *main) { assert(is_normal_loop(),""); _loop_flags |= Post; _main_idx = main->_idx; } void set_normal_loop( ) { _loop_flags &= ~PreMainPostFlagsMask; } - // We use max_juint for the default value of _trip_count to signal it wasn't set. - // We shouldn't set _trip_count to max_juint explicitly. 
- void set_trip_count(uint tc) { assert(tc < max_juint, "Cannot set trip count to max_juint"); _trip_count = tc; } - uint trip_count() { return _trip_count; } - - bool has_exact_trip_count() const { return (_loop_flags & HasExactTripCount) != 0; } - void set_exact_trip_count(uint tc) { - assert(tc < max_juint, "Cannot set trip count to max_juint"); - _trip_count = tc; - _loop_flags |= HasExactTripCount; - } - void set_nonexact_trip_count() { - _loop_flags &= ~HasExactTripCount; - } void set_notpassed_slp() { _loop_flags &= ~PassedSlpAnalysis; } @@ -380,9 +383,15 @@ public: }; class LongCountedLoopNode : public BaseCountedLoopNode { +private: + virtual uint size_of() const { return sizeof(*this); } + + // Known trip count calculated by compute_exact_trip_count() + julong _trip_count; + public: LongCountedLoopNode(Node *entry, Node *backedge) - : BaseCountedLoopNode(entry, backedge) { + : BaseCountedLoopNode(entry, backedge), _trip_count(max_julong) { init_class_id(Class_LongCountedLoop); } @@ -392,6 +401,12 @@ public: return T_LONG; } + void set_trip_count(julong tc) { + assert(tc < max_julong, "Cannot set trip count to max_julong"); + _trip_count = tc; + } + julong trip_count() const { return _trip_count; } + LongCountedLoopEndNode* loopexit_or_null() const { return (LongCountedLoopEndNode*) BaseCountedLoopNode::loopexit_or_null(); } LongCountedLoopEndNode* loopexit() const { return (LongCountedLoopEndNode*) BaseCountedLoopNode::loopexit(); } }; @@ -778,7 +793,7 @@ public: uint est_loop_unroll_sz(uint factor) const; // Compute loop trip count if possible - void compute_trip_count(PhaseIdealLoop* phase); + void compute_trip_count(PhaseIdealLoop* phase, BasicType bt); // Compute loop trip count from profile data float compute_profile_trip_cnt_helper(Node* n); @@ -1829,6 +1844,8 @@ public: Node* ensure_node_and_inputs_are_above_pre_end(CountedLoopEndNode* pre_end, Node* node); + bool try_make_short_running_loop(IdealLoopTree* loop, jint stride_con, const Node_List& 
range_checks, const uint iters_limit); + ConINode* intcon(jint i); ConLNode* longcon(jlong i); diff --git a/src/hotspot/share/opto/predicates.cpp b/src/hotspot/share/opto/predicates.cpp index 137d16712d8..da9f704ee8d 100644 --- a/src/hotspot/share/opto/predicates.cpp +++ b/src/hotspot/share/opto/predicates.cpp @@ -82,12 +82,11 @@ ParsePredicateNode* ParsePredicate::init_parse_predicate(const Node* parse_predi return nullptr; } -ParsePredicate ParsePredicate::clone_to_unswitched_loop(Node* new_control, const bool is_false_path_loop, - PhaseIdealLoop* phase) const { +ParsePredicate ParsePredicate::clone_to_loop(Node* new_control, const bool rewire_uncommon_proj_phi_inputs, + PhaseIdealLoop* phase) const { ParsePredicateSuccessProj* success_proj = phase->create_new_if_for_predicate(_success_proj, new_control, _parse_predicate_node->deopt_reason(), - Op_ParsePredicate, is_false_path_loop); - NOT_PRODUCT(trace_cloned_parse_predicate(is_false_path_loop, success_proj)); + Op_ParsePredicate, rewire_uncommon_proj_phi_inputs); return ParsePredicate(success_proj, _parse_predicate_node->deopt_reason()); } @@ -97,11 +96,10 @@ void ParsePredicate::kill(PhaseIterGVN& igvn) const { } #ifndef PRODUCT -void ParsePredicate::trace_cloned_parse_predicate(const bool is_false_path_loop, - const ParsePredicateSuccessProj* success_proj) { - if (TraceLoopPredicate) { +void ParsePredicate::trace_cloned_parse_predicate(const bool is_false_path_loop) const { + if (TraceLoopUnswitching) { tty->print("Parse Predicate cloned to %s path loop: ", is_false_path_loop ? 
"false" : "true"); - success_proj->in(0)->dump(); + head()->dump(); } } #endif // NOT PRODUCT @@ -126,6 +124,7 @@ bool RuntimePredicate::has_valid_uncommon_trap(const Node* success_proj) { assert(RegularPredicate::may_be_predicate_if(success_proj), "must have been checked before"); const Deoptimization::DeoptReason deopt_reason = uncommon_trap_reason(success_proj->as_IfProj()); return (deopt_reason == Deoptimization::Reason_loop_limit_check || + deopt_reason == Deoptimization::Reason_short_running_long_loop || deopt_reason == Deoptimization::Reason_auto_vectorization_check || deopt_reason == Deoptimization::Reason_predicate || deopt_reason == Deoptimization::Reason_profile_predicate); @@ -941,6 +940,8 @@ void Predicates::dump() const { _profiled_loop_predicate_block.dump(" "); tty->print_cr("- Loop Predicate Block:"); _loop_predicate_block.dump(" "); + tty->print_cr("- Short Running Long Loop Predicate Block:"); + _short_running_long_loop_predicate_block.dump(" "); tty->cr(); } else { tty->print_cr(""); @@ -999,6 +1000,10 @@ InitializedAssertionPredicate CreateAssertionPredicatesVisitor::initialize_from_ return initialized_assertion_predicate; } +bool NodeInSingleLoopBody::check_node_in_loop_body(Node* node) const { + return _phase->is_member(_ilt, _phase->get_ctrl(node)); +} + // Clone the provided Template Assertion Predicate and set '_init' as new input for the OpaqueLoopInitNode. 
TemplateAssertionPredicate CreateAssertionPredicatesVisitor::clone_template_and_replace_init_input( const TemplateAssertionPredicate& template_assertion_predicate) const { @@ -1108,11 +1113,18 @@ void CloneUnswitchedLoopPredicatesVisitor::visit(const ParsePredicate& parse_pre if (_is_counted_loop && deopt_reason == Deoptimization::Reason_loop_limit_check) { return; } - _clone_predicate_to_true_path_loop.clone_parse_predicate(parse_predicate, false); - _clone_predicate_to_false_path_loop.clone_parse_predicate(parse_predicate, true); + clone_parse_predicate(parse_predicate, false); + clone_parse_predicate(parse_predicate, true); parse_predicate.kill(_phase->igvn()); } +void CloneUnswitchedLoopPredicatesVisitor::clone_parse_predicate(const ParsePredicate& parse_predicate, + const bool is_false_path_loop) { + ClonePredicateToTargetLoop& clone_predicate_to_loop = is_false_path_loop ? _clone_predicate_to_false_path_loop : _clone_predicate_to_true_path_loop; + const ParsePredicate cloned_parse_predicate = clone_predicate_to_loop.clone_parse_predicate(parse_predicate, is_false_path_loop); + NOT_PRODUCT(cloned_parse_predicate.trace_cloned_parse_predicate(is_false_path_loop);) +} + // Clone the Template Assertion Predicate, which is currently found before the newly added unswitched loop selector, // to the true path and false path loop. void CloneUnswitchedLoopPredicatesVisitor::visit(const TemplateAssertionPredicate& template_assertion_predicate) { diff --git a/src/hotspot/share/opto/predicates.hpp b/src/hotspot/share/opto/predicates.hpp index 2181e498bd4..ef7c5600853 100644 --- a/src/hotspot/share/opto/predicates.hpp +++ b/src/hotspot/share/opto/predicates.hpp @@ -73,6 +73,14 @@ class TemplateAssertionPredicate; * counted loop to avoid these overflow problems. * The predicate does not replace an actual check inside the loop. This predicate can only * be added once above the Loop Limit Check Parse Predicate for a loop. 
+ * - Short: This predicate is created when a long counted loop is transformed into an int counted + * Running Long loop. In general, that transformation requires an outer loop to guarantee that the new + * Loop loop nest iterates over the entire range of the loop before transformation. However, if the + * Predicate loop is speculated to run for a small enough number of iterations, the outer loop is not + * needed. This predicate is added to catch mis-speculation in this case. It also applies to + * int counted loops with long range checks for which a loop nest also needs to be created + * in the general case (so the transformation of long range checks to int range checks is + * legal). * - Assertion Predicate: An always true predicate which will never fail (its range is already covered by an earlier * Hoisted Check Predicate or the main-loop entry guard) but is required in order to fold away a * dead sub loop in which some data could be proven to be dead (by the type system) and replaced @@ -288,8 +296,6 @@ class ParsePredicate : public Predicate { } static ParsePredicateNode* init_parse_predicate(const Node* parse_predicate_proj, Deoptimization::DeoptReason deopt_reason); - NOT_PRODUCT(static void trace_cloned_parse_predicate(bool is_false_path_loop, - const ParsePredicateSuccessProj* success_proj);) public: ParsePredicate(Node* parse_predicate_proj, Deoptimization::DeoptReason deopt_reason) @@ -320,8 +326,8 @@ class ParsePredicate : public Predicate { return _success_proj; } - ParsePredicate clone_to_unswitched_loop(Node* new_control, bool is_false_path_loop, - PhaseIdealLoop* phase) const; + ParsePredicate clone_to_loop(Node* new_control, bool rewire_uncommon_proj_phi_inputs, PhaseIdealLoop* phase) const; + NOT_PRODUCT(void trace_cloned_parse_predicate(bool is_false_path_loop) const;) void kill(PhaseIterGVN& igvn) const; }; @@ -786,7 +792,8 @@ class PredicateIterator : public StackObj { PredicateBlockIterator loop_predicate_iterator(current_node, 
Deoptimization::Reason_predicate); current_node = loop_predicate_iterator.for_each(predicate_visitor); } - return current_node; + PredicateBlockIterator short_running_loop_predicate_iterator(current_node, Deoptimization::Reason_short_running_long_loop); + return short_running_loop_predicate_iterator.for_each(predicate_visitor); } }; @@ -953,6 +960,7 @@ class Predicates : public StackObj { const PredicateBlock _auto_vectorization_check_block; const PredicateBlock _profiled_loop_predicate_block; const PredicateBlock _loop_predicate_block; + const PredicateBlock _short_running_long_loop_predicate_block; Node* const _entry; public: @@ -965,7 +973,9 @@ class Predicates : public StackObj { Deoptimization::Reason_profile_predicate), _loop_predicate_block(_profiled_loop_predicate_block.entry(), Deoptimization::Reason_predicate), - _entry(_loop_predicate_block.entry()) {} + _short_running_long_loop_predicate_block(_loop_predicate_block.entry(), + Deoptimization::Reason_short_running_long_loop), + _entry(_short_running_long_loop_predicate_block.entry()) {} NONCOPYABLE(Predicates); // Returns the control input the first predicate if there are any predicates. If there are no predicates, the same @@ -990,6 +1000,10 @@ class Predicates : public StackObj { return &_loop_limit_check_predicate_block; } + const PredicateBlock* short_running_long_loop_predicate_block() const { + return &_short_running_long_loop_predicate_block; + } + bool has_any() const { return _entry != _tail; } @@ -1082,6 +1096,19 @@ class NodeInClonedLoopBody : public NodeInLoopBody { } }; +// This class checks whether a node is in the loop body passed to the constructor. 
+class NodeInSingleLoopBody : public NodeInLoopBody { + PhaseIdealLoop* const _phase; + IdealLoopTree* const _ilt; + +public: + NodeInSingleLoopBody(PhaseIdealLoop* phase, IdealLoopTree* ilt) : _phase(phase), _ilt(ilt) { + } + NONCOPYABLE(NodeInSingleLoopBody); + + bool check_node_in_loop_body(Node* node) const override; +}; + // Visitor to create Initialized Assertion Predicates at a target loop from Template Assertion Predicates from a source // loop. This visitor can be used in combination with a PredicateIterator. class CreateAssertionPredicatesVisitor : public PredicateVisitor { @@ -1158,10 +1185,11 @@ public: ClonePredicateToTargetLoop(LoopNode* target_loop_head, const NodeInLoopBody& node_in_loop_body, PhaseIdealLoop* phase); // Clones the provided Parse Predicate to the head of the current predicate chain at the target loop. - void clone_parse_predicate(const ParsePredicate& parse_predicate, bool is_false_path_loop) { - ParsePredicate cloned_parse_predicate = parse_predicate.clone_to_unswitched_loop(_old_target_loop_entry, - is_false_path_loop, _phase); + ParsePredicate clone_parse_predicate(const ParsePredicate& parse_predicate, bool rewire_uncommon_proj_phi_inputs) { + ParsePredicate cloned_parse_predicate = parse_predicate.clone_to_loop(_old_target_loop_entry, + rewire_uncommon_proj_phi_inputs, _phase); _target_loop_predicate_chain.insert_predicate(cloned_parse_predicate); + return cloned_parse_predicate; } void clone_template_assertion_predicate(const TemplateAssertionPredicate& template_assertion_predicate); @@ -1189,6 +1217,9 @@ class CloneUnswitchedLoopPredicatesVisitor : public PredicateVisitor { using PredicateVisitor::visit; void visit(const ParsePredicate& parse_predicate) override; + + void clone_parse_predicate(const ParsePredicate &parse_predicate, + bool is_false_path_loop); void visit(const TemplateAssertionPredicate& template_assertion_predicate) override; }; diff --git a/src/hotspot/share/runtime/deoptimization.cpp 
b/src/hotspot/share/runtime/deoptimization.cpp index 110f4ca8c07..5f9a80cf100 100644 --- a/src/hotspot/share/runtime/deoptimization.cpp +++ b/src/hotspot/share/runtime/deoptimization.cpp @@ -2767,8 +2767,8 @@ const char* Deoptimization::_trap_reason_name[] = { "unstable_if", "unstable_fused_if", "receiver_constraint", + "short_running_loop" JVMCI_ONLY("_or_aliasing"), #if INCLUDE_JVMCI - "aliasing", "transfer_to_interpreter", "not_compiled_exception_handler", "unresolved", diff --git a/src/hotspot/share/runtime/deoptimization.hpp b/src/hotspot/share/runtime/deoptimization.hpp index 42cf25e5162..5d97e2056ad 100644 --- a/src/hotspot/share/runtime/deoptimization.hpp +++ b/src/hotspot/share/runtime/deoptimization.hpp @@ -117,8 +117,9 @@ class Deoptimization : AllStatic { Reason_unstable_if, // a branch predicted always false was taken Reason_unstable_fused_if, // fused two ifs that had each one untaken branch. One is now taken. Reason_receiver_constraint, // receiver subtype check failed + Reason_short_running_long_loop, // profile reports loop runs for small number of iterations #if INCLUDE_JVMCI - Reason_aliasing, // optimistic assumption about aliasing failed + Reason_aliasing = Reason_short_running_long_loop, // optimistic assumption about aliasing failed Reason_transfer_to_interpreter, // explicit transferToInterpreter() Reason_not_compiled_exception_handler, Reason_unresolved, diff --git a/src/hotspot/share/runtime/vmStructs.cpp b/src/hotspot/share/runtime/vmStructs.cpp index 71cba9ec085..6fc16f9b045 100644 --- a/src/hotspot/share/runtime/vmStructs.cpp +++ b/src/hotspot/share/runtime/vmStructs.cpp @@ -1566,6 +1566,7 @@ declare_constant(Deoptimization::Reason_age) \ declare_constant(Deoptimization::Reason_predicate) \ declare_constant(Deoptimization::Reason_loop_limit_check) \ + declare_constant(Deoptimization::Reason_short_running_long_loop) \ declare_constant(Deoptimization::Reason_auto_vectorization_check) \ 
declare_constant(Deoptimization::Reason_speculate_class_check) \ declare_constant(Deoptimization::Reason_speculate_null_check) \ @@ -1573,7 +1574,6 @@ declare_constant(Deoptimization::Reason_unstable_if) \ declare_constant(Deoptimization::Reason_unstable_fused_if) \ declare_constant(Deoptimization::Reason_receiver_constraint) \ - NOT_ZERO(JVMCI_ONLY(declare_constant(Deoptimization::Reason_aliasing))) \ NOT_ZERO(JVMCI_ONLY(declare_constant(Deoptimization::Reason_transfer_to_interpreter))) \ NOT_ZERO(JVMCI_ONLY(declare_constant(Deoptimization::Reason_not_compiled_exception_handler))) \ NOT_ZERO(JVMCI_ONLY(declare_constant(Deoptimization::Reason_unresolved))) \ diff --git a/src/hotspot/share/utilities/globalDefinitions.hpp b/src/hotspot/share/utilities/globalDefinitions.hpp index 46daa867644..f6d162a81e4 100644 --- a/src/hotspot/share/utilities/globalDefinitions.hpp +++ b/src/hotspot/share/utilities/globalDefinitions.hpp @@ -148,6 +148,9 @@ class oopDesc; #ifndef JULONG_FORMAT_X #define JULONG_FORMAT_X UINT64_FORMAT_X #endif +#ifndef JULONG_FORMAT_W +#define JULONG_FORMAT_W(width) UINT64_FORMAT_W(width) +#endif // Format pointers and padded integral values which change size between 32- and 64-bit. #ifdef _LP64 @@ -771,6 +774,14 @@ inline jlong min_signed_integer(BasicType bt) { return min_jlong; } +inline julong max_unsigned_integer(BasicType bt) { + if (bt == T_INT) { + return max_juint; + } + assert(bt == T_LONG, "unsupported"); + return max_julong; +} + inline uint bits_per_java_integer(BasicType bt) { if (bt == T_INT) { return BitsPerJavaInteger; diff --git a/test/hotspot/jtreg/compiler/c2/irTests/TestLongRangeChecks.java b/test/hotspot/jtreg/compiler/c2/irTests/TestLongRangeChecks.java index 479a2a45cb9..de829f84775 100644 --- a/test/hotspot/jtreg/compiler/c2/irTests/TestLongRangeChecks.java +++ b/test/hotspot/jtreg/compiler/c2/irTests/TestLongRangeChecks.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2022, Red Hat, Inc. All rights reserved. 
+ * Copyright (c) 2021, 2022, 2025 Red Hat, Inc. All rights reserved. * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -38,6 +38,9 @@ import java.util.Objects; public class TestLongRangeChecks { public static void main(String[] args) { + TestFramework.runWithFlags("-XX:-ShortRunningLongLoop", "-XX:+TieredCompilation", "-XX:-UseCountedLoopSafepoints", "-XX:LoopUnrollLimit=0"); + TestFramework.runWithFlags("-XX:-ShortRunningLongLoop", "-XX:+TieredCompilation", "-XX:+UseCountedLoopSafepoints", "-XX:LoopStripMiningIter=1", "-XX:LoopUnrollLimit=0"); + TestFramework.runWithFlags("-XX:-ShortRunningLongLoop", "-XX:+TieredCompilation", "-XX:+UseCountedLoopSafepoints", "-XX:LoopStripMiningIter=1000", "-XX:LoopUnrollLimit=0"); TestFramework.runWithFlags("-XX:+TieredCompilation", "-XX:-UseCountedLoopSafepoints", "-XX:LoopUnrollLimit=0"); TestFramework.runWithFlags("-XX:+TieredCompilation", "-XX:+UseCountedLoopSafepoints", "-XX:LoopStripMiningIter=1", "-XX:LoopUnrollLimit=0"); TestFramework.runWithFlags("-XX:+TieredCompilation", "-XX:+UseCountedLoopSafepoints", "-XX:LoopStripMiningIter=1000", "-XX:LoopUnrollLimit=0"); @@ -45,7 +48,8 @@ public class TestLongRangeChecks { @Test - @IR(counts = { IRNode.LOOP, "1" }) + @IR(applyIf = { "ShortRunningLongLoop", "false" }, counts = { IRNode.LOOP, "1" }) + @IR(applyIf = { "ShortRunningLongLoop", "true" }, failOn = IRNode.LOOP) @IR(failOn = { IRNode.COUNTED_LOOP}) public static void testStridePosScalePos(long start, long stop, long length, long offset) { final long scale = 1; @@ -66,7 +70,8 @@ public class TestLongRangeChecks { } @Test - @IR(counts = { IRNode.LOOP, "1" }) + @IR(applyIf = { "ShortRunningLongLoop", "false" }, counts = { IRNode.LOOP, "1" }) + @IR(applyIf = { "ShortRunningLongLoop", "true" }, failOn = IRNode.LOOP) @IR(failOn = { IRNode.COUNTED_LOOP}) public static void testStridePosScalePosInIntLoop1(int start, int stop, long length, 
long offset) { final long scale = 2; @@ -84,7 +89,8 @@ public class TestLongRangeChecks { } @Test - @IR(counts = { IRNode.LOOP, "1" }) + @IR(applyIf = { "ShortRunningLongLoop", "false" }, counts = { IRNode.LOOP, "1" }) + @IR(applyIf = { "ShortRunningLongLoop", "true" }, failOn = IRNode.LOOP) @IR(failOn = { IRNode.COUNTED_LOOP}) public static void testStridePosScalePosInIntLoop2(int start, int stop, long length, long offset) { final int scale = 2; @@ -102,7 +108,8 @@ public class TestLongRangeChecks { } @Test - @IR(counts = { IRNode.LOOP, "1"}) + @IR(applyIf = { "ShortRunningLongLoop", "false" }, counts = { IRNode.LOOP, "1" }) + @IR(applyIf = { "ShortRunningLongLoop", "true" }, failOn = IRNode.LOOP) @IR(failOn = { IRNode.COUNTED_LOOP}) public static void testStrideNegScaleNeg(long start, long stop, long length, long offset) { final long scale = -1; @@ -118,7 +125,8 @@ public class TestLongRangeChecks { } @Test - @IR(counts = { IRNode.LOOP, "1" }) + @IR(applyIf = { "ShortRunningLongLoop", "false" }, counts = { IRNode.LOOP, "1" }) + @IR(applyIf = { "ShortRunningLongLoop", "true" }, failOn = IRNode.LOOP) @IR(failOn = { IRNode.COUNTED_LOOP}) public static void testStrideNegScaleNegInIntLoop1(int start, int stop, long length, long offset) { final long scale = -2; @@ -135,7 +143,8 @@ public class TestLongRangeChecks { } @Test - @IR(counts = { IRNode.LOOP, "1" }) + @IR(applyIf = { "ShortRunningLongLoop", "false" }, counts = { IRNode.LOOP, "1" }) + @IR(applyIf = { "ShortRunningLongLoop", "true" }, failOn = IRNode.LOOP) @IR(failOn = { IRNode.COUNTED_LOOP}) public static void testStrideNegScaleNegInIntLoop2(int start, int stop, long length, long offset) { final int scale = -2; @@ -152,7 +161,8 @@ public class TestLongRangeChecks { } @Test - @IR(counts = { IRNode.LOOP, "1"}) + @IR(applyIf = { "ShortRunningLongLoop", "false" }, counts = { IRNode.LOOP, "1" }) + @IR(applyIf = { "ShortRunningLongLoop", "true" }, failOn = IRNode.LOOP) @IR(failOn = { IRNode.COUNTED_LOOP}) public 
static void testStrideNegScalePos(long start, long stop, long length, long offset) { final long scale = 1; @@ -168,7 +178,8 @@ public class TestLongRangeChecks { } @Test - @IR(counts = { IRNode.LOOP, "1" }) + @IR(applyIf = { "ShortRunningLongLoop", "false" }, counts = { IRNode.LOOP, "1" }) + @IR(applyIf = { "ShortRunningLongLoop", "true" }, failOn = IRNode.LOOP) @IR(failOn = { IRNode.COUNTED_LOOP}) public static void testStrideNegScalePosInIntLoop1(int start, int stop, long length, long offset) { final long scale = 2; @@ -184,7 +195,8 @@ public class TestLongRangeChecks { } @Test - @IR(counts = { IRNode.LOOP, "1" }) + @IR(applyIf = { "ShortRunningLongLoop", "false" }, counts = { IRNode.LOOP, "1" }) + @IR(applyIf = { "ShortRunningLongLoop", "true" }, failOn = IRNode.LOOP) @IR(failOn = { IRNode.COUNTED_LOOP}) public static void testStrideNegScalePosInIntLoop2(int start, int stop, long length, long offset) { final int scale = 2; @@ -200,7 +212,8 @@ public class TestLongRangeChecks { } @Test - @IR(counts = { IRNode.LOOP, "1"}) + @IR(applyIf = { "ShortRunningLongLoop", "false" }, counts = { IRNode.LOOP, "1" }) + @IR(applyIf = { "ShortRunningLongLoop", "true" }, failOn = IRNode.LOOP) @IR(failOn = { IRNode.COUNTED_LOOP}) public static void testStridePosScaleNeg(long start, long stop, long length, long offset) { final long scale = -1; @@ -216,7 +229,8 @@ public class TestLongRangeChecks { } @Test - @IR(counts = { IRNode.LOOP, "1"}) + @IR(applyIf = { "ShortRunningLongLoop", "false" }, counts = { IRNode.LOOP, "1" }) + @IR(applyIf = { "ShortRunningLongLoop", "true" }, failOn = IRNode.LOOP) @IR(failOn = { IRNode.COUNTED_LOOP}) public static void testStridePosScaleNegInIntLoop1(int start, int stop, long length, long offset) { final long scale = -2; @@ -232,7 +246,8 @@ public class TestLongRangeChecks { } @Test - @IR(counts = { IRNode.LOOP, "1"}) + @IR(applyIf = { "ShortRunningLongLoop", "false" }, counts = { IRNode.LOOP, "1" }) + @IR(applyIf = { "ShortRunningLongLoop", "true" 
}, failOn = IRNode.LOOP) @IR(failOn = { IRNode.COUNTED_LOOP}) public static void testStridePosScaleNegInIntLoop2(int start, int stop, long length, long offset) { final int scale = -2; diff --git a/test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java b/test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java index 01bc13482fd..1b843e27587 100644 --- a/test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java +++ b/test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java @@ -1663,6 +1663,11 @@ public class IRNode { trapNodes(RANGE_CHECK_TRAP, "range_check"); } + public static final String SHORT_RUNNING_LOOP_TRAP = PREFIX + "SHORT_RUNNING_LOOP_TRAP" + POSTFIX; + static { + trapNodes(SHORT_RUNNING_LOOP_TRAP, "short_running_loop"); + } + public static final String REINTERPRET_S2HF = PREFIX + "REINTERPRET_S2HF" + POSTFIX; static { beforeMatchingNameRegex(REINTERPRET_S2HF, "ReinterpretS2HF"); diff --git a/test/hotspot/jtreg/compiler/longcountedloops/TestShortLoopLostLimit.java b/test/hotspot/jtreg/compiler/longcountedloops/TestShortLoopLostLimit.java new file mode 100644 index 00000000000..117bbab53e5 --- /dev/null +++ b/test/hotspot/jtreg/compiler/longcountedloops/TestShortLoopLostLimit.java @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2025, Red Hat, Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/** + * @test + * @bug 8342692 + * @summary C2: long counted loop/long range checks: don't create loop-nest for short running loops + * @run main/othervm -XX:-TieredCompilation -XX:-UseOnStackReplacement -XX:-BackgroundCompilation TestShortLoopLostLimit + * @run main/othervm TestShortLoopLostLimit + */ + +public class TestShortLoopLostLimit { + private static volatile int volatileField; + + public static void main(String[] args) { + for (int i = 0; i < 20_000; i++) { + test1(0, 100); + test2(0, 100); + } + } + + private static void test1(int a, long b) { + for (long i = 0; i < a + b; i += 2) { + volatileField = 42; + } + } + + private static void test2(int a, long b) { + for (long i = a + b; i > 0; i -= 2) { + volatileField = 42; + } + } +} diff --git a/test/hotspot/jtreg/compiler/longcountedloops/TestShortRunningIntLoopWithLongChecksPredicates.java b/test/hotspot/jtreg/compiler/longcountedloops/TestShortRunningIntLoopWithLongChecksPredicates.java new file mode 100644 index 00000000000..fcee53371ca --- /dev/null +++ b/test/hotspot/jtreg/compiler/longcountedloops/TestShortRunningIntLoopWithLongChecksPredicates.java @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2025, Red Hat, Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/** + * @test + * @bug 8342692 + * @summary C2: long counted loop/long range checks: don't create loop-nest for short running loops + * @run main/othervm -XX:-TieredCompilation -XX:-UseOnStackReplacement -XX:-BackgroundCompilation -XX:LoopUnrollLimit=100 + * TestShortRunningIntLoopWithLongChecksPredicates + * @run main/othervm TestShortRunningIntLoopWithLongChecksPredicates + */ + +import java.util.Objects; + +// int RC is first eliminated by predication which causes Assertion +// Predicates to be added. Then the loop is transformed to make it +// possible to optimize long RC. Finally unrolling happens, which +// requires the Assertion Predicates to have been properly copied when +// the loop was transformed for the long range check. 
+public class TestShortRunningIntLoopWithLongChecksPredicates { + private static volatile int volatileField; + + public static void main(String[] args) { + int[] array = new int[100]; + for (int i = 0; i < 20_000; i++) { + helper1(100, array, 100); + test1(1, 100); + } + } + + private static void test1(int stop, long range) { + int[] array = new int[3]; + helper1(stop, array, range); + } + + private static void helper1(int stop, int[] array, long range) { + for (int i = 0; i < stop; i++) { + if (i % 2 == 0) { + array[i] += i; + } else { + volatileField = 42; + } + Objects.checkIndex(i, range); + } + } +} diff --git a/test/hotspot/jtreg/compiler/longcountedloops/TestShortRunningLongCountedLoop.java b/test/hotspot/jtreg/compiler/longcountedloops/TestShortRunningLongCountedLoop.java new file mode 100644 index 00000000000..7e55353e0f7 --- /dev/null +++ b/test/hotspot/jtreg/compiler/longcountedloops/TestShortRunningLongCountedLoop.java @@ -0,0 +1,579 @@ +/* + * Copyright (c) 2025, Red Hat, Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package compiler.longcountedloops;
+import compiler.lib.ir_framework.*;
+import compiler.whitebox.CompilerWhiteBoxTest;
+import jdk.test.whitebox.WhiteBox;
+
+import java.util.Objects;
+/*
+ * @test
+ * @bug 8342692
+ * @summary C2: long counted loop/long range checks: don't create loop-nest for short running loops
+ * @library /test/lib /
+ * @build jdk.test.whitebox.WhiteBox
+ * @run driver jdk.test.lib.helpers.ClassFileInstaller jdk.test.whitebox.WhiteBox
+ * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI compiler.longcountedloops.TestShortRunningLongCountedLoop
+ */
+
+public class TestShortRunningLongCountedLoop {
+    private static volatile int volatileField; // written in loop bodies of the tests below
+    private final static WhiteBox wb = WhiteBox.getWhiteBox(); // used by a runner below to request compilation of a test method
+
+    public static void main(String[] args) {
+        // IR rules expect a single loop so disable unrolling (-XX:LoopMaxUnroll=0)
+        // IR rules expect strip mined loops to be enabled (-XX:LoopStripMiningIter=1000, -XX:+UseCountedLoopSafepoints)
+        // testIntLoopUnknownBoundsShortUnswitchedLoop and testLongLoopUnknownBoundsShortUnswitchedLoop need -XX:-UseProfiledLoopPredicate
+        TestFramework.runWithFlags("-XX:LoopMaxUnroll=0", "-XX:LoopStripMiningIter=1000", "-XX:+UseCountedLoopSafepoints", "-XX:-UseProfiledLoopPredicate");
+    }
+
+    // Check IR only has a counted loop when bounds are known and the loop runs for a short time
+    @Test
+    @IR(counts = { IRNode.COUNTED_LOOP, "1" })
+    @IR(failOn = { IRNode.LOOP, IRNode.OUTER_STRIP_MINED_LOOP, IRNode.SHORT_RUNNING_LOOP_TRAP })
+    public static int testLongLoopConstantBoundsShortLoop1() {
+        int j = 0;
+        for (long i = 0; i < 100; i++) {
+            volatileField = 42;
+            j++;
+        }
+        return j;
+    }
+
+    @Check(test = "testLongLoopConstantBoundsShortLoop1")
+    public static void checkTestLongLoopConstantBoundsShortLoop1(int res) {
+        if (res != 100) {
+            throw new RuntimeException("incorrect result: " + res);
+        }
+    }
+
+    // Same with stride > 1
+    @Test
+    @IR(counts = { IRNode.COUNTED_LOOP, "1" })
+    @IR(failOn = { IRNode.LOOP, IRNode.OUTER_STRIP_MINED_LOOP, IRNode.SHORT_RUNNING_LOOP_TRAP })
+    public static int testLongLoopConstantBoundsShortLoop2() {
+        int j = 0;
+        for (long i = 0; i < 2000; i += 20) {
+            volatileField = 42;
+            j++;
+        }
+        return j;
+    }
+
+    @Check(test = "testLongLoopConstantBoundsShortLoop2")
+    public static void checkTestLongLoopConstantBoundsShortLoop2(int res) {
+        if (res != 100) {
+            throw new RuntimeException("incorrect result: " + res);
+        }
+    }
+
+    // Same with loop going downward
+    @Test
+    @IR(counts = { IRNode.COUNTED_LOOP, "1" })
+    @IR(failOn = { IRNode.LOOP, IRNode.OUTER_STRIP_MINED_LOOP, IRNode.SHORT_RUNNING_LOOP_TRAP })
+    public static int testLongLoopConstantBoundsShortLoop3() {
+        int j = 0;
+        for (long i = 99; i >= 0; i--) {
+            volatileField = 42;
+            j++;
+        }
+        return j;
+    }
+
+    @Check(test = "testLongLoopConstantBoundsShortLoop3")
+    public static void checkTestLongLoopConstantBoundsShortLoop3(int res) {
+        if (res != 100) {
+            throw new RuntimeException("incorrect result: " + res);
+        }
+    }
+
+    // Same with loop going downward and stride > 1
+    @Test
+    @IR(counts = { IRNode.COUNTED_LOOP, "1" })
+    @IR(failOn = { IRNode.LOOP, IRNode.OUTER_STRIP_MINED_LOOP, IRNode.SHORT_RUNNING_LOOP_TRAP })
+    public static int testLongLoopConstantBoundsShortLoop4() {
+        int j = 0;
+        for (long i = 1999; i >= 0; i-=20) {
+            volatileField = 42;
+            j++;
+        }
+        return j;
+    }
+
+    @Check(test = "testLongLoopConstantBoundsShortLoop4")
+    public static void checkTestLongLoopConstantBoundsShortLoop4(int res) {
+        if (res != 100) {
+            throw new RuntimeException("incorrect result: " + res);
+        }
+    }
+
+    // Check IR only has a counted loop when bounds are known but not exact and the loop runs for a short time
+    @Test
+    @IR(counts = { IRNode.COUNTED_LOOP, "1" })
+    @IR(failOn = { IRNode.LOOP, IRNode.OUTER_STRIP_MINED_LOOP, IRNode.SHORT_RUNNING_LOOP_TRAP })
+    public static int testLongLoopConstantBoundsShortLoop5(int start, int stop) {
+        start= Integer.max(start, 0); // clamp: start >= 0
+        stop= Integer.min(stop, 999); // clamp: stop <= 999
+        int j = 0;
+        for (long i = start; i < stop; i++) {
+            volatileField = 42;
+            j++;
+        }
+        return j;
+    }
+
+    @Run(test = "testLongLoopConstantBoundsShortLoop5")
+    public static void testLongLoopConstantBoundsShortLoop5_runner() {
+        int res = testLongLoopConstantBoundsShortLoop5(0, 100);
+        if (res != 100) {
+            throw new RuntimeException("incorrect result: " + res);
+        }
+    }
+
+    // Check that loop nest is created when bounds are known and loop is not short running
+    @Test
+    @IR(counts = { IRNode.COUNTED_LOOP, "1", IRNode.LOOP, "1"})
+    @IR(failOn = { IRNode.SHORT_RUNNING_LOOP_TRAP, IRNode.OUTER_STRIP_MINED_LOOP })
+    public static int testLongLoopConstantBoundsLongLoop1() {
+        final long stride = Integer.MAX_VALUE / 1000;
+        int j = 0;
+        for (long i = 0; i < stride * 1001; i += stride) {
+            volatileField = 42;
+            j++;
+        }
+        return j;
+    }
+
+    @Check(test = "testLongLoopConstantBoundsLongLoop1")
+    public static void checkTestLongLoopConstantBoundsLongLoop1(int res) {
+        if (res != 1001) {
+            throw new RuntimeException("incorrect result: " + res);
+        }
+    }
+
+    // Same with negative stride
+    @Test
+    @IR(counts = { IRNode.COUNTED_LOOP, "1", IRNode.LOOP, "1"})
+    @IR(failOn = { IRNode.SHORT_RUNNING_LOOP_TRAP, IRNode.OUTER_STRIP_MINED_LOOP })
+    public static int testLongLoopConstantBoundsLongLoop2() {
+        final long stride = Integer.MAX_VALUE / 1000;
+        int j = 0;
+        for (long i = stride * 1000; i >= 0; i -= stride) {
+            volatileField = 42;
+            j++;
+        }
+        return j;
+    }
+
+    @Check(test = "testLongLoopConstantBoundsLongLoop2")
+    public static void checkTestLongLoopConstantBoundsLongLoop2(int res) {
+        if (res != 1001) {
+            throw new RuntimeException("incorrect result: " + res);
+        }
+    }
+
+    // Check IR only has a counted loop when bounds are unknown but profile reports a short running loop
+    @Test
+    @IR(counts = { IRNode.COUNTED_LOOP, "1", IRNode.SHORT_RUNNING_LOOP_TRAP, "1", IRNode.OUTER_STRIP_MINED_LOOP, "1" })
+    @IR(failOn = { IRNode.LOOP })
+    public static int testLongLoopUnknownBoundsShortLoop(long start, long stop) {
+        int j = 0;
+        for (long i = start; i < stop; i++) {
+            volatileField = 42;
+            j++;
+        }
+        return j;
+    }
+
+    @Run(test = "testLongLoopUnknownBoundsShortLoop")
+    @Warmup(10_000)
+    public static void testLongLoopUnknownBoundsShortLoop_runner() {
+        int res = testLongLoopUnknownBoundsShortLoop(0, 100);
+        if (res != 100) {
+            throw new RuntimeException("incorrect result: " + res);
+        }
+    }
+
+    // same with stride > 1
+    @Test
+    @IR(counts = { IRNode.COUNTED_LOOP, "1", IRNode.SHORT_RUNNING_LOOP_TRAP, "1", IRNode.OUTER_STRIP_MINED_LOOP, "1" })
+    @IR(failOn = { IRNode.LOOP })
+    public static int testLongLoopUnknownBoundsShortLoop2(long start, long stop) {
+        int j = 0;
+        for (long i = start; i < stop; i+=20) {
+            volatileField = 42;
+            j++;
+        }
+        return j;
+    }
+
+    @Run(test = "testLongLoopUnknownBoundsShortLoop2")
+    @Warmup(10_000)
+    public static void testLongLoopUnknownBoundsShortLoop2_runner() {
+        int res = testLongLoopUnknownBoundsShortLoop2(0, 2000);
+        if (res != 100) {
+            throw new RuntimeException("incorrect result: " + res);
+        }
+    }
+
+    // same with negative stride
+    @Test
+    @IR(counts = { IRNode.COUNTED_LOOP, "1", IRNode.SHORT_RUNNING_LOOP_TRAP, "1", IRNode.OUTER_STRIP_MINED_LOOP, "1" })
+    @IR(failOn = { IRNode.LOOP })
+    public static int testLongLoopUnknownBoundsShortLoop3(long start, long stop) {
+        int j = 0;
+        for (long i = start; i >= stop; i--) {
+            volatileField = 42;
+            j++;
+        }
+        return j;
+    }
+
+    @Run(test = "testLongLoopUnknownBoundsShortLoop3")
+    @Warmup(10_000)
+    public static void testLongLoopUnknownBoundsShortLoop3_runner() {
+        int res = testLongLoopUnknownBoundsShortLoop3(99, 0);
+        if (res != 100) {
+            throw new RuntimeException("incorrect result: " + res);
+        }
+    }
+
+    // same with negative stride > 1
+    @Test
+    @IR(counts = { IRNode.COUNTED_LOOP, "1", IRNode.SHORT_RUNNING_LOOP_TRAP, "1", IRNode.OUTER_STRIP_MINED_LOOP, "1" })
+    @IR(failOn = { IRNode.LOOP })
+    public static int testLongLoopUnknownBoundsShortLoop4(long start, long stop) {
+        int j = 0;
+        for (long i = start; i >= stop; i -= 20) {
+            volatileField = 42;
+            j++;
+        }
+        return j;
+    }
+
+    @Run(test = "testLongLoopUnknownBoundsShortLoop4")
+    @Warmup(10_000)
+    public static void testLongLoopUnknownBoundsShortLoop4_runner() {
+        int res = testLongLoopUnknownBoundsShortLoop4(1999, 0);
+        if (res != 100) {
+            throw new RuntimeException("incorrect result: " + res);
+        }
+    }
+
+    // Check that loop nest is created when bounds are not known but profile reports loop is not short running
+    @Test
+    @IR(counts = { IRNode.COUNTED_LOOP, "1", IRNode.OUTER_STRIP_MINED_LOOP, "1", IRNode.LOOP, "1"})
+    @IR(failOn = { IRNode.SHORT_RUNNING_LOOP_TRAP })
+    public static int testLongLoopUnknownBoundsLongLoop1(long start, long stop, long range) {
+        int j = 0;
+        for (long i = start; i < stop; i++) {
+            volatileField = 42;
+            Objects.checkIndex(i * (1024 * 1024), range); // max number of iterations of inner loop is roughly Integer.MAX_VALUE / 1024 / 1024
+            j++;
+        }
+        return j;
+    }
+
+    @Run(test = "testLongLoopUnknownBoundsLongLoop1")
+    @Warmup(10_000)
+    public static void testLongLoopUnknownBoundsLongLoop1_runner() {
+        int res = testLongLoopUnknownBoundsLongLoop1(0, 3000, Long.MAX_VALUE);
+        if (res != 3000) {
+            throw new RuntimeException("incorrect result: " + res);
+        }
+    }
+
+    // same with negative stride
+    @Test
+    @IR(counts = { IRNode.COUNTED_LOOP, "1", IRNode.OUTER_STRIP_MINED_LOOP, "1", IRNode.LOOP, "1"})
+    @IR(failOn = { IRNode.SHORT_RUNNING_LOOP_TRAP })
+    public static int testLongLoopUnknownBoundsLongLoop2(long start, long stop, long range) {
+        int j = 0;
+        for (long i = start; i >= stop; i--) {
+            volatileField = 42;
+            Objects.checkIndex(i * (1024 * 1024), range); // max number of iterations of inner loop is roughly Integer.MAX_VALUE / 1024 / 1024
+            j++;
+        }
+        return j;
+    }
+
+    @Run(test = "testLongLoopUnknownBoundsLongLoop2")
+    @Warmup(10_000)
+    public static void testLongLoopUnknownBoundsLongLoop2_runner() {
+        int res = testLongLoopUnknownBoundsLongLoop2(2999, 0, Long.MAX_VALUE);
+        if (res != 3000) {
+            throw new RuntimeException("incorrect result: " + res);
+        }
+    }
+
+    // Check IR has a loop nest when bounds are unknown, profile reports a short running loop but trap is taken
+    @Test
+    @IR(counts = { IRNode.COUNTED_LOOP, "1", IRNode.LOOP, "1", IRNode.OUTER_STRIP_MINED_LOOP, "1" })
+    @IR(failOn = { IRNode.SHORT_RUNNING_LOOP_TRAP })
+    public static int testLongLoopUnknownBoundsShortLoopFailedSpeculation(long start, long stop, long range) {
+        int j = 0;
+        for (long i = start; i < stop; i++) {
+            volatileField = 42;
+            Objects.checkIndex(i * (1024 * 1024), range); // max number of iterations of inner loop is roughly Integer.MAX_VALUE / 1024 / 1024
+            j++;
+        }
+        return j;
+    }
+
+    @Run(test = "testLongLoopUnknownBoundsShortLoopFailedSpeculation")
+    @Warmup(1)
+    public static void testLongLoopUnknownBoundsShortLoopFailedSpeculation_runner(RunInfo info) {
+        if (info.isWarmUp()) {
+            for (int i = 0; i < 10_0000; i++) { // 100_000 calls, each running the loop for only 100 iterations
+                int res = testLongLoopUnknownBoundsShortLoopFailedSpeculation(0, 100, Long.MAX_VALUE);
+                if (res != 100) {
+                    throw new RuntimeException("incorrect result: " + res);
+                }
+            }
+            wb.enqueueMethodForCompilation(info.getTest(), CompilerWhiteBoxTest.COMP_LEVEL_FULL_OPTIMIZATION);
+            if (!wb.isMethodCompiled(info.getTest())) {
+                throw new RuntimeException("Should be compiled now");
+            }
+            for (int i = 0; i < 10; i++) { // 10_000-iteration calls after compilation; presumably these fail the short running speculation
+                int res = testLongLoopUnknownBoundsShortLoopFailedSpeculation(0, 10_000, Long.MAX_VALUE);
+                if (res != 10_000) {
+                    throw new RuntimeException("incorrect result: " + res);
+                }
+            }
+        } else {
+            int res = testLongLoopUnknownBoundsShortLoopFailedSpeculation(0, 100, Long.MAX_VALUE);
+            if (res != 100) {
+                throw new RuntimeException("incorrect result: " + res);
+            }
+        }
+    }
+
+    // Check IR only has a counted loop (no loop nest, no trap) when bounds are known and loop is short running, even though the inlined helper also ran with a long trip count
+    @Test
+    @IR(counts = { IRNode.COUNTED_LOOP, "1" })
+    @IR(failOn = { IRNode.LOOP, IRNode.OUTER_STRIP_MINED_LOOP, IRNode.SHORT_RUNNING_LOOP_TRAP })
+    public static int testLongLoopKnownBoundsShortLoopFailedSpeculation() {
+        return testLongLoopKnownBoundsShortLoopFailedSpeculationHelper(0, 100);
+    }
+
+    @ForceInline
+    private static int testLongLoopKnownBoundsShortLoopFailedSpeculationHelper(long start, long stop) {
+        int j = 0;
+        for (long i = start; i < stop; i++) {
+            volatileField = 42;
+            j++;
+        }
+        return j;
+    }
+
+    @Run(test = "testLongLoopKnownBoundsShortLoopFailedSpeculation")
+    @Warmup(1)
+    public static void testLongLoopKnownBoundsShortLoopFailedSpeculation_runner(RunInfo info) {
+        if (info.isWarmUp()) {
+            for (int i = 0; i < 10_0000; i++) { // 100_000 direct helper calls with a 100-iteration loop
+                int res = testLongLoopKnownBoundsShortLoopFailedSpeculationHelper(0, 100);
+                if (res != 100) {
+                    throw new RuntimeException("incorrect result: " + res);
+                }
+            }
+            for (int i = 0; i < 10; i++) { // direct helper calls with a 10_000-iteration loop
+                int res = testLongLoopKnownBoundsShortLoopFailedSpeculationHelper(0, 10_000);
+                if (res != 10_000) {
+                    throw new RuntimeException("incorrect result: " + res);
+                }
+            }
+            for (int i = 0; i < 10_0000; i++) { // 100_000 calls of the test method itself (constant bounds 0..100)
+                int res = testLongLoopKnownBoundsShortLoopFailedSpeculation();
+                if (res != 100) {
+                    throw new RuntimeException("incorrect result: " + res);
+                }
+            }
+        } else {
+            int res = testLongLoopKnownBoundsShortLoopFailedSpeculation();
+            if (res != 100) {
+                throw new RuntimeException("incorrect result: " + res);
+            }
+        }
+    }
+
+    // Check range check can be eliminated by predication
+    @Test
+    @IR(counts = { IRNode.PREDICATE_TRAP, "1" })
+    @IR(failOn = { IRNode.COUNTED_LOOP, IRNode.LOOP, IRNode.OUTER_STRIP_MINED_LOOP, IRNode.SHORT_RUNNING_LOOP_TRAP })
+    public static void testLongLoopConstantBoundsPredication(long range) {
+        for (long i = 0; i < 100; i++) {
+            Objects.checkIndex(i, range);
+        }
+    }
+
+    @Run(test = "testLongLoopConstantBoundsPredication")
+    public static void testLongLoopConstantBoundsPredication_runner() {
+        testLongLoopConstantBoundsPredication(100);
+    }
+
+    @Test // Same with unknown bounds: the runner only ever uses a 100-iteration loop
+    @IR(counts = { IRNode.SHORT_RUNNING_LOOP_TRAP, "1", IRNode.PREDICATE_TRAP, "1" })
+    @IR(failOn = { IRNode.COUNTED_LOOP, IRNode.LOOP, IRNode.OUTER_STRIP_MINED_LOOP })
+    public static void testLongLoopUnknownBoundsShortLoopPredication(long start, long stop, long range) {
+        for (long i = start; i < stop; i++) {
+            Objects.checkIndex(i, range);
+        }
+    }
+
+    @Run(test = "testLongLoopUnknownBoundsShortLoopPredication")
+    @Warmup(10_000)
+    public static void testLongLoopUnknownBoundsShortLoopPredication_runner() {
+        testLongLoopUnknownBoundsShortLoopPredication(0, 100, 100);
+    }
+
+    // If the scale is too large, the transformation can't happen
+    static final long veryLargeScale = Integer.MAX_VALUE / 99;
+    @Test
+    @IR(counts = { IRNode.LOOP, "1", IRNode.PREDICATE_TRAP, "2"})
+    @IR(failOn = { IRNode.COUNTED_LOOP, IRNode.OUTER_STRIP_MINED_LOOP, IRNode.SHORT_RUNNING_LOOP_TRAP })
+    public static void testLongLoopConstantBoundsLargeScale(long range) {
+        for (long i = 0; i < 100; i++) {
+            Objects.checkIndex(veryLargeScale * i, range);
+        }
+    }
+
+    @Run(test = "testLongLoopConstantBoundsLargeScale")
+    public static void testLongLoopConstantBoundsLargeScale_runner() {
+        testLongLoopConstantBoundsLargeScale(veryLargeScale * 100);
+    }
+
+    @Test // Same with unknown bounds
+    @IR(counts = { IRNode.LOOP, "1", IRNode.PREDICATE_TRAP, "2"})
+    @IR(failOn = { IRNode.COUNTED_LOOP, IRNode.OUTER_STRIP_MINED_LOOP, IRNode.SHORT_RUNNING_LOOP_TRAP })
+    public static void testLongLoopUnknownBoundsShortLoopLargeScale(long start, long stop, long range) {
+        for (long i = start; i < stop; i++) {
+            Objects.checkIndex(veryLargeScale * i, range);
+        }
+    }
+
+    @Run(test = "testLongLoopUnknownBoundsShortLoopLargeScale")
+    @Warmup(10_000)
+    public static void testLongLoopUnknownBoundsShortLoopLargeScale_runner() {
+        testLongLoopUnknownBoundsShortLoopLargeScale(0, 100, veryLargeScale * 100);
+    }
+
+    // Check IR only has a counted loop when bounds are known and the loop runs for a short time (int loop case)
+    @Test
+    @IR(counts = { IRNode.COUNTED_LOOP, "1", IRNode.PREDICATE_TRAP, "1" })
+    @IR(failOn = { IRNode.LOOP, IRNode.OUTER_STRIP_MINED_LOOP, IRNode.SHORT_RUNNING_LOOP_TRAP })
+    public static void testIntLoopConstantBoundsShortLoop1(long range) {
+        for (int i = 0; i < 100; i++) {
+            Objects.checkIndex(i, range);
+            volatileField = 42;
+        }
+    }
+
+    @Run(test = "testIntLoopConstantBoundsShortLoop1")
+    public static void testIntLoopConstantBoundsShortLoop1_runner() {
+        testIntLoopConstantBoundsShortLoop1(100);
+    }
+
+    // Check IR only has a counted loop when bounds are unknown but profile reports a short running loop (int loop case)
+    @Test
+    @IR(counts = { IRNode.COUNTED_LOOP, "1", IRNode.SHORT_RUNNING_LOOP_TRAP, "1", IRNode.PREDICATE_TRAP, "1", IRNode.OUTER_STRIP_MINED_LOOP, "1" })
+    @IR(failOn = { IRNode.LOOP })
+    public static void testIntLoopUnknownBoundsShortLoop(int start, int stop, long range) {
+        for (int i = start; i < stop; i++) {
+            Objects.checkIndex(i, range);
+            volatileField = 42;
+        }
+    }
+
+    @Run(test = "testIntLoopUnknownBoundsShortLoop")
+    @Warmup(10_000)
+    public static void testIntLoopUnknownBoundsShortLoop_runner() {
+        testIntLoopUnknownBoundsShortLoop(0, 100, 100);
+    }
+
+    // Same with unswitched loop
+    @Test
+    @IR(counts = { IRNode.COUNTED_LOOP, "2", IRNode.SHORT_RUNNING_LOOP_TRAP, "1", IRNode.PREDICATE_TRAP, "1", IRNode.OUTER_STRIP_MINED_LOOP, "2" })
+    @IR(failOn = { IRNode.LOOP })
+    public static void testIntLoopUnknownBoundsShortUnswitchedLoop(int start, int stop, long range, boolean flag) {
+        for (int i = start; i < stop; i++) {
+            if (flag) {
+                Objects.checkIndex(i, range);
+                volatileField = 42;
+            } else {
+                Objects.checkIndex(i, range);
+                volatileField = 42;
+            }
+        }
+    }
+
+    @Run(test = "testIntLoopUnknownBoundsShortUnswitchedLoop")
+    @Warmup(10_000)
+    public static void testIntLoopUnknownBoundsShortUnswitchedLoop_runner() {
+        testIntLoopUnknownBoundsShortUnswitchedLoop(0, 100, 100, true);
+        testIntLoopUnknownBoundsShortUnswitchedLoop(0, 100, 100, false);
+    }
+
+    @Test // Same with unswitched loop (long loop case)
+    @IR(counts = { IRNode.COUNTED_LOOP, "2", IRNode.SHORT_RUNNING_LOOP_TRAP, "1", IRNode.PREDICATE_TRAP, "1", IRNode.OUTER_STRIP_MINED_LOOP, "2" })
+    @IR(failOn = { IRNode.LOOP })
+    public static void testLongLoopUnknownBoundsShortUnswitchedLoop(long start, long stop, long range, boolean flag) {
+        for (long i = start; i < stop; i++) {
+            if (flag) {
+                Objects.checkIndex(i, range);
+                volatileField = 42;
+            } else {
+                Objects.checkIndex(i, range);
+                volatileField = 42;
+            }
+        }
+    }
+
+    @Run(test = "testLongLoopUnknownBoundsShortUnswitchedLoop")
+    @Warmup(10_000)
+    public static void testLongLoopUnknownBoundsShortUnswitchedLoop_runner() {
+        testLongLoopUnknownBoundsShortUnswitchedLoop(0, 100, 100, true);
+        testLongLoopUnknownBoundsShortUnswitchedLoop(0, 100, 100, false);
+    }
+
+    @Test // Same as testLongLoopUnknownBoundsShortLoop but the limit is the sum of an int and a long argument
+    @IR(counts = { IRNode.COUNTED_LOOP, "1", IRNode.SHORT_RUNNING_LOOP_TRAP, "1", IRNode.OUTER_STRIP_MINED_LOOP, "1" })
+    @IR(failOn = { IRNode.LOOP })
+    public static int testLongLoopUnknownBoundsAddLimitShortLoop(int stop1, long stop2) {
+        int j = 0;
+        for (long i = 0; i < stop1 + stop2; i++) {
+            volatileField = 42;
+            j++;
+        }
+        return j;
+    }
+
+    @Run(test = "testLongLoopUnknownBoundsAddLimitShortLoop")
+    @Warmup(10_000)
+    public static void testLongLoopUnknownBoundsAddLimitShortLoop_runner() {
+        int res = testLongLoopUnknownBoundsAddLimitShortLoop(100, 0);
+        if (res != 100) {
+            throw new RuntimeException("incorrect result: " + res);
+        }
+    }
+}
diff --git a/test/hotspot/jtreg/compiler/longcountedloops/TestShortRunningLongCountedLoopPredicatesClone.java b/test/hotspot/jtreg/compiler/longcountedloops/TestShortRunningLongCountedLoopPredicatesClone.java
new file mode 100644
index 00000000000..bc412eb8f89
--- /dev/null
+++ b/test/hotspot/jtreg/compiler/longcountedloops/TestShortRunningLongCountedLoopPredicatesClone.java
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2025, Red Hat, Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/** + * @test + * @bug 8342692 + * @summary C2: long counted loop/long range checks: don't create loop-nest for short running loops + * @run main/othervm -XX:-TieredCompilation -XX:-UseOnStackReplacement -XX:-BackgroundCompilation -XX:LoopMaxUnroll=0 + * TestShortRunningLongCountedLoopPredicatesClone + * @run main/othervm TestShortRunningLongCountedLoopPredicatesClone + */ + +import java.util.Objects; + +// Predicate added after int counted loop is created depends on +// narrowed limit which depends on predicate added before the int +// counted loop was created: predicates need to be properly ordered. 
+public class TestShortRunningLongCountedLoopPredicatesClone {
+    public static void main(String[] args) {
+        A a = new A(100); // range == 100: test1 runs its loop 100 times per call
+        for (int i = 0; i < 20_000; i++) { // repeated calls; presumably enough to get test1 compiled
+            test1(a, 0);
+        }
+    }
+
+    private static void test1(A a, long start) {
+        long i = start;
+        do {
+            synchronized (new Object()) {} // NOTE(review): empty block looks deliberate (part of the loop shape under test) - keep as is
+            Objects.checkIndex(i, a.range); // long range check against the same field that is the loop limit
+            i++;
+        } while (i < a.range);
+    }
+
+    static class A { // holder so that the range is loaded from a field
+        A(long range) {
+            this.range = range;
+        }
+
+        long range;
+    }
+}
diff --git a/test/hotspot/jtreg/compiler/longcountedloops/TestShortRunningLongCountedLoopScaleOverflow.java b/test/hotspot/jtreg/compiler/longcountedloops/TestShortRunningLongCountedLoopScaleOverflow.java
new file mode 100644
index 00000000000..218d8a293d1
--- /dev/null
+++ b/test/hotspot/jtreg/compiler/longcountedloops/TestShortRunningLongCountedLoopScaleOverflow.java
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2025, Red Hat, Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/**
+ * @test
+ * @bug 8342692
+ * @summary C2: long counted loop/long range checks: don't create loop-nest for short running loops
+ * @run main/othervm -XX:-TieredCompilation -XX:-UseOnStackReplacement -XX:-BackgroundCompilation -XX:LoopMaxUnroll=0
+ *      -XX:-UseLoopPredicate -XX:-RangeCheckElimination TestShortRunningLongCountedLoopScaleOverflow
+ * @run main/othervm TestShortRunningLongCountedLoopScaleOverflow
+ */
+
+import java.util.Objects;
+
+// When the scale is large, having a single counted loop is not possible
+// even if the loop is short running.
+public class TestShortRunningLongCountedLoopScaleOverflow {
+    public static void main(String[] args) {
+        for (int i = 0; i < 20_000; i++) {
+            test1(Integer.MAX_VALUE, 0); // j == 0: the scaled check only sees index 0, which is in bounds
+            test2(Integer.MAX_VALUE, 0, 100);
+        }
+        boolean exception = false;
+        try {
+            test1(Integer.MAX_VALUE, 10); // 10 * veryLargeScale > Integer.MAX_VALUE: checkIndex must throw
+        } catch (IndexOutOfBoundsException indexOutOfBoundsException) {
+            exception = true;
+        }
+        if (!exception) {
+            throw new RuntimeException("Expected exception not thrown");
+        }
+        exception = false;
+        try {
+            test2(Integer.MAX_VALUE, 10, 100); // same out-of-bounds index, loop limit passed as an argument
+        } catch (IndexOutOfBoundsException indexOutOfBoundsException) {
+            exception = true;
+        }
+        if (!exception) {
+            throw new RuntimeException("Expected exception not thrown");
+        }
+    }
+
+    static final long veryLargeScale = 1 << 29; // int shift widened to long: 536_870_912
+
+    private static void test1(long range, long j) { // constant loop bounds; scaled check only on iteration i == j
+        Objects.checkIndex(0, range);
+        for (long i = 0; i < 100; i++) {
+            if (i == j) {
+                Objects.checkIndex(veryLargeScale * i, range);
+            }
+        }
+    }
+
+    private static void test2(long range, long j, long stop) { // same as test1 with the loop limit taken from 'stop'
+        Objects.checkIndex(0, range);
+        for (long i = 0; i < stop; i++) {
+            if (i == j) {
+                Objects.checkIndex(veryLargeScale * i, range);
+            }
+        }
+    }
+}
diff --git a/test/hotspot/jtreg/compiler/longcountedloops/TestShortRunningLongCountedLoopVectorization.java b/test/hotspot/jtreg/compiler/longcountedloops/TestShortRunningLongCountedLoopVectorization.java
new file mode 100644
index 00000000000..4238fe1073c
--- /dev/null
+++ b/test/hotspot/jtreg/compiler/longcountedloops/TestShortRunningLongCountedLoopVectorization.java
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2025, Red Hat, Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package compiler.longcountedloops;
+import jdk.internal.misc.Unsafe;
+
+import java.util.Objects;
+/*
+ * @test
+ * @bug 8342692
+ * @summary C2: long counted loop/long range checks: don't create loop-nest for short running loops
+ * @modules java.base/jdk.internal.misc
+ * @run main/othervm -XX:-BackgroundCompilation compiler.longcountedloops.TestShortRunningLongCountedLoopVectorization
+ */
+
+public class TestShortRunningLongCountedLoopVectorization {
+    private static final Unsafe UNSAFE = Unsafe.getUnsafe();
+    private static volatile int volatileField;
+
+    public static void main(String[] args) {
+        for (int i = 0; i < 20_000; i++) { // repeated calls; presumably enough to get test1 compiled
+            test1();
+        }
+    }
+
+    static int size = 1024;
+    static long longSize = size; // same value as 'size', as a long: used as the long range of checkIndex
+    static int[] intArray = new int[size];
+
+    public static void test1() {
+        boolean doIt = true;
+        int localSize = Integer.max(Integer.min(size, 10000), 0); // clamp to [0, 10000]: loop bounds are known but not exact
+        int i = 0;
+        while (true) {
+            synchronized (new Object()) {} // NOTE(review): empty block looks deliberate (part of the loop shape under test) - keep as is
+            if (i >= localSize) {
+                break;
+            }
+            if (doIt) { // volatile store on the first iteration only
+                volatileField = 42;
+                doIt = false;
+            }
+            long j = Objects.checkIndex(i, longSize); // int induction variable checked against a long range
+            UNSAFE.putInt(intArray, Unsafe.ARRAY_INT_BASE_OFFSET + j * Unsafe.ARRAY_INT_INDEX_SCALE, 42);
+            i++;
+        }
+    }
+}
diff --git a/test/hotspot/jtreg/compiler/longcountedloops/TestStressShortRunningLongCountedLoop.java b/test/hotspot/jtreg/compiler/longcountedloops/TestStressShortRunningLongCountedLoop.java
new file mode 100644
index 00000000000..2e04e13d233
--- /dev/null
+++ b/test/hotspot/jtreg/compiler/longcountedloops/TestStressShortRunningLongCountedLoop.java
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2025, Red Hat, Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package compiler.longcountedloops;
+
+import compiler.lib.ir_framework.*;
+
+/*
+ * @test
+ * @bug 8342692
+ * @summary C2: long counted loop/long range checks: don't create loop-nest for short running loops
+ * @library /test/lib /
+ * @run driver compiler.longcountedloops.TestStressShortRunningLongCountedLoop
+ */
+
+public class TestStressShortRunningLongCountedLoop {
+    private static volatile int volatileField;
+
+    public static void main(String[] args) {
+        TestFramework.runWithFlags("-XX:LoopMaxUnroll=0", "-XX:+IgnoreUnrecognizedVMOptions", "-XX:+StressShortRunningLongLoop"); // StressShortRunningLongLoop is a develop flag (c2_globals.hpp); presumably why IgnoreUnrecognizedVMOptions is passed
+        TestFramework.runWithFlags("-XX:LoopMaxUnroll=0", "-XX:+IgnoreUnrecognizedVMOptions", "-XX:-StressShortRunningLongLoop");
+    }
+
+    @Test // with the stress flag on: single counted loop plus trap; with it off: loop nest and no trap
+    @IR(applyIf = { "StressShortRunningLongLoop", "true" }, counts = { IRNode.COUNTED_LOOP, "1", IRNode.SHORT_RUNNING_LOOP_TRAP, "1", IRNode.OUTER_STRIP_MINED_LOOP, "1" })
+    @IR(applyIf = { "StressShortRunningLongLoop", "true" }, failOn = { IRNode.LOOP })
+    @IR(applyIf = { "StressShortRunningLongLoop", "false" }, counts = { IRNode.COUNTED_LOOP, "1", IRNode.LOOP, "1", IRNode.OUTER_STRIP_MINED_LOOP, "1" })
+    @IR(applyIf = { "StressShortRunningLongLoop", "false" }, failOn = { IRNode.SHORT_RUNNING_LOOP_TRAP })
+    public static int testLongLoopUnknownBoundsShortLoop(long start, long stop) {
+        int j = 0;
+        for (long i = start; i < stop; i++) {
+            volatileField = 42;
+            j++;
+        }
+        return j;
+    }
+
+    @Run(test = "testLongLoopUnknownBoundsShortLoop")
+    @Warmup(0) // no warm-up iterations; presumably so that no profile marks the loop as short running and only the stress flag can
+    public static void testLongLoopUnknownBoundsShortLoop_runner() {
+        int res = testLongLoopUnknownBoundsShortLoop(0, 100);
+        if (res != 100) {
+            throw new RuntimeException("incorrect result: " + res);
+        }
+    }
+}
diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/TestMemorySegment.java b/test/hotspot/jtreg/compiler/loopopts/superword/TestMemorySegment.java
index b4003cc5e73..7ecc14e8980 100644
--- a/test/hotspot/jtreg/compiler/loopopts/superword/TestMemorySegment.java
+++ b/test/hotspot/jtreg/compiler/loopopts/superword/TestMemorySegment.java
@@ -47,6 +47,23 @@ import java.lang.foreign.*;
  * @run driver compiler.loopopts.superword.TestMemorySegment ByteArray AlignVector
  */
 
+/*
+ * @test id=byte-array-NoShortRunningLongLoop
+ * @bug 8329273 8342692
+ * @summary Test vectorization of loops over MemorySegment
+ * @library /test/lib /
+ * @run driver compiler.loopopts.superword.TestMemorySegment ByteArray NoShortRunningLongLoop
+ */
+
+/*
+ * @test id=byte-array-AlignVector-NoShortRunningLongLoop
+ * @bug 8329273 8348263 8342692
+ * @summary Test vectorization of loops over MemorySegment
+ * @library /test/lib /
+ * @run driver compiler.loopopts.superword.TestMemorySegment ByteArray AlignVector NoShortRunningLongLoop
+ */
+
+
 /*
  * @test id=char-array
  * @bug 8329273
@@ -172,6 +189,13 @@ public class TestMemorySegment {
     public static void main(String[] args) {
         TestFramework framework = new TestFramework(TestMemorySegmentImpl.class);
         framework.addFlags("-DmemorySegmentProviderNameForTestVM=" + args[0]);
+        for (int i = 1; i < args.length; i++) { // NOTE(review): this loop already handles "AlignVector"; the legacy args[1] check after it now adds -XX:+AlignVector a second time
+            String tag = args[i];
+            switch (tag) {
+                case "AlignVector" -> framework.addFlags("-XX:+AlignVector");
+                case "NoShortRunningLongLoop" -> framework.addFlags("-XX:-ShortRunningLongLoop");
+            }
+        }
         if (args.length > 1 && args[1].equals("AlignVector")) {
             framework.addFlags("-XX:+AlignVector");
         }
@@ -777,6 +801,13 @@ class TestMemorySegmentImpl {
     @IR(counts = {IRNode.LOAD_VECTOR_I, "= 0",
                   IRNode.ADD_VI, "= 0",
                   IRNode.STORE_VECTOR, "= 0"},
+        applyIfAnd = { "ShortRunningLongLoop", "false", "AlignVector", "false" },
+        applyIfPlatform = {"64-bit", "true"},
+        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
+    @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
+                  IRNode.ADD_VI, "> 0",
+                  IRNode.STORE_VECTOR, "> 0"},
+        applyIfAnd = { "ShortRunningLongLoop", "true", "AlignVector", "false" },
         applyIfPlatform = {"64-bit", "true"},
         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
     // FAILS: invariants are sorted differently, because of differently inserted Cast.
@@ -795,6 +826,13 @@ class TestMemorySegmentImpl {
     @IR(counts = {IRNode.LOAD_VECTOR_I, "= 0",
                   IRNode.ADD_VI, "= 0",
                   IRNode.STORE_VECTOR, "= 0"},
+        applyIfAnd = { "ShortRunningLongLoop", "false", "AlignVector", "false" },
+        applyIfPlatform = {"64-bit", "true"},
+        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
+    @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
+                  IRNode.ADD_VI, "> 0",
+                  IRNode.STORE_VECTOR, "> 0"},
+        applyIfAnd = { "ShortRunningLongLoop", "true", "AlignVector", "false" },
         applyIfPlatform = {"64-bit", "true"},
         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
     // FAILS: invariants are sorted differently, because of differently inserted Cast.
diff --git a/test/micro/org/openjdk/bench/java/lang/foreign/HeapMismatchManualLoopTest.java b/test/micro/org/openjdk/bench/java/lang/foreign/HeapMismatchManualLoopTest.java
new file mode 100644
index 00000000000..d16f4c874b6
--- /dev/null
+++ b/test/micro/org/openjdk/bench/java/lang/foreign/HeapMismatchManualLoopTest.java
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+package org.openjdk.bench.java.lang.foreign;
+
+import org.openjdk.jmh.annotations.*;
+
+import java.lang.foreign.Arena;
+import java.lang.foreign.MemorySegment;
+import java.lang.foreign.ValueLayout;
+import java.nio.ByteBuffer;
+import java.util.Arrays;
+import java.util.concurrent.TimeUnit;
+import jdk.internal.misc.Unsafe;
+import java.util.Objects;
+
+@BenchmarkMode(Mode.AverageTime)
+@Warmup(iterations = 5, time = 500, timeUnit = TimeUnit.MILLISECONDS)
+@Measurement(iterations = 10, time = 500, timeUnit = TimeUnit.MILLISECONDS)
+@State(org.openjdk.jmh.annotations.Scope.Thread)
+@OutputTimeUnit(TimeUnit.MILLISECONDS) // class-level unit; every @Benchmark method below declares NANOSECONDS itself
+@Fork(value = 3, jvmArgs = { "--add-opens=java.base/jdk.internal.misc=ALL-UNNAMED" }) // forked JVMs open jdk.internal.misc, presumably for the Unsafe use below - confirm
+public class HeapMismatchManualLoopTest { // JMH benchmark: hand-written first-mismatch loops over byte[], MemorySegment, ByteBuffer and Unsafe views of two equally sized arrays
+
+    @Param({"4", "8", "16", "32", "64", "128"})
+    public int ELEM_SIZE; // length in bytes of both arrays allocated in setup()
+
+    static final Unsafe unsafe = Utils.unsafe; // Utils is not imported in this file; presumably a helper in this package - confirm
+
+    byte[] srcArray;
+    byte[] dstArray;
+    MemorySegment srcSegment; // created from srcArray in setup()
+    MemorySegment dstSegment; // created from dstArray in setup()
+    ByteBuffer srcBuffer; // wraps srcArray, see setup()
+    ByteBuffer dstBuffer; // wraps dstArray, see setup()
+    long srcByteSize; // long-typed copy of ELEM_SIZE; loop bound of both Unsafe benchmarks
+    long dstByteSize; // long-typed copy of ELEM_SIZE; only read by the checkIndex call in unsafe_mismatch
+    @Setup
+    public void setup() { // allocates both arrays and builds the segment and buffer views over them
+        srcArray = new byte[ELEM_SIZE];
+        dstArray = new byte[ELEM_SIZE]; // neither array is written anywhere else in this class, so both keep their initial contents
+        srcSegment = MemorySegment.ofArray(srcArray);
+        dstSegment = MemorySegment.ofArray(dstArray);
+        srcBuffer = ByteBuffer.wrap(srcArray);
+        dstBuffer = ByteBuffer.wrap(dstArray);
+        srcByteSize = ELEM_SIZE;
+        dstByteSize = ELEM_SIZE;
+    }
+
+    @Benchmark
+    @OutputTimeUnit(TimeUnit.NANOSECONDS)
+    public int array_mismatch() { // int-indexed loop directly over the byte[] arrays; returns the first differing index, or -1 if none
+        for (int i = 0; i < srcArray.length ; i++) {
+            if (srcArray[i] != dstArray[i]) {
+                return i;
+            }
+        }
+        return -1;
+    }
+
+    @Benchmark
+    @OutputTimeUnit(TimeUnit.NANOSECONDS)
+    public long segment_mismatch() { // same comparison through MemorySegment.get, with a long index bounded by srcSegment.byteSize()
+        for (long i = 0; i < srcSegment.byteSize() ; i++) {
+            if (srcSegment.get(ValueLayout.JAVA_BYTE, i) != dstSegment.get(ValueLayout.JAVA_BYTE, i)) {
+                return i;
+            }
+        }
+        return -1;
+    }
+
+    @Benchmark
+    @OutputTimeUnit(TimeUnit.NANOSECONDS)
+    public int buffer_mismatch() { // same comparison through absolute ByteBuffer.get(int), with an int index bounded by srcBuffer.capacity()
+        for (int i = 0; i < srcBuffer.capacity() ; i++) {
+            if (srcBuffer.get(i) != dstBuffer.get(i)) {
+                return i;
+            }
+        }
+        return -1;
+    }
+
+    @Benchmark
+    @OutputTimeUnit(TimeUnit.NANOSECONDS)
+    public long unsafe_mismatch() { // long-indexed loop that checks the index against both sizes before each pair of raw Unsafe reads
+        for (long i = 0; i < srcByteSize ; i++) {
+            Objects.checkIndex(i, srcByteSize); // explicit long bounds checks, one per array size
+            Objects.checkIndex(i, dstByteSize);
+            long offset = Unsafe.ARRAY_BYTE_BASE_OFFSET + i * Unsafe.ARRAY_BYTE_INDEX_SCALE; // offset passed to getByte for element i
+            if (unsafe.getByte(srcArray, offset) != unsafe.getByte(dstArray, offset)) {
+                return i;
+            }
+        }
+        return -1;
+    }
+
+    @Benchmark
+    @OutputTimeUnit(TimeUnit.NANOSECONDS)
+    public long unsafe_mismatch2() { // variant of unsafe_mismatch without the Objects.checkIndex calls
+        for (long i = 0; i < srcByteSize ; i++) {
+            long offset = Unsafe.ARRAY_BYTE_BASE_OFFSET + i * Unsafe.ARRAY_BYTE_INDEX_SCALE;
+            if (unsafe.getByte(srcArray, offset) != unsafe.getByte(dstArray, offset)) {
+                return i;
+            }
+        }
+        return -1;
+    }
+}