8342692: C2: long counted loop/long range checks: don't create loop-nest for short running loops

Co-authored-by: Maurizio Cimadamore <mcimadamore@openjdk.org> Co-authored-by: Christian Hagedorn <chagedorn@openjdk.org> Reviewed-by: chagedorn, thartmann
2026-01-28 12:09:14 +00:00 · 2025-07-22 08:35:36 +00:00 · 2025-07-22 08:35:36 +00:00 · f155661151
commit f155661151
parent c68697e178
27 changed files with 1665 additions and 79 deletions
--- a/src/hotspot/share/jvmci/vmStructs_jvmci.cpp
+++ b/src/hotspot/share/jvmci/vmStructs_jvmci.cpp
@ -763,6 +763,7 @@
  declare_constant(Deoptimization::Reason_constraint)                     \
  declare_constant(Deoptimization::Reason_div0_check)                     \
  declare_constant(Deoptimization::Reason_loop_limit_check)               \
+  declare_constant(Deoptimization::Reason_short_running_long_loop)        \
  declare_constant(Deoptimization::Reason_auto_vectorization_check)       \
  declare_constant(Deoptimization::Reason_type_checked_inlining)          \
  declare_constant(Deoptimization::Reason_optimized_type_check)           \
--- a/src/hotspot/share/opto/c2_globals.hpp
+++ b/src/hotspot/share/opto/c2_globals.hpp
@ -872,6 +872,15 @@
          "could corrupt the graph in rare cases and should be used with "  \
          "care.")                                                          \
                                                                            \
+  product(bool, ShortRunningLongLoop, true, DIAGNOSTIC,                     \
+          "long counted loop/long range checks: don't create loop nest if " \
+          "loop runs for small enough number of iterations. Long loop is "  \
+          "converted to a single int loop.")                                \
+                                                                            \
+  develop(bool, StressShortRunningLongLoop, false,                          \
+          "Speculate all long counted loops are short running when bounds " \
+          "are unknown even if profile data doesn't say so.")               \
+                                                                            \
  develop(bool, StressLoopPeeling, false,                                   \
          "Randomize loop peeling decision")                                \

--- a/src/hotspot/share/opto/castnode.cpp
+++ b/src/hotspot/share/opto/castnode.cpp
@ -26,7 +26,9 @@
 #include "opto/addnode.hpp"
 #include "opto/callnode.hpp"
 #include "opto/castnode.hpp"
+#include "opto/cfgnode.hpp"
 #include "opto/connode.hpp"
+#include "opto/loopnode.hpp"
 #include "opto/matcher.hpp"
 #include "opto/phaseX.hpp"
 #include "opto/subnode.hpp"
@ -323,6 +325,67 @@ const Type* CastLLNode::Value(PhaseGVN* phase) const {
  return widen_type(phase, res, T_LONG);
 }

+bool CastLLNode::is_inner_loop_backedge(ProjNode* proj) {
+  if (proj != nullptr) {
+    Node* ctrl_use = proj->unique_ctrl_out_or_null();
+    if (ctrl_use != nullptr && ctrl_use->Opcode() == Op_Loop &&
+        ctrl_use->in(2) == proj &&
+        ctrl_use->as_Loop()->is_loop_nest_inner_loop()) {
+      return true;
+    }
+  }
+  return false;
+}
+
+bool CastLLNode::cmp_used_at_inner_loop_exit_test(CmpNode* cmp) {
+  for (DUIterator_Fast imax, i = cmp->fast_outs(imax); i < imax; i++) {
+    Node* bol = cmp->fast_out(i);
+    if (bol->Opcode() == Op_Bool) {
+      for (DUIterator_Fast jmax, j = bol->fast_outs(jmax); j < jmax; j++) {
+        Node* iff = bol->fast_out(j);
+        if (iff->Opcode() == Op_If) {
+          ProjNode* true_proj = iff->as_If()->proj_out_or_null(true);
+          ProjNode* false_proj = iff->as_If()->proj_out_or_null(false);
+          if (is_inner_loop_backedge(true_proj) || is_inner_loop_backedge(false_proj)) {
+            return true;
+          }
+        }
+      }
+    }
+  }
+  return false;
+}
+
+// Find if this is a cast node added by PhaseIdealLoop::create_loop_nest() to narrow the number of iterations of the
+// inner loop
+bool CastLLNode::used_at_inner_loop_exit_test() const {
+  for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) {
+    Node* convl2i = fast_out(i);
+    if (convl2i->Opcode() == Op_ConvL2I) {
+      for (DUIterator_Fast jmax, j = convl2i->fast_outs(jmax); j < jmax; j++) {
+        Node* cmp_or_sub = convl2i->fast_out(j);
+        if (cmp_or_sub->Opcode() == Op_CmpI) {
+          if (cmp_used_at_inner_loop_exit_test(cmp_or_sub->as_Cmp())) {
+            // (Loop .. .. (IfProj (If (Bool (CmpI (ConvL2I (CastLL )))))))
+            return true;
+          }
+        } else if (cmp_or_sub->Opcode() == Op_SubI && cmp_or_sub->in(1)->find_int_con(-1) == 0) {
+          for (DUIterator_Fast kmax, k = cmp_or_sub->fast_outs(kmax); k < kmax; k++) {
+            Node* cmp = cmp_or_sub->fast_out(k);
+            if (cmp->Opcode() == Op_CmpI) {
+              if (cmp_used_at_inner_loop_exit_test(cmp->as_Cmp())) {
+                // (Loop .. .. (IfProj (If (Bool (CmpI (SubI 0 (ConvL2I (CastLL ))))))))
+                return true;
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+  return false;
+}
+
 Node* CastLLNode::Ideal(PhaseGVN* phase, bool can_reshape) {
  Node* progress = ConstraintCastNode::Ideal(phase, can_reshape);
  if (progress != nullptr) {
@ -352,7 +415,12 @@ Node* CastLLNode::Ideal(PhaseGVN* phase, bool can_reshape) {
      }
    }
  }
-  return optimize_integer_cast(phase, T_LONG);
+  // If it's a cast created by PhaseIdealLoop::try_make_short_running_loop(), don't transform it until the counted loop
+  // is created in the next loop opts pass
+  if (!can_reshape || !used_at_inner_loop_exit_test()) {
+    return optimize_integer_cast(phase, T_LONG);
+  }
+  return nullptr;
 }

 //------------------------------Value------------------------------------------
--- a/src/hotspot/share/opto/castnode.hpp
+++ b/src/hotspot/share/opto/castnode.hpp
@ -138,6 +138,12 @@ public:
  }

  virtual const Type* Value(PhaseGVN* phase) const;
+
+  static bool is_inner_loop_backedge(ProjNode* proj);
+
+  static bool cmp_used_at_inner_loop_exit_test(CmpNode* cmp);
+  bool used_at_inner_loop_exit_test() const;
+
  virtual Node* Ideal(PhaseGVN* phase, bool can_reshape);
  virtual int Opcode() const;
  virtual uint ideal_reg() const { return Op_RegL; }
--- a/src/hotspot/share/opto/graphKit.cpp
+++ b/src/hotspot/share/opto/graphKit.cpp
@ -4050,6 +4050,11 @@ void GraphKit::add_parse_predicate(Deoptimization::DeoptReason reason, const int
 // Add Parse Predicates which serve as placeholders to create new Runtime Predicates above them. All
 // Runtime Predicates inside a Runtime Predicate block share the same uncommon trap as the Parse Predicate.
 void GraphKit::add_parse_predicates(int nargs) {
+  if (ShortRunningLongLoop) {
+    // Will narrow the limit down with a cast node. Predicates added later may depend on the cast so should be last when
+    // walking up from the loop.
+    add_parse_predicate(Deoptimization::Reason_short_running_long_loop, nargs);
+  }
  if (UseLoopPredicate) {
    add_parse_predicate(Deoptimization::Reason_predicate, nargs);
    if (UseProfiledLoopPredicate) {
--- a/src/hotspot/share/opto/ifnode.cpp
+++ b/src/hotspot/share/opto/ifnode.cpp
@ -2178,6 +2178,7 @@ ParsePredicateNode::ParsePredicateNode(Node* control, Deoptimization::DeoptReaso
    case Deoptimization::Reason_profile_predicate:
    case Deoptimization::Reason_auto_vectorization_check:
    case Deoptimization::Reason_loop_limit_check:
+    case Deoptimization::Reason_short_running_long_loop:
      break;
    default:
      assert(false, "unsupported deoptimization reason for Parse Predicate");
@ -2226,6 +2227,9 @@ void ParsePredicateNode::dump_spec(outputStream* st) const {
    case Deoptimization::DeoptReason::Reason_loop_limit_check:
      st->print("Loop_Limit_Check ");
      break;
+    case Deoptimization::DeoptReason::Reason_short_running_long_loop:
+      st->print("Short_Running_Long_Loop ");
+      break;
    default:
      fatal("unknown kind");
  }
--- a/src/hotspot/share/opto/loopPredicate.cpp
+++ b/src/hotspot/share/opto/loopPredicate.cpp
@ -1054,7 +1054,7 @@ bool PhaseIdealLoop::loop_predication_impl_helper(IdealLoopTree* loop, IfProjNod
 #ifdef ASSERT
    const bool exact_trip_count = cl->has_exact_trip_count();
    const uint trip_count = cl->trip_count();
-    loop->compute_trip_count(this);
+    loop->compute_trip_count(this, T_INT);
    assert(exact_trip_count == cl->has_exact_trip_count() && trip_count == cl->trip_count(),
           "should have computed trip count on Loop Predication entry");
 #endif
@ -1171,7 +1171,7 @@ bool PhaseIdealLoop::loop_predication_impl(IdealLoopTree* loop) {
      // Do nothing for iteration-splitted loops
      return false;
    }
-    loop->compute_trip_count(this);
+    loop->compute_trip_count(this, T_INT);
    if (cl->trip_count() == 1) {
      // Not worth to hoist checks out of a loop that is only run for one iteration since the checks are only going to
      // be executed once anyway.
--- a/src/hotspot/share/opto/loopTransform.cpp
+++ b/src/hotspot/share/opto/loopTransform.cpp
@ -96,11 +96,11 @@ void IdealLoopTree::record_for_igvn() {
 //------------------------------compute_exact_trip_count-----------------------
 // Compute loop trip count if possible. Do not recalculate trip count for
 // split loops (pre-main-post) which have their limits and inits behind Opaque node.
-void IdealLoopTree::compute_trip_count(PhaseIdealLoop* phase) {
-  if (!_head->as_Loop()->is_valid_counted_loop(T_INT)) {
+void IdealLoopTree::compute_trip_count(PhaseIdealLoop* phase, BasicType loop_bt) {
+  if (!_head->as_Loop()->is_valid_counted_loop(loop_bt)) {
    return;
  }
-  CountedLoopNode* cl = _head->as_CountedLoop();
+  BaseCountedLoopNode* cl = _head->as_BaseCountedLoop();
  // Trip count may become nonexact for iteration split loops since
  // RCE modifies limits. Note, _trip_count value is not reset since
  // it is used to limit unrolling of main loop.
@ -119,24 +119,62 @@ void IdealLoopTree::compute_trip_count(PhaseIdealLoop* phase) {
  Node* init_n = cl->init_trip();
  Node* limit_n = cl->limit();
  if (init_n != nullptr && limit_n != nullptr) {
-    // Use longs to avoid integer overflow.
-    int stride_con = cl->stride_con();
-    const TypeInt* init_type = phase->_igvn.type(init_n)->is_int();
-    const TypeInt* limit_type = phase->_igvn.type(limit_n)->is_int();
-    jlong init_con = (stride_con > 0) ? init_type->_lo : init_type->_hi;
-    jlong limit_con = (stride_con > 0) ? limit_type->_hi : limit_type->_lo;
-    int stride_m = stride_con - (stride_con > 0 ? 1 : -1);
-    jlong trip_count = (limit_con - init_con + stride_m)/stride_con;
+    jlong stride_con = cl->stride_con();
+    const TypeInteger* init_type = phase->_igvn.type(init_n)->is_integer(loop_bt);
+    const TypeInteger* limit_type = phase->_igvn.type(limit_n)->is_integer(loop_bt);
+
+    // compute trip count
+    // It used to be computed as:
+    // max(1, (limit_con - init_con + stride_m) / stride_con)
+    // with stride_m = stride_con - (stride_con > 0 ? 1 : -1)
+    // for int counted loops only and by promoting all values to long to avoid overflow
+    // This implements the computation for int and long counted loops in a way that promotion to the next larger integer
+    // type is not needed to protect against overflow.
+    //
+    // Use unsigned longs to avoid overflow: the number of iterations is a positive number but can be really large, for
+    // instance if init_con = min_jint, limit_con = max_jint
+    jlong init_con = (stride_con > 0) ? init_type->lo_as_long() : init_type->hi_as_long();
+    julong uinit_con = init_con;
+    jlong limit_con = (stride_con > 0) ? limit_type->hi_as_long() : limit_type->lo_as_long();
+    julong ulimit_con = limit_con;
    // The loop body is always executed at least once even if init >= limit (for stride_con > 0) or
    // init <= limit (for stride_con < 0).
-    trip_count = MAX2(trip_count, (jlong)1);
-    if (trip_count < (jlong)max_juint) {
+    julong udiff = 1;
+    if (stride_con > 0 && limit_con > init_con) {
+      udiff = ulimit_con - uinit_con;
+    } else if (stride_con < 0 && limit_con < init_con) {
+      udiff = uinit_con - ulimit_con;
+    }
+    // The loop runs for one more iteration if the limit is (stride > 0 in this example):
+    // init + k * stride + small_value, 0 < small_value < stride
+    julong utrip_count = udiff / ABS(stride_con);
+    if (utrip_count * ABS(stride_con) != udiff) {
+      // Guaranteed to not overflow because it can only happen for ABS(stride) > 1 in which case, utrip_count can't be
+      // max_juint/max_julong
+      utrip_count++;
+    }
+
+#ifdef ASSERT
+    if (loop_bt == T_INT) {
+      // Use longs to avoid integer overflow.
+      jlong init_con = (stride_con > 0) ? init_type->is_int()->_lo : init_type->is_int()->_hi;
+      jlong limit_con = (stride_con > 0) ? limit_type->is_int()->_hi : limit_type->is_int()->_lo;
+      int stride_m = stride_con - (stride_con > 0 ? 1 : -1);
+      jlong trip_count = (limit_con - init_con + stride_m) / stride_con;
+      // The loop body is always executed at least once even if init >= limit (for stride_con > 0) or
+      // init <= limit (for stride_con < 0).
+      trip_count = MAX2(trip_count, (jlong)1);
+      assert(checked_cast<juint>(trip_count) == checked_cast<juint>(utrip_count), "incorrect trip count computation");
+    }
+#endif
+
+    if (utrip_count < max_unsigned_integer(loop_bt)) {
      if (init_n->is_Con() && limit_n->is_Con()) {
        // Set exact trip count.
-        cl->set_exact_trip_count((uint)trip_count);
-      } else if (cl->unrolled_count() == 1) {
+        cl->set_exact_trip_count(utrip_count);
+      } else if (loop_bt == T_LONG || cl->as_CountedLoop()->unrolled_count() == 1) {
        // Set maximum trip count before unrolling.
-        cl->set_trip_count((uint)trip_count);
+        cl->set_trip_count(utrip_count);
      }
    }
  }
@ -1851,7 +1889,7 @@ void PhaseIdealLoop::do_unroll(IdealLoopTree *loop, Node_List &old_new, bool adj
 #ifndef PRODUCT
  if (TraceLoopOpts) {
    if (loop_head->trip_count() < (uint)LoopUnrollLimit) {
-      tty->print("Unroll %d(%2d) ", loop_head->unrolled_count()*2, loop_head->trip_count());
+      tty->print("Unroll %d(" JULONG_FORMAT_W(2) ") ", loop_head->unrolled_count()*2, loop_head->trip_count());
    } else {
      tty->print("Unroll %d     ", loop_head->unrolled_count()*2);
    }
@ -2104,7 +2142,7 @@ void PhaseIdealLoop::do_maximally_unroll(IdealLoopTree *loop, Node_List &old_new
  assert(cl->trip_count() > 0, "");
 #ifndef PRODUCT
  if (TraceLoopOpts) {
-    tty->print("MaxUnroll  %d ", cl->trip_count());
+    tty->print("MaxUnroll  " JULONG_FORMAT " ", cl->trip_count());
    loop->dump_head();
  }
 #endif
@ -3359,7 +3397,7 @@ bool IdealLoopTree::iteration_split_impl(PhaseIdealLoop *phase, Node_List &old_n
    return false;
  }
  // Compute loop trip count if possible.
-  compute_trip_count(phase);
+  compute_trip_count(phase, T_INT);

  // Convert one-iteration loop into normal code.
  if (do_one_iteration_loop(phase)) {
--- a/src/hotspot/share/opto/loopnode.cpp
+++ b/src/hotspot/share/opto/loopnode.cpp
@ -601,7 +601,6 @@ void PhaseIdealLoop::add_parse_predicate(Deoptimization::DeoptReason reason, Nod
    int trap_request = Deoptimization::make_trap_request(reason, Deoptimization::Action_maybe_recompile);
    address call_addr = OptoRuntime::uncommon_trap_blob()->entry_point();
    const TypePtr* no_memory_effects = nullptr;
-    JVMState* jvms = sfpt->jvms();
    CallNode* unc = new CallStaticJavaNode(OptoRuntime::uncommon_trap_Type(), call_addr, "uncommon_trap",
                                           no_memory_effects);

@ -856,8 +855,9 @@ bool PhaseIdealLoop::create_loop_nest(IdealLoopTree* loop, Node_List &old_new) {
    return false;
  }

+  assert(iters_limit > 0, "can't be negative");
+
  PhiNode* phi = head->phi()->as_Phi();
-  Node* incr = head->incr();

  Node* back_control = head->in(LoopNode::LoopBackControl);

@ -888,7 +888,7 @@ bool PhaseIdealLoop::create_loop_nest(IdealLoopTree* loop, Node_List &old_new) {

  // Take what we know about the number of iterations of the long counted loop into account when computing the limit of
  // the inner loop.
-  const Node* init = head->init_trip();
+  Node* init = head->init_trip();
  const TypeInteger* lo = _igvn.type(init)->is_integer(bt);
  const TypeInteger* hi = _igvn.type(limit)->is_integer(bt);
  if (stride_con < 0) {
@ -907,7 +907,7 @@ bool PhaseIdealLoop::create_loop_nest(IdealLoopTree* loop, Node_List &old_new) {
    // going to execute as many range checks once transformed with range checks eliminated (1 peeled iteration with
    // range checks + 2 predicates per range checks) as it would have not transformed. It also has to pay for the extra
    // logic on loop entry and for the outer loop.
-    loop->compute_trip_count(this);
+    loop->compute_trip_count(this, bt);
    if (head->is_CountedLoop() && head->as_CountedLoop()->has_exact_trip_count()) {
      if (head->as_CountedLoop()->trip_count() <= 3) {
        return false;
@ -920,6 +920,11 @@ bool PhaseIdealLoop::create_loop_nest(IdealLoopTree* loop, Node_List &old_new) {
    }
  }

+  if (try_make_short_running_loop(loop, stride_con, range_checks, iters_limit)) {
+    C->set_major_progress();
+    return true;
+  }
+
  julong orig_iters = (julong)hi->hi_as_long() - lo->lo_as_long();
  iters_limit = checked_cast<int>(MIN2((julong)iters_limit, orig_iters));

@ -1118,6 +1123,9 @@ bool PhaseIdealLoop::create_loop_nest(IdealLoopTree* loop, Node_List &old_new) {
  if (safepoint != nullptr) {
    SafePointNode* cloned_sfpt = old_new[safepoint->_idx]->as_SafePoint();

+    if (ShortRunningLongLoop) {
+      add_parse_predicate(Deoptimization::Reason_short_running_long_loop, inner_head, outer_ilt, cloned_sfpt);
+    }
    if (UseLoopPredicate) {
      add_parse_predicate(Deoptimization::Reason_predicate, inner_head, outer_ilt, cloned_sfpt);
      if (UseProfiledLoopPredicate) {
@ -1147,6 +1155,215 @@ bool PhaseIdealLoop::create_loop_nest(IdealLoopTree* loop, Node_List &old_new) {
  return true;
 }

+// Make a copy of Parse/Template Assertion predicates below existing predicates at the loop passed as argument
+class CloneShortLoopPredicateVisitor : public PredicateVisitor {
+  ClonePredicateToTargetLoop _clone_predicate_to_loop;
+  PhaseIdealLoop* const _phase;
+
+public:
+  CloneShortLoopPredicateVisitor(LoopNode* target_loop_head,
+                                 const NodeInSingleLoopBody &node_in_loop_body,
+                                 PhaseIdealLoop* phase)
+    : _clone_predicate_to_loop(target_loop_head, node_in_loop_body, phase),
+      _phase(phase) {
+  }
+  NONCOPYABLE(CloneShortLoopPredicateVisitor);
+
+  using PredicateVisitor::visit;
+
+  void visit(const ParsePredicate& parse_predicate) override {
+    _clone_predicate_to_loop.clone_parse_predicate(parse_predicate, true);
+    parse_predicate.kill(_phase->igvn());
+  }
+
+  void visit(const TemplateAssertionPredicate& template_assertion_predicate) override {
+    _clone_predicate_to_loop.clone_template_assertion_predicate(template_assertion_predicate);
+    template_assertion_predicate.kill(_phase->igvn());
+  }
+};
+
+// If the loop is either statically known to run for a small enough number of iterations or if profile data indicates
+// that, we don't want an outer loop because the overhead of having an outer loop whose backedge is never taken, has a
+// measurable cost. Furthermore, creating the loop nest usually causes one iteration of the loop to be peeled so
+// predicates can be set up. If the loop is short running, then it's an extra iteration that's run with range checks
+// (compared to an int counted loop with int range checks).
+//
+// In the short running case, turn the loop into a regular loop again and transform the long range checks:
+// - LongCountedLoop: Create LoopNode but keep the loop limit type with a CastLL node to avoid that we later try to
+//                    create a Loop Limit Check when turning the LoopNode into a CountedLoopNode.
+// - CountedLoop: Can be reused.
+bool PhaseIdealLoop::try_make_short_running_loop(IdealLoopTree* loop, jint stride_con, const Node_List &range_checks,
+                                                 const uint iters_limit) {
+  if (!ShortRunningLongLoop) {
+    return false;
+  }
+  BaseCountedLoopNode* head = loop->_head->as_BaseCountedLoop();
+  BasicType bt = head->bt();
+  Node* entry_control = head->skip_strip_mined()->in(LoopNode::EntryControl);
+
+  loop->compute_trip_count(this, bt);
+  // Loop must run for no more than iters_limit as it guarantees no overflow of scale * iv in long range checks (see
+  // comment above PhaseIdealLoop::transform_long_range_checks()).
+  // iters_limit / ABS(stride_con) is the largest trip count for which we know it's correct to not create a loop nest:
+  // it's always beneficial to have a single loop rather than a loop nest, so we try to apply this transformation as
+  // often as possible.
+  bool known_short_running_loop = head->trip_count() <= iters_limit / ABS(stride_con);
+  bool profile_short_running_loop = false;
+  if (!known_short_running_loop) {
+    loop->compute_profile_trip_cnt(this);
+    if (StressShortRunningLongLoop) {
+      profile_short_running_loop = true;
+    } else {
+      profile_short_running_loop = !head->is_profile_trip_failed() && head->profile_trip_cnt() <= iters_limit / ABS(stride_con);
+    }
+  }
+
+  if (!known_short_running_loop && !profile_short_running_loop) {
+    return false;
+  }
+
+  Node* limit = head->limit();
+  Node* init = head->init_trip();
+
+  Node* new_limit;
+  if (stride_con > 0) {
+    new_limit = SubNode::make(limit, init, bt);
+  } else {
+    new_limit = SubNode::make(init, limit, bt);
+  }
+  register_new_node(new_limit, entry_control);
+
+  PhiNode* phi = head->phi()->as_Phi();
+  if (profile_short_running_loop) {
+    // Add a Short Running Long Loop Predicate. It's the first predicate in the predicate chain before entering a loop
+    // because a cast that's control dependent on the Short Running Long Loop Predicate is added to narrow the limit and
+    // future predicates may be dependent on the new limit (so have to be between the loop and Short Running Long Loop
+    // Predicate). The current limit could, itself, be dependent on an existing predicate. Clone parse and template
+    // assertion predicates below existing predicates to get proper ordering of predicates when walking from the loop
+    // up: future predicates, Short Running Long Loop Predicate, existing predicates.
+    //
+    //        Existing Hoisted
+    //        Check Predicates
+    //               |
+    //     New Short Running Long
+    //         Loop Predicate
+    //               |
+    //   Cloned Parse Predicates and
+    //  Template Assertion Predicates
+    //  (future predicates added here)
+    //               |
+    //             Loop
+    const Predicates predicates_before_cloning(entry_control);
+    const PredicateBlock* short_running_long_loop_predicate_block = predicates_before_cloning.short_running_long_loop_predicate_block();
+    if (!short_running_long_loop_predicate_block->has_parse_predicate()) { // already trapped
+      return false;
+    }
+    PredicateIterator predicate_iterator(entry_control);
+    NodeInSingleLoopBody node_in_short_loop_body(this, loop);
+    CloneShortLoopPredicateVisitor clone_short_loop_predicates_visitor(head, node_in_short_loop_body, this);
+    predicate_iterator.for_each(clone_short_loop_predicates_visitor);
+
+    entry_control = head->skip_strip_mined()->in(LoopNode::EntryControl);
+
+    const Predicates predicates_after_cloning(entry_control);
+
+    ParsePredicateSuccessProj* short_running_loop_predicate_proj = predicates_after_cloning.
+        short_running_long_loop_predicate_block()->
+        parse_predicate_success_proj();
+    assert(short_running_loop_predicate_proj->in(0)->is_ParsePredicate(), "must be parse predicate");
+
+    const jlong iters_limit_long = iters_limit;
+    Node* cmp_limit = CmpNode::make(new_limit, _igvn.integercon(iters_limit_long, bt), bt);
+    Node* bol = new BoolNode(cmp_limit, BoolTest::le);
+    Node* new_predicate_proj = create_new_if_for_predicate(short_running_loop_predicate_proj,
+                                                           nullptr,
+                                                           Deoptimization::Reason_short_running_long_loop,
+                                                           Op_If);
+    Node* iff = new_predicate_proj->in(0);
+    _igvn.replace_input_of(iff, 1, bol);
+    register_new_node(cmp_limit, iff->in(0));
+    register_new_node(bol, iff->in(0));
+    new_limit = ConstraintCastNode::make_cast_for_basic_type(new_predicate_proj, new_limit,
+                                                             TypeInteger::make(1, iters_limit_long, Type::WidenMin, bt),
+                                                             ConstraintCastNode::UnconditionalDependency, bt);
+    register_new_node(new_limit, new_predicate_proj);
+
+#ifndef PRODUCT
+    if (TraceLoopLimitCheck) {
+      tty->print_cr("Short Long Loop Check Predicate generated:");
+      DEBUG_ONLY(bol->dump(2);)
+    }
+#endif
+    entry_control = head->skip_strip_mined()->in(LoopNode::EntryControl);
+  } else if (bt == T_LONG) {
+    // We're turning a long counted loop into a regular loop that will be converted into an int counted loop. That loop
+    // won't need loop limit check predicates (iters_limit guarantees that). Add a cast to make sure that, whatever
+    // transformation happens by the time the counted loop is created (in a subsequent pass of loop opts), C2 knows
+    // enough about the loop's limit that it doesn't try to add loop limit check predicates.
+    const Predicates predicates(entry_control);
+    const TypeLong* new_limit_t = new_limit->Value(&_igvn)->is_long();
+    new_limit = ConstraintCastNode::make_cast_for_basic_type(predicates.entry(), new_limit,
+                                                             TypeLong::make(0, new_limit_t->_hi, new_limit_t->_widen),
+                                                             ConstraintCastNode::UnconditionalDependency, bt);
+    register_new_node(new_limit, predicates.entry());
+  } else {
+    assert(bt == T_INT && known_short_running_loop, "only CountedLoop statically known to be short running");
+  }
+  IfNode* exit_test = head->loopexit();
+
+  if (bt == T_LONG) {
+    // The loop is short running so new_limit fits into an int: either we determined that statically or added a guard
+    new_limit = new ConvL2INode(new_limit);
+    register_new_node(new_limit, entry_control);
+  }
+
+  Node* int_zero = intcon(0);
+  if (stride_con < 0) {
+    new_limit = new SubINode(int_zero, new_limit);
+    register_new_node(new_limit, entry_control);
+  }
+
+  // Clone the iv data nodes as an integer iv
+  Node* int_stride = intcon(stride_con);
+  Node* inner_phi = new PhiNode(head, TypeInt::INT);
+  Node* inner_incr = new AddINode(inner_phi, int_stride);
+  Node* inner_cmp = new CmpINode(inner_incr, new_limit);
+  Node* inner_bol = new BoolNode(inner_cmp, exit_test->in(1)->as_Bool()->_test._test);
+  inner_phi->set_req(LoopNode::EntryControl, int_zero);
+  inner_phi->set_req(LoopNode::LoopBackControl, inner_incr);
+  register_new_node(inner_phi, head);
+  register_new_node(inner_incr, head);
+  register_new_node(inner_cmp, head);
+  register_new_node(inner_bol, head);
+
+  _igvn.replace_input_of(exit_test, 1, inner_bol);
+
+  // Replace the loop's long iv phi with the new int iv phi + init
+  // (there is no outer loop iv phi in the short running case)
+  Node* iv_add = loop_nest_replace_iv(phi, inner_phi, init, head, bt);
+
+  LoopNode* inner_head = head;
+  if (bt == T_LONG) {
+    // Turn the loop back to a counted loop
+    inner_head = create_inner_head(loop, head, exit_test);
+  } else {
+    // Use existing counted loop
+    revert_to_normal_loop(head);
+  }
+
+  if (bt == T_INT) {
+    init = new ConvI2LNode(init);
+    register_new_node(init, entry_control);
+  }
+
+  transform_long_range_checks(stride_con, range_checks, init, new_limit,
+                              inner_phi, iv_add, inner_head);
+
+  inner_head->mark_loop_nest_inner_loop();
+
+  return true;
+}
+
 int PhaseIdealLoop::extract_long_range_checks(const IdealLoopTree* loop, jint stride_con, int iters_limit, PhiNode* phi,
                                              Node_List& range_checks) {
  const jlong min_iters = 2;
@ -1318,7 +1535,6 @@ void PhaseIdealLoop::transform_long_range_checks(int stride_con, const Node_List

  for (uint i = 0; i < range_checks.size(); i++) {
    ProjNode* proj = range_checks.at(i)->as_Proj();
-    ProjNode* unc_proj = proj->other_if_proj();
    RangeCheckNode* rc = proj->in(0)->as_RangeCheck();
    jlong scale = 0;
    Node* offset = nullptr;
@ -4415,6 +4631,9 @@ void IdealLoopTree::dump_head() {
  if (predicates.loop_limit_check_predicate_block()->is_non_empty()) {
    tty->print(" limit_check");
  }
+  if (predicates.short_running_long_loop_predicate_block()->is_non_empty()) {
+    tty->print(" short_running");
+  }
  if (UseLoopPredicate) {
    if (UseProfiledLoopPredicate && predicates.profiled_loop_predicate_block()->is_non_empty()) {
      tty->print(" profile_predicated");
@ -4922,7 +5141,7 @@ void PhaseIdealLoop::build_and_optimize() {
    for (LoopTreeIterator iter(_ltree_root); !iter.done(); iter.next()) {
      IdealLoopTree* lpt = iter.current();
      if (lpt->is_innermost() && lpt->_allow_optimizations && !lpt->_has_call && lpt->is_counted()) {
-        lpt->compute_trip_count(this);
+        lpt->compute_trip_count(this, T_INT);
        if (!lpt->do_one_iteration_loop(this) &&
            !lpt->do_remove_empty_loop(this)) {
          AutoNodeBudget node_budget(this);
--- a/src/hotspot/share/opto/loopnode.hpp
+++ b/src/hotspot/share/opto/loopnode.hpp
@ -218,6 +218,18 @@ public:
  jlong stride_con() const;

  static BaseCountedLoopNode* make(Node* entry, Node* backedge, BasicType bt);
+
+  virtual void set_trip_count(julong tc) = 0;
+  virtual julong trip_count() const = 0;
+
+  bool has_exact_trip_count() const { return (_loop_flags & HasExactTripCount) != 0; }
+  void set_exact_trip_count(julong tc) {
+    set_trip_count(tc);
+    _loop_flags |= HasExactTripCount;
+  }
+  void set_nonexact_trip_count() {
+    _loop_flags &= ~HasExactTripCount;
+  }
 };


@ -298,26 +310,17 @@ public:

  int main_idx() const { return _main_idx; }

+  void set_trip_count(julong tc) {
+    assert(tc < max_juint, "Cannot set trip count to max_juint");
+    _trip_count = checked_cast<uint>(tc);
+  }
+  julong trip_count() const      { return _trip_count; }

  void set_pre_loop  (CountedLoopNode *main) { assert(is_normal_loop(),""); _loop_flags |= Pre ; _main_idx = main->_idx; }
  void set_main_loop (                     ) { assert(is_normal_loop(),""); _loop_flags |= Main;                         }
  void set_post_loop (CountedLoopNode *main) { assert(is_normal_loop(),""); _loop_flags |= Post; _main_idx = main->_idx; }
  void set_normal_loop(                    ) { _loop_flags &= ~PreMainPostFlagsMask; }

-  // We use max_juint for the default value of _trip_count to signal it wasn't set.
-  // We shouldn't set _trip_count to max_juint explicitly.
-  void set_trip_count(uint tc) { assert(tc < max_juint, "Cannot set trip count to max_juint"); _trip_count = tc; }
-  uint trip_count()            { return _trip_count; }
-
-  bool has_exact_trip_count() const { return (_loop_flags & HasExactTripCount) != 0; }
-  void set_exact_trip_count(uint tc) {
-    assert(tc < max_juint, "Cannot set trip count to max_juint");
-    _trip_count = tc;
-    _loop_flags |= HasExactTripCount;
-  }
-  void set_nonexact_trip_count() {
-    _loop_flags &= ~HasExactTripCount;
-  }
  void set_notpassed_slp() {
    _loop_flags &= ~PassedSlpAnalysis;
  }
@ -380,9 +383,15 @@ public:
 };

 class LongCountedLoopNode : public BaseCountedLoopNode {
+private:
+  virtual uint size_of() const { return sizeof(*this); }
+
+  // Known trip count calculated by compute_exact_trip_count()
+  julong _trip_count;
+
 public:
  LongCountedLoopNode(Node *entry, Node *backedge)
-    : BaseCountedLoopNode(entry, backedge) {
+    : BaseCountedLoopNode(entry, backedge), _trip_count(max_julong) {
    init_class_id(Class_LongCountedLoop);
  }

@ -392,6 +401,12 @@ public:
    return T_LONG;
  }

+  void set_trip_count(julong tc) { // max_julong is the constructor's initial value and may not be set explicitly
+    assert(tc < max_julong, "Cannot set trip count to max_julong");
+    _trip_count = tc;
+  }
+  julong trip_count() const      { return _trip_count; }
+
  LongCountedLoopEndNode* loopexit_or_null() const { return (LongCountedLoopEndNode*) BaseCountedLoopNode::loopexit_or_null(); }
  LongCountedLoopEndNode* loopexit() const { return (LongCountedLoopEndNode*) BaseCountedLoopNode::loopexit(); }
 };
@ -778,7 +793,7 @@ public:
  uint est_loop_unroll_sz(uint factor) const;

  // Compute loop trip count if possible
-  void compute_trip_count(PhaseIdealLoop* phase);
+  void compute_trip_count(PhaseIdealLoop* phase, BasicType bt);

  // Compute loop trip count from profile data
  float compute_profile_trip_cnt_helper(Node* n);
@ -1829,6 +1844,8 @@ public:

  Node* ensure_node_and_inputs_are_above_pre_end(CountedLoopEndNode* pre_end, Node* node);

+  bool try_make_short_running_loop(IdealLoopTree* loop, jint stride_con, const Node_List& range_checks, const uint iters_limit);
+
  ConINode* intcon(jint i);

  ConLNode* longcon(jlong i);
--- a/src/hotspot/share/opto/predicates.cpp
+++ b/src/hotspot/share/opto/predicates.cpp
@ -82,12 +82,11 @@ ParsePredicateNode* ParsePredicate::init_parse_predicate(const Node* parse_predi
  return nullptr;
 }

-ParsePredicate ParsePredicate::clone_to_unswitched_loop(Node* new_control, const bool is_false_path_loop,
-                                                        PhaseIdealLoop* phase) const {
+ParsePredicate ParsePredicate::clone_to_loop(Node* new_control, const bool rewire_uncommon_proj_phi_inputs,
+                                             PhaseIdealLoop* phase) const {
  ParsePredicateSuccessProj* success_proj = phase->create_new_if_for_predicate(_success_proj, new_control,
                                                                               _parse_predicate_node->deopt_reason(),
-                                                                               Op_ParsePredicate, is_false_path_loop);
-  NOT_PRODUCT(trace_cloned_parse_predicate(is_false_path_loop, success_proj));
+                                                                               Op_ParsePredicate, rewire_uncommon_proj_phi_inputs);
  return ParsePredicate(success_proj, _parse_predicate_node->deopt_reason());
 }

@ -97,11 +96,10 @@ void ParsePredicate::kill(PhaseIterGVN& igvn) const {
 }

 #ifndef PRODUCT
-void ParsePredicate::trace_cloned_parse_predicate(const bool is_false_path_loop,
-                                                  const ParsePredicateSuccessProj* success_proj) {
-  if (TraceLoopPredicate) {
+void ParsePredicate::trace_cloned_parse_predicate(const bool is_false_path_loop) const {
+  if (TraceLoopUnswitching) {
    tty->print("Parse Predicate cloned to %s path loop: ", is_false_path_loop ? "false" : "true");
-    success_proj->in(0)->dump();
+    head()->dump();
  }
 }
 #endif // NOT PRODUCT
@ -126,6 +124,7 @@ bool RuntimePredicate::has_valid_uncommon_trap(const Node* success_proj) {
  assert(RegularPredicate::may_be_predicate_if(success_proj), "must have been checked before");
  const Deoptimization::DeoptReason deopt_reason = uncommon_trap_reason(success_proj->as_IfProj());
  return (deopt_reason == Deoptimization::Reason_loop_limit_check ||
+          deopt_reason == Deoptimization::Reason_short_running_long_loop ||
          deopt_reason == Deoptimization::Reason_auto_vectorization_check ||
          deopt_reason == Deoptimization::Reason_predicate ||
          deopt_reason == Deoptimization::Reason_profile_predicate);
@ -941,6 +940,8 @@ void Predicates::dump() const {
    _profiled_loop_predicate_block.dump("  ");
    tty->print_cr("- Loop Predicate Block:");
    _loop_predicate_block.dump("  ");
+    tty->print_cr("- Short Running Long Loop Predicate Block:");
+    _short_running_long_loop_predicate_block.dump("  ");
    tty->cr();
  } else {
    tty->print_cr("<no predicates>");
@ -999,6 +1000,10 @@ InitializedAssertionPredicate CreateAssertionPredicatesVisitor::initialize_from_
  return initialized_assertion_predicate;
 }

+bool NodeInSingleLoopBody::check_node_in_loop_body(Node* node) const {
+  return _phase->is_member(_ilt, _phase->get_ctrl(node)); // tests the control of 'node' for membership in '_ilt'
+}
+
 // Clone the provided Template Assertion Predicate and set '_init' as new input for the OpaqueLoopInitNode.
 TemplateAssertionPredicate CreateAssertionPredicatesVisitor::clone_template_and_replace_init_input(
    const TemplateAssertionPredicate& template_assertion_predicate) const {
@ -1108,11 +1113,18 @@ void CloneUnswitchedLoopPredicatesVisitor::visit(const ParsePredicate& parse_pre
  if (_is_counted_loop && deopt_reason == Deoptimization::Reason_loop_limit_check) {
    return;
  }
-  _clone_predicate_to_true_path_loop.clone_parse_predicate(parse_predicate, false);
-  _clone_predicate_to_false_path_loop.clone_parse_predicate(parse_predicate, true);
+  clone_parse_predicate(parse_predicate, false);
+  clone_parse_predicate(parse_predicate, true);
  parse_predicate.kill(_phase->igvn());
 }

+void CloneUnswitchedLoopPredicatesVisitor::clone_parse_predicate(const ParsePredicate& parse_predicate,
+                                                                 const bool is_false_path_loop) { // clones to the loop picked by the flag, then traces the clone (non-product only)
+  ClonePredicateToTargetLoop& clone_predicate_to_loop = is_false_path_loop ? _clone_predicate_to_false_path_loop : _clone_predicate_to_true_path_loop;
+  const ParsePredicate cloned_parse_predicate = clone_predicate_to_loop.clone_parse_predicate(parse_predicate, is_false_path_loop);
+  NOT_PRODUCT(cloned_parse_predicate.trace_cloned_parse_predicate(is_false_path_loop);)
+}
+
 // Clone the Template Assertion Predicate, which is currently found before the newly added unswitched loop selector,
 // to the true path and false path loop.
 void CloneUnswitchedLoopPredicatesVisitor::visit(const TemplateAssertionPredicate& template_assertion_predicate) {
--- a/src/hotspot/share/opto/predicates.hpp
+++ b/src/hotspot/share/opto/predicates.hpp
@ -73,6 +73,14 @@ class TemplateAssertionPredicate;
 *                           counted loop to avoid these overflow problems.
 *                           The predicate does not replace an actual check inside the loop. This predicate can only
 *                           be added once above the Loop Limit Check Parse Predicate for a loop.
+ *     - Short:              This predicate is created when a long counted loop is transformed into an int counted
+ *       Running Long        loop. In general, that transformation requires an outer loop to guarantee that the new
+ *       Loop                loop nest iterates over the entire range of the loop before transformation. However, if the
+ *       Predicate           loop is speculated to run for a small enough number of iterations, the outer loop is not
+ *                           needed. This predicate is added to catch mis-speculation in this case. It also applies to
+ *                           int counted loops with long range checks for which a loop nest also needs to be created
+ *                           in the general case (so the transformation of long range checks to int range checks is
+ *                           legal).
 * - Assertion Predicate: An always true predicate which will never fail (its range is already covered by an earlier
 *                        Hoisted Check Predicate or the main-loop entry guard) but is required in order to fold away a
 *                        dead sub loop in which some data could be proven to be dead (by the type system) and replaced
@ -288,8 +296,6 @@ class ParsePredicate : public Predicate {
  }

  static ParsePredicateNode* init_parse_predicate(const Node* parse_predicate_proj, Deoptimization::DeoptReason deopt_reason);
-  NOT_PRODUCT(static void trace_cloned_parse_predicate(bool is_false_path_loop,
-                                                       const ParsePredicateSuccessProj* success_proj);)

 public:
  ParsePredicate(Node* parse_predicate_proj, Deoptimization::DeoptReason deopt_reason)
@ -320,8 +326,8 @@ class ParsePredicate : public Predicate {
    return _success_proj;
  }

-  ParsePredicate clone_to_unswitched_loop(Node* new_control, bool is_false_path_loop,
-                                          PhaseIdealLoop* phase) const;
+  ParsePredicate clone_to_loop(Node* new_control, bool rewire_uncommon_proj_phi_inputs, PhaseIdealLoop* phase) const;
+  NOT_PRODUCT(void trace_cloned_parse_predicate(bool is_false_path_loop) const;)

  void kill(PhaseIterGVN& igvn) const;
 };
@ -786,7 +792,8 @@ class PredicateIterator : public StackObj {
      PredicateBlockIterator loop_predicate_iterator(current_node, Deoptimization::Reason_predicate);
      current_node = loop_predicate_iterator.for_each(predicate_visitor);
    }
-    return current_node;
+    PredicateBlockIterator short_running_loop_predicate_iterator(current_node, Deoptimization::Reason_short_running_long_loop);
+    return short_running_loop_predicate_iterator.for_each(predicate_visitor);
  }
 };

@ -953,6 +960,7 @@ class Predicates : public StackObj {
  const PredicateBlock _auto_vectorization_check_block;
  const PredicateBlock _profiled_loop_predicate_block;
  const PredicateBlock _loop_predicate_block;
+  const PredicateBlock _short_running_long_loop_predicate_block;
  Node* const _entry;

 public:
@ -965,7 +973,9 @@ class Predicates : public StackObj {
                                       Deoptimization::Reason_profile_predicate),
        _loop_predicate_block(_profiled_loop_predicate_block.entry(),
                              Deoptimization::Reason_predicate),
-        _entry(_loop_predicate_block.entry()) {}
+        _short_running_long_loop_predicate_block(_loop_predicate_block.entry(),
+                                            Deoptimization::Reason_short_running_long_loop),
+        _entry(_short_running_long_loop_predicate_block.entry()) {}
  NONCOPYABLE(Predicates);

  // Returns the control input the first predicate if there are any predicates. If there are no predicates, the same
@ -990,6 +1000,10 @@ class Predicates : public StackObj {
    return &_loop_limit_check_predicate_block;
  }

+  const PredicateBlock* short_running_long_loop_predicate_block() const {
+    return &_short_running_long_loop_predicate_block;
+  }
+
  bool has_any() const {
    return _entry != _tail;
  }
@ -1082,6 +1096,19 @@ class NodeInClonedLoopBody : public NodeInLoopBody {
  }
 };

+// This class checks whether a node is in the loop body passed to the constructor.
+class NodeInSingleLoopBody : public NodeInLoopBody {
+  PhaseIdealLoop* const _phase; // queried for the control of the node to check
+  IdealLoopTree* const _ilt;    // the loop whose body membership is tested
+
+public:
+  NodeInSingleLoopBody(PhaseIdealLoop* phase, IdealLoopTree* ilt) : _phase(phase), _ilt(ilt) {
+  }
+  NONCOPYABLE(NodeInSingleLoopBody);
+
+  bool check_node_in_loop_body(Node* node) const override;
+};
+
 // Visitor to create Initialized Assertion Predicates at a target loop from Template Assertion Predicates from a source
 // loop. This visitor can be used in combination with a PredicateIterator.
 class CreateAssertionPredicatesVisitor : public PredicateVisitor {
@ -1158,10 +1185,11 @@ public:
  ClonePredicateToTargetLoop(LoopNode* target_loop_head, const NodeInLoopBody& node_in_loop_body, PhaseIdealLoop* phase);

  // Clones the provided Parse Predicate to the head of the current predicate chain at the target loop.
-  void clone_parse_predicate(const ParsePredicate& parse_predicate, bool is_false_path_loop) {
-    ParsePredicate cloned_parse_predicate = parse_predicate.clone_to_unswitched_loop(_old_target_loop_entry,
-                                                                                     is_false_path_loop, _phase);
+  ParsePredicate clone_parse_predicate(const ParsePredicate& parse_predicate, bool rewire_uncommon_proj_phi_inputs) {
+    ParsePredicate cloned_parse_predicate = parse_predicate.clone_to_loop(_old_target_loop_entry,
+                                                                          rewire_uncommon_proj_phi_inputs, _phase);
    _target_loop_predicate_chain.insert_predicate(cloned_parse_predicate);
+    return cloned_parse_predicate;
  }

  void clone_template_assertion_predicate(const TemplateAssertionPredicate& template_assertion_predicate);
@ -1189,6 +1217,9 @@ class CloneUnswitchedLoopPredicatesVisitor : public PredicateVisitor {
  using PredicateVisitor::visit;

  void visit(const ParsePredicate& parse_predicate) override;
+
+  void clone_parse_predicate(const ParsePredicate &parse_predicate,
+                             bool is_false_path_loop);
  void visit(const TemplateAssertionPredicate& template_assertion_predicate) override;
 };

--- a/src/hotspot/share/runtime/deoptimization.cpp
+++ b/src/hotspot/share/runtime/deoptimization.cpp
@ -2767,8 +2767,8 @@ const char* Deoptimization::_trap_reason_name[] = {
  "unstable_if",
  "unstable_fused_if",
  "receiver_constraint",
+  "short_running_loop" JVMCI_ONLY("_or_aliasing"),
 #if INCLUDE_JVMCI
-  "aliasing",
  "transfer_to_interpreter",
  "not_compiled_exception_handler",
  "unresolved",
--- a/src/hotspot/share/runtime/deoptimization.hpp
+++ b/src/hotspot/share/runtime/deoptimization.hpp
@ -117,8 +117,9 @@ class Deoptimization : AllStatic {
    Reason_unstable_if,           // a branch predicted always false was taken
    Reason_unstable_fused_if,     // fused two ifs that had each one untaken branch. One is now taken.
    Reason_receiver_constraint,   // receiver subtype check failed
+    Reason_short_running_long_loop,    // profile reports loop runs for small number of iterations
 #if INCLUDE_JVMCI
-    Reason_aliasing,              // optimistic assumption about aliasing failed
+    Reason_aliasing = Reason_short_running_long_loop, // optimistic assumption about aliasing failed
    Reason_transfer_to_interpreter, // explicit transferToInterpreter()
    Reason_not_compiled_exception_handler,
    Reason_unresolved,
--- a/src/hotspot/share/runtime/vmStructs.cpp
+++ b/src/hotspot/share/runtime/vmStructs.cpp
@ -1566,6 +1566,7 @@
  declare_constant(Deoptimization::Reason_age)                            \
  declare_constant(Deoptimization::Reason_predicate)                      \
  declare_constant(Deoptimization::Reason_loop_limit_check)               \
+  declare_constant(Deoptimization::Reason_short_running_long_loop)        \
  declare_constant(Deoptimization::Reason_auto_vectorization_check)       \
  declare_constant(Deoptimization::Reason_speculate_class_check)          \
  declare_constant(Deoptimization::Reason_speculate_null_check)           \
@ -1573,7 +1574,6 @@
  declare_constant(Deoptimization::Reason_unstable_if)                    \
  declare_constant(Deoptimization::Reason_unstable_fused_if)              \
  declare_constant(Deoptimization::Reason_receiver_constraint)            \
-  NOT_ZERO(JVMCI_ONLY(declare_constant(Deoptimization::Reason_aliasing)))                       \
  NOT_ZERO(JVMCI_ONLY(declare_constant(Deoptimization::Reason_transfer_to_interpreter)))        \
  NOT_ZERO(JVMCI_ONLY(declare_constant(Deoptimization::Reason_not_compiled_exception_handler))) \
  NOT_ZERO(JVMCI_ONLY(declare_constant(Deoptimization::Reason_unresolved)))                     \
--- a/src/hotspot/share/utilities/globalDefinitions.hpp
+++ b/src/hotspot/share/utilities/globalDefinitions.hpp
@ -148,6 +148,9 @@ class oopDesc;
 #ifndef JULONG_FORMAT_X
 #define JULONG_FORMAT_X          UINT64_FORMAT_X
 #endif
+#ifndef JULONG_FORMAT_W
+#define JULONG_FORMAT_W(width)   UINT64_FORMAT_W(width)
+#endif

 // Format pointers and padded integral values which change size between 32- and 64-bit.
 #ifdef  _LP64
@ -771,6 +774,14 @@ inline jlong min_signed_integer(BasicType bt) {
  return min_jlong;
 }

+inline julong max_unsigned_integer(BasicType bt) { // largest unsigned value: max_juint for T_INT, max_julong for T_LONG
+  if (bt == T_INT) {
+    return max_juint;
+  }
+  assert(bt == T_LONG, "unsupported");
+  return max_julong;
+}
+
 inline uint bits_per_java_integer(BasicType bt) {
  if (bt == T_INT) {
    return BitsPerJavaInteger;
--- a/test/hotspot/jtreg/compiler/c2/irTests/TestLongRangeChecks.java
+++ b/test/hotspot/jtreg/compiler/c2/irTests/TestLongRangeChecks.java
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, 2022, Red Hat, Inc. All rights reserved.
+ * Copyright (c) 2021, 2025, Red Hat, Inc. All rights reserved.
 * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
@ -38,6 +38,9 @@ import java.util.Objects;

 public class TestLongRangeChecks {
    public static void main(String[] args) {
+        TestFramework.runWithFlags("-XX:-ShortRunningLongLoop", "-XX:+TieredCompilation", "-XX:-UseCountedLoopSafepoints", "-XX:LoopUnrollLimit=0");
+        TestFramework.runWithFlags("-XX:-ShortRunningLongLoop", "-XX:+TieredCompilation", "-XX:+UseCountedLoopSafepoints", "-XX:LoopStripMiningIter=1", "-XX:LoopUnrollLimit=0");
+        TestFramework.runWithFlags("-XX:-ShortRunningLongLoop", "-XX:+TieredCompilation", "-XX:+UseCountedLoopSafepoints", "-XX:LoopStripMiningIter=1000", "-XX:LoopUnrollLimit=0");
        TestFramework.runWithFlags("-XX:+TieredCompilation", "-XX:-UseCountedLoopSafepoints", "-XX:LoopUnrollLimit=0");
        TestFramework.runWithFlags("-XX:+TieredCompilation", "-XX:+UseCountedLoopSafepoints", "-XX:LoopStripMiningIter=1", "-XX:LoopUnrollLimit=0");
        TestFramework.runWithFlags("-XX:+TieredCompilation", "-XX:+UseCountedLoopSafepoints", "-XX:LoopStripMiningIter=1000", "-XX:LoopUnrollLimit=0");
@ -45,7 +48,8 @@ public class TestLongRangeChecks {


    @Test
-    @IR(counts = { IRNode.LOOP, "1" })
+    @IR(applyIf = { "ShortRunningLongLoop", "false" }, counts = { IRNode.LOOP, "1" })
+    @IR(applyIf = { "ShortRunningLongLoop", "true" }, failOn = IRNode.LOOP)
    @IR(failOn = { IRNode.COUNTED_LOOP})
    public static void testStridePosScalePos(long start, long stop, long length, long offset) {
        final long scale = 1;
@ -66,7 +70,8 @@ public class TestLongRangeChecks {
    }

    @Test
-    @IR(counts = { IRNode.LOOP, "1" })
+    @IR(applyIf = { "ShortRunningLongLoop", "false" }, counts = { IRNode.LOOP, "1" })
+    @IR(applyIf = { "ShortRunningLongLoop", "true" }, failOn = IRNode.LOOP)
    @IR(failOn = { IRNode.COUNTED_LOOP})
    public static void testStridePosScalePosInIntLoop1(int start, int stop, long length, long offset) {
        final long scale = 2;
@ -84,7 +89,8 @@ public class TestLongRangeChecks {
    }

    @Test
-    @IR(counts = { IRNode.LOOP, "1" })
+    @IR(applyIf = { "ShortRunningLongLoop", "false" }, counts = { IRNode.LOOP, "1" })
+    @IR(applyIf = { "ShortRunningLongLoop", "true" }, failOn = IRNode.LOOP)
    @IR(failOn = { IRNode.COUNTED_LOOP})
    public static void testStridePosScalePosInIntLoop2(int start, int stop, long length, long offset) {
        final int scale = 2;
@ -102,7 +108,8 @@ public class TestLongRangeChecks {
    }

    @Test
-    @IR(counts = { IRNode.LOOP, "1"})
+    @IR(applyIf = { "ShortRunningLongLoop", "false" }, counts = { IRNode.LOOP, "1" })
+    @IR(applyIf = { "ShortRunningLongLoop", "true" }, failOn = IRNode.LOOP)
    @IR(failOn = { IRNode.COUNTED_LOOP})
    public static void testStrideNegScaleNeg(long start, long stop, long length, long offset) {
        final long scale = -1;
@ -118,7 +125,8 @@ public class TestLongRangeChecks {
    }

    @Test
-    @IR(counts = { IRNode.LOOP, "1" })
+    @IR(applyIf = { "ShortRunningLongLoop", "false" }, counts = { IRNode.LOOP, "1" })
+    @IR(applyIf = { "ShortRunningLongLoop", "true" }, failOn = IRNode.LOOP)
    @IR(failOn = { IRNode.COUNTED_LOOP})
    public static void testStrideNegScaleNegInIntLoop1(int start, int stop, long length, long offset) {
        final long scale = -2;
@ -135,7 +143,8 @@ public class TestLongRangeChecks {
    }

    @Test
-    @IR(counts = { IRNode.LOOP, "1" })
+    @IR(applyIf = { "ShortRunningLongLoop", "false" }, counts = { IRNode.LOOP, "1" })
+    @IR(applyIf = { "ShortRunningLongLoop", "true" }, failOn = IRNode.LOOP)
    @IR(failOn = { IRNode.COUNTED_LOOP})
    public static void testStrideNegScaleNegInIntLoop2(int start, int stop, long length, long offset) {
        final int scale = -2;
@ -152,7 +161,8 @@ public class TestLongRangeChecks {
    }

    @Test
-    @IR(counts = { IRNode.LOOP, "1"})
+    @IR(applyIf = { "ShortRunningLongLoop", "false" }, counts = { IRNode.LOOP, "1" })
+    @IR(applyIf = { "ShortRunningLongLoop", "true" }, failOn = IRNode.LOOP)
    @IR(failOn = { IRNode.COUNTED_LOOP})
    public static void testStrideNegScalePos(long start, long stop, long length, long offset) {
        final long scale = 1;
@ -168,7 +178,8 @@ public class TestLongRangeChecks {
    }

    @Test
-    @IR(counts = { IRNode.LOOP, "1" })
+    @IR(applyIf = { "ShortRunningLongLoop", "false" }, counts = { IRNode.LOOP, "1" })
+    @IR(applyIf = { "ShortRunningLongLoop", "true" }, failOn = IRNode.LOOP)
    @IR(failOn = { IRNode.COUNTED_LOOP})
    public static void testStrideNegScalePosInIntLoop1(int start, int stop, long length, long offset) {
        final long scale = 2;
@ -184,7 +195,8 @@ public class TestLongRangeChecks {
    }

    @Test
-    @IR(counts = { IRNode.LOOP, "1" })
+    @IR(applyIf = { "ShortRunningLongLoop", "false" }, counts = { IRNode.LOOP, "1" })
+    @IR(applyIf = { "ShortRunningLongLoop", "true" }, failOn = IRNode.LOOP)
    @IR(failOn = { IRNode.COUNTED_LOOP})
    public static void testStrideNegScalePosInIntLoop2(int start, int stop, long length, long offset) {
        final int scale = 2;
@ -200,7 +212,8 @@ public class TestLongRangeChecks {
    }

    @Test
-    @IR(counts = { IRNode.LOOP, "1"})
+    @IR(applyIf = { "ShortRunningLongLoop", "false" }, counts = { IRNode.LOOP, "1" })
+    @IR(applyIf = { "ShortRunningLongLoop", "true" }, failOn = IRNode.LOOP)
    @IR(failOn = { IRNode.COUNTED_LOOP})
    public static void testStridePosScaleNeg(long start, long stop, long length, long offset) {
        final long scale = -1;
@ -216,7 +229,8 @@ public class TestLongRangeChecks {
    }

    @Test
-    @IR(counts = { IRNode.LOOP, "1"})
+    @IR(applyIf = { "ShortRunningLongLoop", "false" }, counts = { IRNode.LOOP, "1" })
+    @IR(applyIf = { "ShortRunningLongLoop", "true" }, failOn = IRNode.LOOP)
    @IR(failOn = { IRNode.COUNTED_LOOP})
    public static void testStridePosScaleNegInIntLoop1(int start, int stop, long length, long offset) {
        final long scale = -2;
@ -232,7 +246,8 @@ public class TestLongRangeChecks {
    }

    @Test
-    @IR(counts = { IRNode.LOOP, "1"})
+    @IR(applyIf = { "ShortRunningLongLoop", "false" }, counts = { IRNode.LOOP, "1" })
+    @IR(applyIf = { "ShortRunningLongLoop", "true" }, failOn = IRNode.LOOP)
    @IR(failOn = { IRNode.COUNTED_LOOP})
    public static void testStridePosScaleNegInIntLoop2(int start, int stop, long length, long offset) {
        final int scale = -2;
--- a/test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java
+++ b/test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java
@ -1663,6 +1663,11 @@ public class IRNode {
        trapNodes(RANGE_CHECK_TRAP, "range_check");
    }

+    public static final String SHORT_RUNNING_LOOP_TRAP = PREFIX + "SHORT_RUNNING_LOOP_TRAP" + POSTFIX;
+    static {
+        trapNodes(SHORT_RUNNING_LOOP_TRAP, "short_running_loop");
+    }
+
    public static final String REINTERPRET_S2HF = PREFIX + "REINTERPRET_S2HF" + POSTFIX;
    static {
        beforeMatchingNameRegex(REINTERPRET_S2HF, "ReinterpretS2HF");
--- a/test/hotspot/jtreg/compiler/longcountedloops/TestShortLoopLostLimit.java
+++ b/test/hotspot/jtreg/compiler/longcountedloops/TestShortLoopLostLimit.java
@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2025, Red Hat, Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/**
+ * @test
+ * @bug 8342692
+ * @summary C2: long counted loop/long range checks: don't create loop-nest for short running loops
+ * @run main/othervm -XX:-TieredCompilation -XX:-UseOnStackReplacement -XX:-BackgroundCompilation TestShortLoopLostLimit
+ * @run main/othervm TestShortLoopLostLimit
+ */
+
+public class TestShortLoopLostLimit {
+    private static volatile int volatileField;
+
+    public static void main(String[] args) {
+        for (int i = 0; i < 20_000; i++) { // repeated calls, always with the same arguments
+            test1(0, 100);
+            test2(0, 100);
+        }
+    }
+
+    private static void test1(int a, long b) {
+        for (long i = 0; i < a + b; i += 2) { // long loop counting up by 2; the limit is the sum a + b
+            volatileField = 42;
+        }
+    }
+
+    private static void test2(int a, long b) {
+        for (long i = a + b; i > 0; i -= 2) { // long loop counting down by 2; the init value is the sum a + b
+            volatileField = 42;
+        }
+    }
+}
--- a/test/hotspot/jtreg/compiler/longcountedloops/TestShortRunningIntLoopWithLongChecksPredicates.java
+++ b/test/hotspot/jtreg/compiler/longcountedloops/TestShortRunningIntLoopWithLongChecksPredicates.java
@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2025, Red Hat, Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/**
+ * @test
+ * @bug 8342692
+ * @summary C2: long counted loop/long range checks: don't create loop-nest for short running loops
+ * @run main/othervm -XX:-TieredCompilation -XX:-UseOnStackReplacement -XX:-BackgroundCompilation -XX:LoopUnrollLimit=100
+ *                   TestShortRunningIntLoopWithLongChecksPredicates
+ * @run main/othervm TestShortRunningIntLoopWithLongChecksPredicates
+ */
+
+import java.util.Objects;
+
+// The int RC is first eliminated by predication, which causes Assertion
+// Predicates to be added. Then the loop is transformed to make it
+// possible to optimize the long RC. Finally, unrolling happens, which
+// requires the Assertion Predicates to have been properly copied when
+// the loop was transformed for the long range check.
+public class TestShortRunningIntLoopWithLongChecksPredicates {
+    private static volatile int volatileField;
+
+    public static void main(String[] args) {
+        int[] array = new int[100];
+        for (int i = 0; i < 20_000; i++) {
+            helper1(100, array, 100);
+            test1(1, 100);
+        }
+    }
+
+    private static void test1(int stop, long range) {
+        int[] array = new int[3];
+        helper1(stop, array, range);
+    }
+
+    private static void helper1(int stop, int[] array, long range) {
+        for (int i = 0; i < stop; i++) { // int loop
+            if (i % 2 == 0) {
+                array[i] += i; // int array access
+            } else {
+                volatileField = 42;
+            }
+            Objects.checkIndex(i, range); // long index check: range is a long
+        }
+    }
+}
--- a/test/hotspot/jtreg/compiler/longcountedloops/TestShortRunningLongCountedLoop.java
+++ b/test/hotspot/jtreg/compiler/longcountedloops/TestShortRunningLongCountedLoop.java
@ -0,0 +1,579 @@
+/*
+ * Copyright (c) 2025, Red Hat, Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package compiler.longcountedloops;
+import compiler.lib.ir_framework.*;
+import compiler.whitebox.CompilerWhiteBoxTest;
+import jdk.test.whitebox.WhiteBox;
+
+import java.util.Objects;
+/*
+ * @test
+ * @bug 8342692
+ * @summary C2: long counted loop/long range checks: don't create loop-nest for short running loops
+ * @library /test/lib /
+ * @build jdk.test.whitebox.WhiteBox
+ * @run driver jdk.test.lib.helpers.ClassFileInstaller jdk.test.whitebox.WhiteBox
+ * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI compiler.longcountedloops.TestShortRunningLongCountedLoop
+ */
+
+public class TestShortRunningLongCountedLoop {
+    private static volatile int volatileField;
+    private final static WhiteBox wb = WhiteBox.getWhiteBox();
+
+    public static void main(String[] args) {
+        // IR rules expect a single loop so disable unrolling
+        // IR rules expect strip mined loop to be enabled
+        // testIntLoopUnknownBoundsShortUnswitchedLoop and testLongLoopUnknownBoundsShortUnswitchedLoop need -XX:-UseProfiledLoopPredicate
+        TestFramework.runWithFlags("-XX:LoopMaxUnroll=0", "-XX:LoopStripMiningIter=1000", "-XX:+UseCountedLoopSafepoints", "-XX:-UseProfiledLoopPredicate");
+    }
+
+    // Check IR only has a counted loop when bounds are known and loop runs for a short time
+    @Test
+    @IR(counts = { IRNode.COUNTED_LOOP, "1" })
+    @IR(failOn = { IRNode.LOOP, IRNode.OUTER_STRIP_MINED_LOOP, IRNode.SHORT_RUNNING_LOOP_TRAP })
+    public static int testLongLoopConstantBoundsShortLoop1() {
+        int j = 0;
+        for (long i = 0; i < 100; i++) {
+            volatileField = 42;
+            j++;
+        }
+        return j;
+    }
+
+    @Check(test = "testLongLoopConstantBoundsShortLoop1")
+    public static void checkTestLongLoopConstantBoundsShortLoop1(int res) {
+        if (res != 100) {
+            throw new RuntimeException("incorrect result: " + res);
+        }
+    }
+
+    // Same with stride > 1
+    @Test
+    @IR(counts = { IRNode.COUNTED_LOOP, "1" })
+    @IR(failOn = { IRNode.LOOP, IRNode.OUTER_STRIP_MINED_LOOP, IRNode.SHORT_RUNNING_LOOP_TRAP })
+    public static int testLongLoopConstantBoundsShortLoop2() {
+        int j = 0;
+        for (long i = 0; i < 2000; i += 20) {
+            volatileField = 42;
+            j++;
+        }
+        return j;
+    }
+
+    @Check(test = "testLongLoopConstantBoundsShortLoop2")
+    public static void checkTestLongLoopConstantBoundsShortLoop2(int res) {
+        if (res != 100) {
+            throw new RuntimeException("incorrect result: " + res);
+        }
+    }
+
+    // Same with loop going downward
+    @Test
+    @IR(counts = { IRNode.COUNTED_LOOP, "1" })
+    @IR(failOn = { IRNode.LOOP, IRNode.OUTER_STRIP_MINED_LOOP, IRNode.SHORT_RUNNING_LOOP_TRAP })
+    public static int testLongLoopConstantBoundsShortLoop3() {
+        int j = 0;
+        for (long i = 99; i >= 0; i--) {
+            volatileField = 42;
+            j++;
+        }
+        return j;
+    }
+
+    @Check(test = "testLongLoopConstantBoundsShortLoop3")
+    public static void checkTestLongLoopConstantBoundsShortLoop3(int res) {
+        if (res != 100) {
+            throw new RuntimeException("incorrect result: " + res);
+        }
+    }
+
+    // Same with loop going downward and stride > 1
+    @Test
+    @IR(counts = { IRNode.COUNTED_LOOP, "1" })
+    @IR(failOn = { IRNode.LOOP, IRNode.OUTER_STRIP_MINED_LOOP, IRNode.SHORT_RUNNING_LOOP_TRAP })
+    public static int testLongLoopConstantBoundsShortLoop4() {
+        int j = 0;
+        for (long i = 1999; i >= 0; i-=20) {
+            volatileField = 42;
+            j++;
+        }
+        return j;
+    }
+
+    @Check(test = "testLongLoopConstantBoundsShortLoop4")
+    public static void checkTestLongLoopConstantBoundsShortLoop4(int res) {
+        if (res != 100) {
+            throw new RuntimeException("incorrect result: " + res);
+        }
+    }
+
+    // Check IR only has a counted loop when bounds are known but not exact and loop runs for a short time
+    @Test
+    @IR(counts = { IRNode.COUNTED_LOOP, "1" })
+    @IR(failOn = { IRNode.LOOP, IRNode.OUTER_STRIP_MINED_LOOP, IRNode.SHORT_RUNNING_LOOP_TRAP })
+    public static int testLongLoopConstantBoundsShortLoop5(int start, int stop) {
+        start= Integer.max(start, 0);
+        stop= Integer.min(stop, 999);
+        int j = 0;
+        for (long i = start; i < stop; i++) {
+            volatileField = 42;
+            j++;
+        }
+        return j;
+    }
+
+    @Run(test = "testLongLoopConstantBoundsShortLoop5")
+    public static void testLongLoopConstantBoundsShortLoop5_runner() {
+        int res = testLongLoopConstantBoundsShortLoop5(0, 100);
+        if (res != 100) {
+            throw new RuntimeException("incorrect result: " + res);
+        }
+    }
+
+    // Check that loop nest is created when bounds are known and loop is not short running
+    @Test
+    @IR(counts = { IRNode.COUNTED_LOOP, "1", IRNode.LOOP, "1"})
+    @IR(failOn = { IRNode.SHORT_RUNNING_LOOP_TRAP, IRNode.OUTER_STRIP_MINED_LOOP })
+    public static int testLongLoopConstantBoundsLongLoop1() {
+        final long stride = Integer.MAX_VALUE / 1000;
+        int j = 0;
+        for (long i = 0; i < stride * 1001; i += stride) {
+            volatileField = 42;
+            j++;
+        }
+        return j;
+    }
+
+    @Check(test = "testLongLoopConstantBoundsLongLoop1")
+    public static void checkTestLongLoopConstantBoundsLongLoop1(int res) {
+        if (res != 1001) {
+            throw new RuntimeException("incorrect result: " + res);
+        }
+    }
+
+    // Same with negative stride
+    @Test
+    @IR(counts = { IRNode.COUNTED_LOOP, "1", IRNode.LOOP, "1"})
+    @IR(failOn = { IRNode.SHORT_RUNNING_LOOP_TRAP, IRNode.OUTER_STRIP_MINED_LOOP })
+    public static int testLongLoopConstantBoundsLongLoop2() {
+        final long stride = Integer.MAX_VALUE / 1000;
+        int j = 0;
+        for (long i = stride * 1000; i >= 0; i -= stride) {
+            volatileField = 42;
+            j++;
+        }
+        return j;
+    }
+
+    @Check(test = "testLongLoopConstantBoundsLongLoop2")
+    public static void checkTestLongLoopConstantBoundsLongLoop2(int res) {
+        if (res != 1001) {
+            throw new RuntimeException("incorrect result: " + res);
+        }
+    }
+
+    // Check IR only has a counted loop when bounds are unknown but profile reports a short running loop
+    @Test
+    @IR(counts = { IRNode.COUNTED_LOOP, "1", IRNode.SHORT_RUNNING_LOOP_TRAP, "1", IRNode.OUTER_STRIP_MINED_LOOP, "1" })
+    @IR(failOn = { IRNode.LOOP })
+    public static int testLongLoopUnknownBoundsShortLoop(long start, long stop) {
+        int j = 0;
+        for (long i = start; i < stop; i++) {
+            volatileField = 42;
+            j++;
+        }
+        return j;
+    }
+
+    @Run(test = "testLongLoopUnknownBoundsShortLoop")
+    @Warmup(10_000)
+    public static void testLongLoopUnknownBoundsShortLoop_runner() {
+        int res = testLongLoopUnknownBoundsShortLoop(0, 100);
+        if (res != 100) {
+            throw new RuntimeException("incorrect result: " + res);
+        }
+    }
+
+    // same with stride > 1
+    @Test
+    @IR(counts = { IRNode.COUNTED_LOOP, "1", IRNode.SHORT_RUNNING_LOOP_TRAP, "1", IRNode.OUTER_STRIP_MINED_LOOP, "1" })
+    @IR(failOn = { IRNode.LOOP })
+    public static int testLongLoopUnknownBoundsShortLoop2(long start, long stop) {
+        int j = 0;
+        for (long i = start; i < stop; i+=20) {
+            volatileField = 42;
+            j++;
+        }
+        return j;
+    }
+
+    @Run(test = "testLongLoopUnknownBoundsShortLoop2")
+    @Warmup(10_000)
+    public static void testLongLoopUnknownBoundsShortLoop2_runner() {
+        int res = testLongLoopUnknownBoundsShortLoop2(0, 2000);
+        if (res != 100) {
+            throw new RuntimeException("incorrect result: " + res);
+        }
+    }
+
+    // same with negative stride
+    @Test
+    @IR(counts = { IRNode.COUNTED_LOOP, "1", IRNode.SHORT_RUNNING_LOOP_TRAP, "1", IRNode.OUTER_STRIP_MINED_LOOP, "1" })
+    @IR(failOn = { IRNode.LOOP })
+    public static int testLongLoopUnknownBoundsShortLoop3(long start, long stop) {
+        int j = 0;
+        for (long i = start; i >= stop; i--) {
+            volatileField = 42;
+            j++;
+        }
+        return j;
+    }
+
+    @Run(test = "testLongLoopUnknownBoundsShortLoop3")
+    @Warmup(10_000)
+    public static void testLongLoopUnknownBoundsShortLoop3_runner() {
+        int res = testLongLoopUnknownBoundsShortLoop3(99, 0);
+        if (res != 100) {
+            throw new RuntimeException("incorrect result: " + res);
+        }
+    }
+
+    // same with negative stride > 1
+    @Test
+    @IR(counts = { IRNode.COUNTED_LOOP, "1", IRNode.SHORT_RUNNING_LOOP_TRAP, "1", IRNode.OUTER_STRIP_MINED_LOOP, "1" })
+    @IR(failOn = { IRNode.LOOP })
+    public static int testLongLoopUnknownBoundsShortLoop4(long start, long stop) {
+        int j = 0;
+        for (long i = start; i >= stop; i -= 20) {
+            volatileField = 42;
+            j++;
+        }
+        return j;
+    }
+
+    @Run(test = "testLongLoopUnknownBoundsShortLoop4")
+    @Warmup(10_000)
+    public static void testLongLoopUnknownBoundsShortLoop4_runner() {
+        int res = testLongLoopUnknownBoundsShortLoop4(1999, 0);
+        if (res != 100) {
+            throw new RuntimeException("incorrect result: " + res);
+        }
+    }
+
+    // Check that loop nest is created when bounds are not known but profile reports loop is not short running
+    @Test
+    @IR(counts = { IRNode.COUNTED_LOOP, "1", IRNode.OUTER_STRIP_MINED_LOOP, "1", IRNode.LOOP,  "1"})
+    @IR(failOn = { IRNode.SHORT_RUNNING_LOOP_TRAP })
+    public static int testLongLoopUnknownBoundsLongLoop1(long start, long stop, long range) {
+        int j = 0;
+        for (long i = start; i < stop; i++) {
+            volatileField = 42;
+            Objects.checkIndex(i * (1024 * 1024), range); // max number of iteration of inner loop is roughly Integer.MAX_VALUE / 1024 / 1024
+            j++;
+        }
+        return j;
+    }
+
+    @Run(test = "testLongLoopUnknownBoundsLongLoop1")
+    @Warmup(10_000)
+    public static void testLongLoopUnknownBoundsLongLoop1_runner() {
+        int res = testLongLoopUnknownBoundsLongLoop1(0, 3000, Long.MAX_VALUE);
+        if (res != 3000) {
+            throw new RuntimeException("incorrect result: " + res);
+        }
+    }
+
+    // same with negative stride
+    @Test
+    @IR(counts = { IRNode.COUNTED_LOOP, "1", IRNode.OUTER_STRIP_MINED_LOOP, "1", IRNode.LOOP,  "1"})
+    @IR(failOn = { IRNode.SHORT_RUNNING_LOOP_TRAP })
+    public static int testLongLoopUnknownBoundsLongLoop2(long start, long stop, long range) {
+        int j = 0;
+        for (long i = start; i >= stop; i--) {
+            volatileField = 42;
+            Objects.checkIndex(i * (1024 * 1024), range); // max number of iteration of inner loop is roughly Integer.MAX_VALUE / 1024 / 1024
+            j++;
+        }
+        return j;
+    }
+
+    @Run(test = "testLongLoopUnknownBoundsLongLoop2")
+    @Warmup(10_000)
+    public static void testLongLoopUnknownBoundsLongLoop2_runner() {
+        int res = testLongLoopUnknownBoundsLongLoop2(2999, 0, Long.MAX_VALUE);
+        if (res != 3000) {
+            throw new RuntimeException("incorrect result: " + res);
+        }
+    }
+
+    // Check IR has a loop nest when bounds are unknown, profile reports a short running loop but trap is taken
+    @Test
+    @IR(counts = { IRNode.COUNTED_LOOP, "1", IRNode.LOOP, "1", IRNode.OUTER_STRIP_MINED_LOOP, "1" })
+    @IR(failOn = { IRNode.SHORT_RUNNING_LOOP_TRAP })
+    public static int testLongLoopUnknownBoundsShortLoopFailedSpeculation(long start, long stop, long range) {
+        int j = 0;
+        for (long i = start; i < stop; i++) {
+            volatileField = 42;
+            Objects.checkIndex(i * (1024 * 1024), range); // max number of iteration of inner loop is roughly Integer.MAX_VALUE / 1024 / 1024
+            j++;
+        }
+        return j;
+    }
+
+    // Warm-up: profile the test with short loops (100 iterations), force a C2
+    // compilation, then run it with much longer loops (10_000 iterations). Per the
+    // IR rules on the test, the compilation that is checked afterwards is expected
+    // to have a loop nest and no short running loop trap.
+    @Run(test = "testLongLoopUnknownBoundsShortLoopFailedSpeculation")
+    @Warmup(1)
+    public static void testLongLoopUnknownBoundsShortLoopFailedSpeculation_runner(RunInfo info) {
+        if (info.isWarmUp()) {
+            // Collect profile data with a loop that only runs for 100 iterations
+            for (int i = 0; i < 100_000; i++) {
+                int res = testLongLoopUnknownBoundsShortLoopFailedSpeculation(0, 100, Long.MAX_VALUE);
+                if (res != 100) {
+                    throw new RuntimeException("incorrect result: " + res);
+                }
+            }
+            // Request a C2 compilation of the test and make sure it is available
+            wb.enqueueMethodForCompilation(info.getTest(), CompilerWhiteBoxTest.COMP_LEVEL_FULL_OPTIMIZATION);
+            if (!wb.isMethodCompiled(info.getTest())) {
+                throw new RuntimeException("Should be compiled now");
+            }
+            // Much longer loops: presumably what makes the short running loop
+            // speculation fail (trap taken) in the code compiled above
+            for (int i = 0; i < 10; i++) {
+                int res = testLongLoopUnknownBoundsShortLoopFailedSpeculation(0, 10_000, Long.MAX_VALUE);
+                if (res != 10_000) {
+                    throw new RuntimeException("incorrect result: " + res);
+                }
+            }
+        } else {
+            int res = testLongLoopUnknownBoundsShortLoopFailedSpeculation(0, 100, Long.MAX_VALUE);
+            if (res != 100) {
+                throw new RuntimeException("incorrect result: " + res);
+            }
+        }
+    }
+
+    // Check IR only has a counted loop (no loop nest) when bounds are known, is short running loop, even though trap was taken
+    @Test
+    @IR(counts = { IRNode.COUNTED_LOOP, "1"  })
+    @IR(failOn = { IRNode.LOOP, IRNode.OUTER_STRIP_MINED_LOOP, IRNode.SHORT_RUNNING_LOOP_TRAP })
+    public static int testLongLoopKnownBoundsShortLoopFailedSpeculation() {
+        return testLongLoopKnownBoundsShortLoopFailedSpeculationHelper(0, 100);
+    }
+
+    @ForceInline
+    private static int testLongLoopKnownBoundsShortLoopFailedSpeculationHelper(long start, long stop) {
+        int j = 0;
+        for (long i = start; i < stop; i++) {
+            volatileField = 42;
+            j++;
+        }
+        return j;
+    }
+
+    // Warm-up: run the helper on its own, first with short loops (100 iterations)
+    // then with longer loops (10_000 iterations), and finally run the test, which
+    // calls the helper with constant bounds.
+    @Run(test = "testLongLoopKnownBoundsShortLoopFailedSpeculation")
+    @Warmup(1)
+    public static void testLongLoopKnownBoundsShortLoopFailedSpeculation_runner(RunInfo info) {
+        if (info.isWarmUp()) {
+            // Helper called directly (bounds are arguments) with a loop that only runs for 100 iterations
+            for (int i = 0; i < 100_000; i++) {
+                int res = testLongLoopKnownBoundsShortLoopFailedSpeculationHelper(0, 100);
+                if (res != 100) {
+                    throw new RuntimeException("incorrect result: " + res);
+                }
+            }
+            // Longer loops: presumably what causes the short running loop trap to be taken in the helper
+            for (int i = 0; i < 10; i++) {
+                int res = testLongLoopKnownBoundsShortLoopFailedSpeculationHelper(0, 10_000);
+                if (res != 10_000) {
+                    throw new RuntimeException("incorrect result: " + res);
+                }
+            }
+            // Now exercise the test itself, where the helper is called with constant bounds
+            for (int i = 0; i < 100_000; i++) {
+                int res = testLongLoopKnownBoundsShortLoopFailedSpeculation();
+                if (res != 100) {
+                    throw new RuntimeException("incorrect result: " + res);
+                }
+            }
+        } else {
+            int res = testLongLoopKnownBoundsShortLoopFailedSpeculation();
+            if (res != 100) {
+                throw new RuntimeException("incorrect result: " + res);
+            }
+        }
+    }
+
+    // Check range check can be eliminated by predication
+    @Test
+    @IR(counts = { IRNode.PREDICATE_TRAP, "1" })
+    @IR(failOn = { IRNode.COUNTED_LOOP, IRNode.LOOP, IRNode.OUTER_STRIP_MINED_LOOP, IRNode.SHORT_RUNNING_LOOP_TRAP })
+    public static void testLongLoopConstantBoundsPredication(long range) {
+        for (long i = 0; i < 100; i++) {
+            Objects.checkIndex(i, range);
+        }
+    }
+
+    @Run(test = "testLongLoopConstantBoundsPredication")
+    public static void testLongLoopConstantBoundsPredication_runner() {
+        testLongLoopConstantBoundsPredication(100);
+    }
+
+    // Check range check can be eliminated by predication when bounds are unknown: IR rules expect
+    // a short running loop trap and a predicate trap but no loop left
+    @Test
+    @IR(counts = { IRNode.SHORT_RUNNING_LOOP_TRAP, "1", IRNode.PREDICATE_TRAP, "1" })
+    @IR(failOn = { IRNode.COUNTED_LOOP, IRNode.LOOP, IRNode.OUTER_STRIP_MINED_LOOP })
+    public static void testLongLoopUnknownBoundsShortLoopPredication(long start, long stop, long range) {
+        for (long i = start; i < stop; i++) {
+            Objects.checkIndex(i, range);
+        }
+    }
+
+    @Run(test = "testLongLoopUnknownBoundsShortLoopPredication")
+    @Warmup(10_000)
+    public static void testLongLoopUnknownBoundsShortLoopPredication_runner() {
+        testLongLoopUnknownBoundsShortLoopPredication(0, 100, 100);
+    }
+
+    // If scale too large, transformation can't happen
+    static final long veryLargeScale = Integer.MAX_VALUE / 99;
+    @Test
+    @IR(counts = { IRNode.LOOP, "1", IRNode.PREDICATE_TRAP, "2"})
+    @IR(failOn = { IRNode.COUNTED_LOOP, IRNode.OUTER_STRIP_MINED_LOOP, IRNode.SHORT_RUNNING_LOOP_TRAP })
+    public static void testLongLoopConstantBoundsLargeScale(long range) {
+        for (long i = 0; i < 100; i++) {
+            Objects.checkIndex(veryLargeScale * i, range);
+        }
+    }
+
+    @Run(test = "testLongLoopConstantBoundsLargeScale")
+    public static void testLongLoopConstantBoundsLargeScale_runner() {
+        testLongLoopConstantBoundsLargeScale(veryLargeScale * 100);
+    }
+
+    // Same as testLongLoopConstantBoundsLargeScale but with unknown bounds: IR rules expect
+    // no counted loop and no short running loop trap
+    @Test
+    @IR(counts = { IRNode.LOOP, "1", IRNode.PREDICATE_TRAP, "2"})
+    @IR(failOn = { IRNode.COUNTED_LOOP, IRNode.OUTER_STRIP_MINED_LOOP, IRNode.SHORT_RUNNING_LOOP_TRAP })
+    public static void testLongLoopUnknownBoundsShortLoopLargeScale(long start, long stop, long range) {
+        for (long i = start; i < stop; i++) {
+            Objects.checkIndex(veryLargeScale * i, range);
+        }
+    }
+
+    @Run(test = "testLongLoopUnknownBoundsShortLoopLargeScale")
+    @Warmup(10_000)
+    public static void testLongLoopUnknownBoundsShortLoopLargeScale_runner() {
+        testLongLoopUnknownBoundsShortLoopLargeScale(0, 100, veryLargeScale * 100);
+    }
+
+    // Check IR only has a counted loop when bounds are known and loop runs for a short time (int loop case)
+    @Test
+    @IR(counts = { IRNode.COUNTED_LOOP, "1", IRNode.PREDICATE_TRAP, "1" })
+    @IR(failOn = { IRNode.LOOP, IRNode.OUTER_STRIP_MINED_LOOP, IRNode.SHORT_RUNNING_LOOP_TRAP })
+    public static void testIntLoopConstantBoundsShortLoop1(long range) {
+        for (int i = 0; i < 100; i++) {
+            Objects.checkIndex(i, range);
+            volatileField = 42;
+        }
+    }
+
+    @Run(test = "testIntLoopConstantBoundsShortLoop1")
+    public static void testIntLoopConstantBoundsShortLoop1_runner() {
+        testIntLoopConstantBoundsShortLoop1(100);
+    }
+
+    // Check IR only has a counted loop when bounds are unknown but profile reports a short running loop (int loop case)
+    @Test
+    @IR(counts = { IRNode.COUNTED_LOOP, "1", IRNode.SHORT_RUNNING_LOOP_TRAP, "1", IRNode.PREDICATE_TRAP, "1", IRNode.OUTER_STRIP_MINED_LOOP, "1" })
+    @IR(failOn = { IRNode.LOOP })
+    public static void testIntLoopUnknownBoundsShortLoop(int start, int stop, long range) {
+        for (int i = start; i < stop; i++) {
+            Objects.checkIndex(i, range);
+            volatileField = 42;
+        }
+    }
+
+    @Run(test = "testIntLoopUnknownBoundsShortLoop")
+    @Warmup(10_000)
+    public static void testIntLoopUnknownBoundsShortLoop_runner() {
+        testIntLoopUnknownBoundsShortLoop(0, 100, 100);
+    }
+
+    // Same with unswitched loop
+    @Test
+    @IR(counts = { IRNode.COUNTED_LOOP, "2", IRNode.SHORT_RUNNING_LOOP_TRAP, "1", IRNode.PREDICATE_TRAP, "1", IRNode.OUTER_STRIP_MINED_LOOP, "2" })
+    @IR(failOn = { IRNode.LOOP })
+    public static void testIntLoopUnknownBoundsShortUnswitchedLoop(int start, int stop, long range, boolean flag) {
+        for (int i = start; i < stop; i++) {
+            if (flag) {
+                Objects.checkIndex(i, range);
+                volatileField = 42;
+            } else {
+                Objects.checkIndex(i, range);
+                volatileField = 42;
+            }
+        }
+    }
+
+    @Run(test = "testIntLoopUnknownBoundsShortUnswitchedLoop")
+    @Warmup(10_000)
+    public static void testIntLoopUnknownBoundsShortUnswitchedLoop_runner() {
+        testIntLoopUnknownBoundsShortUnswitchedLoop(0, 100, 100, true);
+        testIntLoopUnknownBoundsShortUnswitchedLoop(0, 100, 100, false);
+    }
+
+    // Same as testIntLoopUnknownBoundsShortUnswitchedLoop but with a long loop variable and long bounds
+    @Test
+    @IR(counts = { IRNode.COUNTED_LOOP, "2", IRNode.SHORT_RUNNING_LOOP_TRAP, "1", IRNode.PREDICATE_TRAP, "1", IRNode.OUTER_STRIP_MINED_LOOP, "2" })
+    @IR(failOn = { IRNode.LOOP })
+    public static void testLongLoopUnknownBoundsShortUnswitchedLoop(long start, long stop, long range, boolean flag) {
+        for (long i = start; i < stop; i++) {
+            // Both branches have the same body; flag is loop invariant
+            if (flag) {
+                Objects.checkIndex(i, range);
+                volatileField = 42;
+            } else {
+                Objects.checkIndex(i, range);
+                volatileField = 42;
+            }
+        }
+    }
+
+    @Run(test = "testLongLoopUnknownBoundsShortUnswitchedLoop")
+    @Warmup(10_000)
+    public static void testLongLoopUnknownBoundsShortUnswitchedLoop_runner() {
+        testLongLoopUnknownBoundsShortUnswitchedLoop(0, 100, 100, true);
+        testLongLoopUnknownBoundsShortUnswitchedLoop(0, 100, 100, false);
+    }
+
+    // Loop limit is the sum of an int and a long (stop1 + stop2): IR rules expect a single
+    // counted loop with a short running loop trap
+    @Test
+    @IR(counts = { IRNode.COUNTED_LOOP, "1", IRNode.SHORT_RUNNING_LOOP_TRAP, "1", IRNode.OUTER_STRIP_MINED_LOOP, "1" })
+    @IR(failOn = { IRNode.LOOP })
+    public static int testLongLoopUnknownBoundsAddLimitShortLoop(int stop1, long stop2) {
+        int j = 0;
+        for (long i = 0; i < stop1 + stop2; i++) {
+            volatileField = 42;
+            j++;
+        }
+        return j;
+    }
+
+    @Run(test = "testLongLoopUnknownBoundsAddLimitShortLoop")
+    @Warmup(10_000)
+    public static void testLongLoopUnknownBoundsAddLimitShortLoop_runner() {
+        int res = testLongLoopUnknownBoundsAddLimitShortLoop(100, 0);
+        if (res != 100) {
+            throw new RuntimeException("incorrect result: " + res);
+        }
+    }
+}
--- a/test/hotspot/jtreg/compiler/longcountedloops/TestShortRunningLongCountedLoopPredicatesClone.java
+++ b/test/hotspot/jtreg/compiler/longcountedloops/TestShortRunningLongCountedLoopPredicatesClone.java
@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2025, Red Hat, Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/**
+ * @test
+ * @bug 8342692
+ * @summary C2: long counted loop/long range checks: don't create loop-nest for short running loops
+ * @run main/othervm -XX:-TieredCompilation -XX:-UseOnStackReplacement -XX:-BackgroundCompilation -XX:LoopMaxUnroll=0
+ *                   TestShortRunningLongCountedLoopPredicatesClone
+ * @run main/othervm TestShortRunningLongCountedLoopPredicatesClone
+ */
+
+import java.util.Objects;
+
+// Predicate added after int counted loop is created depends on
+// narrowed limit which depends on predicate added before the int
+// counted loop was created: predicates need to be properly ordered.
+public class TestShortRunningLongCountedLoopPredicatesClone {
+    // Repeatedly runs test1() with a.range = 100 and start = 0.
+    public static void main(String[] args) {
+        A a = new A(100);
+        for (int i = 0; i < 20_000; i++) {
+            test1(a, 0);
+        }
+    }
+
+    // Long do-while loop with a long range check against a.range, which is
+    // also used as the loop limit.
+    private static void test1(A a, long start) {
+        long i = start;
+        do {
+            // NOTE(review): the purpose of the empty synchronized block is not stated
+            // in this file - presumably it shapes the loop for C2; confirm.
+            synchronized (new Object()) {}
+            Objects.checkIndex(i, a.range);
+            i++;
+        } while (i < a.range);
+    }
+
+    // Holder for the long range read in test1().
+    static class A {
+        A(long range) {
+            this.range = range;
+        }
+
+        long range;
+    }
+}
--- a/test/hotspot/jtreg/compiler/longcountedloops/TestShortRunningLongCountedLoopScaleOverflow.java
+++ b/test/hotspot/jtreg/compiler/longcountedloops/TestShortRunningLongCountedLoopScaleOverflow.java
@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2025, Red Hat, Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/**
+ * @test
+ * @bug 8342692
+ * @summary C2: long counted loop/long range checks: don't create loop-nest for short running loops
+ * @run main/othervm -XX:-TieredCompilation -XX:-UseOnStackReplacement -XX:-BackgroundCompilation -XX:LoopMaxUnroll=0
+ *                   -XX:-UseLoopPredicate -XX:-RangeCheckElimination TestShortRunningLongCountedLoopScaleOverflow
+ * @run main/othervm TestShortRunningLongCountedLoopScaleOverflow
+ */
+
+import java.util.Objects;
+
+// When scale is large, even if loop is short running having a single
+// counted loop is not possible.
+public class TestShortRunningLongCountedLoopScaleOverflow {
+    // Runs test1() and test2() many times with j = 0 (the checked index is then 0),
+    // then verifies that both throw IndexOutOfBoundsException with j = 10.
+    public static void main(String[] args) {
+        for (int i = 0; i < 20_000; i++) {
+            test1(Integer.MAX_VALUE, 0);
+            test2(Integer.MAX_VALUE, 0, 100);
+        }
+        boolean exception = false;
+        try {
+            // veryLargeScale * 10 is out of the [0, Integer.MAX_VALUE) range
+            test1(Integer.MAX_VALUE, 10);
+        } catch (IndexOutOfBoundsException indexOutOfBoundsException) {
+            exception = true;
+        }
+        if (!exception) {
+            throw new RuntimeException("Expected exception not thrown");
+        }
+        exception = false;
+        try {
+            // Same failing index, loop limit passed as an argument
+            test2(Integer.MAX_VALUE, 10, 100);
+        } catch (IndexOutOfBoundsException indexOutOfBoundsException) {
+            exception = true;
+        }
+        if (!exception) {
+            throw new RuntimeException("Expected exception not thrown");
+        }
+    }
+
+    // 2^29: multiplied by 10 it is larger than Integer.MAX_VALUE, the range main() passes.
+    static final long veryLargeScale = 1 << 29;
+
+    // Long loop with constant bounds; the scaled range check only executes on iteration j.
+    private static void test1(long range, long j) {
+        Objects.checkIndex(0, range);
+        for (long i = 0; i < 100; i++) {
+            if (i == j) {
+                Objects.checkIndex(veryLargeScale * i, range);
+            }
+        }
+    }
+
+    // Same as test1() but with the loop limit passed as an argument.
+    private static void test2(long range, long j, long stop) {
+        Objects.checkIndex(0, range);
+        for (long i = 0; i < stop; i++) {
+            if (i == j) {
+                Objects.checkIndex(veryLargeScale * i, range);
+            }
+        }
+    }
+}
--- a/test/hotspot/jtreg/compiler/longcountedloops/TestShortRunningLongCountedLoopVectorization.java
+++ b/test/hotspot/jtreg/compiler/longcountedloops/TestShortRunningLongCountedLoopVectorization.java
@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2025, Red Hat, Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package compiler.longcountedloops;
+import jdk.internal.misc.Unsafe;
+
+import java.util.Objects;
+/*
+ * @test
+ * @bug 8342692
+ * @summary C2: long counted loop/long range checks: don't create loop-nest for short running loops
+ * @modules java.base/jdk.internal.misc
+ * @run main/othervm -XX:-BackgroundCompilation compiler.longcountedloops.TestShortRunningLongCountedLoopVectorization
+ */
+
+public class TestShortRunningLongCountedLoopVectorization {
+    private static final Unsafe UNSAFE = Unsafe.getUnsafe();
+    // Written once per call of test1(), on the first loop iteration.
+    private static volatile int volatileField;
+
+    // Repeatedly runs test1().
+    public static void main(String[] args) {
+        for (int i = 0; i < 20_000; i++) {
+            test1();
+        }
+    }
+
+    static int size = 1024;
+    // Same value as size but as a long, so the range check in test1() is a long range check
+    static long longSize = size;
+    static int[] intArray = new int[size];
+
+    // Int loop over [0, localSize) that checks i against the long longSize and uses
+    // the checked index to compute the offset of an Unsafe int store into intArray.
+    public static void test1() {
+        boolean doIt = true;
+        // Clamp size to [0, 10000]
+        int localSize = Integer.max(Integer.min(size, 10000), 0);
+        int i = 0;
+        while (true) {
+            synchronized (new Object()) {};
+            if (i >= localSize) {
+                break;
+            }
+            if (doIt) {
+                // Volatile store on the first iteration only
+                volatileField = 42;
+                doIt = false;
+            }
+            long j = Objects.checkIndex(i, longSize);
+            UNSAFE.putInt(intArray, Unsafe.ARRAY_INT_BASE_OFFSET + j * Unsafe.ARRAY_INT_INDEX_SCALE, 42);
+            i++;
+        }
+    }
+};
--- a/test/hotspot/jtreg/compiler/longcountedloops/TestStressShortRunningLongCountedLoop.java
+++ b/test/hotspot/jtreg/compiler/longcountedloops/TestStressShortRunningLongCountedLoop.java
@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2025, Red Hat, Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package compiler.longcountedloops;
+
+import compiler.lib.ir_framework.*;
+
+/*
+ * @test
+ * @bug 8342692
+ * @summary C2: long counted loop/long range checks: don't create loop-nest for short running loops
+ * @library /test/lib /
+ * @run driver compiler.longcountedloops.TestStressShortRunningLongCountedLoop
+ */
+
+public class TestStressShortRunningLongCountedLoop { // IR-framework test for 8342692: checks the loop nodes produced for a long loop with unknown bounds, with StressShortRunningLongLoop on and off.
+    private static volatile int volatileField;
+
+    public static void main(String[] args) {
+        TestFramework.runWithFlags("-XX:LoopMaxUnroll=0", "-XX:+IgnoreUnrecognizedVMOptions", "-XX:+StressShortRunningLongLoop"); // run 1: stress flag on (it is a develop flag, hence IgnoreUnrecognizedVMOptions)
+        TestFramework.runWithFlags("-XX:LoopMaxUnroll=0",  "-XX:+IgnoreUnrecognizedVMOptions", "-XX:-StressShortRunningLongLoop"); // run 2: stress flag off
+    }
+
+    @Test
+    @IR(applyIf = { "StressShortRunningLongLoop", "true" }, counts = { IRNode.COUNTED_LOOP, "1", IRNode.SHORT_RUNNING_LOOP_TRAP, "1", IRNode.OUTER_STRIP_MINED_LOOP, "1" }) // flag on: exactly one of each listed node, including the short-running-loop trap
+    @IR(applyIf = { "StressShortRunningLongLoop", "true" }, failOn = { IRNode.LOOP }) // flag on: no IRNode.LOOP match allowed
+    @IR(applyIf = { "StressShortRunningLongLoop", "false" }, counts = { IRNode.COUNTED_LOOP, "1", IRNode.LOOP, "1", IRNode.OUTER_STRIP_MINED_LOOP, "1" }) // flag off: one IRNode.LOOP match is expected in addition
+    @IR(applyIf = { "StressShortRunningLongLoop", "false" }, failOn = { IRNode.SHORT_RUNNING_LOOP_TRAP }) // flag off: no short-running-loop trap allowed
+    public static int testLongLoopUnknownBoundsShortLoop(long start, long stop) { // Returns the number of iterations of i over [start, stop); bounds are parameters, so unknown to the compiler.
+        int j = 0;
+        for (long i = start; i < stop; i++) {
+            volatileField = 42; // volatile store on every iteration
+            j++;
+        }
+        return j;
+    }
+
+    @Run(test = "testLongLoopUnknownBoundsShortLoop")
+    @Warmup(0) // zero warm-up iterations; presumably so the test is compiled without profile data - confirm
+    public static void testLongLoopUnknownBoundsShortLoop_runner() { // Runs the test over [0, 100) and verifies the iteration count.
+        int res = testLongLoopUnknownBoundsShortLoop(0, 100);
+        if (res != 100) {
+            throw new RuntimeException("incorrect result: " + res);
+        }
+    }
+}
--- a/test/hotspot/jtreg/compiler/loopopts/superword/TestMemorySegment.java
+++ b/test/hotspot/jtreg/compiler/loopopts/superword/TestMemorySegment.java
@ -47,6 +47,23 @@ import java.lang.foreign.*;
 * @run driver compiler.loopopts.superword.TestMemorySegment ByteArray AlignVector
 */

+/*
+ * @test id=byte-array-NoShortRunningLongLoop
+ * @bug 8329273 8342692
+ * @summary Test vectorization of loops over MemorySegment
+ * @library /test/lib /
+ * @run driver compiler.loopopts.superword.TestMemorySegment ByteArray NoShortRunningLongLoop
+ */
+
+/*
+ * @test id=byte-array-AlignVector-NoShortRunningLongLoop
+ * @bug 8329273 8348263 8342692
+ * @summary Test vectorization of loops over MemorySegment
+ * @library /test/lib /
+ * @run driver compiler.loopopts.superword.TestMemorySegment ByteArray AlignVector NoShortRunningLongLoop
+ */
+
+
 /*
 * @test id=char-array
 * @bug 8329273
@ -172,6 +189,13 @@ public class TestMemorySegment {
    public static void main(String[] args) {
        TestFramework framework = new TestFramework(TestMemorySegmentImpl.class);
        framework.addFlags("-DmemorySegmentProviderNameForTestVM=" + args[0]);
+        for (int i = 1; i < args.length; i++) {
+            String tag = args[i];
+            switch (tag) {
+                case "AlignVector" ->                framework.addFlags("-XX:+AlignVector");
+                case "NoShortRunningLongLoop" ->     framework.addFlags("-XX:-ShortRunningLongLoop");
+            }
+        }
        if (args.length > 1 && args[1].equals("AlignVector")) {
            framework.addFlags("-XX:+AlignVector");
        }
@ -777,6 +801,13 @@ class TestMemorySegmentImpl {
    @IR(counts = {IRNode.LOAD_VECTOR_I, "= 0",
                  IRNode.ADD_VI,        "= 0",
                  IRNode.STORE_VECTOR,  "= 0"},
+        applyIfAnd = { "ShortRunningLongLoop", "false", "AlignVector", "false" },
+        applyIfPlatform = {"64-bit", "true"},
+        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
+    @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
+                  IRNode.ADD_VI,        "> 0",
+                  IRNode.STORE_VECTOR,  "> 0"},
+        applyIfAnd = { "ShortRunningLongLoop", "true", "AlignVector", "false" },
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    // FAILS: invariants are sorted differently, because of differently inserted Cast.
@ -795,6 +826,13 @@ class TestMemorySegmentImpl {
    @IR(counts = {IRNode.LOAD_VECTOR_I, "= 0",
                  IRNode.ADD_VI,        "= 0",
                  IRNode.STORE_VECTOR,  "= 0"},
+        applyIfAnd = { "ShortRunningLongLoop", "false", "AlignVector", "false" },
+        applyIfPlatform = {"64-bit", "true"},
+        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
+    @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
+                  IRNode.ADD_VI,        "> 0",
+                  IRNode.STORE_VECTOR,  "> 0"},
+        applyIfAnd = { "ShortRunningLongLoop", "true", "AlignVector", "false" },
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    // FAILS: invariants are sorted differently, because of differently inserted Cast.
--- a/test/micro/org/openjdk/bench/java/lang/foreign/HeapMismatchManualLoopTest.java
+++ b/test/micro/org/openjdk/bench/java/lang/foreign/HeapMismatchManualLoopTest.java
@ -0,0 +1,130 @@
+/*
+ *  Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
+ *  DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ *  This code is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 only, as
+ *  published by the Free Software Foundation.
+ *
+ *  This code is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  version 2 for more details (a copy is included in the LICENSE file that
+ *  accompanied this code).
+ *
+ *  You should have received a copy of the GNU General Public License version
+ *  2 along with this work; if not, write to the Free Software Foundation,
+ *  Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ *   Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ *  or visit www.oracle.com if you need additional information or have any
+ *  questions.
+ *
+ */
+
+package org.openjdk.bench.java.lang.foreign;
+
+import org.openjdk.jmh.annotations.*;
+
+import java.lang.foreign.Arena;
+import java.lang.foreign.MemorySegment;
+import java.lang.foreign.ValueLayout;
+import java.nio.ByteBuffer;
+import java.util.Arrays;
+import java.util.concurrent.TimeUnit;
+import jdk.internal.misc.Unsafe;
+import java.util.Objects;
+
+@BenchmarkMode(Mode.AverageTime)
+@Warmup(iterations = 5, time = 500, timeUnit = TimeUnit.MILLISECONDS)
+@Measurement(iterations = 10, time = 500, timeUnit = TimeUnit.MILLISECONDS)
+@State(org.openjdk.jmh.annotations.Scope.Thread)
+@OutputTimeUnit(TimeUnit.MILLISECONDS) // class-level setting; every benchmark method below overrides it with NANOSECONDS
+@Fork(value = 3, jvmArgs = { "--add-opens=java.base/jdk.internal.misc=ALL-UNNAMED" })
+public class HeapMismatchManualLoopTest { // JMH benchmark of hand-written byte-mismatch loops over byte[], MemorySegment, ByteBuffer and Unsafe.
+
+    @Param({"4", "8", "16", "32", "64", "128"})
+    public int ELEM_SIZE; // length in bytes of both the source and the destination array
+
+    static final Unsafe unsafe = Utils.unsafe; // Utils is not imported here; presumably a helper in this package - confirm
+
+    byte[] srcArray;
+    byte[] dstArray;
+    MemorySegment srcSegment;
+    MemorySegment dstSegment;
+    ByteBuffer srcBuffer;
+    ByteBuffer dstBuffer;
+    long srcByteSize;
+    long dstByteSize;
+
+    @Setup
+    public void setup() { // Allocates two fresh byte arrays of ELEM_SIZE and creates segment/buffer views via ofArray()/wrap(); nothing in this class writes to them.
+        srcArray = new byte[ELEM_SIZE];
+        dstArray = new byte[ELEM_SIZE];
+        srcSegment = MemorySegment.ofArray(srcArray);
+        dstSegment = MemorySegment.ofArray(dstArray);
+        srcBuffer = ByteBuffer.wrap(srcArray);
+        dstBuffer = ByteBuffer.wrap(dstArray);
+        srcByteSize = ELEM_SIZE; // long copies of the size, used as loop bounds by the Unsafe variants
+        dstByteSize = ELEM_SIZE;
+    }
+
+    @Benchmark
+    @OutputTimeUnit(TimeUnit.NANOSECONDS)
+    public int array_mismatch() { // int-indexed loop over the plain arrays; returns the first differing index or -1
+        for (int i = 0; i < srcArray.length ; i++) {
+            if (srcArray[i] != dstArray[i]) {
+                return i;
+            }
+        }
+        return -1;
+    }
+
+    @Benchmark
+    @OutputTimeUnit(TimeUnit.NANOSECONDS)
+    public long segment_mismatch() { // long-indexed loop reading bytes through MemorySegment.get; returns the first differing index or -1
+        for (long i = 0; i < srcSegment.byteSize() ; i++) {
+            if (srcSegment.get(ValueLayout.JAVA_BYTE, i) != dstSegment.get(ValueLayout.JAVA_BYTE, i)) {
+                return i;
+            }
+        }
+        return -1;
+    }
+
+    @Benchmark
+    @OutputTimeUnit(TimeUnit.NANOSECONDS)
+    public int buffer_mismatch() { // int-indexed loop using absolute ByteBuffer.get(int); returns the first differing index or -1
+        for (int i = 0; i < srcBuffer.capacity() ; i++) {
+            if (srcBuffer.get(i) != dstBuffer.get(i)) {
+                return i;
+            }
+        }
+        return -1;
+    }
+
+    @Benchmark
+    @OutputTimeUnit(TimeUnit.NANOSECONDS)
+    public long unsafe_mismatch() { // long-indexed Unsafe loop with explicit long range checks against both sizes on every iteration
+        for (long i = 0; i < srcByteSize ; i++) {
+            Objects.checkIndex(i, srcByteSize); // return value unused; called only for the bounds check
+            Objects.checkIndex(i, dstByteSize);
+            long offset = Unsafe.ARRAY_BYTE_BASE_OFFSET + i * Unsafe.ARRAY_BYTE_INDEX_SCALE; // offset of byte element i, used for both arrays
+            if (unsafe.getByte(srcArray, offset) != unsafe.getByte(dstArray, offset)) {
+                return i;
+            }
+        }
+        return -1;
+    }
+
+    @Benchmark
+    @OutputTimeUnit(TimeUnit.NANOSECONDS)
+    public long unsafe_mismatch2() { // same loop as unsafe_mismatch() but without the Objects.checkIndex calls
+        for (long i = 0; i < srcByteSize ; i++) {
+            long offset = Unsafe.ARRAY_BYTE_BASE_OFFSET + i * Unsafe.ARRAY_BYTE_INDEX_SCALE;
+            if (unsafe.getByte(srcArray, offset) != unsafe.getByte(dstArray, offset)) {
+                return i;
+            }
+        }
+        return -1;
+    }
+}