8278296: Generalize long range check transformation

Reviewed-by: jrose, thartmann
2026-02-20 15:25:27 +00:00 · 2022-03-07 16:26:19 +00:00 · 2022-03-07 16:26:19 +00:00 · ef266d77b6
commit ef266d77b6
parent f0995abe62
7 changed files with 661 additions and 251 deletions
--- a/src/hotspot/share/opto/loopPredicate.cpp
+++ b/src/hotspot/share/opto/loopPredicate.cpp
@ -759,7 +759,7 @@ bool IdealLoopTree::is_range_check_if(IfNode *iff, PhaseIdealLoop *phase, BasicT
  }
  scale  = 0;
  offset = NULL;
-  if (!phase->is_scaled_iv_plus_offset(cmp->in(1), iv, &scale, &offset, bt)) {
+  if (!phase->is_scaled_iv_plus_offset(cmp->in(1), iv, bt, &scale, &offset)) {
    return false;
  }
  return true;
--- a/src/hotspot/share/opto/loopTransform.cpp
+++ b/src/hotspot/share/opto/loopTransform.cpp
@ -1121,18 +1121,18 @@ bool IdealLoopTree::policy_range_check(PhaseIdealLoop* phase, bool provisional,
        continue; // not RC
      }
      Node *cmp = bol->in(1);
-      Node *rc_exp = cmp->in(1);
-      Node *limit = cmp->in(2);

      if (provisional) {
        // Try to pattern match with either cmp inputs, do not check
        // whether one of the inputs is loop independent as it may not
        // have had a chance to be hoisted yet.
-        if (!phase->is_scaled_iv_plus_offset(cmp->in(1), trip_counter, NULL, NULL, bt) &&
-            !phase->is_scaled_iv_plus_offset(cmp->in(2), trip_counter, NULL, NULL, bt)) {
+        if (!phase->is_scaled_iv_plus_offset(cmp->in(1), trip_counter, bt, NULL, NULL) &&
+            !phase->is_scaled_iv_plus_offset(cmp->in(2), trip_counter, bt, NULL, NULL)) {
          continue;
        }
      } else {
+        Node *rc_exp = cmp->in(1);
+        Node *limit = cmp->in(2);
        Node *limit_c = phase->get_ctrl(limit);
        if (limit_c == phase->C->top()) {
          return false;           // Found dead test on live IF?  No RCE!
@ -1147,7 +1147,7 @@ bool IdealLoopTree::policy_range_check(PhaseIdealLoop* phase, bool provisional,
          }
        }

-        if (!phase->is_scaled_iv_plus_offset(rc_exp, trip_counter, NULL, NULL, bt)) {
+        if (!phase->is_scaled_iv_plus_offset(rc_exp, trip_counter, bt, NULL, NULL)) {
          continue;
        }
      }
@ -2522,59 +2522,202 @@ void PhaseIdealLoop::add_constraint(jlong stride_con, jlong scale_con, Node* off
  }
 }

+//----------------------------------is_iv------------------------------------
+// Return true if exp is the value (of type bt) of the given induction var.
+// This grammar of cases is recognized, where X is I|L according to bt:
+//    VIV[iv] = iv | (CastXX VIV[iv]) | (ConvI2X VIV[iv])
 bool PhaseIdealLoop::is_iv(Node* exp, Node* iv, BasicType bt) {
-  if (exp == iv) {
+  exp = exp->uncast();
+  if (exp == iv && iv->bottom_type()->isa_integer(bt)) {
    return true;
  }

-  if (bt == T_LONG && iv->bottom_type()->isa_int() && exp->Opcode() == Op_ConvI2L && exp->in(1) == iv) {
+  if (bt == T_LONG && iv->bottom_type()->isa_int() && exp->Opcode() == Op_ConvI2L && exp->in(1)->uncast() == iv) {
    return true;
  }
  return false;
 }

 //------------------------------is_scaled_iv---------------------------------
-// Return true if exp is a constant times an induction var
-bool PhaseIdealLoop::is_scaled_iv(Node* exp, Node* iv, jlong* p_scale, BasicType bt, bool* converted) {
-  exp = exp->uncast();
-  assert(bt == T_INT || bt == T_LONG, "unexpected int type");
-  if (is_iv(exp, iv, bt)) {
+// Return true if exp is a constant times the given induction var (of type bt).
+// The multiplication is either done in full precision (exactly of type bt),
+// or else bt is T_LONG but iv is scaled using 32-bit arithmetic followed by a ConvI2L.
+// This grammar of cases is recognized, where X is I|L according to bt:
+//    SIV[iv] = VIV[iv] | (CastXX SIV[iv])
+//            | (MulX VIV[iv] ConX) | (MulX ConX VIV[iv])
+//            | (LShiftX VIV[iv] ConI)
+//            | (ConvI2L SIV[iv])  -- a "short-scale" can occur here; note recursion
+//            | (SubX 0 SIV[iv])  -- same as MulX(iv, -scale); note recursion
+//    VIV[iv] = [either iv or its value converted; see is_iv() above]
+// On success, the constant scale value is stored back to *p_scale.
+// The value (*p_short_scale) reports if such a ConvI2L conversion was present.
+bool PhaseIdealLoop::is_scaled_iv(Node* exp, Node* iv, BasicType bt, jlong* p_scale, bool* p_short_scale, int depth) {
+  BasicType exp_bt = bt;
+  exp = exp->uncast();  //strip casts
+  assert(exp_bt == T_INT || exp_bt == T_LONG, "unexpected int type");
+  if (is_iv(exp, iv, exp_bt)) {
    if (p_scale != NULL) {
      *p_scale = 1;
    }
+    if (p_short_scale != NULL) {
+      *p_short_scale = false;
+    }
    return true;
  }
-  if (bt == T_LONG && iv->bottom_type()->isa_int() && exp->Opcode() == Op_ConvI2L) {
+  if (exp_bt == T_LONG && iv->bottom_type()->isa_int() && exp->Opcode() == Op_ConvI2L) {
    exp = exp->in(1);
-    bt = T_INT;
-    if (converted != NULL) {
-      *converted = true;
-    }
+    exp_bt = T_INT;
  }
  int opc = exp->Opcode();
+  int which = 0;  // this is which subexpression we find the iv in
  // Can't use is_Mul() here as it's true for AndI and AndL
-  if (opc == Op_Mul(bt)) {
-    if (is_iv(exp->in(1)->uncast(), iv, bt) && exp->in(2)->is_Con()) {
+  if (opc == Op_Mul(exp_bt)) {
+    if ((is_iv(exp->in(which = 1), iv, exp_bt) && exp->in(2)->is_Con()) ||
+        (is_iv(exp->in(which = 2), iv, exp_bt) && exp->in(1)->is_Con())) {
+      Node* factor = exp->in(which == 1 ? 2 : 1);  // the other argument
+      jlong scale = factor->find_integer_as_long(exp_bt, 0);
+      if (scale == 0) {
+        return false;  // might be top
+      }
      if (p_scale != NULL) {
-        *p_scale = exp->in(2)->get_integer_as_long(bt);
+        *p_scale = scale;
+      }
+      if (p_short_scale != NULL) {
+        // (ConvI2L (MulI iv K)) can be 64-bit linear if iv is kept small enough...
+        *p_short_scale = (exp_bt != bt && scale != 1);
      }
      return true;
    }
-    if (is_iv(exp->in(2)->uncast(), iv, bt) && exp->in(1)->is_Con()) {
+  } else if (opc == Op_LShift(exp_bt)) {
+    if (is_iv(exp->in(1), iv, exp_bt) && exp->in(2)->is_Con()) {
+      jint shift_amount = exp->in(2)->find_int_con(min_jint);
+      if (shift_amount == min_jint) {
+        return false;  // might be top
+      }
+      jlong scale;
+      if (exp_bt == T_INT) {
+        scale = java_shift_left((jint)1, (juint)shift_amount);
+      } else if (exp_bt == T_LONG) {
+        scale = java_shift_left((jlong)1, (julong)shift_amount);
+      }
      if (p_scale != NULL) {
-        *p_scale = exp->in(1)->get_integer_as_long(bt);
+        *p_scale = scale;
+      }
+      if (p_short_scale != NULL) {
+        // (ConvI2L (LShiftI iv K)) can be 64-bit linear if iv is kept small enough...
+        *p_short_scale = (exp_bt != bt && scale != 1);
      }
      return true;
    }
-  } else if (opc == Op_LShift(bt)) {
-    if (is_iv(exp->in(1)->uncast(), iv, bt) && exp->in(2)->is_Con()) {
+  } else if (opc == Op_Sub(exp_bt) &&
+             exp->in(1)->find_integer_as_long(exp_bt, -1) == 0) {
+    jlong scale = 0;
+    if (depth == 0 && is_scaled_iv(exp->in(2), iv, exp_bt, &scale, p_short_scale, depth + 1)) {
+      // SubX(0, iv*K) => iv*(-K)
+      if (scale == min_signed_integer(exp_bt)) {
+        // This should work even if -K overflows, but let's not.
+        return false;
+      }
+      scale = java_multiply(scale, (jlong)-1);
      if (p_scale != NULL) {
-        jint shift_amount = exp->in(2)->get_int();
-        if (bt == T_INT) {
-          *p_scale = java_shift_left((jint)1, (juint)shift_amount);
-        } else if (bt == T_LONG) {
-          *p_scale = java_shift_left((jlong)1, (julong)shift_amount);
+        *p_scale = scale;
+      }
+      if (p_short_scale != NULL) {
+        // (ConvI2L (SubI 0 SIV[iv])) can be 64-bit linear if iv is kept small enough...
+        *p_short_scale = *p_short_scale || (exp_bt != bt && scale != 1);
+      }
+      return true;
+    }
+  }
+  // We could also recognize (iv*K1)*K2, even with overflow, but let's not.
+  return false;
+}
+
+//-------------------------is_scaled_iv_plus_offset--------------------------
+// Return true if exp is a simple linear transform of the given induction var.
+// The scale must be constant and the addition tree (if any) must be simple.
+// This grammar of cases is recognized, where X is I|L according to bt:
+//
+//    OIV[iv] = SIV[iv] | (CastXX OIV[iv])
+//            | (AddX SIV[iv] E) | (AddX E SIV[iv])
+//            | (SubX SIV[iv] E) | (SubX E SIV[iv])
+//    SSIV[iv] = (ConvI2X SIV[iv])  -- a "short scale" might occur here
+//    SIV[iv] = [a possibly scaled value of iv; see is_scaled_iv() above]
+//
+// On success, the constant scale value is stored back to *p_scale unless null.
+// Likewise, the addend (perhaps a synthetic AddX node) is stored to *p_offset.
+// Also, (*p_short_scale) reports if a ConvI2L conversion was seen after a MulI,
+// meaning bt is T_LONG but iv was scaled using 32-bit arithmetic.
+// To avoid looping, the match is depth-limited, and so it may fail to match complex expressions even if they fit the grammar.
+bool PhaseIdealLoop::is_scaled_iv_plus_offset(Node* exp, Node* iv, BasicType bt, jlong* p_scale, Node** p_offset, bool* p_short_scale, int depth) {
+  assert(bt == T_INT || bt == T_LONG, "unexpected int type");
+  jlong scale = 0;  // to catch result from is_scaled_iv()
+  BasicType exp_bt = bt;
+  exp = exp->uncast();
+  if (is_scaled_iv(exp, iv, exp_bt, &scale, p_short_scale)) {
+    if (p_scale != NULL) {
+      *p_scale = scale;
+    }
+    if (p_offset != NULL) {
+      Node *zero = _igvn.zerocon(bt);
+      set_ctrl(zero, C->root());
+      *p_offset = zero;
+    }
+    return true;
+  }
+  if (exp_bt != bt) {
+    // We would now be matching inputs like (ConvI2L exp:(AddI (MulI iv S) E)).
+    // It's hard to make 32-bit arithmetic linear if it overflows.  Although we do
+    // cope with overflowing multiplication by S, it would be even more work to
+    // handle overflowing addition of E.  So we bail out here on ConvI2L input.
+    return false;
+  }
+  int opc = exp->Opcode();
+  int which = 0;  // this is which subexpression we find the iv in
+  Node* offset = NULL;
+  if (opc == Op_Add(exp_bt)) {
+    // Check for a scaled IV in (AddX (MulX iv S) E) or (AddX E (MulX iv S)).
+    if (is_scaled_iv(exp->in(which = 1), iv, bt, &scale, p_short_scale) ||
+        is_scaled_iv(exp->in(which = 2), iv, bt, &scale, p_short_scale)) {
+      offset = exp->in(which == 1 ? 2 : 1);  // the other argument
+      if (p_scale != NULL) {
+        *p_scale = scale;
+      }
+      if (p_offset != NULL) {
+        *p_offset = offset;
+      }
+      return true;
+    }
+    // Check for more addends, like (AddX (AddX (MulX iv S) E1) E2), etc.
+    if (is_scaled_iv_plus_extra_offset(exp->in(1), exp->in(2), iv, bt, p_scale, p_offset, p_short_scale, depth) ||
+        is_scaled_iv_plus_extra_offset(exp->in(2), exp->in(1), iv, bt, p_scale, p_offset, p_short_scale, depth)) {
+      return true;
+    }
+  } else if (opc == Op_Sub(exp_bt)) {
+    if (is_scaled_iv(exp->in(which = 1), iv, bt, &scale, p_short_scale) ||
+        is_scaled_iv(exp->in(which = 2), iv, bt, &scale, p_short_scale)) {
+      // Match (SubX SIV[iv] E) as if (AddX SIV[iv] (SubX 0 E)), and
+      // match (SubX E SIV[iv]) as if (AddX E (SubX 0 SIV[iv])).
+      offset = exp->in(which == 1 ? 2 : 1);  // the other argument
+      if (which == 2) {
+        // We can't handle a scale of min_jint (or min_jlong) here as -1 * min_jint = min_jint
+        if (scale == min_signed_integer(bt)) {
+          return false;   // cannot negate the scale of the iv
        }
+        scale = java_multiply(scale, (jlong)-1);
+      }
+      if (p_scale != NULL) {
+        *p_scale = scale;
+      }
+      if (p_offset != NULL) {
+        if (which == 1) {  // must negate the extracted offset
+          Node *zero = _igvn.integercon(0, exp_bt);
+          set_ctrl(zero, C->root());
+          Node *ctrl_off = get_ctrl(offset);
+          offset = SubNode::make(zero, offset, exp_bt);
+          register_new_node(offset, ctrl_off);
+        }
+        *p_offset = offset;
      }
      return true;
    }
@ -2582,71 +2725,30 @@ bool PhaseIdealLoop::is_scaled_iv(Node* exp, Node* iv, jlong* p_scale, BasicType
  return false;
 }

-//-----------------------------is_scaled_iv_plus_offset------------------------------
-// Return true if exp is a simple induction variable expression: k1*iv + (invar + k2)
-bool PhaseIdealLoop::is_scaled_iv_plus_offset(Node* exp, Node* iv, jlong* p_scale, Node** p_offset, BasicType bt, bool* converted, int depth) {
-  assert(bt == T_INT || bt == T_LONG, "unexpected int type");
-  if (is_scaled_iv(exp, iv, p_scale, bt, converted)) {
+// Helper for is_scaled_iv_plus_offset(), not called separately.
+// The caller encountered (AddX exp1 offset3) or (AddX offset3 exp1).
+// Here, exp1 is inspected to see if it is a simple linear transform of iv.
+// If so, the offset3 is combined with any other offset2 from inside exp1.
+bool PhaseIdealLoop::is_scaled_iv_plus_extra_offset(Node* exp1, Node* offset3, Node* iv,
+                                                    BasicType bt,
+                                                    jlong* p_scale, Node** p_offset,
+                                                    bool* p_short_scale, int depth) {
+  // By the time we reach here, it is unlikely that exp1 is a simple iv*K.
+  // If it is a linear iv transform, it is probably an add or subtract.
+  // Let's collect the internal offset2 from it.
+  Node* offset2 = NULL;
+  if (offset3->is_Con() &&
+      depth < 2 &&
+      is_scaled_iv_plus_offset(exp1, iv, bt, p_scale,
+                               &offset2, p_short_scale, depth+1)) {
    if (p_offset != NULL) {
-      Node *zero = _igvn.integercon(0, bt);
-      set_ctrl(zero, C->root());
-      *p_offset = zero;
+      Node* ctrl_off2 = get_ctrl(offset2);
+      Node* offset = AddNode::make(offset2, offset3, bt);
+      register_new_node(offset, ctrl_off2);
+      *p_offset = offset;
    }
    return true;
  }
-  exp = exp->uncast();
-  int opc = exp->Opcode();
-  if (opc == Op_Add(bt)) {
-    if (is_scaled_iv(exp->in(1), iv, p_scale, bt, converted)) {
-      if (p_offset != NULL) {
-        *p_offset = exp->in(2);
-      }
-      return true;
-    }
-    if (is_scaled_iv(exp->in(2), iv, p_scale, bt, converted)) {
-      if (p_offset != NULL) {
-        *p_offset = exp->in(1);
-      }
-      return true;
-    }
-    if (exp->in(2)->is_Con()) {
-      Node* offset2 = NULL;
-      if (depth < 2 &&
-          is_scaled_iv_plus_offset(exp->in(1), iv, p_scale,
-                                   p_offset != NULL ? &offset2 : NULL, bt, converted, depth+1)) {
-        if (p_offset != NULL) {
-          Node *ctrl_off2 = get_ctrl(offset2);
-          Node* offset = AddNode::make(offset2, exp->in(2), bt);
-          register_new_node(offset, ctrl_off2);
-          *p_offset = offset;
-        }
-        return true;
-      }
-    }
-  } else if (opc == Op_Sub(bt)) {
-    if (is_scaled_iv(exp->in(1), iv, p_scale, bt, converted)) {
-      if (p_offset != NULL) {
-        Node *zero = _igvn.integercon(0, bt);
-        set_ctrl(zero, C->root());
-        Node *ctrl_off = get_ctrl(exp->in(2));
-        Node* offset = SubNode::make(zero, exp->in(2), bt);
-        register_new_node(offset, ctrl_off);
-        *p_offset = offset;
-      }
-      return true;
-    }
-    if (is_scaled_iv(exp->in(2), iv, p_scale, bt, converted)) {
-      if (p_offset != NULL) {
-        // We can't handle a scale of min_jint (or min_jlong) here as -1 * min_jint = min_jint
-        if (*p_scale == min_signed_integer(bt)) {
-          return false;
-        }
-        *p_scale *= -1;
-        *p_offset = exp->in(1);
-      }
-      return true;
-    }
-  }
  return false;
 }

--- a/src/hotspot/share/opto/loopnode.cpp
+++ b/src/hotspot/share/opto/loopnode.cpp
@ -1122,9 +1122,6 @@ void PhaseIdealLoop::strip_mined_nest_back_to_counted_loop(IdealLoopTree* loop,

 int PhaseIdealLoop::extract_long_range_checks(const IdealLoopTree* loop, jlong stride_con, int iters_limit, PhiNode* phi,
                                              Node_List& range_checks) {
-  if (stride_con < 0) { // only for stride_con > 0 && scale > 0 for now
-    return iters_limit;
-  }
  const jlong min_iters = 2;
  jlong reduced_iters_limit = iters_limit;
  jlong original_iters_limit = iters_limit;
@ -1139,7 +1136,6 @@ int PhaseIdealLoop::extract_long_range_checks(const IdealLoopTree* loop, jlong s
        RangeCheckNode* rc = c->in(0)->as_RangeCheck();
        if (loop->is_range_check_if(rc, this, T_LONG, phi, range, offset, scale) &&
            loop->is_invariant(range) && loop->is_invariant(offset) &&
-            scale > 0 && // only for stride_con > 0 && scale > 0 for now
            original_iters_limit / ABS(scale * stride_con) >= min_iters) {
          reduced_iters_limit = MIN2(reduced_iters_limit, original_iters_limit/ABS(scale));
          range_checks.push(c);
@ -1154,27 +1150,29 @@ int PhaseIdealLoop::extract_long_range_checks(const IdealLoopTree* loop, jlong s
 // One execution of the inner loop covers a sub-range of the entire iteration range of the loop: [A,Z), aka [A=init,
 // Z=limit). If the loop has at least one trip (which is the case here), the iteration variable i always takes A as its
 // first value, followed by A+S (S is the stride), next A+2S, etc. The limit is exclusive, so that the final value B of
-// i is never Z.  It will be B=Z-1 if S=1, or B=Z+1 if S=-1.  If |S|>1 the formula for the last value requires a floor
-// operation, specifically B=floor((Z-sgn(S)-A)/S)*S+A.  Thus i ranges as i:[A,B] or i:[A,Z) or i:[A,Z-U) for some U<S.
+// i is never Z.  It will be B=Z-1 if S=1, or B=Z+1 if S=-1.

-// N.B. We handle only the case of positive S currently, so comments about S<0 are not operative at present.  Also,
-// we only support positive index scale value (K > 0) to simplify the logic for clamping 32-bit bounds (L_2, R_2).
-// For restrictions on S and K, see the guards in extract_long_range_checks.
+// If |S|>1 the formula for the last value B would require a floor operation, specifically B=floor((Z-sgn(S)-A)/S)*S+A,
+// which is B=Z-sgn(S)U for some U in [1,|S|].  So when S>0, i ranges as i:[A,Z) or i:[A,B=Z-U], or else (in reverse)
+// as i:(Z,A] or i:[B=Z+U,A].  It will become important to reason about this inclusive range [A,B] or [B,A].

 // Within the loop there may be many range checks.  Each such range check (R.C.) is of the form 0 <= i*K+L < R, where K
 // is a scale factor applied to the loop iteration variable i, and L is some offset; K, L, and R are loop-invariant.
-// Because R is never negative, this check can always be simplified to an unsigned check i*K+L <u R.
+// Because R is never negative (see below), this check can always be simplified to an unsigned check i*K+L <u R.

 // When a long loop over a 64-bit variable i (outer_iv) is decomposed into a series of shorter sub-loops over a 32-bit
-// variable j (inner_iv), j ranges over a shorter interval j:[0,Z_2), where the limit is chosen to prevent various cases
-// of 32-bit overflow (including multiplications j*K below).  In the sub-loop the logical value i is offset from j by a
-// 64-bit constant C, so i ranges in i:C+[0,Z_2).
+// variable j (inner_iv), j ranges over a shorter interval j:[0,B_2] or [0,Z_2) (assuming S > 0), where the limit is
+// chosen to prevent various cases of 32-bit overflow (including multiplications j*K below).  In the sub-loop the
+// logical value i is offset from j by a 64-bit constant C, so i ranges in i:C+[0,Z_2).

-// The union of all the C+[0,Z_2) ranges from the sub-loops must be identical to the whole range [A,B].  Assuming S>0,
-// the first C must be A itself, and the next C value is the previous C+Z_2.  In each sub-loop, j counts up from zero
-// and exits just before i=C+Z_2.
+// For S<0, j ranges (in reverse!) through j:[-|B_2|,0] or (-|Z_2|,0].  For either sign of S, we can say i=j+C and j
+// ranges through 32-bit ranges [A_2,B_2] or [B_2,A_2] (A_2=0 of course).

-// (N.B. If S<0 the formulas are different, because all the loops count downward.)
+// The disjoint union of all the C+[A_2,B_2] ranges from the sub-loops must be identical to the whole range [A,B].
+// Assuming S>0, the first C must be A itself, and the next C value is the previous C+B_2, plus S.  If |S|=1, the next
+// C value is also the previous C+Z_2.  In each sub-loop, j counts from j=A_2=0 and i counts from C+0 and exits at
+// j=B_2 (i=C+B_2), just before it gets to i=C+Z_2.  Both i and j count up (from C and 0) if S>0; otherwise they count
+// down (from C and 0 again).

 // Returning to range checks, we see that each i*K+L <u R expands to (C+j)*K+L <u R, or j*K+Q <u R, where Q=(C*K+L).
 // (Recall that K and L and R are loop-invariant scale, offset and range values for a particular R.C.)  This is still a
@ -1194,65 +1192,104 @@ int PhaseIdealLoop::extract_long_range_checks(const IdealLoopTree* loop, jlong s

 // If 32-bit multiplication j*K might overflow, we adjust the sub-loop limit Z_2 closer to zero to reduce j's range.

-// For each R.C. j*K+Q <u32 R, the range of mathematical values of j*K+Q in the sub-loop is [Q_min, Q_max), where
-// Q_min=Q and Q_max=Z_2*K+Q.  Making the upper limit Q_max be exclusive helps it integrate correctly with the strict
-// comparisons against R and R_2.  Sometimes a very high R will be replaced by an R_2 derived from the more moderate
-// Q_max, and replacing one exclusive limit by another exclusive limit avoids off-by-one complexities.
+// For each R.C. j*K+Q <u32 R, the range of mathematical values of j*K+Q in the sub-loop is [Q_min, Q_max], where
+// Q_min=Q and Q_max=B_2*K+Q (if S>0 and K>0), Q_min=A_2*K+Q and Q_max=Q (if S<0 and K>0),
+// Q_min=B_2*K+Q and Q_max=Q (if S>0 and K<0), Q_min=Q and Q_max=A_2*K+Q (if S<0 and K<0).

-// N.B. If (S*K)<0 then the formulas for Q_min and Q_max may differ; the values may need to be swapped and adjusted to
-// the correct type of bound (inclusive or exclusive).
+// Note that the first R.C. value is always Q=(S*K>0 ? Q_min : Q_max).  Also Q_{min,max} = Q + {min,max}(A_2*K,B_2*K).
+// If S*K>0 then, as the loop iterations progress, each R.C. value i*K+L = j*K+Q goes up from Q=Q_min towards Q_max.
+// If S*K<0 then j*K+Q starts at Q=Q_max and goes down towards Q_min.

 // Case A: Some Negatives (but no overflow).
 // Number line:
 // |s64_min   .    .    .    0    .    .    .   s64_max|
 // |    .  Q_min..Q_max .    0    .    .    .     .    |  s64 negative
+// |    .     .    .    .    R=0  R<   R<   R<    R<   |  (against R values)
 // |    .     .    .  Q_min..0..Q_max  .    .     .    |  small mixed
+// |    .     .    .    .    R    R    R<   R<    R<   |  (against R values)
 //
-// if Q_min <s64 0, then use this test:
-// j*K + s32_trunc(Q_min) <u32 clamp(R, 0, Q_max)
+// R values which are out of range (>Q_max+1) are reduced to max(0,Q_max+1).  They are marked on the number line as R<.
+//
+// So, if Q_min <s64 0, then use this test:
+// j*K + s32_trunc(Q_min) <u32 clamp(R, 0, Q_max+1) if S*K>0 (R.C.E. steps upward)
+// j*K + s32_trunc(Q_max) <u32 clamp(R, 0, Q_max+1) if S*K<0 (R.C.E. steps downward)
+// Both formulas reduce to adding j*K to the 32-bit truncated value of the first R.C. expression value, Q:
+// j*K + s32_trunc(Q) <u32 clamp(R, 0, Q_max+1) for all S,K

-// If the 32-bit truncation loses information, no harm is done, since certainly the clamp also returns R_2=zero.
+// If the 32-bit truncation loses information, no harm is done, since certainly the clamp also will return R_2=zero.

 // Case B: No Negatives.
 // Number line:
 // |s64_min   .    .    .    0    .    .    .   s64_max|
 // |    .     .    .    .    0 Q_min..Q_max .     .    |  small positive
+// |    .     .    .    .    R>   R    R    R<    R<   |  (against R values)
 // |    .     .    .    .    0    . Q_min..Q_max  .    |  s64 positive
+// |    .     .    .    .    R>   R>   R    R     R<   |  (against R values)
 //
-// if both Q_min, Q_max >=s64 0, then use this test:
-// j*K + 0 <u32 clamp(R, Q_min, Q_max) - Q_min
-// or equivalently:
-// j*K + 0 <u32 clamp(R - Q_min, 0, Q_max - Q_min)
+// R values which are out of range (<Q_min or >Q_max+1) are reduced as marked: R> up to Q_min, R< down to Q_max+1.
+// Then the whole comparison is shifted left by Q_min, so it can take place at zero, which is a nice 32-bit value.
+//
+// So, if both Q_min, Q_max+1 >=s64 0, then use this test:
+// j*K + 0         <u32 clamp(R, Q_min, Q_max+1) - Q_min if S*K>0
+// More generally:
+// j*K + Q - Q_min <u32 clamp(R, Q_min, Q_max+1) - Q_min for all S,K

 // Case C: Overflow in the 64-bit domain
 // Number line:
 // |..Q_max-2^64   .    .    0    .    .    .   Q_min..|  s64 overflow
+// |    .     .    .    .    R>   R>   R>   R>    R    |  (against R values)
 //
-// if Q_min >=s64 0 but Q_max <s64 0, then use this test:
-// j*K + 0 <u32 clamp(R, Q_min, R) - Q_min
-// or equivalently:
-// j*K + 0 <u32 clamp(R - Q_min, 0, R - Q_min)
-// or also equivalently:
-// j*K + 0 <u32 max(0, R - Q_min)
+// In this case, Q_min >s64 Q_max+1, even though the mathematical values of Q_min and Q_max+1 are correctly ordered.
+// The formulas from the previous case can be used, except that the bad upper bound Q_max is replaced by max_jlong.
+// (In fact, we could use any replacement bound from R to max_jlong inclusive, as the input to the clamp function.)
 //
-// Here the clamp function is a simple 64-bit min/max:
-// clamp(X, L, H) := max(L, min(X, H))
+// So if Q_min >=s64 0 but Q_max+1 <s64 0, use this test:
+// j*K + 0         <u32 clamp(R, Q_min, max_jlong) - Q_min if S*K>0
+// More generally:
+// j*K + Q - Q_min <u32 clamp(R, Q_min, max_jlong) - Q_min for all S,K
+//
+// Dropping the bad bound means only Q_min is used to reduce the range of R:
+// j*K + Q - Q_min <u32 max(Q_min, R) - Q_min for all S,K
+//
+// Here the clamp function is a 64-bit min/max that reduces the dynamic range of its R operand to the required [L,H]:
+//     clamp(X, L, H) := max(L, min(X, H))
 // When degenerately L > H, it returns L not H.
 //
-// Tests above can be merged into a single one:
-// L_clamp = Q_min < 0 ? 0 : Q_min
-// H_clamp = Q_max < Q_min ? R : Q_max
-// j*K + Q_min - L_clamp <u32 clamp(R, L_clamp, H_clamp) - L_clamp
-// or equivalently:
-// j*K + Q_min - L_clamp <u32 clamp(R - L_clamp, 0, H_clamp - L_clamp)
+// All of the formulas above can be merged into a single one:
+//     L_clamp = Q_min < 0 ? 0 : Q_min        --whether and how far to left-shift
+//     H_clamp = Q_max+1 < Q_min ? max_jlong : Q_max+1
+//             = Q_max+1 < 0 && Q_min >= 0 ? max_jlong : Q_max+1
+//     Q_first = Q = (S*K>0 ? Q_min : Q_max) = (C*K+L)
+//     R_clamp = clamp(R, L_clamp, H_clamp)   --reduced dynamic range
+//     replacement R.C.:
+//       j*K + Q_first - L_clamp <u32 R_clamp - L_clamp
+//     or equivalently:
+//       j*K + L_2 <u32 R_2
+//     where
+//       L_2 = Q_first - L_clamp
+//       R_2 = R_clamp - L_clamp
+//
+// Note on why R is never negative:
+//
+// Various details of this transformation would break badly if R could be negative, so this transformation only
+// operates after obtaining hard evidence that R<0 is impossible.  For example, if R comes from a LoadRange node, we
+// know R cannot be negative.  For explicit checks (of both int and long) a proof is constructed in
+// inline_preconditions_checkIndex, which triggers an uncommon trap if R<0, then wraps R in a ConstraintCastNode with a
+// non-negative type.  Later on, when IdealLoopTree::is_range_check_if looks for an optimizable R.C., it checks that
+// the type of that R node is non-negative.  Any "wild" R node that could be negative is not treated as an optimizable
+// R.C., but R values from a.length and inside checkIndex are good to go.
 //
-// Readers may find the equivalent forms easier to reason about, but the forms given first generate better code.
-
 void PhaseIdealLoop::transform_long_range_checks(int stride_con, const Node_List &range_checks, Node* outer_phi,
                                                 Node* inner_iters_actual_int, Node* inner_phi,
                                                 Node* iv_add, LoopNode* inner_head) {
  Node* long_zero = _igvn.longcon(0);
  set_ctrl(long_zero, C->root());
+  Node* int_zero = _igvn.intcon(0);
+  set_ctrl(int_zero, this->C->root());
+  Node* long_one = _igvn.longcon(1);
+  set_ctrl(long_one, this->C->root());
+  Node* int_stride = _igvn.intcon(checked_cast<int>(stride_con));
+  set_ctrl(int_stride, this->C->root());

  for (uint i = 0; i < range_checks.size(); i++) {
    ProjNode* proj = range_checks.at(i)->as_Proj();
@ -1266,8 +1303,8 @@ void PhaseIdealLoop::transform_long_range_checks(int stride_con, const Node_List
      // could be shared and have already been taken care of
      continue;
    }
-    bool converted = false;
-    bool ok = is_scaled_iv_plus_offset(rc_cmp->in(1), iv_add, &scale, &offset, T_LONG, &converted);
+    bool short_scale = false;
+    bool ok = is_scaled_iv_plus_offset(rc_cmp->in(1), iv_add, T_LONG, &scale, &offset, &short_scale);
    assert(ok, "inconsistent: was tested before");
    Node* range = rc_cmp->in(2);
    Node* c = rc->in(0);
@ -1279,33 +1316,33 @@ void PhaseIdealLoop::transform_long_range_checks(int stride_con, const Node_List

    Node* L = offset;

-    if (converted) {
+    if (short_scale) {
      // This converts:
-      // i*K + L <u64 R
+      // (int)i*K + L <u64 R
      // with K an int into:
      // i*(long)K + L <u64 unsigned_min((long)max_jint + L + 1, R)
-      // to protect against an overflow of i*K
+      // to protect against an overflow of (int)i*K
      //
-      // Because if i*K overflows, there are K,L where:
-      // i*K + L <u64 R is false
-      // when
-      // i*(long)K is > (long)max_jint and < R
-      // and so i*(long)K + L <u64 R is true
-      // As a consequence simply converting:
-      // i*K + L <u64 R to i*(long)K + L <u64 R could cause incorrect execution
+      // Because if (int)i*K overflows, there are K,L where:
+      // (int)i*K + L <u64 R is false because (int)i*K+L overflows to a negative which becomes a huge u64 value.
+      // But if i*(long)K + L is >u64 (long)max_jint and still is <u64 R, then
+      // i*(long)K + L <u64 R is true.
+      //
+      // As a consequence simply converting i*K + L <u64 R to i*(long)K + L <u64 R could cause incorrect execution.
      //
      // It's always true that:
-      // i*K <u64 (long)max_jint + 1
-      // which implies i*K + L <u64 (long)max_jint + 1 + L
+      // (int)i*K <u64 (long)max_jint + 1
+      // which implies (int)i*K + L <u64 (long)max_jint + 1 + L
      // As a consequence:
      // i*(long)K + L <u64 unsigned_min((long)max_jint + L + 1, R)
      // is always false in case of overflow of i*K
      //
-      // Note, there are K,L where i*K overflows and
+      // Note, there are also K,L where i*K overflows and
      // i*K + L <u64 R is true, but
      // i*(long)K + L <u64 unsigned_min((long)max_jint + L + 1, R) is false
      // So this transformation could cause spurious deoptimizations and failed range check elimination
-      // (but not incorrect execution) for unlikely corner cases with overflow
+      // (but not incorrect execution) for unlikely corner cases with overflow.
+      // If this causes problems in practice, we could maybe direct execution to a post-loop, instead of deoptimizing.
      Node* max_jint_plus_one_long = _igvn.longcon((jlong)max_jint + 1);
      set_ctrl(max_jint_plus_one_long, C->root());
      Node* max_range = new AddLNode(max_jint_plus_one_long, L);
@ -1315,26 +1352,38 @@ void PhaseIdealLoop::transform_long_range_checks(int stride_con, const Node_List
    }

    Node* C = outer_phi;
-    Node* Z_2 = new ConvI2LNode(inner_iters_actual_int, TypeLong::LONG);
-    register_new_node(Z_2, entry_control);

    // Start with 64-bit values:
    //   i*K + L <u64 R
    //   (C+j)*K + L <u64 R
-    //   j*K + L_2 <u64 R    where L_2 = C*K+L
-    Node* L_2 = new MulLNode(C, K);
-    register_new_node(L_2, entry_control);
-    L_2 = new AddLNode(L_2, L);
-    register_new_node(L_2, entry_control);
+    //   j*K + Q <u64 R    where Q = Q_first = C*K+L
+    Node* Q_first = new MulLNode(C, K);
+    register_new_node(Q_first, entry_control);
+    Q_first = new AddLNode(Q_first, L);
+    register_new_node(Q_first, entry_control);

-    // Compute endpoints of the range of values j*K.
-    //  Q_min = (j=0)*K + L_2;  Q_max = (j=Z_2)*K + L_2
-    Node* Q_min = L_2;
-    Node* Q_max = new MulLNode(Z_2, K);
+    // Compute endpoints of the range of values j*K + Q.
+    //  Q_min = (j=0)*K + Q;  Q_max = (j=B_2)*K + Q
+    Node* Q_min = Q_first;
+
+    // Compute the exact ending value B_2 (which is really A_2 if S < 0)
+    Node* B_2 = new LoopLimitNode(this->C, int_zero, inner_iters_actual_int, int_stride);
+    register_new_node(B_2, entry_control);
+    B_2 = new SubINode(B_2, int_stride);
+    register_new_node(B_2, entry_control);
+    B_2 = new ConvI2LNode(B_2);
+    register_new_node(B_2, entry_control);
+
+    Node* Q_max = new MulLNode(B_2, K);
    register_new_node(Q_max, entry_control);
-    Q_max = new AddLNode(Q_max, L_2);
+    Q_max = new AddLNode(Q_max, Q_first);
    register_new_node(Q_max, entry_control);

+    if (scale * stride_con < 0) {
+      swap(Q_min, Q_max);
+    }
+    // Now, mathematically, Q_max > Q_min, and they are close enough so that (Q_max-Q_min) fits in 32 bits.
+
    // L_clamp = Q_min < 0 ? 0 : Q_min
    Node* Q_min_cmp = new CmpLNode(Q_min, long_zero);
    register_new_node(Q_min_cmp, entry_control);
@ -1342,38 +1391,53 @@ void PhaseIdealLoop::transform_long_range_checks(int stride_con, const Node_List
    register_new_node(Q_min_bool, entry_control);
    Node* L_clamp = new CMoveLNode(Q_min_bool, Q_min, long_zero, TypeLong::LONG);
    register_new_node(L_clamp, entry_control);
+    // (This could also be coded bitwise as L_clamp = Q_min & ~(Q_min>>63).)

-    // H_clamp = Q_max < Q_min ? R : Q_max
-    Node* Q_max_cmp = new CmpLNode(Q_max, Q_min);
+    Node* Q_max_plus_one = new AddLNode(Q_max, long_one);
+    register_new_node(Q_max_plus_one, entry_control);
+
+    // H_clamp = Q_max+1 < Q_min ? max_jlong : Q_max+1
+    // (Because Q_min and Q_max are close, the overflow check could also be encoded as Q_max+1 < 0 & Q_min >= 0.)
+    Node* max_jlong_long = _igvn.longcon(max_jlong);
+    set_ctrl(max_jlong_long, this->C->root());
+    Node* Q_max_cmp = new CmpLNode(Q_max_plus_one, Q_min);
    register_new_node(Q_max_cmp, entry_control);
    Node* Q_max_bool = new BoolNode(Q_max_cmp, BoolTest::lt);
    register_new_node(Q_max_bool, entry_control);
-    Node* H_clamp = new CMoveLNode(Q_max_bool, Q_max, R, TypeLong::LONG);
+    Node* H_clamp = new CMoveLNode(Q_max_bool, Q_max_plus_one, max_jlong_long, TypeLong::LONG);
    register_new_node(H_clamp, entry_control);
+    // (This could also be coded bitwise as H_clamp = ((Q_max+1)<<1 | M)>>>1 where M = (Q_max+1)>>63 & ~Q_min>>63.)

    // R_2 = clamp(R, L_clamp, H_clamp) - L_clamp
-    // that is: R_2 = clamp(R, L_clamp, H_clamp) if Q_min < 0
-    // or:      R_2 = clamp(R, L_clamp, H_clamp) - Q_min if Q_min > 0
+    // that is:  R_2 = clamp(R, L_clamp=0, H_clamp=Q_max+1)    if Q_min < 0
+    // or else:  R_2 = clamp(R, L_clamp,   H_clamp) - Q_min    if Q_min >= 0
+    // and also: R_2 = clamp(R, L_clamp,   Q_max+1) - L_clamp  if Q_min < Q_max+1 (no overflow)
+    // or else:  R_2 = clamp(R, L_clamp, *no limit*)- L_clamp  if Q_max+1 < Q_min (overflow)
    Node* R_2 = clamp(R, L_clamp, H_clamp);
    R_2 = new SubLNode(R_2, L_clamp);
    register_new_node(R_2, entry_control);
    R_2 = new ConvL2INode(R_2, TypeInt::POS);
    register_new_node(R_2, entry_control);

-    // Q = Q_min - L_clamp
-    // that is: Q = Q_min - 0 if Q_min < 0
-    // or:      Q = Q_min - Q_min = 0 if Q_min > 0
-    Node* Q = new SubLNode(Q_min, L_clamp);
-    register_new_node(Q, entry_control);
-    Q = new ConvL2INode(Q, TypeInt::INT);
-    register_new_node(Q, entry_control);
+    // L_2 = Q_first - L_clamp
+    // We are subtracting L_clamp from both sides of the <u32 comparison.
+    // If S*K>0, then Q_first == 0 and the R.C. expression starts at -L_clamp and steps upward to Q_max-L_clamp.
+    // If S*K<0, then Q_first != 0 and the R.C. expression starts high and steps downward to Q_min-L_clamp.
+    Node* L_2 = new SubLNode(Q_first, L_clamp);
+    register_new_node(L_2, entry_control);
+    L_2 = new ConvL2INode(L_2, TypeInt::INT);
+    register_new_node(L_2, entry_control);

-    // Transform the range check
+    // Transform the range check using the computed values L_2/R_2
+    // from:   i*K + L   <u64 R
+    // to:     j*K + L_2 <u32 R_2
+    // that is:
+    //   (j*K + Q_first) - L_clamp <u32 clamp(R, L_clamp, H_clamp) - L_clamp
    K = _igvn.intcon(checked_cast<int>(scale));
    set_ctrl(K, this->C->root());
    Node* scaled_iv = new MulINode(inner_phi, K);
    register_new_node(scaled_iv, c);
-    Node* scaled_iv_plus_offset = scaled_iv_plus_offset = new AddINode(scaled_iv, Q);
+    Node* scaled_iv_plus_offset = scaled_iv_plus_offset = new AddINode(scaled_iv, L_2);
    register_new_node(scaled_iv_plus_offset, c);

    Node* new_rc_cmp = new CmpUNode(scaled_iv_plus_offset, R_2);
--- a/src/hotspot/share/opto/loopnode.hpp
+++ b/src/hotspot/share/opto/loopnode.hpp
@ -1265,15 +1265,15 @@ public:
  void mark_reductions( IdealLoopTree *loop );

  // Return true if exp is a constant times an induction var
-  bool is_scaled_iv(Node* exp, Node* iv, jlong* p_scale, BasicType bt, bool* converted);
+  bool is_scaled_iv(Node* exp, Node* iv, BasicType bt, jlong* p_scale, bool* p_short_scale, int depth = 0);

  bool is_iv(Node* exp, Node* iv, BasicType bt);

  // Return true if exp is a scaled induction var plus (or minus) constant
-  bool is_scaled_iv_plus_offset(Node* exp, Node* iv, jlong* p_scale, Node** p_offset, BasicType bt, bool* converted = NULL, int depth = 0);
+  bool is_scaled_iv_plus_offset(Node* exp, Node* iv, BasicType bt, jlong* p_scale, Node** p_offset, bool* p_short_scale = NULL, int depth = 0);
  bool is_scaled_iv_plus_offset(Node* exp, Node* iv, int* p_scale, Node** p_offset) {
    jlong long_scale;
-    if (is_scaled_iv_plus_offset(exp, iv, &long_scale, p_offset, T_INT)) {
+    if (is_scaled_iv_plus_offset(exp, iv, T_INT, &long_scale, p_offset)) {
      int int_scale = checked_cast<int>(long_scale);
      if (p_scale != NULL) {
        *p_scale = int_scale;
@ -1282,6 +1282,12 @@ public:
    }
    return false;
  }
+  // Helper for finding more complex matches to is_scaled_iv_plus_offset.
+  bool is_scaled_iv_plus_extra_offset(Node* exp1, Node* offset2, Node* iv,
+                                      BasicType bt,
+                                      jlong* p_scale, Node** p_offset,
+                                      bool* p_short_scale, int depth);
+

  // Enum to determine the action to be performed in create_new_if_for_predicate() when processing phis of UCT regions.
  enum class UnswitchingAction {
@ -1658,6 +1664,7 @@ public:

  void strip_mined_nest_back_to_counted_loop(IdealLoopTree* loop, const BaseCountedLoopNode* head, Node* back_control,
                                             IfNode*&exit_test, SafePointNode*&safepoint);
+
  void push_pinned_nodes_thru_region(IfNode* dom_if, Node* region);

  bool try_merge_identical_ifs(Node* n);
--- a/src/hotspot/share/opto/node.hpp
+++ b/src/hotspot/share/opto/node.hpp
@ -1153,7 +1153,12 @@ public:

  jlong get_integer_as_long(BasicType bt) const {
    const TypeInteger* t = find_integer_type(bt);
-    guarantee(t != NULL, "must be con");
+    guarantee(t != NULL && t->is_con(), "must be con");
+    return t->get_con_as_long(bt);
+  }
+  jlong find_integer_as_long(BasicType bt, jlong value_if_unknown) const {
+    const TypeInteger* t = find_integer_type(bt);
+    if (t == NULL || !t->is_con())  return value_if_unknown;
    return t->get_con_as_long(bt);
  }
  const TypePtr* get_ptr_type() const;
--- a/test/hotspot/jtreg/compiler/c2/irTests/TestLongRangeChecks.java
+++ b/test/hotspot/jtreg/compiler/c2/irTests/TestLongRangeChecks.java
@ -96,4 +96,150 @@ public class TestLongRangeChecks {
    private void testStridePosScalePosInIntLoop2_runner() {
        testStridePosScalePosInIntLoop2(0, 100, 200, 0);
    }
+
+    @Test
+    @IR(counts = { IRNode.LOOP, "1"})
+    @IR(failOn = { IRNode.COUNTEDLOOP})
+    public static void testStrideNegScaleNeg(long start, long stop, long length, long offset) {
+        final long scale = -1;
+        final long stride = 1;
+        for (long i = stop; i > start; i -= stride) {
+            Objects.checkIndex(scale * i + offset, length);
+        }
+    }
+
+    @Run(test = "testStrideNegScaleNeg")
+    private void testStrideNegScaleNeg_runner() {
+        testStrideNegScaleNeg(0, 100, 100, 100);
+    }
+
+    @Test
+    @IR(counts = { IRNode.LOOP, "1" })
+    @IR(failOn = { IRNode.COUNTEDLOOP })
+    public static void testStrideNegScaleNegInIntLoop1(int start, int stop, long length, long offset) {
+        final long scale = -2;
+        final int stride = 1;
+
+        for (int i = stop; i > start; i -= stride) {
+            Objects.checkIndex(scale * i + offset, length);
+        }
+    }
+
+    @Run(test = "testStrideNegScaleNegInIntLoop1")
+    private void testStrideNegScaleNegInIntLoop1_runner() {
+        testStrideNegScaleNegInIntLoop1(0, 100, 200, 200);
+    }
+
+    @Test
+    @IR(counts = { IRNode.LOOP, "1" })
+    @IR(failOn = { IRNode.COUNTEDLOOP })
+    public static void testStrideNegScaleNegInIntLoop2(int start, int stop, long length, long offset) {
+        final int scale = -2;
+        final int stride = 1;
+
+        for (int i = stop; i > start; i -= stride) {
+            Objects.checkIndex(scale * i + offset, length);
+        }
+    }
+
+    @Run(test = "testStrideNegScaleNegInIntLoop2")
+    private void testStrideNegScaleNegInIntLoop2_runner() {
+        testStrideNegScaleNegInIntLoop2(0, 100, 200, 200);
+    }
+
+    @Test
+    @IR(counts = { IRNode.LOOP, "1"})
+    @IR(failOn = { IRNode.COUNTEDLOOP})
+    public static void testStrideNegScalePos(long start, long stop, long length, long offset) {
+        final long scale = 1;
+        final long stride = 1;
+        for (long i = stop-1; i >= start; i -= stride) {
+            Objects.checkIndex(scale * i + offset, length);
+        }
+    }
+
+    @Run(test = "testStrideNegScalePos")
+    private void testStrideNegScalePos_runner() {
+        testStrideNegScalePos(0, 100, 100, 0);
+    }
+
+    @Test
+    @IR(counts = { IRNode.LOOP, "1" })
+    @IR(failOn = { IRNode.COUNTEDLOOP })
+    public static void testStrideNegScalePosInIntLoop1(int start, int stop, long length, long offset) {
+        final long scale = 2;
+        final int stride = 1;
+        for (int i = stop-1; i >= start; i -= stride) {
+            Objects.checkIndex(scale * i + offset, length);
+        }
+    }
+
+    @Run(test = "testStrideNegScalePosInIntLoop1")
+    private void testStrideNegScalePosInIntLoop1_runner() {
+        testStrideNegScalePosInIntLoop1(0, 100, 200, 0);
+    }
+
+    @Test
+    @IR(counts = { IRNode.LOOP, "1" })
+    @IR(failOn = { IRNode.COUNTEDLOOP })
+    public static void testStrideNegScalePosInIntLoop2(int start, int stop, long length, long offset) {
+        final int scale = 2;
+        final int stride = 1;
+        for (int i = stop-1; i >= start; i -= stride) {
+            Objects.checkIndex(scale * i + offset, length);
+        }
+    }
+
+    @Run(test = "testStrideNegScalePosInIntLoop2")
+    private void testStrideNegScalePosInIntLoop2_runner() {
+        testStrideNegScalePosInIntLoop2(0, 100, 200, 0); // drive the int-scale variant this runner is registered for (indices 198..0 < 200)
+    }
+
+    @Test
+    @IR(counts = { IRNode.LOOP, "1"})
+    @IR(failOn = { IRNode.COUNTEDLOOP})
+    public static void testStridePosScaleNeg(long start, long stop, long length, long offset) {
+        final long scale = -1;
+        final long stride = 1;
+        for (long i = start; i < stop; i += stride) {
+            Objects.checkIndex(scale * i + offset, length);
+        }
+    }
+
+    @Run(test = "testStridePosScaleNeg")
+    private void testStridePosScaleNeg_runner() {
+        testStridePosScaleNeg(0, 100, 100, 99);
+    }
+
+    @Test
+    @IR(counts = { IRNode.LOOP, "1"})
+    @IR(failOn = { IRNode.COUNTEDLOOP})
+    public static void testStridePosScaleNegInIntLoop1(int start, int stop, long length, long offset) {
+        final long scale = -2;
+        final int stride = 1;
+        for (int i = start; i < stop; i += stride) {
+            Objects.checkIndex(scale * i + offset, length);
+        }
+    }
+
+    @Run(test = "testStridePosScaleNegInIntLoop1")
+    private void testStridePosScaleNegInIntLoop1_runner() {
+        testStridePosScaleNegInIntLoop1(0, 100, 200, 198);
+    }
+
+    @Test
+    @IR(counts = { IRNode.LOOP, "1"})
+    @IR(failOn = { IRNode.COUNTEDLOOP})
+    public static void testStridePosScaleNegInIntLoop2(int start, int stop, long length, long offset) {
+        final int scale = -2;
+        final int stride = 1;
+        for (int i = start; i < stop; i += stride) {
+            Objects.checkIndex(scale * i + offset, length);
+        }
+    }
+
+    @Run(test = "testStridePosScaleNegInIntLoop2")
+    private void testStridePosScaleNegInIntLoop2_runner() {
+        testStridePosScaleNegInIntLoop2(0, 100, 200, 198); // drive the int-scale variant this runner is registered for (indices 198..0 < 200)
+    }
 }
--- a/test/hotspot/jtreg/compiler/rangechecks/TestLongRangeCheck.java
+++ b/test/hotspot/jtreg/compiler/rangechecks/TestLongRangeCheck.java
@ -127,6 +127,73 @@ public class TestLongRangeCheck {
        assertIsNotCompiled(m);
    }

+    private static void testOverflow(String method, long start, long stop, long length, long offset0, long offset1) throws Exception {
+        Method m = newClassLoader().loadClass("TestLongRangeCheck").getDeclaredMethod(method, long.class, long.class, long.class, long.class);
+        m.invoke(null, start, stop, length, offset0);
+        compile(m);
+
+        m.invoke(null, start, stop, length, offset0);
+        assertIsCompiled(m);
+        try {
+            m.invoke(null, start, stop, length, offset1);
+            throw new RuntimeException("should have thrown");
+        } catch(InvocationTargetException e) {
+            if (!(e.getCause() instanceof IndexOutOfBoundsException)) {
+                throw new RuntimeException("unexpected exception");
+            }
+        }
+        assertIsNotCompiled(m);
+    }
+
+    private static void testConditional(String method, long start, long stop, long length, long offset0, long offset1, long start1, long stop1) throws Exception {
+        Method m;
+
+        if (start1 != start) {
+            m = newClassLoader().loadClass("TestLongRangeCheck").getDeclaredMethod(method, long.class, long.class, long.class, long.class, long.class, long.class);
+            m.invoke(null, start, stop, length, offset0, start, stop);
+            compile(m);
+
+            m.invoke(null, start, stop, length, offset0, start, stop);
+            assertIsCompiled(m);
+            try {
+                m.invoke(null, start, stop, length, offset1, start1-1, stop1);
+                throw new RuntimeException("should have thrown");
+            } catch(InvocationTargetException e) {
+                if (!(e.getCause() instanceof IndexOutOfBoundsException)) {
+                    throw new RuntimeException("unexpected exception");
+                }
+            }
+            assertIsNotCompiled(m);
+        }
+
+        if (stop1 != stop) {
+            m = newClassLoader().loadClass("TestLongRangeCheck").getDeclaredMethod(method, long.class, long.class, long.class, long.class, long.class, long.class);
+            m.invoke(null, start, stop, length, offset0, start, stop);
+            compile(m);
+
+            m.invoke(null, start, stop, length, offset0, start, stop);
+            assertIsCompiled(m);
+            try {
+                m.invoke(null, start, stop, length, offset1, start1, stop1+1);
+                throw new RuntimeException("should have thrown");
+            } catch(InvocationTargetException e) {
+                if (!(e.getCause() instanceof IndexOutOfBoundsException)) {
+                    throw new RuntimeException("unexpected exception");
+                }
+            }
+            assertIsNotCompiled(m);
+        }
+
+        m = newClassLoader().loadClass("TestLongRangeCheck").getDeclaredMethod(method, long.class, long.class, long.class, long.class, long.class, long.class);
+        m.invoke(null, start, stop, length, offset0, start, stop);
+        compile(m);
+
+        m.invoke(null, start, stop, length, offset0, start, stop);
+        assertIsCompiled(m);
+
+        m.invoke(null, start, stop, length, offset1, start1, stop1);
+        assertIsCompiled(m);
+    }

    public static void main(String[] args) throws Exception {

@ -157,42 +224,20 @@ public class TestLongRangeCheck {
        test("testStridePosNotOneScaleNeg", -v, v, v * 2, v-1);

        // offset causes overflow
-
-        {
-            Method m = newClassLoader().loadClass("TestLongRangeCheck").getDeclaredMethod("testStridePosScalePos", long.class, long.class, long.class, long.class);
-            m.invoke(null, 0, 100, 100, 0);
-            compile(m);
-
-            m.invoke(null, 0, 100, 100, 0);
-            assertIsCompiled(m);
-            try {
-                m.invoke(null, 0, 100, 100, Long.MAX_VALUE - 50);
-                throw new RuntimeException("should have thrown");
-            } catch(InvocationTargetException e) {
-                if (!(e.getCause() instanceof IndexOutOfBoundsException)) {
-                    throw new RuntimeException("unexpected exception");
-                }
-            }
-            assertIsNotCompiled(m);
-        }
+        testOverflow("testStridePosScalePos", 0, 100, 100, 0, Long.MAX_VALUE - 50);
+        testOverflow("testStrideNegScaleNeg", 0, 100, 100, 100, Long.MIN_VALUE + 50);
+        testOverflow("testStrideNegScalePos", 0, 100, 100, 0, Long.MAX_VALUE - 50);
+        testOverflow("testStridePosScaleNeg", 0, 100, 100, 99, Long.MIN_VALUE + 50);

        // no spurious deopt if the range check doesn't fail because not executed
-        {
-            Method m = newClassLoader().loadClass("TestLongRangeCheck").getDeclaredMethod("testStridePosScalePosConditional", long.class, long.class, long.class, long.class, long.class, long.class);
-            m.invoke(null, 0, 100, 100, 0, 0, 100);
-            compile(m);
-
-            m.invoke(null, 0, 100, 100, -50, 50, 100);
-            assertIsCompiled(m);
-        }
-        {
-            Method m = newClassLoader().loadClass("TestLongRangeCheck").getDeclaredMethod("testStridePosScalePosConditional", long.class, long.class, long.class, long.class, long.class, long.class);
-            m.invoke(null, 0, 100, 100, 0, 0, 100);
-            compile(m);
-
-            m.invoke(null, 0, 100, Long.MAX_VALUE, Long.MAX_VALUE - 50, 0, 50);
-            assertIsCompiled(m);
-        }
+        testConditional("testStridePosScalePosConditional", 0, 100, 100, 0, -50, 50, 100);
+        testConditional("testStridePosScalePosConditional", 0, 100, Long.MAX_VALUE, 0, Long.MAX_VALUE - 50, 0, 50);
+        testConditional("testStrideNegScaleNegConditional", 0, 100, 100, 100, 50, 0, 51);
+        testConditional("testStrideNegScaleNegConditional", 0, 100, Long.MAX_VALUE, 100, Long.MIN_VALUE + 50, 52, 100);
+        testConditional("testStrideNegScalePosConditional", 0, 100, 100, 0, -50, 50, 100);
+        testConditional("testStrideNegScalePosConditional", 0, 100, Long.MAX_VALUE, 100, Long.MAX_VALUE - 50, 0, 50);
+        testConditional("testStridePosScaleNegConditional", 0, 100, 100, 99, 50, 0, 51);
+        testConditional("testStridePosScaleNegConditional", 0, 100, Long.MAX_VALUE, 99, Long.MIN_VALUE + 50, 52, 100);

        test("testStridePosScalePosInIntLoop", 0, 100, 100, 0);

@ -221,40 +266,19 @@ public class TestLongRangeCheck {
        test("testStridePosNotOneScaleNegInIntLoop", -v, v, v * 4, 2 * v - 1);

        // offset causes overflow
-        {
-            Method m = newClassLoader().loadClass("TestLongRangeCheck").getDeclaredMethod("testStridePosScalePosInIntLoop", long.class, long.class, long.class, long.class);
-            m.invoke(null, 0, 100, 100, 0);
-            compile(m);
-
-            m.invoke(null, 0, 100, 100, 0);
-            assertIsCompiled(m);
-            try {
-                m.invoke(null, 0, 100, 100, Long.MAX_VALUE - 50);
-                throw new RuntimeException("should have thrown");
-            } catch(InvocationTargetException e) {
-                if (!(e.getCause() instanceof IndexOutOfBoundsException)) {
-                    throw new RuntimeException("unexpected exception");
-                }
-            }
-            assertIsNotCompiled(m);
-        }
+        testOverflow("testStridePosScalePosInIntLoop", 0, 100, 100, 0, Long.MAX_VALUE - 50);
+        testOverflow("testStrideNegScaleNegInIntLoop", 0, 100, 100, 100, Long.MIN_VALUE + 50);
+        testOverflow("testStrideNegScalePosInIntLoop", 0, 100, 100, 0, Long.MAX_VALUE - 50);
+        testOverflow("testStridePosScaleNegInIntLoop", 0, 100, 100, 99, Long.MIN_VALUE + 50);
        // no spurious deopt if the range check doesn't fail because not executed
-        {
-            Method m = newClassLoader().loadClass("TestLongRangeCheck").getDeclaredMethod("testStridePosScalePosConditional", long.class, long.class, long.class, long.class, long.class, long.class);
-            m.invoke(null, 0, 100, 100, 0, 0, 100);
-            compile(m);
-
-            m.invoke(null, 0, 100, 100, -50, 50, 100);
-            assertIsCompiled(m);
-        }
-        {
-            Method m = newClassLoader().loadClass("TestLongRangeCheck").getDeclaredMethod("testStridePosScalePosConditional", long.class, long.class, long.class, long.class, long.class, long.class);
-            m.invoke(null, 0, 100, 100, 0, 0, 100);
-            compile(m);
-
-            m.invoke(null, 0, 100, Long.MAX_VALUE, Long.MAX_VALUE - 50, 0, 50);
-            assertIsCompiled(m);
-        }
+        testConditional("testStridePosScalePosConditionalInIntLoop", 0, 100, 100, 0, -50, 50, 100);
+        testConditional("testStridePosScalePosConditionalInIntLoop", 0, 100, Long.MAX_VALUE, 0, Long.MAX_VALUE - 50, 0, 50);
+        testConditional("testStrideNegScaleNegConditionalInIntLoop", 0, 100, 100, 100, 50, 0, 51);
+        testConditional("testStrideNegScaleNegConditionalInIntLoop", 0, 100, Long.MAX_VALUE, 100, Long.MIN_VALUE + 50, 52, 100);
+        testConditional("testStrideNegScalePosConditionalInIntLoop", 0, 100, 100, 0, -50, 50, 100);
+        testConditional("testStrideNegScalePosConditionalInIntLoop", 0, 100, Long.MAX_VALUE, 100, Long.MAX_VALUE - 50, 0, 50);
+        testConditional("testStridePosScaleNegConditionalInIntLoop", 0, 100, 100, 99, 50, 0, 51);
+        testConditional("testStridePosScaleNegConditionalInIntLoop", 0, 100, Long.MAX_VALUE, 99, Long.MIN_VALUE + 50, 52, 100);

        test("testStridePosScalePosNotOneInIntLoop2", 0, 100, 1090, 0);

@ -411,6 +435,36 @@ public class TestLongRangeCheck {
        }
    }

+    public static void testStrideNegScaleNegConditional(long start, long stop, long length, long offset, long start2, long stop2) {
+        final long scale = -1;
+        final long stride = 1;
+        for (long i = stop; i > start; i -= stride) {
+            if (i >= start2 && i < stop2) {
+                Preconditions.checkIndex(scale * i + offset, length, null);
+            }
+        }
+    }
+
+    public static void testStrideNegScalePosConditional(long start, long stop, long length, long offset, long start2, long stop2) {
+        final long scale = 1;
+        final long stride = 1;
+        for (long i = stop-1; i >= start; i -= stride) {
+            if (i >= start2 && i < stop2) {
+                Preconditions.checkIndex(scale * i + offset, length, null);
+            }
+        }
+    }
+
+    public static void testStridePosScaleNegConditional(long start, long stop, long length, long offset, long start2, long stop2) {
+        final long scale = -1;
+        final long stride = 1;
+        for (long i = start; i < stop; i += stride) {
+            if (i >= start2 && i < stop2) {
+                Preconditions.checkIndex(scale * i + offset, length, null);
+            }
+        }
+    }
+
    private static void checkInputs(long... inputs) {
        for (int i = 0; i < inputs.length; i++) {
            if ((long)((int)inputs[i]) != inputs[i]) {
@ -529,7 +583,6 @@ public class TestLongRangeCheck {

    public static void testStridePosScalePosConditionalInIntLoop(long start, long stop, long length, long offset, long start2, long stop2) {
        checkInputs(start, stop, start2, stop2);
-        Preconditions.checkIndex(0, length, null);
        final long scale = 1;
        final int stride = 1;
        for (int i = (int)start; i < (int)stop; i += stride) {
@ -539,6 +592,39 @@ public class TestLongRangeCheck {
        }
    }

+    public static void testStrideNegScaleNegConditionalInIntLoop(long start, long stop, long length, long offset, long start2, long stop2) {
+        checkInputs(start, stop, start2, stop2);
+        final long scale = -1;
+        final int stride = 1;
+        for (int i = (int)stop; i > (int)start; i -= stride) {
+            if (i >= (int)start2 && i < (int)stop2) {
+                Preconditions.checkIndex(scale * i + offset, length, null);
+            }
+        }
+    }
+
+    public static void testStrideNegScalePosConditionalInIntLoop(long start, long stop, long length, long offset, long start2, long stop2) {
+        checkInputs(start, stop, start2, stop2);
+        final long scale = 1;
+        final int stride = 1;
+        for (int i = (int)(stop-1); i >= (int)start; i -= stride) {
+            if (i >= (int)start2 && i < (int)stop2) {
+                Preconditions.checkIndex(scale * i + offset, length, null);
+            }
+        }
+    }
+
+    public static void testStridePosScaleNegConditionalInIntLoop(long start, long stop, long length, long offset, long start2, long stop2) {
+        checkInputs(start, stop, start2, stop2);
+        final long scale = -1;
+        final int stride = 1;
+        for (int i = (int)start; i < (int)stop; i += stride) {
+            if (i >= (int)start2 && i < (int)stop2) {
+                Preconditions.checkIndex(scale * i + offset, length, null);
+            }
+        }
+    }
+
    public static void testStridePosScalePosNotOneInIntLoop2(long start, long stop, long length, long offset) {
        checkInputs(start, stop);
        final int scale = 11;