8346177: C2: optimize simple increment loops with loop-invariant strides

2026-03-28 16:50:10 +00:00 · 2026-03-27 14:07:01 +00:00 · 2026-03-27 14:07:01 +00:00 · 19bbd588ba
commit 19bbd588ba
parent 2994524025
5 changed files with 522 additions and 88 deletions
--- a/src/hotspot/share/opto/divnode.cpp
+++ b/src/hotspot/share/opto/divnode.cpp
@ -1,5 +1,6 @@
 /*
 * Copyright (c) 1997, 2026, Oracle and/or its affiliates. All rights reserved.
+ * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -600,6 +601,18 @@ static const IntegerType* compute_signed_div_type(const IntegerType* i1, const I
  return IntegerType::make(new_lo, new_hi, widen);
 }

+// Factory for a division node selected by basic type: builds a DivINode when
+// bt is T_INT and a DivLNode when bt is T_LONG, forwarding c, in1 and in2 to
+// the node constructor unchanged. Any other basic type is reported through
+// fatal(); the trailing return only exists to give every path a return value.
+DivModIntegerNode* DivModIntegerNode::make(Node* c, Node* in1, Node* in2, BasicType bt) {
+  if (bt == T_INT) {
+    return new DivINode(c, in1, in2);
+  }
+  if (bt == T_LONG) {
+    return new DivLNode(c, in1, in2);
+  }
+  fatal("Not implemented for %s", type2name(bt));
+  return nullptr;
+}
+
 //=============================================================================
 //------------------------------Identity---------------------------------------
 // If the divisor is 1, we are an identity on the dividend.
--- a/src/hotspot/share/opto/divnode.hpp
+++ b/src/hotspot/share/opto/divnode.hpp
@ -1,5 +1,6 @@
 /*
 * Copyright (c) 1997, 2026, Oracle and/or its affiliates. All rights reserved.
+ * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -42,6 +43,9 @@ private:
 protected:
  DivModIntegerNode(Node* c, Node* dividend, Node* divisor) : Node(c, dividend, divisor), _pinned(false) {}

+public:
+  static DivModIntegerNode* make(Node* c, Node* in1, Node* in2, BasicType bt);
+
 private:
  virtual uint size_of() const override { return sizeof(DivModIntegerNode); }
  virtual uint hash() const override { return Node::hash() + _pinned; }
--- a/src/hotspot/share/opto/loopnode.cpp
+++ b/src/hotspot/share/opto/loopnode.cpp
@ -1,5 +1,6 @@
 /*
 * Copyright (c) 1998, 2026, Oracle and/or its affiliates. All rights reserved.
+ * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -4540,36 +4541,23 @@ bool PhaseIdealLoop::is_deleteable_safept(Node* sfpt) const {
 //
 //    int a = init2;
 //    for (int iv = init; iv < limit; iv += stride_con) {
-//      a += stride_con2;
+//      a += inc;   // or a -= inc; where inc is loop-invariant
 //    }
 //
 // and transforms it to:
 //
-//    int iv2 = init2
-//    int iv = init
-//    loop:
-//      if (iv >= limit) goto exit
-//      iv += stride_con
-//      iv2 = init2 + (iv - init) * (stride_con2 / stride_con)
-//      goto loop
-//    exit:
-//    ...
+//    a = init2 +/- ((iv - init) / stride_con) * inc
 //
-// Such transformation introduces more optimization opportunities. In this
-// particular example, the loop can be eliminated entirely given that
-// `stride_con2 / stride_con` is exact  (i.e., no remainder). Checks are in
-// place to only perform this optimization if such a division is exact. This
-// example will be transformed into its semantic equivalence:
-//
-//     int iv2 = (iv * stride_con2 / stride_con) + (init2 - (init * stride_con2 / stride_con))
-//
-// which corresponds to the structure of transformed subgraph.
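+// The division (iv - init) / stride_con is exact in unbounded arithmetic: at
+// iteration k the primary IV has value init + k * stride_con, so iv - init is
+// a multiple of stride_con. NOTE(review): this relies on iv - init not
+// wrapping in the type used for the subtraction -- confirm for int-typed
+// parallel IVs when the loop range exceeds 2^31 and |stride_con| > 1. For
+// |stride_con| == 1 the division is elided; for larger constants IGVN
+// strength-reduces it into multiply-and-shift sequences.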
+// Such a transformation introduces more optimization opportunities; the loop
+// can often be eliminated.
 //
 // However, if there is a mismatch between types of the loop and the parallel
 // induction variable (e.g., a long-typed IV in an int-typed loop), type
-// conversions are required:
-//
-//     long iv2 = ((long) iv * stride_con2 / stride_con) + (init2 - ((long) init * stride_con2 / stride_con))
+// conversions are required.
 //
 void PhaseIdealLoop::replace_parallel_iv(IdealLoopTree *loop) {
  assert(loop->_head->is_CountedLoop(), "");
@ -4595,52 +4583,44 @@ void PhaseIdealLoop::replace_parallel_iv(IdealLoopTree *loop) {

    PhiNode* phi2 = out->as_Phi();
    Node* incr2 = phi2->in(LoopNode::LoopBackControl);
-    // Look for induction variables of the form:  X += constant
-    if (phi2->region() != loop->_head ||
-        incr2->req() != 3 ||
-        incr2->in(1)->uncast() != phi2 ||
-        incr2 == incr ||
-        (incr2->Opcode() != Op_AddI && incr2->Opcode() != Op_AddL) ||
-        !incr2->in(2)->is_Con()) {
+    if (phi2->region() != loop->_head || incr2->req() != 3 || incr2 == incr) {
      continue;
    }

-    if (incr2->in(1)->is_ConstraintCast() &&
-        !(incr2->in(1)->in(0)->is_IfProj() && incr2->in(1)->in(0)->in(0)->is_RangeCheck())) {
-      // Skip AddI->CastII->Phi case if CastII is not controlled by local RangeCheck
+    int opc = incr2->Opcode();
+    bool is_add = (opc == Op_AddI || opc == Op_AddL);
+    if (!is_add && opc != Op_SubI && opc != Op_SubL) {
      continue;
    }
-    // Check for parallel induction variable (parallel to trip counter)
-    // via an affine function.  In particular, count-down loops with
-    // count-up array indices are common. We only RCE references off
-    // the trip-counter, so we need to convert all these to trip-counter
-    // expressions.
+
+    // Determine which input is the phi (self-reference) and which is the
+    // increment value. For commutative Add, phi2 can be in either position.
+    // For non-commutative Sub, phi2 must be in(1).
+    int phi_idx, inc_idx;
+    if (incr2->in(1)->uncast() == phi2) {
+      phi_idx = 1; inc_idx = 2;
+    } else if (is_add && incr2->in(2)->uncast() == phi2) {
+      phi_idx = 2; inc_idx = 1;
+    } else {
+      continue;
+    }
+
+    if (incr2->in(phi_idx)->is_ConstraintCast() &&
+        !(incr2->in(phi_idx)->in(0)->is_IfProj() && incr2->in(phi_idx)->in(0)->in(0)->is_RangeCheck())) {
+      // Skip AddI/SubI->CastII->Phi case if CastII is not controlled by local RangeCheck
+      continue;
+    }
+
+    Node* inc_val = incr2->in(inc_idx);
+    if (!loop->is_invariant(inc_val)) {
+      continue;
+    }
+
+    // Determine the basic type of the increment (and the iv being incremented).
+    BasicType bt = (opc == Op_AddI || opc == Op_SubI) ? T_INT : T_LONG;
+
    Node* init2 = phi2->in(LoopNode::EntryControl);

-    // Determine the basic type of the stride constant (and the iv being incremented).
-    BasicType stride_con2_bt = incr2->Opcode() == Op_AddI ? T_INT : T_LONG;
-    jlong stride_con2 = incr2->in(2)->get_integer_as_long(stride_con2_bt);
-
-    // The ratio of the two strides cannot be represented as an int
-    // if stride_con2 is min_jint (or min_jlong, respectively) and
-    // stride_con is -1.
-    if (stride_con2 == min_signed_integer(stride_con2_bt) && stride_con == -1) {
-      continue;
-    }
-
-    // The general case here gets a little tricky.  We want to find the
-    // GCD of all possible parallel IV's and make a new IV using this
-    // GCD for the loop.  Then all possible IVs are simple multiples of
-    // the GCD.  In practice, this will cover very few extra loops.
-    // Instead we require 'stride_con2' to be a multiple of 'stride_con',
-    // where +/-1 is the common case, but other integer multiples are
-    // also easy to handle.
-    jlong ratio_con = stride_con2 / stride_con;
-
-    if ((ratio_con * stride_con) != stride_con2) { // Check for exact (no remainder)
-        continue;
-    }
-
 #ifndef PRODUCT
    if (TraceLoopOpts) {
      tty->print("Parallel IV: %d ", phi2->_idx);
@ -4648,35 +4628,45 @@ void PhaseIdealLoop::replace_parallel_iv(IdealLoopTree *loop) {
    }
 #endif

-    // Convert to using the trip counter.  The parallel induction
-    // variable differs from the trip counter by a loop-invariant
-    // amount, the difference between their respective initial values.
-    // It is scaled by the 'ratio_con'.
-    Node* ratio = integercon(ratio_con, stride_con2_bt);
+    // Transform: phi2 = init2 +/- ((iv - init) / stride_con) * inc_val
+    // Use Add for phi2 += inc_val, Sub for phi2 -= inc_val.
+    Node* init_converted = insert_convert_node_if_needed(bt, init);
+    Node* phi_converted = insert_convert_node_if_needed(bt, phi);

-    Node* init_converted = insert_convert_node_if_needed(stride_con2_bt, init);
-    Node* phi_converted = insert_convert_node_if_needed(stride_con2_bt, phi);
+    // Compute iteration count = (iv - init) / stride_con.
+    // The division is always exact (iv - init = k * stride_con at iteration k).
+    // For |stride_con| == 1 the division is elided. The DivNode is only
+    // created when |stride_con| > 1, so the divisor is never -1 and the
+    // JVM special case MIN_INT / -1 == MIN_INT cannot occur.
+    Node* iterations;
+    if (stride_con == 1) {
+      iterations = SubNode::make(phi_converted, init_converted, bt);
+    } else if (stride_con == -1) {
+      // (phi - init) / -1 == init - phi
+      iterations = SubNode::make(init_converted, phi_converted, bt);
+    } else {
+      Node* diff_iv = SubNode::make(phi_converted, init_converted, bt);
+      _igvn.register_new_node_with_optimizer(diff_iv);
+      set_ctrl(diff_iv, cl);
+      Node* stride_node = integercon(stride_con, bt);
+      iterations = DivModIntegerNode::make(nullptr, diff_iv, stride_node, bt);
+    }
+    _igvn.register_new_node_with_optimizer(iterations);
+    set_ctrl(iterations, cl);

-    Node* ratio_init = MulNode::make(init_converted, ratio, stride_con2_bt);
-    _igvn.register_new_node_with_optimizer(ratio_init, init_converted);
-    set_early_ctrl(ratio_init, false);
+    Node* scaled = MulNode::make(iterations, inc_val, bt);
+    _igvn.register_new_node_with_optimizer(scaled);
+    set_ctrl(scaled, cl);

-    Node* diff = SubNode::make(init2, ratio_init, stride_con2_bt);
-    _igvn.register_new_node_with_optimizer(diff, init2);
-    set_early_ctrl(diff, false);
+    Node* result = is_add ? (Node*)AddNode::make(init2, scaled, bt)
+                          : (Node*)SubNode::make(init2, scaled, bt);
+    _igvn.register_new_node_with_optimizer(result);
+    set_ctrl(result, cl);

-    Node* ratio_idx = MulNode::make(phi_converted, ratio, stride_con2_bt);
-    _igvn.register_new_node_with_optimizer(ratio_idx, phi_converted);
-    set_ctrl(ratio_idx, cl);
-
-    Node* add = AddNode::make(ratio_idx, diff, stride_con2_bt);
-    _igvn.register_new_node_with_optimizer(add);
-    set_ctrl(add, cl);
-
-    _igvn.replace_node( phi2, add );
+    _igvn.replace_node(phi2, result);
    // Sometimes an induction variable is unused
-    if (add->outcnt() == 0) {
-      _igvn.remove_dead_node(add);
+    if (result->outcnt() == 0) {
+      _igvn.remove_dead_node(result);
    }
    --i; // deleted this phi; rescan starting with next position
  }
--- a/test/hotspot/jtreg/compiler/loopopts/TestRedundantSafepointElimination.java
+++ b/test/hotspot/jtreg/compiler/loopopts/TestRedundantSafepointElimination.java
@ -1,5 +1,6 @@
 /*
 * Copyright (c) 2025 Alibaba Group Holding Limited. All Rights Reserved.
+ * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -45,13 +46,15 @@ public class TestRedundantSafepointElimination {

    // Test for a top-level counted loop.
    // There should be a non-call safepoint in the loop.
+    // Use loop-variant increment (someInts0 + i) to prevent loop elimination
+    // by parallel IV replacement.
    @Test
    @IR(counts = {IRNode.SAFEPOINT, "1"},
        phase = CompilePhase.AFTER_LOOP_OPTS)
    public int testTopLevelCountedLoop() {
        int sum = 0;
        for (int i = 0; i < 100000; i++) {
-            sum += someInts0;
+            sum += someInts0 + i;
        }
        return sum;
    }
@ -108,6 +111,8 @@ public class TestRedundantSafepointElimination {
    // There should be only one safepoint in the inner loop.
    // Before JDK-8347499, this test would fail due to C2 exiting
    // prematurely when encountering the local non-call safepoint.
+    // Use loop-variant increment (someInts1 + j) to prevent inner loop elimination
+    // by parallel IV replacement.
    @Test
    @IR(counts = {IRNode.SAFEPOINT, "1"},
        phase = CompilePhase.AFTER_LOOP_OPTS)
@ -116,7 +121,7 @@ public class TestRedundantSafepointElimination {
        for (int i = 0; i < 100; i += someInts0) {
            empty();
            for (int j = 0; j < 1000; j++) {
-                sum += someInts1;
+                sum += someInts1 + j;
            }
        }
        return sum;
@ -146,6 +151,8 @@ public class TestRedundantSafepointElimination {
    // Test for nested loops, where the outer loop has a local
    // non-call safepoint.
    // There should be a safepoint in both loops.
+    // Use loop-variant increment (someInts1 + j) to prevent inner loop elimination
+    // by parallel IV replacement.
    @Test
    @IR(counts = {IRNode.SAFEPOINT, "2"},
        phase = CompilePhase.AFTER_LOOP_OPTS)
@ -153,7 +160,7 @@ public class TestRedundantSafepointElimination {
        int sum = 0;
        for (int i = 0; i < 100; i += someInts0) {
            for (int j = 0; j < 1000; j++) {
-                sum += someInts1;
+                sum += someInts1 + j;
            }
        }
        return sum;
--- a/test/hotspot/jtreg/compiler/loopopts/parallel_iv/TestParallelIvInvariantIncrement.java
+++ b/test/hotspot/jtreg/compiler/loopopts/parallel_iv/TestParallelIvInvariantIncrement.java
@ -0,0 +1,420 @@
+/*
+ * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package compiler.loopopts.parallel_iv;
+
+import compiler.lib.ir_framework.*;
+import jdk.test.lib.Asserts;
+
+import static compiler.lib.generators.Generators.G;
+
+/**
+ * @test
+ * @bug 8346177
+ * @key randomness
+ * @summary test parallel IV replacement with loop-invariant increments
+ * @library /test/lib /
+ * @requires vm.compiler2.enabled
+ * @run driver ${test.main.class}
+ */
+public class TestParallelIvInvariantIncrement {
+    // Most @Test methods below are paired with an @Run method that calls them
+    // with generated inputs and compares the result against a closed-form
+    // expression evaluated with the same (wrapping) int/long arithmetic.
+    // The @IR rules list what the compiled graph must contain (counts) or must
+    // not contain (failOn); a rule with phase = BEFORE_CLOOPS is checked at
+    // that compile phase, the others at the framework's default phase.
+
+    public static void main(String[] args) {
+        TestFramework.run();
+    }
+
+    // int accumulator, += loop-invariant int, stride 1, counting up from 0.
+    // Expected IR: no MulI before loop opts, no CountedLoop and >=1 MulI after.
+    @Test
+    @IR(failOn = { IRNode.MUL_I },        phase = CompilePhase.BEFORE_CLOOPS)
+    @IR(failOn = { IRNode.COUNTED_LOOP })
+    @IR(counts = { IRNode.MUL_I, ">=1" })
+    private static int intAdd(int stop, int inc) {
+        int a = 0;
+        for (int i = 0; i < stop; i++) {
+            a += inc;
+        }
+        return a;
+    }
+
+    // Random non-negative trip count and random increment; expects s * inc.
+    @Run(test = "intAdd")
+    private static void runIntAdd() {
+        int s = G.ints().restricted(0, Integer.MAX_VALUE).next();
+        int inc = G.ints().next();
+        Asserts.assertEQ(s * inc, intAdd(s, inc));
+    }
+
+    // Same shape as intAdd but the accumulator is decremented (a -= inc).
+    @Test
+    @IR(failOn = { IRNode.MUL_I },        phase = CompilePhase.BEFORE_CLOOPS)
+    @IR(failOn = { IRNode.COUNTED_LOOP })
+    @IR(counts = { IRNode.MUL_I, ">=1" })
+    private static int intSub(int stop, int inc) {
+        int a = 0;
+        for (int i = 0; i < stop; i++) {
+            a -= inc;
+        }
+        return a;
+    }
+
+    // Expects -s * inc in wrapping int arithmetic.
+    @Run(test = "intSub")
+    private static void runIntSub() {
+        int s = G.ints().restricted(0, Integer.MAX_VALUE).next();
+        int inc = G.ints().next();
+        Asserts.assertEQ(-s * inc, intSub(s, inc));
+    }
+
+    // long accumulator with a loop-invariant long increment inside an
+    // int-indexed loop; the rules look for MulL instead of MulI.
+    @Test
+    @IR(failOn = { IRNode.MUL_L },        phase = CompilePhase.BEFORE_CLOOPS)
+    @IR(failOn = { IRNode.COUNTED_LOOP })
+    @IR(counts = { IRNode.MUL_L, ">=1" })
+    private static long longAdd(int stop, long inc) {
+        long a = 0;
+        for (int i = 0; i < stop; i++) {
+            a += inc;
+        }
+        return a;
+    }
+
+    // Expects (long) s * inc; the trip count is widened before multiplying.
+    @Run(test = "longAdd")
+    private static void runLongAdd() {
+        int s = G.ints().restricted(0, Integer.MAX_VALUE).next();
+        long inc = G.longs().next();
+        Asserts.assertEQ((long) s * inc, longAdd(s, inc));
+    }
+
+    // long accumulator decremented by a loop-invariant long.
+    @Test
+    @IR(failOn = { IRNode.MUL_L },        phase = CompilePhase.BEFORE_CLOOPS)
+    @IR(failOn = { IRNode.COUNTED_LOOP })
+    @IR(counts = { IRNode.MUL_L, ">=1" })
+    private static long longSub(int stop, long inc) {
+        long a = 0;
+        for (int i = 0; i < stop; i++) {
+            a -= inc;
+        }
+        return a;
+    }
+
+    // Expects (-(long) s) * inc.
+    @Run(test = "longSub")
+    private static void runLongSub() {
+        int s = G.ints().restricted(0, Integer.MAX_VALUE).next();
+        long inc = G.longs().next();
+        Asserts.assertEQ(-(long) s * inc, longSub(s, inc));
+    }
+
+    // Primary IV advances by 2 while the accumulator advances by inc.
+    @Test
+    @IR(failOn = { IRNode.COUNTED_LOOP })
+    @IR(counts = { IRNode.MUL_I, ">=1" })
+    private static int stride2(int stop, int inc) {
+        int a = 0;
+        for (int i = 0; i < stop; i += 2) {
+            a += inc;
+        }
+        return a;
+    }
+
+    // stop is capped at MAX_VALUE - 2; expects ceilDiv(s, 2) iterations.
+    @Run(test = "stride2")
+    private static void runStride2() {
+        int s = G.ints().restricted(0, Integer.MAX_VALUE - 2).next();
+        int inc = G.ints().next();
+        Asserts.assertEQ(Math.ceilDiv(s, 2) * inc, stride2(s, inc));
+    }
+
+    // Count-down loop (i-- from stop to 1) with an incrementing accumulator.
+    @Test
+    @IR(failOn = { IRNode.COUNTED_LOOP })
+    @IR(counts = { IRNode.MUL_I, ">=1" })
+    private static int countDown(int stop, int inc) {
+        int a = 0;
+        for (int i = stop; i > 0; i--) {
+            a += inc;
+        }
+        return a;
+    }
+
+    // Expects s * inc, the same as the count-up case.
+    @Run(test = "countDown")
+    private static void runCountDown() {
+        int s = G.ints().restricted(0, Integer.MAX_VALUE).next();
+        int inc = G.ints().next();
+        Asserts.assertEQ(s * inc, countDown(s, inc));
+    }
+
+    // Two accumulators of different types (int and long) in one loop; the
+    // rules require both a MulI and a MulL and no CountedLoop.
+    @Test
+    @IR(failOn = { IRNode.COUNTED_LOOP })
+    @IR(counts = { IRNode.MUL_I, ">=1" })
+    @IR(counts = { IRNode.MUL_L, ">=1" })
+    private static long multipleIVs(int stop, int incA, long incB) {
+        int a = 0;
+        long b = 0;
+        for (int i = 0; i < stop; i++) {
+            a += incA;
+            b += incB;
+        }
+        return a + b;
+    }
+
+    // The int part is multiplied in int (wrapping) and only then widened,
+    // mirroring how `a` is accumulated as an int in multipleIVs.
+    @Run(test = "multipleIVs")
+    private static void runMultipleIVs() {
+        int s = G.ints().restricted(0, Integer.MAX_VALUE).next();
+        int incA = G.ints().next();
+        long incB = G.longs().next();
+        long expected = (long)(s * incA) + ((long) s * incB);
+        Asserts.assertEQ(expected, multipleIVs(s, incA, incB));
+    }
+
+    // Accumulator starts at a non-zero constant (42).
+    @Test
+    @IR(failOn = { IRNode.COUNTED_LOOP })
+    @IR(counts = { IRNode.MUL_I, ">=1" })
+    private static int nonZeroInit(int stop, int inc) {
+        int a = 42;
+        for (int i = 0; i < stop; i++) {
+            a += inc;
+        }
+        return a;
+    }
+
+    // Expects 42 + s * inc.
+    @Run(test = "nonZeroInit")
+    private static void runNonZeroInit() {
+        int s = G.ints().restricted(0, Integer.MAX_VALUE).next();
+        int inc = G.ints().next();
+        Asserts.assertEQ(42 + s * inc, nonZeroInit(s, inc));
+    }
+
+    // The loop also stores into arr, so the rules expect a CountedLoop to
+    // remain while a MulI is still present for the accumulator.
+    @Test
+    @IR(counts = { IRNode.COUNTED_LOOP, ">=1" })
+    @IR(counts = { IRNode.MUL_I, ">=1" })
+    private static int sideEffectLoopAdd(int[] arr, int inc) {
+        int a = 0;
+        for (int i = 0; i < arr.length; i++) {
+            arr[i] = i;
+            a += inc;
+        }
+        return a;
+    }
+
+    // Fixed array length 100; expects 100 * inc.
+    @Run(test = "sideEffectLoopAdd")
+    private static void runSideEffectLoopAdd() {
+        int[] arr = new int[100];
+        int inc = G.ints().next();
+        Asserts.assertEQ(100 * inc, sideEffectLoopAdd(arr, inc));
+    }
+
+    // Subtracting variant of sideEffectLoopAdd.
+    @Test
+    @IR(counts = { IRNode.COUNTED_LOOP, ">=1" })
+    @IR(counts = { IRNode.MUL_I, ">=1" })
+    private static int sideEffectLoopSub(int[] arr, int inc) {
+        int a = 0;
+        for (int i = 0; i < arr.length; i++) {
+            arr[i] = i;
+            a -= inc;
+        }
+        return a;
+    }
+
+    // Fixed array length 100; expects -100 * inc.
+    @Run(test = "sideEffectLoopSub")
+    private static void runSideEffectLoopSub() {
+        int[] arr = new int[100];
+        int inc = G.ints().next();
+        Asserts.assertEQ(-100 * inc, sideEffectLoopSub(arr, inc));
+    }
+
+    // Array-storing loop with three accumulators (two int, one long); the
+    // rules require >=2 MulI, >=1 MulL and a remaining CountedLoop.
+    @Test
+    @IR(counts = { IRNode.COUNTED_LOOP, ">=1" })
+    @IR(counts = { IRNode.MUL_I, ">=2" })
+    @IR(counts = { IRNode.MUL_L, ">=1" })
+    private static long sideEffectLoopMultiIV(int[] arr, int incA, int incB, long incC) {
+        int a = 0;
+        int b = 0;
+        long c = 0;
+        for (int i = 0; i < arr.length; i++) {
+            arr[i] = i;
+            a += incA;
+            b += incB;
+            c += incC;
+        }
+        return a + b + c;
+    }
+
+    // a + b is added as int (wrapping) before widening to long, matching the
+    // evaluation order of `a + b + c` in sideEffectLoopMultiIV.
+    @Run(test = "sideEffectLoopMultiIV")
+    private static void runSideEffectLoopMultiIV() {
+        int[] arr = new int[100];
+        int incA = G.ints().next();
+        int incB = G.ints().next();
+        long incC = G.longs().next();
+        int a = 100 * incA;
+        int b = 100 * incB;
+        long c = 100L * incC;
+        long expected = a + b + c;
+        Asserts.assertEQ(expected, sideEffectLoopMultiIV(arr, incA, incB, incC));
+    }
+
+    // Primary IV stride of 3 with an arbitrary (not necessarily multiple-of-3)
+    // accumulator increment.
+    @Test
+    @IR(failOn = { IRNode.COUNTED_LOOP })
+    @IR(counts = { IRNode.MUL_I, ">=1" })
+    private static int stride3NonMultiple(int stop, int inc) {
+        int a = 0;
+        for (int i = 0; i < stop; i += 3) {
+            a += inc;
+        }
+        return a;
+    }
+
+    // stop is capped at MAX_VALUE - 3; expects ceilDiv(s, 3) iterations.
+    @Run(test = "stride3NonMultiple")
+    private static void runStride3NonMultiple() {
+        int s = G.ints().restricted(0, Integer.MAX_VALUE - 3).next();
+        int inc = G.ints().next();
+        Asserts.assertEQ(Math.ceilDiv(s, 3) * inc, stride3NonMultiple(s, inc));
+    }
+
+    // Same as stride3NonMultiple with a primary IV stride of 7.
+    @Test
+    @IR(failOn = { IRNode.COUNTED_LOOP })
+    @IR(counts = { IRNode.MUL_I, ">=1" })
+    private static int stride7NonMultiple(int stop, int inc) {
+        int a = 0;
+        for (int i = 0; i < stop; i += 7) {
+            a += inc;
+        }
+        return a;
+    }
+
+    // stop is capped at MAX_VALUE - 7; expects ceilDiv(s, 7) iterations.
+    @Run(test = "stride7NonMultiple")
+    private static void runStride7NonMultiple() {
+        int s = G.ints().restricted(0, Integer.MAX_VALUE - 7).next();
+        int inc = G.ints().next();
+        Asserts.assertEQ(Math.ceilDiv(s, 7) * inc, stride7NonMultiple(s, inc));
+    }
+
+    // Primary IV starts at a caller-supplied value instead of 0.
+    @Test
+    @IR(failOn = { IRNode.COUNTED_LOOP })
+    @IR(counts = { IRNode.MUL_I, ">=1" })
+    private static int nonZeroIVStart(int start, int stop, int inc) {
+        int a = 0;
+        for (int i = start; i < stop; i++) {
+            a += inc;
+        }
+        return a;
+    }
+
+    // start is drawn from [0, MAX_VALUE / 2] and stop from [start, MAX_VALUE],
+    // so stop - start does not overflow; expects (stop - start) * inc.
+    @Run(test = "nonZeroIVStart")
+    private static void runNonZeroIVStart() {
+        int start = G.ints().restricted(0, Integer.MAX_VALUE / 2).next();
+        int stop = G.ints().restricted(start, Integer.MAX_VALUE).next();
+        int inc = G.ints().next();
+        Asserts.assertEQ((stop - start) * inc, nonZeroIVStart(start, stop, inc));
+    }
+
+    // Caller-supplied start combined with a primary IV stride of 3.
+    @Test
+    @IR(failOn = { IRNode.COUNTED_LOOP })
+    @IR(counts = { IRNode.MUL_I, ">=1" })
+    private static int nonZeroIVStartStride3(int start, int stop, int inc) {
+        int a = 0;
+        for (int i = start; i < stop; i += 3) {
+            a += inc;
+        }
+        return a;
+    }
+
+    // Expects ceilDiv(stop - start, 3) iterations, each adding inc.
+    @Run(test = "nonZeroIVStartStride3")
+    private static void runNonZeroIVStartStride3() {
+        int start = G.ints().restricted(0, Integer.MAX_VALUE / 2).next();
+        int stop = G.ints().restricted(start, Integer.MAX_VALUE - 3).next();
+        int inc = G.ints().next();
+        int iters = Math.ceilDiv(stop - start, 3);
+        Asserts.assertEQ(iters * inc, nonZeroIVStartStride3(start, stop, inc));
+    }
+
+    // Count-down loop with constant bounds covering Integer.MAX_VALUE
+    // iterations; here the rules forbid both CountedLoop and MulI.
+    @Test
+    // MAX_VALUE * inc is strength-reduced to shifts
+    @IR(failOn = { IRNode.COUNTED_LOOP, IRNode.MUL_I })
+    private static int countDownMaxRange(int inc) {
+        int a = 0;
+        for (int i = Integer.MAX_VALUE; i > 0; i--) {
+            a += inc;
+        }
+        return a;
+    }
+
+    // Expects Integer.MAX_VALUE * inc in wrapping int arithmetic.
+    @Run(test = "countDownMaxRange")
+    private static void runCountDownMaxRange() {
+        int inc = G.ints().next();
+        Asserts.assertEQ(Integer.MAX_VALUE * inc, countDownMaxRange(inc));
+    }
+
+    // Same loop as intAdd; its runner exercises extreme increments and trip
+    // counts of 0 and 1.
+    @Test
+    @IR(failOn = { IRNode.COUNTED_LOOP })
+    @IR(counts = { IRNode.MUL_I, ">=1" })
+    private static int boundaryIncrements(int stop, int inc) {
+        int a = 0;
+        for (int i = 0; i < stop; i++) {
+            a += inc;
+        }
+        return a;
+    }
+
+    // Covers inc in {MAX_VALUE, MIN_VALUE, 0, random} and stop in
+    // {0, 1, random non-negative}.
+    @Run(test = "boundaryIncrements")
+    private static void runBoundaryIncrements() {
+        int s = G.ints().restricted(0, Integer.MAX_VALUE).next();
+        int inc = G.ints().next();
+        Asserts.assertEQ(s * Integer.MAX_VALUE, boundaryIncrements(s, Integer.MAX_VALUE));
+        Asserts.assertEQ(s * Integer.MIN_VALUE, boundaryIncrements(s, Integer.MIN_VALUE));
+        Asserts.assertEQ(0, boundaryIncrements(s, 0));
+        Asserts.assertEQ(0, boundaryIncrements(0, inc));
+        Asserts.assertEQ(inc, boundaryIncrements(1, inc));
+        Asserts.assertEQ(0, boundaryIncrements(0, Integer.MAX_VALUE));
+        Asserts.assertEQ(Integer.MAX_VALUE, boundaryIncrements(1, Integer.MAX_VALUE));
+        Asserts.assertEQ(Integer.MIN_VALUE, boundaryIncrements(1, Integer.MIN_VALUE));
+    }
+
+    // The accumulator is either incremented or decremented by `pow` depending
+    // on a condition (i % 2 == 0) that does not change inside the loop.
+    @Test
+    @IR(failOn = { IRNode.MUL_L }, phase = CompilePhase.BEFORE_CLOOPS) // only MulI for pow exists before
+    @IR(failOn = { IRNode.COUNTED_LOOP })
+    @IR(counts = { IRNode.MUL_L, ">=1" })
+    private static long conditionalAccum(int load, int i) {
+        long x = 0;
+        long pow = (i % 8) * (i % 16);
+        for (int j = 0; j < load; j++) {
+            if (i % 2 == 0) {
+                x += pow;
+            } else {
+                x -= pow;
+            }
+        }
+        return x;
+    }
+
+    // Recomputes pow the same way (int multiply, then widened) and expects
+    // +load * pow for even i, -load * pow otherwise.
+    @Run(test = "conditionalAccum")
+    private static void runConditionalAccum() {
+        int load = G.ints().restricted(0, Integer.MAX_VALUE).next();
+        int i = G.ints().next();
+        long pow = (i % 8) * (i % 16);
+        long expected = (i % 2 == 0) ? (long) load * pow : -(long) load * pow;
+        Asserts.assertEQ(expected, conditionalAccum(load, i));
+    }
+
+    // Negative test: the per-iteration addend (i * inc) depends on the loop
+    // index, and the rule requires a CountedLoop to remain. It is invoked with
+    // fixed arguments (42, 42) and its return value is not checked.
+    @Test
+    @Arguments(values = { Argument.NUMBER_42, Argument.NUMBER_42 })
+    @IR(counts = { IRNode.COUNTED_LOOP, ">=1" })
+    private static int loopVariantNotOptimized(int stop, int inc) {
+        int a = 0;
+        for (int i = 0; i < stop; i++) {
+            a += i * inc;
+        }
+        return a;
+    }
+}