8382700: C2: Delay inlining instead of giving up when hit NodeCountInliningCutoff

Co-authored-by: Vladimir Ivanov <vlivanov@openjdk.org>
Co-authored-by: Maurizio Cimadamore <mcimadamore@openjdk.org>
Co-authored-by: Ioannis Tsakpinis <iotsakp@gmail.com>
Reviewed-by: kvn, vlivanov
2026-05-02 17:55:04 +00:00 · 2026-04-30 18:17:38 +00:00 · 2026-04-30 18:17:38 +00:00 · 41a5c032f5
commit 41a5c032f5
parent 4b45849b76
7 changed files with 1598 additions and 10 deletions
--- a/src/hotspot/share/opto/bytecodeInfo.cpp
+++ b/src/hotspot/share/opto/bytecodeInfo.cpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2026, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -391,11 +391,13 @@ bool InlineTree::try_to_inline(ciMethod* callee_method, ciMethod* caller_method,

  // suppress a few checks for accessors and trivial methods
  if (callee_method->code_size() > MaxTrivialSize) {
-
-    // don't inline into giant methods
+    // We don't want to inline a call into a sufficiently large graph. However, this cannot be
+    // decided during parsing because there are more bytecodes in the caller that need parsing, and
+    // determining dead nodes is hard. As a result, we stop parse inlining at a relatively
+    // conservative threshold, and resume during incremental inlining, when there is no more
+    // parsing in the caller, and node liveness is more easily determined.
    if (C->over_inlining_cutoff()) {
-      if ((!callee_method->force_inline() && !caller_method->is_compiled_lambda_form())
-          || !IncrementalInline) {
+      if (!C->should_delay_after_inlining_cutoff(callee_method, caller_method)) {
        set_msg("NodeCountInliningCutoff");
        return false;
      } else {
--- a/src/hotspot/share/opto/c2_globals.hpp
+++ b/src/hotspot/share/opto/c2_globals.hpp
@ -507,9 +507,13 @@
  /* controls for heat-based inlining */                                    \
                                                                            \
  develop(intx, NodeCountInliningCutoff, 18000,                             \
-          "If parser node generation exceeds limit stop inlining")          \
+          "If node count exceeds limit stop inlining")                      \
          range(0, max_jint)                                                \
                                                                            \
+  product(bool, DelayAfterInliningCutoff, true, DIAGNOSTIC,                 \
+          "If node count exceeds limit during parsing, attempt inlining "   \
+          "later instead of giving up completely")                          \
+                                                                            \
  product(intx, MaxNodeLimit, 80000,                                        \
          "Maximum number of nodes")                                        \
          range(1000, max_jint / 3)                                         \
--- a/src/hotspot/share/opto/compile.cpp
+++ b/src/hotspot/share/opto/compile.cpp
@ -646,6 +646,7 @@ Compile::Compile(ciEnv* ci_env, ciMethod* target, int osr_bci,
      _stub_id(StubId::NO_STUBID),
      _stub_entry_point(nullptr),
      _max_node_limit(MaxNodeLimit),
+      _node_count_inlining_cutoff(NodeCountInliningCutoff),
      _post_loop_opts_phase(false),
      _merge_stores_phase(false),
      _allow_macro_nodes(true),
@ -922,6 +923,7 @@ Compile::Compile(ciEnv* ci_env,
      _stub_id(stub_id),
      _stub_entry_point(nullptr),
      _max_node_limit(MaxNodeLimit),
+      _node_count_inlining_cutoff(NodeCountInliningCutoff),
      _post_loop_opts_phase(false),
      _merge_stores_phase(false),
      _allow_macro_nodes(true),
@ -2170,8 +2172,8 @@ void Compile::inline_incrementally(PhaseIterGVN& igvn) {
  }

  while (_late_inlines.length() > 0) {
-    if (live_nodes() > (uint)LiveNodeCountInliningCutoff) {
-      if (low_live_nodes < (uint)LiveNodeCountInliningCutoff * 8 / 10) {
+    if (live_nodes() > node_count_inlining_cutoff()) {
+      if (low_live_nodes < node_count_inlining_cutoff() * 8 / 10) {
        TracePhase tp(_t_incrInline_ideal);
        // PhaseIdealLoop is expensive so we only try it once we are
        // out of live nodes and we only try it again if the previous
@ -2182,7 +2184,7 @@ void Compile::inline_incrementally(PhaseIterGVN& igvn) {
        _major_progress = true;
      }

-      if (live_nodes() > (uint)LiveNodeCountInliningCutoff) {
+      if (live_nodes() > node_count_inlining_cutoff()) {
        bool do_print_inlining = print_inlining() || print_intrinsics();
        if (do_print_inlining || log() != nullptr) {
          // Print inlining message for candidates that we couldn't inline for lack of space.
--- a/src/hotspot/share/opto/compile.hpp
+++ b/src/hotspot/share/opto/compile.hpp
@ -319,6 +319,7 @@ class Compile : public Phase {
  int                   _fixed_slots;           // count of frame slots not allocated by the register
                                                // allocator i.e. locks, original deopt pc, etc.
  uintx                 _max_node_limit;        // Max unique node count during a single compilation.
+  uint             _node_count_inlining_cutoff; // Number of nodes in the graph above which inlining is denied

  bool                  _post_loop_opts_phase;  // Loop opts are finished.
  bool                  _merge_stores_phase;    // Phase for merging stores, after post loop opts phase.
@ -654,6 +655,8 @@ public:
  void          set_print_intrinsics(bool z)     { _print_intrinsics = z; }
  uint              max_node_limit() const       { return (uint)_max_node_limit; }
  void          set_max_node_limit(uint n)       { _max_node_limit = n; }
+  uint           node_count_inlining_cutoff() const { return _node_count_inlining_cutoff; }
+  void       set_node_count_inlining_cutoff(uint n) { _node_count_inlining_cutoff = n; }
  bool              clinit_barrier_on_entry()       { return _clinit_barrier_on_entry; }
  void          set_clinit_barrier_on_entry(bool z) { _clinit_barrier_on_entry = z; }
  bool              has_monitors() const         { return _has_monitors; }
@ -1004,6 +1007,7 @@ public:
           should_delay_boxing_inlining(call_method, jvms) ||
           should_delay_vector_inlining(call_method, jvms);
  }
+  bool should_delay_after_inlining_cutoff(ciMethod* callee, ciMethod* caller);
  bool should_delay_string_inlining(ciMethod* call_method, JVMState* jvms);
  bool should_delay_boxing_inlining(ciMethod* call_method, JVMState* jvms);
  bool should_delay_vector_inlining(ciMethod* call_method, JVMState* jvms);
@ -1117,7 +1121,7 @@ public:
      // and avoid thrashing when live node count is close to the limit.
      // Keep in mind that live_nodes() isn't accurate during inlining until
      // dead node elimination step happens (see Compile::inline_incrementally).
-      return live_nodes() > (uint)LiveNodeCountInliningCutoff * 11 / 10;
+      return live_nodes() > node_count_inlining_cutoff() * 11 / 10;
    }
  }

--- a/src/hotspot/share/opto/doCall.cpp
+++ b/src/hotspot/share/opto/doCall.cpp
@ -415,6 +415,22 @@ CallGenerator* Compile::call_generator(ciMethod* callee, int vtable_index, bool
  }
 }

+// Called once Compile::over_inlining_cutoff has tripped while parsing: decides whether the call
+// to callee is rejected for good (false) or retried later during incremental inlining (true).
+//  - IncrementalInline off: never delay.
+//  - DelayAfterInliningCutoff on: always delay.
+//  - otherwise: delay only force-inline callees or callers that are compiled lambda forms.
+bool Compile::should_delay_after_inlining_cutoff(ciMethod* callee, ciMethod* caller) {
+  if (IncrementalInline) {
+    // || short-circuits, so callee/caller are consulted only when the flag is off.
+    return DelayAfterInliningCutoff ||
+           callee->force_inline() ||
+           caller->is_compiled_lambda_form();
+  }
+
+  return false;
+}
+
 // Return true for methods that shouldn't be inlined early so that
 // they are easier to analyze and optimize as intrinsics.
 bool Compile::should_delay_string_inlining(ciMethod* call_method, JVMState* jvms) {
@ -551,6 +567,7 @@ void Parse::do_call() {
  // Bump max node limit for JSR292 users
  if (bc() == Bytecodes::_invokedynamic || orig_callee->is_method_handle_intrinsic()) {
    C->set_max_node_limit(3*MaxNodeLimit);
+    C->set_node_count_inlining_cutoff(LiveNodeCountInliningCutoff);
  }

  // uncommon-trap when callee is unloaded, uninitialized or will not link
--- a/test/hotspot/jtreg/compiler/inlining/TestDelayAfterInliningCutoff.java
+++ b/test/hotspot/jtreg/compiler/inlining/TestDelayAfterInliningCutoff.java
@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package compiler.inlining;
+
+import compiler.lib.ir_framework.*;
+
+/*
+ * @test
+ * @bug 8382700
+ * @summary verify that method inlining continues during incremental inline after it has stopped
+ *          during parsing due to NodeCountInliningCutoff
+ * @library /test/lib /
+ * @run driver ${test.main.class}
+ */
+public class TestDelayAfterInliningCutoff {
+    public static void main(String[] args) {
+        var framework = new TestFramework();
+        framework.setDefaultWarmup(1);
+        framework.addFlags("-XX:+UnlockDiagnosticVMOptions"); // DelayAfterInliningCutoff is a DIAGNOSTIC flag
+        // Work around the issue with incorrect call counts at call sites
+        framework.addFlags("-XX:MinInlineFrequencyRatio=0");
+        framework.addScenarios(new Scenario(0, "-XX:+DelayAfterInliningCutoff")); // checked by the failOn rule
+        framework.addScenarios(new Scenario(1, "-XX:-DelayAfterInliningCutoff")); // checked by the counts rule
+        framework.start();
+    }
+    // Root of a 4-ary call tree, six levels deep (call1..call6): 5460 call sites if fully inlined.
+    @Test
+    @IR(failOn = IRNode.CALL, applyIf = {"DelayAfterInliningCutoff", "true"}) // no call node may remain
+    @IR(counts = {IRNode.CALL, ">= 1"}, applyIf = {"DelayAfterInliningCutoff", "false"}) // at least one call node remains
+    public static void test() {
+        call1();
+        call1();
+        call1();
+        call1();
+    }
+
+    private static void call1() {
+        call2();
+        call2();
+        call2();
+        call2();
+    }
+
+    private static void call2() {
+        call3();
+        call3();
+        call3();
+        call3();
+    }
+
+    private static void call3() {
+        call4();
+        call4();
+        call4();
+        call4();
+    }
+
+    private static void call4() {
+        call5();
+        call5();
+        call5();
+        call5();
+    }
+
+    private static void call5() {
+        call6();
+        call6();
+        call6();
+        call6();
+    }
+
+    private static void call6() {} // empty leaf of the call tree
+}
--- a/test/micro/org/openjdk/bench/java/lang/foreign/FFMStructAccessTest.java
+++ b/test/micro/org/openjdk/bench/java/lang/foreign/FFMStructAccessTest.java