8382700: C2: Delay inlining instead of giving up when hitting NodeCountInliningCutoff

Co-authored-by: Vladimir Ivanov <vlivanov@openjdk.org>
Co-authored-by: Maurizio Cimadamore <mcimadamore@openjdk.org>
Co-authored-by: Ioannis Tsakpinis <iotsakp@gmail.com>
Reviewed-by: kvn, vlivanov
This commit is contained in:
Quan Anh Mai 2026-04-30 18:17:38 +00:00
parent 4b45849b76
commit 41a5c032f5
7 changed files with 1598 additions and 10 deletions

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 1998, 2025, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1998, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -391,11 +391,13 @@ bool InlineTree::try_to_inline(ciMethod* callee_method, ciMethod* caller_method,
// suppress a few checks for accessors and trivial methods
if (callee_method->code_size() > MaxTrivialSize) {
// don't inline into giant methods
// We don't want to inline a call into a sufficiently large graph. However, this cannot be
// decided during parsing because there are more bytecodes in the caller that need parsing, and
// determining dead nodes is hard. As a result, we stop parse inlining at a relatively
// conservative threshold, and resume during incremental inlining, when there is no more
// parsing in the caller, and node liveness is more easily determined.
if (C->over_inlining_cutoff()) {
if ((!callee_method->force_inline() && !caller_method->is_compiled_lambda_form())
|| !IncrementalInline) {
if (!C->should_delay_after_inlining_cutoff(callee_method, caller_method)) {
set_msg("NodeCountInliningCutoff");
return false;
} else {

View File

@ -507,9 +507,13 @@
/* controls for heat-based inlining */ \
\
develop(intx, NodeCountInliningCutoff, 18000, \
"If parser node generation exceeds limit stop inlining") \
"If node count exceeds limit stop inlining") \
range(0, max_jint) \
\
product(bool, DelayAfterInliningCutoff, true, DIAGNOSTIC, \
"If node count exceeds limit during parsing, attempt inlining " \
"later instead of giving up completely") \
\
product(intx, MaxNodeLimit, 80000, \
"Maximum number of nodes") \
range(1000, max_jint / 3) \

View File

@ -646,6 +646,7 @@ Compile::Compile(ciEnv* ci_env, ciMethod* target, int osr_bci,
_stub_id(StubId::NO_STUBID),
_stub_entry_point(nullptr),
_max_node_limit(MaxNodeLimit),
_node_count_inlining_cutoff(NodeCountInliningCutoff),
_post_loop_opts_phase(false),
_merge_stores_phase(false),
_allow_macro_nodes(true),
@ -922,6 +923,7 @@ Compile::Compile(ciEnv* ci_env,
_stub_id(stub_id),
_stub_entry_point(nullptr),
_max_node_limit(MaxNodeLimit),
_node_count_inlining_cutoff(NodeCountInliningCutoff),
_post_loop_opts_phase(false),
_merge_stores_phase(false),
_allow_macro_nodes(true),
@ -2170,8 +2172,8 @@ void Compile::inline_incrementally(PhaseIterGVN& igvn) {
}
while (_late_inlines.length() > 0) {
if (live_nodes() > (uint)LiveNodeCountInliningCutoff) {
if (low_live_nodes < (uint)LiveNodeCountInliningCutoff * 8 / 10) {
if (live_nodes() > node_count_inlining_cutoff()) {
if (low_live_nodes < node_count_inlining_cutoff() * 8 / 10) {
TracePhase tp(_t_incrInline_ideal);
// PhaseIdealLoop is expensive so we only try it once we are
// out of live nodes and we only try it again if the previous
@ -2182,7 +2184,7 @@ void Compile::inline_incrementally(PhaseIterGVN& igvn) {
_major_progress = true;
}
if (live_nodes() > (uint)LiveNodeCountInliningCutoff) {
if (live_nodes() > node_count_inlining_cutoff()) {
bool do_print_inlining = print_inlining() || print_intrinsics();
if (do_print_inlining || log() != nullptr) {
// Print inlining message for candidates that we couldn't inline for lack of space.

View File

@ -319,6 +319,7 @@ class Compile : public Phase {
int _fixed_slots; // count of frame slots not allocated by the register
// allocator i.e. locks, original deopt pc, etc.
uintx _max_node_limit; // Max unique node count during a single compilation.
uint _node_count_inlining_cutoff; // Number of nodes in the graph above which inlining is denied
bool _post_loop_opts_phase; // Loop opts are finished.
bool _merge_stores_phase; // Phase for merging stores, after post loop opts phase.
@ -654,6 +655,8 @@ public:
void set_print_intrinsics(bool z) { _print_intrinsics = z; }
uint max_node_limit() const { return (uint)_max_node_limit; }
void set_max_node_limit(uint n) { _max_node_limit = n; }
// Number of nodes in the graph above which inlining is denied for this Compile.
uint node_count_inlining_cutoff() const { return _node_count_inlining_cutoff; }
// Replaces the node-count inlining cutoff stored in this Compile with n.
void set_node_count_inlining_cutoff(uint n) { _node_count_inlining_cutoff = n; }
bool clinit_barrier_on_entry() { return _clinit_barrier_on_entry; }
void set_clinit_barrier_on_entry(bool z) { _clinit_barrier_on_entry = z; }
bool has_monitors() const { return _has_monitors; }
@ -1004,6 +1007,7 @@ public:
should_delay_boxing_inlining(call_method, jvms) ||
should_delay_vector_inlining(call_method, jvms);
}
bool should_delay_after_inlining_cutoff(ciMethod* callee, ciMethod* caller);
bool should_delay_string_inlining(ciMethod* call_method, JVMState* jvms);
bool should_delay_boxing_inlining(ciMethod* call_method, JVMState* jvms);
bool should_delay_vector_inlining(ciMethod* call_method, JVMState* jvms);
@ -1117,7 +1121,7 @@ public:
// and avoid thrashing when live node count is close to the limit.
// Keep in mind that live_nodes() isn't accurate during inlining until
// dead node elimination step happens (see Compile::inline_incrementally).
return live_nodes() > (uint)LiveNodeCountInliningCutoff * 11 / 10;
return live_nodes() > node_count_inlining_cutoff() * 11 / 10;
}
}

View File

@ -415,6 +415,22 @@ CallGenerator* Compile::call_generator(ciMethod* callee, int vtable_index, bool
}
}
// Once Compile::over_inlining_cutoff has fired for a call site, decide between
// declining the callee (false) and trying to inline it again later (true):
//  - with IncrementalInline off, never delay;
//  - with DelayAfterInliningCutoff on, always delay;
//  - otherwise delay only when the callee is force-inline or the caller is a
//    compiled lambda form.
bool Compile::should_delay_after_inlining_cutoff(ciMethod* callee, ciMethod* caller) {
  // Short-circuit evaluation keeps the flag checks ahead of the method queries.
  return IncrementalInline &&
         (DelayAfterInliningCutoff || callee->force_inline() || caller->is_compiled_lambda_form());
}
// Return true for methods that shouldn't be inlined early so that
// they are easier to analyze and optimize as intrinsics.
bool Compile::should_delay_string_inlining(ciMethod* call_method, JVMState* jvms) {
@ -551,6 +567,7 @@ void Parse::do_call() {
// Bump max node limit for JSR292 users
if (bc() == Bytecodes::_invokedynamic || orig_callee->is_method_handle_intrinsic()) {
C->set_max_node_limit(3*MaxNodeLimit);
C->set_node_count_inlining_cutoff(LiveNodeCountInliningCutoff);
}
// uncommon-trap when callee is unloaded, uninitialized or will not link

View File

@ -0,0 +1,93 @@
/*
* Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package compiler.inlining;
import compiler.lib.ir_framework.*;
/*
* @test
* @bug 8382700
* @summary verify that method inlining continues during incremental inline after it has stopped
* during parsing due to NodeCountInliningCutoff
* @library /test/lib /
* @run driver ${test.main.class}
*/
// IR-framework test for the DelayAfterInliningCutoff flag. test() sits on top of a
// six-level chain of static methods (call1 .. call6) in which every level invokes the
// next one four times; the @IR rules on test() check for IRNode.CALL matches with the
// flag switched on and off.
public class TestDelayAfterInliningCutoff {
// Configures the framework and runs test() under two scenarios, one per flag setting.
public static void main(String[] args) {
var framework = new TestFramework();
// A single warm-up invocation before compilation.
framework.setDefaultWarmup(1);
// Added for every scenario; presumably required so the scenarios can toggle
// DelayAfterInliningCutoff - confirm against the flag's declaration.
framework.addFlags("-XX:+UnlockDiagnosticVMOptions");
// Work around the issue with incorrect call counts at call sites
framework.addFlags("-XX:MinInlineFrequencyRatio=0");
// Scenario 0: flag enabled. Scenario 1: flag disabled.
framework.addScenarios(new Scenario(0, "-XX:+DelayAfterInliningCutoff"));
framework.addScenarios(new Scenario(1, "-XX:-DelayAfterInliningCutoff"));
framework.start();
}
// With the flag enabled, no IRNode.CALL match may remain in the compiled test();
// with the flag disabled, at least one IRNode.CALL match is expected.
// Fully expanded, the tree below contains 4 + 16 + 64 + 256 + 1024 + 4096 = 5460 call
// sites; presumably that is enough to cross NodeCountInliningCutoff while parsing -
// the exact node counts are not visible here.
@Test
@IR(failOn = IRNode.CALL, applyIf = {"DelayAfterInliningCutoff", "true"})
@IR(counts = {IRNode.CALL, ">= 1"}, applyIf = {"DelayAfterInliningCutoff", "false"})
public static void test() {
call1();
call1();
call1();
call1();
}
// Levels 1-5: each method only fans out to four calls of the next level.
private static void call1() {
call2();
call2();
call2();
call2();
}
private static void call2() {
call3();
call3();
call3();
call3();
}
private static void call3() {
call4();
call4();
call4();
call4();
}
private static void call4() {
call5();
call5();
call5();
call5();
}
private static void call5() {
call6();
call6();
call6();
call6();
}
// Leaf of the call tree: intentionally empty.
private static void call6() {}
}

File diff suppressed because it is too large Load Diff