8351414: C2: MergeStores must happen after RangeCheck smearing

Reviewed-by: chagedorn, qamai
This commit is contained in:
Emanuel Peter 2025-03-11 07:10:31 +00:00
parent 8a5ed47f00
commit 4cf63160ad
8 changed files with 110 additions and 4 deletions

View File

@ -402,6 +402,9 @@ void Compile::remove_useless_node(Node* dead) {
if (dead->for_post_loop_opts_igvn()) {
remove_from_post_loop_opts_igvn(dead);
}
if (dead->for_merge_stores_igvn()) {
remove_from_merge_stores_igvn(dead);
}
if (dead->is_Call()) {
remove_useless_late_inlines( &_late_inlines, dead);
remove_useless_late_inlines( &_string_late_inlines, dead);
@ -453,6 +456,7 @@ void Compile::disconnect_useless_nodes(Unique_Node_List& useful, Unique_Node_Lis
remove_useless_nodes(_template_assertion_predicate_opaqs, useful); // remove useless Assertion Predicate opaque nodes
remove_useless_nodes(_expensive_nodes, useful); // remove useless expensive nodes
remove_useless_nodes(_for_post_loop_igvn, useful); // remove useless node recorded for post loop opts IGVN pass
remove_useless_nodes(_for_merge_stores_igvn, useful); // remove useless node recorded for merge stores IGVN pass
remove_useless_unstable_if_traps(useful); // remove useless unstable_if traps
remove_useless_coarsened_locks(useful); // remove useless coarsened locks nodes
#ifdef ASSERT
@ -627,6 +631,7 @@ Compile::Compile(ciEnv* ci_env, ciMethod* target, int osr_bci,
_stub_entry_point(nullptr),
_max_node_limit(MaxNodeLimit),
_post_loop_opts_phase(false),
_merge_stores_phase(false),
_allow_macro_nodes(true),
_inlining_progress(false),
_inlining_incrementally(false),
@ -651,6 +656,7 @@ Compile::Compile(ciEnv* ci_env, ciMethod* target, int osr_bci,
_template_assertion_predicate_opaqs(comp_arena(), 8, 0, nullptr),
_expensive_nodes(comp_arena(), 8, 0, nullptr),
_for_post_loop_igvn(comp_arena(), 8, 0, nullptr),
_for_merge_stores_igvn(comp_arena(), 8, 0, nullptr),
_unstable_if_traps(comp_arena(), 8, 0, nullptr),
_coarsened_locks(comp_arena(), 8, 0, nullptr),
_congraph(nullptr),
@ -905,6 +911,7 @@ Compile::Compile(ciEnv* ci_env,
_stub_entry_point(nullptr),
_max_node_limit(MaxNodeLimit),
_post_loop_opts_phase(false),
_merge_stores_phase(false),
_allow_macro_nodes(true),
_inlining_progress(false),
_inlining_incrementally(false),
@ -923,6 +930,7 @@ Compile::Compile(ciEnv* ci_env,
_log(ci_env->log()),
_first_failure_details(nullptr),
_for_post_loop_igvn(comp_arena(), 8, 0, nullptr),
_for_merge_stores_igvn(comp_arena(), 8, 0, nullptr),
_congraph(nullptr),
NOT_PRODUCT(_igv_printer(nullptr) COMMA)
_unique(0),
@ -1870,6 +1878,49 @@ void Compile::process_for_post_loop_opts_igvn(PhaseIterGVN& igvn) {
}
}
void Compile::record_for_merge_stores_igvn(Node* n) {
if (!n->for_merge_stores_igvn()) {
assert(!_for_merge_stores_igvn.contains(n), "duplicate");
n->add_flag(Node::NodeFlags::Flag_for_merge_stores_igvn);
_for_merge_stores_igvn.append(n);
}
}
void Compile::remove_from_merge_stores_igvn(Node* n) {
n->remove_flag(Node::NodeFlags::Flag_for_merge_stores_igvn);
_for_merge_stores_igvn.remove(n);
}
// We need to wait with merging stores until RangeCheck smearing has removed the RangeChecks during
// the post loops IGVN phase. If we do it earlier, then there may still be some RangeChecks between
// the stores, and we merge the wrong sequence of stores.
// Example:
// StoreI RangeCheck StoreI StoreI RangeCheck StoreI
// Apply MergeStores:
// StoreI RangeCheck [ StoreL ] RangeCheck StoreI
// Remove more RangeChecks:
// StoreI [ StoreL ] StoreI
// But now it would have been better to do this instead:
// [ StoreL ] [ StoreL ]
//
// Note: we allow stores to merge in this dedicated IGVN round, and any later IGVN round,
// since we never unset _merge_stores_phase.
void Compile::process_for_merge_stores_igvn(PhaseIterGVN& igvn) {
C->set_merge_stores_phase();
if (_for_merge_stores_igvn.length() > 0) {
while (_for_merge_stores_igvn.length() > 0) {
Node* n = _for_merge_stores_igvn.pop();
n->remove_flag(Node::NodeFlags::Flag_for_merge_stores_igvn);
igvn._worklist.push(n);
}
igvn.optimize();
if (failing()) return;
assert(_for_merge_stores_igvn.length() == 0, "no more delayed nodes allowed");
print_method(PHASE_AFTER_MERGE_STORES, 3);
}
}
void Compile::record_unstable_if_trap(UnstableIfTrap* trap) {
if (OptimizeUnstableIf) {
_unstable_if_traps.append(trap);
@ -2429,6 +2480,8 @@ void Compile::Optimize() {
process_for_post_loop_opts_igvn(igvn);
process_for_merge_stores_igvn(igvn);
if (failing()) return;
#ifdef ASSERT

View File

@ -318,6 +318,7 @@ class Compile : public Phase {
uintx _max_node_limit; // Max unique node count during a single compilation.
bool _post_loop_opts_phase; // Loop opts are finished.
bool _merge_stores_phase; // Phase for merging stores, after post loop opts phase.
bool _allow_macro_nodes; // True if we allow creation of macro nodes.
int _major_progress; // Count of something big happening
@ -374,6 +375,7 @@ class Compile : public Phase {
GrowableArray<Node*> _template_assertion_predicate_opaqs;
GrowableArray<Node*> _expensive_nodes; // List of nodes that are expensive to compute and that we'd better not let the GVN freely common
GrowableArray<Node*> _for_post_loop_igvn; // List of nodes for IGVN after loop opts are over
GrowableArray<Node*> _for_merge_stores_igvn; // List of nodes for IGVN merge stores
GrowableArray<UnstableIfTrap*> _unstable_if_traps; // List of ifnodes after IGVN
GrowableArray<Node_List*> _coarsened_locks; // List of coarsened Lock and Unlock nodes
ConnectionGraph* _congraph;
@ -766,6 +768,12 @@ public:
void remove_useless_unstable_if_traps(Unique_Node_List &useful);
void process_for_unstable_if_traps(PhaseIterGVN& igvn);
bool merge_stores_phase() { return _merge_stores_phase; }
void set_merge_stores_phase() { _merge_stores_phase = true; }
void record_for_merge_stores_igvn(Node* n);
void remove_from_merge_stores_igvn(Node* n);
void process_for_merge_stores_igvn(PhaseIterGVN& igvn);
void shuffle_macro_nodes();
void sort_macro_nodes();

View File

@ -3450,12 +3450,23 @@ Node *StoreNode::Ideal(PhaseGVN *phase, bool can_reshape) {
}
if (MergeStores && UseUnalignedAccesses) {
if (phase->C->post_loop_opts_phase()) {
if (phase->C->merge_stores_phase()) {
MergePrimitiveStores merge(phase, this);
Node* progress = merge.run();
if (progress != nullptr) { return progress; }
} else {
phase->C->record_for_post_loop_opts_igvn(this);
// We need to wait with merging stores until RangeCheck smearing has removed the RangeChecks during
// the post loops IGVN phase. If we do it earlier, then there may still be some RangeChecks between
// the stores, and we merge the wrong sequence of stores.
// Example:
// StoreI RangeCheck StoreI StoreI RangeCheck StoreI
// Apply MergeStores:
// StoreI RangeCheck [ StoreL ] RangeCheck StoreI
// Remove more RangeChecks:
// StoreI [ StoreL ] StoreI
// But now it would have been better to do this instead:
// [ StoreL ] [ StoreL ]
phase->C->record_for_merge_stores_igvn(this);
}
}

View File

@ -508,6 +508,11 @@ Node *Node::clone() const {
// If it is applicable, it will happen anyway when the cloned node is registered with IGVN.
n->remove_flag(Node::NodeFlags::Flag_for_post_loop_opts_igvn);
}
if (for_merge_stores_igvn()) {
// Don't add cloned node to Compile::_for_merge_stores_igvn list automatically.
// If it is applicable, it will happen anyway when the cloned node is registered with IGVN.
n->remove_flag(Node::NodeFlags::Flag_for_merge_stores_igvn);
}
if (n->is_ParsePredicate()) {
C->add_parse_predicate(n->as_ParsePredicate());
}
@ -615,6 +620,9 @@ void Node::destruct(PhaseValues* phase) {
if (for_post_loop_opts_igvn()) {
compile->remove_from_post_loop_opts_igvn(this);
}
if (for_merge_stores_igvn()) {
compile->remove_from_merge_stores_igvn(this);
}
if (is_SafePoint()) {
as_SafePoint()->delete_replaced_nodes();

View File

@ -830,8 +830,9 @@ public:
Flag_is_expensive = 1 << 13,
Flag_is_predicated_vector = 1 << 14,
Flag_for_post_loop_opts_igvn = 1 << 15,
Flag_is_removed_by_peephole = 1 << 16,
Flag_is_predicated_using_blend = 1 << 17,
Flag_for_merge_stores_igvn = 1 << 16,
Flag_is_removed_by_peephole = 1 << 17,
Flag_is_predicated_using_blend = 1 << 18,
_last_flag = Flag_is_predicated_using_blend
};
@ -1075,6 +1076,7 @@ public:
bool is_scheduled() const { return (_flags & Flag_is_scheduled) != 0; }
bool for_post_loop_opts_igvn() const { return (_flags & Flag_for_post_loop_opts_igvn) != 0; }
bool for_merge_stores_igvn() const { return (_flags & Flag_for_merge_stores_igvn) != 0; }
// Is 'n' possibly a loop entry (i.e. a Parse Predicate projection)?
static bool may_be_loop_entry(Node* n) {

View File

@ -88,6 +88,7 @@
flags(CCP1, "PhaseCCP 1") \
flags(ITER_GVN2, "Iter GVN 2") \
flags(PHASEIDEALLOOP_ITERATIONS, "PhaseIdealLoop iterations") \
flags(AFTER_MERGE_STORES, "After Merge Stores") \
flags(BEFORE_MACRO_EXPANSION , "Before Macro Expansion") \
flags(AFTER_MACRO_EXPANSION_STEP, "After Macro Expansion Step") \
flags(AFTER_MACRO_EXPANSION, "After Macro Expansion") \

View File

@ -49,6 +49,15 @@ import java.util.Random;
* @run main compiler.c2.TestMergeStores unaligned
*/
/*
* @test
* @bug 8318446 8331054 8331311 8335392 8348959 8351414
* @summary Test merging of consecutive stores
* @modules java.base/jdk.internal.misc
* @library /test/lib /
* @run main compiler.c2.TestMergeStores StressIGVN
*/
public class TestMergeStores {
static int RANGE = 1000;
private static final Unsafe UNSAFE = Unsafe.getUnsafe();
@ -99,6 +108,19 @@ public class TestMergeStores {
switch (args[0]) {
case "aligned" -> { framework.addFlags("-XX:-UseUnalignedAccesses"); }
case "unaligned" -> { framework.addFlags("-XX:+UseUnalignedAccesses"); }
// StressIGVN can mix up the order of RangeCheck smearing and MergeStores optimization,
// if they run in the same IGVN round. When we did not yet have a dedicated IGVN round
// after post loop opts for MergeStrores, it could happen that we would only remove
// RangeChecks after already merging some stores, and now they would have to be split
// up again and re-merged with different stores. Example:
// StoreI RangeCheck StoreI StoreI RangeCheck StoreI
// Apply MergeStores:
// StoreI RangeCheck [ StoreL ] RangeCheck StoreI
// Remove more RangeChecks:
// StoreI [ StoreL ] StoreI
// But now it would have been better to do this instead:
// [ StoreL ] [ StoreL ]
case "StressIGVN" -> { framework.addFlags("-XX:+StressIGVN"); }
default -> { throw new RuntimeException("Test argument not recognized: " + args[0]); }
}
framework.start();

View File

@ -95,6 +95,7 @@ public enum CompilePhase {
CCP1("PhaseCCP 1"),
ITER_GVN2("Iter GVN 2"),
PHASEIDEALLOOP_ITERATIONS("PhaseIdealLoop iterations"),
AFTER_MERGE_STORES("After Merge Stores"),
BEFORE_MACRO_EXPANSION("Before Macro Expansion"),
AFTER_MACRO_EXPANSION_STEP("After Macro Expansion Step"),
AFTER_MACRO_EXPANSION("After Macro Expansion"),