diff --git a/src/hotspot/share/opto/compile.cpp b/src/hotspot/share/opto/compile.cpp index 014463ccc21..026aa1ba4fb 100644 --- a/src/hotspot/share/opto/compile.cpp +++ b/src/hotspot/share/opto/compile.cpp @@ -402,6 +402,9 @@ void Compile::remove_useless_node(Node* dead) { if (dead->for_post_loop_opts_igvn()) { remove_from_post_loop_opts_igvn(dead); } + if (dead->for_merge_stores_igvn()) { + remove_from_merge_stores_igvn(dead); + } if (dead->is_Call()) { remove_useless_late_inlines( &_late_inlines, dead); remove_useless_late_inlines( &_string_late_inlines, dead); @@ -453,6 +456,7 @@ void Compile::disconnect_useless_nodes(Unique_Node_List& useful, Unique_Node_Lis remove_useless_nodes(_template_assertion_predicate_opaqs, useful); // remove useless Assertion Predicate opaque nodes remove_useless_nodes(_expensive_nodes, useful); // remove useless expensive nodes remove_useless_nodes(_for_post_loop_igvn, useful); // remove useless node recorded for post loop opts IGVN pass + remove_useless_nodes(_for_merge_stores_igvn, useful); // remove useless node recorded for merge stores IGVN pass remove_useless_unstable_if_traps(useful); // remove useless unstable_if traps remove_useless_coarsened_locks(useful); // remove useless coarsened locks nodes #ifdef ASSERT @@ -627,6 +631,7 @@ Compile::Compile(ciEnv* ci_env, ciMethod* target, int osr_bci, _stub_entry_point(nullptr), _max_node_limit(MaxNodeLimit), _post_loop_opts_phase(false), + _merge_stores_phase(false), _allow_macro_nodes(true), _inlining_progress(false), _inlining_incrementally(false), @@ -651,6 +656,7 @@ Compile::Compile(ciEnv* ci_env, ciMethod* target, int osr_bci, _template_assertion_predicate_opaqs(comp_arena(), 8, 0, nullptr), _expensive_nodes(comp_arena(), 8, 0, nullptr), _for_post_loop_igvn(comp_arena(), 8, 0, nullptr), + _for_merge_stores_igvn(comp_arena(), 8, 0, nullptr), _unstable_if_traps(comp_arena(), 8, 0, nullptr), _coarsened_locks(comp_arena(), 8, 0, nullptr), _congraph(nullptr), @@ -905,6 +911,7 @@ Compile::Compile(ciEnv* ci_env, _stub_entry_point(nullptr), _max_node_limit(MaxNodeLimit), _post_loop_opts_phase(false), + _merge_stores_phase(false), _allow_macro_nodes(true), _inlining_progress(false), _inlining_incrementally(false), @@ -923,6 +930,7 @@ Compile::Compile(ciEnv* ci_env, _log(ci_env->log()), _first_failure_details(nullptr), _for_post_loop_igvn(comp_arena(), 8, 0, nullptr), + _for_merge_stores_igvn(comp_arena(), 8, 0, nullptr), _congraph(nullptr), NOT_PRODUCT(_igv_printer(nullptr) COMMA) _unique(0), @@ -1870,6 +1878,49 @@ void Compile::process_for_post_loop_opts_igvn(PhaseIterGVN& igvn) { } } +void Compile::record_for_merge_stores_igvn(Node* n) { + if (!n->for_merge_stores_igvn()) { + assert(!_for_merge_stores_igvn.contains(n), "duplicate"); + n->add_flag(Node::NodeFlags::Flag_for_merge_stores_igvn); + _for_merge_stores_igvn.append(n); + } +} + +void Compile::remove_from_merge_stores_igvn(Node* n) { + n->remove_flag(Node::NodeFlags::Flag_for_merge_stores_igvn); + _for_merge_stores_igvn.remove(n); +} + +// We need to wait with merging stores until RangeCheck smearing has removed the RangeChecks during +// the post loops IGVN phase. If we do it earlier, then there may still be some RangeChecks between +// the stores, and we merge the wrong sequence of stores. +// Example: +// StoreI RangeCheck StoreI StoreI RangeCheck StoreI +// Apply MergeStores: +// StoreI RangeCheck [ StoreL ] RangeCheck StoreI +// Remove more RangeChecks: +// StoreI [ StoreL ] StoreI +// But now it would have been better to do this instead: +// [ StoreL ] [ StoreL ] +// +// Note: we allow stores to merge in this dedicated IGVN round, and any later IGVN round, +// since we never unset _merge_stores_phase. +void Compile::process_for_merge_stores_igvn(PhaseIterGVN& igvn) { + C->set_merge_stores_phase(); + + if (_for_merge_stores_igvn.length() > 0) { + while (_for_merge_stores_igvn.length() > 0) { + Node* n = _for_merge_stores_igvn.pop(); + n->remove_flag(Node::NodeFlags::Flag_for_merge_stores_igvn); + igvn._worklist.push(n); + } + igvn.optimize(); + if (failing()) return; + assert(_for_merge_stores_igvn.length() == 0, "no more delayed nodes allowed"); + print_method(PHASE_AFTER_MERGE_STORES, 3); + } +} + void Compile::record_unstable_if_trap(UnstableIfTrap* trap) { if (OptimizeUnstableIf) { _unstable_if_traps.append(trap); @@ -2429,6 +2480,8 @@ void Compile::Optimize() { process_for_post_loop_opts_igvn(igvn); + process_for_merge_stores_igvn(igvn); + if (failing()) return; #ifdef ASSERT diff --git a/src/hotspot/share/opto/compile.hpp b/src/hotspot/share/opto/compile.hpp index 1fe6180c54f..7be23e01440 100644 --- a/src/hotspot/share/opto/compile.hpp +++ b/src/hotspot/share/opto/compile.hpp @@ -318,6 +318,7 @@ class Compile : public Phase { uintx _max_node_limit; // Max unique node count during a single compilation. bool _post_loop_opts_phase; // Loop opts are finished. + bool _merge_stores_phase; // Phase for merging stores, after post loop opts phase. bool _allow_macro_nodes; // True if we allow creation of macro nodes. int _major_progress; // Count of something big happening @@ -374,6 +375,7 @@ class Compile : public Phase { GrowableArray _template_assertion_predicate_opaqs; GrowableArray _expensive_nodes; // List of nodes that are expensive to compute and that we'd better not let the GVN freely common GrowableArray _for_post_loop_igvn; // List of nodes for IGVN after loop opts are over + GrowableArray _for_merge_stores_igvn; // List of nodes for IGVN merge stores GrowableArray _unstable_if_traps; // List of ifnodes after IGVN GrowableArray _coarsened_locks; // List of coarsened Lock and Unlock nodes ConnectionGraph* _congraph; @@ -766,6 +768,12 @@ public: void remove_useless_unstable_if_traps(Unique_Node_List &useful); void process_for_unstable_if_traps(PhaseIterGVN& igvn); + bool merge_stores_phase() { return _merge_stores_phase; } + void set_merge_stores_phase() { _merge_stores_phase = true; } + void record_for_merge_stores_igvn(Node* n); + void remove_from_merge_stores_igvn(Node* n); + void process_for_merge_stores_igvn(PhaseIterGVN& igvn); + void shuffle_macro_nodes(); void sort_macro_nodes(); diff --git a/src/hotspot/share/opto/memnode.cpp b/src/hotspot/share/opto/memnode.cpp index 55a65042584..7a285ee0722 100644 --- a/src/hotspot/share/opto/memnode.cpp +++ b/src/hotspot/share/opto/memnode.cpp @@ -3450,12 +3450,23 @@ Node *StoreNode::Ideal(PhaseGVN *phase, bool can_reshape) { } if (MergeStores && UseUnalignedAccesses) { - if (phase->C->post_loop_opts_phase()) { + if (phase->C->merge_stores_phase()) { MergePrimitiveStores merge(phase, this); Node* progress = merge.run(); if (progress != nullptr) { return progress; } } else { - phase->C->record_for_post_loop_opts_igvn(this); + // We need to wait with merging stores until RangeCheck smearing has removed the RangeChecks during + // the post loops IGVN phase. If we do it earlier, then there may still be some RangeChecks between + // the stores, and we merge the wrong sequence of stores. + // Example: + // StoreI RangeCheck StoreI StoreI RangeCheck StoreI + // Apply MergeStores: + // StoreI RangeCheck [ StoreL ] RangeCheck StoreI + // Remove more RangeChecks: + // StoreI [ StoreL ] StoreI + // But now it would have been better to do this instead: + // [ StoreL ] [ StoreL ] + phase->C->record_for_merge_stores_igvn(this); } } diff --git a/src/hotspot/share/opto/node.cpp b/src/hotspot/share/opto/node.cpp index d1b3d96b76b..fa836eac459 100644 --- a/src/hotspot/share/opto/node.cpp +++ b/src/hotspot/share/opto/node.cpp @@ -508,6 +508,11 @@ Node *Node::clone() const { // If it is applicable, it will happen anyway when the cloned node is registered with IGVN. n->remove_flag(Node::NodeFlags::Flag_for_post_loop_opts_igvn); } + if (for_merge_stores_igvn()) { + // Don't add cloned node to Compile::_for_merge_stores_igvn list automatically. + // If it is applicable, it will happen anyway when the cloned node is registered with IGVN. + n->remove_flag(Node::NodeFlags::Flag_for_merge_stores_igvn); + } if (n->is_ParsePredicate()) { C->add_parse_predicate(n->as_ParsePredicate()); } @@ -615,6 +620,9 @@ void Node::destruct(PhaseValues* phase) { if (for_post_loop_opts_igvn()) { compile->remove_from_post_loop_opts_igvn(this); } + if (for_merge_stores_igvn()) { + compile->remove_from_merge_stores_igvn(this); + } if (is_SafePoint()) { as_SafePoint()->delete_replaced_nodes(); diff --git a/src/hotspot/share/opto/node.hpp b/src/hotspot/share/opto/node.hpp index f6cb9f692bb..f128a2eca1d 100644 --- a/src/hotspot/share/opto/node.hpp +++ b/src/hotspot/share/opto/node.hpp @@ -830,8 +830,9 @@ public: Flag_is_expensive = 1 << 13, Flag_is_predicated_vector = 1 << 14, Flag_for_post_loop_opts_igvn = 1 << 15, - Flag_is_removed_by_peephole = 1 << 16, - Flag_is_predicated_using_blend = 1 << 17, + Flag_for_merge_stores_igvn = 1 << 16, + Flag_is_removed_by_peephole = 1 << 17, + Flag_is_predicated_using_blend = 1 << 18, _last_flag = Flag_is_predicated_using_blend }; @@ -1075,6 +1076,7 @@ public: bool is_scheduled() const { return (_flags & Flag_is_scheduled) != 0; } bool for_post_loop_opts_igvn() const { return (_flags & Flag_for_post_loop_opts_igvn) != 0; } + bool for_merge_stores_igvn() const { return (_flags & Flag_for_merge_stores_igvn) != 0; } // Is 'n' possibly a loop entry (i.e. a Parse Predicate projection)? static bool may_be_loop_entry(Node* n) { diff --git a/src/hotspot/share/opto/phasetype.hpp b/src/hotspot/share/opto/phasetype.hpp index 8015255c03b..318a01e3bd8 100644 --- a/src/hotspot/share/opto/phasetype.hpp +++ b/src/hotspot/share/opto/phasetype.hpp @@ -88,6 +88,7 @@ flags(CCP1, "PhaseCCP 1") \ flags(ITER_GVN2, "Iter GVN 2") \ flags(PHASEIDEALLOOP_ITERATIONS, "PhaseIdealLoop iterations") \ + flags(AFTER_MERGE_STORES, "After Merge Stores") \ flags(BEFORE_MACRO_EXPANSION , "Before Macro Expansion") \ flags(AFTER_MACRO_EXPANSION_STEP, "After Macro Expansion Step") \ flags(AFTER_MACRO_EXPANSION, "After Macro Expansion") \ diff --git a/test/hotspot/jtreg/compiler/c2/TestMergeStores.java b/test/hotspot/jtreg/compiler/c2/TestMergeStores.java index 99e714d8c5f..2e2b2b6661f 100644 --- a/test/hotspot/jtreg/compiler/c2/TestMergeStores.java +++ b/test/hotspot/jtreg/compiler/c2/TestMergeStores.java @@ -49,6 +49,15 @@ import java.util.Random; * @run main compiler.c2.TestMergeStores unaligned */ +/* + * @test + * @bug 8318446 8331054 8331311 8335392 8348959 8351414 + * @summary Test merging of consecutive stores + * @modules java.base/jdk.internal.misc + * @library /test/lib / + * @run main compiler.c2.TestMergeStores StressIGVN + */ + public class TestMergeStores { static int RANGE = 1000; private static final Unsafe UNSAFE = Unsafe.getUnsafe(); @@ -99,6 +108,19 @@ public class TestMergeStores { switch (args[0]) { case "aligned" -> { framework.addFlags("-XX:-UseUnalignedAccesses"); } case "unaligned" -> { framework.addFlags("-XX:+UseUnalignedAccesses"); } + // StressIGVN can mix up the order of RangeCheck smearing and MergeStores optimization, + // if they run in the same IGVN round. When we did not yet have a dedicated IGVN round + // after post loop opts for MergeStrores, it could happen that we would only remove + // RangeChecks after already merging some stores, and now they would have to be split + // up again and re-merged with different stores. Example: + // StoreI RangeCheck StoreI StoreI RangeCheck StoreI + // Apply MergeStores: + // StoreI RangeCheck [ StoreL ] RangeCheck StoreI + // Remove more RangeChecks: + // StoreI [ StoreL ] StoreI + // But now it would have been better to do this instead: + // [ StoreL ] [ StoreL ] + case "StressIGVN" -> { framework.addFlags("-XX:+StressIGVN"); } default -> { throw new RuntimeException("Test argument not recognized: " + args[0]); } } framework.start(); diff --git a/test/hotspot/jtreg/compiler/lib/ir_framework/CompilePhase.java b/test/hotspot/jtreg/compiler/lib/ir_framework/CompilePhase.java index 316eb5f3ce6..6f4410a0220 100644 --- a/test/hotspot/jtreg/compiler/lib/ir_framework/CompilePhase.java +++ b/test/hotspot/jtreg/compiler/lib/ir_framework/CompilePhase.java @@ -95,6 +95,7 @@ public enum CompilePhase { CCP1("PhaseCCP 1"), ITER_GVN2("Iter GVN 2"), PHASEIDEALLOOP_ITERATIONS("PhaseIdealLoop iterations"), + AFTER_MERGE_STORES("After Merge Stores"), BEFORE_MACRO_EXPANSION("Before Macro Expansion"), AFTER_MACRO_EXPANSION_STEP("After Macro Expansion Step"), AFTER_MACRO_EXPANSION("After Macro Expansion"),