This commit is contained in:
Zoltan Majo 2015-08-19 10:14:04 +02:00
commit 6d858b63cb
5 changed files with 560 additions and 29 deletions

View File

@ -1175,7 +1175,7 @@ void CountedLoopEndNode::dump_spec(outputStream *st) const {
//=============================================================================
//------------------------------is_member--------------------------------------
// Is 'l' a member of 'this'?
int IdealLoopTree::is_member( const IdealLoopTree *l ) const {
bool IdealLoopTree::is_member(const IdealLoopTree *l) const {
while( l->_nest > _nest ) l = l->_parent;
return l == this;
}

View File

@ -384,7 +384,7 @@ public:
{ }
// Is 'l' a member of 'this'?
int is_member( const IdealLoopTree *l ) const; // Test for nested membership
bool is_member(const IdealLoopTree *l) const; // Test for nested membership
// Set loop nesting depth. Accumulate has_call bits.
int set_nest( uint depth );
@ -1086,6 +1086,8 @@ private:
bool split_up( Node *n, Node *blk1, Node *blk2 );
void sink_use( Node *use, Node *post_loop );
Node *place_near_use( Node *useblock ) const;
Node* try_move_store_before_loop(Node* n, Node *n_ctrl);
void try_move_store_after_loop(Node* n);
bool _created_loop_node;
public:

View File

@ -653,6 +653,209 @@ Node *PhaseIdealLoop::conditional_move( Node *region ) {
return iff->in(1);
}
#ifdef ASSERT
static void enqueue_cfg_uses(Node* m, Unique_Node_List& wq) {
for (DUIterator_Fast imax, i = m->fast_outs(imax); i < imax; i++) {
Node* u = m->fast_out(i);
if (u->is_CFG()) {
if (u->Opcode() == Op_NeverBranch) {
u = ((NeverBranchNode*)u)->proj_out(0);
enqueue_cfg_uses(u, wq);
} else {
wq.push(u);
}
}
}
}
#endif
// Try moving a store out of a loop, right before the loop
Node* PhaseIdealLoop::try_move_store_before_loop(Node* n, Node *n_ctrl) {
// Store has to be first in the loop body
IdealLoopTree *n_loop = get_loop(n_ctrl);
if (n->is_Store() && n_loop != _ltree_root && n_loop->is_loop()) {
assert(n->in(0), "store should have control set");
Node* address = n->in(MemNode::Address);
Node* value = n->in(MemNode::ValueIn);
Node* mem = n->in(MemNode::Memory);
IdealLoopTree* address_loop = get_loop(get_ctrl(address));
IdealLoopTree* value_loop = get_loop(get_ctrl(value));
// - address and value must be loop invariant
// - memory must be a memory Phi for the loop
// - Store must be the only store on this memory slice in the
// loop: if there's another store following this one then value
// written at iteration i by the second store could be overwritten
// at iteration i+n by the first store: it's not safe to move the
// first store out of the loop
// - nothing must observe the Phi memory: it guarantees no read
// before the store and no early exit out of the loop
// With those conditions, we are also guaranteed the store post
// dominates the loop head. Otherwise there would be extra Phi
// involved between the loop's Phi and the store.
if (!n_loop->is_member(address_loop) &&
!n_loop->is_member(value_loop) &&
mem->is_Phi() && mem->in(0) == n_loop->_head &&
mem->outcnt() == 1 &&
mem->in(LoopNode::LoopBackControl) == n) {
#ifdef ASSERT
// Verify that store's control does post dominate loop entry and
// that there's no early exit of the loop before the store.
bool ctrl_ok = false;
{
// Follow control from loop head until n, we exit the loop or
// we reach the tail
ResourceMark rm;
Unique_Node_List wq;
wq.push(n_loop->_head);
assert(n_loop->_tail != NULL, "need a tail");
for (uint next = 0; next < wq.size(); ++next) {
Node *m = wq.at(next);
if (m == n->in(0)) {
ctrl_ok = true;
continue;
}
assert(!has_ctrl(m), "should be CFG");
if (!n_loop->is_member(get_loop(m)) || m == n_loop->_tail) {
ctrl_ok = false;
break;
}
enqueue_cfg_uses(m, wq);
}
}
assert(ctrl_ok, "bad control");
#endif
// move the Store
_igvn.replace_input_of(mem, LoopNode::LoopBackControl, mem);
_igvn.replace_input_of(n, 0, n_loop->_head->in(LoopNode::EntryControl));
_igvn.replace_input_of(n, MemNode::Memory, mem->in(LoopNode::EntryControl));
// Disconnect the phi now. An empty phi can confuse other
// optimizations in this pass of loop opts.
_igvn.replace_node(mem, mem->in(LoopNode::EntryControl));
n_loop->_body.yank(mem);
IdealLoopTree* new_loop = get_loop(n->in(0));
set_ctrl_and_loop(n, n->in(0));
return n;
}
}
return NULL;
}
// Try moving a store out of a loop, right after the loop
void PhaseIdealLoop::try_move_store_after_loop(Node* n) {
if (n->is_Store()) {
assert(n->in(0), "store should have control set");
Node *n_ctrl = get_ctrl(n);
IdealLoopTree *n_loop = get_loop(n_ctrl);
// Store must be in a loop
if (n_loop != _ltree_root && !n_loop->_irreducible) {
Node* address = n->in(MemNode::Address);
Node* value = n->in(MemNode::ValueIn);
IdealLoopTree* address_loop = get_loop(get_ctrl(address));
// address must be loop invariant
if (!n_loop->is_member(address_loop)) {
// Store must be last on this memory slice in the loop and
// nothing in the loop must observe it
Node* phi = NULL;
for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
Node* u = n->fast_out(i);
if (has_ctrl(u)) { // control use?
IdealLoopTree *u_loop = get_loop(get_ctrl(u));
if (!n_loop->is_member(u_loop)) {
continue;
}
if (u->is_Phi() && u->in(0) == n_loop->_head) {
assert(_igvn.type(u) == Type::MEMORY, "bad phi");
assert(phi == NULL, "already found");
phi = u;
continue;
}
}
phi = NULL;
break;
}
if (phi != NULL) {
// Nothing in the loop before the store (next iteration)
// must observe the stored value
bool mem_ok = true;
{
ResourceMark rm;
Unique_Node_List wq;
wq.push(phi);
for (uint next = 0; next < wq.size() && mem_ok; ++next) {
Node *m = wq.at(next);
for (DUIterator_Fast imax, i = m->fast_outs(imax); i < imax && mem_ok; i++) {
Node* u = m->fast_out(i);
if (u->is_Store() || u->is_Phi()) {
if (u != n) {
wq.push(u);
mem_ok = (wq.size() <= 10);
}
} else {
mem_ok = false;
break;
}
}
}
}
if (mem_ok) {
// Move the Store out of the loop creating clones along
// all paths out of the loop that observe the stored value
_igvn.rehash_node_delayed(phi);
int count = phi->replace_edge(n, n->in(MemNode::Memory));
assert(count > 0, "inconsistent phi");
for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
Node* u = n->fast_out(i);
Node* c = get_ctrl(u);
if (u->is_Phi()) {
c = u->in(0)->in(u->find_edge(n));
}
IdealLoopTree *u_loop = get_loop(c);
assert (!n_loop->is_member(u_loop), "only the phi should have been a use in the loop");
while(true) {
Node* next_c = find_non_split_ctrl(idom(c));
if (n_loop->is_member(get_loop(next_c))) {
break;
}
c = next_c;
}
Node* st = n->clone();
st->set_req(0, c);
_igvn.register_new_node_with_optimizer(st);
set_ctrl(st, c);
IdealLoopTree* new_loop = get_loop(c);
assert(new_loop != n_loop, "should be moved out of loop");
if (new_loop->_child == NULL) new_loop->_body.push(st);
_igvn.replace_input_of(u, u->find_edge(n), st);
--imax;
--i;
}
assert(n->outcnt() == 0, "all uses should be gone");
_igvn.replace_input_of(n, MemNode::Memory, C->top());
// Disconnect the phi now. An empty phi can confuse other
// optimizations in this pass of loop opts..
if (phi->in(LoopNode::LoopBackControl) == phi) {
_igvn.replace_node(phi, phi->in(LoopNode::EntryControl));
n_loop->_body.yank(phi);
}
}
}
}
}
}
}
//------------------------------split_if_with_blocks_pre-----------------------
// Do the real work in a non-recursive function. Data nodes want to be
// cloned in the pre-order so they can feed each other nicely.
@ -683,6 +886,11 @@ Node *PhaseIdealLoop::split_if_with_blocks_pre( Node *n ) {
Node *n_ctrl = get_ctrl(n);
if( !n_ctrl ) return n; // Dead node
Node* res = try_move_store_before_loop(n, n_ctrl);
if (res != NULL) {
return n;
}
// Attempt to remix address expressions for loop invariants
Node *m = remix_address_expressions( n );
if( m ) return m;
@ -691,16 +899,18 @@ Node *PhaseIdealLoop::split_if_with_blocks_pre( Node *n ) {
// Returns the block to clone thru.
Node *n_blk = has_local_phi_input( n );
if( !n_blk ) return n;
// Do not clone the trip counter through on a CountedLoop
// (messes up the canonical shape).
if( n_blk->is_CountedLoop() && n->Opcode() == Op_AddI ) return n;
// Check for having no control input; not pinned. Allow
// dominating control.
if( n->in(0) ) {
if (n->in(0)) {
Node *dom = idom(n_blk);
if( dom_lca( n->in(0), dom ) != n->in(0) )
if (dom_lca(n->in(0), dom) != n->in(0)) {
return n;
}
}
// Policy: when is it profitable. You must get more wins than
// policy before it is considered profitable. Policy is usually 0,
@ -1029,6 +1239,8 @@ void PhaseIdealLoop::split_if_with_blocks_post( Node *n ) {
}
}
try_move_store_after_loop(n);
// Check for Opaque2's who's loop has disappeared - who's input is in the
// same loop nest as their output. Remove 'em, they are no longer useful.
if( n_op == Op_Opaque2 &&

View File

@ -2371,40 +2371,47 @@ Node *StoreNode::Ideal(PhaseGVN *phase, bool can_reshape) {
Node* mem = in(MemNode::Memory);
Node* address = in(MemNode::Address);
// Back-to-back stores to same address? Fold em up. Generally
// unsafe if I have intervening uses... Also disallowed for StoreCM
// since they must follow each StoreP operation. Redundant StoreCMs
// are eliminated just before matching in final_graph_reshape.
if (mem->is_Store() && mem->in(MemNode::Address)->eqv_uncast(address) &&
mem->Opcode() != Op_StoreCM) {
// Looking at a dead closed cycle of memory?
assert(mem != mem->in(MemNode::Memory), "dead loop in StoreNode::Ideal");
{
Node* st = mem;
// If Store 'st' has more than one use, we cannot fold 'st' away.
// For example, 'st' might be the final state at a conditional
// return. Or, 'st' might be used by some node which is live at
// the same time 'st' is live, which might be unschedulable. So,
// require exactly ONE user until such time as we clone 'mem' for
// each of 'mem's uses (thus making the exactly-1-user-rule hold
// true).
while (st->is_Store() && st->outcnt() == 1 && st->Opcode() != Op_StoreCM) {
// Looking at a dead closed cycle of memory?
assert(st != st->in(MemNode::Memory), "dead loop in StoreNode::Ideal");
assert(Opcode() == st->Opcode() ||
st->Opcode() == Op_StoreVector ||
Opcode() == Op_StoreVector ||
phase->C->get_alias_index(adr_type()) == Compile::AliasIdxRaw ||
(Opcode() == Op_StoreL && st->Opcode() == Op_StoreI), // expanded ClearArrayNode
err_msg_res("no mismatched stores, except on raw memory: %s %s", NodeClassNames[Opcode()], NodeClassNames[st->Opcode()]));
assert(Opcode() == mem->Opcode() ||
phase->C->get_alias_index(adr_type()) == Compile::AliasIdxRaw,
"no mismatched stores, except on raw memory");
if (mem->outcnt() == 1 && // check for intervening uses
mem->as_Store()->memory_size() <= this->memory_size()) {
// If anybody other than 'this' uses 'mem', we cannot fold 'mem' away.
// For example, 'mem' might be the final state at a conditional return.
// Or, 'mem' might be used by some node which is live at the same time
// 'this' is live, which might be unschedulable. So, require exactly
// ONE user, the 'this' store, until such time as we clone 'mem' for
// each of 'mem's uses (thus making the exactly-1-user-rule hold true).
if (can_reshape) { // (%%% is this an anachronism?)
set_req_X(MemNode::Memory, mem->in(MemNode::Memory),
phase->is_IterGVN());
} else {
// It's OK to do this in the parser, since DU info is always accurate,
// and the parser always refers to nodes via SafePointNode maps.
set_req(MemNode::Memory, mem->in(MemNode::Memory));
if (st->in(MemNode::Address)->eqv_uncast(address) &&
st->as_Store()->memory_size() <= this->memory_size()) {
Node* use = st->raw_out(0);
phase->igvn_rehash_node_delayed(use);
if (can_reshape) {
use->set_req_X(MemNode::Memory, st->in(MemNode::Memory), phase->is_IterGVN());
} else {
// It's OK to do this in the parser, since DU info is always accurate,
// and the parser always refers to nodes via SafePointNode maps.
use->set_req(MemNode::Memory, st->in(MemNode::Memory));
}
return this;
}
return this;
st = st->in(MemNode::Memory);
}
}
// Capture an unaliased, unconditional, simple store into an initializer.
// Or, if it is independent of the allocation, hoist it above the allocation.
if (ReduceFieldZeroing && /*can_reshape &&*/

View File

@ -0,0 +1,310 @@
/*
* Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
/**
* @test
* @bug 8080289
* @summary Sink stores out of loops if possible
* @run main/othervm -XX:-UseOnStackReplacement -XX:-BackgroundCompilation -XX:+PrintCompilation -XX:CompileCommand=dontinline,TestMoveStoresOutOfLoops::test* TestMoveStoresOutOfLoops
*
*/
import java.lang.reflect.*;
import java.util.*;
import java.util.function.*;
public class TestMoveStoresOutOfLoops {
private static long[] array = new long[10];
private static long[] array2 = new long[10];
private static boolean[] array3 = new boolean[1000];
private static byte[] byte_array = new byte[10];
// Array store should be moved out of the loop, value stored
// should be 999, the loop should be eliminated
static void test_after_1(int idx) {
for (int i = 0; i < 1000; i++) {
array[idx] = i;
}
}
// Array store can't be moved out of loop because of following
// non loop invariant array access
static void test_after_2(int idx) {
for (int i = 0; i < 1000; i++) {
array[idx] = i;
array2[i%10] = i;
}
}
// Array store can't be moved out of loop because of following
// use
static void test_after_3(int idx) {
for (int i = 0; i < 1000; i++) {
array[idx] = i;
if (array[0] == -1) {
break;
}
}
}
// Array store can't be moved out of loop because of preceding
// use
static void test_after_4(int idx) {
for (int i = 0; i < 1000; i++) {
if (array[0] == -2) {
break;
}
array[idx] = i;
}
}
// All array stores should be moved out of the loop, one after
// the other
static void test_after_5(int idx) {
for (int i = 0; i < 1000; i++) {
array[idx] = i;
array[idx+1] = i;
array[idx+2] = i;
array[idx+3] = i;
array[idx+4] = i;
array[idx+5] = i;
}
}
// Array store can be moved after the loop but needs to be
// cloned on both exit paths
static void test_after_6(int idx) {
for (int i = 0; i < 1000; i++) {
array[idx] = i;
if (array3[i]) {
return;
}
}
}
// Optimize out redundant stores
static void test_stores_1(int ignored) {
array[0] = 0;
array[1] = 1;
array[2] = 2;
array[0] = 0;
array[1] = 1;
array[2] = 2;
}
static void test_stores_2(int idx) {
array[idx+0] = 0;
array[idx+1] = 1;
array[idx+2] = 2;
array[idx+0] = 0;
array[idx+1] = 1;
array[idx+2] = 2;
}
static void test_stores_3(int idx) {
byte_array[idx+0] = 0;
byte_array[idx+1] = 1;
byte_array[idx+2] = 2;
byte_array[idx+0] = 0;
byte_array[idx+1] = 1;
byte_array[idx+2] = 2;
}
// Array store can be moved out of the loop before the loop header
static void test_before_1(int idx) {
for (int i = 0; i < 1000; i++) {
array[idx] = 999;
}
}
// Array store can't be moved out of the loop before the loop
// header because there's more than one store on this slice
static void test_before_2(int idx) {
for (int i = 0; i < 1000; i++) {
array[idx] = 999;
array[i%2] = 0;
}
}
// Array store can't be moved out of the loop before the loop
// header because of use before store
static int test_before_3(int idx) {
int res = 0;
for (int i = 0; i < 1000; i++) {
res += array[i%10];
array[idx] = 999;
}
return res;
}
// Array store can't be moved out of the loop before the loop
// header because of possible early exit
static void test_before_4(int idx) {
for (int i = 0; i < 1000; i++) {
if (idx / (i+1) > 0) {
return;
}
array[idx] = 999;
}
}
// Array store can't be moved out of the loop before the loop
// header because it doesn't postdominate the loop head
static void test_before_5(int idx) {
for (int i = 0; i < 1000; i++) {
if (i % 2 == 0) {
array[idx] = 999;
}
}
}
// Array store can be moved out of the loop before the loop header
static int test_before_6(int idx) {
int res = 0;
for (int i = 0; i < 1000; i++) {
if (i%2 == 1) {
res *= 2;
} else {
res++;
}
array[idx] = 999;
}
return res;
}
final HashMap<String,Method> tests = new HashMap<>();
{
for (Method m : this.getClass().getDeclaredMethods()) {
if (m.getName().matches("test_(before|after|stores)_[0-9]+")) {
assert(Modifier.isStatic(m.getModifiers())) : m;
tests.put(m.getName(), m);
}
}
}
boolean success = true;
void doTest(String name, Runnable init, Function<String, Boolean> check) throws Exception {
Method m = tests.get(name);
for (int i = 0; i < 20000; i++) {
init.run();
m.invoke(null, 0);
success = success && check.apply(name);
if (!success) {
break;
}
}
}
static void array_init() {
array[0] = -1;
}
static boolean array_check(String name) {
boolean success = true;
if (array[0] != 999) {
success = false;
System.out.println(name + " failed: array[0] = " + array[0]);
}
return success;
}
static void array_init2() {
for (int i = 0; i < 6; i++) {
array[i] = -1;
}
}
static boolean array_check2(String name) {
boolean success = true;
for (int i = 0; i < 6; i++) {
if (array[i] != 999) {
success = false;
System.out.println(name + " failed: array[" + i + "] = " + array[i]);
}
}
return success;
}
static void array_init3() {
for (int i = 0; i < 3; i++) {
array[i] = -1;
}
}
static boolean array_check3(String name) {
boolean success = true;
for (int i = 0; i < 3; i++) {
if (array[i] != i) {
success = false;
System.out.println(name + " failed: array[" + i + "] = " + array[i]);
}
}
return success;
}
static void array_init4() {
for (int i = 0; i < 3; i++) {
byte_array[i] = -1;
}
}
static boolean array_check4(String name) {
boolean success = true;
for (int i = 0; i < 3; i++) {
if (byte_array[i] != i) {
success = false;
System.out.println(name + " failed: byte_array[" + i + "] = " + byte_array[i]);
}
}
return success;
}
static public void main(String[] args) throws Exception {
TestMoveStoresOutOfLoops test = new TestMoveStoresOutOfLoops();
test.doTest("test_after_1", TestMoveStoresOutOfLoops::array_init, TestMoveStoresOutOfLoops::array_check);
test.doTest("test_after_2", TestMoveStoresOutOfLoops::array_init, TestMoveStoresOutOfLoops::array_check);
test.doTest("test_after_3", TestMoveStoresOutOfLoops::array_init, TestMoveStoresOutOfLoops::array_check);
test.doTest("test_after_4", TestMoveStoresOutOfLoops::array_init, TestMoveStoresOutOfLoops::array_check);
test.doTest("test_after_5", TestMoveStoresOutOfLoops::array_init2, TestMoveStoresOutOfLoops::array_check2);
test.doTest("test_after_6", TestMoveStoresOutOfLoops::array_init, TestMoveStoresOutOfLoops::array_check);
array3[999] = true;
test.doTest("test_after_6", TestMoveStoresOutOfLoops::array_init, TestMoveStoresOutOfLoops::array_check);
test.doTest("test_stores_1", TestMoveStoresOutOfLoops::array_init3, TestMoveStoresOutOfLoops::array_check3);
test.doTest("test_stores_2", TestMoveStoresOutOfLoops::array_init3, TestMoveStoresOutOfLoops::array_check3);
test.doTest("test_stores_3", TestMoveStoresOutOfLoops::array_init4, TestMoveStoresOutOfLoops::array_check4);
test.doTest("test_before_1", TestMoveStoresOutOfLoops::array_init, TestMoveStoresOutOfLoops::array_check);
test.doTest("test_before_2", TestMoveStoresOutOfLoops::array_init, TestMoveStoresOutOfLoops::array_check);
test.doTest("test_before_3", TestMoveStoresOutOfLoops::array_init, TestMoveStoresOutOfLoops::array_check);
test.doTest("test_before_4", TestMoveStoresOutOfLoops::array_init, TestMoveStoresOutOfLoops::array_check);
test.doTest("test_before_5", TestMoveStoresOutOfLoops::array_init, TestMoveStoresOutOfLoops::array_check);
test.doTest("test_before_6", TestMoveStoresOutOfLoops::array_init, TestMoveStoresOutOfLoops::array_check);
if (!test.success) {
throw new RuntimeException("Some tests failed");
}
}
}