8259609: C2: optimize long range checks in long counted loops

Co-authored-by: John R Rose <jrose@openjdk.org>
Reviewed-by: thartmann, jrose
This commit is contained in:
Roland Westrelin 2021-10-26 15:53:37 +00:00
parent 574f8903ee
commit 82f4aacb42
13 changed files with 911 additions and 178 deletions

View File

@ -1107,6 +1107,7 @@ Node* MaxNode::build_min_max(Node* a, Node* b, bool is_max, bool is_unsigned, co
bool is_int = gvn.type(a)->isa_int();
assert(is_int || gvn.type(a)->isa_long(), "int or long inputs");
assert(is_int == (gvn.type(b)->isa_int() != NULL), "inconsistent inputs");
BasicType bt = is_int ? T_INT: T_LONG;
Node* hook = NULL;
if (gvn.is_IterGVN()) {
// Make sure a and b are not destroyed
@ -1115,48 +1116,23 @@ Node* MaxNode::build_min_max(Node* a, Node* b, bool is_max, bool is_unsigned, co
hook->init_req(1, b);
}
Node* res = NULL;
if (!is_unsigned) {
if (is_int && !is_unsigned) {
if (is_max) {
if (is_int) {
res = gvn.transform(new MaxINode(a, b));
assert(gvn.type(res)->is_int()->_lo >= t->is_int()->_lo && gvn.type(res)->is_int()->_hi <= t->is_int()->_hi, "type doesn't match");
} else {
Node* cmp = gvn.transform(new CmpLNode(a, b));
Node* bol = gvn.transform(new BoolNode(cmp, BoolTest::lt));
res = gvn.transform(new CMoveLNode(bol, a, b, t->is_long()));
}
res = gvn.transform(new MaxINode(a, b));
assert(gvn.type(res)->is_int()->_lo >= t->is_int()->_lo && gvn.type(res)->is_int()->_hi <= t->is_int()->_hi, "type doesn't match");
} else {
if (is_int) {
Node* res = gvn.transform(new MinINode(a, b));
assert(gvn.type(res)->is_int()->_lo >= t->is_int()->_lo && gvn.type(res)->is_int()->_hi <= t->is_int()->_hi, "type doesn't match");
} else {
Node* cmp = gvn.transform(new CmpLNode(b, a));
Node* bol = gvn.transform(new BoolNode(cmp, BoolTest::lt));
res = gvn.transform(new CMoveLNode(bol, a, b, t->is_long()));
}
Node* res = gvn.transform(new MinINode(a, b));
assert(gvn.type(res)->is_int()->_lo >= t->is_int()->_lo && gvn.type(res)->is_int()->_hi <= t->is_int()->_hi, "type doesn't match");
}
} else {
Node* cmp = NULL;
if (is_max) {
if (is_int) {
Node* cmp = gvn.transform(new CmpUNode(a, b));
Node* bol = gvn.transform(new BoolNode(cmp, BoolTest::lt));
res = gvn.transform(new CMoveINode(bol, a, b, t->is_int()));
} else {
Node* cmp = gvn.transform(new CmpULNode(a, b));
Node* bol = gvn.transform(new BoolNode(cmp, BoolTest::lt));
res = gvn.transform(new CMoveLNode(bol, a, b, t->is_long()));
}
cmp = gvn.transform(CmpNode::make(a, b, bt, is_unsigned));
} else {
if (is_int) {
Node* cmp = gvn.transform(new CmpUNode(b, a));
Node* bol = gvn.transform(new BoolNode(cmp, BoolTest::lt));
res = gvn.transform(new CMoveINode(bol, a, b, t->is_int()));
} else {
Node* cmp = gvn.transform(new CmpULNode(b, a));
Node* bol = gvn.transform(new BoolNode(cmp, BoolTest::lt));
res = gvn.transform(new CMoveLNode(bol, a, b, t->is_long()));
}
cmp = gvn.transform(CmpNode::make(b, a, bt, is_unsigned));
}
Node* bol = gvn.transform(new BoolNode(cmp, BoolTest::lt));
res = gvn.transform(CMoveNode::make(NULL, bol, a, b, t));
}
if (hook != NULL) {
hook->destruct(&gvn);
@ -1168,12 +1144,8 @@ Node* MaxNode::build_min_max_diff_with_zero(Node* a, Node* b, bool is_max, const
bool is_int = gvn.type(a)->isa_int();
assert(is_int || gvn.type(a)->isa_long(), "int or long inputs");
assert(is_int == (gvn.type(b)->isa_int() != NULL), "inconsistent inputs");
Node* zero = NULL;
if (is_int) {
zero = gvn.intcon(0);
} else {
zero = gvn.longcon(0);
}
BasicType bt = is_int ? T_INT: T_LONG;
Node* zero = gvn.integercon(0, bt);
Node* hook = NULL;
if (gvn.is_IterGVN()) {
// Make sure a and b are not destroyed
@ -1181,32 +1153,15 @@ Node* MaxNode::build_min_max_diff_with_zero(Node* a, Node* b, bool is_max, const
hook->init_req(0, a);
hook->init_req(1, b);
}
Node* res = NULL;
Node* cmp = NULL;
if (is_max) {
if (is_int) {
Node* cmp = gvn.transform(new CmpINode(a, b));
Node* sub = gvn.transform(new SubINode(a, b));
Node* bol = gvn.transform(new BoolNode(cmp, BoolTest::lt));
res = gvn.transform(new CMoveINode(bol, sub, zero, t->is_int()));
} else {
Node* cmp = gvn.transform(new CmpLNode(a, b));
Node* sub = gvn.transform(new SubLNode(a, b));
Node* bol = gvn.transform(new BoolNode(cmp, BoolTest::lt));
res = gvn.transform(new CMoveLNode(bol, sub, zero, t->is_long()));
}
cmp = gvn.transform(CmpNode::make(a, b, bt, false));
} else {
if (is_int) {
Node* cmp = gvn.transform(new CmpINode(b, a));
Node* sub = gvn.transform(new SubINode(a, b));
Node* bol = gvn.transform(new BoolNode(cmp, BoolTest::lt));
res = gvn.transform(new CMoveINode(bol, sub, zero, t->is_int()));
} else {
Node* cmp = gvn.transform(new CmpLNode(b, a));
Node* sub = gvn.transform(new SubLNode(a, b));
Node* bol = gvn.transform(new BoolNode(cmp, BoolTest::lt));
res = gvn.transform(new CMoveLNode(bol, sub, zero, t->is_long()));
}
cmp = gvn.transform(CmpNode::make(b, a, bt, false));
}
Node* sub = gvn.transform(SubNode::make(a, b, bt));
Node* bol = gvn.transform(new BoolNode(cmp, BoolTest::lt));
Node* res = gvn.transform(CMoveNode::make(NULL, bol, sub, zero, t));
if (hook != NULL) {
hook->destruct(&gvn);
}

View File

@ -712,7 +712,8 @@ class Invariance : public StackObj {
// Returns true if the predicate of iff is in "scale*iv + offset u< load_range(ptr)" format
// Note: this function is particularly designed for loop predication. We require load_range
// and offset to be loop invariant computed on the fly by "invar"
bool IdealLoopTree::is_range_check_if(IfNode *iff, PhaseIdealLoop *phase, Invariance& invar DEBUG_ONLY(COMMA ProjNode *predicate_proj)) const {
bool IdealLoopTree::is_range_check_if(IfNode *iff, PhaseIdealLoop *phase, BasicType bt, Node *iv, Node *&range,
Node *&offset, jlong &scale) const {
if (!is_loop_exit(iff)) {
return false;
}
@ -727,50 +728,62 @@ bool IdealLoopTree::is_range_check_if(IfNode *iff, PhaseIdealLoop *phase, Invari
return false;
}
const CmpNode *cmp = bol->in(1)->as_Cmp();
if (cmp->Opcode() != Op_CmpU) {
if (!(cmp->is_Cmp() && cmp->operates_on(bt, false))) {
return false;
}
Node* range = cmp->in(2);
if (range->Opcode() != Op_LoadRange && !iff->is_RangeCheck()) {
const TypeInt* tint = phase->_igvn.type(range)->isa_int();
if (tint == NULL || tint->empty() || tint->_lo < 0) {
range = cmp->in(2);
if (range->Opcode() != Op_LoadRange) {
const TypeInteger* tinteger = phase->_igvn.type(range)->isa_integer(bt);
if (tinteger == NULL || tinteger->empty() || tinteger->lo_as_long() < 0) {
// Allow predication on positive values that aren't LoadRanges.
// This allows optimization of loops where the length of the
// array is a known value and doesn't need to be loaded back
// from the array.
return false;
}
} else {
assert(bt == T_INT, "no LoadRange for longs");
}
if (!invar.is_invariant(range)) {
scale = 0;
offset = NULL;
if (!phase->is_scaled_iv_plus_offset(cmp->in(1), iv, &scale, &offset, bt)) {
return false;
}
Node *iv = _head->as_CountedLoop()->phi();
int scale = 0;
Node *offset = NULL;
if (!phase->is_scaled_iv_plus_offset(cmp->in(1), iv, &scale, &offset)) {
return false;
}
if (offset && !invar.is_invariant(offset)) { // offset must be invariant
return false;
}
#ifdef ASSERT
if (offset && phase->has_ctrl(offset)) {
Node* offset_ctrl = phase->get_ctrl(offset);
if (phase->get_loop(predicate_proj) == phase->get_loop(offset_ctrl) &&
phase->is_dominator(predicate_proj, offset_ctrl)) {
// If the control of offset is loop predication promoted by previous pass,
// then it will lead to cyclic dependency.
// Previously promoted loop predication is in the same loop of predication
// point.
// This situation can occur when pinning nodes too conservatively - can we do better?
assert(false, "cyclic dependency prevents range check elimination, idx: offset %d, offset_ctrl %d, predicate_proj %d",
offset->_idx, offset_ctrl->_idx, predicate_proj->_idx);
}
}
#endif
return true;
}
// Loop-predication flavour of the range check test: matches iff against the
// int pattern "scale*iv + offset u< range" (via the BasicType-taking overload)
// and additionally requires both range and offset to be invariant according
// to "invar". Returns true only if all of these hold.
bool IdealLoopTree::is_range_check_if(IfNode *iff, PhaseIdealLoop *phase, Invariance& invar DEBUG_ONLY(COMMA ProjNode *predicate_proj)) const {
  Node* rc_range = NULL;
  Node* rc_offset = NULL;
  jlong rc_scale = 0;
  Node* loop_iv = _head->as_BaseCountedLoop()->phi();

  // Shape check first: is this a 32-bit scaled-iv-plus-offset range check at all?
  if (!is_range_check_if(iff, phase, T_INT, loop_iv, rc_range, rc_offset, rc_scale)) {
    return false;
  }
  // The range must be invariant
  if (!invar.is_invariant(rc_range)) {
    return false;
  }
  // ... and so must the offset, when there is one
  if (rc_offset != NULL && !invar.is_invariant(rc_offset)) {
    return false;
  }
#ifdef ASSERT
  if (rc_offset != NULL && phase->has_ctrl(rc_offset)) {
    Node* offset_ctrl = phase->get_ctrl(rc_offset);
    const bool same_loop = phase->get_loop(predicate_proj) == phase->get_loop(offset_ctrl);
    if (same_loop && phase->is_dominator(predicate_proj, offset_ctrl)) {
      // If the control of offset is loop predication promoted by previous pass,
      // then it will lead to cyclic dependency.
      // Previously promoted loop predication is in the same loop of predication
      // point.
      // This situation can occur when pinning nodes too conservatively - can we do better?
      assert(false, "cyclic dependency prevents range check elimination, idx: offset %d, offset_ctrl %d, predicate_proj %d",
             rc_offset->_idx, offset_ctrl->_idx, predicate_proj->_idx);
    }
  }
#endif
  return true;
}
//------------------------------rc_predicate-----------------------------------
// Create a range check predicate
//

View File

@ -1059,27 +1059,35 @@ void IdealLoopTree::policy_unroll_slp_analysis(CountedLoopNode *cl, PhaseIdealLo
}
}
//------------------------------policy_range_check-----------------------------
// Return TRUE or FALSE if the loop should be range-check-eliminated or not.
// When TRUE, the estimated node budget is also requested.
//
// We will actually perform iteration-splitting, a more powerful form of RCE.
bool IdealLoopTree::policy_range_check(PhaseIdealLoop *phase) const {
if (!RangeCheckElimination) return false;
bool IdealLoopTree::policy_range_check(PhaseIdealLoop* phase, bool provisional) const {
if (!provisional && !RangeCheckElimination) return false;
// If nodes are depleted, some transform has miscalculated its needs.
assert(!phase->exceeding_node_budget(), "sanity");
assert(provisional || !phase->exceeding_node_budget(), "sanity");
CountedLoopNode *cl = _head->as_CountedLoop();
// If we unrolled with no intention of doing RCE and we later changed our
// minds, we got no pre-loop. Either we need to make a new pre-loop, or we
// have to disallow RCE.
if (cl->is_main_no_pre_loop()) return false; // Disallowed for now.
if (_head->is_CountedLoop()) {
CountedLoopNode *cl = _head->as_CountedLoop();
// If we unrolled with no intention of doing RCE and we later changed our
// minds, we got no pre-loop. Either we need to make a new pre-loop, or we
// have to disallow RCE.
if (cl->is_main_no_pre_loop()) return false; // Disallowed for now.
// check for vectorized loops, some opts are no longer needed
// RCE needs pre/main/post loops. Don't apply it on a single iteration loop.
if (cl->is_unroll_only() || (cl->is_normal_loop() && cl->trip_count() == 1)) return false;
} else {
assert(provisional, "no long counted loop expected");
}
BaseCountedLoopNode* cl = _head->as_BaseCountedLoop();
Node *trip_counter = cl->phi();
// check for vectorized loops, some opts are no longer needed
// RCE needs pre/main/post loops. Don't apply it on a single iteration loop.
if (cl->is_unroll_only() || (cl->is_normal_loop() && cl->trip_count() == 1)) return false;
BasicType bt = cl->bt();
// Check loop body for tests of trip-counter plus loop-invariant vs
// loop-invariant.
@ -1104,22 +1112,32 @@ bool IdealLoopTree::policy_range_check(PhaseIdealLoop *phase) const {
Node *rc_exp = cmp->in(1);
Node *limit = cmp->in(2);
Node *limit_c = phase->get_ctrl(limit);
if (limit_c == phase->C->top()) {
return false; // Found dead test on live IF? No RCE!
}
if (is_member(phase->get_loop(limit_c))) {
// Compare might have operands swapped; commute them
rc_exp = cmp->in(2);
limit = cmp->in(1);
limit_c = phase->get_ctrl(limit);
if (is_member(phase->get_loop(limit_c))) {
continue; // Both inputs are loop varying; cannot RCE
if (provisional) {
// Try to pattern match with either cmp inputs, do not check
// whether one of the inputs is loop independent as it may not
// have had a chance to be hoisted yet.
if (!phase->is_scaled_iv_plus_offset(cmp->in(1), trip_counter, NULL, NULL, bt) &&
!phase->is_scaled_iv_plus_offset(cmp->in(2), trip_counter, NULL, NULL, bt)) {
continue;
}
} else {
Node *limit_c = phase->get_ctrl(limit);
if (limit_c == phase->C->top()) {
return false; // Found dead test on live IF? No RCE!
}
if (is_member(phase->get_loop(limit_c))) {
// Compare might have operands swapped; commute them
rc_exp = cmp->in(2);
limit = cmp->in(1);
limit_c = phase->get_ctrl(limit);
if (is_member(phase->get_loop(limit_c))) {
continue; // Both inputs are loop varying; cannot RCE
}
}
}
if (!phase->is_scaled_iv_plus_offset(rc_exp, trip_counter, NULL, NULL)) {
continue;
if (!phase->is_scaled_iv_plus_offset(rc_exp, trip_counter, NULL, NULL)) {
continue;
}
}
// Found a test like 'trip+off vs limit'. Test is an IfNode, has two (2)
// projections. If BOTH are in the loop we need loop unswitching instead
@ -1127,7 +1145,7 @@ bool IdealLoopTree::policy_range_check(PhaseIdealLoop *phase) const {
if (is_loop_exit(iff)) {
// Found valid reason to split iterations (if there is room).
// NOTE: Usually a gross overestimate.
return phase->may_require_nodes(est_loop_clone_sz(2));
return provisional || phase->may_require_nodes(est_loop_clone_sz(2));
}
} // End of is IF
}
@ -2458,8 +2476,9 @@ void PhaseIdealLoop::add_constraint(jlong stride_con, jlong scale_con, Node* off
//------------------------------is_scaled_iv---------------------------------
// Return true if exp is a constant times an induction var
bool PhaseIdealLoop::is_scaled_iv(Node* exp, Node* iv, int* p_scale) {
bool PhaseIdealLoop::is_scaled_iv(Node* exp, Node* iv, jlong* p_scale, BasicType bt) {
exp = exp->uncast();
assert(bt == T_INT || bt == T_LONG, "unexpected int type");
if (exp == iv) {
if (p_scale != NULL) {
*p_scale = 1;
@ -2467,23 +2486,29 @@ bool PhaseIdealLoop::is_scaled_iv(Node* exp, Node* iv, int* p_scale) {
return true;
}
int opc = exp->Opcode();
if (opc == Op_MulI) {
// Can't use is_Mul() here as it's true for AndI and AndL
if ((opc == Op_MulI || opc == Op_MulL) && exp->operates_on(bt, true)) {
if (exp->in(1)->uncast() == iv && exp->in(2)->is_Con()) {
if (p_scale != NULL) {
*p_scale = exp->in(2)->get_int();
*p_scale = exp->in(2)->get_integer_as_long(bt);
}
return true;
}
if (exp->in(2)->uncast() == iv && exp->in(1)->is_Con()) {
if (p_scale != NULL) {
*p_scale = exp->in(1)->get_int();
*p_scale = exp->in(1)->get_integer_as_long(bt);
}
return true;
}
} else if (opc == Op_LShiftI) {
} else if (exp->is_LShift() && exp->operates_on(bt, true)) {
if (exp->in(1)->uncast() == iv && exp->in(2)->is_Con()) {
if (p_scale != NULL) {
*p_scale = 1 << exp->in(2)->get_int();
jint shift_amount = exp->in(2)->get_int();
if (bt == T_INT) {
*p_scale = java_shift_left((jint)1, (juint)shift_amount);
} else if (bt == T_LONG) {
*p_scale = java_shift_left((jlong)1, (julong)shift_amount);
}
}
return true;
}
@ -2493,25 +2518,25 @@ bool PhaseIdealLoop::is_scaled_iv(Node* exp, Node* iv, int* p_scale) {
//-----------------------------is_scaled_iv_plus_offset------------------------------
// Return true if exp is a simple induction variable expression: k1*iv + (invar + k2)
bool PhaseIdealLoop::is_scaled_iv_plus_offset(Node* exp, Node* iv, int* p_scale, Node** p_offset, int depth) {
if (is_scaled_iv(exp, iv, p_scale)) {
bool PhaseIdealLoop::is_scaled_iv_plus_offset(Node* exp, Node* iv, jlong* p_scale, Node** p_offset, BasicType bt, int depth) {
assert(bt == T_INT || bt == T_LONG, "unexpected int type");
if (is_scaled_iv(exp, iv, p_scale, bt)) {
if (p_offset != NULL) {
Node *zero = _igvn.intcon(0);
Node *zero = _igvn.integercon(0, bt);
set_ctrl(zero, C->root());
*p_offset = zero;
}
return true;
}
exp = exp->uncast();
int opc = exp->Opcode();
if (opc == Op_AddI) {
if (is_scaled_iv(exp->in(1), iv, p_scale)) {
if (exp->is_Add() && exp->operates_on(bt, true)) {
if (is_scaled_iv(exp->in(1), iv, p_scale, bt)) {
if (p_offset != NULL) {
*p_offset = exp->in(2);
}
return true;
}
if (is_scaled_iv(exp->in(2), iv, p_scale)) {
if (is_scaled_iv(exp->in(2), iv, p_scale, bt)) {
if (p_offset != NULL) {
*p_offset = exp->in(1);
}
@ -2521,30 +2546,34 @@ bool PhaseIdealLoop::is_scaled_iv_plus_offset(Node* exp, Node* iv, int* p_scale,
Node* offset2 = NULL;
if (depth < 2 &&
is_scaled_iv_plus_offset(exp->in(1), iv, p_scale,
p_offset != NULL ? &offset2 : NULL, depth+1)) {
p_offset != NULL ? &offset2 : NULL, bt, depth+1)) {
if (p_offset != NULL) {
Node *ctrl_off2 = get_ctrl(offset2);
Node* offset = new AddINode(offset2, exp->in(2));
Node* offset = AddNode::make(offset2, exp->in(2), bt);
register_new_node(offset, ctrl_off2);
*p_offset = offset;
}
return true;
}
}
} else if (opc == Op_SubI) {
if (is_scaled_iv(exp->in(1), iv, p_scale)) {
} else if (exp->is_Sub() && exp->operates_on(bt, true)) {
if (is_scaled_iv(exp->in(1), iv, p_scale, bt)) {
if (p_offset != NULL) {
Node *zero = _igvn.intcon(0);
Node *zero = _igvn.integercon(0, bt);
set_ctrl(zero, C->root());
Node *ctrl_off = get_ctrl(exp->in(2));
Node* offset = new SubINode(zero, exp->in(2));
Node* offset = SubNode::make(zero, exp->in(2), bt);
register_new_node(offset, ctrl_off);
*p_offset = offset;
}
return true;
}
if (is_scaled_iv(exp->in(2), iv, p_scale)) {
if (is_scaled_iv(exp->in(2), iv, p_scale, bt)) {
if (p_offset != NULL) {
// We can't handle a scale of min_jint (or min_jlong) here as -1 * min_jint = min_jint
if (*p_scale == min_signed_integer(bt)) {
return false;
}
*p_scale *= -1;
*p_offset = exp->in(1);
}
@ -3411,7 +3440,7 @@ bool IdealLoopTree::iteration_split_impl(PhaseIdealLoop *phase, Node_List &old_n
// unrolling), plus any needed for RCE purposes.
bool should_unroll = policy_unroll(phase);
bool should_rce = policy_range_check(phase);
bool should_rce = policy_range_check(phase, false);
// If not RCE'ing (iteration splitting), then we do not need a pre-loop.
// We may still need to peel an initial iteration but we will not

View File

@ -525,7 +525,7 @@ static bool condition_stride_ok(BoolTest::mask bt, jlong stride_con) {
return true;
}
void PhaseIdealLoop::long_loop_replace_long_iv(Node* iv_to_replace, Node* inner_iv, Node* outer_phi, Node* inner_head) {
Node* PhaseIdealLoop::long_loop_replace_long_iv(Node* iv_to_replace, Node* inner_iv, Node* outer_phi, Node* inner_head) {
Node* iv_as_long = new ConvI2LNode(inner_iv, TypeLong::INT);
register_new_node(iv_as_long, inner_head);
Node* iv_replacement = new AddLNode(outer_phi, iv_as_long);
@ -546,6 +546,7 @@ void PhaseIdealLoop::long_loop_replace_long_iv(Node* iv_to_replace, Node* inner_
int nb = u->replace_edge(iv_to_replace, iv_replacement, &_igvn);
i -= nb;
}
return iv_replacement;
}
void PhaseIdealLoop::add_empty_predicate(Deoptimization::DeoptReason reason, Node* inner_head, IdealLoopTree* loop, SafePointNode* sfpt) {
@ -828,6 +829,9 @@ bool PhaseIdealLoop::transform_long_counted_loop(IdealLoopTree* loop, Node_List
Node* entry_control = x->in(LoopNode::EntryControl);
Node* cmp = exit_test->cmp_node();
Node_List range_checks;
iters_limit = extract_long_range_checks(loop, stride_con, iters_limit, phi, range_checks);
// Clone the control flow of the loop to build an outer loop
Node* outer_back_branch = back_control->clone();
Node* outer_exit_test = new IfNode(exit_test->in(0), exit_test->in(1), exit_test->_prob, exit_test->_fcnt);
@ -877,22 +881,22 @@ bool PhaseIdealLoop::transform_long_counted_loop(IdealLoopTree* loop, Node_List
Node* inner_iters_actual_int = new ConvL2INode(inner_iters_actual);
_igvn.register_new_node_with_optimizer(inner_iters_actual_int);
Node* zero = _igvn.intcon(0);
set_ctrl(zero, C->root());
Node* int_zero = _igvn.intcon(0);
set_ctrl(int_zero, C->root());
if (stride_con < 0) {
inner_iters_actual_int = new SubINode(zero, inner_iters_actual_int);
inner_iters_actual_int = new SubINode(int_zero, inner_iters_actual_int);
_igvn.register_new_node_with_optimizer(inner_iters_actual_int);
}
// Clone the iv data nodes as an integer iv
Node* int_stride = _igvn.intcon((int)stride_con);
Node* int_stride = _igvn.intcon(checked_cast<int>(stride_con));
set_ctrl(int_stride, C->root());
Node* inner_phi = new PhiNode(x->in(0), TypeInt::INT);
Node* inner_incr = new AddINode(inner_phi, int_stride);
Node* inner_cmp = NULL;
inner_cmp = new CmpINode(inner_incr, inner_iters_actual_int);
Node* inner_bol = new BoolNode(inner_cmp, exit_test->in(1)->as_Bool()->_test._test);
inner_phi->set_req(LoopNode::EntryControl, zero);
inner_phi->set_req(LoopNode::EntryControl, int_zero);
inner_phi->set_req(LoopNode::LoopBackControl, inner_incr);
register_new_node(inner_phi, x);
register_new_node(inner_incr, x);
@ -915,7 +919,7 @@ bool PhaseIdealLoop::transform_long_counted_loop(IdealLoopTree* loop, Node_List
// Replace inner loop long iv phi as inner loop int iv phi + outer
// loop iv phi
long_loop_replace_long_iv(phi, inner_phi, outer_phi, head);
Node* iv_add = long_loop_replace_long_iv(phi, inner_phi, outer_phi, head);
// Replace inner loop long iv incr with inner loop int incr + outer
// loop iv phi
@ -979,6 +983,8 @@ bool PhaseIdealLoop::transform_long_counted_loop(IdealLoopTree* loop, Node_List
// exit_branch: break; //in(0) := outer_exit_test
// }
transform_long_range_checks(checked_cast<int>(stride_con), range_checks, outer_phi, inner_iters_actual_int,
inner_phi, iv_add, inner_head);
// Peel one iteration of the loop and use the safepoint at the end
// of the peeled iteration to insert empty predicates. If no well
// positioned safepoint peel to guarantee a safepoint in the outer
@ -1012,6 +1018,239 @@ bool PhaseIdealLoop::transform_long_counted_loop(IdealLoopTree* loop, Node_List
return true;
}
// Scan the body of "loop" for long range checks of the form
// "scale*phi + offset <u range" that can later be rewritten as 32-bit range
// checks once the long counted loop has been split into an outer loop and an
// int inner loop.
//
//   loop         - the long counted loop being transformed
//   stride_con   - constant stride of the long induction variable
//   iters_limit  - current limit on the number of inner loop iterations
//   phi          - the long induction variable
//   range_checks - out: the IfProj of every accepted range check is pushed here
//
// Returns the (possibly reduced) iteration limit: for every accepted range
// check the limit is lowered to at most iters_limit / scale. Only range checks
// whose If projection matches the uncommon trap pattern, whose range and
// offset are loop invariant and whose scale is positive are accepted.
// If stride_con is negative, nothing is collected and iters_limit is returned
// unchanged.
int PhaseIdealLoop::extract_long_range_checks(const IdealLoopTree* loop, jlong stride_con, int iters_limit, PhiNode* phi,
                                              Node_List& range_checks) {
  if (stride_con < 0) { // only for stride_con > 0 && scale > 0 for now
    return iters_limit;
  }
  const jlong min_iters = 2;
  jlong reduced_iters_limit = iters_limit;
  jlong original_iters_limit = iters_limit;
  for (uint i = 0; i < loop->_body.size(); i++) {
    Node* c = loop->_body.at(i);
    if (c->is_IfProj() && c->in(0)->is_RangeCheck()) {
      CallStaticJavaNode* call = c->as_IfProj()->is_uncommon_trap_if_pattern(Deoptimization::Reason_none);
      if (call != NULL) {
        Node* range = NULL;
        Node* offset = NULL;
        jlong scale = 0;
        RangeCheckNode* rc = c->in(0)->as_RangeCheck();
        // The inner loop must still be able to run at least min_iters iterations
        // after the limit has been divided by scale * stride_con. The product is
        // not formed explicitly: scale is an arbitrary jlong constant taken from
        // the graph, so scale * stride_con could overflow (undefined behavior for
        // signed integers). Dividing twice gives the same result for positive
        // operands and cannot overflow.
        if (loop->is_range_check_if(rc, this, T_LONG, phi, range, offset, scale) &&
            loop->is_invariant(range) && loop->is_invariant(offset) &&
            scale > 0 && // only for stride_con > 0 && scale > 0 for now
            (original_iters_limit / scale) / stride_con >= min_iters) {
          // scale > 0 was established above, no need for ABS()
          reduced_iters_limit = MIN2(reduced_iters_limit, original_iters_limit / scale);
          range_checks.push(c);
        }
      }
    }
  }
  return checked_cast<int>(reduced_iters_limit);
}
// One execution of the inner loop covers a sub-range of the entire iteration range of the loop: [A,Z), aka [A=init,
// Z=limit). If the loop has at least one trip (which is the case here), the iteration variable i always takes A as its
// first value, followed by A+S (S is the stride), next A+2S, etc. The limit is exclusive, so that the final value B of
// i is never Z. It will be B=Z-1 if S=1, or B=Z+1 if S=-1. If |S|>1 the formula for the last value requires a floor
// operation, specifically B=floor((Z-sgn(S)-A)/S)*S+A. Thus i ranges as i:[A,B] or i:[A,Z) or i:[A,Z-U) for some U<S.
// N.B. We handle only the case of positive S currently, so comments about S<0 are not operative at present. Also,
// we only support positive index scale value (K > 0) to simplify the logic for clamping 32-bit bounds (L_2, R_2).
// For restrictions on S and K, see the guards in extract_long_range_checks.
// Within the loop there may be many range checks. Each such range check (R.C.) is of the form 0 <= i*K+L < R, where K
// is a scale factor applied to the loop iteration variable i, and L is some offset; K, L, and R are loop-invariant.
// Because R is never negative, this check can always be simplified to an unsigned check i*K+L <u R.
// When a long loop over a 64-bit variable i (outer_iv) is decomposed into a series of shorter sub-loops over a 32-bit
// variable j (inner_iv), j ranges over a shorter interval j:[0,Z_2), where the limit is chosen to prevent various cases
// of 32-bit overflow (including multiplications j*K below). In the sub-loop the logical value i is offset from j by a
// 64-bit constant C, so i ranges in i:C+[0,Z_2).
// The union of all the C+[0,Z_2) ranges from the sub-loops must be identical to the whole range [A,B]. Assuming S>0,
// the first C must be A itself, and the next C value is the previous C+Z_2. In each sub-loop, j counts up from zero
// and exits just before i=C+Z_2.
// (N.B. If S<0 the formulas are different, because all the loops count downward.)
// Returning to range checks, we see that each i*K+L <u R expands to (C+j)*K+L <u R, or j*K+Q <u R, where Q=(C*K+L).
// (Recall that K and L and R are loop-invariant scale, offset and range values for a particular R.C.) This is still a
// 64-bit comparison, so the range check elimination logic will not apply to it. (The R.C.E. transforms operate only on
// 32-bit indexes and comparisons, because they use 64-bit temporary values to avoid overflow; see
// PhaseIdealLoop::add_constraint.)
// We must transform this comparison so that it gets the same answer, but by means of a 32-bit R.C. (using j not i) of
// the form j*K+L_2 <u32 R_2. Note that L_2 and R_2 must be loop-invariant, but only with respect to the sub-loop. Thus, the
// problem reduces to computing values for L_2 and R_2 (for each R.C. in the loop) in the loop header for the sub-loop.
// Then the standard R.C.E. transforms can take those as inputs and further compute the necessary minimum and maximum
// values for the 32-bit counter j within which the range checks can be eliminated.
// So, given j*K+Q <u R, we need to find some j*K+L_2 <u32 R_2, where L_2 and R_2 fit in 32 bits, and the 32-bit operations do
// not overflow. We also need to cover the cases where i*K+L (= j*K+Q) overflows to a 64-bit negative, since that is
// allowed as an input to the R.C., as long as the R.C. as a whole fails.
// If 32-bit multiplication j*K might overflow, we adjust the sub-loop limit Z_2 closer to zero to reduce j's range.
// For each R.C. j*K+Q <u R, the range of mathematical values of j*K+Q in the sub-loop is [Q_min, Q_max), where
// Q_min=Q and Q_max=Z_2*K+Q. Making the upper limit Q_max be exclusive helps it integrate correctly with the strict
// comparisons against R and R_2. Sometimes a very high R will be replaced by an R_2 derived from the more moderate
// Q_max, and replacing one exclusive limit by another exclusive limit avoids off-by-one complexities.
// N.B. If (S*K)<0 then the formulas for Q_min and Q_max may differ; the values may need to be swapped and adjusted to
// the correct type of bound (inclusive or exclusive).
// Case A: Some Negatives (but no overflow).
// Number line:
// |s64_min . . . 0 . . . s64_max|
// | . Q_min..Q_max . 0 . . . . | s64 negative
// | . . . Q_min..0..Q_max . . . | small mixed
//
// if Q_min <s64 0, then use this test:
// j*K + s32_trunc(Q_min) <u32 clamp(R, 0, Q_max)
// If the 32-bit truncation loses information, no harm is done, since certainly the clamp also returns R_2=zero.
// Case B: No Negatives.
// Number line:
// |s64_min . . . 0 . . . s64_max|
// | . . . . 0 Q_min..Q_max . . | small positive
// | . . . . 0 . Q_min..Q_max . | s64 positive
//
// if both Q_min, Q_max >=s64 0, then use this test:
// j*K + 0 <u32 clamp(R, Q_min, Q_max) - Q_min
// or equivalently:
// j*K + 0 <u32 clamp(R - Q_min, 0, Q_max - Q_min)
// Case C: Overflow in the 64-bit domain
// Number line:
// |..Q_max-2^64 . . 0 . . . Q_min..| s64 overflow
//
// if Q_min >=s64 0 but Q_max <s64 0, then use this test:
// j*K + 0 <u32 clamp(R, Q_min, R) - Q_min
// or equivalently:
// j*K + 0 <u32 clamp(R - Q_min, 0, R - Q_min)
// or also equivalently:
// j*K + 0 <u32 max(0, R - Q_min)
//
// Here the clamp function is a simple 64-bit min/max:
// clamp(X, L, H) := max(L, min(X, H))
// When degenerately L > H, it returns L not H.
//
// Tests above can be merged into a single one:
// L_clamp = Q_min < 0 ? 0 : Q_min
// H_clamp = Q_max < Q_min ? R : Q_max
// j*K + Q_min - L_clamp <u32 clamp(R, L_clamp, H_clamp) - L_clamp
// or equivalently:
// j*K + Q_min - L_clamp <u32 clamp(R - L_clamp, 0, H_clamp - L_clamp)
//
// Readers may find the equivalent forms easier to reason about, but the forms given first generate better code.
// Rewrite every long range check in "range_checks" (IfProj nodes of
// RangeCheck nodes) into a 32-bit unsigned range check on the int induction
// variable of the inner loop:
//
//   i*K + L <u64 R   ==>   j*K + Q <u32 R_2
//
// where Q and R_2 are computed from 64-bit values at the entry control of the
// inner loop and then narrowed to int.
//
//   stride_con             - stride of the loop (not used by the code below)
//   range_checks           - projections of the range checks to transform
//   outer_phi              - long iv of the outer loop (C in the formulas)
//   inner_iters_actual_int - int iteration count of the inner loop (Z_2)
//   inner_phi              - int iv of the inner loop (j)
//   iv_add                 - long value that the range check expressions are
//                            pattern matched against as the induction variable
//   inner_head             - head of the inner loop; its entry control is where
//                            the per-sub-loop values are placed
void PhaseIdealLoop::transform_long_range_checks(int stride_con, const Node_List &range_checks, Node* outer_phi,
                                                 Node* inner_iters_actual_int, Node* inner_phi,
                                                 Node* iv_add, LoopNode* inner_head) {
  Node* long_zero = _igvn.longcon(0);
  set_ctrl(long_zero, C->root());

  for (uint i = 0; i < range_checks.size(); i++) {
    ProjNode* proj = range_checks.at(i)->as_Proj();
    ProjNode* unc_proj = proj->other_if_proj(); // not used below
    RangeCheckNode* rc = proj->in(0)->as_RangeCheck();
    jlong scale = 0;
    Node* offset = NULL;
    Node* rc_bol = rc->in(1);
    Node* rc_cmp = rc_bol->in(1);
    if (rc_cmp->Opcode() == Op_CmpU) {
      // could be shared and have already been taken care of
      continue;
    }
    bool ok = is_scaled_iv_plus_offset(rc_cmp->in(1), iv_add, &scale, &offset, T_LONG);
    assert(ok, "inconsistent: was tested before");
    Node* range = rc_cmp->in(2);
    Node* c = rc->in(0);
    Node* entry_control = inner_head->in(LoopNode::EntryControl);

    // Names below follow the formulas. Note that the local C shadows the
    // Compile* member of the same name, hence the explicit this->C.
    Node* R = range;
    Node* K = _igvn.longcon(scale);
    set_ctrl(K, this->C->root());
    Node* L = offset;
    Node* C = outer_phi;
    Node* Z_2 = new ConvI2LNode(inner_iters_actual_int, TypeLong::LONG);
    register_new_node(Z_2, entry_control);

    // Start with 64-bit values:
    //   i*K + L <u64 R
    //   (C+j)*K + L <u64 R
    //   j*K + L_2 <u64 R    where L_2 = C*K+L
    Node* L_2 = new MulLNode(C, K);
    register_new_node(L_2, entry_control);
    L_2 = new AddLNode(L_2, L);
    register_new_node(L_2, entry_control);

    // Compute endpoints of the range of values j*K.
    //  Q_min = (j=0)*K + L_2;  Q_max = (j=Z_2)*K + L_2
    Node* Q_min = L_2;
    Node* Q_max = new MulLNode(Z_2, K);
    register_new_node(Q_max, entry_control);
    Q_max = new AddLNode(Q_max, L_2);
    register_new_node(Q_max, entry_control);

    // L_clamp = Q_min < 0 ? 0 : Q_min
    Node* Q_min_cmp = new CmpLNode(Q_min, long_zero);
    register_new_node(Q_min_cmp, entry_control);
    Node* Q_min_bool = new BoolNode(Q_min_cmp, BoolTest::lt);
    register_new_node(Q_min_bool, entry_control);
    Node* L_clamp = new CMoveLNode(Q_min_bool, Q_min, long_zero, TypeLong::LONG);
    register_new_node(L_clamp, entry_control);

    // H_clamp = Q_max < Q_min ? R : Q_max
    Node* Q_max_cmp = new CmpLNode(Q_max, Q_min);
    register_new_node(Q_max_cmp, entry_control);
    Node* Q_max_bool = new BoolNode(Q_max_cmp, BoolTest::lt);
    register_new_node(Q_max_bool, entry_control);
    Node* H_clamp = new CMoveLNode(Q_max_bool, Q_max, R, TypeLong::LONG);
    register_new_node(H_clamp, entry_control);

    // R_2 = clamp(R, L_clamp, H_clamp) - L_clamp
    // that is:  R_2 = clamp(R, L_clamp, H_clamp)          if Q_min < 0
    // or:       R_2 = clamp(R, L_clamp, H_clamp) - Q_min  if Q_min >= 0
    Node* R_2 = clamp(R, L_clamp, H_clamp);
    R_2 = new SubLNode(R_2, L_clamp);
    register_new_node(R_2, entry_control);
    R_2 = new ConvL2INode(R_2, TypeInt::POS);
    register_new_node(R_2, entry_control);

    // Q = Q_min - L_clamp
    // that is:  Q = Q_min - 0          if Q_min < 0
    // or:       Q = Q_min - Q_min = 0  if Q_min >= 0
    Node* Q = new SubLNode(Q_min, L_clamp);
    register_new_node(Q, entry_control);
    Q = new ConvL2INode(Q, TypeInt::INT);
    register_new_node(Q, entry_control);

    // Transform the range check: the new 32-bit compare is placed at the
    // control input of the range check and replaces the input of its Bool.
    K = _igvn.intcon(checked_cast<int>(scale));
    set_ctrl(K, this->C->root());
    Node* scaled_iv = new MulINode(inner_phi, K);
    register_new_node(scaled_iv, c);
    Node* scaled_iv_plus_offset = new AddINode(scaled_iv, Q);
    register_new_node(scaled_iv_plus_offset, c);

    Node* new_rc_cmp = new CmpUNode(scaled_iv_plus_offset, R_2);
    register_new_node(new_rc_cmp, c);

    _igvn.replace_input_of(rc_bol, 1, new_rc_cmp);
  }
}
// Build the long expression max(L, min(R, H)), i.e. R clamped to [L, H].
// The upper bound is applied first, so L is the outermost operand.
// Control is assigned to the nodes created for each of the two steps.
Node* PhaseIdealLoop::clamp(Node* R, Node* L, Node* H) {
  // Step 1: cap R from above by H
  Node* capped = MaxNode::signed_min(R, H, TypeLong::LONG, _igvn);
  set_subtree_ctrl(capped, true);

  // Step 2: raise the capped value to at least L
  Node* clamped = MaxNode::signed_max(L, capped, TypeLong::LONG, _igvn);
  set_subtree_ctrl(clamped, true);

  return clamped;
}
LoopNode* PhaseIdealLoop::create_inner_head(IdealLoopTree* loop, LongCountedLoopNode* head,
LongCountedLoopEndNode* exit_test) {
LoopNode* new_inner_head = new LoopNode(head->in(1), head->in(2));
@ -1417,10 +1656,10 @@ bool PhaseIdealLoop::is_counted_loop(Node* x, IdealLoopTree*&loop, BasicType iv_
Node* bol;
if (stride_con > 0) {
cmp_limit = CmpNode::make(limit, _igvn.integercon(max_jint - stride_m, iv_bt), iv_bt);
cmp_limit = CmpNode::make(limit, _igvn.integercon(max_signed_integer(iv_bt) - stride_m, iv_bt), iv_bt);
bol = new BoolNode(cmp_limit, BoolTest::le);
} else {
cmp_limit = CmpNode::make(limit, _igvn.integercon(min_jint - stride_m, iv_bt), iv_bt);
cmp_limit = CmpNode::make(limit, _igvn.integercon(min_signed_integer(iv_bt) - stride_m, iv_bt), iv_bt);
bol = new BoolNode(cmp_limit, BoolTest::ge);
}
@ -1873,7 +2112,7 @@ const Type* LoopLimitNode::Value(PhaseGVN* phase) const {
int stride_con = stride_t->is_int()->get_con();
if (stride_con == 1)
return NULL; // Identity
return bottom_type(); // Identity
if (init_t->is_int()->is_con() && limit_t->is_int()->is_con()) {
// Use jlongs to avoid integer overflow.
@ -3955,24 +4194,26 @@ void PhaseIdealLoop::build_and_optimize(LoopOptsMode mode) {
// Reassociate invariants and prep for split_thru_phi
for (LoopTreeIterator iter(_ltree_root); !iter.done(); iter.next()) {
IdealLoopTree* lpt = iter.current();
bool is_counted = lpt->is_counted();
if (!is_counted || !lpt->is_innermost()) continue;
if (!lpt->is_loop()) {
continue;
}
Node* head = lpt->_head;
if (!head->is_BaseCountedLoop() || !lpt->is_innermost()) continue;
// check for vectorized loops, any reassociation of invariants was already done
if (is_counted && lpt->_head->as_CountedLoop()->is_unroll_only()) {
continue;
} else {
AutoNodeBudget node_budget(this);
lpt->reassociate_invariants(this);
if (head->is_CountedLoop()) {
if (head->as_CountedLoop()->is_unroll_only()) {
continue;
} else {
AutoNodeBudget node_budget(this);
lpt->reassociate_invariants(this);
}
}
// Because RCE opportunities can be masked by split_thru_phi,
// look for RCE candidates and inhibit split_thru_phi
// on just their loop-phi's for this pass of loop opts
if (SplitIfBlocks && do_split_ifs) {
AutoNodeBudget node_budget(this, AutoNodeBudget::NO_BUDGET_CHECK);
if (lpt->policy_range_check(this)) {
lpt->_rce_candidate = 1; // = true
}
if (SplitIfBlocks && do_split_ifs && lpt->policy_range_check(this, true)) {
lpt->_rce_candidate = 1; // = true
}
}
}

View File

@ -734,10 +734,12 @@ public:
// Return TRUE or FALSE if the loop should be range-check-eliminated.
// Gather a list of IF tests that are dominated by iteration splitting;
// also gather the end of the first split and the start of the 2nd split.
bool policy_range_check( PhaseIdealLoop *phase ) const;
bool policy_range_check(PhaseIdealLoop* phase, bool provisional) const;
// Return TRUE if "iff" is a range check.
bool is_range_check_if(IfNode *iff, PhaseIdealLoop *phase, Invariance& invar DEBUG_ONLY(COMMA ProjNode *predicate_proj)) const;
bool is_range_check_if(IfNode* iff, PhaseIdealLoop* phase, BasicType bt, Node* iv, Node*& range, Node*& offset,
jlong& scale) const;
// Estimate the number of nodes required when cloning a loop (body).
uint est_loop_clone_sz(uint factor) const;
@ -1168,7 +1170,7 @@ public:
bool is_counted_loop(Node* x, IdealLoopTree*&loop, BasicType iv_bt);
void long_loop_replace_long_iv(Node* iv_to_replace, Node* inner_iv, Node* outer_phi, Node* inner_head);
Node* long_loop_replace_long_iv(Node* iv_to_replace, Node* inner_iv, Node* outer_phi, Node* inner_head);
bool transform_long_counted_loop(IdealLoopTree* loop, Node_List &old_new);
#ifdef ASSERT
bool convert_to_long_loop(Node* cmp, Node* phi, IdealLoopTree* loop);
@ -1270,10 +1272,21 @@ public:
void mark_reductions( IdealLoopTree *loop );
// Return true if exp is a constant times an induction var
bool is_scaled_iv(Node* exp, Node* iv, int* p_scale);
bool is_scaled_iv(Node* exp, Node* iv, jlong* p_scale, BasicType bt);
// Return true if exp is a scaled induction var plus (or minus) constant
bool is_scaled_iv_plus_offset(Node* exp, Node* iv, int* p_scale, Node** p_offset, int depth = 0);
bool is_scaled_iv_plus_offset(Node* exp, Node* iv, jlong* p_scale, Node** p_offset, BasicType bt, int depth = 0);
// Int-scale convenience overload. Delegates to the jlong-scale overload with
// T_INT and narrows the scale it reports to an int.
//   exp, iv, p_offset - forwarded unchanged to the jlong-scale overload
//   p_scale           - optional out-parameter; written only on a match and
//                       only when non-NULL
// Returns true on a match, false otherwise.
bool is_scaled_iv_plus_offset(Node* exp, Node* iv, int* p_scale, Node** p_offset) {
  jlong wide_scale;
  const bool matched = is_scaled_iv_plus_offset(exp, iv, &wide_scale, p_offset, T_INT);
  if (!matched) {
    return false;
  }
  // Narrow unconditionally, so the checked_cast runs even when the caller
  // does not ask for the scale.
  const int narrow_scale = checked_cast<int>(wide_scale);
  if (p_scale != NULL) {
    *p_scale = narrow_scale;
  }
  return true;
}
// Enum to determine the action to be performed in create_new_if_for_predicate() when processing phis of UCT regions.
enum class UnswitchingAction {
@ -1623,6 +1636,14 @@ public:
LoopNode* create_inner_head(IdealLoopTree* loop, LongCountedLoopNode* head, LongCountedLoopEndNode* exit_test);
int extract_long_range_checks(const IdealLoopTree* loop, jlong stride_con, int iters_limit, PhiNode* phi,
Node_List &range_checks);
void transform_long_range_checks(int stride_con, const Node_List &range_checks, Node* outer_phi,
Node* inner_iters_actual_int, Node* inner_phi,
Node* iv_add, LoopNode* inner_head);
Node* get_late_ctrl_with_anti_dep(LoadNode* n, Node* early, Node* LCA);
bool ctrl_of_use_out_of_loop(const Node* n, Node* n_ctrl, IdealLoopTree* n_loop, Node* ctrl);
@ -1633,6 +1654,8 @@ public:
void try_sink_out_of_loop(Node* n);
Node* clamp(Node* R, Node* L, Node* H);
bool safe_for_if_replacement(const Node* dom) const;
};

View File

@ -1074,7 +1074,7 @@ Node *PhaseIdealLoop::split_if_with_blocks_pre( Node *n ) {
// If the loop is a candidate for range check elimination,
// delay splitting through its phi until a later loop optimization
if (n_blk->is_CountedLoop()) {
if (n_blk->is_BaseCountedLoop()) {
IdealLoopTree *lp = get_loop(n_blk);
if (lp && lp->_rce_candidate) {
return n;

View File

@ -41,7 +41,7 @@ class PhaseTransform;
class MulNode : public Node {
virtual uint hash() const;
public:
MulNode( Node *in1, Node *in2 ): Node(0,in1,in2) {
MulNode(Node *in1, Node *in2): Node(NULL,in1,in2) {
init_class_id(Class_Mul);
}
@ -75,6 +75,11 @@ public:
// Supplied function to return the multiplicative opcode
virtual int mul_opcode() const = 0;
// Reports whether this node operates on integers of basic type bt.
// Only T_INT and T_LONG may be queried (asserted). This generic version
// always answers false; signed_int is not consulted.
virtual bool operates_on(BasicType bt, bool signed_int) const {
assert(bt == T_INT || bt == T_LONG, "unsupported");
return false;
}
// Supplied function to return the max opcode
virtual int max_opcode() const = 0;
@ -98,6 +103,10 @@ public:
int min_opcode() const { return Op_MinI; }
const Type *bottom_type() const { return TypeInt::INT; }
virtual uint ideal_reg() const { return Op_RegI; }
// Reports whether this node operates on integers of basic type bt.
// Only T_INT and T_LONG may be queried (asserted). Answers true for T_INT
// and false for T_LONG; signed_int is not consulted.
virtual bool operates_on(BasicType bt, bool signed_int) const {
assert(bt == T_INT || bt == T_LONG, "unsupported");
return bt == T_INT;
}
};
//------------------------------MulLNode---------------------------------------
@ -116,6 +125,10 @@ public:
int min_opcode() const { return Op_MinL; }
const Type *bottom_type() const { return TypeLong::LONG; }
virtual uint ideal_reg() const { return Op_RegL; }
// Reports whether this node operates on integers of basic type bt.
// Only T_INT and T_LONG may be queried (asserted). Answers true for T_LONG
// and false for T_INT; signed_int is not consulted.
virtual bool operates_on(BasicType bt, bool signed_int) const {
assert(bt == T_INT || bt == T_LONG, "unsupported");
return bt == T_LONG;
}
};
@ -216,30 +229,49 @@ public:
virtual uint ideal_reg() const { return Op_RegL; }
};
//------------------------------LShiftNode-------------------------------------
// Common base class of the left-shift nodes (LShiftINode and LShiftLNode
// derive from it). Its constructor tags every instance with Class_LShift.
class LShiftNode : public Node {
public:
// in1 and in2 are forwarded to the Node constructor after a NULL first
// argument (presumably the control input -- confirm against Node).
LShiftNode(Node *in1, Node *in2) : Node(NULL,in1,in2) {
init_class_id(Class_LShift);
}
// Reports whether this node operates on integers of basic type bt.
// Only T_INT and T_LONG may be queried (asserted). The base class always
// answers false; signed_int is not consulted.
virtual bool operates_on(BasicType bt, bool signed_int) const {
assert(bt == T_INT || bt == T_LONG, "unsupported");
return false;
}
};
//------------------------------LShiftINode------------------------------------
// Logical shift left
class LShiftINode : public Node {
class LShiftINode : public LShiftNode {
public:
LShiftINode( Node *in1, Node *in2 ) : Node(0,in1,in2) {}
LShiftINode(Node *in1, Node *in2) : LShiftNode(in1,in2) {}
virtual int Opcode() const;
virtual Node* Identity(PhaseGVN* phase);
virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
virtual const Type* Value(PhaseGVN* phase) const;
const Type *bottom_type() const { return TypeInt::INT; }
virtual uint ideal_reg() const { return Op_RegI; }
// Reports whether this node operates on integers of basic type bt.
// Only T_INT and T_LONG may be queried (asserted). Answers true for T_INT
// and false for T_LONG; signed_int is not consulted.
virtual bool operates_on(BasicType bt, bool signed_int) const {
assert(bt == T_INT || bt == T_LONG, "unsupported");
return bt == T_INT;
}
};
//------------------------------LShiftLNode------------------------------------
// Logical shift left
class LShiftLNode : public Node {
class LShiftLNode : public LShiftNode {
public:
LShiftLNode( Node *in1, Node *in2 ) : Node(0,in1,in2) {}
LShiftLNode(Node *in1, Node *in2) : LShiftNode(in1,in2) {}
virtual int Opcode() const;
virtual Node* Identity(PhaseGVN* phase);
virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
virtual const Type* Value(PhaseGVN* phase) const;
const Type *bottom_type() const { return TypeLong::LONG; }
virtual uint ideal_reg() const { return Op_RegL; }
// Reports whether this node operates on integers of basic type bt.
// Only T_INT and T_LONG may be queried (asserted). Answers true for T_LONG
// and false for T_INT; signed_int is not consulted.
virtual bool operates_on(BasicType bt, bool signed_int) const {
assert(bt == T_INT || bt == T_LONG, "unsupported");
return bt == T_LONG;
}
};

View File

@ -98,6 +98,7 @@ class LockNode;
class LongCountedLoopNode;
class LongCountedLoopEndNode;
class LoopNode;
class LShiftNode;
class MachBranchNode;
class MachCallDynamicJavaNode;
class MachCallJavaNode;
@ -754,6 +755,7 @@ public:
DEFINE_CLASS_ID(Halt, Node, 15)
DEFINE_CLASS_ID(Opaque1, Node, 16)
DEFINE_CLASS_ID(Move, Node, 17)
DEFINE_CLASS_ID(LShift, Node, 18)
_max_classes = ClassMask_Move
};
@ -883,6 +885,7 @@ public:
DEFINE_CLASS_QUERY(LoadStoreConditional)
DEFINE_CLASS_QUERY(Lock)
DEFINE_CLASS_QUERY(Loop)
DEFINE_CLASS_QUERY(LShift)
DEFINE_CLASS_QUERY(Mach)
DEFINE_CLASS_QUERY(MachBranch)
DEFINE_CLASS_QUERY(MachCall)

View File

@ -792,9 +792,7 @@ ConLNode* PhaseTransform::longcon(jlong l) {
}
ConNode* PhaseTransform::integercon(jlong l, BasicType bt) {
if (bt == T_INT) {
jint int_con = (jint)l;
assert(((long)int_con) == l, "not an int");
return intcon(int_con);
return intcon(checked_cast<jint>(l));
}
assert(bt == T_LONG, "not an integer");
return longcon(l);

View File

@ -62,6 +62,10 @@ public:
virtual const Type *add_id() const = 0;
static SubNode* make(Node* in1, Node* in2, BasicType bt);
// Reports whether this node operates on integers of basic type bt.
// Only T_INT and T_LONG may be queried (asserted). This generic version
// always answers false; signed_int is not consulted.
virtual bool operates_on(BasicType bt, bool signed_int) const {
assert(bt == T_INT || bt == T_LONG, "unsupported");
return false;
}
};
@ -77,6 +81,10 @@ public:
const Type *add_id() const { return TypeInt::ZERO; }
const Type *bottom_type() const { return TypeInt::INT; }
virtual uint ideal_reg() const { return Op_RegI; }
// Reports whether this node operates on integers of basic type bt.
// Only T_INT and T_LONG may be queried (asserted). Answers true for T_INT
// and false for T_LONG; signed_int is not consulted.
virtual bool operates_on(BasicType bt, bool signed_int) const {
assert(bt == T_INT || bt == T_LONG, "unsupported");
return bt == T_INT;
}
};
//------------------------------SubLNode---------------------------------------
@ -90,6 +98,10 @@ public:
const Type *add_id() const { return TypeLong::ZERO; }
const Type *bottom_type() const { return TypeLong::LONG; }
virtual uint ideal_reg() const { return Op_RegL; }
// Reports whether this node operates on integers of basic type bt.
// Only T_INT and T_LONG may be queried (asserted). Answers true for T_LONG
// and false for T_INT; signed_int is not consulted.
virtual bool operates_on(BasicType bt, bool signed_int) const {
assert(bt == T_INT || bt == T_LONG, "unsupported");
return bt == T_LONG;
}
};
// NOTE: SubFPNode should be taken away and replaced by add and negate

View File

@ -0,0 +1,63 @@
/*
* Copyright (c) 2021, Red Hat, Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package compiler.c2.irTests;
import compiler.lib.ir_framework.*;
import java.util.Objects;
/*
* @test
* @bug 8259609
* @summary C2: optimize long range checks in long counted loops
* @library /test/lib /
* @run driver compiler.c2.irTests.TestLongRangeChecks
*/
// IR-framework test: checks the shape of the compiled graph for a long
// counted loop whose body is a single long range check.
public class TestLongRangeChecks {
// Entry point: hands control to the IR test framework.
public static void main(String[] args) {
TestFramework.run();
}
// The IR rules require exactly one LOOP node and no COUNTEDLOOP node in the
// compiled method.
@Test
@IR(counts = { IRNode.LOOP, "1"})
@IR(failOn = { IRNode.COUNTEDLOOP})
public static void testStridePosScalePos(long start, long stop, long length, long offset) {
final long scale = 1;
final long stride = 1;
// Loop is first transformed into a loop nest, long range
// check into an int range check, the range check is hoisted
// and the inner counted loop becomes empty so is optimized
// out.
for (long i = start; i < stop; i += stride) {
Objects.checkIndex(scale * i + offset, length);
}
}
// Runner for the test above: indices 0..99 checked against length 100, so
// no range check fails.
@Run(test = "testStridePosScalePos")
private void testStridePosScalePos_runner() {
testStridePosScalePos(0, 100, 100, 0);
}
}

View File

@ -0,0 +1,304 @@
/*
* Copyright (c) 2021, Red Hat, Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
/**
* @test
* @bug 8259609
* @summary C2: optimize long range checks in long counted loops
* @requires vm.compiler2.enabled
* @requires vm.compMode != "Xcomp"
* @library /test/lib /
* @modules java.base/jdk.internal.util
* @build sun.hotspot.WhiteBox
* @run driver jdk.test.lib.helpers.ClassFileInstaller sun.hotspot.WhiteBox
*
* @run main/othervm -ea -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -XX:-BackgroundCompilation TestLongRangeCheck
*
*/
import jdk.internal.util.Preconditions;
import sun.hotspot.WhiteBox;
import java.lang.reflect.Method;
import compiler.whitebox.CompilerWhiteBoxTest;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLClassLoader;
import java.nio.file.Paths;
import java.lang.reflect.InvocationTargetException;
// White-box test for long range checks (Preconditions.checkIndex on a long
// index) inside long counted loops. Each scenario compiles a loop method at
// the full-optimization level, checks that an in-range call leaves it
// compiled, and checks that an out-of-range call throws
// IndexOutOfBoundsException and leaves the method no longer compiled at that
// level.
public class TestLongRangeCheck {
private static final WhiteBox WHITE_BOX = WhiteBox.getWhiteBox();
// Throws unless m is currently compiled at COMP_LEVEL_FULL_OPTIMIZATION.
private static void assertIsCompiled(Method m) {
if (!WHITE_BOX.isMethodCompiled(m) || WHITE_BOX.getMethodCompilationLevel(m) != CompilerWhiteBoxTest.COMP_LEVEL_FULL_OPTIMIZATION) {
throw new RuntimeException("should still be compiled");
}
}
// Throws if m is still compiled at COMP_LEVEL_FULL_OPTIMIZATION; passes when
// m is not compiled at all or is compiled at a different level.
private static void assertIsNotCompiled(Method m) {
if (WHITE_BOX.isMethodCompiled(m) && WHITE_BOX.getMethodCompilationLevel(m) == CompilerWhiteBoxTest.COMP_LEVEL_FULL_OPTIMIZATION) {
throw new RuntimeException("should have been deoptimized");
}
}
// Enqueues m for compilation at the full-optimization level and immediately
// requires it to be compiled.
// NOTE(review): relies on the enqueue being synchronous, presumably thanks to
// -XX:-BackgroundCompilation on the @run line -- confirm.
private static void compile(Method m) {
WHITE_BOX.enqueueMethodForCompilation(m, CompilerWhiteBoxTest.COMP_LEVEL_FULL_OPTIMIZATION);
assertIsCompiled(m);
}
// Creates a URLClassLoader over the directory named by the "test.classes"
// system property (default "."), with a null parent.
// NOTE(review): presumably used so every scenario gets a fresh copy of this
// class, and with it fresh methods to compile -- confirm.
public static ClassLoader newClassLoader() {
try {
return new URLClassLoader(new URL[] {
Paths.get(System.getProperty("test.classes",".")).toUri().toURL(),
}, null);
} catch (MalformedURLException e){
throw new RuntimeException("Unexpected URL conversion failure", e);
}
}
// Runs three scenarios against the named (long, long, long, long) loop method.
// In each one the method is loaded through a new class loader, invoked once,
// compiled, and invoked again with the passing arguments (must stay compiled).
// It is then invoked with a modified iteration range that must raise
// IndexOutOfBoundsException and leave the method no longer compiled at the
// full-optimization level.
private static void test(String method, long start, long stop, long length, long offset) throws Exception {
// Scenario 1: range extended by one on the start side.
Method m = newClassLoader().loadClass("TestLongRangeCheck").getDeclaredMethod(method, long.class, long.class, long.class, long.class);
m.invoke(null, start, stop, length, offset); // run once so all classes are loaded
compile(m);
m.invoke(null, start, stop, length, offset);
assertIsCompiled(m);
try {
m.invoke(null, start-1, stop, length, offset);
throw new RuntimeException("should have thrown");
} catch(InvocationTargetException e) {
// Reflection wraps the callee's exception; only IndexOutOfBoundsException is accepted.
if (!(e.getCause() instanceof IndexOutOfBoundsException)) {
throw new RuntimeException("unexpected exception");
}
}
assertIsNotCompiled(m);
// Scenario 2: range moved to [stop, stop + 100).
m = newClassLoader().loadClass("TestLongRangeCheck").getDeclaredMethod(method, long.class, long.class, long.class, long.class);
m.invoke(null, start, stop, length, offset); // run once so all classes are loaded
compile(m);
assertIsCompiled(m);
m.invoke(null, start, stop, length, offset);
assertIsCompiled(m);
try {
m.invoke(null, stop, stop + 100, length, offset);
throw new RuntimeException("should have thrown");
} catch(InvocationTargetException e) {
if (!(e.getCause() instanceof IndexOutOfBoundsException)) {
throw new RuntimeException("unexpected exception");
}
}
assertIsNotCompiled(m);
// Scenario 3: range extended by one on the stop side.
m = newClassLoader().loadClass("TestLongRangeCheck").getDeclaredMethod(method, long.class, long.class, long.class, long.class);
m.invoke(null, start, stop, length, offset); // run once so all classes are loaded
compile(m);
m.invoke(null, start, stop, length, offset);
assertIsCompiled(m);
try {
m.invoke(null, start, stop+1, length, offset);
throw new RuntimeException("should have thrown");
} catch(InvocationTargetException e) {
if (!(e.getCause() instanceof IndexOutOfBoundsException)) {
throw new RuntimeException("unexpected exception");
}
}
assertIsNotCompiled(m);
}
// Drives every stride/scale combination through test(), then runs three
// hand-written scenarios: an offset that overflows the index computation, and
// two cases where a would-be failing check is never executed.
public static void main(String[] args) throws Exception {
// Stride 1, scale +1 / -1.
test("testStridePosScalePos", 0, 100, 100, 0);
test("testStrideNegScaleNeg", 0, 100, 100, 100);
test("testStrideNegScalePos", 0, 100, 100, 0);
test("testStridePosScaleNeg", 0, 100, 100, 99);
// Stride 1, scale +11 / -11.
test("testStridePosScalePosNotOne", 0, 100, 1090, 0);
test("testStrideNegScaleNegNotOne", 0, 100, 1090, 1100);
test("testStrideNegScalePosNotOne", 0, 100, 1090, 0);
test("testStridePosScaleNegNotOne", 0, 100, 1090, 1089);
// The *NotOne-stride methods step by Integer.MAX_VALUE / 10000, so v is
// exactly 250000 strides.
long v = ((long)Integer.MAX_VALUE / 10000) * 250000;
test("testStridePosNotOneScalePos", -v, v, v * 2, v);
test("testStrideNegNotOneScaleNeg", -v, v, v * 2, v);
test("testStrideNegNotOneScalePos", -v, v, v * 2, v);
test("testStridePosNotOneScaleNeg", -v, v, v * 2, v-1);
// offset causes overflow
{
Method m = newClassLoader().loadClass("TestLongRangeCheck").getDeclaredMethod("testStridePosScalePos", long.class, long.class, long.class, long.class);
m.invoke(null, 0, 100, 100, 0);
compile(m);
m.invoke(null, 0, 100, 100, 0);
assertIsCompiled(m);
try {
m.invoke(null, 0, 100, 100, Long.MAX_VALUE - 50);
throw new RuntimeException("should have thrown");
} catch(InvocationTargetException e) {
if (!(e.getCause() instanceof IndexOutOfBoundsException)) {
throw new RuntimeException("unexpected exception");
}
}
assertIsNotCompiled(m);
}
// no spurious deopt if the range check doesn't fail because not executed
{
Method m = newClassLoader().loadClass("TestLongRangeCheck").getDeclaredMethod("testStridePosScalePosConditional", long.class, long.class, long.class, long.class, long.class, long.class);
m.invoke(null, 0, 100, 100, 0, 0, 100);
compile(m);
// offset -50 with the check guarded to i in [50, 100): checked indices are 0..49.
m.invoke(null, 0, 100, 100, -50, 50, 100);
assertIsCompiled(m);
}
{
Method m = newClassLoader().loadClass("TestLongRangeCheck").getDeclaredMethod("testStridePosScalePosConditional", long.class, long.class, long.class, long.class, long.class, long.class);
m.invoke(null, 0, 100, 100, 0, 0, 100);
compile(m);
// The check is guarded to i in [0, 50), so i + (Long.MAX_VALUE - 50) never overflows.
m.invoke(null, 0, 100, Long.MAX_VALUE, Long.MAX_VALUE - 50, 0, 50);
assertIsCompiled(m);
}
}
// Ascending loop (i += 1), index = i + offset.
public static void testStridePosScalePos(long start, long stop, long length, long offset) {
final long scale = 1;
final long stride = 1;
for (long i = start; i < stop; i += stride) {
Preconditions.checkIndex(scale * i + offset, length, null);
}
}
// Descending loop from stop down to start + 1, index = -i + offset.
public static void testStrideNegScaleNeg(long start, long stop, long length, long offset) {
final long scale = -1;
final long stride = 1;
for (long i = stop; i > start; i -= stride) {
Preconditions.checkIndex(scale * i + offset, length, null);
}
}
// Descending loop from stop - 1 down to start, index = i + offset.
public static void testStrideNegScalePos(long start, long stop, long length, long offset) {
final long scale = 1;
final long stride = 1;
for (long i = stop-1; i >= start; i -= stride) {
Preconditions.checkIndex(scale * i + offset, length, null);
}
}
// Ascending loop (i += 1), index = -i + offset.
public static void testStridePosScaleNeg(long start, long stop, long length, long offset) {
final long scale = -1;
final long stride = 1;
for (long i = start; i < stop; i += stride) {
Preconditions.checkIndex(scale * i + offset, length, null);
}
}
// Ascending loop (i += 1), index = 11 * i + offset.
public static void testStridePosScalePosNotOne(long start, long stop, long length, long offset) {
final long scale = 11;
final long stride = 1;
for (long i = start; i < stop; i += stride) {
Preconditions.checkIndex(scale * i + offset, length, null);
}
}
// Descending loop from stop down to start + 1, index = -11 * i + offset.
public static void testStrideNegScaleNegNotOne(long start, long stop, long length, long offset) {
final long scale = -11;
final long stride = 1;
for (long i = stop; i > start; i -= stride) {
Preconditions.checkIndex(scale * i + offset, length, null);
}
}
// Descending loop from stop - 1 down to start, index = 11 * i + offset.
public static void testStrideNegScalePosNotOne(long start, long stop, long length, long offset) {
final long scale = 11;
final long stride = 1;
for (long i = stop-1; i >= start; i -= stride) {
Preconditions.checkIndex(scale * i + offset, length, null);
}
}
// Ascending loop (i += 1), index = -11 * i + offset.
public static void testStridePosScaleNegNotOne(long start, long stop, long length, long offset) {
final long scale = -11;
final long stride = 1;
for (long i = start; i < stop; i += stride) {
Preconditions.checkIndex(scale * i + offset, length, null);
}
}
// Ascending loop with stride Integer.MAX_VALUE / 10000, index = i + offset.
public static void testStridePosNotOneScalePos(long start, long stop, long length, long offset) {
final long scale = 1;
final long stride = Integer.MAX_VALUE / 10000;
for (long i = start; i < stop; i += stride) {
Preconditions.checkIndex(scale * i + offset, length, null);
}
}
// Descending loop with stride Integer.MAX_VALUE / 10000, index = -i + offset.
public static void testStrideNegNotOneScaleNeg(long start, long stop, long length, long offset) {
final long scale = -1;
final long stride = Integer.MAX_VALUE / 10000;
for (long i = stop; i > start; i -= stride) {
Preconditions.checkIndex(scale * i + offset, length, null);
}
}
// Descending loop from stop - 1 with stride Integer.MAX_VALUE / 10000,
// index = i + offset.
public static void testStrideNegNotOneScalePos(long start, long stop, long length, long offset) {
final long scale = 1;
final long stride = Integer.MAX_VALUE / 10000;
for (long i = stop-1; i >= start; i -= stride) {
Preconditions.checkIndex(scale * i + offset, length, null);
}
}
// Ascending loop with stride Integer.MAX_VALUE / 10000, index = -i + offset.
public static void testStridePosNotOneScaleNeg(long start, long stop, long length, long offset) {
final long scale = -1;
final long stride = Integer.MAX_VALUE / 10000;
for (long i = start; i < stop; i += stride) {
Preconditions.checkIndex(scale * i + offset, length, null);
}
}
// Like testStridePosScalePos, but the range check runs only for i in
// [start2, stop2). An unconditional checkIndex(0, length) runs before the loop.
public static void testStridePosScalePosConditional(long start, long stop, long length, long offset, long start2, long stop2) {
Preconditions.checkIndex(0, length, null);
final long scale = 1;
final long stride = 1;
for (long i = start; i < stop; i += stride) {
if (i >= start2 && i < stop2) {
Preconditions.checkIndex(scale * i + offset, length, null);
}
}
}
}

View File

@ -0,0 +1,60 @@
/*
* Copyright (c) 2021, Red Hat, Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
/**
* @test
* @bug 8259609
* @summary range checks with min int scale value
*
* @run main/othervm -XX:-BackgroundCompilation TestRCMinInt
*
*/
import java.util.Objects;
// Regression test for range checks whose index expression scales the loop
// variable by Integer.MIN_VALUE. It passes as long as no exception escapes
// main.
public class TestRCMinInt {
// Calls both test methods 20_000 times each.
// NOTE(review): the repetition is presumably there to trigger JIT
// compilation of test1/test2 -- confirm.
public static void main(String[] args) {
for (int i = 0; i < 20_000; i++) {
test1(0, 10, 10);
test2(0, 10, 10);
}
}
// Range check on offset + Integer.MIN_VALUE * i. With the arguments used by
// main, i only takes even values, so the int product wraps to 0 and the
// checked index always equals offset (10), which is within [0, 100).
// Always returns 1.
private static float test1(int start, int stop, int offset) {
float v = 1;
for (int i = start; i < stop; i+=2) {
final int index = offset + Integer.MIN_VALUE * i;
Objects.checkIndex(index, 100);
}
return v;
}
// Same as test1, but the scaled term is subtracted: offset - Integer.MIN_VALUE * i.
// Always returns 1.
private static float test2(int start, int stop, int offset) {
float v = 1;
for (int i = start; i < stop; i+=2) {
final int index = offset - Integer.MIN_VALUE * i;
Objects.checkIndex(index, 100);
}
return v;
}
}