8278296: Generalize long range check transformation

Reviewed-by: jrose, thartmann
This commit is contained in:
Roland Westrelin 2022-03-07 16:26:19 +00:00
parent f0995abe62
commit ef266d77b6
7 changed files with 661 additions and 251 deletions

View File

@ -759,7 +759,7 @@ bool IdealLoopTree::is_range_check_if(IfNode *iff, PhaseIdealLoop *phase, BasicT
}
scale = 0;
offset = NULL;
if (!phase->is_scaled_iv_plus_offset(cmp->in(1), iv, &scale, &offset, bt)) {
if (!phase->is_scaled_iv_plus_offset(cmp->in(1), iv, bt, &scale, &offset)) {
return false;
}
return true;

View File

@ -1121,18 +1121,18 @@ bool IdealLoopTree::policy_range_check(PhaseIdealLoop* phase, bool provisional,
continue; // not RC
}
Node *cmp = bol->in(1);
Node *rc_exp = cmp->in(1);
Node *limit = cmp->in(2);
if (provisional) {
// Try to pattern match with either cmp inputs, do not check
// whether one of the inputs is loop independent as it may not
// have had a chance to be hoisted yet.
if (!phase->is_scaled_iv_plus_offset(cmp->in(1), trip_counter, NULL, NULL, bt) &&
!phase->is_scaled_iv_plus_offset(cmp->in(2), trip_counter, NULL, NULL, bt)) {
if (!phase->is_scaled_iv_plus_offset(cmp->in(1), trip_counter, bt, NULL, NULL) &&
!phase->is_scaled_iv_plus_offset(cmp->in(2), trip_counter, bt, NULL, NULL)) {
continue;
}
} else {
Node *rc_exp = cmp->in(1);
Node *limit = cmp->in(2);
Node *limit_c = phase->get_ctrl(limit);
if (limit_c == phase->C->top()) {
return false; // Found dead test on live IF? No RCE!
@ -1147,7 +1147,7 @@ bool IdealLoopTree::policy_range_check(PhaseIdealLoop* phase, bool provisional,
}
}
if (!phase->is_scaled_iv_plus_offset(rc_exp, trip_counter, NULL, NULL, bt)) {
if (!phase->is_scaled_iv_plus_offset(rc_exp, trip_counter, bt, NULL, NULL)) {
continue;
}
}
@ -2522,59 +2522,202 @@ void PhaseIdealLoop::add_constraint(jlong stride_con, jlong scale_con, Node* off
}
}
//----------------------------------is_iv------------------------------------
// Return true if exp is the value (of type bt) of the given induction var.
// This grammar of cases is recognized, where X is I|L according to bt:
// VIV[iv] = iv | (CastXX VIV[iv]) | (ConvI2X VIV[iv])
bool PhaseIdealLoop::is_iv(Node* exp, Node* iv, BasicType bt) {
if (exp == iv) {
exp = exp->uncast();
if (exp == iv && iv->bottom_type()->isa_integer(bt)) {
return true;
}
if (bt == T_LONG && iv->bottom_type()->isa_int() && exp->Opcode() == Op_ConvI2L && exp->in(1) == iv) {
if (bt == T_LONG && iv->bottom_type()->isa_int() && exp->Opcode() == Op_ConvI2L && exp->in(1)->uncast() == iv) {
return true;
}
return false;
}
//------------------------------is_scaled_iv---------------------------------
// Return true if exp is a constant times an induction var
bool PhaseIdealLoop::is_scaled_iv(Node* exp, Node* iv, jlong* p_scale, BasicType bt, bool* converted) {
exp = exp->uncast();
assert(bt == T_INT || bt == T_LONG, "unexpected int type");
if (is_iv(exp, iv, bt)) {
// Return true if exp is a constant times the given induction var (of type bt).
// The multiplication is either done in full precision (exactly of type bt),
// or else bt is T_LONG but iv is scaled using 32-bit arithmetic followed by a ConvI2L.
// This grammar of cases is recognized, where X is I|L according to bt:
// SIV[iv] = VIV[iv] | (CastXX SIV[iv])
// | (MulX VIV[iv] ConX) | (MulX ConX VIV[iv])
// | (LShiftX VIV[iv] ConI)
// | (ConvI2L SIV[iv]) -- a "short-scale" can occur here; note recursion
// | (SubX 0 SIV[iv]) -- same as MulX(iv, -scale); note recursion
// VIV[iv] = [either iv or its value converted; see is_iv() above]
// On success, the constant scale value is stored back to *p_scale.
// The value (*p_short_scale) reports if such a ConvI2L conversion was present.
bool PhaseIdealLoop::is_scaled_iv(Node* exp, Node* iv, BasicType bt, jlong* p_scale, bool* p_short_scale, int depth) {
BasicType exp_bt = bt;
exp = exp->uncast(); //strip casts
assert(exp_bt == T_INT || exp_bt == T_LONG, "unexpected int type");
if (is_iv(exp, iv, exp_bt)) {
if (p_scale != NULL) {
*p_scale = 1;
}
if (p_short_scale != NULL) {
*p_short_scale = false;
}
return true;
}
if (bt == T_LONG && iv->bottom_type()->isa_int() && exp->Opcode() == Op_ConvI2L) {
if (exp_bt == T_LONG && iv->bottom_type()->isa_int() && exp->Opcode() == Op_ConvI2L) {
exp = exp->in(1);
bt = T_INT;
if (converted != NULL) {
*converted = true;
}
exp_bt = T_INT;
}
int opc = exp->Opcode();
int which = 0; // this is which subexpression we find the iv in
// Can't use is_Mul() here as it's true for AndI and AndL
if (opc == Op_Mul(bt)) {
if (is_iv(exp->in(1)->uncast(), iv, bt) && exp->in(2)->is_Con()) {
if (opc == Op_Mul(exp_bt)) {
if ((is_iv(exp->in(which = 1), iv, exp_bt) && exp->in(2)->is_Con()) ||
(is_iv(exp->in(which = 2), iv, exp_bt) && exp->in(1)->is_Con())) {
Node* factor = exp->in(which == 1 ? 2 : 1); // the other argument
jlong scale = factor->find_integer_as_long(exp_bt, 0);
if (scale == 0) {
return false; // might be top
}
if (p_scale != NULL) {
*p_scale = exp->in(2)->get_integer_as_long(bt);
*p_scale = scale;
}
if (p_short_scale != NULL) {
// (ConvI2L (MulI iv K)) can be 64-bit linear if iv is kept small enough...
*p_short_scale = (exp_bt != bt && scale != 1);
}
return true;
}
if (is_iv(exp->in(2)->uncast(), iv, bt) && exp->in(1)->is_Con()) {
} else if (opc == Op_LShift(exp_bt)) {
if (is_iv(exp->in(1), iv, exp_bt) && exp->in(2)->is_Con()) {
jint shift_amount = exp->in(2)->find_int_con(min_jint);
if (shift_amount == min_jint) {
return false; // might be top
}
jlong scale;
if (exp_bt == T_INT) {
scale = java_shift_left((jint)1, (juint)shift_amount);
} else if (exp_bt == T_LONG) {
scale = java_shift_left((jlong)1, (julong)shift_amount);
}
if (p_scale != NULL) {
*p_scale = exp->in(1)->get_integer_as_long(bt);
*p_scale = scale;
}
if (p_short_scale != NULL) {
// (ConvI2L (MulI iv K)) can be 64-bit linear if iv is kept small enough...
*p_short_scale = (exp_bt != bt && scale != 1);
}
return true;
}
} else if (opc == Op_LShift(bt)) {
if (is_iv(exp->in(1)->uncast(), iv, bt) && exp->in(2)->is_Con()) {
} else if (opc == Op_Sub(exp_bt) &&
exp->in(1)->find_integer_as_long(exp_bt, -1) == 0) {
jlong scale = 0;
if (depth == 0 && is_scaled_iv(exp->in(2), iv, exp_bt, &scale, p_short_scale, depth + 1)) {
// SubX(0, iv*K) => iv*(-K)
if (scale == min_signed_integer(exp_bt)) {
// This should work even if -K overflows, but let's not.
return false;
}
scale = java_multiply(scale, (jlong)-1);
if (p_scale != NULL) {
jint shift_amount = exp->in(2)->get_int();
if (bt == T_INT) {
*p_scale = java_shift_left((jint)1, (juint)shift_amount);
} else if (bt == T_LONG) {
*p_scale = java_shift_left((jlong)1, (julong)shift_amount);
*p_scale = scale;
}
if (p_short_scale != NULL) {
// (ConvI2L (MulI iv K)) can be 64-bit linear if iv is kept small enough...
*p_short_scale = *p_short_scale || (exp_bt != bt && scale != 1);
}
return true;
}
}
// We could also recognize (iv*K1)*K2, even with overflow, but let's not.
return false;
}
//-------------------------is_scaled_iv_plus_offset--------------------------
// Return true if exp is a simple linear transform of the given induction var.
// The scale must be constant and the addition tree (if any) must be simple.
// This grammar of cases is recognized, where X is I|L according to bt:
//
// OIV[iv] = SIV[iv] | (CastXX OIV[iv])
// | (AddX SIV[iv] E) | (AddX E SIV[iv])
// | (SubX SIV[iv] E) | (SubX E SIV[iv])
// SSIV[iv] = (ConvI2X SIV[iv]) -- a "short scale" might occur here
// SIV[iv] = [a possibly scaled value of iv; see is_scaled_iv() above]
//
// On success, the constant scale value is stored back to *p_scale unless null.
// Likewise, the addend (perhaps a synthetic AddX node) is stored to *p_offset.
// Also, (*p_short_scale) reports if a ConvI2L conversion was seen after a MulI,
// meaning bt is T_LONG but iv was scaled using 32-bit arithmetic.
// To avoid looping, the match is depth-limited, and so it may fail to match complex expressions even if they fit the grammar.
bool PhaseIdealLoop::is_scaled_iv_plus_offset(Node* exp, Node* iv, BasicType bt, jlong* p_scale, Node** p_offset, bool* p_short_scale, int depth) {
assert(bt == T_INT || bt == T_LONG, "unexpected int type");
jlong scale = 0; // to catch result from is_scaled_iv()
BasicType exp_bt = bt;
exp = exp->uncast();
if (is_scaled_iv(exp, iv, exp_bt, &scale, p_short_scale)) {
if (p_scale != NULL) {
*p_scale = scale;
}
if (p_offset != NULL) {
Node *zero = _igvn.zerocon(bt);
set_ctrl(zero, C->root());
*p_offset = zero;
}
return true;
}
if (exp_bt != bt) {
// We would now be matching inputs like (ConvI2L exp:(AddI (MulI iv S) E)).
// It's hard to make 32-bit arithmetic linear if it overflows. Although we do
// cope with overflowing multiplication by S, it would be even more work to
// handle overflowing addition of E. So we bail out here on ConvI2L input.
return false;
}
int opc = exp->Opcode();
int which = 0; // this is which subexpression we find the iv in
Node* offset = NULL;
if (opc == Op_Add(exp_bt)) {
// Check for a scaled IV in (AddX (MulX iv S) E) or (AddX E (MulX iv S)).
if (is_scaled_iv(exp->in(which = 1), iv, bt, &scale, p_short_scale) ||
is_scaled_iv(exp->in(which = 2), iv, bt, &scale, p_short_scale)) {
offset = exp->in(which == 1 ? 2 : 1); // the other argument
if (p_scale != NULL) {
*p_scale = scale;
}
if (p_offset != NULL) {
*p_offset = offset;
}
return true;
}
// Check for more addends, like (AddX (AddX (MulX iv S) E1) E2), etc.
if (is_scaled_iv_plus_extra_offset(exp->in(1), exp->in(2), iv, bt, p_scale, p_offset, p_short_scale, depth) ||
is_scaled_iv_plus_extra_offset(exp->in(2), exp->in(1), iv, bt, p_scale, p_offset, p_short_scale, depth)) {
return true;
}
} else if (opc == Op_Sub(exp_bt)) {
if (is_scaled_iv(exp->in(which = 1), iv, bt, &scale, p_short_scale) ||
is_scaled_iv(exp->in(which = 2), iv, bt, &scale, p_short_scale)) {
// Match (SubX SIV[iv] E) as if (AddX SIV[iv] (SubX 0 E)), and
// match (SubX E SIV[iv]) as if (AddX E (SubX 0 SIV[iv])).
offset = exp->in(which == 1 ? 2 : 1); // the other argument
if (which == 2) {
// We can't handle a scale of min_jint (or min_jlong) here as -1 * min_jint = min_jint
if (scale == min_signed_integer(bt)) {
return false; // cannot negate the scale of the iv
}
scale = java_multiply(scale, (jlong)-1);
}
if (p_scale != NULL) {
*p_scale = scale;
}
if (p_offset != NULL) {
if (which == 1) { // must negate the extracted offset
Node *zero = _igvn.integercon(0, exp_bt);
set_ctrl(zero, C->root());
Node *ctrl_off = get_ctrl(offset);
offset = SubNode::make(zero, offset, exp_bt);
register_new_node(offset, ctrl_off);
}
*p_offset = offset;
}
return true;
}
@ -2582,71 +2725,30 @@ bool PhaseIdealLoop::is_scaled_iv(Node* exp, Node* iv, jlong* p_scale, BasicType
return false;
}
//-----------------------------is_scaled_iv_plus_offset------------------------------
// Return true if exp is a simple induction variable expression: k1*iv + (invar + k2)
bool PhaseIdealLoop::is_scaled_iv_plus_offset(Node* exp, Node* iv, jlong* p_scale, Node** p_offset, BasicType bt, bool* converted, int depth) {
assert(bt == T_INT || bt == T_LONG, "unexpected int type");
if (is_scaled_iv(exp, iv, p_scale, bt, converted)) {
// Helper for is_scaled_iv_plus_offset(), not called separately.
// The caller encountered (AddX exp1 offset3) or (AddX offset3 exp1).
// Here, exp1 is inspected to see if it is a simple linear transform of iv.
// If so, the offset3 is combined with any other offset2 from inside exp1.
bool PhaseIdealLoop::is_scaled_iv_plus_extra_offset(Node* exp1, Node* offset3, Node* iv,
BasicType bt,
jlong* p_scale, Node** p_offset,
bool* p_short_scale, int depth) {
// By the time we reach here, it is unlikely that exp1 is a simple iv*K.
// If it is a linear iv transform, it is probably an add or subtract.
// Let's collect the internal offset2 from it.
Node* offset2 = NULL;
if (offset3->is_Con() &&
depth < 2 &&
is_scaled_iv_plus_offset(exp1, iv, bt, p_scale,
&offset2, p_short_scale, depth+1)) {
if (p_offset != NULL) {
Node *zero = _igvn.integercon(0, bt);
set_ctrl(zero, C->root());
*p_offset = zero;
Node* ctrl_off2 = get_ctrl(offset2);
Node* offset = AddNode::make(offset2, offset3, bt);
register_new_node(offset, ctrl_off2);
*p_offset = offset;
}
return true;
}
exp = exp->uncast();
int opc = exp->Opcode();
if (opc == Op_Add(bt)) {
if (is_scaled_iv(exp->in(1), iv, p_scale, bt, converted)) {
if (p_offset != NULL) {
*p_offset = exp->in(2);
}
return true;
}
if (is_scaled_iv(exp->in(2), iv, p_scale, bt, converted)) {
if (p_offset != NULL) {
*p_offset = exp->in(1);
}
return true;
}
if (exp->in(2)->is_Con()) {
Node* offset2 = NULL;
if (depth < 2 &&
is_scaled_iv_plus_offset(exp->in(1), iv, p_scale,
p_offset != NULL ? &offset2 : NULL, bt, converted, depth+1)) {
if (p_offset != NULL) {
Node *ctrl_off2 = get_ctrl(offset2);
Node* offset = AddNode::make(offset2, exp->in(2), bt);
register_new_node(offset, ctrl_off2);
*p_offset = offset;
}
return true;
}
}
} else if (opc == Op_Sub(bt)) {
if (is_scaled_iv(exp->in(1), iv, p_scale, bt, converted)) {
if (p_offset != NULL) {
Node *zero = _igvn.integercon(0, bt);
set_ctrl(zero, C->root());
Node *ctrl_off = get_ctrl(exp->in(2));
Node* offset = SubNode::make(zero, exp->in(2), bt);
register_new_node(offset, ctrl_off);
*p_offset = offset;
}
return true;
}
if (is_scaled_iv(exp->in(2), iv, p_scale, bt, converted)) {
if (p_offset != NULL) {
// We can't handle a scale of min_jint (or min_jlong) here as -1 * min_jint = min_jint
if (*p_scale == min_signed_integer(bt)) {
return false;
}
*p_scale *= -1;
*p_offset = exp->in(1);
}
return true;
}
}
return false;
}

View File

@ -1122,9 +1122,6 @@ void PhaseIdealLoop::strip_mined_nest_back_to_counted_loop(IdealLoopTree* loop,
int PhaseIdealLoop::extract_long_range_checks(const IdealLoopTree* loop, jlong stride_con, int iters_limit, PhiNode* phi,
Node_List& range_checks) {
if (stride_con < 0) { // only for stride_con > 0 && scale > 0 for now
return iters_limit;
}
const jlong min_iters = 2;
jlong reduced_iters_limit = iters_limit;
jlong original_iters_limit = iters_limit;
@ -1139,7 +1136,6 @@ int PhaseIdealLoop::extract_long_range_checks(const IdealLoopTree* loop, jlong s
RangeCheckNode* rc = c->in(0)->as_RangeCheck();
if (loop->is_range_check_if(rc, this, T_LONG, phi, range, offset, scale) &&
loop->is_invariant(range) && loop->is_invariant(offset) &&
scale > 0 && // only for stride_con > 0 && scale > 0 for now
original_iters_limit / ABS(scale * stride_con) >= min_iters) {
reduced_iters_limit = MIN2(reduced_iters_limit, original_iters_limit/ABS(scale));
range_checks.push(c);
@ -1154,27 +1150,29 @@ int PhaseIdealLoop::extract_long_range_checks(const IdealLoopTree* loop, jlong s
// One execution of the inner loop covers a sub-range of the entire iteration range of the loop: [A,Z), aka [A=init,
// Z=limit). If the loop has at least one trip (which is the case here), the iteration variable i always takes A as its
// first value, followed by A+S (S is the stride), next A+2S, etc. The limit is exclusive, so that the final value B of
// i is never Z. It will be B=Z-1 if S=1, or B=Z+1 if S=-1. If |S|>1 the formula for the last value requires a floor
// operation, specifically B=floor((Z-sgn(S)-A)/S)*S+A. Thus i ranges as i:[A,B] or i:[A,Z) or i:[A,Z-U) for some U<S.
// i is never Z. It will be B=Z-1 if S=1, or B=Z+1 if S=-1.
// N.B. We handle only the case of positive S currently, so comments about S<0 are not operative at present. Also,
// we only support positive index scale value (K > 0) to simplify the logic for clamping 32-bit bounds (L_2, R_2).
// For restrictions on S and K, see the guards in extract_long_range_checks.
// If |S|>1 the formula for the last value B would require a floor operation, specifically B=floor((Z-sgn(S)-A)/S)*S+A,
// which is B=Z-sgn(S)U for some U in [1,|S|]. So when S>0, i ranges as i:[A,Z) or i:[A,B=Z-U], or else (in reverse)
// as i:(Z,A] or i:[B=Z+U,A]. It will become important to reason about this inclusive range [A,B] or [B,A].
// Within the loop there may be many range checks. Each such range check (R.C.) is of the form 0 <= i*K+L < R, where K
// is a scale factor applied to the loop iteration variable i, and L is some offset; K, L, and R are loop-invariant.
// Because R is never negative, this check can always be simplified to an unsigned check i*K+L <u R.
// Because R is never negative (see below), this check can always be simplified to an unsigned check i*K+L <u R.
// When a long loop over a 64-bit variable i (outer_iv) is decomposed into a series of shorter sub-loops over a 32-bit
// variable j (inner_iv), j ranges over a shorter interval j:[0,Z_2), where the limit is chosen to prevent various cases
// of 32-bit overflow (including multiplications j*K below). In the sub-loop the logical value i is offset from j by a
// 64-bit constant C, so i ranges in i:C+[0,Z_2).
// variable j (inner_iv), j ranges over a shorter interval j:[0,B_2] or [0,Z_2) (assuming S > 0), where the limit is
// chosen to prevent various cases of 32-bit overflow (including multiplications j*K below). In the sub-loop the
// logical value i is offset from j by a 64-bit constant C, so i ranges in i:C+[0,Z_2).
// The union of all the C+[0,Z_2) ranges from the sub-loops must be identical to the whole range [A,B]. Assuming S>0,
// the first C must be A itself, and the next C value is the previous C+Z_2. In each sub-loop, j counts up from zero
// and exits just before i=C+Z_2.
// For S<0, j ranges (in reverse!) through j:[-|B_2|,0] or (-|Z_2|,0]. For either sign of S, we can say i=j+C and j
// ranges through 32-bit ranges [A_2,B_2] or [B_2,A_2] (A_2=0 of course).
// (N.B. If S<0 the formulas are different, because all the loops count downward.)
// The disjoint union of all the C+[A_2,B_2] ranges from the sub-loops must be identical to the whole range [A,B].
// Assuming S>0, the first C must be A itself, and the next C value is the previous C+B_2, plus S. If |S|=1, the next
// C value is also the previous C+Z_2. In each sub-loop, j counts from j=A_2=0 and i counts from C+0 and exits at
// j=B_2 (i=C+B_2), just before it gets to i=C+Z_2. Both i and j count up (from C and 0) if S>0; otherwise they count
// down (from C and 0 again).
// Returning to range checks, we see that each i*K+L <u R expands to (C+j)*K+L <u R, or j*K+Q <u R, where Q=(C*K+L).
// (Recall that K and L and R are loop-invariant scale, offset and range values for a particular R.C.) This is still a
@ -1194,65 +1192,104 @@ int PhaseIdealLoop::extract_long_range_checks(const IdealLoopTree* loop, jlong s
// If 32-bit multiplication j*K might overflow, we adjust the sub-loop limit Z_2 closer to zero to reduce j's range.
// For each R.C. j*K+Q <u32 R, the range of mathematical values of j*K+Q in the sub-loop is [Q_min, Q_max), where
// Q_min=Q and Q_max=Z_2*K+Q. Making the upper limit Q_max be exclusive helps it integrate correctly with the strict
// comparisons against R and R_2. Sometimes a very high R will be replaced by an R_2 derived from the more moderate
// Q_max, and replacing one exclusive limit by another exclusive limit avoids off-by-one complexities.
// For each R.C. j*K+Q <u32 R, the range of mathematical values of j*K+Q in the sub-loop is [Q_min, Q_max], where
// Q_min=Q and Q_max=B_2*K+Q (if S>0 and K>0), Q_min=A_2*K+Q and Q_max=Q (if S<0 and K>0),
// Q_min=B_2*K+Q and Q_max=Q (if S>0 and K<0), Q_min=Q and Q_max=A_2*K+Q (if S<0 and K<0).
// N.B. If (S*K)<0 then the formulas for Q_min and Q_max may differ; the values may need to be swapped and adjusted to
// the correct type of bound (inclusive or exclusive).
// Note that the first R.C. value is always Q=(S*K>0 ? Q_min : Q_max). Also Q_{min,max} = Q + {min,max}(A_2*K,B_2*K).
// If S*K>0 then, as the loop iterations progress, each R.C. value i*K+L = j*K+Q goes up from Q=Q_min towards Q_max.
// If S*K<0 then j*K+Q starts at Q=Q_max and goes down towards Q_min.
// Case A: Some Negatives (but no overflow).
// Number line:
// |s64_min . . . 0 . . . s64_max|
// | . Q_min..Q_max . 0 . . . . | s64 negative
// | . . . . R=0 R< R< R< R< | (against R values)
// | . . . Q_min..0..Q_max . . . | small mixed
// | . . . . R R R< R< R< | (against R values)
//
// if Q_min <s64 0, then use this test:
// j*K + s32_trunc(Q_min) <u32 clamp(R, 0, Q_max)
// R values which are out of range (>Q_max+1) are reduced to max(0,Q_max+1). They are marked on the number line as R<.
//
// So, if Q_min <s64 0, then use this test:
// j*K + s32_trunc(Q_min) <u32 clamp(R, 0, Q_max+1) if S*K>0 (R.C.E. steps upward)
// j*K + s32_trunc(Q_max) <u32 clamp(R, 0, Q_max+1) if S*K<0 (R.C.E. steps downward)
// Both formulas reduce to adding j*K to the 32-bit truncated value of the first R.C. expression value, Q:
// j*K + s32_trunc(Q) <u32 clamp(R, 0, Q_max+1) for all S,K
// If the 32-bit truncation loses information, no harm is done, since certainly the clamp also returns R_2=zero.
// If the 32-bit truncation loses information, no harm is done, since certainly the clamp also will return R_2=zero.
// Case B: No Negatives.
// Number line:
// |s64_min . . . 0 . . . s64_max|
// | . . . . 0 Q_min..Q_max . . | small positive
// | . . . . R> R R R< R< | (against R values)
// | . . . . 0 . Q_min..Q_max . | s64 positive
// | . . . . R> R> R R R< | (against R values)
//
// if both Q_min, Q_max >=s64 0, then use this test:
// j*K + 0 <u32 clamp(R, Q_min, Q_max) - Q_min
// or equivalently:
// j*K + 0 <u32 clamp(R - Q_min, 0, Q_max - Q_min)
// R values which are out of range (<Q_min or >Q_max+1) are reduced as marked: R> up to Q_min, R< down to Q_max+1.
// Then the whole comparison is shifted left by Q_min, so it can take place at zero, which is a nice 32-bit value.
//
// So, if both Q_min, Q_max+1 >=s64 0, then use this test:
// j*K + 0 <u32 clamp(R, Q_min, Q_max+1) - Q_min if S*K>0
// More generally:
// j*K + Q - Q_min <u32 clamp(R, Q_min, Q_max+1) - Q_min for all S,K
// Case C: Overflow in the 64-bit domain
// Number line:
// |..Q_max-2^64 . . 0 . . . Q_min..| s64 overflow
// | . . . . R> R> R> R> R | (against R values)
//
// if Q_min >=s64 0 but Q_max <s64 0, then use this test:
// j*K + 0 <u32 clamp(R, Q_min, R) - Q_min
// or equivalently:
// j*K + 0 <u32 clamp(R - Q_min, 0, R - Q_min)
// or also equivalently:
// j*K + 0 <u32 max(0, R - Q_min)
// In this case, Q_min >s64 Q_max+1, even though the mathematical values of Q_min and Q_max+1 are correctly ordered.
// The formulas from the previous case can be used, except that the bad upper bound Q_max is replaced by max_jlong.
// (In fact, we could use any replacement bound from R to max_jlong inclusive, as the input to the clamp function.)
//
// Here the clamp function is a simple 64-bit min/max:
// clamp(X, L, H) := max(L, min(X, H))
// So if Q_min >=s64 0 but Q_max+1 <s64 0, use this test:
// j*K + 0 <u32 clamp(R, Q_min, max_jlong) - Q_min if S*K>0
// More generally:
// j*K + Q - Q_min <u32 clamp(R, Q_min, max_jlong) - Q_min for all S,K
//
// Dropping the bad bound means only Q_min is used to reduce the range of R:
// j*K + Q - Q_min <u32 max(Q_min, R) - Q_min for all S,K
//
// Here the clamp function is a 64-bit min/max that reduces the dynamic range of its R operand to the required [L,H]:
// clamp(X, L, H) := max(L, min(X, H))
// When degenerately L > H, it returns L not H.
//
// Tests above can be merged into a single one:
// L_clamp = Q_min < 0 ? 0 : Q_min
// H_clamp = Q_max < Q_min ? R : Q_max
// j*K + Q_min - L_clamp <u32 clamp(R, L_clamp, H_clamp) - L_clamp
// or equivalently:
// j*K + Q_min - L_clamp <u32 clamp(R - L_clamp, 0, H_clamp - L_clamp)
// All of the formulas above can be merged into a single one:
// L_clamp = Q_min < 0 ? 0 : Q_min --whether and how far to left-shift
// H_clamp = Q_max+1 < Q_min ? max_jlong : Q_max+1
// = Q_max+1 < 0 && Q_min >= 0 ? max_jlong : Q_max+1
// Q_first = Q = (S*K>0 ? Q_min : Q_max) = (C*K+L)
// R_clamp = clamp(R, L_clamp, H_clamp) --reduced dynamic range
// replacement R.C.:
// j*K + Q_first - L_clamp <u32 R_clamp - L_clamp
// or equivalently:
// j*K + L_2 <u32 R_2
// where
// L_2 = Q_first - L_clamp
// R_2 = R_clamp - L_clamp
//
// Note on why R is never negative:
//
// Various details of this transformation would break badly if R could be negative, so this transformation only
// operates after obtaining hard evidence that R<0 is impossible. For example, if R comes from a LoadRange node, we
// know R cannot be negative. For explicit checks (of both int and long) a proof is constructed in
// inline_preconditions_checkIndex, which triggers an uncommon trap if R<0, then wraps R in a ConstraintCastNode with a
// non-negative type. Later on, when IdealLoopTree::is_range_check_if looks for an optimizable R.C., it checks that
// the type of that R node is non-negative. Any "wild" R node that could be negative is not treated as an optimizable
// R.C., but R values from a.length and inside checkIndex are good to go.
//
// Readers may find the equivalent forms easier to reason about, but the forms given first generate better code.
void PhaseIdealLoop::transform_long_range_checks(int stride_con, const Node_List &range_checks, Node* outer_phi,
Node* inner_iters_actual_int, Node* inner_phi,
Node* iv_add, LoopNode* inner_head) {
Node* long_zero = _igvn.longcon(0);
set_ctrl(long_zero, C->root());
Node* int_zero = _igvn.intcon(0);
set_ctrl(int_zero, this->C->root());
Node* long_one = _igvn.longcon(1);
set_ctrl(long_one, this->C->root());
Node* int_stride = _igvn.intcon(checked_cast<int>(stride_con));
set_ctrl(int_stride, this->C->root());
for (uint i = 0; i < range_checks.size(); i++) {
ProjNode* proj = range_checks.at(i)->as_Proj();
@ -1266,8 +1303,8 @@ void PhaseIdealLoop::transform_long_range_checks(int stride_con, const Node_List
// could be shared and have already been taken care of
continue;
}
bool converted = false;
bool ok = is_scaled_iv_plus_offset(rc_cmp->in(1), iv_add, &scale, &offset, T_LONG, &converted);
bool short_scale = false;
bool ok = is_scaled_iv_plus_offset(rc_cmp->in(1), iv_add, T_LONG, &scale, &offset, &short_scale);
assert(ok, "inconsistent: was tested before");
Node* range = rc_cmp->in(2);
Node* c = rc->in(0);
@ -1279,33 +1316,33 @@ void PhaseIdealLoop::transform_long_range_checks(int stride_con, const Node_List
Node* L = offset;
if (converted) {
if (short_scale) {
// This converts:
// i*K + L <u64 R
// (int)i*K + L <u64 R
// with K an int into:
// i*(long)K + L <u64 unsigned_min((long)max_jint + L + 1, R)
// to protect against an overflow of i*K
// to protect against an overflow of (int)i*K
//
// Because if i*K overflows, there are K,L where:
// i*K + L <u64 R is false
// when
// i*(long)K is > (long)max_jint and < R
// and so i*(long)K + L <u64 R is true
// As a consequence simply converting:
// i*K + L <u64 R to i*(long)K + L <u64 R could cause incorrect execution
// Because if (int)i*K overflows, there are K,L where:
// (int)i*K + L <u64 R is false because (int)i*K+L overflows to a negative which becomes a huge u64 value.
// But if i*(long)K + L is >u64 (long)max_jint and still is <u64 R, then
// i*(long)K + L <u64 R is true.
//
// As a consequence simply converting i*K + L <u64 R to i*(long)K + L <u64 R could cause incorrect execution.
//
// It's always true that:
// i*K <u64 (long)max_jint + 1
// which implies i*K + L <u64 (long)max_jint + 1 + L
// (int)i*K <u64 (long)max_jint + 1
// which implies (int)i*K + L <u64 (long)max_jint + 1 + L
// As a consequence:
// i*(long)K + L <u64 unsigned_min((long)max_jint + L + 1, R)
// is always false in case of overflow of i*K
//
// Note, there are K,L where i*K overflows and
// Note, there are also K,L where i*K overflows and
// i*K + L <u64 R is true, but
// i*(long)K + L <u64 unsigned_min((long)max_jint + L + 1, R) is false
// So this transformation could cause spurious deoptimizations and failed range check elimination
// (but not incorrect execution) for unlikely corner cases with overflow
// (but not incorrect execution) for unlikely corner cases with overflow.
// If this causes problems in practice, we could maybe direct execution to a post-loop, instead of deoptimizing.
Node* max_jint_plus_one_long = _igvn.longcon((jlong)max_jint + 1);
set_ctrl(max_jint_plus_one_long, C->root());
Node* max_range = new AddLNode(max_jint_plus_one_long, L);
@ -1315,26 +1352,38 @@ void PhaseIdealLoop::transform_long_range_checks(int stride_con, const Node_List
}
Node* C = outer_phi;
Node* Z_2 = new ConvI2LNode(inner_iters_actual_int, TypeLong::LONG);
register_new_node(Z_2, entry_control);
// Start with 64-bit values:
// i*K + L <u64 R
// (C+j)*K + L <u64 R
// j*K + L_2 <u64 R where L_2 = C*K+L
Node* L_2 = new MulLNode(C, K);
register_new_node(L_2, entry_control);
L_2 = new AddLNode(L_2, L);
register_new_node(L_2, entry_control);
// j*K + Q <u64 R where Q = Q_first = C*K+L
Node* Q_first = new MulLNode(C, K);
register_new_node(Q_first, entry_control);
Q_first = new AddLNode(Q_first, L);
register_new_node(Q_first, entry_control);
// Compute endpoints of the range of values j*K.
// Q_min = (j=0)*K + L_2; Q_max = (j=Z_2)*K + L_2
Node* Q_min = L_2;
Node* Q_max = new MulLNode(Z_2, K);
// Compute endpoints of the range of values j*K + Q.
// Q_min = (j=0)*K + Q; Q_max = (j=B_2)*K + Q
Node* Q_min = Q_first;
// Compute the exact ending value B_2 (which is really A_2 if S < 0)
Node* B_2 = new LoopLimitNode(this->C, int_zero, inner_iters_actual_int, int_stride);
register_new_node(B_2, entry_control);
B_2 = new SubINode(B_2, int_stride);
register_new_node(B_2, entry_control);
B_2 = new ConvI2LNode(B_2);
register_new_node(B_2, entry_control);
Node* Q_max = new MulLNode(B_2, K);
register_new_node(Q_max, entry_control);
Q_max = new AddLNode(Q_max, L_2);
Q_max = new AddLNode(Q_max, Q_first);
register_new_node(Q_max, entry_control);
if (scale * stride_con < 0) {
swap(Q_min, Q_max);
}
// Now, mathematically, Q_max > Q_min, and they are close enough so that (Q_max-Q_min) fits in 32 bits.
// L_clamp = Q_min < 0 ? 0 : Q_min
Node* Q_min_cmp = new CmpLNode(Q_min, long_zero);
register_new_node(Q_min_cmp, entry_control);
@ -1342,38 +1391,53 @@ void PhaseIdealLoop::transform_long_range_checks(int stride_con, const Node_List
register_new_node(Q_min_bool, entry_control);
Node* L_clamp = new CMoveLNode(Q_min_bool, Q_min, long_zero, TypeLong::LONG);
register_new_node(L_clamp, entry_control);
// (This could also be coded bitwise as L_clamp = Q_min & ~(Q_min>>63).)
// H_clamp = Q_max < Q_min ? R : Q_max
Node* Q_max_cmp = new CmpLNode(Q_max, Q_min);
Node* Q_max_plus_one = new AddLNode(Q_max, long_one);
register_new_node(Q_max_plus_one, entry_control);
// H_clamp = Q_max+1 < Q_min ? max_jlong : Q_max+1
// (Because Q_min and Q_max are close, the overflow check could also be encoded as Q_max+1 < 0 & Q_min >= 0.)
Node* max_jlong_long = _igvn.longcon(max_jlong);
set_ctrl(max_jlong_long, this->C->root());
Node* Q_max_cmp = new CmpLNode(Q_max_plus_one, Q_min);
register_new_node(Q_max_cmp, entry_control);
Node* Q_max_bool = new BoolNode(Q_max_cmp, BoolTest::lt);
register_new_node(Q_max_bool, entry_control);
Node* H_clamp = new CMoveLNode(Q_max_bool, Q_max, R, TypeLong::LONG);
Node* H_clamp = new CMoveLNode(Q_max_bool, Q_max_plus_one, max_jlong_long, TypeLong::LONG);
register_new_node(H_clamp, entry_control);
// (This could also be coded bitwise as H_clamp = ((Q_max+1)<<1 | M)>>>1 where M = (Q_max+1)>>63 & ~Q_min>>63.)
// R_2 = clamp(R, L_clamp, H_clamp) - L_clamp
// that is: R_2 = clamp(R, L_clamp, H_clamp) if Q_min < 0
// or: R_2 = clamp(R, L_clamp, H_clamp) - Q_min if Q_min > 0
// that is: R_2 = clamp(R, L_clamp=0, H_clamp=Q_max+1) if Q_min < 0
// or else: R_2 = clamp(R, L_clamp, H_clamp) - Q_min if Q_min >= 0
// and also: R_2 = clamp(R, L_clamp, Q_max+1) - L_clamp if Q_min < Q_max+1 (no overflow)
// or else: R_2 = clamp(R, L_clamp, *no limit*)- L_clamp if Q_max+1 < Q_min (overflow)
Node* R_2 = clamp(R, L_clamp, H_clamp);
R_2 = new SubLNode(R_2, L_clamp);
register_new_node(R_2, entry_control);
R_2 = new ConvL2INode(R_2, TypeInt::POS);
register_new_node(R_2, entry_control);
// Q = Q_min - L_clamp
// that is: Q = Q_min - 0 if Q_min < 0
// or: Q = Q_min - Q_min = 0 if Q_min > 0
Node* Q = new SubLNode(Q_min, L_clamp);
register_new_node(Q, entry_control);
Q = new ConvL2INode(Q, TypeInt::INT);
register_new_node(Q, entry_control);
// L_2 = Q_first - L_clamp
// We are subtracting L_clamp from both sides of the <u32 comparison.
// If S*K>0, then Q_first == 0 and the R.C. expression starts at -L_clamp and steps upward to Q_max-L_clamp.
// If S*K<0, then Q_first != 0 and the R.C. expression starts high and steps downward to Q_min-L_clamp.
Node* L_2 = new SubLNode(Q_first, L_clamp);
register_new_node(L_2, entry_control);
L_2 = new ConvL2INode(L_2, TypeInt::INT);
register_new_node(L_2, entry_control);
// Transform the range check
// Transform the range check using the computed values L_2/R_2
// from: i*K + L <u64 R
// to: j*K + L_2 <u32 R_2
// that is:
// (j*K + Q_first) - L_clamp <u32 clamp(R, L_clamp, H_clamp) - L_clamp
K = _igvn.intcon(checked_cast<int>(scale));
set_ctrl(K, this->C->root());
Node* scaled_iv = new MulINode(inner_phi, K);
register_new_node(scaled_iv, c);
Node* scaled_iv_plus_offset = scaled_iv_plus_offset = new AddINode(scaled_iv, Q);
Node* scaled_iv_plus_offset = scaled_iv_plus_offset = new AddINode(scaled_iv, L_2);
register_new_node(scaled_iv_plus_offset, c);
Node* new_rc_cmp = new CmpUNode(scaled_iv_plus_offset, R_2);

View File

@ -1265,15 +1265,15 @@ public:
void mark_reductions( IdealLoopTree *loop );
// Return true if exp is a constant times an induction var
bool is_scaled_iv(Node* exp, Node* iv, jlong* p_scale, BasicType bt, bool* converted);
bool is_scaled_iv(Node* exp, Node* iv, BasicType bt, jlong* p_scale, bool* p_short_scale, int depth = 0);
bool is_iv(Node* exp, Node* iv, BasicType bt);
// Return true if exp is a scaled induction var plus (or minus) constant
bool is_scaled_iv_plus_offset(Node* exp, Node* iv, jlong* p_scale, Node** p_offset, BasicType bt, bool* converted = NULL, int depth = 0);
bool is_scaled_iv_plus_offset(Node* exp, Node* iv, BasicType bt, jlong* p_scale, Node** p_offset, bool* p_short_scale = NULL, int depth = 0);
bool is_scaled_iv_plus_offset(Node* exp, Node* iv, int* p_scale, Node** p_offset) {
jlong long_scale;
if (is_scaled_iv_plus_offset(exp, iv, &long_scale, p_offset, T_INT)) {
if (is_scaled_iv_plus_offset(exp, iv, T_INT, &long_scale, p_offset)) {
int int_scale = checked_cast<int>(long_scale);
if (p_scale != NULL) {
*p_scale = int_scale;
@ -1282,6 +1282,12 @@ public:
}
return false;
}
// Helper for finding more complex matches to is_scaled_iv_plus_offset.
bool is_scaled_iv_plus_extra_offset(Node* exp1, Node* offset2, Node* iv,
BasicType bt,
jlong* p_scale, Node** p_offset,
bool* p_short_scale, int depth);
// Enum to determine the action to be performed in create_new_if_for_predicate() when processing phis of UCT regions.
enum class UnswitchingAction {
@ -1658,6 +1664,7 @@ public:
void strip_mined_nest_back_to_counted_loop(IdealLoopTree* loop, const BaseCountedLoopNode* head, Node* back_control,
IfNode*&exit_test, SafePointNode*&safepoint);
void push_pinned_nodes_thru_region(IfNode* dom_if, Node* region);
bool try_merge_identical_ifs(Node* n);

View File

@ -1153,7 +1153,12 @@ public:
// Return this node's integer constant of basic type bt, widened to jlong.
// The type is looked up with find_integer_type(bt); guarantee() fires if that
// lookup yields NULL or a non-constant type, so only call this when the value
// is known to be a constant. find_integer_as_long() is the non-failing variant.
jlong get_integer_as_long(BasicType bt) const {
const TypeInteger* t = find_integer_type(bt);
guarantee(t != NULL, "must be con");
guarantee(t != NULL && t->is_con(), "must be con");
return t->get_con_as_long(bt);
}
// Non-failing counterpart of get_integer_as_long(): yields the constant value
// (as a jlong) when find_integer_type(bt) reports a constant type, and
// value_if_unknown in every other case (no integer type, or not a constant).
jlong find_integer_as_long(BasicType bt, jlong value_if_unknown) const {
  const TypeInteger* int_type = find_integer_type(bt);
  if (int_type != NULL && int_type->is_con()) {
    return int_type->get_con_as_long(bt);
  }
  return value_if_unknown;
}
const TypePtr* get_ptr_type() const;

View File

@ -96,4 +96,150 @@ public class TestLongRangeChecks {
// Drives testStridePosScalePosInIntLoop2 with start=0, stop=100, length=200, offset=0.
private void testStridePosScalePosInIntLoop2_runner() {
testStridePosScalePosInIntLoop2(0, 100, 200, 0);
}
// Long loop counting down (i = stop, stop-1, ..., start+1) with a negative
// scale: checks index -1 * i + offset against length on every iteration.
// The @IR rules ask for a LOOP count of 1 and fail if COUNTEDLOOP is present.
@Test
@IR(counts = { IRNode.LOOP, "1"})
@IR(failOn = { IRNode.COUNTEDLOOP})
public static void testStrideNegScaleNeg(long start, long stop, long length, long offset) {
final long scale = -1;
final long stride = 1;
for (long i = stop; i > start; i -= stride) {
Objects.checkIndex(scale * i + offset, length);
}
}
// Drives testStrideNegScaleNeg with i in [1, 100] and offset 100, so the
// checked index -i + 100 stays within [0, 99] for length 100.
@Run(test = "testStrideNegScaleNeg")
private void testStrideNegScaleNeg_runner() {
testStrideNegScaleNeg(0, 100, 100, 100);
}
// Int loop counting down (i = stop, ..., start+1) with a long scale of -2:
// the int induction variable is widened before the multiply, and
// -2 * i + offset is checked against length on every iteration.
// The @IR rules ask for a LOOP count of 1 and fail if COUNTEDLOOP is present.
@Test
@IR(counts = { IRNode.LOOP, "1" })
@IR(failOn = { IRNode.COUNTEDLOOP })
public static void testStrideNegScaleNegInIntLoop1(int start, int stop, long length, long offset) {
final long scale = -2;
final int stride = 1;
for (int i = stop; i > start; i -= stride) {
Objects.checkIndex(scale * i + offset, length);
}
}
// Drives testStrideNegScaleNegInIntLoop1 with i in [1, 100] and offset 200,
// so the checked index -2 * i + 200 stays within [0, 198] for length 200.
@Run(test = "testStrideNegScaleNegInIntLoop1")
private void testStrideNegScaleNegInIntLoop1_runner() {
testStrideNegScaleNegInIntLoop1(0, 100, 200, 200);
}
// Same shape as testStrideNegScaleNegInIntLoop1, but the scale is an int:
// scale * i is an int multiply whose result is widened to long when offset
// is added, before being checked against length.
// The @IR rules ask for a LOOP count of 1 and fail if COUNTEDLOOP is present.
@Test
@IR(counts = { IRNode.LOOP, "1" })
@IR(failOn = { IRNode.COUNTEDLOOP })
public static void testStrideNegScaleNegInIntLoop2(int start, int stop, long length, long offset) {
final int scale = -2;
final int stride = 1;
for (int i = stop; i > start; i -= stride) {
Objects.checkIndex(scale * i + offset, length);
}
}
// Drives testStrideNegScaleNegInIntLoop2 with i in [1, 100] and offset 200,
// so the checked index -2 * i + 200 stays within [0, 198] for length 200.
@Run(test = "testStrideNegScaleNegInIntLoop2")
private void testStrideNegScaleNegInIntLoop2_runner() {
testStrideNegScaleNegInIntLoop2(0, 100, 200, 200);
}
// Long loop counting down (i = stop-1, ..., start) with a positive scale:
// checks index 1 * i + offset against length on every iteration.
// The @IR rules ask for a LOOP count of 1 and fail if COUNTEDLOOP is present.
@Test
@IR(counts = { IRNode.LOOP, "1"})
@IR(failOn = { IRNode.COUNTEDLOOP})
public static void testStrideNegScalePos(long start, long stop, long length, long offset) {
final long scale = 1;
final long stride = 1;
for (long i = stop-1; i >= start; i -= stride) {
Objects.checkIndex(scale * i + offset, length);
}
}
// Drives testStrideNegScalePos with i in [0, 99] and offset 0, so the
// checked index i stays within [0, 99] for length 100.
@Run(test = "testStrideNegScalePos")
private void testStrideNegScalePos_runner() {
testStrideNegScalePos(0, 100, 100, 0);
}
// Int loop counting down (i = stop-1, ..., start) with a long scale of 2:
// the int induction variable is widened before the multiply, and
// 2 * i + offset is checked against length on every iteration.
// The @IR rules ask for a LOOP count of 1 and fail if COUNTEDLOOP is present.
@Test
@IR(counts = { IRNode.LOOP, "1" })
@IR(failOn = { IRNode.COUNTEDLOOP })
public static void testStrideNegScalePosInIntLoop1(int start, int stop, long length, long offset) {
final long scale = 2;
final int stride = 1;
for (int i = stop-1; i >= start; i -= stride) {
Objects.checkIndex(scale * i + offset, length);
}
}
// Drives testStrideNegScalePosInIntLoop1 with i in [0, 99] and offset 0,
// so the checked index 2 * i stays within [0, 198] for length 200.
@Run(test = "testStrideNegScalePosInIntLoop1")
private void testStrideNegScalePosInIntLoop1_runner() {
testStrideNegScalePosInIntLoop1(0, 100, 200, 0);
}
// Same shape as testStrideNegScalePosInIntLoop1, but the scale is an int:
// scale * i is an int multiply whose result is widened to long when offset
// is added, before being checked against length.
// The @IR rules ask for a LOOP count of 1 and fail if COUNTEDLOOP is present.
@Test
@IR(counts = { IRNode.LOOP, "1" })
@IR(failOn = { IRNode.COUNTEDLOOP })
public static void testStrideNegScalePosInIntLoop2(int start, int stop, long length, long offset) {
final int scale = 2;
final int stride = 1;
for (int i = stop-1; i >= start; i -= stride) {
Objects.checkIndex(scale * i + offset, length);
}
}
// Drives testStrideNegScalePosInIntLoop2 with i in [0, 99] and offset 0, so
// the checked index 2 * i stays within [0, 198] for length 200.
// The runner must call the method named in @Run; it previously called the
// ...InIntLoop1 variant by mistake, so the int-scale kernel was never run.
@Run(test = "testStrideNegScalePosInIntLoop2")
private void testStrideNegScalePosInIntLoop2_runner() {
    testStrideNegScalePosInIntLoop2(0, 100, 200, 0);
}
// Long loop counting up (i = start, ..., stop-1) with a negative scale:
// checks index -1 * i + offset against length on every iteration.
// The @IR rules ask for a LOOP count of 1 and fail if COUNTEDLOOP is present.
@Test
@IR(counts = { IRNode.LOOP, "1"})
@IR(failOn = { IRNode.COUNTEDLOOP})
public static void testStridePosScaleNeg(long start, long stop, long length, long offset) {
final long scale = -1;
final long stride = 1;
for (long i = start; i < stop; i += stride) {
Objects.checkIndex(scale * i + offset, length);
}
}
// Drives testStridePosScaleNeg with i in [0, 99] and offset 99, so the
// checked index 99 - i stays within [0, 99] for length 100.
@Run(test = "testStridePosScaleNeg")
private void testStridePosScaleNeg_runner() {
testStridePosScaleNeg(0, 100, 100, 99);
}
// Int loop counting up (i = start, ..., stop-1) with a long scale of -2:
// the int induction variable is widened before the multiply, and
// -2 * i + offset is checked against length on every iteration.
// The @IR rules ask for a LOOP count of 1 and fail if COUNTEDLOOP is present.
@Test
@IR(counts = { IRNode.LOOP, "1"})
@IR(failOn = { IRNode.COUNTEDLOOP})
public static void testStridePosScaleNegInIntLoop1(int start, int stop, long length, long offset) {
final long scale = -2;
final int stride = 1;
for (int i = start; i < stop; i += stride) {
Objects.checkIndex(scale * i + offset, length);
}
}
// Drives testStridePosScaleNegInIntLoop1 with i in [0, 99] and offset 198,
// so the checked index 198 - 2 * i stays within [0, 198] for length 200.
@Run(test = "testStridePosScaleNegInIntLoop1")
private void testStridePosScaleNegInIntLoop1_runner() {
testStridePosScaleNegInIntLoop1(0, 100, 200, 198);
}
// Same shape as testStridePosScaleNegInIntLoop1, but the scale is an int:
// scale * i is an int multiply whose result is widened to long when offset
// is added, before being checked against length.
// The @IR rules ask for a LOOP count of 1 and fail if COUNTEDLOOP is present.
@Test
@IR(counts = { IRNode.LOOP, "1"})
@IR(failOn = { IRNode.COUNTEDLOOP})
public static void testStridePosScaleNegInIntLoop2(int start, int stop, long length, long offset) {
final int scale = -2;
final int stride = 1;
for (int i = start; i < stop; i += stride) {
Objects.checkIndex(scale * i + offset, length);
}
}
// Drives testStridePosScaleNegInIntLoop2 with i in [0, 99] and offset 198, so
// the checked index 198 - 2 * i stays within [0, 198] for length 200.
// The runner must call the method named in @Run; it previously called the
// ...InIntLoop1 variant by mistake, so the int-scale kernel was never run.
@Run(test = "testStridePosScaleNegInIntLoop2")
private void testStridePosScaleNegInIntLoop2_runner() {
    testStridePosScaleNegInIntLoop2(0, 100, 200, 198);
}
}

View File

@ -127,6 +127,73 @@ public class TestLongRangeCheck {
assertIsNotCompiled(m);
}
// Checks that an out-of-range offset makes a compiled long-range-check loop
// throw and deoptimize.
//
// The four-long-argument test method `method` is loaded through a fresh class
// loader, run once with the in-range offset0, compiled, and run again to
// confirm it is still compiled. It is then invoked with offset1, which must
// raise IndexOutOfBoundsException; afterwards the method must no longer be
// compiled.
//
// Throws RuntimeException if the offset1 call does not throw, or throws
// something other than IndexOutOfBoundsException (the real cause is chained
// so the failure can be diagnosed).
private static void testOverflow(String method, long start, long stop, long length, long offset0, long offset1) throws Exception {
    Method m = newClassLoader().loadClass("TestLongRangeCheck").getDeclaredMethod(method, long.class, long.class, long.class, long.class);
    m.invoke(null, start, stop, length, offset0);
    compile(m);
    m.invoke(null, start, stop, length, offset0);
    assertIsCompiled(m);
    try {
        m.invoke(null, start, stop, length, offset1);
        throw new RuntimeException("should have thrown");
    } catch (InvocationTargetException e) {
        Throwable cause = e.getCause();
        if (!(cause instanceof IndexOutOfBoundsException)) {
            // Keep the real failure attached instead of discarding it.
            throw new RuntimeException("unexpected exception", cause);
        }
    }
    assertIsNotCompiled(m);
}
// Loads a fresh copy of the six-long-argument test method `method`, warms it
// up with (start, stop, length, offset0, start, stop), compiles it, runs it
// once more with the same arguments and asserts that it is still compiled.
private static Method loadAndCompileConditional(String method, long start, long stop, long length, long offset0) throws Exception {
    Method m = newClassLoader().loadClass("TestLongRangeCheck").getDeclaredMethod(method, long.class, long.class, long.class, long.class, long.class, long.class);
    m.invoke(null, start, stop, length, offset0, start, stop);
    compile(m);
    m.invoke(null, start, stop, length, offset0, start, stop);
    assertIsCompiled(m);
    return m;
}

// Invokes the compiled method with arguments that must fail the range check:
// the call has to raise IndexOutOfBoundsException and leave the method
// deoptimized. Any other outcome is reported as a RuntimeException, with the
// real cause chained when the wrong exception type was thrown.
private static void expectConditionalFailure(Method m, long start, long stop, long length, long offset, long start2, long stop2) throws Exception {
    try {
        m.invoke(null, start, stop, length, offset, start2, stop2);
        throw new RuntimeException("should have thrown");
    } catch (InvocationTargetException e) {
        Throwable cause = e.getCause();
        if (!(cause instanceof IndexOutOfBoundsException)) {
            throw new RuntimeException("unexpected exception", cause);
        }
    }
    assertIsNotCompiled(m);
}

// Checks a range check that is guarded by an inner [start2, stop2) condition.
//
// With offset1 the check only holds for iterations inside [start1, stop1).
// Widening that window by one at either end (start1 - 1, or stop1 + 1) must
// throw IndexOutOfBoundsException and deoptimize; each widening is skipped
// when the window already coincides with the loop bound on that side. The
// exact window must run without deoptimizing. Every scenario uses a freshly
// loaded and compiled copy of the method.
private static void testConditional(String method, long start, long stop, long length, long offset0, long offset1, long start1, long stop1) throws Exception {
    if (start1 != start) {
        Method lowered = loadAndCompileConditional(method, start, stop, length, offset0);
        expectConditionalFailure(lowered, start, stop, length, offset1, start1 - 1, stop1);
    }
    if (stop1 != stop) {
        Method raised = loadAndCompileConditional(method, start, stop, length, offset0);
        expectConditionalFailure(raised, start, stop, length, offset1, start1, stop1 + 1);
    }
    Method exact = loadAndCompileConditional(method, start, stop, length, offset0);
    exact.invoke(null, start, stop, length, offset1, start1, stop1);
    assertIsCompiled(exact);
}
public static void main(String[] args) throws Exception {
@ -157,42 +224,20 @@ public class TestLongRangeCheck {
test("testStridePosNotOneScaleNeg", -v, v, v * 2, v-1);
// offset causes overflow
{
Method m = newClassLoader().loadClass("TestLongRangeCheck").getDeclaredMethod("testStridePosScalePos", long.class, long.class, long.class, long.class);
m.invoke(null, 0, 100, 100, 0);
compile(m);
m.invoke(null, 0, 100, 100, 0);
assertIsCompiled(m);
try {
m.invoke(null, 0, 100, 100, Long.MAX_VALUE - 50);
throw new RuntimeException("should have thrown");
} catch(InvocationTargetException e) {
if (!(e.getCause() instanceof IndexOutOfBoundsException)) {
throw new RuntimeException("unexpected exception");
}
}
assertIsNotCompiled(m);
}
testOverflow("testStridePosScalePos", 0, 100, 100, 0, Long.MAX_VALUE - 50);
testOverflow("testStrideNegScaleNeg", 0, 100, 100, 100, Long.MIN_VALUE + 50);
testOverflow("testStrideNegScalePos", 0, 100, 100, 0, Long.MAX_VALUE - 50);
testOverflow("testStridePosScaleNeg", 0, 100, 100, 99, Long.MIN_VALUE + 50);
// no spurious deopt if the range check doesn't fail because not executed
{
Method m = newClassLoader().loadClass("TestLongRangeCheck").getDeclaredMethod("testStridePosScalePosConditional", long.class, long.class, long.class, long.class, long.class, long.class);
m.invoke(null, 0, 100, 100, 0, 0, 100);
compile(m);
m.invoke(null, 0, 100, 100, -50, 50, 100);
assertIsCompiled(m);
}
{
Method m = newClassLoader().loadClass("TestLongRangeCheck").getDeclaredMethod("testStridePosScalePosConditional", long.class, long.class, long.class, long.class, long.class, long.class);
m.invoke(null, 0, 100, 100, 0, 0, 100);
compile(m);
m.invoke(null, 0, 100, Long.MAX_VALUE, Long.MAX_VALUE - 50, 0, 50);
assertIsCompiled(m);
}
testConditional("testStridePosScalePosConditional", 0, 100, 100, 0, -50, 50, 100);
testConditional("testStridePosScalePosConditional", 0, 100, Long.MAX_VALUE, 0, Long.MAX_VALUE - 50, 0, 50);
testConditional("testStrideNegScaleNegConditional", 0, 100, 100, 100, 50, 0, 51);
testConditional("testStrideNegScaleNegConditional", 0, 100, Long.MAX_VALUE, 100, Long.MIN_VALUE + 50, 52, 100);
testConditional("testStrideNegScalePosConditional", 0, 100, 100, 0, -50, 50, 100);
testConditional("testStrideNegScalePosConditional", 0, 100, Long.MAX_VALUE, 100, Long.MAX_VALUE - 50, 0, 50);
testConditional("testStridePosScaleNegConditional", 0, 100, 100, 99, 50, 0, 51);
testConditional("testStridePosScaleNegConditional", 0, 100, Long.MAX_VALUE, 99, Long.MIN_VALUE + 50, 52, 100);
test("testStridePosScalePosInIntLoop", 0, 100, 100, 0);
@ -221,40 +266,19 @@ public class TestLongRangeCheck {
test("testStridePosNotOneScaleNegInIntLoop", -v, v, v * 4, 2 * v - 1);
// offset causes overflow
{
Method m = newClassLoader().loadClass("TestLongRangeCheck").getDeclaredMethod("testStridePosScalePosInIntLoop", long.class, long.class, long.class, long.class);
m.invoke(null, 0, 100, 100, 0);
compile(m);
m.invoke(null, 0, 100, 100, 0);
assertIsCompiled(m);
try {
m.invoke(null, 0, 100, 100, Long.MAX_VALUE - 50);
throw new RuntimeException("should have thrown");
} catch(InvocationTargetException e) {
if (!(e.getCause() instanceof IndexOutOfBoundsException)) {
throw new RuntimeException("unexpected exception");
}
}
assertIsNotCompiled(m);
}
testOverflow("testStridePosScalePosInIntLoop", 0, 100, 100, 0, Long.MAX_VALUE - 50);
testOverflow("testStrideNegScaleNegInIntLoop", 0, 100, 100, 100, Long.MIN_VALUE + 50);
testOverflow("testStrideNegScalePosInIntLoop", 0, 100, 100, 0, Long.MAX_VALUE - 50);
testOverflow("testStridePosScaleNegInIntLoop", 0, 100, 100, 99, Long.MIN_VALUE + 50);
// no spurious deopt if the range check doesn't fail because not executed
{
Method m = newClassLoader().loadClass("TestLongRangeCheck").getDeclaredMethod("testStridePosScalePosConditional", long.class, long.class, long.class, long.class, long.class, long.class);
m.invoke(null, 0, 100, 100, 0, 0, 100);
compile(m);
m.invoke(null, 0, 100, 100, -50, 50, 100);
assertIsCompiled(m);
}
{
Method m = newClassLoader().loadClass("TestLongRangeCheck").getDeclaredMethod("testStridePosScalePosConditional", long.class, long.class, long.class, long.class, long.class, long.class);
m.invoke(null, 0, 100, 100, 0, 0, 100);
compile(m);
m.invoke(null, 0, 100, Long.MAX_VALUE, Long.MAX_VALUE - 50, 0, 50);
assertIsCompiled(m);
}
testConditional("testStridePosScalePosConditionalInIntLoop", 0, 100, 100, 0, -50, 50, 100);
testConditional("testStridePosScalePosConditionalInIntLoop", 0, 100, Long.MAX_VALUE, 0, Long.MAX_VALUE - 50, 0, 50);
testConditional("testStrideNegScaleNegConditionalInIntLoop", 0, 100, 100, 100, 50, 0, 51);
testConditional("testStrideNegScaleNegConditionalInIntLoop", 0, 100, Long.MAX_VALUE, 100, Long.MIN_VALUE + 50, 52, 100);
testConditional("testStrideNegScalePosConditionalInIntLoop", 0, 100, 100, 0, -50, 50, 100);
testConditional("testStrideNegScalePosConditionalInIntLoop", 0, 100, Long.MAX_VALUE, 100, Long.MAX_VALUE - 50, 0, 50);
testConditional("testStridePosScaleNegConditionalInIntLoop", 0, 100, 100, 99, 50, 0, 51);
testConditional("testStridePosScaleNegConditionalInIntLoop", 0, 100, Long.MAX_VALUE, 99, Long.MIN_VALUE + 50, 52, 100);
test("testStridePosScalePosNotOneInIntLoop2", 0, 100, 1090, 0);
@ -411,6 +435,36 @@ public class TestLongRangeCheck {
}
}
// Long loop counting down (i = stop, ..., start+1) with scale -1. The range
// check of -i + offset against length only runs on iterations where
// start2 <= i < stop2, so out-of-range values outside that window are never
// checked.
public static void testStrideNegScaleNegConditional(long start, long stop, long length, long offset, long start2, long stop2) {
final long scale = -1;
final long stride = 1;
for (long i = stop; i > start; i -= stride) {
if (i >= start2 && i < stop2) {
Preconditions.checkIndex(scale * i + offset, length, null);
}
}
}
// Long loop counting down (i = stop-1, ..., start) with scale 1. The range
// check of i + offset against length only runs on iterations where
// start2 <= i < stop2.
public static void testStrideNegScalePosConditional(long start, long stop, long length, long offset, long start2, long stop2) {
final long scale = 1;
final long stride = 1;
for (long i = stop-1; i >= start; i -= stride) {
if (i >= start2 && i < stop2) {
Preconditions.checkIndex(scale * i + offset, length, null);
}
}
}
// Long loop counting up (i = start, ..., stop-1) with scale -1. The range
// check of -i + offset against length only runs on iterations where
// start2 <= i < stop2.
public static void testStridePosScaleNegConditional(long start, long stop, long length, long offset, long start2, long stop2) {
final long scale = -1;
final long stride = 1;
for (long i = start; i < stop; i += stride) {
if (i >= start2 && i < stop2) {
Preconditions.checkIndex(scale * i + offset, length, null);
}
}
}
private static void checkInputs(long... inputs) {
for (int i = 0; i < inputs.length; i++) {
if ((long)((int)inputs[i]) != inputs[i]) {
@ -529,7 +583,6 @@ public class TestLongRangeCheck {
public static void testStridePosScalePosConditionalInIntLoop(long start, long stop, long length, long offset, long start2, long stop2) {
checkInputs(start, stop, start2, stop2);
Preconditions.checkIndex(0, length, null);
final long scale = 1;
final int stride = 1;
for (int i = (int)start; i < (int)stop; i += stride) {
@ -539,6 +592,39 @@ public class TestLongRangeCheck {
}
}
// Int-loop variant of the conditional stride-negative/scale-negative test.
// checkInputs is called on the loop and window bounds before they are
// narrowed to int. The loop counts down (i = stop, ..., start+1) and checks
// -i + offset against length only when start2 <= i < stop2.
public static void testStrideNegScaleNegConditionalInIntLoop(long start, long stop, long length, long offset, long start2, long stop2) {
checkInputs(start, stop, start2, stop2);
final long scale = -1;
final int stride = 1;
for (int i = (int)stop; i > (int)start; i -= stride) {
if (i >= (int)start2 && i < (int)stop2) {
Preconditions.checkIndex(scale * i + offset, length, null);
}
}
}
// Int-loop variant of the conditional stride-negative/scale-positive test.
// checkInputs is called on the loop and window bounds before they are
// narrowed to int. The loop counts down (i = stop-1, ..., start) and checks
// i + offset against length only when start2 <= i < stop2.
public static void testStrideNegScalePosConditionalInIntLoop(long start, long stop, long length, long offset, long start2, long stop2) {
checkInputs(start, stop, start2, stop2);
final long scale = 1;
final int stride = 1;
for (int i = (int)(stop-1); i >= (int)start; i -= stride) {
if (i >= (int)start2 && i < (int)stop2) {
Preconditions.checkIndex(scale * i + offset, length, null);
}
}
}
// Int-loop variant of the conditional stride-positive/scale-negative test.
// checkInputs is called on the loop and window bounds before they are
// narrowed to int. The loop counts up (i = start, ..., stop-1) and checks
// -i + offset against length only when start2 <= i < stop2.
public static void testStridePosScaleNegConditionalInIntLoop(long start, long stop, long length, long offset, long start2, long stop2) {
checkInputs(start, stop, start2, stop2);
final long scale = -1;
final int stride = 1;
for (int i = (int)start; i < (int)stop; i += stride) {
if (i >= (int)start2 && i < (int)stop2) {
Preconditions.checkIndex(scale * i + offset, length, null);
}
}
}
public static void testStridePosScalePosNotOneInIntLoop2(long start, long stop, long length, long offset) {
checkInputs(start, stop);
final int scale = 11;