8342692: C2: long counted loop/long range checks: don't create loop-nest for short running loops

Co-authored-by: Maurizio Cimadamore <mcimadamore@openjdk.org>
Co-authored-by: Christian Hagedorn <chagedorn@openjdk.org>
Reviewed-by: chagedorn, thartmann
This commit is contained in:
Roland Westrelin 2025-07-22 08:35:36 +00:00
parent c68697e178
commit f155661151
27 changed files with 1665 additions and 79 deletions

View File

@ -763,6 +763,7 @@
declare_constant(Deoptimization::Reason_constraint) \
declare_constant(Deoptimization::Reason_div0_check) \
declare_constant(Deoptimization::Reason_loop_limit_check) \
declare_constant(Deoptimization::Reason_short_running_long_loop) \
declare_constant(Deoptimization::Reason_auto_vectorization_check) \
declare_constant(Deoptimization::Reason_type_checked_inlining) \
declare_constant(Deoptimization::Reason_optimized_type_check) \

View File

@ -872,6 +872,15 @@
"could corrupt the graph in rare cases and should be used with " \
"care.") \
\
product(bool, ShortRunningLongLoop, true, DIAGNOSTIC, \
"long counted loop/long range checks: don't create loop nest if " \
"loop runs for small enough number of iterations. Long loop is " \
"converted to a single int loop.") \
\
develop(bool, StressShortRunningLongLoop, false, \
"Speculate all long counted loops are short running when bounds " \
"are unknown even if profile data doesn't say so.") \
\
develop(bool, StressLoopPeeling, false, \
"Randomize loop peeling decision") \

View File

@ -26,7 +26,9 @@
#include "opto/addnode.hpp"
#include "opto/callnode.hpp"
#include "opto/castnode.hpp"
#include "opto/cfgnode.hpp"
#include "opto/connode.hpp"
#include "opto/loopnode.hpp"
#include "opto/matcher.hpp"
#include "opto/phaseX.hpp"
#include "opto/subnode.hpp"
@ -323,6 +325,67 @@ const Type* CastLLNode::Value(PhaseGVN* phase) const {
return widen_type(phase, res, T_LONG);
}
// Returns true when 'proj' has a unique control user that is a plain Loop node flagged as the inner loop of a loop
// nest, and 'proj' is that loop's input 2 (presumably the back edge control input - confirm against LoopNode).
// A null 'proj' is accepted and yields false.
bool CastLLNode::is_inner_loop_backedge(ProjNode* proj) {
  if (proj == nullptr) {
    return false;
  }
  Node* loop = proj->unique_ctrl_out_or_null();
  if (loop == nullptr || loop->Opcode() != Op_Loop) {
    // No unique control user, or the user is not a plain Loop node.
    return false;
  }
  if (loop->in(2) != proj) {
    return false;
  }
  return loop->as_Loop()->is_loop_nest_inner_loop();
}
// Returns true when 'cmp' feeds, through a Bool, an If node one of whose projections satisfies
// is_inner_loop_backedge(). Uses of 'cmp' that are not Bool nodes, and uses of the Bool that are not plain If nodes,
// are ignored.
bool CastLLNode::cmp_used_at_inner_loop_exit_test(CmpNode* cmp) {
  for (DUIterator_Fast bmax, b = cmp->fast_outs(bmax); b < bmax; b++) {
    Node* bool_use = cmp->fast_out(b);
    if (bool_use->Opcode() != Op_Bool) {
      continue;
    }
    for (DUIterator_Fast fmax, f = bool_use->fast_outs(fmax); f < fmax; f++) {
      Node* if_use = bool_use->fast_out(f);
      if (if_use->Opcode() != Op_If) {
        continue;
      }
      // Either projection may be the one that loops back.
      ProjNode* taken_proj = if_use->as_If()->proj_out_or_null(true);
      ProjNode* not_taken_proj = if_use->as_If()->proj_out_or_null(false);
      if (is_inner_loop_backedge(taken_proj) || is_inner_loop_backedge(not_taken_proj)) {
        return true;
      }
    }
  }
  return false;
}
// Find if this is a cast node added by PhaseIdealLoop::create_loop_nest() to narrow the number of iterations of the
// inner loop
bool CastLLNode::used_at_inner_loop_exit_test() const {
for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) {
Node* convl2i = fast_out(i);
if (convl2i->Opcode() == Op_ConvL2I) {
for (DUIterator_Fast jmax, j = convl2i->fast_outs(jmax); j < jmax; j++) {
Node* cmp_or_sub = convl2i->fast_out(j);
if (cmp_or_sub->Opcode() == Op_CmpI) {
if (cmp_used_at_inner_loop_exit_test(cmp_or_sub->as_Cmp())) {
// (Loop .. .. (IfProj (If (Bool (CmpI (ConvL2I (CastLL )))))))
return true;
}
} else if (cmp_or_sub->Opcode() == Op_SubI && cmp_or_sub->in(1)->find_int_con(-1) == 0) {
for (DUIterator_Fast kmax, k = cmp_or_sub->fast_outs(kmax); k < kmax; k++) {
Node* cmp = cmp_or_sub->fast_out(k);
if (cmp->Opcode() == Op_CmpI) {
if (cmp_used_at_inner_loop_exit_test(cmp->as_Cmp())) {
// (Loop .. .. (IfProj (If (Bool (CmpI (SubI 0 (ConvL2I (CastLL ))))))))
return true;
}
}
}
}
}
}
}
return false;
}
Node* CastLLNode::Ideal(PhaseGVN* phase, bool can_reshape) {
Node* progress = ConstraintCastNode::Ideal(phase, can_reshape);
if (progress != nullptr) {
@ -352,7 +415,12 @@ Node* CastLLNode::Ideal(PhaseGVN* phase, bool can_reshape) {
}
}
}
return optimize_integer_cast(phase, T_LONG);
// If it's a cast created by PhaseIdealLoop::try_make_short_running_loop(), don't transform it until the counted loop
// is created in the next loop opts pass
if (!can_reshape || !used_at_inner_loop_exit_test()) {
return optimize_integer_cast(phase, T_LONG);
}
return nullptr;
}
//------------------------------Value------------------------------------------

View File

@ -138,6 +138,12 @@ public:
}
virtual const Type* Value(PhaseGVN* phase) const;
static bool is_inner_loop_backedge(ProjNode* proj);
static bool cmp_used_at_inner_loop_exit_test(CmpNode* cmp);
bool used_at_inner_loop_exit_test() const;
virtual Node* Ideal(PhaseGVN* phase, bool can_reshape);
virtual int Opcode() const;
virtual uint ideal_reg() const { return Op_RegL; }

View File

@ -4050,6 +4050,11 @@ void GraphKit::add_parse_predicate(Deoptimization::DeoptReason reason, const int
// Add Parse Predicates which serve as placeholders to create new Runtime Predicates above them. All
// Runtime Predicates inside a Runtime Predicate block share the same uncommon trap as the Parse Predicate.
void GraphKit::add_parse_predicates(int nargs) {
if (ShortRunningLongLoop) {
// Will narrow the limit down with a cast node. Predicates added later may depend on the cast so should be last when
// walking up from the loop.
add_parse_predicate(Deoptimization::Reason_short_running_long_loop, nargs);
}
if (UseLoopPredicate) {
add_parse_predicate(Deoptimization::Reason_predicate, nargs);
if (UseProfiledLoopPredicate) {

View File

@ -2178,6 +2178,7 @@ ParsePredicateNode::ParsePredicateNode(Node* control, Deoptimization::DeoptReaso
case Deoptimization::Reason_profile_predicate:
case Deoptimization::Reason_auto_vectorization_check:
case Deoptimization::Reason_loop_limit_check:
case Deoptimization::Reason_short_running_long_loop:
break;
default:
assert(false, "unsupported deoptimization reason for Parse Predicate");
@ -2226,6 +2227,9 @@ void ParsePredicateNode::dump_spec(outputStream* st) const {
case Deoptimization::DeoptReason::Reason_loop_limit_check:
st->print("Loop_Limit_Check ");
break;
case Deoptimization::DeoptReason::Reason_short_running_long_loop:
st->print("Short_Running_Long_Loop ");
break;
default:
fatal("unknown kind");
}

View File

@ -1054,7 +1054,7 @@ bool PhaseIdealLoop::loop_predication_impl_helper(IdealLoopTree* loop, IfProjNod
#ifdef ASSERT
const bool exact_trip_count = cl->has_exact_trip_count();
const uint trip_count = cl->trip_count();
loop->compute_trip_count(this);
loop->compute_trip_count(this, T_INT);
assert(exact_trip_count == cl->has_exact_trip_count() && trip_count == cl->trip_count(),
"should have computed trip count on Loop Predication entry");
#endif
@ -1171,7 +1171,7 @@ bool PhaseIdealLoop::loop_predication_impl(IdealLoopTree* loop) {
// Do nothing for iteration-splitted loops
return false;
}
loop->compute_trip_count(this);
loop->compute_trip_count(this, T_INT);
if (cl->trip_count() == 1) {
// Not worth to hoist checks out of a loop that is only run for one iteration since the checks are only going to
// be executed once anyway.

View File

@ -96,11 +96,11 @@ void IdealLoopTree::record_for_igvn() {
//------------------------------compute_exact_trip_count-----------------------
// Compute loop trip count if possible. Do not recalculate trip count for
// split loops (pre-main-post) which have their limits and inits behind Opaque node.
void IdealLoopTree::compute_trip_count(PhaseIdealLoop* phase) {
if (!_head->as_Loop()->is_valid_counted_loop(T_INT)) {
void IdealLoopTree::compute_trip_count(PhaseIdealLoop* phase, BasicType loop_bt) {
if (!_head->as_Loop()->is_valid_counted_loop(loop_bt)) {
return;
}
CountedLoopNode* cl = _head->as_CountedLoop();
BaseCountedLoopNode* cl = _head->as_BaseCountedLoop();
// Trip count may become nonexact for iteration split loops since
// RCE modifies limits. Note, _trip_count value is not reset since
// it is used to limit unrolling of main loop.
@ -119,24 +119,62 @@ void IdealLoopTree::compute_trip_count(PhaseIdealLoop* phase) {
Node* init_n = cl->init_trip();
Node* limit_n = cl->limit();
if (init_n != nullptr && limit_n != nullptr) {
// Use longs to avoid integer overflow.
int stride_con = cl->stride_con();
const TypeInt* init_type = phase->_igvn.type(init_n)->is_int();
const TypeInt* limit_type = phase->_igvn.type(limit_n)->is_int();
jlong init_con = (stride_con > 0) ? init_type->_lo : init_type->_hi;
jlong limit_con = (stride_con > 0) ? limit_type->_hi : limit_type->_lo;
int stride_m = stride_con - (stride_con > 0 ? 1 : -1);
jlong trip_count = (limit_con - init_con + stride_m)/stride_con;
jlong stride_con = cl->stride_con();
const TypeInteger* init_type = phase->_igvn.type(init_n)->is_integer(loop_bt);
const TypeInteger* limit_type = phase->_igvn.type(limit_n)->is_integer(loop_bt);
// compute trip count
// It used to be computed as:
// max(1, limit_con - init_con + stride_m) / stride_con
// with stride_m = stride_con - (stride_con > 0 ? 1 : -1)
// for int counted loops only and by promoting all values to long to avoid overflow
// This implements the computation for int and long counted loops in a way that promotion to the next larger integer
// type is not needed to protect against overflow.
//
// Use unsigned longs to avoid overflow: number of iteration is a positive number but can be really large for
// instance if init_con = min_jint, limit_con = max_jint
jlong init_con = (stride_con > 0) ? init_type->lo_as_long() : init_type->hi_as_long();
julong uinit_con = init_con;
jlong limit_con = (stride_con > 0) ? limit_type->hi_as_long() : limit_type->lo_as_long();
julong ulimit_con = limit_con;
// The loop body is always executed at least once even if init >= limit (for stride_con > 0) or
// init <= limit (for stride_con < 0).
trip_count = MAX2(trip_count, (jlong)1);
if (trip_count < (jlong)max_juint) {
julong udiff = 1;
if (stride_con > 0 && limit_con > init_con) {
udiff = ulimit_con - uinit_con;
} else if (stride_con < 0 && limit_con < init_con) {
udiff = uinit_con - ulimit_con;
}
// The loop runs for one more iteration if the limit is (stride > 0 in this example):
// init + k * stride + small_value, 0 < small_value < stride
julong utrip_count = udiff / ABS(stride_con);
if (utrip_count * ABS(stride_con) != udiff) {
// Guaranteed to not overflow because it can only happen for ABS(stride) > 1 in which case, utrip_count can't be
// max_juint/max_julong
utrip_count++;
}
#ifdef ASSERT
if (loop_bt == T_INT) {
// Use longs to avoid integer overflow.
jlong init_con = (stride_con > 0) ? init_type->is_int()->_lo : init_type->is_int()->_hi;
jlong limit_con = (stride_con > 0) ? limit_type->is_int()->_hi : limit_type->is_int()->_lo;
int stride_m = stride_con - (stride_con > 0 ? 1 : -1);
jlong trip_count = (limit_con - init_con + stride_m) / stride_con;
// The loop body is always executed at least once even if init >= limit (for stride_con > 0) or
// init <= limit (for stride_con < 0).
trip_count = MAX2(trip_count, (jlong)1);
assert(checked_cast<juint>(trip_count) == checked_cast<juint>(utrip_count), "incorrect trip count computation");
}
#endif
if (utrip_count < max_unsigned_integer(loop_bt)) {
if (init_n->is_Con() && limit_n->is_Con()) {
// Set exact trip count.
cl->set_exact_trip_count((uint)trip_count);
} else if (cl->unrolled_count() == 1) {
cl->set_exact_trip_count(utrip_count);
} else if (loop_bt == T_LONG || cl->as_CountedLoop()->unrolled_count() == 1) {
// Set maximum trip count before unrolling.
cl->set_trip_count((uint)trip_count);
cl->set_trip_count(utrip_count);
}
}
}
@ -1851,7 +1889,7 @@ void PhaseIdealLoop::do_unroll(IdealLoopTree *loop, Node_List &old_new, bool adj
#ifndef PRODUCT
if (TraceLoopOpts) {
if (loop_head->trip_count() < (uint)LoopUnrollLimit) {
tty->print("Unroll %d(%2d) ", loop_head->unrolled_count()*2, loop_head->trip_count());
tty->print("Unroll %d(" JULONG_FORMAT_W(2) ") ", loop_head->unrolled_count()*2, loop_head->trip_count());
} else {
tty->print("Unroll %d ", loop_head->unrolled_count()*2);
}
@ -2104,7 +2142,7 @@ void PhaseIdealLoop::do_maximally_unroll(IdealLoopTree *loop, Node_List &old_new
assert(cl->trip_count() > 0, "");
#ifndef PRODUCT
if (TraceLoopOpts) {
tty->print("MaxUnroll %d ", cl->trip_count());
tty->print("MaxUnroll " JULONG_FORMAT " ", cl->trip_count());
loop->dump_head();
}
#endif
@ -3359,7 +3397,7 @@ bool IdealLoopTree::iteration_split_impl(PhaseIdealLoop *phase, Node_List &old_n
return false;
}
// Compute loop trip count if possible.
compute_trip_count(phase);
compute_trip_count(phase, T_INT);
// Convert one-iteration loop into normal code.
if (do_one_iteration_loop(phase)) {

View File

@ -601,7 +601,6 @@ void PhaseIdealLoop::add_parse_predicate(Deoptimization::DeoptReason reason, Nod
int trap_request = Deoptimization::make_trap_request(reason, Deoptimization::Action_maybe_recompile);
address call_addr = OptoRuntime::uncommon_trap_blob()->entry_point();
const TypePtr* no_memory_effects = nullptr;
JVMState* jvms = sfpt->jvms();
CallNode* unc = new CallStaticJavaNode(OptoRuntime::uncommon_trap_Type(), call_addr, "uncommon_trap",
no_memory_effects);
@ -856,8 +855,9 @@ bool PhaseIdealLoop::create_loop_nest(IdealLoopTree* loop, Node_List &old_new) {
return false;
}
assert(iters_limit > 0, "can't be negative");
PhiNode* phi = head->phi()->as_Phi();
Node* incr = head->incr();
Node* back_control = head->in(LoopNode::LoopBackControl);
@ -888,7 +888,7 @@ bool PhaseIdealLoop::create_loop_nest(IdealLoopTree* loop, Node_List &old_new) {
// Take what we know about the number of iterations of the long counted loop into account when computing the limit of
// the inner loop.
const Node* init = head->init_trip();
Node* init = head->init_trip();
const TypeInteger* lo = _igvn.type(init)->is_integer(bt);
const TypeInteger* hi = _igvn.type(limit)->is_integer(bt);
if (stride_con < 0) {
@ -907,7 +907,7 @@ bool PhaseIdealLoop::create_loop_nest(IdealLoopTree* loop, Node_List &old_new) {
// going to execute as many range checks once transformed with range checks eliminated (1 peeled iteration with
// range checks + 2 predicates per range checks) as it would have not transformed. It also has to pay for the extra
// logic on loop entry and for the outer loop.
loop->compute_trip_count(this);
loop->compute_trip_count(this, bt);
if (head->is_CountedLoop() && head->as_CountedLoop()->has_exact_trip_count()) {
if (head->as_CountedLoop()->trip_count() <= 3) {
return false;
@ -920,6 +920,11 @@ bool PhaseIdealLoop::create_loop_nest(IdealLoopTree* loop, Node_List &old_new) {
}
}
if (try_make_short_running_loop(loop, stride_con, range_checks, iters_limit)) {
C->set_major_progress();
return true;
}
julong orig_iters = (julong)hi->hi_as_long() - lo->lo_as_long();
iters_limit = checked_cast<int>(MIN2((julong)iters_limit, orig_iters));
@ -1118,6 +1123,9 @@ bool PhaseIdealLoop::create_loop_nest(IdealLoopTree* loop, Node_List &old_new) {
if (safepoint != nullptr) {
SafePointNode* cloned_sfpt = old_new[safepoint->_idx]->as_SafePoint();
if (ShortRunningLongLoop) {
add_parse_predicate(Deoptimization::Reason_short_running_long_loop, inner_head, outer_ilt, cloned_sfpt);
}
if (UseLoopPredicate) {
add_parse_predicate(Deoptimization::Reason_predicate, inner_head, outer_ilt, cloned_sfpt);
if (UseProfiledLoopPredicate) {
@ -1147,6 +1155,215 @@ bool PhaseIdealLoop::create_loop_nest(IdealLoopTree* loop, Node_List &old_new) {
return true;
}
// Make a copy of Parse/Template Assertion predicates below existing predicates at the loop passed as argument
class CloneShortLoopPredicateVisitor : public PredicateVisitor {
ClonePredicateToTargetLoop _clone_predicate_to_loop;
PhaseIdealLoop* const _phase;
public:
CloneShortLoopPredicateVisitor(LoopNode* target_loop_head,
const NodeInSingleLoopBody &node_in_loop_body,
PhaseIdealLoop* phase)
: _clone_predicate_to_loop(target_loop_head, node_in_loop_body, phase),
_phase(phase) {
}
NONCOPYABLE(CloneShortLoopPredicateVisitor);
using PredicateVisitor::visit;
void visit(const ParsePredicate& parse_predicate) override {
_clone_predicate_to_loop.clone_parse_predicate(parse_predicate, true);
parse_predicate.kill(_phase->igvn());
}
void visit(const TemplateAssertionPredicate& template_assertion_predicate) override {
_clone_predicate_to_loop.clone_template_assertion_predicate(template_assertion_predicate);
template_assertion_predicate.kill(_phase->igvn());
}
};
// If the loop is either statically known to run for a small enough number of iterations or if profile data indicates
// that, we don't want an outer loop because the overhead of having an outer loop whose backedge is never taken, has a
// measurable cost. Furthermore, creating the loop nest usually causes one iteration of the loop to be peeled so
// predicates can be set up. If the loop is short running, then it's an extra iteration that's run with range checks
// (compared to an int counted loop with int range checks).
//
// In the short running case, turn the loop into a regular loop again and transform the long range checks:
// - LongCountedLoop: Create LoopNode but keep the loop limit type with a CastLL node to avoid that we later try to
// create a Loop Limit Check when turning the LoopNode into a CountedLoopNode.
// - CountedLoop: Can be reused.
//
// Parameters:
// - loop: loop tree whose head is a BaseCountedLoopNode (int or long counted loop).
// - stride_con: constant stride of the loop's induction variable.
// - range_checks: range check projections forwarded to transform_long_range_checks().
// - iters_limit: upper bound on the trip count for which the single loop shape is used.
// Returns true if the loop was transformed. Returns false, leaving the loop shape unchanged, if ShortRunningLongLoop
// is off, if the loop is neither known nor profiled to be short running, or if speculation is needed but the Short
// Running Long Loop Parse Predicate is no longer available.
bool PhaseIdealLoop::try_make_short_running_loop(IdealLoopTree* loop, jint stride_con, const Node_List &range_checks,
const uint iters_limit) {
if (!ShortRunningLongLoop) {
return false;
}
BaseCountedLoopNode* head = loop->_head->as_BaseCountedLoop();
BasicType bt = head->bt();
Node* entry_control = head->skip_strip_mined()->in(LoopNode::EntryControl);
loop->compute_trip_count(this, bt);
// Loop must run for no more than iters_limit as it guarantees no overflow of scale * iv in long range checks (see
// comment above PhaseIdealLoop::transform_long_range_checks()).
// iters_limit / ABS(stride_con) is the largest trip count for which we know it's correct to not create a loop nest:
// it's always beneficial to have a single loop rather than a loop nest, so we try to apply this transformation as
// often as possible.
bool known_short_running_loop = head->trip_count() <= iters_limit / ABS(stride_con);
bool profile_short_running_loop = false;
if (!known_short_running_loop) {
// Not provable from the types: fall back to profile data, or speculate unconditionally under the stress flag.
loop->compute_profile_trip_cnt(this);
if (StressShortRunningLongLoop) {
profile_short_running_loop = true;
} else {
profile_short_running_loop = !head->is_profile_trip_failed() && head->profile_trip_cnt() <= iters_limit / ABS(stride_con);
}
}
if (!known_short_running_loop && !profile_short_running_loop) {
return false;
}
Node* limit = head->limit();
Node* init = head->init_trip();
// The new limit is the distance between init and limit taken in the direction of the stride: limit - init for a
// positive stride, init - limit for a negative one.
Node* new_limit;
if (stride_con > 0) {
new_limit = SubNode::make(limit, init, bt);
} else {
new_limit = SubNode::make(init, limit, bt);
}
register_new_node(new_limit, entry_control);
PhiNode* phi = head->phi()->as_Phi();
if (profile_short_running_loop) {
// Add a Short Running Long Loop Predicate. It's the first predicate in the predicate chain before entering a loop
// because a cast that's control dependent on the Short Running Long Loop Predicate is added to narrow the limit and
// future predicates may be dependent on the new limit (so have to be between the loop and Short Running Long Loop
// Predicate). The current limit could, itself, be dependent on an existing predicate. Clone parse and template
// assertion predicates below existing predicates to get proper ordering of predicates when walking from the loop
// up: future predicates, Short Running Long Loop Predicate, existing predicates.
//
// Existing Hoisted
// Check Predicates
// |
// New Short Running Long
// Loop Predicate
// |
// Cloned Parse Predicates and
// Template Assertion Predicates
// (future predicates added here)
// |
// Loop
const Predicates predicates_before_cloning(entry_control);
const PredicateBlock* short_running_long_loop_predicate_block = predicates_before_cloning.short_running_long_loop_predicate_block();
if (!short_running_long_loop_predicate_block->has_parse_predicate()) { // already trapped
return false;
}
PredicateIterator predicate_iterator(entry_control);
NodeInSingleLoopBody node_in_short_loop_body(this, loop);
CloneShortLoopPredicateVisitor clone_short_loop_predicates_visitor(head, node_in_short_loop_body, this);
predicate_iterator.for_each(clone_short_loop_predicates_visitor);
// Cloning changed the control flow above the loop: re-read the loop entry before looking predicates up again.
entry_control = head->skip_strip_mined()->in(LoopNode::EntryControl);
const Predicates predicates_after_cloning(entry_control);
ParsePredicateSuccessProj* short_running_loop_predicate_proj = predicates_after_cloning.
short_running_long_loop_predicate_block()->
parse_predicate_success_proj();
assert(short_running_loop_predicate_proj->in(0)->is_ParsePredicate(), "must be parse predicate");
// Build the guard "new_limit <= iters_limit" and install it as the condition of a new If created for
// Reason_short_running_long_loop.
const jlong iters_limit_long = iters_limit;
Node* cmp_limit = CmpNode::make(new_limit, _igvn.integercon(iters_limit_long, bt), bt);
Node* bol = new BoolNode(cmp_limit, BoolTest::le);
Node* new_predicate_proj = create_new_if_for_predicate(short_running_loop_predicate_proj,
nullptr,
Deoptimization::Reason_short_running_long_loop,
Op_If);
Node* iff = new_predicate_proj->in(0);
_igvn.replace_input_of(iff, 1, bol);
register_new_node(cmp_limit, iff->in(0));
register_new_node(bol, iff->in(0));
// Below the guard, the limit is cast to the range [1, iters_limit] with control set to the guard's projection.
new_limit = ConstraintCastNode::make_cast_for_basic_type(new_predicate_proj, new_limit,
TypeInteger::make(1, iters_limit_long, Type::WidenMin, bt),
ConstraintCastNode::UnconditionalDependency, bt);
register_new_node(new_limit, new_predicate_proj);
#ifndef PRODUCT
if (TraceLoopLimitCheck) {
tty->print_cr("Short Long Loop Check Predicate generated:");
DEBUG_ONLY(bol->dump(2);)
}
#endif
// The new If sits above the loop: refresh the entry control used when registering the nodes created below.
entry_control = head->skip_strip_mined()->in(LoopNode::EntryControl);
} else if (bt == T_LONG) {
// We're turning a long counted loop into a regular loop that will be converted into an int counted loop. That loop
// won't need loop limit check predicates (iters_limit guarantees that). Add a cast to make sure that, whatever
// transformation happens by the time the counted loop is created (in a subsequent pass of loop opts), C2 knows
// enough about the loop's limit that it doesn't try to add loop limit check predicates.
const Predicates predicates(entry_control);
const TypeLong* new_limit_t = new_limit->Value(&_igvn)->is_long();
new_limit = ConstraintCastNode::make_cast_for_basic_type(predicates.entry(), new_limit,
TypeLong::make(0, new_limit_t->_hi, new_limit_t->_widen),
ConstraintCastNode::UnconditionalDependency, bt);
register_new_node(new_limit, predicates.entry());
} else {
assert(bt == T_INT && known_short_running_loop, "only CountedLoop statically known to be short running");
}
IfNode* exit_test = head->loopexit();
if (bt == T_LONG) {
// The loop is short running so new_limit fits into an int: either we determined that statically or added a guard
new_limit = new ConvL2INode(new_limit);
register_new_node(new_limit, entry_control);
}
Node* int_zero = intcon(0);
if (stride_con < 0) {
// new_limit was computed as a non-negated distance (init - limit). The int iv below starts at 0 and is stepped by
// the negative stride, so the limit it is compared against is 0 - new_limit.
new_limit = new SubINode(int_zero, new_limit);
register_new_node(new_limit, entry_control);
}
// Clone the iv data nodes as an integer iv
// The int iv starts at 0 on loop entry and is incremented by stride_con on the backedge. The exit test keeps the
// original BoolTest but now compares the int increment against new_limit.
Node* int_stride = intcon(stride_con);
Node* inner_phi = new PhiNode(head, TypeInt::INT);
Node* inner_incr = new AddINode(inner_phi, int_stride);
Node* inner_cmp = new CmpINode(inner_incr, new_limit);
Node* inner_bol = new BoolNode(inner_cmp, exit_test->in(1)->as_Bool()->_test._test);
inner_phi->set_req(LoopNode::EntryControl, int_zero);
inner_phi->set_req(LoopNode::LoopBackControl, inner_incr);
register_new_node(inner_phi, head);
register_new_node(inner_incr, head);
register_new_node(inner_cmp, head);
register_new_node(inner_bol, head);
_igvn.replace_input_of(exit_test, 1, inner_bol);
// Replace inner loop long iv phi as inner loop int iv phi + outer
// loop iv phi
// (there is no outer loop on this path: 'init' is passed where the loop nest case would supply the outer iv)
Node* iv_add = loop_nest_replace_iv(phi, inner_phi, init, head, bt);
LoopNode* inner_head = head;
if (bt == T_LONG) {
// Turn the loop back to a counted loop
inner_head = create_inner_head(loop, head, exit_test);
} else {
// Use existing counted loop
revert_to_normal_loop(head);
}
if (bt == T_INT) {
// transform_long_range_checks() is handed a long init value: widen the int init.
init = new ConvI2LNode(init);
register_new_node(init, entry_control);
}
transform_long_range_checks(stride_con, range_checks, init, new_limit,
inner_phi, iv_add, inner_head);
// Flag the loop as a loop nest inner loop: CastLLNode::is_inner_loop_backedge() tests for this flag.
inner_head->mark_loop_nest_inner_loop();
return true;
}
int PhaseIdealLoop::extract_long_range_checks(const IdealLoopTree* loop, jint stride_con, int iters_limit, PhiNode* phi,
Node_List& range_checks) {
const jlong min_iters = 2;
@ -1318,7 +1535,6 @@ void PhaseIdealLoop::transform_long_range_checks(int stride_con, const Node_List
for (uint i = 0; i < range_checks.size(); i++) {
ProjNode* proj = range_checks.at(i)->as_Proj();
ProjNode* unc_proj = proj->other_if_proj();
RangeCheckNode* rc = proj->in(0)->as_RangeCheck();
jlong scale = 0;
Node* offset = nullptr;
@ -4415,6 +4631,9 @@ void IdealLoopTree::dump_head() {
if (predicates.loop_limit_check_predicate_block()->is_non_empty()) {
tty->print(" limit_check");
}
if (predicates.short_running_long_loop_predicate_block()->is_non_empty()) {
tty->print(" short_running");
}
if (UseLoopPredicate) {
if (UseProfiledLoopPredicate && predicates.profiled_loop_predicate_block()->is_non_empty()) {
tty->print(" profile_predicated");
@ -4922,7 +5141,7 @@ void PhaseIdealLoop::build_and_optimize() {
for (LoopTreeIterator iter(_ltree_root); !iter.done(); iter.next()) {
IdealLoopTree* lpt = iter.current();
if (lpt->is_innermost() && lpt->_allow_optimizations && !lpt->_has_call && lpt->is_counted()) {
lpt->compute_trip_count(this);
lpt->compute_trip_count(this, T_INT);
if (!lpt->do_one_iteration_loop(this) &&
!lpt->do_remove_empty_loop(this)) {
AutoNodeBudget node_budget(this);

View File

@ -218,6 +218,18 @@ public:
jlong stride_con() const;
static BaseCountedLoopNode* make(Node* entry, Node* backedge, BasicType bt);
virtual void set_trip_count(julong tc) = 0;
virtual julong trip_count() const = 0;
bool has_exact_trip_count() const { return (_loop_flags & HasExactTripCount) != 0; }
void set_exact_trip_count(julong tc) {
set_trip_count(tc);
_loop_flags |= HasExactTripCount;
}
void set_nonexact_trip_count() {
_loop_flags &= ~HasExactTripCount;
}
};
@ -298,26 +310,17 @@ public:
int main_idx() const { return _main_idx; }
void set_trip_count(julong tc) {
assert(tc < max_juint, "Cannot set trip count to max_juint");
_trip_count = checked_cast<uint>(tc);
}
julong trip_count() const { return _trip_count; }
void set_pre_loop (CountedLoopNode *main) { assert(is_normal_loop(),""); _loop_flags |= Pre ; _main_idx = main->_idx; }
void set_main_loop ( ) { assert(is_normal_loop(),""); _loop_flags |= Main; }
void set_post_loop (CountedLoopNode *main) { assert(is_normal_loop(),""); _loop_flags |= Post; _main_idx = main->_idx; }
void set_normal_loop( ) { _loop_flags &= ~PreMainPostFlagsMask; }
// We use max_juint for the default value of _trip_count to signal it wasn't set.
// We shouldn't set _trip_count to max_juint explicitly.
void set_trip_count(uint tc) { assert(tc < max_juint, "Cannot set trip count to max_juint"); _trip_count = tc; }
uint trip_count() { return _trip_count; }
bool has_exact_trip_count() const { return (_loop_flags & HasExactTripCount) != 0; }
void set_exact_trip_count(uint tc) {
assert(tc < max_juint, "Cannot set trip count to max_juint");
_trip_count = tc;
_loop_flags |= HasExactTripCount;
}
void set_nonexact_trip_count() {
_loop_flags &= ~HasExactTripCount;
}
void set_notpassed_slp() {
_loop_flags &= ~PassedSlpAnalysis;
}
@ -380,9 +383,15 @@ public:
};
class LongCountedLoopNode : public BaseCountedLoopNode {
private:
virtual uint size_of() const { return sizeof(*this); }
// Known trip count calculated by compute_exact_trip_count()
julong _trip_count;
public:
LongCountedLoopNode(Node *entry, Node *backedge)
: BaseCountedLoopNode(entry, backedge) {
: BaseCountedLoopNode(entry, backedge), _trip_count(max_julong) {
init_class_id(Class_LongCountedLoop);
}
@ -392,6 +401,12 @@ public:
return T_LONG;
}
void set_trip_count(julong tc) {
assert(tc < max_julong, "Cannot set trip count to max_julong");
_trip_count = tc;
}
julong trip_count() const { return _trip_count; }
LongCountedLoopEndNode* loopexit_or_null() const { return (LongCountedLoopEndNode*) BaseCountedLoopNode::loopexit_or_null(); }
LongCountedLoopEndNode* loopexit() const { return (LongCountedLoopEndNode*) BaseCountedLoopNode::loopexit(); }
};
@ -778,7 +793,7 @@ public:
uint est_loop_unroll_sz(uint factor) const;
// Compute loop trip count if possible
void compute_trip_count(PhaseIdealLoop* phase);
void compute_trip_count(PhaseIdealLoop* phase, BasicType bt);
// Compute loop trip count from profile data
float compute_profile_trip_cnt_helper(Node* n);
@ -1829,6 +1844,8 @@ public:
Node* ensure_node_and_inputs_are_above_pre_end(CountedLoopEndNode* pre_end, Node* node);
bool try_make_short_running_loop(IdealLoopTree* loop, jint stride_con, const Node_List& range_checks, const uint iters_limit);
ConINode* intcon(jint i);
ConLNode* longcon(jlong i);

View File

@ -82,12 +82,11 @@ ParsePredicateNode* ParsePredicate::init_parse_predicate(const Node* parse_predi
return nullptr;
}
ParsePredicate ParsePredicate::clone_to_unswitched_loop(Node* new_control, const bool is_false_path_loop,
PhaseIdealLoop* phase) const {
ParsePredicate ParsePredicate::clone_to_loop(Node* new_control, const bool rewire_uncommon_proj_phi_inputs,
PhaseIdealLoop* phase) const {
ParsePredicateSuccessProj* success_proj = phase->create_new_if_for_predicate(_success_proj, new_control,
_parse_predicate_node->deopt_reason(),
Op_ParsePredicate, is_false_path_loop);
NOT_PRODUCT(trace_cloned_parse_predicate(is_false_path_loop, success_proj));
Op_ParsePredicate, rewire_uncommon_proj_phi_inputs);
return ParsePredicate(success_proj, _parse_predicate_node->deopt_reason());
}
@ -97,11 +96,10 @@ void ParsePredicate::kill(PhaseIterGVN& igvn) const {
}
#ifndef PRODUCT
void ParsePredicate::trace_cloned_parse_predicate(const bool is_false_path_loop,
const ParsePredicateSuccessProj* success_proj) {
if (TraceLoopPredicate) {
void ParsePredicate::trace_cloned_parse_predicate(const bool is_false_path_loop) const {
if (TraceLoopUnswitching) {
tty->print("Parse Predicate cloned to %s path loop: ", is_false_path_loop ? "false" : "true");
success_proj->in(0)->dump();
head()->dump();
}
}
#endif // NOT PRODUCT
@ -126,6 +124,7 @@ bool RuntimePredicate::has_valid_uncommon_trap(const Node* success_proj) {
assert(RegularPredicate::may_be_predicate_if(success_proj), "must have been checked before");
const Deoptimization::DeoptReason deopt_reason = uncommon_trap_reason(success_proj->as_IfProj());
return (deopt_reason == Deoptimization::Reason_loop_limit_check ||
deopt_reason == Deoptimization::Reason_short_running_long_loop ||
deopt_reason == Deoptimization::Reason_auto_vectorization_check ||
deopt_reason == Deoptimization::Reason_predicate ||
deopt_reason == Deoptimization::Reason_profile_predicate);
@ -941,6 +940,8 @@ void Predicates::dump() const {
_profiled_loop_predicate_block.dump(" ");
tty->print_cr("- Loop Predicate Block:");
_loop_predicate_block.dump(" ");
tty->print_cr("- Short Running Long Loop Predicate Block:");
_short_running_long_loop_predicate_block.dump(" ");
tty->cr();
} else {
tty->print_cr("<no predicates>");
@ -999,6 +1000,10 @@ InitializedAssertionPredicate CreateAssertionPredicatesVisitor::initialize_from_
return initialized_assertion_predicate;
}
// A node counts as part of the loop body when the control the phase reports for it is a member of the loop tree
// held in '_ilt'.
bool NodeInSingleLoopBody::check_node_in_loop_body(Node* node) const {
  Node* node_ctrl = _phase->get_ctrl(node);
  return _phase->is_member(_ilt, node_ctrl);
}
// Clone the provided Template Assertion Predicate and set '_init' as new input for the OpaqueLoopInitNode.
TemplateAssertionPredicate CreateAssertionPredicatesVisitor::clone_template_and_replace_init_input(
const TemplateAssertionPredicate& template_assertion_predicate) const {
@ -1108,11 +1113,18 @@ void CloneUnswitchedLoopPredicatesVisitor::visit(const ParsePredicate& parse_pre
if (_is_counted_loop && deopt_reason == Deoptimization::Reason_loop_limit_check) {
return;
}
_clone_predicate_to_true_path_loop.clone_parse_predicate(parse_predicate, false);
_clone_predicate_to_false_path_loop.clone_parse_predicate(parse_predicate, true);
clone_parse_predicate(parse_predicate, false);
clone_parse_predicate(parse_predicate, true);
parse_predicate.kill(_phase->igvn());
}
void CloneUnswitchedLoopPredicatesVisitor::clone_parse_predicate(const ParsePredicate& parse_predicate,
const bool is_false_path_loop) {
ClonePredicateToTargetLoop& clone_predicate_to_loop = is_false_path_loop ? _clone_predicate_to_false_path_loop : _clone_predicate_to_true_path_loop;
const ParsePredicate cloned_parse_predicate = clone_predicate_to_loop.clone_parse_predicate(parse_predicate, is_false_path_loop);
NOT_PRODUCT(cloned_parse_predicate.trace_cloned_parse_predicate(is_false_path_loop);)
}
// Clone the Template Assertion Predicate, which is currently found before the newly added unswitched loop selector,
// to the true path and false path loop.
void CloneUnswitchedLoopPredicatesVisitor::visit(const TemplateAssertionPredicate& template_assertion_predicate) {

View File

@ -73,6 +73,14 @@ class TemplateAssertionPredicate;
* counted loop to avoid these overflow problems.
* The predicate does not replace an actual check inside the loop. This predicate can only
* be added once above the Loop Limit Check Parse Predicate for a loop.
* - Short Running Long Loop Predicate: This predicate is created when a long counted loop is transformed into an int
* counted loop. In general, that transformation requires an outer loop to guarantee that the new loop nest
* iterates over the entire range of the loop before transformation. However, if the loop is speculated to
* run for a small enough number of iterations, the outer loop is not needed. This predicate is added to
* catch mis-speculation in this case. It also applies to int counted loops with long range checks for
* which a loop nest also needs to be created in the general case (so the transformation of long range
* checks to int range checks is legal).
* - Assertion Predicate: An always true predicate which will never fail (its range is already covered by an earlier
* Hoisted Check Predicate or the main-loop entry guard) but is required in order to fold away a
* dead sub loop in which some data could be proven to be dead (by the type system) and replaced
@ -288,8 +296,6 @@ class ParsePredicate : public Predicate {
}
static ParsePredicateNode* init_parse_predicate(const Node* parse_predicate_proj, Deoptimization::DeoptReason deopt_reason);
NOT_PRODUCT(static void trace_cloned_parse_predicate(bool is_false_path_loop,
const ParsePredicateSuccessProj* success_proj);)
public:
ParsePredicate(Node* parse_predicate_proj, Deoptimization::DeoptReason deopt_reason)
@ -320,8 +326,8 @@ class ParsePredicate : public Predicate {
return _success_proj;
}
ParsePredicate clone_to_unswitched_loop(Node* new_control, bool is_false_path_loop,
PhaseIdealLoop* phase) const;
ParsePredicate clone_to_loop(Node* new_control, bool rewire_uncommon_proj_phi_inputs, PhaseIdealLoop* phase) const;
NOT_PRODUCT(void trace_cloned_parse_predicate(bool is_false_path_loop) const;)
void kill(PhaseIterGVN& igvn) const;
};
@ -786,7 +792,8 @@ class PredicateIterator : public StackObj {
PredicateBlockIterator loop_predicate_iterator(current_node, Deoptimization::Reason_predicate);
current_node = loop_predicate_iterator.for_each(predicate_visitor);
}
return current_node;
PredicateBlockIterator short_running_loop_predicate_iterator(current_node, Deoptimization::Reason_short_running_long_loop);
return short_running_loop_predicate_iterator.for_each(predicate_visitor);
}
};
@ -953,6 +960,7 @@ class Predicates : public StackObj {
const PredicateBlock _auto_vectorization_check_block;
const PredicateBlock _profiled_loop_predicate_block;
const PredicateBlock _loop_predicate_block;
const PredicateBlock _short_running_long_loop_predicate_block;
Node* const _entry;
public:
@ -965,7 +973,9 @@ class Predicates : public StackObj {
Deoptimization::Reason_profile_predicate),
_loop_predicate_block(_profiled_loop_predicate_block.entry(),
Deoptimization::Reason_predicate),
_entry(_loop_predicate_block.entry()) {}
_short_running_long_loop_predicate_block(_loop_predicate_block.entry(),
Deoptimization::Reason_short_running_long_loop),
_entry(_short_running_long_loop_predicate_block.entry()) {}
NONCOPYABLE(Predicates);
// Returns the control input the first predicate if there are any predicates. If there are no predicates, the same
@ -990,6 +1000,10 @@ class Predicates : public StackObj {
return &_loop_limit_check_predicate_block;
}
const PredicateBlock* short_running_long_loop_predicate_block() const {
return &_short_running_long_loop_predicate_block;
}
bool has_any() const {
return _entry != _tail;
}
@ -1082,6 +1096,19 @@ class NodeInClonedLoopBody : public NodeInLoopBody {
}
};
// This class checks whether a node is in the loop body passed to the constructor.
// A node counts as part of the loop body when PhaseIdealLoop::is_member() accepts the node's control
// (PhaseIdealLoop::get_ctrl()) for the IdealLoopTree given at construction.
class NodeInSingleLoopBody : public NodeInLoopBody {
// Used to look up the control of a node and to query loop membership.
PhaseIdealLoop* const _phase;
// The loop whose body membership is checked.
IdealLoopTree* const _ilt;
public:
// Stores the phase and the loop to check against; no other work is done here.
NodeInSingleLoopBody(PhaseIdealLoop* phase, IdealLoopTree* ilt) : _phase(phase), _ilt(ilt) {
}
NONCOPYABLE(NodeInSingleLoopBody);
// Returns true if the control of 'node' is a member of '_ilt'.
bool check_node_in_loop_body(Node* node) const override;
};
// Visitor to create Initialized Assertion Predicates at a target loop from Template Assertion Predicates from a source
// loop. This visitor can be used in combination with a PredicateIterator.
class CreateAssertionPredicatesVisitor : public PredicateVisitor {
@ -1158,10 +1185,11 @@ public:
ClonePredicateToTargetLoop(LoopNode* target_loop_head, const NodeInLoopBody& node_in_loop_body, PhaseIdealLoop* phase);
// Clones the provided Parse Predicate to the head of the current predicate chain at the target loop.
void clone_parse_predicate(const ParsePredicate& parse_predicate, bool is_false_path_loop) {
ParsePredicate cloned_parse_predicate = parse_predicate.clone_to_unswitched_loop(_old_target_loop_entry,
is_false_path_loop, _phase);
ParsePredicate clone_parse_predicate(const ParsePredicate& parse_predicate, bool rewire_uncommon_proj_phi_inputs) {
ParsePredicate cloned_parse_predicate = parse_predicate.clone_to_loop(_old_target_loop_entry,
rewire_uncommon_proj_phi_inputs, _phase);
_target_loop_predicate_chain.insert_predicate(cloned_parse_predicate);
return cloned_parse_predicate;
}
void clone_template_assertion_predicate(const TemplateAssertionPredicate& template_assertion_predicate);
@ -1189,6 +1217,9 @@ class CloneUnswitchedLoopPredicatesVisitor : public PredicateVisitor {
using PredicateVisitor::visit;
void visit(const ParsePredicate& parse_predicate) override;
void clone_parse_predicate(const ParsePredicate &parse_predicate,
bool is_false_path_loop);
void visit(const TemplateAssertionPredicate& template_assertion_predicate) override;
};

View File

@ -2767,8 +2767,8 @@ const char* Deoptimization::_trap_reason_name[] = {
"unstable_if",
"unstable_fused_if",
"receiver_constraint",
"short_running_loop" JVMCI_ONLY("_or_aliasing"),
#if INCLUDE_JVMCI
"aliasing",
"transfer_to_interpreter",
"not_compiled_exception_handler",
"unresolved",

View File

@ -117,8 +117,9 @@ class Deoptimization : AllStatic {
Reason_unstable_if, // a branch predicted always false was taken
Reason_unstable_fused_if, // fused two ifs that had each one untaken branch. One is now taken.
Reason_receiver_constraint, // receiver subtype check failed
Reason_short_running_long_loop, // profile reports loop runs for small number of iterations
#if INCLUDE_JVMCI
Reason_aliasing, // optimistic assumption about aliasing failed
Reason_aliasing = Reason_short_running_long_loop, // optimistic assumption about aliasing failed
Reason_transfer_to_interpreter, // explicit transferToInterpreter()
Reason_not_compiled_exception_handler,
Reason_unresolved,

View File

@ -1566,6 +1566,7 @@
declare_constant(Deoptimization::Reason_age) \
declare_constant(Deoptimization::Reason_predicate) \
declare_constant(Deoptimization::Reason_loop_limit_check) \
declare_constant(Deoptimization::Reason_short_running_long_loop) \
declare_constant(Deoptimization::Reason_auto_vectorization_check) \
declare_constant(Deoptimization::Reason_speculate_class_check) \
declare_constant(Deoptimization::Reason_speculate_null_check) \
@ -1573,7 +1574,6 @@
declare_constant(Deoptimization::Reason_unstable_if) \
declare_constant(Deoptimization::Reason_unstable_fused_if) \
declare_constant(Deoptimization::Reason_receiver_constraint) \
NOT_ZERO(JVMCI_ONLY(declare_constant(Deoptimization::Reason_aliasing))) \
NOT_ZERO(JVMCI_ONLY(declare_constant(Deoptimization::Reason_transfer_to_interpreter))) \
NOT_ZERO(JVMCI_ONLY(declare_constant(Deoptimization::Reason_not_compiled_exception_handler))) \
NOT_ZERO(JVMCI_ONLY(declare_constant(Deoptimization::Reason_unresolved))) \

View File

@ -148,6 +148,9 @@ class oopDesc;
#ifndef JULONG_FORMAT_X
#define JULONG_FORMAT_X UINT64_FORMAT_X
#endif
#ifndef JULONG_FORMAT_W
#define JULONG_FORMAT_W(width) UINT64_FORMAT_W(width)
#endif
// Format pointers and padded integral values which change size between 32- and 64-bit.
#ifdef _LP64
@ -771,6 +774,14 @@ inline jlong min_signed_integer(BasicType bt) {
return min_jlong;
}
// Returns the largest unsigned value for the given Java integer type: max_juint for T_INT and max_julong for
// T_LONG. Any other type is unsupported and trips the assert.
inline julong max_unsigned_integer(BasicType bt) {
  if (bt != T_INT) {
    assert(bt == T_LONG, "unsupported");
    return max_julong;
  }
  return max_juint;
}
inline uint bits_per_java_integer(BasicType bt) {
if (bt == T_INT) {
return BitsPerJavaInteger;

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2021, 2022, Red Hat, Inc. All rights reserved.
* Copyright (c) 2021, 2022, 2025 Red Hat, Inc. All rights reserved.
* Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@ -38,6 +38,9 @@ import java.util.Objects;
public class TestLongRangeChecks {
public static void main(String[] args) {
TestFramework.runWithFlags("-XX:-ShortRunningLongLoop", "-XX:+TieredCompilation", "-XX:-UseCountedLoopSafepoints", "-XX:LoopUnrollLimit=0");
TestFramework.runWithFlags("-XX:-ShortRunningLongLoop", "-XX:+TieredCompilation", "-XX:+UseCountedLoopSafepoints", "-XX:LoopStripMiningIter=1", "-XX:LoopUnrollLimit=0");
TestFramework.runWithFlags("-XX:-ShortRunningLongLoop", "-XX:+TieredCompilation", "-XX:+UseCountedLoopSafepoints", "-XX:LoopStripMiningIter=1000", "-XX:LoopUnrollLimit=0");
TestFramework.runWithFlags("-XX:+TieredCompilation", "-XX:-UseCountedLoopSafepoints", "-XX:LoopUnrollLimit=0");
TestFramework.runWithFlags("-XX:+TieredCompilation", "-XX:+UseCountedLoopSafepoints", "-XX:LoopStripMiningIter=1", "-XX:LoopUnrollLimit=0");
TestFramework.runWithFlags("-XX:+TieredCompilation", "-XX:+UseCountedLoopSafepoints", "-XX:LoopStripMiningIter=1000", "-XX:LoopUnrollLimit=0");
@ -45,7 +48,8 @@ public class TestLongRangeChecks {
@Test
@IR(counts = { IRNode.LOOP, "1" })
@IR(applyIf = { "ShortRunningLongLoop", "false" }, counts = { IRNode.LOOP, "1" })
@IR(applyIf = { "ShortRunningLongLoop", "true" }, failOn = IRNode.LOOP)
@IR(failOn = { IRNode.COUNTED_LOOP})
public static void testStridePosScalePos(long start, long stop, long length, long offset) {
final long scale = 1;
@ -66,7 +70,8 @@ public class TestLongRangeChecks {
}
@Test
@IR(counts = { IRNode.LOOP, "1" })
@IR(applyIf = { "ShortRunningLongLoop", "false" }, counts = { IRNode.LOOP, "1" })
@IR(applyIf = { "ShortRunningLongLoop", "true" }, failOn = IRNode.LOOP)
@IR(failOn = { IRNode.COUNTED_LOOP})
public static void testStridePosScalePosInIntLoop1(int start, int stop, long length, long offset) {
final long scale = 2;
@ -84,7 +89,8 @@ public class TestLongRangeChecks {
}
@Test
@IR(counts = { IRNode.LOOP, "1" })
@IR(applyIf = { "ShortRunningLongLoop", "false" }, counts = { IRNode.LOOP, "1" })
@IR(applyIf = { "ShortRunningLongLoop", "true" }, failOn = IRNode.LOOP)
@IR(failOn = { IRNode.COUNTED_LOOP})
public static void testStridePosScalePosInIntLoop2(int start, int stop, long length, long offset) {
final int scale = 2;
@ -102,7 +108,8 @@ public class TestLongRangeChecks {
}
@Test
@IR(counts = { IRNode.LOOP, "1"})
@IR(applyIf = { "ShortRunningLongLoop", "false" }, counts = { IRNode.LOOP, "1" })
@IR(applyIf = { "ShortRunningLongLoop", "true" }, failOn = IRNode.LOOP)
@IR(failOn = { IRNode.COUNTED_LOOP})
public static void testStrideNegScaleNeg(long start, long stop, long length, long offset) {
final long scale = -1;
@ -118,7 +125,8 @@ public class TestLongRangeChecks {
}
@Test
@IR(counts = { IRNode.LOOP, "1" })
@IR(applyIf = { "ShortRunningLongLoop", "false" }, counts = { IRNode.LOOP, "1" })
@IR(applyIf = { "ShortRunningLongLoop", "true" }, failOn = IRNode.LOOP)
@IR(failOn = { IRNode.COUNTED_LOOP})
public static void testStrideNegScaleNegInIntLoop1(int start, int stop, long length, long offset) {
final long scale = -2;
@ -135,7 +143,8 @@ public class TestLongRangeChecks {
}
@Test
@IR(counts = { IRNode.LOOP, "1" })
@IR(applyIf = { "ShortRunningLongLoop", "false" }, counts = { IRNode.LOOP, "1" })
@IR(applyIf = { "ShortRunningLongLoop", "true" }, failOn = IRNode.LOOP)
@IR(failOn = { IRNode.COUNTED_LOOP})
public static void testStrideNegScaleNegInIntLoop2(int start, int stop, long length, long offset) {
final int scale = -2;
@ -152,7 +161,8 @@ public class TestLongRangeChecks {
}
@Test
@IR(counts = { IRNode.LOOP, "1"})
@IR(applyIf = { "ShortRunningLongLoop", "false" }, counts = { IRNode.LOOP, "1" })
@IR(applyIf = { "ShortRunningLongLoop", "true" }, failOn = IRNode.LOOP)
@IR(failOn = { IRNode.COUNTED_LOOP})
public static void testStrideNegScalePos(long start, long stop, long length, long offset) {
final long scale = 1;
@ -168,7 +178,8 @@ public class TestLongRangeChecks {
}
@Test
@IR(counts = { IRNode.LOOP, "1" })
@IR(applyIf = { "ShortRunningLongLoop", "false" }, counts = { IRNode.LOOP, "1" })
@IR(applyIf = { "ShortRunningLongLoop", "true" }, failOn = IRNode.LOOP)
@IR(failOn = { IRNode.COUNTED_LOOP})
public static void testStrideNegScalePosInIntLoop1(int start, int stop, long length, long offset) {
final long scale = 2;
@ -184,7 +195,8 @@ public class TestLongRangeChecks {
}
@Test
@IR(counts = { IRNode.LOOP, "1" })
@IR(applyIf = { "ShortRunningLongLoop", "false" }, counts = { IRNode.LOOP, "1" })
@IR(applyIf = { "ShortRunningLongLoop", "true" }, failOn = IRNode.LOOP)
@IR(failOn = { IRNode.COUNTED_LOOP})
public static void testStrideNegScalePosInIntLoop2(int start, int stop, long length, long offset) {
final int scale = 2;
@ -200,7 +212,8 @@ public class TestLongRangeChecks {
}
@Test
@IR(counts = { IRNode.LOOP, "1"})
@IR(applyIf = { "ShortRunningLongLoop", "false" }, counts = { IRNode.LOOP, "1" })
@IR(applyIf = { "ShortRunningLongLoop", "true" }, failOn = IRNode.LOOP)
@IR(failOn = { IRNode.COUNTED_LOOP})
public static void testStridePosScaleNeg(long start, long stop, long length, long offset) {
final long scale = -1;
@ -216,7 +229,8 @@ public class TestLongRangeChecks {
}
@Test
@IR(counts = { IRNode.LOOP, "1"})
@IR(applyIf = { "ShortRunningLongLoop", "false" }, counts = { IRNode.LOOP, "1" })
@IR(applyIf = { "ShortRunningLongLoop", "true" }, failOn = IRNode.LOOP)
@IR(failOn = { IRNode.COUNTED_LOOP})
public static void testStridePosScaleNegInIntLoop1(int start, int stop, long length, long offset) {
final long scale = -2;
@ -232,7 +246,8 @@ public class TestLongRangeChecks {
}
@Test
@IR(counts = { IRNode.LOOP, "1"})
@IR(applyIf = { "ShortRunningLongLoop", "false" }, counts = { IRNode.LOOP, "1" })
@IR(applyIf = { "ShortRunningLongLoop", "true" }, failOn = IRNode.LOOP)
@IR(failOn = { IRNode.COUNTED_LOOP})
public static void testStridePosScaleNegInIntLoop2(int start, int stop, long length, long offset) {
final int scale = -2;

View File

@ -1663,6 +1663,11 @@ public class IRNode {
trapNodes(RANGE_CHECK_TRAP, "range_check");
}
// IR node placeholder for uncommon traps registered under the "short_running_loop" reason name.
// NOTE(review): presumably this matches traps emitted for Deoptimization::Reason_short_running_long_loop -- confirm
// against trapNodes().
public static final String SHORT_RUNNING_LOOP_TRAP = PREFIX + "SHORT_RUNNING_LOOP_TRAP" + POSTFIX;
static {
trapNodes(SHORT_RUNNING_LOOP_TRAP, "short_running_loop");
}
public static final String REINTERPRET_S2HF = PREFIX + "REINTERPRET_S2HF" + POSTFIX;
static {
beforeMatchingNameRegex(REINTERPRET_S2HF, "ReinterpretS2HF");

View File

@ -0,0 +1,53 @@
/*
* Copyright (c) 2025, Red Hat, Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
/**
* @test
* @bug 8342692
* @summary C2: long counted loop/long range checks: don't create loop-nest for short running loops
* @run main/othervm -XX:-TieredCompilation -XX:-UseOnStackReplacement -XX:-BackgroundCompilation TestShortLoopLostLimit
* @run main/othervm TestShortLoopLostLimit
*/
// Test for 8342692 with long counted loops whose bound is the sum 'a + b' of an int and a long argument.
public class TestShortLoopLostLimit {
// Written in every loop iteration.
// NOTE(review): presumably a volatile store so the loop bodies cannot be optimized away -- confirm.
private static volatile int volatileField;
// Calls both test methods 20_000 times with a = 0 and b = 100.
// NOTE(review): presumably enough invocations to trigger JIT compilation of test1/test2 -- confirm.
public static void main(String[] args) {
for (int i = 0; i < 20_000; i++) {
test1(0, 100);
test2(0, 100);
}
}
// Upward long loop from 0 (inclusive) to a + b (exclusive) with stride 2.
private static void test1(int a, long b) {
for (long i = 0; i < a + b; i += 2) {
volatileField = 42;
}
}
// Downward long loop from a + b (inclusive) to 0 (exclusive) with stride -2.
private static void test2(int a, long b) {
for (long i = a + b; i > 0; i -= 2) {
volatileField = 42;
}
}
}

View File

@ -0,0 +1,66 @@
/*
* Copyright (c) 2025, Red Hat, Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
/**
* @test
* @bug 8342692
* @summary C2: long counted loop/long range checks: don't create loop-nest for short running loops
* @run main/othervm -XX:-TieredCompilation -XX:-UseOnStackReplacement -XX:-BackgroundCompilation -XX:LoopUnrollLimit=100
* TestShortRunningIntLoopWithLongChecksPredicates
* @run main/othervm TestShortRunningIntLoopWithLongChecksPredicates
*/
import java.util.Objects;
// The int range check is first eliminated by predication, which causes
// Assertion Predicates to be added. Then the loop is transformed to make
// it possible to optimize the long range check. Finally, unrolling
// happens, which requires the Assertion Predicates to have been properly
// copied when the loop was transformed for the long range check.
public class TestShortRunningIntLoopWithLongChecksPredicates {
// Written on odd iterations of the loop in helper1().
private static volatile int volatileField;
// Calls helper1() with a 100 element array and test1() with stop = 1, 20_000 times each.
// NOTE(review): presumably enough invocations to trigger JIT compilation -- confirm.
public static void main(String[] args) {
int[] array = new int[100];
for (int i = 0; i < 20_000; i++) {
helper1(100, array, 100);
test1(1, 100);
}
}
// Runs helper1() on a freshly allocated 3 element array.
private static void test1(int stop, long range) {
int[] array = new int[3];
helper1(stop, array, range);
}
// int counted loop over [0, stop) that contains both an int array access (on even i) and a long
// Objects.checkIndex() against 'range' (on every iteration).
private static void helper1(int stop, int[] array, long range) {
for (int i = 0; i < stop; i++) {
if (i % 2 == 0) {
array[i] += i;
} else {
volatileField = 42;
}
Objects.checkIndex(i, range);
}
}
}

View File

@ -0,0 +1,579 @@
/*
* Copyright (c) 2025, Red Hat, Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package compiler.longcountedloops;
import compiler.lib.ir_framework.*;
import compiler.whitebox.CompilerWhiteBoxTest;
import jdk.test.whitebox.WhiteBox;
import java.util.Objects;
/*
* @test
* @bug 8342692
* @summary C2: long counted loop/long range checks: don't create loop-nest for short running loops
* @library /test/lib /
* @build jdk.test.whitebox.WhiteBox
* @run driver jdk.test.lib.helpers.ClassFileInstaller jdk.test.whitebox.WhiteBox
* @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI compiler.longcountedloops.TestShortRunningLongCountedLoop
*/
public class TestShortRunningLongCountedLoop {
private static volatile int volatileField;
private final static WhiteBox wb = WhiteBox.getWhiteBox();
// Runs all IR tests of this class in a single test VM configuration.
public static void main(String[] args) {
// The IR rules expect a single loop, so unrolling is disabled (-XX:LoopMaxUnroll=0).
// The IR rules expect loop strip mining to be enabled (-XX:LoopStripMiningIter=1000 and -XX:+UseCountedLoopSafepoints).
// testIntLoopUnknownBoundsShortUnswitchedLoop and testLongLoopUnknownBoundsShortUnswitchedLoop need -XX:-UseProfiledLoopPredicate
TestFramework.runWithFlags("-XX:LoopMaxUnroll=0", "-XX:LoopStripMiningIter=1000", "-XX:+UseCountedLoopSafepoints", "-XX:-UseProfiledLoopPredicate");
}
// Check that the IR only has a counted loop when the bounds are known and the loop runs for a short time.
// The IR rules require exactly one counted loop and forbid a plain loop, a strip mined outer loop and a
// short running loop trap.
@Test
@IR(counts = { IRNode.COUNTED_LOOP, "1" })
@IR(failOn = { IRNode.LOOP, IRNode.OUTER_STRIP_MINED_LOOP, IRNode.SHORT_RUNNING_LOOP_TRAP })
public static int testLongLoopConstantBoundsShortLoop1() {
int j = 0;
for (long i = 0; i < 100; i++) {
volatileField = 42;
j++;
}
return j;
}
// Throws if the result passed in for testLongLoopConstantBoundsShortLoop1 is not the expected 100 iterations.
@Check(test = "testLongLoopConstantBoundsShortLoop1")
public static void checkTestLongLoopConstantBoundsShortLoop1(int res) {
if (res != 100) {
throw new RuntimeException("incorrect result: " + res);
}
}
// Same with stride > 1: constant bounds [0, 2000) and stride 20, i.e. 100 iterations.
@Test
@IR(counts = { IRNode.COUNTED_LOOP, "1" })
@IR(failOn = { IRNode.LOOP, IRNode.OUTER_STRIP_MINED_LOOP, IRNode.SHORT_RUNNING_LOOP_TRAP })
public static int testLongLoopConstantBoundsShortLoop2() {
int j = 0;
for (long i = 0; i < 2000; i += 20) {
volatileField = 42;
j++;
}
return j;
}
// Throws if the result passed in for testLongLoopConstantBoundsShortLoop2 is not the expected 100 iterations.
@Check(test = "testLongLoopConstantBoundsShortLoop2")
public static void checkTestLongLoopConstantBoundsShortLoop2(int res) {
if (res != 100) {
throw new RuntimeException("incorrect result: " + res);
}
}
// Same with the loop going downward: from 99 down to 0 (inclusive) with stride -1, i.e. 100 iterations.
@Test
@IR(counts = { IRNode.COUNTED_LOOP, "1" })
@IR(failOn = { IRNode.LOOP, IRNode.OUTER_STRIP_MINED_LOOP, IRNode.SHORT_RUNNING_LOOP_TRAP })
public static int testLongLoopConstantBoundsShortLoop3() {
int j = 0;
for (long i = 99; i >= 0; i--) {
volatileField = 42;
j++;
}
return j;
}
// Throws if the result passed in for testLongLoopConstantBoundsShortLoop3 is not the expected 100 iterations.
@Check(test = "testLongLoopConstantBoundsShortLoop3")
public static void checkTestLongLoopConstantBoundsShortLoop3(int res) {
if (res != 100) {
throw new RuntimeException("incorrect result: " + res);
}
}
// Same with the loop going downward and stride > 1: from 1999 down to 0 with stride -20, i.e. 100 iterations.
@Test
@IR(counts = { IRNode.COUNTED_LOOP, "1" })
@IR(failOn = { IRNode.LOOP, IRNode.OUTER_STRIP_MINED_LOOP, IRNode.SHORT_RUNNING_LOOP_TRAP })
public static int testLongLoopConstantBoundsShortLoop4() {
int j = 0;
for (long i = 1999; i >= 0; i-=20) {
volatileField = 42;
j++;
}
return j;
}
// Throws if the result passed in for testLongLoopConstantBoundsShortLoop4 is not the expected 100 iterations.
@Check(test = "testLongLoopConstantBoundsShortLoop4")
public static void checkTestLongLoopConstantBoundsShortLoop4(int res) {
if (res != 100) {
throw new RuntimeException("incorrect result: " + res);
}
}
// Check that the IR only has a counted loop when the bounds are known but not exact and the loop runs for a
// short time. The bounds are not constants here: 'start' is clamped to be >= 0 and 'stop' to be <= 999.
@Test
@IR(counts = { IRNode.COUNTED_LOOP, "1" })
@IR(failOn = { IRNode.LOOP, IRNode.OUTER_STRIP_MINED_LOOP, IRNode.SHORT_RUNNING_LOOP_TRAP })
public static int testLongLoopConstantBoundsShortLoop5(int start, int stop) {
start= Integer.max(start, 0);
stop= Integer.min(stop, 999);
int j = 0;
for (long i = start; i < stop; i++) {
volatileField = 42;
j++;
}
return j;
}
// Calls testLongLoopConstantBoundsShortLoop5 with start = 0 and stop = 100 and expects 100 iterations.
@Run(test = "testLongLoopConstantBoundsShortLoop5")
public static void testLongLoopConstantBoundsShortLoop5_runner() {
int res = testLongLoopConstantBoundsShortLoop5(0, 100);
if (res != 100) {
throw new RuntimeException("incorrect result: " + res);
}
}
// Check that a loop nest is created when the bounds are known and the loop is not short running.
// With stride = Integer.MAX_VALUE / 1000, the upper bound stride * 1001 is larger than Integer.MAX_VALUE.
// The IR rules require one counted loop plus one plain loop and forbid a short running loop trap and a
// strip mined outer loop.
@Test
@IR(counts = { IRNode.COUNTED_LOOP, "1", IRNode.LOOP, "1"})
@IR(failOn = { IRNode.SHORT_RUNNING_LOOP_TRAP, IRNode.OUTER_STRIP_MINED_LOOP })
public static int testLongLoopConstantBoundsLongLoop1() {
final long stride = Integer.MAX_VALUE / 1000;
int j = 0;
for (long i = 0; i < stride * 1001; i += stride) {
volatileField = 42;
j++;
}
return j;
}
// Throws if the result passed in for testLongLoopConstantBoundsLongLoop1 is not the expected 1001 iterations.
@Check(test = "testLongLoopConstantBoundsLongLoop1")
public static void checkTestLongLoopConstantBoundsLongLoop1(int res) {
if (res != 1001) {
throw new RuntimeException("incorrect result: " + res);
}
}
// Same with a negative stride: from stride * 1000 down to 0 (inclusive), i.e. 1001 iterations.
@Test
@IR(counts = { IRNode.COUNTED_LOOP, "1", IRNode.LOOP, "1"})
@IR(failOn = { IRNode.SHORT_RUNNING_LOOP_TRAP, IRNode.OUTER_STRIP_MINED_LOOP })
public static int testLongLoopConstantBoundsLongLoop2() {
final long stride = Integer.MAX_VALUE / 1000;
int j = 0;
for (long i = stride * 1000; i >= 0; i -= stride) {
volatileField = 42;
j++;
}
return j;
}
// Throws if the result passed in for testLongLoopConstantBoundsLongLoop2 is not the expected 1001 iterations.
@Check(test = "testLongLoopConstantBoundsLongLoop2")
public static void checkTestLongLoopConstantBoundsLongLoop2(int res) {
if (res != 1001) {
throw new RuntimeException("incorrect result: " + res);
}
}
// Check that the IR only has a counted loop when the bounds are unknown but the profile reports a short
// running loop. The IR rules require one counted loop with one strip mined outer loop and exactly one
// short running loop trap, and forbid a plain loop.
@Test
@IR(counts = { IRNode.COUNTED_LOOP, "1", IRNode.SHORT_RUNNING_LOOP_TRAP, "1", IRNode.OUTER_STRIP_MINED_LOOP, "1" })
@IR(failOn = { IRNode.LOOP })
public static int testLongLoopUnknownBoundsShortLoop(long start, long stop) {
int j = 0;
for (long i = start; i < stop; i++) {
volatileField = 42;
j++;
}
return j;
}
// Calls testLongLoopUnknownBoundsShortLoop with the range [0, 100) and expects 100 iterations.
// Uses a warm-up count of 10_000.
@Run(test = "testLongLoopUnknownBoundsShortLoop")
@Warmup(10_000)
public static void testLongLoopUnknownBoundsShortLoop_runner() {
int res = testLongLoopUnknownBoundsShortLoop(0, 100);
if (res != 100) {
throw new RuntimeException("incorrect result: " + res);
}
}
// Same with stride > 1 (stride 20).
@Test
@IR(counts = { IRNode.COUNTED_LOOP, "1", IRNode.SHORT_RUNNING_LOOP_TRAP, "1", IRNode.OUTER_STRIP_MINED_LOOP, "1" })
@IR(failOn = { IRNode.LOOP })
public static int testLongLoopUnknownBoundsShortLoop2(long start, long stop) {
int j = 0;
for (long i = start; i < stop; i+=20) {
volatileField = 42;
j++;
}
return j;
}
// Calls testLongLoopUnknownBoundsShortLoop2 with the range [0, 2000) and expects 100 iterations.
// Uses a warm-up count of 10_000.
@Run(test = "testLongLoopUnknownBoundsShortLoop2")
@Warmup(10_000)
public static void testLongLoopUnknownBoundsShortLoop2_runner() {
int res = testLongLoopUnknownBoundsShortLoop2(0, 2000);
if (res != 100) {
throw new RuntimeException("incorrect result: " + res);
}
}
// Same with a negative stride: counts down from 'start' to 'stop' (inclusive) with stride -1.
@Test
@IR(counts = { IRNode.COUNTED_LOOP, "1", IRNode.SHORT_RUNNING_LOOP_TRAP, "1", IRNode.OUTER_STRIP_MINED_LOOP, "1" })
@IR(failOn = { IRNode.LOOP })
public static int testLongLoopUnknownBoundsShortLoop3(long start, long stop) {
int j = 0;
for (long i = start; i >= stop; i--) {
volatileField = 42;
j++;
}
return j;
}
// Calls testLongLoopUnknownBoundsShortLoop3 counting down from 99 to 0 and expects 100 iterations.
// Uses a warm-up count of 10_000.
@Run(test = "testLongLoopUnknownBoundsShortLoop3")
@Warmup(10_000)
public static void testLongLoopUnknownBoundsShortLoop3_runner() {
int res = testLongLoopUnknownBoundsShortLoop3(99, 0);
if (res != 100) {
throw new RuntimeException("incorrect result: " + res);
}
}
// Same with a negative stride of magnitude > 1 (stride -20).
@Test
@IR(counts = { IRNode.COUNTED_LOOP, "1", IRNode.SHORT_RUNNING_LOOP_TRAP, "1", IRNode.OUTER_STRIP_MINED_LOOP, "1" })
@IR(failOn = { IRNode.LOOP })
public static int testLongLoopUnknownBoundsShortLoop4(long start, long stop) {
int j = 0;
for (long i = start; i >= stop; i -= 20) {
volatileField = 42;
j++;
}
return j;
}
// Calls testLongLoopUnknownBoundsShortLoop4 counting down from 1999 to 0 with stride -20 and expects
// 100 iterations. Uses a warm-up count of 10_000.
@Run(test = "testLongLoopUnknownBoundsShortLoop4")
@Warmup(10_000)
public static void testLongLoopUnknownBoundsShortLoop4_runner() {
int res = testLongLoopUnknownBoundsShortLoop4(1999, 0);
if (res != 100) {
throw new RuntimeException("incorrect result: " + res);
}
}
// Check that a loop nest is created when the bounds are not known but the profile reports that the loop is
// not short running. The IR rules require one counted loop, one strip mined outer loop and one plain loop,
// and forbid a short running loop trap.
@Test
@IR(counts = { IRNode.COUNTED_LOOP, "1", IRNode.OUTER_STRIP_MINED_LOOP, "1", IRNode.LOOP, "1"})
@IR(failOn = { IRNode.SHORT_RUNNING_LOOP_TRAP })
public static int testLongLoopUnknownBoundsLongLoop1(long start, long stop, long range) {
int j = 0;
for (long i = start; i < stop; i++) {
volatileField = 42;
Objects.checkIndex(i * (1024 * 1024), range); // max number of iteration of inner loop is roughly Integer.MAX_VALUE / 1024 / 1024
j++;
}
return j;
}
// Calls testLongLoopUnknownBoundsLongLoop1 with the range [0, 3000) and range check limit Long.MAX_VALUE,
// and expects 3000 iterations. Uses a warm-up count of 10_000.
@Run(test = "testLongLoopUnknownBoundsLongLoop1")
@Warmup(10_000)
public static void testLongLoopUnknownBoundsLongLoop1_runner() {
int res = testLongLoopUnknownBoundsLongLoop1(0, 3000, Long.MAX_VALUE);
if (res != 3000) {
throw new RuntimeException("incorrect result: " + res);
}
}
// Same with a negative stride: counts down from 'start' to 'stop' (inclusive) with stride -1.
@Test
@IR(counts = { IRNode.COUNTED_LOOP, "1", IRNode.OUTER_STRIP_MINED_LOOP, "1", IRNode.LOOP, "1"})
@IR(failOn = { IRNode.SHORT_RUNNING_LOOP_TRAP })
public static int testLongLoopUnknownBoundsLongLoop2(long start, long stop, long range) {
int j = 0;
for (long i = start; i >= stop; i--) {
volatileField = 42;
Objects.checkIndex(i * (1024 * 1024), range); // max number of iteration of inner loop is roughly Integer.MAX_VALUE / 1024 / 1024
j++;
}
return j;
}
// Calls testLongLoopUnknownBoundsLongLoop2 counting down from 2999 to 0 with range check limit
// Long.MAX_VALUE, and expects 3000 iterations. Uses a warm-up count of 10_000.
@Run(test = "testLongLoopUnknownBoundsLongLoop2")
@Warmup(10_000)
public static void testLongLoopUnknownBoundsLongLoop2_runner() {
int res = testLongLoopUnknownBoundsLongLoop2(2999, 0, Long.MAX_VALUE);
if (res != 3000) {
throw new RuntimeException("incorrect result: " + res);
}
}
// Check that the IR has a loop nest when the bounds are unknown and the profile reports a short running
// loop, but the short running loop trap was taken. The IR rules require one counted loop, one plain loop
// and one strip mined outer loop, and forbid a short running loop trap.
@Test
@IR(counts = { IRNode.COUNTED_LOOP, "1", IRNode.LOOP, "1", IRNode.OUTER_STRIP_MINED_LOOP, "1" })
@IR(failOn = { IRNode.SHORT_RUNNING_LOOP_TRAP })
public static int testLongLoopUnknownBoundsShortLoopFailedSpeculation(long start, long stop, long range) {
int j = 0;
for (long i = start; i < stop; i++) {
volatileField = 42;
Objects.checkIndex(i * (1024 * 1024), range); // max number of iteration of inner loop is roughly Integer.MAX_VALUE / 1024 / 1024
j++;
}
return j;
}
// Runner for testLongLoopUnknownBoundsShortLoopFailedSpeculation. During its single warm-up invocation it:
//   1. calls the test 100_000 times with 100 iterations each,
//   2. enqueues the test for compilation at COMP_LEVEL_FULL_OPTIMIZATION through the WhiteBox API and
//      requires it to be compiled afterwards,
//   3. calls the test 10 times with 10_000 iterations each.
//      NOTE(review): presumably this makes the short running loop speculation of the compiled code fail,
//      so that the compilation checked by the IR rules no longer speculates -- confirm.
// Outside of warm-up, the test is called once with 100 iterations.
@Run(test = "testLongLoopUnknownBoundsShortLoopFailedSpeculation")
@Warmup(1)
public static void testLongLoopUnknownBoundsShortLoopFailedSpeculation_runner(RunInfo info) {
    if (info.isWarmUp()) {
        for (int i = 0; i < 100_000; i++) {
            int res = testLongLoopUnknownBoundsShortLoopFailedSpeculation(0, 100, Long.MAX_VALUE);
            if (res != 100) {
                throw new RuntimeException("incorrect result: " + res);
            }
        }
        wb.enqueueMethodForCompilation(info.getTest(), CompilerWhiteBoxTest.COMP_LEVEL_FULL_OPTIMIZATION);
        if (!wb.isMethodCompiled(info.getTest())) {
            throw new RuntimeException("Should be compiled now");
        }
        for (int i = 0; i < 10; i++) {
            int res = testLongLoopUnknownBoundsShortLoopFailedSpeculation(0, 10_000, Long.MAX_VALUE);
            if (res != 10_000) {
                throw new RuntimeException("incorrect result: " + res);
            }
        }
    } else {
        int res = testLongLoopUnknownBoundsShortLoopFailedSpeculation(0, 100, Long.MAX_VALUE);
        if (res != 100) {
            throw new RuntimeException("incorrect result: " + res);
        }
    }
}
// Check that the IR only has a counted loop (no loop nest and no short running loop trap) when the bounds
// are known and the loop is short running, even though the helper was also run with a longer range before.
// NOTE(review): presumably the longer helper runs in the runner make the short running loop trap fire in
// code compiled for the helper with unknown bounds -- confirm.
@Test
@IR(counts = { IRNode.COUNTED_LOOP, "1" })
@IR(failOn = { IRNode.LOOP, IRNode.OUTER_STRIP_MINED_LOOP, IRNode.SHORT_RUNNING_LOOP_TRAP })
public static int testLongLoopKnownBoundsShortLoopFailedSpeculation() {
return testLongLoopKnownBoundsShortLoopFailedSpeculationHelper(0, 100);
}
// Loop shared by testLongLoopKnownBoundsShortLoopFailedSpeculation (which passes constant bounds) and its
// runner (which also calls it directly with other bounds). Returns the number of iterations executed.
@ForceInline
private static int testLongLoopKnownBoundsShortLoopFailedSpeculationHelper(long start, long stop) {
int j = 0;
for (long i = start; i < stop; i++) {
volatileField = 42;
j++;
}
return j;
}
// Runner for testLongLoopKnownBoundsShortLoopFailedSpeculation. During warm-up it:
//  1. calls the helper directly 100_000 times with a 100-iteration loop,
//  2. calls the helper directly 10 times with a 10_000-iteration loop,
//  3. calls the test itself (constant bounds 0..100) 100_000 times.
// Outside of warm-up it runs the test once. Every call validates the returned iteration count.
@Run(test = "testLongLoopKnownBoundsShortLoopFailedSpeculation")
@Warmup(1)
public static void testLongLoopKnownBoundsShortLoopFailedSpeculation_runner(RunInfo info) {
    if (info.isWarmUp()) {
        // Step 1: many short runs of the helper (the order of the three steps matters, do not reorder).
        for (int i = 0; i < 100_000; i++) {
            int res = testLongLoopKnownBoundsShortLoopFailedSpeculationHelper(0, 100);
            if (res != 100) {
                throw new RuntimeException("incorrect result: " + res);
            }
        }
        // Step 2: a few runs of the helper with a much larger trip count.
        for (int i = 0; i < 10; i++) {
            int res = testLongLoopKnownBoundsShortLoopFailedSpeculationHelper(0, 10_000);
            if (res != 10_000) {
                throw new RuntimeException("incorrect result: " + res);
            }
        }
        // Step 3: many runs of the actual test method.
        for (int i = 0; i < 100_000; i++) {
            int res = testLongLoopKnownBoundsShortLoopFailedSpeculation();
            if (res != 100) {
                throw new RuntimeException("incorrect result: " + res);
            }
        }
    } else {
        int res = testLongLoopKnownBoundsShortLoopFailedSpeculation();
        if (res != 100) {
            throw new RuntimeException("incorrect result: " + res);
        }
    }
}
// Check range check can be eliminated by predication
// Expected IR: one predicate trap; no counted loop, Loop, outer strip mined loop or short running loop trap.
// The loop bounds are the constants 0 and 100; only the range is a parameter.
@Test
@IR(counts = { IRNode.PREDICATE_TRAP, "1" })
@IR(failOn = { IRNode.COUNTED_LOOP, IRNode.LOOP, IRNode.OUTER_STRIP_MINED_LOOP, IRNode.SHORT_RUNNING_LOOP_TRAP })
public static void testLongLoopConstantBoundsPredication(long range) {
for (long i = 0; i < 100; i++) {
Objects.checkIndex(i, range);
}
}
@Run(test = "testLongLoopConstantBoundsPredication")
public static void testLongLoopConstantBoundsPredication_runner() {
    // The range equals the test's constant loop limit (100), so every index checked by the test is in bounds.
    final long range = 100;
    testLongLoopConstantBoundsPredication(range);
}
// Same range check as testLongLoopConstantBoundsPredication but with loop bounds passed as parameters.
// Expected IR: one short running loop trap and one predicate trap; no counted loop, Loop or outer strip
// mined loop.
@Test
@IR(counts = { IRNode.SHORT_RUNNING_LOOP_TRAP, "1", IRNode.PREDICATE_TRAP, "1" })
@IR(failOn = { IRNode.COUNTED_LOOP, IRNode.LOOP, IRNode.OUTER_STRIP_MINED_LOOP })
public static void testLongLoopUnknownBoundsShortLoopPredication(long start, long stop, long range) {
for (long i = start; i < stop; i++) {
Objects.checkIndex(i, range);
}
}
@Run(test = "testLongLoopUnknownBoundsShortLoopPredication")
@Warmup(10_000)
public static void testLongLoopUnknownBoundsShortLoopPredication_runner() {
    // 100 iterations per call, with a range that matches the loop limit.
    final long start = 0;
    final long stop = 100;
    final long range = 100;
    testLongLoopUnknownBoundsShortLoopPredication(start, stop, range);
}
// If the scale is too large, the transformation can't happen.
// Scale applied to the loop variable by the *LargeScale tests: Integer.MAX_VALUE / 99 with integer
// division, i.e. 21_691_754.
static final long veryLargeScale = Integer.MAX_VALUE / 99;
// Constant loop bounds (0 to 100) with the checked index scaled by veryLargeScale.
// Expected IR: one Loop and two predicate traps; no counted loop, outer strip mined loop or short running
// loop trap.
@Test
@IR(counts = { IRNode.LOOP, "1", IRNode.PREDICATE_TRAP, "2"})
@IR(failOn = { IRNode.COUNTED_LOOP, IRNode.OUTER_STRIP_MINED_LOOP, IRNode.SHORT_RUNNING_LOOP_TRAP })
public static void testLongLoopConstantBoundsLargeScale(long range) {
for (long i = 0; i < 100; i++) {
Objects.checkIndex(veryLargeScale * i, range);
}
}
@Run(test = "testLongLoopConstantBoundsLargeScale")
public static void testLongLoopConstantBoundsLargeScale_runner() {
    // Range is the scale times the test's loop limit (100), so all scaled indices are in bounds.
    final long range = veryLargeScale * 100;
    testLongLoopConstantBoundsLargeScale(range);
}
// Same as testLongLoopConstantBoundsLargeScale but with loop bounds passed as parameters.
// Expected IR: one Loop and two predicate traps; no counted loop, outer strip mined loop or short running
// loop trap.
@Test
@IR(counts = { IRNode.LOOP, "1", IRNode.PREDICATE_TRAP, "2"})
@IR(failOn = { IRNode.COUNTED_LOOP, IRNode.OUTER_STRIP_MINED_LOOP, IRNode.SHORT_RUNNING_LOOP_TRAP })
public static void testLongLoopUnknownBoundsShortLoopLargeScale(long start, long stop, long range) {
for (long i = start; i < stop; i++) {
Objects.checkIndex(veryLargeScale * i, range);
}
}
@Run(test = "testLongLoopUnknownBoundsShortLoopLargeScale")
@Warmup(10_000)
public static void testLongLoopUnknownBoundsShortLoopLargeScale_runner() {
    // 100 iterations per call; the range is the scale times the loop limit.
    final long start = 0;
    final long stop = 100;
    final long range = veryLargeScale * 100;
    testLongLoopUnknownBoundsShortLoopLargeScale(start, stop, range);
}
// Check IR only has a counted loop when bounds are known and loop run for a short time (int loop case)
// Expected IR: one counted loop and one predicate trap; no Loop, outer strip mined loop or short running
// loop trap. The loop variable is an int but the range check is against a long range.
@Test
@IR(counts = { IRNode.COUNTED_LOOP, "1", IRNode.PREDICATE_TRAP, "1" })
@IR(failOn = { IRNode.LOOP, IRNode.OUTER_STRIP_MINED_LOOP, IRNode.SHORT_RUNNING_LOOP_TRAP })
public static void testIntLoopConstantBoundsShortLoop1(long range) {
for (int i = 0; i < 100; i++) {
Objects.checkIndex(i, range);
volatileField = 42;
}
}
@Run(test = "testIntLoopConstantBoundsShortLoop1")
public static void testIntLoopConstantBoundsShortLoop1_runner() {
    // The range equals the test's constant loop limit (100), so every index checked by the test is in bounds.
    final long range = 100;
    testIntLoopConstantBoundsShortLoop1(range);
}
// Check IR only has a counted loop when bounds are unknown but profile reports a short running loop (int loop case)
// Expected IR: one counted loop, one short running loop trap, one predicate trap and one outer strip
// mined loop; no Loop.
@Test
@IR(counts = { IRNode.COUNTED_LOOP, "1", IRNode.SHORT_RUNNING_LOOP_TRAP, "1", IRNode.PREDICATE_TRAP, "1", IRNode.OUTER_STRIP_MINED_LOOP, "1" })
@IR(failOn = { IRNode.LOOP })
public static void testIntLoopUnknownBoundsShortLoop(int start, int stop, long range) {
for (int i = start; i < stop; i++) {
Objects.checkIndex(i, range);
volatileField = 42;
}
}
@Run(test = "testIntLoopUnknownBoundsShortLoop")
@Warmup(10_000)
public static void testIntLoopUnknownBoundsShortLoop_runner() {
    // 100 iterations per call, with a range that matches the loop limit.
    final int start = 0;
    final int stop = 100;
    final long range = 100;
    testIntLoopUnknownBoundsShortLoop(start, stop, range);
}
// Same with unswitched loop
// The loop body branches on the loop-invariant flag; both branches perform the same range check and
// volatile store. Expected IR: two counted loops, two outer strip mined loops, one short running loop
// trap and one predicate trap; no Loop.
@Test
@IR(counts = { IRNode.COUNTED_LOOP, "2", IRNode.SHORT_RUNNING_LOOP_TRAP, "1", IRNode.PREDICATE_TRAP, "1", IRNode.OUTER_STRIP_MINED_LOOP, "2" })
@IR(failOn = { IRNode.LOOP })
public static void testIntLoopUnknownBoundsShortUnswitchedLoop(int start, int stop, long range, boolean flag) {
for (int i = start; i < stop; i++) {
if (flag) {
Objects.checkIndex(i, range);
volatileField = 42;
} else {
Objects.checkIndex(i, range);
volatileField = 42;
}
}
}
@Run(test = "testIntLoopUnknownBoundsShortUnswitchedLoop")
@Warmup(10_000)
public static void testIntLoopUnknownBoundsShortUnswitchedLoop_runner() {
    // Exercise both values of the flag: first true, then false.
    final int start = 0;
    final int stop = 100;
    final long range = 100;
    testIntLoopUnknownBoundsShortUnswitchedLoop(start, stop, range, true);
    testIntLoopUnknownBoundsShortUnswitchedLoop(start, stop, range, false);
}
// Long loop variant of testIntLoopUnknownBoundsShortUnswitchedLoop: the loop variable and bounds are longs.
// Expected IR: two counted loops, two outer strip mined loops, one short running loop trap and one
// predicate trap; no Loop.
@Test
@IR(counts = { IRNode.COUNTED_LOOP, "2", IRNode.SHORT_RUNNING_LOOP_TRAP, "1", IRNode.PREDICATE_TRAP, "1", IRNode.OUTER_STRIP_MINED_LOOP, "2" })
@IR(failOn = { IRNode.LOOP })
public static void testLongLoopUnknownBoundsShortUnswitchedLoop(long start, long stop, long range, boolean flag) {
for (long i = start; i < stop; i++) {
if (flag) {
Objects.checkIndex(i, range);
volatileField = 42;
} else {
Objects.checkIndex(i, range);
volatileField = 42;
}
}
}
@Run(test = "testLongLoopUnknownBoundsShortUnswitchedLoop")
@Warmup(10_000)
public static void testLongLoopUnknownBoundsShortUnswitchedLoop_runner() {
    // Exercise both values of the flag: first true, then false.
    final long start = 0;
    final long stop = 100;
    final long range = 100;
    testLongLoopUnknownBoundsShortUnswitchedLoop(start, stop, range, true);
    testLongLoopUnknownBoundsShortUnswitchedLoop(start, stop, range, false);
}
// Long loop whose limit is the expression stop1 + stop2 (an int added to a long) rather than a plain
// parameter. Expected IR: one counted loop, one short running loop trap and one outer strip mined loop;
// no Loop. Returns the number of iterations executed.
@Test
@IR(counts = { IRNode.COUNTED_LOOP, "1", IRNode.SHORT_RUNNING_LOOP_TRAP, "1", IRNode.OUTER_STRIP_MINED_LOOP, "1" })
@IR(failOn = { IRNode.LOOP })
public static int testLongLoopUnknownBoundsAddLimitShortLoop(int stop1, long stop2) {
int j = 0;
for (long i = 0; i < stop1 + stop2; i++) {
volatileField = 42;
j++;
}
return j;
}
@Run(test = "testLongLoopUnknownBoundsAddLimitShortLoop")
@Warmup(10_000)
public static void testLongLoopUnknownBoundsAddLimitShortLoop_runner() {
    // Limit is 100 + 0, so the test must report exactly 100 iterations.
    final int expected = 100;
    final int actual = testLongLoopUnknownBoundsAddLimitShortLoop(100, 0);
    if (actual == expected) {
        return;
    }
    throw new RuntimeException("incorrect result: " + actual);
}
}

View File

@ -0,0 +1,62 @@
/*
* Copyright (c) 2025, Red Hat, Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
/**
* @test
* @bug 8342692
* @summary C2: long counted loop/long range checks: don't create loop-nest for short running loops
* @run main/othervm -XX:-TieredCompilation -XX:-UseOnStackReplacement -XX:-BackgroundCompilation -XX:LoopMaxUnroll=0
* TestShortRunningLongCountedLoopPredicatesClone
* @run main/othervm TestShortRunningLongCountedLoopPredicatesClone
*/
import java.util.Objects;
// A predicate added after the int counted loop is created depends on
// the narrowed limit, which itself depends on a predicate added before
// the int counted loop was created: predicates need to be properly ordered.
public class TestShortRunningLongCountedLoopPredicatesClone {
// Calls test1() 20_000 times with the same A instance (range 100) and a start of 0.
public static void main(String[] args) {
A a = new A(100);
for (int i = 0; i < 20_000; i++) {
test1(a, 0);
}
}
// do/while loop over a long induction variable. a.range is read both as the bound of the index check in
// the body and as the loop exit limit.
private static void test1(A a, long start) {
long i = start;
do {
// NOTE(review): the purpose of this empty synchronized block is not visible here; presumably it
// shapes the loop for the compiler -- confirm before changing.
synchronized (new Object()) {}
Objects.checkIndex(i, a.range);
i++;
} while (i < a.range);
}
// Simple holder for the long range used by test1().
static class A {
A(long range) {
this.range = range;
}
long range;
}
}

View File

@ -0,0 +1,82 @@
/*
* Copyright (c) 2025, Red Hat, Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
/**
* @test
* @bug 8342692
* @summary C2: long counted loop/long range checks: don't create loop-nest for short running loops
* @run main/othervm -XX:-TieredCompilation -XX:-UseOnStackReplacement -XX:-BackgroundCompilation -XX:LoopMaxUnroll=0
* -XX:-UseLoopPredicate -XX:-RangeCheckElimination TestShortRunningLongCountedLoopScaleOverflow
* @run main/othervm TestShortRunningLongCountedLoopScaleOverflow
*/
import java.util.Objects;
// When the scale is large, even if the loop is short running, having a single
// counted loop is not possible.
public class TestShortRunningLongCountedLoopScaleOverflow {
// Warms up test1/test2 with j = 0 (the only scaled index checked is 0, which is in bounds), then calls
// each once with j = 10 and requires an IndexOutOfBoundsException: 10 * veryLargeScale is larger than
// the range (Integer.MAX_VALUE).
public static void main(String[] args) {
for (int i = 0; i < 20_000; i++) {
test1(Integer.MAX_VALUE, 0);
test2(Integer.MAX_VALUE, 0, 100);
}
boolean exception = false;
try {
test1(Integer.MAX_VALUE, 10);
} catch (IndexOutOfBoundsException indexOutOfBoundsException) {
exception = true;
}
if (!exception) {
throw new RuntimeException("Expected exception not thrown");
}
exception = false;
try {
test2(Integer.MAX_VALUE, 10, 100);
} catch (IndexOutOfBoundsException indexOutOfBoundsException) {
exception = true;
}
if (!exception) {
throw new RuntimeException("Expected exception not thrown");
}
}
// 2^29 = 536_870_912; multiplied by the loop variable in the range checks below.
static final long veryLargeScale = 1 << 29;
// Long loop with constant bounds (0 to 100). The scaled index is only checked on the iteration where
// i == j.
private static void test1(long range, long j) {
Objects.checkIndex(0, range);
for (long i = 0; i < 100; i++) {
if (i == j) {
Objects.checkIndex(veryLargeScale * i, range);
}
}
}
// Same as test1() but the loop limit is the stop parameter instead of a constant.
private static void test2(long range, long j, long stop) {
Objects.checkIndex(0, range);
for (long i = 0; i < stop; i++) {
if (i == j) {
Objects.checkIndex(veryLargeScale * i, range);
}
}
}
}

View File

@ -0,0 +1,68 @@
/*
* Copyright (c) 2025, Red Hat, Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package compiler.longcountedloops;
import jdk.internal.misc.Unsafe;
import java.util.Objects;
/*
* @test
* @bug 8342692
* @summary C2: long counted loop/long range checks: don't create loop-nest for short running loops
* @modules java.base/jdk.internal.misc
* @run main/othervm -XX:-BackgroundCompilation compiler.longcountedloops.TestShortRunningLongCountedLoopVectorization
*/
// Repeatedly runs an int loop that bounds-checks its index against a long size and then stores through
// Unsafe into an int array.
public class TestShortRunningLongCountedLoopVectorization {
    private static final Unsafe UNSAFE = Unsafe.getUnsafe();
    private static volatile int volatileField;

    // Calls test1() 20_000 times.
    public static void main(String[] args) {
        for (int i = 0; i < 20_000; i++) {
            test1();
        }
    }

    static int size = 1024;
    // Same value as size, but as a long so that the index check in test1() is a long range check.
    static long longSize = size;
    static int[] intArray = new int[size];

    // Writes 42 to intArray[0 .. localSize) through Unsafe, where localSize is size clamped to [0, 10000].
    public static void test1() {
        boolean doIt = true;
        int localSize = Integer.max(Integer.min(size, 10000), 0);
        int i = 0;
        while (true) {
            // NOTE(review): the purpose of this empty synchronized block is not visible here; presumably
            // it shapes the loop for the compiler -- confirm before changing.
            synchronized (new Object()) {}
            if (i >= localSize) {
                break;
            }
            // Volatile store executed on the first iteration only.
            if (doIt) {
                volatileField = 42;
                doIt = false;
            }
            // checkIndex() returns the validated index, here as a long.
            long j = Objects.checkIndex(i, longSize);
            UNSAFE.putInt(intArray, Unsafe.ARRAY_INT_BASE_OFFSET + j * Unsafe.ARRAY_INT_INDEX_SCALE, 42);
            i++;
        }
    }
}

View File

@ -0,0 +1,66 @@
/*
* Copyright (c) 2025, Red Hat, Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package compiler.longcountedloops;
import compiler.lib.ir_framework.*;
/*
* @test
* @bug 8342692
* @summary C2: long counted loop/long range checks: don't create loop-nest for short running loops
* @library /test/lib /
* @run driver compiler.longcountedloops.TestStressShortRunningLongCountedLoop
*/
// IR test run twice, with StressShortRunningLongLoop enabled and disabled, to check that the flag alone
// decides whether a long counted loop with unknown bounds is compiled with a short running loop trap.
public class TestStressShortRunningLongCountedLoop {
private static volatile int volatileField;
// Runs the IR framework once with +StressShortRunningLongLoop and once with -StressShortRunningLongLoop.
// StressShortRunningLongLoop is declared as a develop flag, hence -XX:+IgnoreUnrecognizedVMOptions.
public static void main(String[] args) {
TestFramework.runWithFlags("-XX:LoopMaxUnroll=0", "-XX:+IgnoreUnrecognizedVMOptions", "-XX:+StressShortRunningLongLoop");
TestFramework.runWithFlags("-XX:LoopMaxUnroll=0", "-XX:+IgnoreUnrecognizedVMOptions", "-XX:-StressShortRunningLongLoop");
}
// With the stress flag on: one counted loop, one short running loop trap, one outer strip mined loop and
// no Loop. With the stress flag off: one counted loop, one Loop, one outer strip mined loop and no short
// running loop trap. Returns the number of iterations executed.
@Test
@IR(applyIf = { "StressShortRunningLongLoop", "true" }, counts = { IRNode.COUNTED_LOOP, "1", IRNode.SHORT_RUNNING_LOOP_TRAP, "1", IRNode.OUTER_STRIP_MINED_LOOP, "1" })
@IR(applyIf = { "StressShortRunningLongLoop", "true" }, failOn = { IRNode.LOOP })
@IR(applyIf = { "StressShortRunningLongLoop", "false" }, counts = { IRNode.COUNTED_LOOP, "1", IRNode.LOOP, "1", IRNode.OUTER_STRIP_MINED_LOOP, "1" })
@IR(applyIf = { "StressShortRunningLongLoop", "false" }, failOn = { IRNode.SHORT_RUNNING_LOOP_TRAP })
public static int testLongLoopUnknownBoundsShortLoop(long start, long stop) {
int j = 0;
for (long i = start; i < stop; i++) {
volatileField = 42;
j++;
}
return j;
}
// NOTE(review): @Warmup(0) presumably ensures the test is compiled without profile data -- confirm.
@Run(test = "testLongLoopUnknownBoundsShortLoop")
@Warmup(0)
public static void testLongLoopUnknownBoundsShortLoop_runner() {
int res = testLongLoopUnknownBoundsShortLoop(0, 100);
if (res != 100) {
throw new RuntimeException("incorrect result: " + res);
}
}
}

View File

@ -47,6 +47,23 @@ import java.lang.foreign.*;
* @run driver compiler.loopopts.superword.TestMemorySegment ByteArray AlignVector
*/
/*
* @test id=byte-array-NoShortRunningLongLoop
* @bug 8329273 8342692
* @summary Test vectorization of loops over MemorySegment
* @library /test/lib /
* @run driver compiler.loopopts.superword.TestMemorySegment ByteArray NoShortRunningLongLoop
*/
/*
* @test id=byte-array-AlignVector-NoShortRunningLongLoop
* @bug 8329273 8348263 8342692
* @summary Test vectorization of loops over MemorySegment
* @library /test/lib /
* @run driver compiler.loopopts.superword.TestMemorySegment ByteArray AlignVector NoShortRunningLongLoop
*/
/*
* @test id=char-array
* @bug 8329273
@ -172,6 +189,13 @@ public class TestMemorySegment {
public static void main(String[] args) {
TestFramework framework = new TestFramework(TestMemorySegmentImpl.class);
framework.addFlags("-DmemorySegmentProviderNameForTestVM=" + args[0]);
for (int i = 1; i < args.length; i++) {
String tag = args[i];
switch (tag) {
case "AlignVector" -> framework.addFlags("-XX:+AlignVector");
case "NoShortRunningLongLoop" -> framework.addFlags("-XX:-ShortRunningLongLoop");
}
}
if (args.length > 1 && args[1].equals("AlignVector")) {
framework.addFlags("-XX:+AlignVector");
}
@ -777,6 +801,13 @@ class TestMemorySegmentImpl {
@IR(counts = {IRNode.LOAD_VECTOR_I, "= 0",
IRNode.ADD_VI, "= 0",
IRNode.STORE_VECTOR, "= 0"},
applyIfAnd = { "ShortRunningLongLoop", "false", "AlignVector", "false" },
applyIfPlatform = {"64-bit", "true"},
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
@IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
IRNode.ADD_VI, "> 0",
IRNode.STORE_VECTOR, "> 0"},
applyIfAnd = { "ShortRunningLongLoop", "true", "AlignVector", "false" },
applyIfPlatform = {"64-bit", "true"},
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
// FAILS: invariants are sorted differently, because of differently inserted Cast.
@ -795,6 +826,13 @@ class TestMemorySegmentImpl {
@IR(counts = {IRNode.LOAD_VECTOR_I, "= 0",
IRNode.ADD_VI, "= 0",
IRNode.STORE_VECTOR, "= 0"},
applyIfAnd = { "ShortRunningLongLoop", "false", "AlignVector", "false" },
applyIfPlatform = {"64-bit", "true"},
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
@IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
IRNode.ADD_VI, "> 0",
IRNode.STORE_VECTOR, "> 0"},
applyIfAnd = { "ShortRunningLongLoop", "true", "AlignVector", "false" },
applyIfPlatform = {"64-bit", "true"},
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
// FAILS: invariants are sorted differently, because of differently inserted Cast.

View File

@ -0,0 +1,130 @@
/*
* Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
package org.openjdk.bench.java.lang.foreign;
import org.openjdk.jmh.annotations.*;
import java.lang.foreign.Arena;
import java.lang.foreign.MemorySegment;
import java.lang.foreign.ValueLayout;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.concurrent.TimeUnit;
import jdk.internal.misc.Unsafe;
import java.util.Objects;
// JMH benchmark comparing hand-written "first mismatch" loops over two equally sized heap byte arrays,
// accessed as plain arrays, heap MemorySegments, heap ByteBuffers and through Unsafe.
@BenchmarkMode(Mode.AverageTime)
@Warmup(iterations = 5, time = 500, timeUnit = TimeUnit.MILLISECONDS)
@Measurement(iterations = 10, time = 500, timeUnit = TimeUnit.MILLISECONDS)
@State(org.openjdk.jmh.annotations.Scope.Thread)
@OutputTimeUnit(TimeUnit.MILLISECONDS)
@Fork(value = 3, jvmArgs = { "--add-opens=java.base/jdk.internal.misc=ALL-UNNAMED" })
public class HeapMismatchManualLoopTest {
// Number of bytes in each of the two arrays; one benchmark run per listed value.
@Param({"4", "8", "16", "32", "64", "128"})
public int ELEM_SIZE;
// Unsafe instance obtained from the Utils helper class (defined outside this file).
static final Unsafe unsafe = Utils.unsafe;
byte[] srcArray;
byte[] dstArray;
// Segment and buffer views wrapping srcArray/dstArray (no copy is made by ofArray()/wrap()).
MemorySegment srcSegment;
MemorySegment dstSegment;
ByteBuffer srcBuffer;
ByteBuffer dstBuffer;
// Array sizes as longs, used as loop limit and index bounds by the Unsafe benchmarks.
long srcByteSize;
long dstByteSize;
// Allocates two fresh arrays of ELEM_SIZE bytes and the views over them. The arrays are never written
// afterwards in this class, so the benchmarks below scan the whole length and return -1.
@Setup
public void setup() {
srcArray = new byte[ELEM_SIZE];
dstArray = new byte[ELEM_SIZE];
srcSegment = MemorySegment.ofArray(srcArray);
dstSegment = MemorySegment.ofArray(dstArray);
srcBuffer = ByteBuffer.wrap(srcArray);
dstBuffer = ByteBuffer.wrap(dstArray);
srcByteSize = ELEM_SIZE;
dstByteSize = ELEM_SIZE;
}
// Plain array accesses with an int loop variable. Returns the first mismatching index or -1.
@Benchmark
@OutputTimeUnit(TimeUnit.NANOSECONDS)
public int array_mismatch() {
for (int i = 0; i < srcArray.length ; i++) {
if (srcArray[i] != dstArray[i]) {
return i;
}
}
return -1;
}
// MemorySegment accesses with a long loop variable bounded by byteSize(). Returns the first mismatching
// offset or -1.
@Benchmark
@OutputTimeUnit(TimeUnit.NANOSECONDS)
public long segment_mismatch() {
for (long i = 0; i < srcSegment.byteSize() ; i++) {
if (srcSegment.get(ValueLayout.JAVA_BYTE, i) != dstSegment.get(ValueLayout.JAVA_BYTE, i)) {
return i;
}
}
return -1;
}
// ByteBuffer absolute gets with an int loop variable bounded by capacity(). Returns the first
// mismatching index or -1.
@Benchmark
@OutputTimeUnit(TimeUnit.NANOSECONDS)
public int buffer_mismatch() {
for (int i = 0; i < srcBuffer.capacity() ; i++) {
if (srcBuffer.get(i) != dstBuffer.get(i)) {
return i;
}
}
return -1;
}
// Unsafe accesses with a long loop variable, preceded by explicit Objects.checkIndex() calls against
// both sizes. Returns the first mismatching index or -1.
@Benchmark
@OutputTimeUnit(TimeUnit.NANOSECONDS)
public long unsafe_mismatch() {
for (long i = 0; i < srcByteSize ; i++) {
Objects.checkIndex(i, srcByteSize);
Objects.checkIndex(i, dstByteSize);
long offset = Unsafe.ARRAY_BYTE_BASE_OFFSET + i * Unsafe.ARRAY_BYTE_INDEX_SCALE;
if (unsafe.getByte(srcArray, offset) != unsafe.getByte(dstArray, offset)) {
return i;
}
}
return -1;
}
// Same as unsafe_mismatch() but without the explicit index checks.
@Benchmark
@OutputTimeUnit(TimeUnit.NANOSECONDS)
public long unsafe_mismatch2() {
for (long i = 0; i < srcByteSize ; i++) {
long offset = Unsafe.ARRAY_BYTE_BASE_OFFSET + i * Unsafe.ARRAY_BYTE_INDEX_SCALE;
if (unsafe.getByte(srcArray, offset) != unsafe.getByte(dstArray, offset)) {
return i;
}
}
return -1;
}
}