From 7f3ecb4d92fdb084ce632cab484cf4578487b090 Mon Sep 17 00:00:00 2001 From: Matthias Ernst Date: Mon, 17 Feb 2025 13:07:23 +0000 Subject: [PATCH] 8346664: C2: Optimize mask check with constant offset Reviewed-by: epeter, qamai --- src/hotspot/share/opto/mulnode.cpp | 206 ++++++------ src/hotspot/share/opto/mulnode.hpp | 4 +- .../compiler/c2/irTests/TestShiftAndMask.java | 300 +++++++++++++----- .../superword/TestEquivalentInvariants.java | 80 ++++- .../vectorization/TestPopulateIndex.java | 30 +- 5 files changed, 431 insertions(+), 189 deletions(-) diff --git a/src/hotspot/share/opto/mulnode.cpp b/src/hotspot/share/opto/mulnode.cpp index 58439ce3773..b1a9219e113 100644 --- a/src/hotspot/share/opto/mulnode.cpp +++ b/src/hotspot/share/opto/mulnode.cpp @@ -692,9 +692,11 @@ const Type *AndINode::mul_ring( const Type *t0, const Type *t1 ) const { return and_value(r0, r1); } +static bool AndIL_is_zero_element_under_mask(const PhaseGVN* phase, const Node* expr, const Node* mask, BasicType bt); + const Type* AndINode::Value(PhaseGVN* phase) const { - // patterns similar to (v << 2) & 3 - if (AndIL_shift_and_mask_is_always_zero(phase, in(1), in(2), T_INT, true)) { + if (AndIL_is_zero_element_under_mask(phase, in(1), in(2), T_INT) || + AndIL_is_zero_element_under_mask(phase, in(2), in(1), T_INT)) { return TypeInt::ZERO; } @@ -740,8 +742,8 @@ Node* AndINode::Identity(PhaseGVN* phase) { //------------------------------Ideal------------------------------------------ Node *AndINode::Ideal(PhaseGVN *phase, bool can_reshape) { - // pattern similar to (v1 + (v2 << 2)) & 3 transformed to v1 & 3 - Node* progress = AndIL_add_shift_and_mask(phase, T_INT); + // Simplify (v1 + v2) & mask to v1 & mask or v2 & mask when possible. 
+ Node* progress = AndIL_sum_and_mask(phase, T_INT); if (progress != nullptr) { return progress; } @@ -824,8 +826,8 @@ const Type *AndLNode::mul_ring( const Type *t0, const Type *t1 ) const { } const Type* AndLNode::Value(PhaseGVN* phase) const { - // patterns similar to (v << 2) & 3 - if (AndIL_shift_and_mask_is_always_zero(phase, in(1), in(2), T_LONG, true)) { + if (AndIL_is_zero_element_under_mask(phase, in(1), in(2), T_LONG) || + AndIL_is_zero_element_under_mask(phase, in(2), in(1), T_LONG)) { return TypeLong::ZERO; } @@ -872,8 +874,8 @@ Node* AndLNode::Identity(PhaseGVN* phase) { //------------------------------Ideal------------------------------------------ Node *AndLNode::Ideal(PhaseGVN *phase, bool can_reshape) { - // pattern similar to (v1 + (v2 << 2)) & 3 transformed to v1 & 3 - Node* progress = AndIL_add_shift_and_mask(phase, T_LONG); + // Simplify (v1 + v2) & mask to v1 & mask or v2 & mask when possible. + Node* progress = AndIL_sum_and_mask(phase, T_LONG); if (progress != nullptr) { return progress; } @@ -2096,99 +2098,109 @@ const Type* RotateRightNode::Value(PhaseGVN* phase) const { } } -// Given an expression (AndX shift mask) or (AndX mask shift), -// determine if the AndX must always produce zero, because the -// the shift (x<<N) is bitwise disjoint from the mask #M. -// The X in AndX must be I or L, depending on bt. -// Specifically, the following cases fold to zero, -// when the shift value N is large enough to zero out -// all the set positions of the and-mask M. -// (AndI (LShiftI _ #N) #M) => #0 -// (AndL (LShiftL _ #N) #M) => #0 -// (AndL (ConvI2L (LShiftI _ #N)) #M) => #0 -// The M and N values must satisfy ((-1 << N) & M) == 0. -// Because the optimization might work for a non-constant -// mask M, we check the AndX for both operand orders. -bool MulNode::AndIL_shift_and_mask_is_always_zero(PhaseGVN* phase, Node* shift, Node* mask, BasicType bt, bool check_reverse) { - if (mask == nullptr || shift == nullptr) { - return false; +//------------------------------ Sum & Mask ------------------------------ + +// Returns a lower bound on the number of trailing zeros in expr. 
+static jint AndIL_min_trailing_zeros(const PhaseGVN* phase, const Node* expr, BasicType bt) { + expr = expr->uncast(); + const TypeInteger* type = phase->type(expr)->isa_integer(bt); + if (type == nullptr) { + return 0; } - const TypeInteger* mask_t = phase->type(mask)->isa_integer(bt); - if (mask_t == nullptr || phase->type(shift)->isa_integer(bt) == nullptr) { - return false; + + if (type->is_con()) { + jlong con = type->get_con_as_long(bt); + return con == 0L ? (type2aelembytes(bt) * BitsPerByte) : count_trailing_zeros(con); } - shift = shift->uncast(); - if (shift == nullptr) { - return false; - } - if (phase->type(shift)->isa_integer(bt) == nullptr) { - return false; - } - BasicType shift_bt = bt; - if (bt == T_LONG && shift->Opcode() == Op_ConvI2L) { + + if (expr->Opcode() == Op_ConvI2L) { + expr = expr->in(1)->uncast(); bt = T_INT; - Node* val = shift->in(1); - if (val == nullptr) { - return false; - } - val = val->uncast(); - if (val == nullptr) { - return false; - } - if (val->Opcode() == Op_LShiftI) { - shift_bt = T_INT; - shift = val; - if (phase->type(shift)->isa_integer(bt) == nullptr) { - return false; - } - } - } - if (shift->Opcode() != Op_LShift(shift_bt)) { - if (check_reverse && - (mask->Opcode() == Op_LShift(bt) || - (bt == T_LONG && mask->Opcode() == Op_ConvI2L))) { - // try it the other way around - return AndIL_shift_and_mask_is_always_zero(phase, mask, shift, bt, false); - } - return false; - } - Node* shift2 = shift->in(2); - if (shift2 == nullptr) { - return false; - } - const Type* shift2_t = phase->type(shift2); - if (!shift2_t->isa_int() || !shift2_t->is_int()->is_con()) { - return false; + type = phase->type(expr)->isa_int(); } - jint shift_con = shift2_t->is_int()->get_con() & ((shift_bt == T_INT ? 
BitsPerJavaInteger : BitsPerJavaLong) - 1); - if ((((jlong)1) << shift_con) > mask_t->hi_as_long() && mask_t->lo_as_long() >= 0) { - return true; + // Pattern: expr = (x << shift) + if (expr->Opcode() == Op_LShift(bt)) { + const TypeInt* shift_t = phase->type(expr->in(2))->isa_int(); + if (shift_t == nullptr || !shift_t->is_con()) { + return 0; + } + // We need to truncate the shift, as it may not have been canonicalized yet. + // T_INT: 0..31 -> shift_mask = 4 * 8 - 1 = 31 + // T_LONG: 0..63 -> shift_mask = 8 * 8 - 1 = 63 + // (JLS: "Shift Operators") + jint shift_mask = type2aelembytes(bt) * BitsPerByte - 1; + return shift_t->get_con() & shift_mask; } - return false; + return 0; } -// Given an expression (AndX (AddX v1 (LShiftX v2 #N)) #M) -// determine if the AndX must always produce (AndX v1 #M), -// because the shift (v2<<N) is bitwise disjoint from the mask #M. -// The X in AndX will be I or L, depending on bt. -// Specifically, the following cases fold, -// when the shift value N is large enough to zero out -// all the set positions of the and-mask M. -// (AndI (AddI v1 (LShiftI _ #N)) #M) => (AndI v1 #M) -// (AndL (AddI v1 (LShiftL _ #N)) #M) => (AndL v1 #M) -// (AndL (AddL v1 (ConvI2L (LShiftI _ #N))) #M) => (AndL v1 #M) -// The M and N values must satisfy ((-1 << N) & M) == 0. -// Because the optimization might work for a non-constant -// mask M, and because the AddX operands can come in either -// order, we check for every operand order. -Node* MulNode::AndIL_add_shift_and_mask(PhaseGVN* phase, BasicType bt) { +// Checks whether expr is a neutral additive element (zero) under mask, +// i.e. whether an expression of the form: +// (AndX (AddX expr addend) mask) +// (expr + addend) & mask +// is equivalent to +// (AndX addend mask) +// addend & mask +// for any addend. +// (The X in AndX must be I or L, depending on bt). +// +// We check for the sufficient condition when the lowest set bit in expr is higher than +// the highest set bit in mask, i.e.: +// expr: eeeeee0000000000000 +// mask: 000000mmmmmmmmmmmmm +// <--w bits---> +// We do not test for other cases. 
+// +// Correctness: +// Given "expr" with at least "w" trailing zeros, +// let "mod = 2^w", "suffix_mask = mod - 1" +// +// Since "mask" only has bits set where "suffix_mask" does, we have: +// mask = suffix_mask & mask (SUFFIX_MASK) +// +// And since expr only has bits set above w, and suffix_mask only below: +// expr & suffix_mask == 0 (NO_BIT_OVERLAP) +// +// From unsigned modular arithmetic (with unsigned modulo %), and since mod is +// a power of 2, and we are computing in a ring of powers of 2, we know that +// (x + y) % mod = (x % mod + y) % mod +// (x + y) & suffix_mask = (x & suffix_mask + y) & suffix_mask (MOD_ARITH) +// +// We can now prove the equality: +// (expr + addend) & mask +// = (expr + addend) & suffix_mask & mask (SUFFIX_MASK) +// = (expr & suffix_mask + addend) & suffix_mask & mask (MOD_ARITH) +// = (0 + addend) & suffix_mask & mask (NO_BIT_OVERLAP) +// = addend & mask (SUFFIX_MASK) +// +// Hence, an expr with at least w trailing zeros is a neutral additive element under any mask with bit width w. +static bool AndIL_is_zero_element_under_mask(const PhaseGVN* phase, const Node* expr, const Node* mask, BasicType bt) { + // When the mask is negative, it has the most significant bit set. + const TypeInteger* mask_t = phase->type(mask)->isa_integer(bt); + if (mask_t == nullptr || mask_t->lo_as_long() < 0) { + return false; + } + + // When the mask is constant zero, we defer to MulNode::Value to eliminate the entire AndX operation. + if (mask_t->hi_as_long() == 0) { + assert(mask_t->lo_as_long() == 0, "checked earlier"); + return false; + } + + jint mask_bit_width = BitsPerLong - count_leading_zeros(mask_t->hi_as_long()); + jint expr_trailing_zeros = AndIL_min_trailing_zeros(phase, expr, bt); + return expr_trailing_zeros >= mask_bit_width; +} + +// Reduces the pattern: +// (AndX (AddX add1 add2) mask) +// to +// (AndX add1 mask), if add2 is neutral wrt mask (see above), and vice versa. 
+Node* MulNode::AndIL_sum_and_mask(PhaseGVN* phase, BasicType bt) { Node* add = in(1); Node* mask = in(2); - if (add == nullptr || mask == nullptr) { - return nullptr; - } int addidx = 0; if (add->Opcode() == Op_Add(bt)) { addidx = 1; @@ -2200,14 +2212,12 @@ Node* MulNode::AndIL_add_shift_and_mask(PhaseGVN* phase, BasicType bt) { if (addidx > 0) { Node* add1 = add->in(1); Node* add2 = add->in(2); - if (add1 != nullptr && add2 != nullptr) { - if (AndIL_shift_and_mask_is_always_zero(phase, add1, mask, bt, false)) { - set_req_X(addidx, add2, phase); - return this; - } else if (AndIL_shift_and_mask_is_always_zero(phase, add2, mask, bt, false)) { - set_req_X(addidx, add1, phase); - return this; - } + if (AndIL_is_zero_element_under_mask(phase, add1, mask, bt)) { + set_req_X(addidx, add2, phase); + return this; + } else if (AndIL_is_zero_element_under_mask(phase, add2, mask, bt)) { + set_req_X(addidx, add1, phase); + return this; } } return nullptr; diff --git a/src/hotspot/share/opto/mulnode.hpp b/src/hotspot/share/opto/mulnode.hpp index bb572b9d9a2..65a2cd112ec 100644 --- a/src/hotspot/share/opto/mulnode.hpp +++ b/src/hotspot/share/opto/mulnode.hpp @@ -83,8 +83,8 @@ public: static MulNode* make(Node* in1, Node* in2, BasicType bt); - static bool AndIL_shift_and_mask_is_always_zero(PhaseGVN* phase, Node* shift, Node* mask, BasicType bt, bool check_reverse); - Node* AndIL_add_shift_and_mask(PhaseGVN* phase, BasicType bt); +protected: + Node* AndIL_sum_and_mask(PhaseGVN* phase, BasicType bt); }; //------------------------------MulINode--------------------------------------- diff --git a/test/hotspot/jtreg/compiler/c2/irTests/TestShiftAndMask.java b/test/hotspot/jtreg/compiler/c2/irTests/TestShiftAndMask.java index 4396873425a..2cdc6414685 100644 --- a/test/hotspot/jtreg/compiler/c2/irTests/TestShiftAndMask.java +++ b/test/hotspot/jtreg/compiler/c2/irTests/TestShiftAndMask.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, Red Hat, Inc. All rights reserved. 
+ * Copyright (c) 2021, 2025, Red Hat, Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -30,7 +30,7 @@ import java.util.Objects; /* * @test - * @bug 8277850 8278949 8285793 + * @bug 8277850 8278949 8285793 8346664 * @summary C2: optimize mask checks in counted loops * @library /test/lib / * @run driver compiler.c2.irTests.TestShiftAndMask @@ -43,11 +43,48 @@ public class TestShiftAndMask { TestFramework.run(); } + // any X << INT_MASK_WIDTH is zero under any INT_MASK + static final int INT_MASK_WIDTH = 1 + RANDOM.nextInt(30); + static final int INT_MAX_MASK = (1 << INT_MASK_WIDTH) - 1; + static final int INT_MASK = 1 + RANDOM.nextInt(INT_MAX_MASK); + static final int INT_MASK2 = 1 + RANDOM.nextInt(INT_MAX_MASK); + static final int INT_ZERO_CONST = RANDOM.nextInt() << INT_MASK_WIDTH; + + static final int INT_RANDOM_CONST = RANDOM.nextInt(); + static final int INT_RANDOM_SHIFT = RANDOM.nextInt(); + static final int INT_RANDOM_MASK = RANDOM.nextInt(); + + // any X << LONG_MASK_WIDTH is zero under any LONG_MASK + static final int LONG_MASK_WIDTH = 1 + RANDOM.nextInt(62); + static final long LONG_MAX_MASK = (1L << LONG_MASK_WIDTH) - 1; + static final long LONG_MASK = 1 + RANDOM.nextLong(LONG_MAX_MASK); + static final long LONG_MASK2 = 1 + RANDOM.nextLong(LONG_MAX_MASK); + static final long LONG_ZERO_CONST = RANDOM.nextLong() << LONG_MASK_WIDTH; + + static final long LONG_RANDOM_CONST = RANDOM.nextLong(); + static final long LONG_RANDOM_SHIFT = RANDOM.nextLong(); + static final long LONG_RANDOM_MASK = RANDOM.nextLong(); + + @Test + public static int intSumAndMask(int i, int j) { + return (j + i << INT_RANDOM_SHIFT + INT_RANDOM_CONST) & INT_RANDOM_MASK; + } + + @Run(test = { "intSumAndMask" }) + public static void checkIntSumAndMask() { + int j = RANDOM.nextInt(); + int i = RANDOM.nextInt(); + int res = intSumAndMask(i, j); + if (res != ((j + i << 
INT_RANDOM_SHIFT + INT_RANDOM_CONST) & INT_RANDOM_MASK)) { + throw new RuntimeException("incorrect result: " + res); + } + } + @Test @Arguments(values = Argument.RANDOM_EACH) @IR(failOn = { IRNode.AND_I, IRNode.LSHIFT_I }) public static int shiftMaskInt(int i) { - return (i << 2) & 3; // transformed to: return 0; + return (i << INT_MASK_WIDTH) & INT_MASK; // transformed to: return 0; } @Check(test = "shiftMaskInt") @@ -57,11 +94,26 @@ public class TestShiftAndMask { } } + @Test + public static long longSumAndMask(long i, long j) { + return (j + i << LONG_RANDOM_SHIFT + LONG_RANDOM_CONST) & LONG_RANDOM_MASK; + } + + @Run(test = { "longSumAndMask" }) + public static void checkLongSumAndMask() { + long j = RANDOM.nextLong(); + long i = RANDOM.nextLong(); + long res = longSumAndMask(i, j); + if (res != ((j + i << LONG_RANDOM_SHIFT + LONG_RANDOM_CONST) & LONG_RANDOM_MASK)) { + throw new RuntimeException("incorrect result: " + res); + } + } + @Test @Arguments(values = Argument.RANDOM_EACH) @IR(failOn = { IRNode.AND_L, IRNode.LSHIFT_L }) public static long shiftMaskLong(long i) { - return (i << 2) & 3; // transformed to: return 0; + return (i << LONG_MASK_WIDTH) & LONG_MASK; // transformed to: return 0; } @@ -81,11 +133,11 @@ public class TestShiftAndMask { int mask; if (flag) { barrier = 42; - mask = 3; + mask = INT_MASK; } else { - mask = 1; + mask = INT_MASK2; } - return mask & (i << 2); // transformed to: return 0; + return mask & (i << INT_MASK_WIDTH); // transformed to: return 0; } @Check(test = "shiftNonConstMaskInt") @@ -102,11 +154,11 @@ public class TestShiftAndMask { long mask; if (flag) { barrier = 42; - mask = 3; + mask = LONG_MASK; } else { - mask = 1; + mask = LONG_MASK2; } - return mask & (i << 2); // transformed to: return 0; + return mask & (i << LONG_MASK_WIDTH); // transformed to: return 0; } @Check(test = "shiftNonConstMaskLong") @@ -120,7 +172,7 @@ public class TestShiftAndMask { @IR(counts = { IRNode.AND_I, "1" }) @IR(failOn = { IRNode.ADD_I, 
IRNode.LSHIFT_I }) public static int addShiftMaskInt(int i, int j) { - return (j + (i << 2)) & 3; // transformed to: return j & 3; + return (j + (i << INT_MASK_WIDTH)) & INT_MASK; // transformed to: return j & INT_MASK; } @Run(test = "addShiftMaskInt") @@ -128,11 +180,42 @@ public class TestShiftAndMask { int i = RANDOM.nextInt(); int j = RANDOM.nextInt(); int res = addShiftMaskInt(i, j); - if (res != (j & 3)) { + if (res != (j & INT_MASK)) { throw new RuntimeException("incorrect result: " + res); } } + @Test + @IR(counts = { IRNode.AND_I, "1" }) + @IR(failOn = { IRNode.ADD_I, IRNode.LSHIFT_I }) + public static int addShiftPlusConstMaskInt(int i, int j) { + return (j + ((i + INT_RANDOM_CONST) << INT_MASK_WIDTH)) & INT_MASK; // transformed to: return j & INT_MASK; + } + + @Run(test = "addShiftPlusConstMaskInt") + public static void addShiftPlusConstMaskInt_runner() { + int i = RANDOM.nextInt(); + int j = RANDOM.nextInt(); + int res = addShiftPlusConstMaskInt(i, j); + if (res != (j & INT_MASK)) { + throw new RuntimeException("incorrect result: " + res); + } + } + + @Test + @Arguments(values = {Argument.RANDOM_EACH, Argument.RANDOM_EACH}) + @IR(counts = { IRNode.ADD_I, "2", IRNode.LSHIFT_I, "1" }) + public static int addShiftPlusConstDisjointMaskInt(int i, int j) { + return (j + ((i + 5) << 2)) & 32; // NOT transformed even though (5<<2) & 32 == 0 + } + + @Test + @Arguments(values = {Argument.RANDOM_EACH, Argument.RANDOM_EACH}) + @IR(counts = { IRNode.ADD_I, "1", IRNode.LSHIFT_I, "1" }) + public static int addShiftPlusConstOverlyLargeShift(int i, int j) { + return (j + i << 129) & 32; // NOT transformed, only lower 5 bits of shift count. 
+ } + @Test @IR(counts = { IRNode.AND_I, "1" }) @IR(failOn = { IRNode.ADD_I, IRNode.LSHIFT_I }) @@ -140,11 +223,11 @@ public class TestShiftAndMask { int mask; if (flag) { barrier = 42; - mask = 3; + mask = INT_MASK; } else { - mask = 1; + mask = INT_MASK2; } - return mask & (j + (i << 2)); // transformed to: return j & mask; + return mask & (j + (i << INT_MASK_WIDTH)); // transformed to: return j & mask; } @Run(test = "addSshiftNonConstMaskInt") @@ -152,11 +235,38 @@ public class TestShiftAndMask { int i = RANDOM.nextInt(); int j = RANDOM.nextInt(); int res = addSshiftNonConstMaskInt(i, j, true); - if (res != (j & 3)) { + if (res != (j & INT_MASK)) { throw new RuntimeException("incorrect result: " + res); } res = addSshiftNonConstMaskInt(i, j, false); - if (res != (j & 1)) { + if (res != (j & INT_MASK2)) { + throw new RuntimeException("incorrect result: " + res); + } + } + + @Test + @IR(counts = { IRNode.AND_I, "1" }) + @IR(failOn = { IRNode.ADD_I }) + public static int addConstNonConstMaskInt(int j, boolean flag) { + int mask; + if (flag) { + barrier = 42; + mask = INT_MASK; + } else { + mask = INT_MASK2; + } + return mask & (j + INT_ZERO_CONST); // transformed to: return j & mask; + } + + @Run(test = "addConstNonConstMaskInt") + public static void addConstNonConstMaskInt_runner() { + int j = RANDOM.nextInt(); + int res = addConstNonConstMaskInt(j, true); + if (res != (j & INT_MASK)) { + throw new RuntimeException("incorrect result: " + res); + } + res = addConstNonConstMaskInt(j, false); + if (res != (j & INT_MASK2)) { throw new RuntimeException("incorrect result: " + res); } } @@ -165,7 +275,7 @@ public class TestShiftAndMask { @IR(counts = { IRNode.AND_L, "1" }) @IR(failOn = { IRNode.ADD_L, IRNode.LSHIFT_L }) public static long addShiftMaskLong(long i, long j) { - return (j + (i << 2)) & 3; // transformed to: return j & 3; + return (j + (i << LONG_MASK_WIDTH)) & LONG_MASK; // transformed to: return j & LONG_MASK; } @Run(test = "addShiftMaskLong") @@ -173,7 
+283,24 @@ public class TestShiftAndMask { long i = RANDOM.nextLong(); long j = RANDOM.nextLong(); long res = addShiftMaskLong(i, j); - if (res != (j & 3)) { + if (res != (j & LONG_MASK)) { + throw new RuntimeException("incorrect result: " + res); + } + } + + @Test + @IR(counts = { IRNode.AND_L, "1" }) + @IR(failOn = { IRNode.ADD_L, IRNode.LSHIFT_L }) + public static long addShiftPlusConstMaskLong(long i, long j) { + return (j + ((i + LONG_RANDOM_CONST) << LONG_MASK_WIDTH)) & LONG_MASK; // transformed to: return j & LONG_MASK; + } + + @Run(test = "addShiftPlusConstMaskLong") + public static void addShiftPlusConstMaskLong_runner() { + long i = RANDOM.nextLong(); + long j = RANDOM.nextLong(); + long res = addShiftPlusConstMaskLong(i, j); + if (res != (j & LONG_MASK)) { throw new RuntimeException("incorrect result: " + res); } } @@ -182,14 +309,14 @@ public class TestShiftAndMask { @IR(counts = { IRNode.AND_L, "1" }) @IR(failOn = { IRNode.ADD_L, IRNode.LSHIFT_L }) public static long addSshiftNonConstMaskLong(long i, long j, boolean flag) { - int mask; + long mask; if (flag) { barrier = 42; - mask = 3; + mask = LONG_MASK; } else { - mask = 1; + mask = LONG_MASK2; } - return mask & (j + (i << 2)); // transformed to: return j & mask; + return mask & (j + (i << LONG_MASK_WIDTH)); // transformed to: return j & mask } @Run(test = "addSshiftNonConstMaskLong") @@ -197,11 +324,38 @@ public class TestShiftAndMask { long i = RANDOM.nextLong(); long j = RANDOM.nextLong(); long res = addSshiftNonConstMaskLong(i, j, true); - if (res != (j & 3)) { + if (res != (j & LONG_MASK)) { throw new RuntimeException("incorrect result: " + res); } res = addSshiftNonConstMaskLong(i, j, false); - if (res != (j & 1)) { + if (res != (j & LONG_MASK2)) { + throw new RuntimeException("incorrect result: " + res); + } + } + + @Test + @IR(counts = { IRNode.AND_L, "1" }) + @IR(failOn = { IRNode.ADD_L }) + public static long addConstNonConstMaskLong(long j, boolean flag) { + long mask; + if (flag) { + 
barrier = 42; + mask = LONG_MASK; + } else { + mask = LONG_MASK2; + } + return mask & (j + LONG_ZERO_CONST); // transformed to: return j & mask; + } + + @Run(test = "addConstNonConstMaskLong") + public static void addConstNonConstMaskLong_runner() { + long j = RANDOM.nextLong(); + long res = addConstNonConstMaskLong(j, true); + if (res != (j & LONG_MASK)) { + throw new RuntimeException("incorrect result: " + res); + } + res = addConstNonConstMaskLong(j, false); + if (res != (j & LONG_MASK2)) { throw new RuntimeException("incorrect result: " + res); } } @@ -210,7 +364,7 @@ public class TestShiftAndMask { @Arguments(values = {Argument.RANDOM_EACH, Argument.RANDOM_EACH}) @IR(failOn = { IRNode.AND_I, IRNode.ADD_I, IRNode.LSHIFT_I }) public static int addShiftMaskInt2(int i, int j) { - return ((j << 2) + (i << 2)) & 3; // transformed to: return 0; + return ((j << INT_MASK_WIDTH) + (i << INT_MASK_WIDTH)) & INT_MASK; // transformed to: return 0; } @Check(test = "addShiftMaskInt2") @@ -224,7 +378,7 @@ public class TestShiftAndMask { @Arguments(values = {Argument.RANDOM_EACH, Argument.RANDOM_EACH}) @IR(failOn = { IRNode.AND_L, IRNode.ADD_L, IRNode.LSHIFT_L }) public static long addShiftMaskLong2(long i, long j) { - return ((j << 2) + (i << 2)) & 3; // transformed to: return 0; + return ((j << INT_MASK_WIDTH) + (i << INT_MASK_WIDTH)) & INT_MASK; // transformed to: return 0; } @Check(test = "addShiftMaskLong2") @@ -239,9 +393,9 @@ public class TestShiftAndMask { @IR(counts = { IRNode.AND_I, "1" }) @IR(failOn = { IRNode.ADD_I, IRNode.LSHIFT_I }) public static int addShiftMaskInt3(int i, long j) { - int add1 = (i << 2); + int add1 = (i << INT_MASK_WIDTH); int add2 = (int)j; - return (add1 + add2) & 3; // transformed to: return j & 3; + return (add1 + add2) & INT_MASK; // transformed to: return j & INT_MASK; } @Run(test = "addShiftMaskInt3") @@ -249,7 +403,7 @@ public class TestShiftAndMask { int i = RANDOM.nextInt(); int j = RANDOM.nextInt(); int res = addShiftMaskInt3(i, j); - 
if (res != (j & 3)) { + if (res != (j & INT_MASK)) { throw new RuntimeException("incorrect result: " + res); } } @@ -258,9 +412,9 @@ public class TestShiftAndMask { @IR(counts = { IRNode.AND_L, "1" }) @IR(failOn = { IRNode.ADD_L, IRNode.LSHIFT_L }) public static long addShiftMaskLong3(long i, float j) { - long add1 = (i << 2); + long add1 = (i << LONG_MASK_WIDTH); long add2 = (long)j; - return (add1 + add2) & 3; // transformed to: return j & 3; + return (add1 + add2) & LONG_MASK; // transformed to: return j & LONG_MASK; } @Run(test = "addShiftMaskLong3") @@ -268,7 +422,7 @@ public class TestShiftAndMask { long i = RANDOM.nextLong(); float j = RANDOM.nextFloat(); long res = addShiftMaskLong3(i, j); - if (res != (((long)j) & 3)) { + if (res != (((long) j) & LONG_MASK)) { throw new RuntimeException("incorrect result: " + res); } } @@ -277,7 +431,7 @@ public class TestShiftAndMask { @Arguments(values = {Argument.RANDOM_EACH}) @IR(failOn = { IRNode.AND_L, IRNode.LSHIFT_I, IRNode.CONV_I2L }) public static long shiftConvMask(int i) { - return ((long)(i << 2)) & 3; // transformed to: return 0; + return ((long) (i << INT_MASK_WIDTH)) & INT_MASK; // transformed to: return 0; } @Check(test = "shiftConvMask") @@ -294,11 +448,11 @@ public class TestShiftAndMask { long mask; if (flag) { barrier = 42; - mask = 3; + mask = INT_MASK; } else { - mask = 1; + mask = INT_MASK2; } - return mask & ((long)(i << 2)); // transformed to: return 0; + return mask & ((long) (i << INT_MASK_WIDTH)); // transformed to: return 0; } @Check(test = "shiftNotConstConvMask") @@ -312,7 +466,7 @@ public class TestShiftAndMask { @IR(counts = { IRNode.AND_L, "1" }) @IR(failOn = { IRNode.ADD_L, IRNode.LSHIFT_I, IRNode.CONV_I2L }) public static long addShiftConvMask(int i, long j) { - return (j + (i << 2)) & 3; // transformed to: return j & 3; + return (j + (i << INT_MASK_WIDTH)) & INT_MASK; // transformed to: return j & INT_MASK; } @Run(test = "addShiftConvMask") @@ -320,7 +474,7 @@ public class 
TestShiftAndMask { int i = RANDOM.nextInt(); long j = RANDOM.nextLong(); long res = addShiftConvMask(i, j); - if (res != (j & 3)) { + if (res != (j & INT_MASK)) { throw new RuntimeException("incorrect result: " + res); } } @@ -329,7 +483,7 @@ public class TestShiftAndMask { @Arguments(values = {Argument.RANDOM_EACH, Argument.RANDOM_EACH}) @IR(failOn = { IRNode.AND_L, IRNode.ADD_L, IRNode.LSHIFT_I, IRNode.CONV_I2L }) public static long addShiftConvMask2(int i, int j) { - return (((long)(j << 2)) + ((long)(i << 2))) & 3; // transformed to: return 0; + return (((long) (j << INT_MASK_WIDTH)) + ((long) (i << INT_MASK_WIDTH))) & INT_MASK; // transformed to: return 0; } @Check(test = "addShiftConvMask2") @@ -342,13 +496,13 @@ public class TestShiftAndMask { @Test @IR(failOn = { IRNode.AND_I }) public static int shiftMaskIntCheckIndex(int i, int length) { - return Objects.checkIndex(i << 2, length) & 3; // transformed to: return 0; + return Objects.checkIndex(i << INT_MASK_WIDTH, length) & INT_MASK; // transformed to: return 0; } @Run(test = "shiftMaskIntCheckIndex") public static void shiftMaskIntCheckIndex_runner() { - int i = RANDOM.nextInt((Integer.MAX_VALUE - 1) >> 2); - int res = shiftMaskIntCheckIndex(i, (i << 2) + 1); + int i = RANDOM.nextInt((Integer.MAX_VALUE - 1) >> INT_MASK_WIDTH); + int res = shiftMaskIntCheckIndex(i, (i << INT_MASK_WIDTH) + 1); if (res != 0) { throw new RuntimeException("incorrect result: " + res); } @@ -357,13 +511,13 @@ public class TestShiftAndMask { @Test @IR(failOn = { IRNode.AND_L }) public static long shiftMaskLongCheckIndex(long i, long length) { - return Objects.checkIndex(i << 2, length) & 3; // transformed to: return 0; + return Objects.checkIndex(i << LONG_MASK_WIDTH, length) & LONG_MASK; // transformed to: return 0; } @Run(test = "shiftMaskLongCheckIndex") public static void shiftMaskLongCheckIndex_runner() { - long i = RANDOM.nextInt((Integer.MAX_VALUE - 1) >> 2); - long res = shiftMaskLongCheckIndex(i, (i << 2) + 1); + long i = 
RANDOM.nextLong((Long.MAX_VALUE - 1) >> LONG_MASK_WIDTH); + long res = shiftMaskLongCheckIndex(i, (i << LONG_MASK_WIDTH) + 1); if (res != 0) { throw new RuntimeException("incorrect result: " + res); } @@ -373,15 +527,15 @@ public class TestShiftAndMask { @IR(counts = { IRNode.AND_I, "1" }) @IR(failOn = { IRNode.ADD_I }) public static int addShiftMaskIntCheckIndex(int i, int j, int length) { - return (j + Objects.checkIndex(i << 2, length)) & 3; // transformed to: return j & 3; + return (j + Objects.checkIndex(i << INT_MASK_WIDTH, length)) & INT_MASK; // transformed to: return j & INT_MASK; } @Run(test = "addShiftMaskIntCheckIndex") public static void addShiftMaskIntCheckIndex_runner() { - int i = RANDOM.nextInt((Integer.MAX_VALUE - 1) >> 2); + int i = RANDOM.nextInt((Integer.MAX_VALUE - 1) >> INT_MASK_WIDTH); int j = RANDOM.nextInt(); - int res = addShiftMaskIntCheckIndex(i, j, (i << 2) + 1); - if (res != (j & 3)) { + int res = addShiftMaskIntCheckIndex(i, j, (i << INT_MASK_WIDTH) + 1); + if (res != (j & INT_MASK)) { throw new RuntimeException("incorrect result: " + res); } } @@ -390,15 +544,15 @@ public class TestShiftAndMask { @IR(counts = { IRNode.AND_L, "1" }) @IR(failOn = { IRNode.ADD_L }) public static long addShiftMaskLongCheckIndex(long i, long j, long length) { - return (j + Objects.checkIndex(i << 2, length)) & 3; // transformed to: return j & 3; + return (j + Objects.checkIndex(i << LONG_MASK_WIDTH, length)) & LONG_MASK; // transformed to: return j & LONG_MASK; } @Run(test = "addShiftMaskLongCheckIndex") public static void addShiftMaskLongCheckIndex_runner() { - long i = RANDOM.nextInt((Integer.MAX_VALUE - 1) >> 2); + long i = RANDOM.nextLong((Long.MAX_VALUE - 1) >> LONG_MASK_WIDTH); long j = RANDOM.nextLong(); - long res = addShiftMaskLongCheckIndex(i, j, (i << 2) + 1); - if (res != (j & 3)) { + long res = addShiftMaskLongCheckIndex(i, j, (i << LONG_MASK_WIDTH) + 1); + if (res != (j & LONG_MASK)) { throw new RuntimeException("incorrect result: " + res); 
} } @@ -406,15 +560,15 @@ public class TestShiftAndMask { @Test @IR(failOn = { IRNode.AND_I, IRNode.ADD_I }) public static int addShiftMaskIntCheckIndex2(int i, int j, int length) { - return (Objects.checkIndex(j << 2, length) + Objects.checkIndex(i << 2, length)) & 3; // transformed to: return 0; + return (Objects.checkIndex(j << INT_MASK_WIDTH, length) + Objects.checkIndex(i << INT_MASK_WIDTH, length)) & INT_MASK; // transformed to: return 0; } @Run(test = "addShiftMaskIntCheckIndex2") public static void addShiftMaskIntCheckIndex2_runner() { - int i = RANDOM.nextInt((Integer.MAX_VALUE - 1) >> 2); - int j = RANDOM.nextInt((Integer.MAX_VALUE - 1) >> 2); - int res = addShiftMaskIntCheckIndex2(i, j, (Integer.max(i, j) << 2) + 1); + int i = RANDOM.nextInt((Integer.MAX_VALUE - 1) >> INT_MASK_WIDTH); + int j = RANDOM.nextInt((Integer.MAX_VALUE - 1) >> INT_MASK_WIDTH); + int res = addShiftMaskIntCheckIndex2(i, j, (Integer.max(i, j) << INT_MASK_WIDTH) + 1); if (res != 0) { throw new RuntimeException("incorrect result: " + res); } @@ -423,14 +577,14 @@ public class TestShiftAndMask { @Test @IR(failOn = { IRNode.AND_L, IRNode.ADD_L }) public static long addShiftMaskLongCheckIndex2(long i, long j, long length) { - return (Objects.checkIndex(j << 2, length) + Objects.checkIndex(i << 2, length)) & 3; // transformed to: return 0; + return (Objects.checkIndex(j << LONG_MASK_WIDTH, length) + Objects.checkIndex(i << LONG_MASK_WIDTH, length)) & LONG_MASK; // transformed to: return 0; } @Run(test = "addShiftMaskLongCheckIndex2") public static void addShiftMaskLongCheckIndex2_runner() { - long i = RANDOM.nextInt((Integer.MAX_VALUE - 1) >> 2); - long j = RANDOM.nextInt((Integer.MAX_VALUE - 1) >> 2); - long res = addShiftMaskLongCheckIndex2(i, j, (Long.max(i, j) << 2) + 1); + long i = RANDOM.nextLong((Long.MAX_VALUE - 1) >> LONG_MASK_WIDTH); + long j = RANDOM.nextLong((Long.MAX_VALUE - 1) >> LONG_MASK_WIDTH); + long res = addShiftMaskLongCheckIndex2(i, j, (Long.max(i, j) << 
LONG_MASK_WIDTH) + 1); if (res != 0) { throw new RuntimeException("incorrect result: " + res); } @@ -439,13 +593,13 @@ public class TestShiftAndMask { @Test @IR(failOn = { IRNode.AND_L, IRNode.CONV_I2L }) public static long shiftConvMaskCheckIndex(int i, int length) { - return ((long)Objects.checkIndex(i << 2, length)) & 3; // transformed to: return 0; + return ((long) Objects.checkIndex(i << INT_MASK_WIDTH, length)) & INT_MASK; // transformed to: return 0; } @Run(test = "shiftConvMaskCheckIndex") public static void shiftConvMaskCheckIndex_runner() { - int i = RANDOM.nextInt((Integer.MAX_VALUE - 1) >> 2); - long res = shiftConvMaskCheckIndex(i, (i << 2) + 1); + int i = RANDOM.nextInt((Integer.MAX_VALUE - 1) >> INT_MASK_WIDTH); + long res = shiftConvMaskCheckIndex(i, (i << INT_MASK_WIDTH) + 1); if (res != 0) { throw new RuntimeException("incorrect result: " + res); } @@ -455,15 +609,15 @@ public class TestShiftAndMask { @IR(counts = { IRNode.AND_L, "1" }) @IR(failOn = { IRNode.ADD_L, IRNode.CONV_I2L }) public static long addShiftConvMaskCheckIndex(int i, long j, int length) { - return (j + Objects.checkIndex(i << 2, length)) & 3; // transformed to: return j & 3; + return (j + Objects.checkIndex(i << INT_MASK_WIDTH, length)) & INT_MASK; // transformed to: return j & INT_MASK; } @Run(test = "addShiftConvMaskCheckIndex") public static void addShiftConvMaskCheckIndex_runner() { - int i = RANDOM.nextInt((Integer.MAX_VALUE - 1) >> 2); + int i = RANDOM.nextInt((Integer.MAX_VALUE - 1) >> INT_MASK_WIDTH); long j = RANDOM.nextLong(); - long res = addShiftConvMaskCheckIndex(i, j, (i << 2) + 1); - if (res != (j & 3)) { + long res = addShiftConvMaskCheckIndex(i, j, (i << INT_MASK_WIDTH) + 1); + if (res != (j & INT_MASK)) { throw new RuntimeException("incorrect result: " + res); } } @@ -471,14 +625,14 @@ public class TestShiftAndMask { @Test @IR(failOn = { IRNode.AND_L, IRNode.ADD_L }) public static long addShiftConvMaskCheckIndex2(int i, int j, int length) { - return 
(((long)Objects.checkIndex(j << 2, length)) + ((long)Objects.checkIndex(i << 2, length))) & 3; // transformed to: return 0; + return (((long) Objects.checkIndex(j << INT_MASK_WIDTH, length)) + ((long) Objects.checkIndex(i << INT_MASK_WIDTH, length))) & INT_MASK; // transformed to: return 0; } @Run(test = "addShiftConvMaskCheckIndex2") public static void addShiftConvMaskCheckIndex2_runner() { - int i = RANDOM.nextInt((Integer.MAX_VALUE - 1) >> 2); - int j = RANDOM.nextInt((Integer.MAX_VALUE - 1) >> 2); - long res = addShiftConvMaskCheckIndex2(i, j, (Integer.max(i, j) << 2) + 1); + int i = RANDOM.nextInt((Integer.MAX_VALUE - 1) >> INT_MASK_WIDTH); + int j = RANDOM.nextInt((Integer.MAX_VALUE - 1) >> INT_MASK_WIDTH); + long res = addShiftConvMaskCheckIndex2(i, j, (Integer.max(i, j) << INT_MASK_WIDTH) + 1); if (res != 0) { throw new RuntimeException("incorrect result: " + res); } diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/TestEquivalentInvariants.java b/test/hotspot/jtreg/compiler/loopopts/superword/TestEquivalentInvariants.java index a9b158f1c09..09b087bee54 100644 --- a/test/hotspot/jtreg/compiler/loopopts/superword/TestEquivalentInvariants.java +++ b/test/hotspot/jtreg/compiler/loopopts/superword/TestEquivalentInvariants.java @@ -173,9 +173,13 @@ public class TestEquivalentInvariants { return testMemorySegmentIInvarL3e(data, 1, 2, 3, RANGE-200); }); tests.put("testMemorySegmentIInvarL3f", () -> { - MemorySegment data = MemorySegment.ofArray(aI.clone()); + MemorySegment data = MemorySegment.ofArray(aL.clone()); return testMemorySegmentIInvarL3f(data, 1, 2, 3, RANGE-200); }); + tests.put("testMemorySegmentIInvarL3g", () -> { + MemorySegment data = MemorySegment.ofArray(aI.clone()); + return testMemorySegmentIInvarL3g(data, 1, 2, 3, RANGE-200); + }); tests.put("testMemorySegmentLInvarL3a", () -> { MemorySegment data = MemorySegment.ofArray(aL.clone()); return testMemorySegmentLInvarL3a(data, 1, 2, 3, RANGE-200); @@ -246,6 +250,7 @@ public class 
TestEquivalentInvariants { "testMemorySegmentIInvarL3d3", "testMemorySegmentIInvarL3e", "testMemorySegmentIInvarL3f", + "testMemorySegmentIInvarL3g", "testMemorySegmentLInvarL3a", "testMemorySegmentLInvarL3b", "testMemorySegmentLInvarL3c", @@ -681,12 +686,17 @@ public class TestEquivalentInvariants { } @Test + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", + IRNode.STORE_VECTOR, "> 0"}, + applyIfPlatform = {"64-bit", "true"}, + applyIf = {"AlignVector", "false"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) @IR(counts = {IRNode.LOAD_VECTOR_I, "= 0", IRNode.STORE_VECTOR, "= 0"}, applyIfPlatform = {"64-bit", "true"}, + applyIf = {"AlignVector", "true"}, applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) - // Would be nice if it vectorized. - // Fails because of control flow. Somehow the "offsetPlain" check (checks for alignment) is not folded away. + // With AlignVector (strict alignment requirements): we cannot prove that the invariants are alignable -> no vectorization. static Object[] testMemorySegmentIInvarL3d(MemorySegment m, int invar1, int invar2, int invar3, int size) { long i1 = (long)(-invar1 + invar2 + invar3); long i2 = (long)(invar2 + invar3 - invar1); // equivalent @@ -700,12 +710,17 @@ public class TestEquivalentInvariants { } @Test + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", + IRNode.STORE_VECTOR, "> 0"}, + applyIfPlatform = {"64-bit", "true"}, + applyIf = {"AlignVector", "false"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) @IR(counts = {IRNode.LOAD_VECTOR_I, "= 0", IRNode.STORE_VECTOR, "= 0"}, applyIfPlatform = {"64-bit", "true"}, + applyIf = {"AlignVector", "true"}, applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) - // Would be nice if it vectorized. - // Fails because of control flow. Somehow the "offsetPlain" check (checks for alignment) is not folded away. + // With AlignVector (strict alignment requirements): we cannot prove that the invariants are alignable -> no vectorization. 
static Object[] testMemorySegmentIInvarL3d2(MemorySegment m, int invar1, int invar2, int invar3, int size) { long i1 = (long)(-invar1 + invar2 + invar3); for (int i = 0; i < size; i+=2) { @@ -735,6 +750,31 @@ public class TestEquivalentInvariants { return new Object[]{ m }; } + @Test + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", + IRNode.STORE_VECTOR, "> 0"}, + applyIfPlatform = {"64-bit", "true"}, + applyIf = {"AlignVector", "false"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) + @IR(counts = {IRNode.LOAD_VECTOR_I, "= 0", + IRNode.STORE_VECTOR, "= 0"}, + applyIfPlatform = {"64-bit", "true"}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) + // With AlignVector (strict alignment requirements): we cannot prove that the invariants are alignable -> no vectorization. + static Object[] testMemorySegmentIInvarL3e(MemorySegment m, int invar1, int invar2, int invar3, int size) { + long i1 = (long)(-invar1 + invar2 + invar3); + long i2 = (long)(invar2 + invar3) - (long)(invar1); // not equivalent + for (int i = 0; i < size; i+=2) { + int v0 = m.getAtIndex(ValueLayout.JAVA_INT, i + i1 + 0); + int v1 = m.getAtIndex(ValueLayout.JAVA_INT, i + i2 + 1); + m.setAtIndex(ValueLayout.JAVA_INT, i + i1 + 0, v0 + 1); + m.setAtIndex(ValueLayout.JAVA_INT, i + i2 + 1, v1 + 1); + } + return new Object[]{ m }; + } + + // Same as testMemorySegmentIInvarL3e, but with long[] input. @Test @IR(counts = {IRNode.LOAD_VECTOR_I, "= 0", IRNode.STORE_VECTOR, "= 0"}, @@ -743,7 +783,7 @@ public class TestEquivalentInvariants { // Should never vectorize, since i1 and i2 are not guaranteed to be adjacent // invar2 + invar3 could overflow, and the address be valid with and without overflow. // So both addresses are valid, and not adjacent. 
- static Object[] testMemorySegmentIInvarL3e(MemorySegment m, int invar1, int invar2, int invar3, int size) { + static Object[] testMemorySegmentIInvarL3f(MemorySegment m, int invar1, int invar2, int invar3, int size) { long i1 = (long)(-invar1 + invar2 + invar3); long i2 = (long)(invar2 + invar3) - (long)(invar1); // not equivalent for (int i = 0; i < size; i+=2) { @@ -762,7 +802,7 @@ public class TestEquivalentInvariants { applyIfPlatform = {"64-bit", "true"}, applyIf = {"AlignVector", "false"}, applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) - static Object[] testMemorySegmentIInvarL3f(MemorySegment m, long invar1, long invar2, long invar3, int size) { + static Object[] testMemorySegmentIInvarL3g(MemorySegment m, long invar1, long invar2, long invar3, int size) { long i1 = -invar1 + invar2 + invar3; long i2 = invar2 + invar3 - invar1; // equivalent for (int i = 0; i < size; i++) { @@ -825,12 +865,17 @@ public class TestEquivalentInvariants { } @Test + @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", + IRNode.STORE_VECTOR, "> 0"}, + applyIfPlatform = {"64-bit", "true"}, + applyIf = {"AlignVector", "false"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) @IR(counts = {IRNode.LOAD_VECTOR_L, "= 0", IRNode.STORE_VECTOR, "= 0"}, applyIfPlatform = {"64-bit", "true"}, + applyIf = {"AlignVector", "true"}, applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) - // Would be nice if it vectorized. - // Fails because of control flow. Somehow the "offsetPlain" check (checks for alignment) is not folded away. + // With AlignVector (strict alignment requirements): we cannot prove that the invariants are alignable -> no vectorization. 
static Object[] testMemorySegmentLInvarL3d(MemorySegment m, int invar1, int invar2, int invar3, int size) { long i1 = (long)(-invar1 + invar2 + invar3); long i2 = (long)(invar2 + invar3 - invar1); // equivalent @@ -844,12 +889,17 @@ public class TestEquivalentInvariants { } @Test + @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", + IRNode.STORE_VECTOR, "> 0"}, + applyIfPlatform = {"64-bit", "true"}, + applyIf = {"AlignVector", "false"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) @IR(counts = {IRNode.LOAD_VECTOR_L, "= 0", IRNode.STORE_VECTOR, "= 0"}, applyIfPlatform = {"64-bit", "true"}, + applyIf = {"AlignVector", "true"}, applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) - // Would be nice if it vectorized. - // Fails because of control flow. Somehow the "offsetPlain" check (checks for alignment) is not folded away. + // With AlignVector (strict alignment requirements): we cannot prove that the invariants are alignable -> no vectorization. static Object[] testMemorySegmentLInvarL3d2(MemorySegment m, int invar1, int invar2, int invar3, int size) { long i1 = (long)(-invar1 + invar2 + invar3); for (int i = 0; i < size; i+=2) { @@ -880,11 +930,17 @@ public class TestEquivalentInvariants { } @Test + @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", + IRNode.STORE_VECTOR, "> 0"}, + applyIfPlatform = {"64-bit", "true"}, + applyIf = {"AlignVector", "false"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) @IR(counts = {IRNode.LOAD_VECTOR_L, "= 0", IRNode.STORE_VECTOR, "= 0"}, applyIfPlatform = {"64-bit", "true"}, + applyIf = {"AlignVector", "true"}, applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) - // FAILS: should be ok to vectorize, but does not. Investigate in JDK-8331659. + // With AlignVector (strict alignment requirements): we cannot prove that the invariants are alignable -> no vectorization. 
static Object[] testMemorySegmentLInvarL3e(MemorySegment m, int invar1, int invar2, int invar3, int size) { long i1 = (long)(-invar1 + invar2 + invar3); long i2 = (long)(invar2 + invar3) - (long)(invar1); // not equivalent diff --git a/test/hotspot/jtreg/compiler/vectorization/TestPopulateIndex.java b/test/hotspot/jtreg/compiler/vectorization/TestPopulateIndex.java index e23accd27b0..afe733bad4b 100644 --- a/test/hotspot/jtreg/compiler/vectorization/TestPopulateIndex.java +++ b/test/hotspot/jtreg/compiler/vectorization/TestPopulateIndex.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2022, 2025, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -78,17 +78,17 @@ public class TestPopulateIndex { } @Test - @IR(counts = {IRNode.POPULATE_INDEX, "> 0"}) + // Does not vectorize, possibly because the OrI is pushed through the Phi, see also JDK-8348096. public void exprWithIndex1() { for (int i = 0; i < count; i++) { - dst[i] = src[i] * (i & 7); + dst[i] = src[i] * (i | 7); } checkResultExprWithIndex1(); } public void checkResultExprWithIndex1() { for (int i = 0; i < count; i++) { - int expected = src[i] * (i & 7); + int expected = src[i] * (i | 7); if (dst[i] != expected) { throw new RuntimeException("Invalid result: dst[" + i + "] = " + dst[i] + " != " + expected); } @@ -112,4 +112,26 @@ public class TestPopulateIndex { } } } + + @Test + // Does not vectorize: due to sum-under-mask optimization. + // (i+0) & 7, (i+1) & 7 ... (i+8) & 7 .... -> PopulateIndex + // becomes + // (i+0) & 7, (i+1) & 7 ... (i+0) & 7 .... -> pattern broken + // See JDK-8349128. 
+ public void exprWithIndex3() { + for (int i = 0; i < count; i++) { + dst[i] = src[i] * (i & 7); + } + checkResultExprWithIndex3(); + } + + public void checkResultExprWithIndex3() { + for (int i = 0; i < count; i++) { + int expected = src[i] * (i & 7); + if (dst[i] != expected) { + throw new RuntimeException("Invalid result: dst[" + i + "] = " + dst[i] + " != " + expected); + } + } + } }