mirror of
https://github.com/openjdk/jdk.git
synced 2026-01-28 12:09:14 +00:00
8373453: C2 SuperWord: must handle load slices that have loads with different memory inputs
Reviewed-by: kvn, thartmann, qamai
This commit is contained in:
parent
929864b1a4
commit
da14813a5b
@ -187,7 +187,10 @@ VStatus VLoopAnalyzer::setup_submodules_helper() {
|
||||
return body_status;
|
||||
}
|
||||
|
||||
_memory_slices.find_memory_slices();
|
||||
VStatus slices_status = _memory_slices.find_memory_slices();
|
||||
if (!slices_status.is_success()) {
|
||||
return slices_status;
|
||||
}
|
||||
|
||||
// If there is no memory slice detected, it means there is no store.
|
||||
// If there is no reduction and no store, then we give up, because
|
||||
@ -207,9 +210,11 @@ VStatus VLoopAnalyzer::setup_submodules_helper() {
|
||||
}
|
||||
|
||||
// There are 2 kinds of slices:
|
||||
// - No memory phi: only loads. All have the same input memory state from before the loop.
|
||||
// - No memory phi: only loads.
|
||||
// - Usually, all loads have the same input memory state from before the loop.
|
||||
// - Only rarely this is not the case, and we just bail out for now.
|
||||
// - With memory phi. Chain of memory operations inside the loop.
|
||||
void VLoopMemorySlices::find_memory_slices() {
|
||||
VStatus VLoopMemorySlices::find_memory_slices() {
|
||||
Compile* C = _vloop.phase()->C;
|
||||
// We iterate over the body, which is topologically sorted. Hence, if there is a phi
|
||||
// in a slice, we will find it first, and the loads and stores afterwards.
|
||||
@ -228,8 +233,15 @@ void VLoopMemorySlices::find_memory_slices() {
|
||||
PhiNode* head = _heads.at(alias_idx);
|
||||
if (head == nullptr) {
|
||||
// We did not find a phi on this slice yet -> must be a slice with only loads.
|
||||
assert(_inputs.at(alias_idx) == nullptr || _inputs.at(alias_idx) == load->in(1),
|
||||
"not yet touched or the same input");
|
||||
// For now, we can only handle slices with a single memory input before the loop,
|
||||
// so if we find multiple, we bail out of auto vectorization. If this becomes
|
||||
// too restrictive in the fututure, we could consider tracking multiple inputs.
|
||||
// Different memory inputs can for example happen if one load has its memory state
|
||||
// optimized, and the other load fails to have it optimized, for example because
|
||||
// it does not end up on the IGVN worklist any more.
|
||||
if (_inputs.at(alias_idx) != nullptr && _inputs.at(alias_idx) != load->in(1)) {
|
||||
return VStatus::make_failure(FAILURE_DIFFERENT_MEMORY_INPUT);
|
||||
}
|
||||
_inputs.at_put(alias_idx, load->in(1));
|
||||
} // else: the load belongs to a slice with a phi that already set heads and inputs.
|
||||
#ifdef ASSERT
|
||||
@ -243,6 +255,7 @@ void VLoopMemorySlices::find_memory_slices() {
|
||||
}
|
||||
}
|
||||
NOT_PRODUCT( if (_vloop.is_trace_memory_slices()) { print(); } )
|
||||
return VStatus::make_success();
|
||||
}
|
||||
|
||||
#ifndef PRODUCT
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2023, 2025, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2023, 2026, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2023, Arm Limited. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
@ -504,6 +504,8 @@ private:
|
||||
//
|
||||
class VLoopMemorySlices : public StackObj {
|
||||
private:
|
||||
static constexpr char const* FAILURE_DIFFERENT_MEMORY_INPUT = "Load only slice has multiple memory inputs";
|
||||
|
||||
const VLoop& _vloop;
|
||||
const VLoopBody& _body;
|
||||
|
||||
@ -521,7 +523,7 @@ public:
|
||||
const GrowableArray<Node*>& inputs() const { return _inputs; }
|
||||
const GrowableArray<PhiNode*>& heads() const { return _heads; }
|
||||
|
||||
void find_memory_slices();
|
||||
VStatus find_memory_slices();
|
||||
void get_slice_in_reverse_order(PhiNode* head, MemNode* tail, GrowableArray<MemNode*>& slice) const;
|
||||
bool same_memory_slice(MemNode* m1, MemNode* m2) const;
|
||||
|
||||
|
||||
@ -0,0 +1,94 @@
|
||||
/*
|
||||
* Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
|
||||
/*
|
||||
* @test id=all-flags
|
||||
* @summary Test a case where we can have one memory slice that has only loads,
|
||||
* but the loads from the slice do not have all the same input memory
|
||||
* state from before the loop. This is rather rare but it can happen.
|
||||
* @bug 8373453
|
||||
* @run main/othervm
|
||||
* -XX:CompileCommand=compileonly,${test.main.class}::test
|
||||
* -Xbatch -XX:-TieredCompilation
|
||||
* ${test.main.class}
|
||||
*/
|
||||
|
||||
/*
|
||||
* @test id=fewer-flags
|
||||
* @bug 8373453
|
||||
* @run main/othervm
|
||||
* -XX:CompileCommand=compileonly,${test.main.class}::test
|
||||
* ${test.main.class}
|
||||
*/
|
||||
|
||||
/*
|
||||
* @test id=vanilla
|
||||
* @bug 8373453
|
||||
* @run main ${test.main.class}
|
||||
*/
|
||||
|
||||
package compiler.loopopts.superword;
|
||||
|
||||
public class TestLoadSliceWithMultipleMemoryInputStates {
|
||||
static void test() {
|
||||
// The relevant slice is the value field of the Byte Objects.
|
||||
Byte x = 1;
|
||||
|
||||
for (int i = 0; i < 2; i++) {
|
||||
if ((i & 1) == 0) {
|
||||
// Not sure what this loop is needed for, but it is very sensitive,
|
||||
// I cannot even replace N with 32.
|
||||
int N = 32;
|
||||
for (int j = 0; j < N; j++) {
|
||||
if (j == 1) {
|
||||
x = (byte) x;
|
||||
}
|
||||
}
|
||||
|
||||
for (int j = 0; j < 32; j++) {
|
||||
// The call below has an effect on the memory state
|
||||
// If we optimize the Load for Byte::value, we can bypass
|
||||
// this call, since we know that Byte::value cannot be
|
||||
// modified during the call.
|
||||
Object o = 1;
|
||||
o.toString();
|
||||
|
||||
for (int k = 0; k < 32; k++) { // OSR around here
|
||||
// Loads of x byte field have different memory input states
|
||||
// This is because some loads can split their memory state
|
||||
// through a phi further up, and others are not put back on
|
||||
// the IGVN worklist and are thus not optimized and keep
|
||||
// the old memory state. Both are correct though.
|
||||
x = (byte) (x + 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public static void main(String[] args) {
|
||||
for (int i = 0; i < 10_000; i++) {
|
||||
test();
|
||||
}
|
||||
}
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user