mirror of
https://github.com/openjdk/jdk.git
synced 2026-05-12 14:39:49 +00:00
8298935: fix independence bug in create_pack logic in SuperWord::find_adjacent_refs
Reviewed-by: kvn, jbhateja
This commit is contained in:
parent
3d77e217b2
commit
01e6920581
@ -539,6 +539,8 @@ bool SuperWord::SLP_extract() {
|
||||
|
||||
filter_packs();
|
||||
|
||||
DEBUG_ONLY(verify_packs();)
|
||||
|
||||
schedule();
|
||||
|
||||
// Record eventual count of vector packs for checks in post loop vectorization
|
||||
@ -640,54 +642,10 @@ void SuperWord::find_adjacent_refs() {
|
||||
}
|
||||
}
|
||||
|
||||
// Create initial pack pairs of memory operations for which
|
||||
// alignment is set and vectors will be aligned.
|
||||
bool create_pack = true;
|
||||
if (memory_alignment(mem_ref, best_iv_adjustment) == 0 || _do_vector_loop) {
|
||||
if (vectors_should_be_aligned()) {
|
||||
int vw = vector_width(mem_ref);
|
||||
int vw_best = vector_width(best_align_to_mem_ref);
|
||||
if (vw > vw_best) {
|
||||
// Do not vectorize a memory access with more elements per vector
|
||||
// if unaligned memory access is not allowed because number of
|
||||
// iterations in pre-loop will be not enough to align it.
|
||||
create_pack = false;
|
||||
} else {
|
||||
SWPointer p2(best_align_to_mem_ref, this, nullptr, false);
|
||||
if (!align_to_ref_p.invar_equals(p2)) {
|
||||
// Do not vectorize memory accesses with different invariants
|
||||
// if unaligned memory accesses are not allowed.
|
||||
create_pack = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (same_memory_slice(best_align_to_mem_ref, mem_ref)) {
|
||||
// Can't allow vectorization of unaligned memory accesses with the
|
||||
// same memory slice since it could be overlapped accesses to the same array.
|
||||
create_pack = false;
|
||||
} else {
|
||||
// Allow independent (different type) unaligned memory operations
|
||||
// if HW supports them.
|
||||
if (vectors_should_be_aligned()) {
|
||||
create_pack = false;
|
||||
} else {
|
||||
// Check if packs of the same memory slice but
|
||||
// with a different alignment were created before.
|
||||
for (uint i = 0; i < align_to_refs.size(); i++) {
|
||||
MemNode* mr = align_to_refs.at(i)->as_Mem();
|
||||
if (mr == mem_ref) {
|
||||
// Skip when we are looking at same memory operation.
|
||||
continue;
|
||||
}
|
||||
if (same_memory_slice(mem_ref, mr) &&
|
||||
memory_alignment(mr, iv_adjustment) != 0)
|
||||
create_pack = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (create_pack) {
|
||||
if (can_create_pairs(mem_ref, iv_adjustment, align_to_ref_p,
|
||||
best_align_to_mem_ref, best_iv_adjustment,
|
||||
align_to_refs)) {
|
||||
// Create initial pack pairs of memory operations for which alignment was set.
|
||||
for (uint i = 0; i < memops.size(); i++) {
|
||||
Node* s1 = memops.at(i);
|
||||
int align = alignment(s1);
|
||||
@ -707,7 +665,9 @@ void SuperWord::find_adjacent_refs() {
|
||||
}
|
||||
}
|
||||
}
|
||||
} else { // Don't create unaligned pack
|
||||
} else {
|
||||
// Cannot create pairs for mem_ref. Reject all related memops forever.
|
||||
|
||||
// First, remove remaining memory ops of the same memory slice from the list.
|
||||
for (int i = memops.size() - 1; i >= 0; i--) {
|
||||
MemNode* s = memops.at(i)->as_Mem();
|
||||
@ -794,6 +754,96 @@ void SuperWord::find_adjacent_refs_trace_1(Node* best_align_to_mem_ref, int best
|
||||
}
|
||||
#endif
|
||||
|
||||
// Check if we can create the pack pairs for mem_ref:
|
||||
// If required, enforce strict alignment requirements of hardware.
|
||||
// Else, only enforce alignment within a memory slice, so that there cannot be any
|
||||
// memory-dependence between different vector "lanes".
|
||||
bool SuperWord::can_create_pairs(MemNode* mem_ref, int iv_adjustment, SWPointer &align_to_ref_p,
|
||||
MemNode* best_align_to_mem_ref, int best_iv_adjustment,
|
||||
Node_List &align_to_refs) {
|
||||
bool is_aligned_with_best = memory_alignment(mem_ref, best_iv_adjustment) == 0;
|
||||
|
||||
if (vectors_should_be_aligned()) {
|
||||
// All vectors need to be memory aligned, modulo their vector_width. This is more strict
|
||||
// than the hardware probably requires. Most hardware at most requires 4-byte alignment.
|
||||
//
|
||||
// In the pre-loop, we align best_align_to_mem_ref to its vector_length. To ensure that
|
||||
// all mem_ref's are memory aligned modulo their vector_width, we only need to check that
|
||||
// they are all aligned to best_align_to_mem_ref, modulo their vector_width. For that,
|
||||
// we check the following 3 conditions.
|
||||
|
||||
// (1) All packs are aligned with best_align_to_mem_ref.
|
||||
if (!is_aligned_with_best) {
|
||||
return false;
|
||||
}
|
||||
// (2) All other vectors have vector_size less or equal to that of best_align_to_mem_ref.
|
||||
int vw = vector_width(mem_ref);
|
||||
int vw_best = vector_width(best_align_to_mem_ref);
|
||||
if (vw > vw_best) {
|
||||
// We only align to vector_width of best_align_to_mem_ref during pre-loop.
|
||||
// A mem_ref with a larger vector_width might thus not be vector_width aligned.
|
||||
return false;
|
||||
}
|
||||
// (3) Ensure that all vectors have the same invariant. We model memory accesses like this
|
||||
// address = base + k*iv + constant [+ invar]
|
||||
// memory_alignment ignores the invariant.
|
||||
SWPointer p2(best_align_to_mem_ref, this, nullptr, false);
|
||||
if (!align_to_ref_p.invar_equals(p2)) {
|
||||
// Do not vectorize memory accesses with different invariants
|
||||
// if unaligned memory accesses are not allowed.
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
} else {
|
||||
// Alignment is not required by the hardware.
|
||||
|
||||
// However, we need to ensure that the pack for mem_ref is independent, i.e. all members
|
||||
// of the pack are mutually independent.
|
||||
|
||||
if (_do_vector_loop) {
|
||||
// Wait until combine_packs to check independence of packs. For now we just know that
|
||||
// the adjacent pairs are independent. This allows us to vectorize when we do not have
|
||||
// alignment modulo vector_width. For example (forward read):
|
||||
// for (int i ...) { v[i] = v[i + 1] + 5; }
|
||||
// The following will be filtered out in combine_packs (forward write):
|
||||
// for (int i ...) { v[i + 1] = v[i] + 5; }
|
||||
return true;
|
||||
}
|
||||
|
||||
// If all mem_ref's are modulo vector_width aligned with all other mem_ref's of their
|
||||
// memory slice, then the VectorLoad / VectorStore regions are either exactly overlapping
|
||||
// or completely non-overlapping. This ensures that there cannot be memory-dependencies
|
||||
// between different vector "lanes".
|
||||
// During SuperWord::filter_packs -> SuperWord::profitable -> SuperWord::is_vector_use,
|
||||
// we check that all inputs are vectors that match on every element (with some reasonable
|
||||
// exceptions). This ensures that every "lane" is isomorpic and independent to all other
|
||||
// "lanes". This allows us to vectorize these cases:
|
||||
// for (int i ...) { v[i] = v[i] + 5; } // same alignment
|
||||
// for (int i ...) { v[i] = v[i + 32] + 5; } // alignment modulo vector_width
|
||||
if (same_memory_slice(mem_ref, best_align_to_mem_ref)) {
|
||||
return is_aligned_with_best;
|
||||
} else {
|
||||
return is_mem_ref_aligned_with_same_memory_slice(mem_ref, iv_adjustment, align_to_refs);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Check if alignment of mem_ref is consistent with the other packs of the same memory slice
|
||||
bool SuperWord::is_mem_ref_aligned_with_same_memory_slice(MemNode* mem_ref, int iv_adjustment,
|
||||
Node_List &align_to_refs) {
|
||||
for (uint i = 0; i < align_to_refs.size(); i++) {
|
||||
MemNode* mr = align_to_refs.at(i)->as_Mem();
|
||||
if (mr != mem_ref &&
|
||||
same_memory_slice(mr, mem_ref) &&
|
||||
memory_alignment(mr, iv_adjustment) != 0) {
|
||||
// mem_ref is misaligned with mr, another ref of the same memory slice.
|
||||
return false;
|
||||
}
|
||||
}
|
||||
// No misalignment found.
|
||||
return true;
|
||||
}
|
||||
|
||||
//------------------------------find_align_to_ref---------------------------
|
||||
// Find a memory reference to align the loop induction variable to.
|
||||
// Looks first at stores then at loads, looking for a memory reference
|
||||
@ -1326,6 +1376,44 @@ bool SuperWord::independent(Node* s1, Node* s2) {
|
||||
return independent_path(shallow, deep);
|
||||
}
|
||||
|
||||
//------------------------------find_dependence---------------------
|
||||
// Is any s1 in p dependent on any s2 in p? Yes: return such a s2. No: return nullptr.
|
||||
// We could query independent(s1, s2) for all pairs, but that results
|
||||
// in O(p.size * p.size) graph traversals. We can do it all in one BFS!
|
||||
// Start the BFS traversal at all nodes from the pack. Traverse DepPreds
|
||||
// recursively, for nodes that have at least depth min_d, which is the
|
||||
// smallest depth of all nodes from the pack. Once we have traversed all
|
||||
// those nodes, and have not found another node from the pack, we know
|
||||
// that all nodes in the pack are independent.
|
||||
Node* SuperWord::find_dependence(Node_List* p) {
|
||||
if (p->at(0)->is_reduction()) {
|
||||
return nullptr; // ignore reductions
|
||||
}
|
||||
ResourceMark rm;
|
||||
Unique_Node_List worklist; // traversal queue
|
||||
int min_d = depth(p->at(0));
|
||||
visited_clear();
|
||||
for (uint k = 0; k < p->size(); k++) {
|
||||
Node* n = p->at(k);
|
||||
min_d = MIN2(min_d, depth(n));
|
||||
worklist.push(n); // start traversal at all nodes in p
|
||||
visited_set(n); // mark node
|
||||
}
|
||||
for (uint i = 0; i < worklist.size(); i++) {
|
||||
Node* n = worklist.at(i);
|
||||
for (DepPreds preds(n, _dg); !preds.done(); preds.next()) {
|
||||
Node* pred = preds.current();
|
||||
if (in_bb(pred) && depth(pred) >= min_d) {
|
||||
if (visited_test(pred)) { // marked as in p?
|
||||
return pred;
|
||||
}
|
||||
worklist.push(pred);
|
||||
}
|
||||
}
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
//--------------------------have_similar_inputs-----------------------
|
||||
// For a node pair (s1, s2) which is isomorphic and independent,
|
||||
// do s1 and s2 have similar input edges?
|
||||
@ -1483,8 +1571,10 @@ bool SuperWord::follow_use_defs(Node_List* p) {
|
||||
int align = alignment(s1);
|
||||
Node* t1 = s1->in(j);
|
||||
Node* t2 = s2->in(j);
|
||||
if (!in_bb(t1) || !in_bb(t2))
|
||||
if (!in_bb(t1) || !in_bb(t2) || t1->is_Mem() || t2->is_Mem()) {
|
||||
// Only follow non-memory nodes in block - we do not want to resurrect misaligned packs.
|
||||
continue;
|
||||
}
|
||||
align = adjust_alignment_for_type_conversion(s1, t1, align);
|
||||
if (stmts_can_pack(t1, t2, align)) {
|
||||
if (est_savings(t1, t2) >= 0) {
|
||||
@ -1522,10 +1612,16 @@ bool SuperWord::follow_def_uses(Node_List* p) {
|
||||
for (DUIterator_Fast imax, i = s1->fast_outs(imax); i < imax; i++) {
|
||||
Node* t1 = s1->fast_out(i);
|
||||
num_s1_uses++;
|
||||
if (!in_bb(t1)) continue;
|
||||
if (!in_bb(t1) || t1->is_Mem()) {
|
||||
// Only follow non-memory nodes in block - we do not want to resurrect misaligned packs.
|
||||
continue;
|
||||
}
|
||||
for (DUIterator_Fast jmax, j = s2->fast_outs(jmax); j < jmax; j++) {
|
||||
Node* t2 = s2->fast_out(j);
|
||||
if (!in_bb(t2)) continue;
|
||||
if (!in_bb(t2) || t2->is_Mem()) {
|
||||
// Only follow non-memory nodes in block - we do not want to resurrect misaligned packs.
|
||||
continue;
|
||||
}
|
||||
if (t2->Opcode() == Op_AddI && t2 == _lp->as_CountedLoop()->incr()) continue; // don't mess with the iv
|
||||
if (!opnd_positions_match(s1, t1, s2, t2))
|
||||
continue;
|
||||
@ -1714,7 +1810,6 @@ void SuperWord::combine_packs() {
|
||||
for (int j = i + 1; j < _packset.length(); j++) {
|
||||
Node_List* p2 = _packset.at(j);
|
||||
if (p2 == nullptr) continue;
|
||||
if (i == j) continue;
|
||||
if (p1->at(p1->size()-1) == p2->at(0)) {
|
||||
for (uint k = 1; k < p2->size(); k++) {
|
||||
p1->push(p2->at(k));
|
||||
@ -1755,6 +1850,32 @@ void SuperWord::combine_packs() {
|
||||
}
|
||||
}
|
||||
|
||||
if (_do_vector_loop) {
|
||||
// Since we did not enforce exact alignment of the packsets, we only know that there
|
||||
// is no dependence with distance 1, because we have checked independent(s1, s2) for
|
||||
// all adjacent memops. But there could be a dependence of a different distance.
|
||||
// Hence: remove the pack if there is a dependence.
|
||||
for (int i = 0; i < _packset.length(); i++) {
|
||||
Node_List* p = _packset.at(i);
|
||||
if (p != nullptr) {
|
||||
Node* dependence = find_dependence(p);
|
||||
if (dependence != nullptr) {
|
||||
#ifndef PRODUCT
|
||||
if (TraceSuperWord) {
|
||||
tty->cr();
|
||||
tty->print_cr("WARNING: Found dependency.");
|
||||
tty->print_cr("Cannot vectorize despite compile directive Vectorize.");
|
||||
dependence->dump();
|
||||
tty->print_cr("In pack[%d]", i);
|
||||
print_pack(p);
|
||||
}
|
||||
#endif
|
||||
_packset.at_put(i, nullptr);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Compress list.
|
||||
for (int i = _packset.length() - 1; i >= 0; i--) {
|
||||
Node_List* p1 = _packset.at(i);
|
||||
@ -1773,7 +1894,6 @@ void SuperWord::combine_packs() {
|
||||
// Construct the map from nodes to packs. Only valid after the
|
||||
// point where a node is only in one pack (after combine_packs).
|
||||
void SuperWord::construct_my_pack_map() {
|
||||
Node_List* rslt = nullptr;
|
||||
for (int i = 0; i < _packset.length(); i++) {
|
||||
Node_List* p = _packset.at(i);
|
||||
for (uint j = 0; j < p->size(); j++) {
|
||||
@ -2203,6 +2323,29 @@ bool SuperWord::profitable(Node_List* p) {
|
||||
return true;
|
||||
}
|
||||
|
||||
#ifdef ASSERT
// Debug-only check: asserts that find_dependence() reports no dependence for any pack
// in the packset. Before the assert fires, the offending node, the members that are
// not independent of it, and the whole pack are printed to tty.
void SuperWord::verify_packs() {
  for (int pack_idx = 0; pack_idx < _packset.length(); pack_idx++) {
    Node_List* pack = _packset.at(pack_idx);
    Node* dependence = find_dependence(pack);
    if (dependence != nullptr) {
      tty->print_cr("Other nodes in pack have dependence on:");
      dependence->dump();
      tty->print_cr("The following nodes are not independent:");
      for (uint m = 0; m < pack->size(); m++) {
        Node* member = pack->at(m);
        if (independent(member, dependence)) {
          continue; // only report the members involved in the dependence
        }
        member->dump();
      }
      tty->print_cr("They are all from pack[%d]", pack_idx);
      print_pack(pack);
    }
    assert(dependence == nullptr, "all nodes in pack must be mutually independent");
  }
}
#endif
|
||||
|
||||
//------------------------------schedule---------------------------
|
||||
// Adjust the memory graph for the packed operations
|
||||
void SuperWord::schedule() {
|
||||
@ -4027,7 +4170,11 @@ void SuperWord::print_packset() {
|
||||
for (int i = 0; i < _packset.length(); i++) {
|
||||
tty->print_cr("Pack: %d", i);
|
||||
Node_List* p = _packset.at(i);
|
||||
print_pack(p);
|
||||
if (p == nullptr) {
|
||||
tty->print_cr(" nullptr");
|
||||
} else {
|
||||
print_pack(p);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -476,6 +476,15 @@ class SuperWord : public ResourceObj {
|
||||
void find_adjacent_refs_trace_1(Node* best_align_to_mem_ref, int best_iv_adjustment);
|
||||
void print_loop(bool whole);
|
||||
#endif
|
||||
// Check if we can create the pack pairs for mem_ref:
|
||||
// If required, enforce strict alignment requirements of hardware.
|
||||
// Else, only enforce alignment within a memory slice, so that there cannot be any
|
||||
// memory-dependence between different vector "lanes".
|
||||
bool can_create_pairs(MemNode* mem_ref, int iv_adjustment, SWPointer &align_to_ref_p,
|
||||
MemNode* best_align_to_mem_ref, int best_iv_adjustment,
|
||||
Node_List &align_to_refs);
|
||||
// Check if alignment of mem_ref is consistent with the other packs of the same memory slice.
|
||||
bool is_mem_ref_aligned_with_same_memory_slice(MemNode* mem_ref, int iv_adjustment, Node_List &align_to_refs);
|
||||
// Find a memory reference to align the loop induction variable to.
|
||||
MemNode* find_align_to_ref(Node_List &memops, int &idx);
|
||||
// Calculate loop's iv adjustment for this memory ops.
|
||||
@ -512,6 +521,8 @@ class SuperWord : public ResourceObj {
|
||||
bool isomorphic(Node* s1, Node* s2);
|
||||
// Is there no data path from s1 to s2 or s2 to s1?
|
||||
bool independent(Node* s1, Node* s2);
|
||||
// Is any s1 in p dependent on any s2 in p? Yes: return such a s2. No: return nullptr.
|
||||
Node* find_dependence(Node_List* p);
|
||||
// For a node pair (s1, s2) which is isomorphic and independent,
|
||||
// do s1 and s2 have similar input edges?
|
||||
bool have_similar_inputs(Node* s1, Node* s2);
|
||||
@ -543,6 +554,8 @@ class SuperWord : public ResourceObj {
|
||||
void filter_packs();
|
||||
// Merge CMove into new vector-nodes
|
||||
void merge_packs_to_cmove();
|
||||
// Verify that for every pack, all nodes are mutually independent
|
||||
DEBUG_ONLY(void verify_packs();)
|
||||
// Adjust the memory graph for the packed operations
|
||||
void schedule();
|
||||
// Remove "current" from its current position in the memory graph and insert
|
||||
|
||||
@ -166,6 +166,11 @@ public class IRNode {
|
||||
beforeMatchingNameRegex(ADD_VI, "AddVI");
|
||||
}
|
||||
|
||||
public static final String ADD_VF = PREFIX + "ADD_VF" + POSTFIX;
|
||||
static {
|
||||
beforeMatchingNameRegex(ADD_VF, "AddVF");
|
||||
}
|
||||
|
||||
public static final String ADD_REDUCTION_V = PREFIX + "ADD_REDUCTION_V" + POSTFIX;
|
||||
static {
|
||||
beforeMatchingNameRegex(ADD_REDUCTION_V, "AddReductionV(B|S|I|L|F|D)");
|
||||
@ -714,6 +719,11 @@ public class IRNode {
|
||||
beforeMatchingNameRegex(MUL_VL, "MulVL");
|
||||
}
|
||||
|
||||
public static final String MUL_VI = PREFIX + "MUL_VI" + POSTFIX;
|
||||
static {
|
||||
beforeMatchingNameRegex(MUL_VI, "MulVI");
|
||||
}
|
||||
|
||||
public static final String MUL_REDUCTION_VD = PREFIX + "MUL_REDUCTION_VD" + POSTFIX;
|
||||
static {
|
||||
superWordNodes(MUL_REDUCTION_VD, "MulReductionVD");
|
||||
|
||||
@ -0,0 +1,399 @@
|
||||
/*
|
||||
* Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*
|
||||
*/
|
||||
|
||||
/*
|
||||
* @test
|
||||
* @bug 8298935
|
||||
* @summary Writing forward on array creates cyclic dependency
|
||||
* which leads to wrong result, when ignored.
|
||||
* @requires vm.compiler2.enabled
|
||||
* @library /test/lib /
|
||||
* @run driver TestCyclicDependency
|
||||
*/
|
||||
|
||||
import jdk.test.lib.Asserts;
|
||||
import compiler.lib.ir_framework.*;
|
||||
|
||||
public class TestCyclicDependency {
|
||||
static final int RANGE = 512;
|
||||
static final int ITER = 100;
|
||||
int[] goldI0 = new int[RANGE];
|
||||
float[] goldF0 = new float[RANGE];
|
||||
int[] goldI1 = new int[RANGE];
|
||||
float[] goldF1 = new float[RANGE];
|
||||
int[] goldI2 = new int[RANGE];
|
||||
float[] goldF2 = new float[RANGE];
|
||||
int[] goldI3 = new int[RANGE];
|
||||
float[] goldF3 = new float[RANGE];
|
||||
int[] goldI4 = new int[RANGE];
|
||||
float[] goldF4 = new float[RANGE];
|
||||
int[] goldI5a = new int[RANGE];
|
||||
float[] goldF5a = new float[RANGE];
|
||||
int[] goldI5b = new int[RANGE];
|
||||
float[] goldF5b = new float[RANGE];
|
||||
int[] goldI6a = new int[RANGE];
|
||||
float[] goldF6a = new float[RANGE];
|
||||
int[] goldI6b = new int[RANGE];
|
||||
float[] goldF6b = new float[RANGE];
|
||||
int[] goldI7 = new int[RANGE];
|
||||
float[] goldF7 = new float[RANGE];
|
||||
int[] goldI8 = new int[RANGE];
|
||||
float[] goldF8 = new float[RANGE];
|
||||
int[] goldI9 = new int[RANGE];
|
||||
float[] goldF9 = new float[RANGE];
|
||||
|
||||
public static void main(String args[]) {
|
||||
TestFramework.runWithFlags("-XX:CompileCommand=compileonly,TestCyclicDependency::test*");
|
||||
}
|
||||
|
||||
TestCyclicDependency() {
|
||||
// compute the gold standard in interpreter mode
|
||||
// test0
|
||||
init(goldI0, goldF0);
|
||||
test0(goldI0, goldF0);
|
||||
// test1
|
||||
init(goldI1, goldF1);
|
||||
test1(goldI1, goldF1);
|
||||
// test2
|
||||
init(goldI2, goldF2);
|
||||
test2(goldI2, goldF2);
|
||||
// test3
|
||||
init(goldI3, goldF3);
|
||||
test3(goldI3, goldF3);
|
||||
// test4
|
||||
init(goldI4, goldF4);
|
||||
test4(goldI4, goldF4);
|
||||
// test5a
|
||||
init(goldI5a, goldF5a);
|
||||
test5a(goldI5a, goldF5a);
|
||||
// test5b
|
||||
init(goldI5b, goldF5b);
|
||||
test5b(goldI5b, goldF5b);
|
||||
// test6a
|
||||
init(goldI6a, goldF6a);
|
||||
test6a(goldI6a, goldF6a);
|
||||
// test6b
|
||||
init(goldI6b, goldF6b);
|
||||
test6b(goldI6b, goldF6b);
|
||||
// test7
|
||||
init(goldI7, goldF7);
|
||||
test7(goldI7, goldF7);
|
||||
// test8
|
||||
init(goldI8, goldF8);
|
||||
test8(goldI8, goldF8);
|
||||
// test9
|
||||
init(goldI9, goldF9);
|
||||
test9(goldI9, goldF9);
|
||||
}
|
||||
|
||||
@Run(test = "test0")
|
||||
@Warmup(100)
|
||||
public void runTest0() {
|
||||
int[] dataI = new int[RANGE];
|
||||
float[] dataF = new float[RANGE];
|
||||
init(dataI, dataF);
|
||||
test0(dataI, dataF);
|
||||
verifyI("test0", dataI, goldI0);
|
||||
verifyF("test0", dataF, goldF0);
|
||||
}
|
||||
|
||||
@Run(test = "test1")
|
||||
@Warmup(100)
|
||||
public void runTest1() {
|
||||
int[] dataI = new int[RANGE];
|
||||
float[] dataF = new float[RANGE];
|
||||
init(dataI, dataF);
|
||||
test1(dataI, dataF);
|
||||
verifyI("test1", dataI, goldI1);
|
||||
verifyF("test1", dataF, goldF1);
|
||||
}
|
||||
|
||||
@Run(test = "test2")
|
||||
@Warmup(100)
|
||||
public void runTest2() {
|
||||
int[] dataI = new int[RANGE];
|
||||
float[] dataF = new float[RANGE];
|
||||
init(dataI, dataF);
|
||||
test2(dataI, dataF);
|
||||
verifyI("test2", dataI, goldI2);
|
||||
verifyF("test2", dataF, goldF2);
|
||||
}
|
||||
|
||||
@Run(test = "test3")
|
||||
@Warmup(100)
|
||||
public void runTest3() {
|
||||
int[] dataI = new int[RANGE];
|
||||
float[] dataF = new float[RANGE];
|
||||
init(dataI, dataF);
|
||||
test3(dataI, dataF);
|
||||
verifyI("test3", dataI, goldI3);
|
||||
verifyF("test3", dataF, goldF3);
|
||||
}
|
||||
|
||||
@Run(test = "test4")
|
||||
@Warmup(100)
|
||||
public void runTest4() {
|
||||
int[] dataI = new int[RANGE];
|
||||
float[] dataF = new float[RANGE];
|
||||
init(dataI, dataF);
|
||||
test4(dataI, dataF);
|
||||
verifyI("test4", dataI, goldI4);
|
||||
verifyF("test4", dataF, goldF4);
|
||||
}
|
||||
|
||||
@Run(test = "test5a")
|
||||
@Warmup(100)
|
||||
public void runTest5a() {
|
||||
int[] dataI = new int[RANGE];
|
||||
float[] dataF = new float[RANGE];
|
||||
init(dataI, dataF);
|
||||
test5a(dataI, dataF);
|
||||
verifyI("test5a", dataI, goldI5a);
|
||||
verifyF("test5a", dataF, goldF5a);
|
||||
}
|
||||
|
||||
@Run(test = "test5b")
|
||||
@Warmup(100)
|
||||
public void runTest5b() {
|
||||
int[] dataI = new int[RANGE];
|
||||
float[] dataF = new float[RANGE];
|
||||
init(dataI, dataF);
|
||||
test5b(dataI, dataF);
|
||||
verifyI("test5b", dataI, goldI5b);
|
||||
verifyF("test5b", dataF, goldF5b);
|
||||
}
|
||||
|
||||
@Run(test = "test6a")
|
||||
@Warmup(100)
|
||||
public void runTest6a() {
|
||||
int[] dataI = new int[RANGE];
|
||||
float[] dataF = new float[RANGE];
|
||||
init(dataI, dataF);
|
||||
test6a(dataI, dataF);
|
||||
verifyI("test6a", dataI, goldI6a);
|
||||
verifyF("test6a", dataF, goldF6a);
|
||||
}
|
||||
|
||||
@Run(test = "test6b")
|
||||
@Warmup(100)
|
||||
public void runTest6b() {
|
||||
int[] dataI = new int[RANGE];
|
||||
float[] dataF = new float[RANGE];
|
||||
init(dataI, dataF);
|
||||
test6b(dataI, dataF);
|
||||
verifyI("test6b", dataI, goldI6b);
|
||||
verifyF("test6b", dataF, goldF6b);
|
||||
}
|
||||
|
||||
@Run(test = "test7")
|
||||
@Warmup(100)
|
||||
public void runTest7() {
|
||||
int[] dataI = new int[RANGE];
|
||||
float[] dataF = new float[RANGE];
|
||||
init(dataI, dataF);
|
||||
test7(dataI, dataF);
|
||||
verifyI("test7", dataI, goldI7);
|
||||
verifyF("test7", dataF, goldF7);
|
||||
}
|
||||
|
||||
@Run(test = "test8")
|
||||
@Warmup(100)
|
||||
public void runTest8() {
|
||||
int[] dataI = new int[RANGE];
|
||||
float[] dataF = new float[RANGE];
|
||||
init(dataI, dataF);
|
||||
test8(dataI, dataF);
|
||||
verifyI("test8", dataI, goldI8);
|
||||
verifyF("test8", dataF, goldF8);
|
||||
}
|
||||
|
||||
@Run(test = "test9")
|
||||
@Warmup(100)
|
||||
public void runTest9() {
|
||||
int[] dataI = new int[RANGE];
|
||||
float[] dataF = new float[RANGE];
|
||||
init(dataI, dataF);
|
||||
test9(dataI, dataF);
|
||||
verifyI("test9", dataI, goldI9);
|
||||
verifyF("test9", dataF, goldF9);
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_VI, "> 0", IRNode.STORE_VECTOR, "> 0"},
|
||||
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
|
||||
static void test0(int[] dataI, float[] dataF) {
|
||||
for (int i = 0; i < RANGE; i++) {
|
||||
// All perfectly aligned, expect vectorization
|
||||
int v = dataI[i];
|
||||
dataI[i] = v + 5;
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
static void test1(int[] dataI, float[] dataF) {
|
||||
for (int i = 0; i < RANGE - 1; i++) {
|
||||
// dataI has cyclic dependency of distance 1
|
||||
int v = dataI[i];
|
||||
dataI[i + 1] = v;
|
||||
dataF[i] = v; // let's not get confused by another type
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
static void test2(int[] dataI, float[] dataF) {
|
||||
for (int i = 0; i < RANGE - 2; i++) {
|
||||
// dataI has cyclic dependency of distance 2
|
||||
int v = dataI[i];
|
||||
dataI[i + 2] = v;
|
||||
dataF[i] = v; // let's not get confused by another type
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
static void test3(int[] dataI, float[] dataF) {
|
||||
for (int i = 0; i < RANGE - 3; i++) {
|
||||
// dataI has cyclic dependency of distance 3
|
||||
int v = dataI[i];
|
||||
dataI[i + 3] = v;
|
||||
dataF[i] = v; // let's not get confused by another type
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
static void test4(int[] dataI, float[] dataF) {
|
||||
for (int i = 1; i < RANGE - 1; i++) {
|
||||
// dataI has cyclic dependency of distance 2
|
||||
int v = dataI[i - 1];
|
||||
dataI[i + 1] = v;
|
||||
dataF[i] = v; // let's not get confused by another type
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
static void test5a(int[] dataI, float[] dataF) {
|
||||
for (int i = 2; i < RANGE; i++) {
|
||||
// dataI has read / write distance 1, but no cyclic dependency
|
||||
int v = dataI[i];
|
||||
dataI[i - 1] = v + 5;
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
static void test5b(int[] dataI, float[] dataF) {
|
||||
for (int i = 1; i < RANGE; i++) {
|
||||
// dataI has read / write distance 1, but no cyclic dependency
|
||||
int v = dataI[i];
|
||||
dataI[i - 1] = v;
|
||||
dataF[i] = v; // let's not get confused by another type
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
static void test6a(int[] dataI, float[] dataF) {
|
||||
for (int i = 2; i < RANGE; i++) {
|
||||
// dataI has read / write distance 2, but no cyclic dependency
|
||||
int v = dataI[i];
|
||||
dataI[i - 2] = v + 5;
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
static void test6b(int[] dataI, float[] dataF) {
|
||||
for (int i = 2; i < RANGE; i++) {
|
||||
// dataI has read / write distance 2, but no cyclic dependency
|
||||
int v = dataI[i];
|
||||
dataI[i - 2] = v;
|
||||
dataF[i] = v; // let's not get confused by another type
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.ADD_VI, "> 0"},
|
||||
applyIf = {"AlignVector", "false"},
|
||||
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
|
||||
// Some aarch64 machines have AlignVector == true, like ThunderX2
|
||||
static void test7(int[] dataI, float[] dataF) {
|
||||
for (int i = 0; i < RANGE - 32; i++) {
|
||||
// write forward 32 -> more than vector size -> can vectorize
|
||||
// write forward 3 -> cannot vectorize
|
||||
// separate types should make decision separately if they vectorize or not
|
||||
int v = dataI[i];
|
||||
dataI[i + 32] = v + 5;
|
||||
float f = dataF[i];
|
||||
dataF[i + 3] = f + 3.5f;
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.ADD_VF, "> 0"},
|
||||
applyIf = {"AlignVector", "false"},
|
||||
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
|
||||
// Some aarch64 machines have AlignVector == true, like ThunderX2
|
||||
static void test8(int[] dataI, float[] dataF) {
|
||||
for (int i = 0; i < RANGE - 32; i++) {
|
||||
// write forward 32 -> more than vector size -> can vectorize
|
||||
// write forward 3 -> cannot vectorize
|
||||
// separate types should make decision separately if they vectorize or not
|
||||
int v = dataI[i];
|
||||
dataI[i + 3] = v + 5;
|
||||
float f = dataF[i];
|
||||
dataF[i + 32] = f + 3.5f;
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.ADD_REDUCTION_VI, "> 0"},
|
||||
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
|
||||
static void test9(int[] dataI, float[] dataF) {
|
||||
int sI = 666;
|
||||
for (int i = 0; i < RANGE; i++) {
|
||||
// self-cycle allowed for reduction
|
||||
sI += dataI[i] * 2; // factor necessary to make it profitable
|
||||
}
|
||||
dataI[0] = sI; // write back
|
||||
}
|
||||
|
||||
public static void init(int[] dataI, float[] dataF) {
|
||||
for (int j = 0; j < RANGE; j++) {
|
||||
dataI[j] = j;
|
||||
dataF[j] = j * 0.5f;
|
||||
}
|
||||
}
|
||||
|
||||
static void verifyI(String name, int[] data, int[] gold) {
|
||||
for (int i = 0; i < RANGE; i++) {
|
||||
if (data[i] != gold[i]) {
|
||||
throw new RuntimeException(" Invalid " + name + " result: dataI[" + i + "]: " + data[i] + " != " + gold[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void verifyF(String name, float[] data, float[] gold) {
|
||||
for (int i = 0; i < RANGE; i++) {
|
||||
if (data[i] != gold[i]) {
|
||||
throw new RuntimeException(" Invalid " + name + " result: dataF[" + i + "]: " + data[i] + " != " + gold[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2020, 2023, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -27,10 +27,12 @@
|
||||
* @summary Test vectorization of Streams$RangeIntSpliterator::forEachRemaining
|
||||
* @requires vm.compiler2.enabled & vm.compMode != "Xint"
|
||||
*
|
||||
* @run main compiler.vectorization.TestForEachRem test1
|
||||
* @run main compiler.vectorization.TestForEachRem test2
|
||||
* @run main compiler.vectorization.TestForEachRem test3
|
||||
* @run main compiler.vectorization.TestForEachRem test4
|
||||
* @run main/othervm -Xbatch compiler.vectorization.TestForEachRem test1
|
||||
* @run main/othervm -Xbatch compiler.vectorization.TestForEachRem test2
|
||||
* @run main/othervm -Xbatch compiler.vectorization.TestForEachRem test3
|
||||
* @run main/othervm -Xbatch compiler.vectorization.TestForEachRem test4
|
||||
* @run main/othervm -Xbatch compiler.vectorization.TestForEachRem test5
|
||||
* @run main/othervm -Xbatch compiler.vectorization.TestForEachRem test6
|
||||
*/
|
||||
|
||||
package compiler.vectorization;
|
||||
@ -65,6 +67,25 @@ public class TestForEachRem {
|
||||
});
|
||||
}
|
||||
|
||||
static void test5(int[] data) {
|
||||
IntStream.range(0, RANGE - 2).forEach(j -> {
|
||||
data[j + 2] = data[j];
|
||||
});
|
||||
}
|
||||
|
||||
static void initByte(byte[] data) {
|
||||
IntStream.range(0, RANGE).parallel().forEach(j -> {
|
||||
data[j] = (byte)j;
|
||||
});
|
||||
}
|
||||
|
||||
static void test6(byte[] data) {
|
||||
// 2-byte offset -> can only vectorize if alignment not required by hardware
|
||||
IntStream.range(0, RANGE - 2).forEach(j -> {
|
||||
data[j] = data[j + 2];
|
||||
});
|
||||
}
|
||||
|
||||
static void verify(String name, int[] data, int[] gold) {
|
||||
for (int i = 0; i < RANGE; i++) {
|
||||
if (data[i] != gold[i]) {
|
||||
@ -73,12 +94,22 @@ public class TestForEachRem {
|
||||
}
|
||||
}
|
||||
|
||||
static void verify(String name, byte[] data, byte[] gold) {
|
||||
for (int i = 0; i < RANGE; i++) {
|
||||
if (data[i] != gold[i]) {
|
||||
throw new RuntimeException(" Invalid " + name + " result: data[" + i + "]: " + data[i] + " != " + gold[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public static void main(String[] args) {
|
||||
int[] data = new int[RANGE];
|
||||
int[] gold = new int[RANGE];
|
||||
byte[] dataB = new byte[RANGE];
|
||||
byte[] goldB = new byte[RANGE];
|
||||
|
||||
if (args.length == 0) {
    // No test selected: report every test name this driver dispatches on.
    // Only one throw is kept (a second one would be unreachable), and test6
    // is listed because the driver handles it as well.
    throw new RuntimeException(" Missing test name: test1, test2, test3, test4, test5, test6");
}
|
||||
|
||||
if (args[0].equals("test1")) {
|
||||
@ -126,5 +157,30 @@ public class TestForEachRem {
|
||||
verify("test4", data, gold);
|
||||
System.out.println(" Finished test4.");
|
||||
}
|
||||
|
||||
if (args[0].equals("test5")) {
|
||||
System.out.println(" Run test5 ...");
|
||||
test1(gold); // reset
|
||||
test5(gold);
|
||||
for (int i = 0; i < ITER; i++) {
|
||||
test1(data); // reset
|
||||
test5(data);
|
||||
}
|
||||
verify("test5", data, gold);
|
||||
System.out.println(" Finished test5.");
|
||||
}
|
||||
|
||||
if (args[0].equals("test6")) {
|
||||
System.out.println(" Run test6 ...");
|
||||
initByte(goldB); // reset
|
||||
test6(goldB);
|
||||
for (int i = 0; i < ITER; i++) {
|
||||
initByte(dataB); // reset
|
||||
test6(dataB);
|
||||
}
|
||||
verify("test6", dataB, goldB);
|
||||
System.out.println(" Finished test6.");
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@ -0,0 +1,803 @@
|
||||
/*
|
||||
* Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
|
||||
/*
|
||||
* @test
|
||||
* @bug 8298935
|
||||
* @summary Test forced vectorization, and check IR for vector instructions
|
||||
* @requires vm.compiler2.enabled
|
||||
* @library /test/lib /
|
||||
* @run driver compiler.vectorization.TestOptionVectorizeIR
|
||||
*/
|
||||
|
||||
package compiler.vectorization;
|
||||
import compiler.lib.ir_framework.*;
|
||||
|
||||
public class TestOptionVectorizeIR {
|
||||
static final int RANGE = 512;
|
||||
static final int ITER = 100;
|
||||
int[] gold1 = new int[RANGE];
|
||||
int[] gold2 = new int[RANGE];
|
||||
int[] gold3 = new int[RANGE];
|
||||
int[] gold4 = new int[RANGE];
|
||||
int[] gold5 = new int[RANGE];
|
||||
int[] gold6 = new int[RANGE];
|
||||
|
||||
long[] gold10 = new long[RANGE];
|
||||
long[] gold11 = new long[RANGE];
|
||||
long[] gold12 = new long[RANGE];
|
||||
long[] gold13 = new long[RANGE];
|
||||
|
||||
short[] gold20 = new short[RANGE];
|
||||
short[] gold21 = new short[RANGE];
|
||||
short[] gold22 = new short[RANGE];
|
||||
short[] gold23 = new short[RANGE];
|
||||
|
||||
byte[] gold30 = new byte[RANGE];
|
||||
byte[] gold31 = new byte[RANGE];
|
||||
byte[] gold32 = new byte[RANGE];
|
||||
byte[] gold33 = new byte[RANGE];
|
||||
|
||||
char[] gold40 = new char[RANGE];
|
||||
char[] gold41 = new char[RANGE];
|
||||
char[] gold42 = new char[RANGE];
|
||||
char[] gold43 = new char[RANGE];
|
||||
|
||||
float[] gold50 = new float[RANGE];
|
||||
float[] gold51 = new float[RANGE];
|
||||
float[] gold52 = new float[RANGE];
|
||||
float[] gold53 = new float[RANGE];
|
||||
|
||||
double[] gold60 = new double[RANGE];
|
||||
double[] gold61 = new double[RANGE];
|
||||
double[] gold62 = new double[RANGE];
|
||||
double[] gold63 = new double[RANGE];
|
||||
|
||||
public static void main(String args[]) {
|
||||
TestFramework.runWithFlags("-XX:CompileCommand=option,compiler.vectorization.TestOptionVectorizeIR::test*,Vectorize");
|
||||
}
|
||||
|
||||
/**
 * Fills every gold array by running the matching kernel once on freshly
 * initialized data. Per the original note, this is meant to happen in
 * interpreter mode, so the results serve as the reference for the runTestN
 * checks.
 */
TestOptionVectorizeIR() {
    // int kernels: test1 doubles as the initializer for the other int arrays
    test1(gold1);
    test1(gold2); test2(gold2);
    test1(gold3); test3(gold3, 2, 3);
    test1(gold4); test4(gold4);
    test1(gold5); test5(gold5);
    test1(gold6); test6(gold6);

    // long kernels
    init(gold10); test10(gold10);
    init(gold11); test11(gold11);
    init(gold12); test12(gold12);
    init(gold13); test13(gold13);

    // short kernels
    init(gold20); test20(gold20);
    init(gold21); test21(gold21);
    init(gold22); test22(gold22);
    init(gold23); test23(gold23);

    // byte kernels
    init(gold30); test30(gold30);
    init(gold31); test31(gold31);
    init(gold32); test32(gold32);
    init(gold33); test33(gold33);

    // char kernels
    init(gold40); test40(gold40);
    init(gold41); test41(gold41);
    init(gold42); test42(gold42);
    init(gold43); test43(gold43);

    // float kernels
    init(gold50); test50(gold50);
    init(gold51); test51(gold51);
    init(gold52); test52(gold52);
    init(gold53); test53(gold53);

    // double kernels
    init(gold60); test60(gold60);
    init(gold61); test61(gold61);
    init(gold62); test62(gold62);
    init(gold63); test63(gold63);
}
|
||||
|
||||
@Run(test = "test1")
|
||||
@Warmup(100)
|
||||
public void runTest1() {
|
||||
int[] data = new int[RANGE];
|
||||
test1(data);
|
||||
verify("test1", data, gold1);
|
||||
}
|
||||
|
||||
@Run(test = "test2")
|
||||
@Warmup(100)
|
||||
public void runTest2() {
|
||||
int[] data = new int[RANGE];
|
||||
test1(data);
|
||||
test2(data);
|
||||
verify("test2", data, gold2);
|
||||
}
|
||||
|
||||
@Run(test = "test3")
|
||||
@Warmup(100)
|
||||
public void runTest3() {
|
||||
int[] data = new int[RANGE];
|
||||
test1(data);
|
||||
test3(data, 2, 3);
|
||||
verify("test3", data, gold3);
|
||||
}
|
||||
|
||||
@Run(test = "test4")
|
||||
@Warmup(100)
|
||||
public void runTest4() {
|
||||
int[] data = new int[RANGE];
|
||||
test1(data);
|
||||
test4(data);
|
||||
verify("test4", data, gold4);
|
||||
}
|
||||
|
||||
@Run(test = "test5")
|
||||
@Warmup(100)
|
||||
public void runTest5() {
|
||||
int[] data = new int[RANGE];
|
||||
test1(data);
|
||||
test5(data);
|
||||
verify("test5", data, gold5);
|
||||
}
|
||||
|
||||
@Run(test = "test6")
|
||||
@Warmup(100)
|
||||
public void runTest6() {
|
||||
int[] data = new int[RANGE];
|
||||
test1(data);
|
||||
test6(data);
|
||||
verify("test6", data, gold6);
|
||||
}
|
||||
|
||||
@Test
|
||||
static void test1(int[] data) {
|
||||
for (int j = 0; j < RANGE; j++) {
|
||||
// Vectorizes even if it is not forced
|
||||
data[j] = j;
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_VI, "> 0", IRNode.STORE_VECTOR, "> 0"},
|
||||
applyIf = {"AlignVector", "false"},
|
||||
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
|
||||
static void test2(int[] data) {
|
||||
for (int j = 0; j < RANGE - 1; j++) {
|
||||
// Only vectorizes if forced, because of offset by 1
|
||||
data[j] = data[j] + data[j + 1];
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.REPLICATE_I, "> 0", IRNode.ADD_VI, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"},
|
||||
applyIf = {"AlignVector", "false"},
|
||||
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
|
||||
static void test3(int[] data, int A, int B) {
|
||||
for (int j = 0; j < RANGE - 1; j++) {
|
||||
// Only vectorizes if forced, because of offset by 1
|
||||
data[j] = A * data[j] + B * data[j + 1];
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
static void test4(int[] data) {
|
||||
for (int j = 0; j < RANGE - 1; j++) {
|
||||
// write forward -> cyclic dependency -> cannot vectorize
|
||||
// independent(s1, s2) for adjacent loads should detect this
|
||||
data[j + 1] = data[j];
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
static void test5(int[] data) {
|
||||
for (int j = 0; j < RANGE - 3; j++) {
|
||||
// write forward -> cyclic dependency -> cannot vectorize
|
||||
// independent(s1, s2) for adjacent loads cannot detect this
|
||||
// Checks with memory_alignment are disabled via compile option
|
||||
data[j + 2] = data[j];
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
static void test6(int[] data) {
|
||||
for (int j = 0; j < RANGE - 3; j++) {
|
||||
// write forward -> cyclic dependency -> cannot vectorize
|
||||
// independent(s1, s2) for adjacent loads cannot detect this
|
||||
// Checks with memory_alignment are disabled via compile option
|
||||
data[j + 3] = data[j];
|
||||
}
|
||||
}
|
||||
|
||||
// ------------------------- Long -----------------------------
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"},
|
||||
applyIf = {"AlignVector", "false"},
|
||||
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
|
||||
static void test10(long[] data) {
|
||||
for (int j = 2; j < RANGE - 2; j++) {
|
||||
data[j] += data[j + 2];
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"},
|
||||
applyIf = {"AlignVector", "false"},
|
||||
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
|
||||
static void test11(long[] data) {
|
||||
for (int j = 2; j < RANGE - 2; j++) {
|
||||
data[j] += data[j + 1];
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
static void test12(long[] data) {
|
||||
for (int j = 2; j < RANGE - 2; j++) {
|
||||
data[j] += data[j - 1];
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
static void test13(long[] data) {
|
||||
// 128-bit vectors -> can vectorize because only 2 elements
|
||||
for (int j = 2; j < RANGE - 2; j++) {
|
||||
data[j] += data[j - 2];
|
||||
}
|
||||
}
|
||||
|
||||
@Run(test = "test10")
|
||||
@Warmup(100)
|
||||
public void runTest10() {
|
||||
long[] data = new long[RANGE];
|
||||
init(data);
|
||||
test10(data);
|
||||
verify("test10", data, gold10);
|
||||
}
|
||||
|
||||
@Run(test = "test11")
|
||||
@Warmup(100)
|
||||
public void runTest11() {
|
||||
long[] data = new long[RANGE];
|
||||
init(data);
|
||||
test11(data);
|
||||
verify("test11", data, gold11);
|
||||
}
|
||||
|
||||
@Run(test = "test12")
|
||||
@Warmup(100)
|
||||
public void runTest12() {
|
||||
long[] data = new long[RANGE];
|
||||
init(data);
|
||||
test12(data);
|
||||
verify("test12", data, gold12);
|
||||
}
|
||||
|
||||
@Run(test = "test13")
|
||||
@Warmup(100)
|
||||
public void runTest13() {
|
||||
long[] data = new long[RANGE];
|
||||
init(data);
|
||||
test13(data);
|
||||
verify("test13", data, gold13);
|
||||
}
|
||||
|
||||
|
||||
// ------------------------- Short -----------------------------
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"},
|
||||
applyIf = {"AlignVector", "false"},
|
||||
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
|
||||
static void test20(short[] data) {
|
||||
for (int j = 2; j < RANGE - 2; j++) {
|
||||
data[j] += data[j + 2];
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"},
|
||||
applyIf = {"AlignVector", "false"},
|
||||
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
|
||||
static void test21(short[] data) {
|
||||
for (int j = 2; j < RANGE - 2; j++) {
|
||||
data[j] += data[j + 1];
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
static void test22(short[] data) {
|
||||
for (int j = 2; j < RANGE - 2; j++) {
|
||||
data[j] += data[j - 1];
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
static void test23(short[] data) {
|
||||
for (int j = 2; j < RANGE - 2; j++) {
|
||||
data[j] += data[j - 2];
|
||||
}
|
||||
}
|
||||
|
||||
@Run(test = "test20")
|
||||
@Warmup(100)
|
||||
public void runTest20() {
|
||||
short[] data = new short[RANGE];
|
||||
init(data);
|
||||
test20(data);
|
||||
verify("test20", data, gold20);
|
||||
}
|
||||
|
||||
@Run(test = "test21")
|
||||
@Warmup(100)
|
||||
public void runTest21() {
|
||||
short[] data = new short[RANGE];
|
||||
init(data);
|
||||
test21(data);
|
||||
verify("test21", data, gold21);
|
||||
}
|
||||
|
||||
@Run(test = "test22")
|
||||
@Warmup(100)
|
||||
public void runTest22() {
|
||||
short[] data = new short[RANGE];
|
||||
init(data);
|
||||
test22(data);
|
||||
verify("test22", data, gold22);
|
||||
}
|
||||
|
||||
@Run(test = "test23")
|
||||
@Warmup(100)
|
||||
public void runTest23() {
|
||||
short[] data = new short[RANGE];
|
||||
init(data);
|
||||
test23(data);
|
||||
verify("test23", data, gold23);
|
||||
}
|
||||
|
||||
|
||||
// ------------------------- Byte -----------------------------
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"},
|
||||
applyIf = {"AlignVector", "false"},
|
||||
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
|
||||
static void test30(byte[] data) {
|
||||
for (int j = 2; j < RANGE - 2; j++) {
|
||||
data[j] += data[j + 2];
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"},
|
||||
applyIf = {"AlignVector", "false"},
|
||||
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
|
||||
static void test31(byte[] data) {
|
||||
for (int j = 2; j < RANGE - 2; j++) {
|
||||
data[j] += data[j + 1];
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
static void test32(byte[] data) {
|
||||
for (int j = 2; j < RANGE - 2; j++) {
|
||||
data[j] += data[j - 1];
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
static void test33(byte[] data) {
|
||||
for (int j = 2; j < RANGE - 2; j++) {
|
||||
data[j] += data[j - 2];
|
||||
}
|
||||
}
|
||||
|
||||
@Run(test = "test30")
|
||||
@Warmup(100)
|
||||
public void runTest30() {
|
||||
byte[] data = new byte[RANGE];
|
||||
init(data);
|
||||
test30(data);
|
||||
verify("test30", data, gold30);
|
||||
}
|
||||
|
||||
@Run(test = "test31")
|
||||
@Warmup(100)
|
||||
public void runTest31() {
|
||||
byte[] data = new byte[RANGE];
|
||||
init(data);
|
||||
test31(data);
|
||||
verify("test31", data, gold31);
|
||||
}
|
||||
|
||||
@Run(test = "test32")
|
||||
@Warmup(100)
|
||||
public void runTest32() {
|
||||
byte[] data = new byte[RANGE];
|
||||
init(data);
|
||||
test32(data);
|
||||
verify("test32", data, gold32);
|
||||
}
|
||||
|
||||
@Run(test = "test33")
|
||||
@Warmup(100)
|
||||
public void runTest33() {
|
||||
byte[] data = new byte[RANGE];
|
||||
init(data);
|
||||
test33(data);
|
||||
verify("test33", data, gold33);
|
||||
}
|
||||
|
||||
|
||||
// ------------------------- Char -----------------------------
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"},
|
||||
applyIf = {"AlignVector", "false"},
|
||||
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
|
||||
static void test40(char[] data) {
|
||||
for (int j = 2; j < RANGE - 2; j++) {
|
||||
data[j] += data[j + 2];
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"},
|
||||
applyIf = {"AlignVector", "false"},
|
||||
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
|
||||
static void test41(char[] data) {
|
||||
for (int j = 2; j < RANGE - 2; j++) {
|
||||
data[j] += data[j + 1];
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
static void test42(char[] data) {
|
||||
for (int j = 2; j < RANGE - 2; j++) {
|
||||
data[j] += data[j - 1];
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
static void test43(char[] data) {
|
||||
for (int j = 2; j < RANGE - 2; j++) {
|
||||
data[j] += data[j - 2];
|
||||
}
|
||||
}
|
||||
|
||||
@Run(test = "test40")
|
||||
@Warmup(100)
|
||||
public void runTest40() {
|
||||
char[] data = new char[RANGE];
|
||||
init(data);
|
||||
test40(data);
|
||||
verify("test40", data, gold40);
|
||||
}
|
||||
|
||||
@Run(test = "test41")
|
||||
@Warmup(100)
|
||||
public void runTest41() {
|
||||
char[] data = new char[RANGE];
|
||||
init(data);
|
||||
test41(data);
|
||||
verify("test41", data, gold41);
|
||||
}
|
||||
|
||||
@Run(test = "test42")
|
||||
@Warmup(100)
|
||||
public void runTest42() {
|
||||
char[] data = new char[RANGE];
|
||||
init(data);
|
||||
test42(data);
|
||||
verify("test42", data, gold42);
|
||||
}
|
||||
|
||||
@Run(test = "test43")
|
||||
@Warmup(100)
|
||||
public void runTest43() {
|
||||
char[] data = new char[RANGE];
|
||||
init(data);
|
||||
test43(data);
|
||||
verify("test43", data, gold43);
|
||||
}
|
||||
|
||||
// ------------------------- Float -----------------------------
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"},
|
||||
applyIf = {"AlignVector", "false"},
|
||||
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
|
||||
static void test50(float[] data) {
|
||||
for (int j = 2; j < RANGE - 2; j++) {
|
||||
data[j] += data[j + 2];
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"},
|
||||
applyIf = {"AlignVector", "false"},
|
||||
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
|
||||
static void test51(float[] data) {
|
||||
for (int j = 2; j < RANGE - 2; j++) {
|
||||
data[j] += data[j + 1];
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
static void test52(float[] data) {
|
||||
for (int j = 2; j < RANGE - 2; j++) {
|
||||
data[j] += data[j - 1];
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
static void test53(float[] data) {
|
||||
for (int j = 2; j < RANGE - 2; j++) {
|
||||
data[j] += data[j - 2];
|
||||
}
|
||||
}
|
||||
|
||||
@Run(test = "test50")
|
||||
@Warmup(100)
|
||||
public void runTest50() {
|
||||
float[] data = new float[RANGE];
|
||||
init(data);
|
||||
test50(data);
|
||||
verify("test50", data, gold50);
|
||||
}
|
||||
|
||||
@Run(test = "test51")
|
||||
@Warmup(100)
|
||||
public void runTest51() {
|
||||
float[] data = new float[RANGE];
|
||||
init(data);
|
||||
test51(data);
|
||||
verify("test51", data, gold51);
|
||||
}
|
||||
|
||||
@Run(test = "test52")
|
||||
@Warmup(100)
|
||||
public void runTest52() {
|
||||
float[] data = new float[RANGE];
|
||||
init(data);
|
||||
test52(data);
|
||||
verify("test52", data, gold52);
|
||||
}
|
||||
|
||||
@Run(test = "test53")
|
||||
@Warmup(100)
|
||||
public void runTest53() {
|
||||
float[] data = new float[RANGE];
|
||||
init(data);
|
||||
test53(data);
|
||||
verify("test53", data, gold53);
|
||||
}
|
||||
|
||||
// ------------------------- Double -----------------------------
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"},
|
||||
applyIf = {"AlignVector", "false"},
|
||||
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
|
||||
static void test60(double[] data) {
|
||||
for (int j = 2; j < RANGE - 2; j++) {
|
||||
data[j] += data[j + 2];
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"},
|
||||
applyIf = {"AlignVector", "false"},
|
||||
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
|
||||
static void test61(double[] data) {
|
||||
for (int j = 2; j < RANGE - 2; j++) {
|
||||
data[j] += data[j + 1];
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
static void test62(double[] data) {
|
||||
for (int j = 2; j < RANGE - 2; j++) {
|
||||
data[j] += data[j - 1];
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
static void test63(double[] data) {
|
||||
// 128-bit vectors -> can vectorize because only 2 elements
|
||||
for (int j = 2; j < RANGE - 2; j++) {
|
||||
data[j] += data[j - 2];
|
||||
}
|
||||
}
|
||||
|
||||
@Run(test = "test60")
|
||||
@Warmup(100)
|
||||
public void runTest60() {
|
||||
double[] data = new double[RANGE];
|
||||
init(data);
|
||||
test60(data);
|
||||
verify("test60", data, gold60);
|
||||
}
|
||||
|
||||
@Run(test = "test61")
|
||||
@Warmup(100)
|
||||
public void runTest61() {
|
||||
double[] data = new double[RANGE];
|
||||
init(data);
|
||||
test61(data);
|
||||
verify("test61", data, gold61);
|
||||
}
|
||||
|
||||
@Run(test = "test62")
|
||||
@Warmup(100)
|
||||
public void runTest62() {
|
||||
double[] data = new double[RANGE];
|
||||
init(data);
|
||||
test62(data);
|
||||
verify("test62", data, gold62);
|
||||
}
|
||||
|
||||
@Run(test = "test63")
|
||||
@Warmup(100)
|
||||
public void runTest63() {
|
||||
double[] data = new double[RANGE];
|
||||
init(data);
|
||||
test63(data);
|
||||
verify("test63", data, gold63);
|
||||
}
|
||||
|
||||
static void init(long[] data) {
|
||||
for (int j = 0; j < RANGE; j++) {
|
||||
data[j] = j;
|
||||
}
|
||||
}
|
||||
|
||||
static void init(short[] data) {
|
||||
for (int j = 0; j < RANGE; j++) {
|
||||
data[j] = (short)j;
|
||||
}
|
||||
}
|
||||
|
||||
static void init(byte[] data) {
|
||||
for (int j = 0; j < RANGE; j++) {
|
||||
data[j] = (byte)j;
|
||||
}
|
||||
}
|
||||
|
||||
static void init(char[] data) {
|
||||
for (int j = 0; j < RANGE; j++) {
|
||||
data[j] = (char)j;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static void init(float[] data) {
|
||||
for (int j = 0; j < RANGE; j++) {
|
||||
data[j] = j;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static void init(double[] data) {
|
||||
for (int j = 0; j < RANGE; j++) {
|
||||
data[j] = j;
|
||||
}
|
||||
}
|
||||
|
||||
static void verify(String name, int[] data, int[] gold) {
|
||||
for (int i = 0; i < RANGE; i++) {
|
||||
if (data[i] != gold[i]) {
|
||||
throw new RuntimeException(" Invalid " + name + " result: data[" + i + "]: " + data[i] + " != " + gold[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void verify(String name, long[] data, long[] gold) {
|
||||
for (int i = 0; i < RANGE; i++) {
|
||||
if (data[i] != gold[i]) {
|
||||
throw new RuntimeException(" Invalid " + name + " result: data[" + i + "]: " + data[i] + " != " + gold[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void verify(String name, short[] data, short[] gold) {
|
||||
for (int i = 0; i < RANGE; i++) {
|
||||
if (data[i] != gold[i]) {
|
||||
throw new RuntimeException(" Invalid " + name + " result: data[" + i + "]: " + data[i] + " != " + gold[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void verify(String name, byte[] data, byte[] gold) {
|
||||
for (int i = 0; i < RANGE; i++) {
|
||||
if (data[i] != gold[i]) {
|
||||
throw new RuntimeException(" Invalid " + name + " result: data[" + i + "]: " + data[i] + " != " + gold[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void verify(String name, char[] data, char[] gold) {
|
||||
for (int i = 0; i < RANGE; i++) {
|
||||
if (data[i] != gold[i]) {
|
||||
throw new RuntimeException(" Invalid " + name + " result: data[" + i + "]: " + data[i] + " != " + gold[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void verify(String name, float[] data, float[] gold) {
|
||||
for (int i = 0; i < RANGE; i++) {
|
||||
if (data[i] != gold[i]) {
|
||||
throw new RuntimeException(" Invalid " + name + " result: data[" + i + "]: " + data[i] + " != " + gold[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void verify(String name, double[] data, double[] gold) {
|
||||
for (int i = 0; i < RANGE; i++) {
|
||||
if (data[i] != gold[i]) {
|
||||
throw new RuntimeException(" Invalid " + name + " result: data[" + i + "]: " + data[i] + " != " + gold[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user