From 01e6920581407bc3bd69db495fc694629ef01262 Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Wed, 15 Mar 2023 14:02:45 +0000 Subject: [PATCH] 8298935: fix independence bug in create_pack logic in SuperWord::find_adjacent_refs Reviewed-by: kvn, jbhateja --- src/hotspot/share/opto/superword.cpp | 257 +- src/hotspot/share/opto/superword.hpp | 13 + .../compiler/lib/ir_framework/IRNode.java | 10 + .../superword/TestCyclicDependency.java | 399 + .../superword/TestDependencyOffsets.java | 11389 ++++++++++++++++ .../vectorization/TestForEachRem.java | 68 +- .../vectorization/TestOptionVectorizeIR.java | 803 ++ 7 files changed, 12878 insertions(+), 61 deletions(-) create mode 100644 test/hotspot/jtreg/compiler/loopopts/superword/TestCyclicDependency.java create mode 100644 test/hotspot/jtreg/compiler/loopopts/superword/TestDependencyOffsets.java create mode 100644 test/hotspot/jtreg/compiler/vectorization/TestOptionVectorizeIR.java diff --git a/src/hotspot/share/opto/superword.cpp b/src/hotspot/share/opto/superword.cpp index baf880aac20..7d3517d34b2 100644 --- a/src/hotspot/share/opto/superword.cpp +++ b/src/hotspot/share/opto/superword.cpp @@ -539,6 +539,8 @@ bool SuperWord::SLP_extract() { filter_packs(); + DEBUG_ONLY(verify_packs();) + schedule(); // Record eventual count of vector packs for checks in post loop vectorization @@ -640,54 +642,10 @@ void SuperWord::find_adjacent_refs() { } } - // Create initial pack pairs of memory operations for which - // alignment is set and vectors will be aligned. - bool create_pack = true; - if (memory_alignment(mem_ref, best_iv_adjustment) == 0 || _do_vector_loop) { - if (vectors_should_be_aligned()) { - int vw = vector_width(mem_ref); - int vw_best = vector_width(best_align_to_mem_ref); - if (vw > vw_best) { - // Do not vectorize a memory access with more elements per vector - // if unaligned memory access is not allowed because number of - // iterations in pre-loop will be not enough to align it. - create_pack = false; - } else { - SWPointer p2(best_align_to_mem_ref, this, nullptr, false); - if (!align_to_ref_p.invar_equals(p2)) { - // Do not vectorize memory accesses with different invariants - // if unaligned memory accesses are not allowed. - create_pack = false; - } - } - } - } else { - if (same_memory_slice(best_align_to_mem_ref, mem_ref)) { - // Can't allow vectorization of unaligned memory accesses with the - // same memory slice since it could be overlapped accesses to the same array. - create_pack = false; - } else { - // Allow independent (different type) unaligned memory operations - // if HW supports them. - if (vectors_should_be_aligned()) { - create_pack = false; - } else { - // Check if packs of the same memory slice but - // with a different alignment were created before. - for (uint i = 0; i < align_to_refs.size(); i++) { - MemNode* mr = align_to_refs.at(i)->as_Mem(); - if (mr == mem_ref) { - // Skip when we are looking at same memory operation. - continue; - } - if (same_memory_slice(mem_ref, mr) && - memory_alignment(mr, iv_adjustment) != 0) - create_pack = false; - } - } - } - } - if (create_pack) { + if (can_create_pairs(mem_ref, iv_adjustment, align_to_ref_p, + best_align_to_mem_ref, best_iv_adjustment, + align_to_refs)) { + // Create initial pack pairs of memory operations for which alignment was set. for (uint i = 0; i < memops.size(); i++) { Node* s1 = memops.at(i); int align = alignment(s1); @@ -707,7 +665,9 @@ void SuperWord::find_adjacent_refs() { } } } - } else { // Don't create unaligned pack + } else { + // Cannot create pairs for mem_ref. Reject all related memops forever. + // First, remove remaining memory ops of the same memory slice from the list. for (int i = memops.size() - 1; i >= 0; i--) { MemNode* s = memops.at(i)->as_Mem(); @@ -794,6 +754,96 @@ void SuperWord::find_adjacent_refs_trace_1(Node* best_align_to_mem_ref, int best } #endif +// Check if we can create the pack pairs for mem_ref: +// If required, enforce strict alignment requirements of hardware. +// Else, only enforce alignment within a memory slice, so that there cannot be any +// memory-dependence between different vector "lanes". +bool SuperWord::can_create_pairs(MemNode* mem_ref, int iv_adjustment, SWPointer &align_to_ref_p, + MemNode* best_align_to_mem_ref, int best_iv_adjustment, + Node_List &align_to_refs) { + bool is_aligned_with_best = memory_alignment(mem_ref, best_iv_adjustment) == 0; + + if (vectors_should_be_aligned()) { + // All vectors need to be memory aligned, modulo their vector_width. This is more strict + // than the hardware probably requires. Most hardware at most requires 4-byte alignment. + // + // In the pre-loop, we align best_align_to_mem_ref to its vector_length. To ensure that + // all mem_ref's are memory aligned modulo their vector_width, we only need to check that + // they are all aligned to best_align_to_mem_ref, modulo their vector_width. For that, + // we check the following 3 conditions. + + // (1) All packs are aligned with best_align_to_mem_ref. + if (!is_aligned_with_best) { + return false; + } + // (2) All other vectors have vector_size less or equal to that of best_align_to_mem_ref. + int vw = vector_width(mem_ref); + int vw_best = vector_width(best_align_to_mem_ref); + if (vw > vw_best) { + // We only align to vector_width of best_align_to_mem_ref during pre-loop. + // A mem_ref with a larger vector_width might thus not be vector_width aligned. + return false; + } + // (3) Ensure that all vectors have the same invariant. We model memory accesses like this + // address = base + k*iv + constant [+ invar] + // memory_alignment ignores the invariant. + SWPointer p2(best_align_to_mem_ref, this, nullptr, false); + if (!align_to_ref_p.invar_equals(p2)) { + // Do not vectorize memory accesses with different invariants + // if unaligned memory accesses are not allowed. + return false; + } + return true; + } else { + // Alignment is not required by the hardware. + + // However, we need to ensure that the pack for mem_ref is independent, i.e. all members + // of the pack are mutually independent. + + if (_do_vector_loop) { + // Wait until combine_packs to check independence of packs. For now we just know that + // the adjacent pairs are independent. This allows us to vectorize when we do not have + // alignment modulo vector_width. For example (forward read): + // for (int i ...) { v[i] = v[i + 1] + 5; } + // The following will be filtered out in combine_packs (forward write): + // for (int i ...) { v[i + 1] = v[i] + 5; } + return true; + } + + // If all mem_ref's are modulo vector_width aligned with all other mem_ref's of their + // memory slice, then the VectorLoad / VectorStore regions are either exactly overlapping + // or completely non-overlapping. This ensures that there cannot be memory-dependencies + // between different vector "lanes". + // During SuperWord::filter_packs -> SuperWord::profitable -> SuperWord::is_vector_use, + // we check that all inputs are vectors that match on every element (with some reasonable + // exceptions). This ensures that every "lane" is isomorpic and independent to all other + // "lanes". This allows us to vectorize these cases: + // for (int i ...) { v[i] = v[i] + 5; } // same alignment + // for (int i ...) { v[i] = v[i + 32] + 5; } // alignment modulo vector_width + if (same_memory_slice(mem_ref, best_align_to_mem_ref)) { + return is_aligned_with_best; + } else { + return is_mem_ref_aligned_with_same_memory_slice(mem_ref, iv_adjustment, align_to_refs); + } + } +} + +// Check if alignment of mem_ref is consistent with the other packs of the same memory slice +bool SuperWord::is_mem_ref_aligned_with_same_memory_slice(MemNode* mem_ref, int iv_adjustment, + Node_List &align_to_refs) { + for (uint i = 0; i < align_to_refs.size(); i++) { + MemNode* mr = align_to_refs.at(i)->as_Mem(); + if (mr != mem_ref && + same_memory_slice(mr, mem_ref) && + memory_alignment(mr, iv_adjustment) != 0) { + // mem_ref is misaligned with mr, another ref of the same memory slice. + return false; + } + } + // No misalignment found. + return true; +} + //------------------------------find_align_to_ref--------------------------- // Find a memory reference to align the loop induction variable to. // Looks first at stores then at loads, looking for a memory reference @@ -1326,6 +1376,44 @@ bool SuperWord::independent(Node* s1, Node* s2) { return independent_path(shallow, deep); } +//------------------------------find_dependence--------------------- +// Is any s1 in p dependent on any s2 in p? Yes: return such a s2. No: return nullptr. +// We could query independent(s1, s2) for all pairs, but that results +// in O(p.size * p.size) graph traversals. We can do it all in one BFS! +// Start the BFS traversal at all nodes from the pack. Traverse DepPreds +// recursively, for nodes that have at least depth min_d, which is the +// smallest depth of all nodes from the pack. Once we have traversed all +// those nodes, and have not found another node from the pack, we know +// that all nodes in the pack are independent. +Node* SuperWord::find_dependence(Node_List* p) { + if (p->at(0)->is_reduction()) { + return nullptr; // ignore reductions + } + ResourceMark rm; + Unique_Node_List worklist; // traversal queue + int min_d = depth(p->at(0)); + visited_clear(); + for (uint k = 0; k < p->size(); k++) { + Node* n = p->at(k); + min_d = MIN2(min_d, depth(n)); + worklist.push(n); // start traversal at all nodes in p + visited_set(n); // mark node + } + for (uint i = 0; i < worklist.size(); i++) { + Node* n = worklist.at(i); + for (DepPreds preds(n, _dg); !preds.done(); preds.next()) { + Node* pred = preds.current(); + if (in_bb(pred) && depth(pred) >= min_d) { + if (visited_test(pred)) { // marked as in p? + return pred; + } + worklist.push(pred); + } + } + } + return nullptr; +} + //--------------------------have_similar_inputs----------------------- // For a node pair (s1, s2) which is isomorphic and independent, // do s1 and s2 have similar input edges? @@ -1483,8 +1571,10 @@ bool SuperWord::follow_use_defs(Node_List* p) { int align = alignment(s1); Node* t1 = s1->in(j); Node* t2 = s2->in(j); - if (!in_bb(t1) || !in_bb(t2)) + if (!in_bb(t1) || !in_bb(t2) || t1->is_Mem() || t2->is_Mem()) { + // Only follow non-memory nodes in block - we do not want to resurrect misaligned packs. continue; + } align = adjust_alignment_for_type_conversion(s1, t1, align); if (stmts_can_pack(t1, t2, align)) { if (est_savings(t1, t2) >= 0) { @@ -1522,10 +1612,16 @@ bool SuperWord::follow_def_uses(Node_List* p) { for (DUIterator_Fast imax, i = s1->fast_outs(imax); i < imax; i++) { Node* t1 = s1->fast_out(i); num_s1_uses++; - if (!in_bb(t1)) continue; + if (!in_bb(t1) || t1->is_Mem()) { + // Only follow non-memory nodes in block - we do not want to resurrect misaligned packs. + continue; + } for (DUIterator_Fast jmax, j = s2->fast_outs(jmax); j < jmax; j++) { Node* t2 = s2->fast_out(j); - if (!in_bb(t2)) continue; + if (!in_bb(t2) || t2->is_Mem()) { + // Only follow non-memory nodes in block - we do not want to resurrect misaligned packs. + continue; + } if (t2->Opcode() == Op_AddI && t2 == _lp->as_CountedLoop()->incr()) continue; // don't mess with the iv if (!opnd_positions_match(s1, t1, s2, t2)) continue; @@ -1714,7 +1810,6 @@ void SuperWord::combine_packs() { for (int j = i + 1; j < _packset.length(); j++) { Node_List* p2 = _packset.at(j); if (p2 == nullptr) continue; - if (i == j) continue; if (p1->at(p1->size()-1) == p2->at(0)) { for (uint k = 1; k < p2->size(); k++) { p1->push(p2->at(k)); @@ -1755,6 +1850,32 @@ void SuperWord::combine_packs() { } } + if (_do_vector_loop) { + // Since we did not enforce exact alignment of the packsets, we only know that there + // is no dependence with distance 1, because we have checked independent(s1, s2) for + // all adjacent memops. But there could be a dependence of a different distance. + // Hence: remove the pack if there is a dependence. + for (int i = 0; i < _packset.length(); i++) { + Node_List* p = _packset.at(i); + if (p != nullptr) { + Node* dependence = find_dependence(p); + if (dependence != nullptr) { +#ifndef PRODUCT + if (TraceSuperWord) { + tty->cr(); + tty->print_cr("WARNING: Found dependency."); + tty->print_cr("Cannot vectorize despite compile directive Vectorize."); + dependence->dump(); + tty->print_cr("In pack[%d]", i); + print_pack(p); + } +#endif + _packset.at_put(i, nullptr); + } + } + } + } + // Compress list. for (int i = _packset.length() - 1; i >= 0; i--) { Node_List* p1 = _packset.at(i); @@ -1773,7 +1894,6 @@ void SuperWord::combine_packs() { // Construct the map from nodes to packs. Only valid after the // point where a node is only in one pack (after combine_packs). void SuperWord::construct_my_pack_map() { - Node_List* rslt = nullptr; for (int i = 0; i < _packset.length(); i++) { Node_List* p = _packset.at(i); for (uint j = 0; j < p->size(); j++) { @@ -2203,6 +2323,29 @@ bool SuperWord::profitable(Node_List* p) { return true; } +#ifdef ASSERT +void SuperWord::verify_packs() { + for (int i = 0; i < _packset.length(); i++) { + Node_List* p = _packset.at(i); + Node* dependence = find_dependence(p); + if (dependence != nullptr) { + tty->print_cr("Other nodes in pack have dependence on:"); + dependence->dump(); + tty->print_cr("The following nodes are not independent:"); + for (uint k = 0; k < p->size(); k++) { + Node* n = p->at(k); + if (!independent(n, dependence)) { + n->dump(); + } + } + tty->print_cr("They are all from pack[%d]", i); + print_pack(p); + } + assert(dependence == nullptr, "all nodes in pack must be mutually independent"); + } +} +#endif + //------------------------------schedule--------------------------- // Adjust the memory graph for the packed operations void SuperWord::schedule() { @@ -4027,7 +4170,11 @@ void SuperWord::print_packset() { for (int i = 0; i < _packset.length(); i++) { tty->print_cr("Pack: %d", i); Node_List* p = _packset.at(i); - print_pack(p); + if (p == nullptr) { + tty->print_cr(" nullptr"); + } else { + print_pack(p); + } } #endif } diff --git a/src/hotspot/share/opto/superword.hpp b/src/hotspot/share/opto/superword.hpp index 6d24e528be5..e5ea1907c8d 100644 --- a/src/hotspot/share/opto/superword.hpp +++ b/src/hotspot/share/opto/superword.hpp @@ -476,6 +476,15 @@ class SuperWord : public ResourceObj { void find_adjacent_refs_trace_1(Node* best_align_to_mem_ref, int best_iv_adjustment); void print_loop(bool whole); #endif + // Check if we can create the pack pairs for mem_ref: + // If required, enforce strict alignment requirements of hardware. + // Else, only enforce alignment within a memory slice, so that there cannot be any + // memory-dependence between different vector "lanes". + bool can_create_pairs(MemNode* mem_ref, int iv_adjustment, SWPointer &align_to_ref_p, + MemNode* best_align_to_mem_ref, int best_iv_adjustment, + Node_List &align_to_refs); + // Check if alignment of mem_ref is consistent with the other packs of the same memory slice. + bool is_mem_ref_aligned_with_same_memory_slice(MemNode* mem_ref, int iv_adjustment, Node_List &align_to_refs); // Find a memory reference to align the loop induction variable to. MemNode* find_align_to_ref(Node_List &memops, int &idx); // Calculate loop's iv adjustment for this memory ops. @@ -512,6 +521,8 @@ class SuperWord : public ResourceObj { bool isomorphic(Node* s1, Node* s2); // Is there no data path from s1 to s2 or s2 to s1? bool independent(Node* s1, Node* s2); + // Is any s1 in p dependent on any s2 in p? Yes: return such a s2. No: return nullptr. + Node* find_dependence(Node_List* p); // For a node pair (s1, s2) which is isomorphic and independent, // do s1 and s2 have similar input edges? bool have_similar_inputs(Node* s1, Node* s2); @@ -543,6 +554,8 @@ class SuperWord : public ResourceObj { void filter_packs(); // Merge CMove into new vector-nodes void merge_packs_to_cmove(); + // Verify that for every pack, all nodes are mutually independent + DEBUG_ONLY(void verify_packs();) // Adjust the memory graph for the packed operations void schedule(); // Remove "current" from its current position in the memory graph and insert diff --git a/test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java b/test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java index b7d401b6d3b..6e326672282 100644 --- a/test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java +++ b/test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java @@ -166,6 +166,11 @@ public class IRNode { beforeMatchingNameRegex(ADD_VI, "AddVI"); } + public static final String ADD_VF = PREFIX + "ADD_VF" + POSTFIX; + static { + beforeMatchingNameRegex(ADD_VF, "AddVF"); + } + public static final String ADD_REDUCTION_V = PREFIX + "ADD_REDUCTION_V" + POSTFIX; static { beforeMatchingNameRegex(ADD_REDUCTION_V, "AddReductionV(B|S|I|L|F|D)"); @@ -714,6 +719,11 @@ public class IRNode { beforeMatchingNameRegex(MUL_VL, "MulVL"); } + public static final String MUL_VI = PREFIX + "MUL_VI" + POSTFIX; + static { + beforeMatchingNameRegex(MUL_VI, "MulVI"); + } + public static final String MUL_REDUCTION_VD = PREFIX + "MUL_REDUCTION_VD" + POSTFIX; static { superWordNodes(MUL_REDUCTION_VD, "MulReductionVD"); diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/TestCyclicDependency.java b/test/hotspot/jtreg/compiler/loopopts/superword/TestCyclicDependency.java new file mode 100644 index 00000000000..803c7e9841d --- /dev/null +++ b/test/hotspot/jtreg/compiler/loopopts/superword/TestCyclicDependency.java @@ -0,0 +1,399 @@ +/* + * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +/* + * @test + * @bug 8298935 + * @summary Writing forward on array creates cyclic dependency + * which leads to wrong result, when ignored. + * @requires vm.compiler2.enabled + * @library /test/lib / + * @run driver TestCyclicDependency + */ + +import jdk.test.lib.Asserts; +import compiler.lib.ir_framework.*; + +public class TestCyclicDependency { + static final int RANGE = 512; + static final int ITER = 100; + int[] goldI0 = new int[RANGE]; + float[] goldF0 = new float[RANGE]; + int[] goldI1 = new int[RANGE]; + float[] goldF1 = new float[RANGE]; + int[] goldI2 = new int[RANGE]; + float[] goldF2 = new float[RANGE]; + int[] goldI3 = new int[RANGE]; + float[] goldF3 = new float[RANGE]; + int[] goldI4 = new int[RANGE]; + float[] goldF4 = new float[RANGE]; + int[] goldI5a = new int[RANGE]; + float[] goldF5a = new float[RANGE]; + int[] goldI5b = new int[RANGE]; + float[] goldF5b = new float[RANGE]; + int[] goldI6a = new int[RANGE]; + float[] goldF6a = new float[RANGE]; + int[] goldI6b = new int[RANGE]; + float[] goldF6b = new float[RANGE]; + int[] goldI7 = new int[RANGE]; + float[] goldF7 = new float[RANGE]; + int[] goldI8 = new int[RANGE]; + float[] goldF8 = new float[RANGE]; + int[] goldI9 = new int[RANGE]; + float[] goldF9 = new float[RANGE]; + + public static void main(String args[]) { + TestFramework.runWithFlags("-XX:CompileCommand=compileonly,TestCyclicDependency::test*"); + } + + TestCyclicDependency() { + // compute the gold standard in interpreter mode + // test0 + init(goldI0, goldF0); + test0(goldI0, goldF0); + // test1 + init(goldI1, goldF1); + test1(goldI1, goldF1); + // test2 + init(goldI2, goldF2); + test2(goldI2, goldF2); + // test3 + init(goldI3, goldF3); + test3(goldI3, goldF3); + // test4 + init(goldI4, goldF4); + test4(goldI4, goldF4); + // test5a + init(goldI5a, goldF5a); + test5a(goldI5a, goldF5a); + // test5b + init(goldI5b, goldF5b); + test5b(goldI5b, goldF5b); + // test6a + init(goldI6a, goldF6a); + test6a(goldI6a, goldF6a); + // test6b + init(goldI6b, goldF6b); + test6b(goldI6b, goldF6b); + // test7 + init(goldI7, goldF7); + test7(goldI7, goldF7); + // test8 + init(goldI8, goldF8); + test8(goldI8, goldF8); + // test9 + init(goldI9, goldF9); + test9(goldI9, goldF9); + } + + @Run(test = "test0") + @Warmup(100) + public void runTest0() { + int[] dataI = new int[RANGE]; + float[] dataF = new float[RANGE]; + init(dataI, dataF); + test0(dataI, dataF); + verifyI("test0", dataI, goldI0); + verifyF("test0", dataF, goldF0); + } + + @Run(test = "test1") + @Warmup(100) + public void runTest1() { + int[] dataI = new int[RANGE]; + float[] dataF = new float[RANGE]; + init(dataI, dataF); + test1(dataI, dataF); + verifyI("test1", dataI, goldI1); + verifyF("test1", dataF, goldF1); + } + + @Run(test = "test2") + @Warmup(100) + public void runTest2() { + int[] dataI = new int[RANGE]; + float[] dataF = new float[RANGE]; + init(dataI, dataF); + test2(dataI, dataF); + verifyI("test2", dataI, goldI2); + verifyF("test2", dataF, goldF2); + } + + @Run(test = "test3") + @Warmup(100) + public void runTest3() { + int[] dataI = new int[RANGE]; + float[] dataF = new float[RANGE]; + init(dataI, dataF); + test3(dataI, dataF); + verifyI("test3", dataI, goldI3); + verifyF("test3", dataF, goldF3); + } + + @Run(test = "test4") + @Warmup(100) + public void runTest4() { + int[] dataI = new int[RANGE]; + float[] dataF = new float[RANGE]; + init(dataI, dataF); + test4(dataI, dataF); + verifyI("test4", dataI, goldI4); + verifyF("test4", dataF, goldF4); + } + + @Run(test = "test5a") + @Warmup(100) + public void runTest5a() { + int[] dataI = new int[RANGE]; + float[] dataF = new float[RANGE]; + init(dataI, dataF); + test5a(dataI, dataF); + verifyI("test5a", dataI, goldI5a); + verifyF("test5a", dataF, goldF5a); + } + + @Run(test = "test5b") + @Warmup(100) + public void runTest5b() { + int[] dataI = new int[RANGE]; + float[] dataF = new float[RANGE]; + init(dataI, dataF); + test5b(dataI, dataF); + verifyI("test5b", dataI, goldI5b); + verifyF("test5b", dataF, goldF5b); + } + + @Run(test = "test6a") + @Warmup(100) + public void runTest6a() { + int[] dataI = new int[RANGE]; + float[] dataF = new float[RANGE]; + init(dataI, dataF); + test6a(dataI, dataF); + verifyI("test6a", dataI, goldI6a); + verifyF("test6a", dataF, goldF6a); + } + + @Run(test = "test6b") + @Warmup(100) + public void runTest6b() { + int[] dataI = new int[RANGE]; + float[] dataF = new float[RANGE]; + init(dataI, dataF); + test6b(dataI, dataF); + verifyI("test6b", dataI, goldI6b); + verifyF("test6b", dataF, goldF6b); + } + + @Run(test = "test7") + @Warmup(100) + public void runTest7() { + int[] dataI = new int[RANGE]; + float[] dataF = new float[RANGE]; + init(dataI, dataF); + test7(dataI, dataF); + verifyI("test7", dataI, goldI7); + verifyF("test7", dataF, goldF7); + } + + @Run(test = "test8") + @Warmup(100) + public void runTest8() { + int[] dataI = new int[RANGE]; + float[] dataF = new float[RANGE]; + init(dataI, dataF); + test8(dataI, dataF); + verifyI("test8", dataI, goldI8); + verifyF("test8", dataF, goldF8); + } + + @Run(test = "test9") + @Warmup(100) + public void runTest9() { + int[] dataI = new int[RANGE]; + float[] dataF = new float[RANGE]; + init(dataI, dataF); + test9(dataI, dataF); + verifyI("test9", dataI, goldI9); + verifyF("test9", dataF, goldF9); + } + + @Test + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) + static void test0(int[] dataI, float[] dataF) { + for (int i = 0; i < RANGE; i++) { + // All perfectly aligned, expect vectorization + int v = dataI[i]; + dataI[i] = v + 5; + } + } + + @Test + static void test1(int[] dataI, float[] dataF) { + for (int i = 0; i < RANGE - 1; i++) { + // dataI has cyclic dependency of distance 1 + int v = dataI[i]; + dataI[i + 1] = v; + dataF[i] = v; // let's not get confused by another type + } + } + + @Test + static void test2(int[] dataI, float[] dataF) { + for (int i = 0; i < RANGE - 2; i++) { + // dataI has cyclic dependency of distance 2 + int v = dataI[i]; + dataI[i + 2] = v; + dataF[i] = v; // let's not get confused by another type + } + } + + @Test + static void test3(int[] dataI, float[] dataF) { + for (int i = 0; i < RANGE - 3; i++) { + // dataI has cyclic dependency of distance 3 + int v = dataI[i]; + dataI[i + 3] = v; + dataF[i] = v; // let's not get confused by another type + } + } + + @Test + static void test4(int[] dataI, float[] dataF) { + for (int i = 1; i < RANGE - 1; i++) { + // dataI has cyclic dependency of distance 2 + int v = dataI[i - 1]; + dataI[i + 1] = v; + dataF[i] = v; // let's not get confused by another type + } + } + + @Test + static void test5a(int[] dataI, float[] dataF) { + for (int i = 2; i < RANGE; i++) { + // dataI has read / write distance 1, but no cyclic dependency + int v = dataI[i]; + dataI[i - 1] = v + 5; + } + } + + @Test + static void test5b(int[] dataI, float[] dataF) { + for (int i = 1; i < RANGE; i++) { + // dataI has read / write distance 1, but no cyclic dependency + int v = dataI[i]; + dataI[i - 1] = v; + dataF[i] = v; // let's not get confused by another type + } + } + + @Test + static void test6a(int[] dataI, float[] dataF) { + for (int i = 2; i < RANGE; i++) { + // dataI has read / write distance 2, but no cyclic dependency + int v = dataI[i]; + dataI[i - 2] = v + 5; + } + } + + @Test + static void test6b(int[] dataI, float[] dataF) { + for (int i = 2; i < RANGE; i++) { + // dataI has read / write distance 2, but no cyclic dependency + int v = dataI[i]; + dataI[i - 2] = v; + dataF[i] = v; // let's not get confused by another type + } + } + + @Test + @IR(counts = {IRNode.ADD_VI, "> 0"}, + applyIf = {"AlignVector", "false"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) + // Some aarch64 machines have AlignVector == true, like ThunderX2 + static void test7(int[] dataI, float[] dataF) { + for (int i = 0; i < RANGE - 32; i++) { + // write forward 32 -> more than vector size -> can vectorize + // write forward 3 -> cannot vectorize + // separate types should make decision separately if they vectorize or not + int v = dataI[i]; + dataI[i + 32] = v + 5; + float f = dataF[i]; + dataF[i + 3] = f + 3.5f; + } + } + + @Test + @IR(counts = {IRNode.ADD_VF, "> 0"}, + applyIf = {"AlignVector", "false"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) + // Some aarch64 machines have AlignVector == true, like ThunderX2 + static void test8(int[] dataI, float[] dataF) { + for (int i = 0; i < RANGE - 32; i++) { + // write forward 32 -> more than vector size -> can vectorize + // write forward 3 -> cannot vectorize + // separate types should make decision separately if they vectorize or not + int v = dataI[i]; + dataI[i + 3] = v + 5; + float f = dataF[i]; + dataF[i + 32] = f + 3.5f; + } + } + + @Test + @IR(counts = {IRNode.ADD_REDUCTION_VI, "> 0"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) + static void test9(int[] dataI, float[] dataF) { + int sI = 666; + for (int i = 0; i < RANGE; i++) { + // self-cycle allowed for reduction + sI += dataI[i] * 2; // factor necessary to make it profitable + } + dataI[0] = sI; // write back + } + + public static void init(int[] dataI, float[] dataF) { + for (int j = 0; j < RANGE; j++) { + dataI[j] = j; + dataF[j] = j * 0.5f; + } + } + + static void verifyI(String name, int[] data, int[] gold) { + for (int i = 0; i < RANGE; i++) { + if (data[i] != gold[i]) { + throw new RuntimeException(" Invalid " + name + " result: dataI[" + i + "]: " + data[i] + " != " + gold[i]); + } + } + } + + static void verifyF(String name, float[] data, float[] gold) { + for (int i = 0; i < RANGE; i++) { + if (data[i] != gold[i]) { + throw new RuntimeException(" Invalid " + name + " result: dataF[" + i + "]: " + data[i] + " != " + gold[i]); + } + } + } +} diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/TestDependencyOffsets.java b/test/hotspot/jtreg/compiler/loopopts/superword/TestDependencyOffsets.java new file mode 100644 index 00000000000..9130571a406 --- /dev/null +++ b/test/hotspot/jtreg/compiler/loopopts/superword/TestDependencyOffsets.java @@ -0,0 +1,11389 @@ +/* + * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/* + * Summary: + * Test SuperWord vectorization with different access offsets + * and various MaxVectorSize values, and +- AlignVector. + * Note: CompileCommand Option Vectorize is enabled. + * + * Note: this test is auto-generated. Please modify / generate with script: + * https://bugs.openjdk.org/browse/JDK-8298935 + * + * Types: int, long, short, char, byte, float, double + * Offsets: 0, -1, 1, -2, 2, -3, 3, -4, 4, -7, 7, -8, 8, -14, 14, -16, 16, -18, 18, -20, 20, -31, 31, -32, 32, -63, 63, -64, 64, -65, 65, -128, 128, -129, 129, -192, 192 + * + * Checking if we should vectorize is a bit complicated. It depends on + * Matcher::vector_width_in_bytes, of the respective platforms (eg. x86.ad) + * This vector_width can be further constrained by MaxVectorSize. + * + * With '-XX:-AlignVector', we vectorize if: + * - Vectors have at least 4 bytes: vector_width >= 4 + * - Vectors hold at least 2 elements: vector_width >= 2 * sizeofop(velt_type) + * -> min_vector_width = max(4, 2 * sizeofop(velt_type)) + * -> simplifies to: vector_width >= min_vector_width + * - No cyclic dependency: + * - Access: data[i + offset] = data[i] * fac; + * - byte_offset = offset * sizeofop(type) + * - Cyclic dependency if: 0 < byte_offset < vector_width + * + * Note: sizeofop(type) = sizeof(type), except sizeofop(char) = 2 + * + * Different types can lead to different vector_width. This depends on + * the CPU-features. Thus, we have a positive and negative IR rule per + * CPU-feature for each test. + * + * Definition: + * MaxVectorSize: limit through flag + * vector_width: limit given by specific CPU feature for a specific velt_type + * actual_vector_width: what is actually vectorized with + * min_vector_width: what is minimally required for vectorization + * + * min_vector_width = max(4, 2 * sizeofop(velt_type)) + * MaxVectorSize >= vector_width >= actual_vector_width >= min_vector_width + * + * In general, we cannot easily specify negative IR rules, that require no + * vectorization to happen. We may improve the SuperWord algorithm later, + * or some additional optimization collapses some Loads, and suddenly cyclic + * dependency disappears, and we can vectorize. + * + * With '-XX:+AlignVector', we would like to check that we vectorize exactly iff: + * byte_offset % actual_vector_width == 0 + * Because all vector_widths are powers of 2, this is equivalent to: + * pow2_factor(byte_offset) >= actual_vector_width + * where pow2_factor computes the largest power of 2 that is a factor of the number. + * + * Under these assumptions, we know there must be vectorization: + * pow2_factor(byte_offset) >= vector_width + * implies + * pow2_factor(byte_offset) >= actual_vector_width + * MaxVectorSize >= min_vector_size + * else any vectorization is impossible. + * + * And under the following conditions no vectorization is possible: + * byte_offset < 0: No cyclic dependency. + * Cyclic dependency could lead to Load removals, then only the store is vectorized. + * byte_offset % min_vector_width != 0 + * implies + * byte_offset % actual_vector_width != 0 + * + */ + +/* + * @test id=vanilla-A + * @bug 8298935 + * @summary Test SuperWord: vector size, offsets, dependencies, alignment. + * @requires vm.compiler2.enabled + * @library /test/lib / + * @run driver compiler.loopopts.superword.TestDependencyOffsets vanilla-A + */ + +/* + * @test id=vanilla-U + * @bug 8298935 + * @summary Test SuperWord: vector size, offsets, dependencies, alignment. + * @requires vm.compiler2.enabled + * @library /test/lib / + * @run driver compiler.loopopts.superword.TestDependencyOffsets vanilla-U + */ + +/* + * @test id=sse4-v016-A + * @bug 8298935 + * @summary Test SuperWord: vector size, offsets, dependencies, alignment. + * @requires vm.compiler2.enabled + * @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64") + * @requires vm.cpu.features ~= ".*sse4.*" + * @library /test/lib / + * @run driver compiler.loopopts.superword.TestDependencyOffsets sse4-v016-A + */ + +/* + * @test id=sse4-v016-U + * @bug 8298935 + * @summary Test SuperWord: vector size, offsets, dependencies, alignment. + * @requires vm.compiler2.enabled + * @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64") + * @requires vm.cpu.features ~= ".*sse4.*" + * @library /test/lib / + * @run driver compiler.loopopts.superword.TestDependencyOffsets sse4-v016-U + */ + +/* + * @test id=sse4-v008-A + * @bug 8298935 + * @summary Test SuperWord: vector size, offsets, dependencies, alignment. + * @requires vm.compiler2.enabled + * @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64") + * @requires vm.cpu.features ~= ".*sse4.*" + * @library /test/lib / + * @run driver compiler.loopopts.superword.TestDependencyOffsets sse4-v008-A + */ + +/* + * @test id=sse4-v008-U + * @bug 8298935 + * @summary Test SuperWord: vector size, offsets, dependencies, alignment. + * @requires vm.compiler2.enabled + * @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64") + * @requires vm.cpu.features ~= ".*sse4.*" + * @library /test/lib / + * @run driver compiler.loopopts.superword.TestDependencyOffsets sse4-v008-U + */ + +/* + * @test id=sse4-v004-A + * @bug 8298935 + * @summary Test SuperWord: vector size, offsets, dependencies, alignment. + * @requires vm.compiler2.enabled + * @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64") + * @requires vm.cpu.features ~= ".*sse4.*" + * @library /test/lib / + * @run driver compiler.loopopts.superword.TestDependencyOffsets sse4-v004-A + */ + +/* + * @test id=sse4-v004-U + * @bug 8298935 + * @summary Test SuperWord: vector size, offsets, dependencies, alignment. + * @requires vm.compiler2.enabled + * @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64") + * @requires vm.cpu.features ~= ".*sse4.*" + * @library /test/lib / + * @run driver compiler.loopopts.superword.TestDependencyOffsets sse4-v004-U + */ + +/* + * @test id=sse4-v002-A + * @bug 8298935 + * @summary Test SuperWord: vector size, offsets, dependencies, alignment. + * @requires vm.compiler2.enabled + * @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64") + * @requires vm.cpu.features ~= ".*sse4.*" + * @library /test/lib / + * @run driver compiler.loopopts.superword.TestDependencyOffsets sse4-v002-A + */ + +/* + * @test id=sse4-v002-U + * @bug 8298935 + * @summary Test SuperWord: vector size, offsets, dependencies, alignment. + * @requires vm.compiler2.enabled + * @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64") + * @requires vm.cpu.features ~= ".*sse4.*" + * @library /test/lib / + * @run driver compiler.loopopts.superword.TestDependencyOffsets sse4-v002-U + */ + +/* + * @test id=avx1-v032-A + * @bug 8298935 + * @summary Test SuperWord: vector size, offsets, dependencies, alignment. + * @requires vm.compiler2.enabled + * @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64") + * @requires vm.cpu.features ~= ".*avx.*" + * @library /test/lib / + * @run driver compiler.loopopts.superword.TestDependencyOffsets avx1-v032-A + */ + +/* + * @test id=avx1-v032-U + * @bug 8298935 + * @summary Test SuperWord: vector size, offsets, dependencies, alignment. + * @requires vm.compiler2.enabled + * @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64") + * @requires vm.cpu.features ~= ".*avx.*" + * @library /test/lib / + * @run driver compiler.loopopts.superword.TestDependencyOffsets avx1-v032-U + */ + +/* + * @test id=avx1-v016-A + * @bug 8298935 + * @summary Test SuperWord: vector size, offsets, dependencies, alignment. + * @requires vm.compiler2.enabled + * @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64") + * @requires vm.cpu.features ~= ".*avx.*" + * @library /test/lib / + * @run driver compiler.loopopts.superword.TestDependencyOffsets avx1-v016-A + */ + +/* + * @test id=avx1-v016-U + * @bug 8298935 + * @summary Test SuperWord: vector size, offsets, dependencies, alignment. + * @requires vm.compiler2.enabled + * @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64") + * @requires vm.cpu.features ~= ".*avx.*" + * @library /test/lib / + * @run driver compiler.loopopts.superword.TestDependencyOffsets avx1-v016-U + */ + +/* + * @test id=avx2-v032-A + * @bug 8298935 + * @summary Test SuperWord: vector size, offsets, dependencies, alignment. + * @requires vm.compiler2.enabled + * @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64") + * @requires vm.cpu.features ~= ".*avx2.*" + * @library /test/lib / + * @run driver compiler.loopopts.superword.TestDependencyOffsets avx2-v032-A + */ + +/* + * @test id=avx2-v032-U + * @bug 8298935 + * @summary Test SuperWord: vector size, offsets, dependencies, alignment. + * @requires vm.compiler2.enabled + * @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64") + * @requires vm.cpu.features ~= ".*avx2.*" + * @library /test/lib / + * @run driver compiler.loopopts.superword.TestDependencyOffsets avx2-v032-U + */ + +/* + * @test id=avx2-v016-A + * @bug 8298935 + * @summary Test SuperWord: vector size, offsets, dependencies, alignment. + * @requires vm.compiler2.enabled + * @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64") + * @requires vm.cpu.features ~= ".*avx2.*" + * @library /test/lib / + * @run driver compiler.loopopts.superword.TestDependencyOffsets avx2-v016-A + */ + +/* + * @test id=avx2-v016-U + * @bug 8298935 + * @summary Test SuperWord: vector size, offsets, dependencies, alignment. + * @requires vm.compiler2.enabled + * @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64") + * @requires vm.cpu.features ~= ".*avx2.*" + * @library /test/lib / + * @run driver compiler.loopopts.superword.TestDependencyOffsets avx2-v016-U + */ + +/* + * @test id=avx512-v064-A + * @bug 8298935 + * @summary Test SuperWord: vector size, offsets, dependencies, alignment. + * @requires vm.compiler2.enabled + * @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64") + * @requires vm.cpu.features ~= ".*avx512.*" + * @library /test/lib / + * @run driver compiler.loopopts.superword.TestDependencyOffsets avx512-v064-A + */ + +/* + * @test id=avx512-v064-U + * @bug 8298935 + * @summary Test SuperWord: vector size, offsets, dependencies, alignment. + * @requires vm.compiler2.enabled + * @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64") + * @requires vm.cpu.features ~= ".*avx512.*" + * @library /test/lib / + * @run driver compiler.loopopts.superword.TestDependencyOffsets avx512-v064-U + */ + +/* + * @test id=avx512-v032-A + * @bug 8298935 + * @summary Test SuperWord: vector size, offsets, dependencies, alignment. + * @requires vm.compiler2.enabled + * @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64") + * @requires vm.cpu.features ~= ".*avx512.*" + * @library /test/lib / + * @run driver compiler.loopopts.superword.TestDependencyOffsets avx512-v032-A + */ + +/* + * @test id=avx512-v032-U + * @bug 8298935 + * @summary Test SuperWord: vector size, offsets, dependencies, alignment. + * @requires vm.compiler2.enabled + * @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64") + * @requires vm.cpu.features ~= ".*avx512.*" + * @library /test/lib / + * @run driver compiler.loopopts.superword.TestDependencyOffsets avx512-v032-U + */ + +/* + * @test id=avx512bw-v064-A + * @bug 8298935 + * @summary Test SuperWord: vector size, offsets, dependencies, alignment. + * @requires vm.compiler2.enabled + * @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64") + * @requires vm.cpu.features ~= ".*avx512bw.*" + * @library /test/lib / + * @run driver compiler.loopopts.superword.TestDependencyOffsets avx512bw-v064-A + */ + +/* + * @test id=avx512bw-v064-U + * @bug 8298935 + * @summary Test SuperWord: vector size, offsets, dependencies, alignment. + * @requires vm.compiler2.enabled + * @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64") + * @requires vm.cpu.features ~= ".*avx512bw.*" + * @library /test/lib / + * @run driver compiler.loopopts.superword.TestDependencyOffsets avx512bw-v064-U + */ + +/* + * @test id=avx512bw-v032-A + * @bug 8298935 + * @summary Test SuperWord: vector size, offsets, dependencies, alignment. + * @requires vm.compiler2.enabled + * @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64") + * @requires vm.cpu.features ~= ".*avx512bw.*" + * @library /test/lib / + * @run driver compiler.loopopts.superword.TestDependencyOffsets avx512bw-v032-A + */ + +/* + * @test id=avx512bw-v032-U + * @bug 8298935 + * @summary Test SuperWord: vector size, offsets, dependencies, alignment. + * @requires vm.compiler2.enabled + * @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64") + * @requires vm.cpu.features ~= ".*avx512bw.*" + * @library /test/lib / + * @run driver compiler.loopopts.superword.TestDependencyOffsets avx512bw-v032-U + */ + +/* + * @test id=vec-v064-A + * @bug 8298935 + * @summary Test SuperWord: vector size, offsets, dependencies, alignment. + * @requires vm.compiler2.enabled + * @requires (os.arch!="x86" & os.arch!="i386" & os.arch!="amd64" & os.arch!="x86_64") + * @library /test/lib / + * @run driver compiler.loopopts.superword.TestDependencyOffsets vec-v064-A + */ + +/* + * @test id=vec-v064-U + * @bug 8298935 + * @summary Test SuperWord: vector size, offsets, dependencies, alignment. + * @requires vm.compiler2.enabled + * @requires (os.arch!="x86" & os.arch!="i386" & os.arch!="amd64" & os.arch!="x86_64") + * @library /test/lib / + * @run driver compiler.loopopts.superword.TestDependencyOffsets vec-v064-U + */ + +/* + * @test id=vec-v032-A + * @bug 8298935 + * @summary Test SuperWord: vector size, offsets, dependencies, alignment. + * @requires vm.compiler2.enabled + * @requires (os.arch!="x86" & os.arch!="i386" & os.arch!="amd64" & os.arch!="x86_64") + * @library /test/lib / + * @run driver compiler.loopopts.superword.TestDependencyOffsets vec-v032-A + */ + +/* + * @test id=vec-v032-U + * @bug 8298935 + * @summary Test SuperWord: vector size, offsets, dependencies, alignment. + * @requires vm.compiler2.enabled + * @requires (os.arch!="x86" & os.arch!="i386" & os.arch!="amd64" & os.arch!="x86_64") + * @library /test/lib / + * @run driver compiler.loopopts.superword.TestDependencyOffsets vec-v032-U + */ + +/* + * @test id=vec-v016-A + * @bug 8298935 + * @summary Test SuperWord: vector size, offsets, dependencies, alignment. + * @requires vm.compiler2.enabled + * @requires (os.arch!="x86" & os.arch!="i386" & os.arch!="amd64" & os.arch!="x86_64") + * @library /test/lib / + * @run driver compiler.loopopts.superword.TestDependencyOffsets vec-v016-A + */ + +/* + * @test id=vec-v016-U + * @bug 8298935 + * @summary Test SuperWord: vector size, offsets, dependencies, alignment. + * @requires vm.compiler2.enabled + * @requires (os.arch!="x86" & os.arch!="i386" & os.arch!="amd64" & os.arch!="x86_64") + * @library /test/lib / + * @run driver compiler.loopopts.superword.TestDependencyOffsets vec-v016-U + */ + +/* + * @test id=vec-v008-A + * @bug 8298935 + * @summary Test SuperWord: vector size, offsets, dependencies, alignment. + * @requires vm.compiler2.enabled + * @requires (os.arch!="x86" & os.arch!="i386" & os.arch!="amd64" & os.arch!="x86_64") + * @library /test/lib / + * @run driver compiler.loopopts.superword.TestDependencyOffsets vec-v008-A + */ + +/* + * @test id=vec-v008-U + * @bug 8298935 + * @summary Test SuperWord: vector size, offsets, dependencies, alignment. + * @requires vm.compiler2.enabled + * @requires (os.arch!="x86" & os.arch!="i386" & os.arch!="amd64" & os.arch!="x86_64") + * @library /test/lib / + * @run driver compiler.loopopts.superword.TestDependencyOffsets vec-v008-U + */ + +/* + * @test id=vec-v004-A + * @bug 8298935 + * @summary Test SuperWord: vector size, offsets, dependencies, alignment. + * @requires vm.compiler2.enabled + * @requires (os.arch!="x86" & os.arch!="i386" & os.arch!="amd64" & os.arch!="x86_64") + * @library /test/lib / + * @run driver compiler.loopopts.superword.TestDependencyOffsets vec-v004-A + */ + +/* + * @test id=vec-v004-U + * @bug 8298935 + * @summary Test SuperWord: vector size, offsets, dependencies, alignment. + * @requires vm.compiler2.enabled + * @requires (os.arch!="x86" & os.arch!="i386" & os.arch!="amd64" & os.arch!="x86_64") + * @library /test/lib / + * @run driver compiler.loopopts.superword.TestDependencyOffsets vec-v004-U + */ + +package compiler.loopopts.superword; +import compiler.lib.ir_framework.*; + +public class TestDependencyOffsets { + static final int RANGE = 512; + + static int[] goldIntP0 = new int[RANGE]; + static int[] goldIntM1 = new int[RANGE]; + static int[] goldIntP1 = new int[RANGE]; + static int[] goldIntM2 = new int[RANGE]; + static int[] goldIntP2 = new int[RANGE]; + static int[] goldIntM3 = new int[RANGE]; + static int[] goldIntP3 = new int[RANGE]; + static int[] goldIntM4 = new int[RANGE]; + static int[] goldIntP4 = new int[RANGE]; + static int[] goldIntM7 = new int[RANGE]; + static int[] goldIntP7 = new int[RANGE]; + static int[] goldIntM8 = new int[RANGE]; + static int[] goldIntP8 = new int[RANGE]; + static int[] goldIntM14 = new int[RANGE]; + static int[] goldIntP14 = new int[RANGE]; + static int[] goldIntM16 = new int[RANGE]; + static int[] goldIntP16 = new int[RANGE]; + static int[] goldIntM18 = new int[RANGE]; + static int[] goldIntP18 = new int[RANGE]; + static int[] goldIntM20 = new int[RANGE]; + static int[] goldIntP20 = new int[RANGE]; + static int[] goldIntM31 = new int[RANGE]; + static int[] goldIntP31 = new int[RANGE]; + static int[] goldIntM32 = new int[RANGE]; + static int[] goldIntP32 = new int[RANGE]; + static int[] goldIntM63 = new int[RANGE]; + static int[] goldIntP63 = new int[RANGE]; + static int[] goldIntM64 = new int[RANGE]; + static int[] goldIntP64 = new int[RANGE]; + static int[] goldIntM65 = new int[RANGE]; + static int[] goldIntP65 = new int[RANGE]; + static int[] goldIntM128 = new int[RANGE]; + static int[] goldIntP128 = new int[RANGE]; + static int[] goldIntM129 = new int[RANGE]; + static int[] goldIntP129 = new int[RANGE]; + static int[] goldIntM192 = new int[RANGE]; + static int[] goldIntP192 = new int[RANGE]; + static long[] goldLongP0 = new long[RANGE]; + static long[] goldLongM1 = new long[RANGE]; + static long[] goldLongP1 = new long[RANGE]; + static long[] goldLongM2 = new long[RANGE]; + static long[] goldLongP2 = new long[RANGE]; + static long[] goldLongM3 = new long[RANGE]; + static long[] goldLongP3 = new long[RANGE]; + static long[] goldLongM4 = new long[RANGE]; + static long[] goldLongP4 = new long[RANGE]; + static long[] goldLongM7 = new long[RANGE]; + static long[] goldLongP7 = new long[RANGE]; + static long[] goldLongM8 = new long[RANGE]; + static long[] goldLongP8 = new long[RANGE]; + static long[] goldLongM14 = new long[RANGE]; + static long[] goldLongP14 = new long[RANGE]; + static long[] goldLongM16 = new long[RANGE]; + static long[] goldLongP16 = new long[RANGE]; + static long[] goldLongM18 = new long[RANGE]; + static long[] goldLongP18 = new long[RANGE]; + static long[] goldLongM20 = new long[RANGE]; + static long[] goldLongP20 = new long[RANGE]; + static long[] goldLongM31 = new long[RANGE]; + static long[] goldLongP31 = new long[RANGE]; + static long[] goldLongM32 = new long[RANGE]; + static long[] goldLongP32 = new long[RANGE]; + static long[] goldLongM63 = new long[RANGE]; + static long[] goldLongP63 = new long[RANGE]; + static long[] goldLongM64 = new long[RANGE]; + static long[] goldLongP64 = new long[RANGE]; + static long[] goldLongM65 = new long[RANGE]; + static long[] goldLongP65 = new long[RANGE]; + static long[] goldLongM128 = new long[RANGE]; + static long[] goldLongP128 = new long[RANGE]; + static long[] goldLongM129 = new long[RANGE]; + static long[] goldLongP129 = new long[RANGE]; + static long[] goldLongM192 = new long[RANGE]; + static long[] goldLongP192 = new long[RANGE]; + static short[] goldShortP0 = new short[RANGE]; + static short[] goldShortM1 = new short[RANGE]; + static short[] goldShortP1 = new short[RANGE]; + static short[] goldShortM2 = new short[RANGE]; + static short[] goldShortP2 = new short[RANGE]; + static short[] goldShortM3 = new short[RANGE]; + static short[] goldShortP3 = new short[RANGE]; + static short[] goldShortM4 = new short[RANGE]; + static short[] goldShortP4 = new short[RANGE]; + static short[] goldShortM7 = new short[RANGE]; + static short[] goldShortP7 = new short[RANGE]; + static short[] goldShortM8 = new short[RANGE]; + static short[] goldShortP8 = new short[RANGE]; + static short[] goldShortM14 = new short[RANGE]; + static short[] goldShortP14 = new short[RANGE]; + static short[] goldShortM16 = new short[RANGE]; + static short[] goldShortP16 = new short[RANGE]; + static short[] goldShortM18 = new short[RANGE]; + static short[] goldShortP18 = new short[RANGE]; + static short[] goldShortM20 = new short[RANGE]; + static short[] goldShortP20 = new short[RANGE]; + static short[] goldShortM31 = new short[RANGE]; + static short[] goldShortP31 = new short[RANGE]; + static short[] goldShortM32 = new short[RANGE]; + static short[] goldShortP32 = new short[RANGE]; + static short[] goldShortM63 = new short[RANGE]; + static short[] goldShortP63 = new short[RANGE]; + static short[] goldShortM64 = new short[RANGE]; + static short[] goldShortP64 = new short[RANGE]; + static short[] goldShortM65 = new short[RANGE]; + static short[] goldShortP65 = new short[RANGE]; + static short[] goldShortM128 = new short[RANGE]; + static short[] goldShortP128 = new short[RANGE]; + static short[] goldShortM129 = new short[RANGE]; + static short[] goldShortP129 = new short[RANGE]; + static short[] goldShortM192 = new short[RANGE]; + static short[] goldShortP192 = new short[RANGE]; + static char[] goldCharP0 = new char[RANGE]; + static char[] goldCharM1 = new char[RANGE]; + static char[] goldCharP1 = new char[RANGE]; + static char[] goldCharM2 = new char[RANGE]; + static char[] goldCharP2 = new char[RANGE]; + static char[] goldCharM3 = new char[RANGE]; + static char[] goldCharP3 = new char[RANGE]; + static char[] goldCharM4 = new char[RANGE]; + static char[] goldCharP4 = new char[RANGE]; + static char[] goldCharM7 = new char[RANGE]; + static char[] goldCharP7 = new char[RANGE]; + static char[] goldCharM8 = new char[RANGE]; + static char[] goldCharP8 = new char[RANGE]; + static char[] goldCharM14 = new char[RANGE]; + static char[] goldCharP14 = new char[RANGE]; + static char[] goldCharM16 = new char[RANGE]; + static char[] goldCharP16 = new char[RANGE]; + static char[] goldCharM18 = new char[RANGE]; + static char[] goldCharP18 = new char[RANGE]; + static char[] goldCharM20 = new char[RANGE]; + static char[] goldCharP20 = new char[RANGE]; + static char[] goldCharM31 = new char[RANGE]; + static char[] goldCharP31 = new char[RANGE]; + static char[] goldCharM32 = new char[RANGE]; + static char[] goldCharP32 = new char[RANGE]; + static char[] goldCharM63 = new char[RANGE]; + static char[] goldCharP63 = new char[RANGE]; + static char[] goldCharM64 = new char[RANGE]; + static char[] goldCharP64 = new char[RANGE]; + static char[] goldCharM65 = new char[RANGE]; + static char[] goldCharP65 = new char[RANGE]; + static char[] goldCharM128 = new char[RANGE]; + static char[] goldCharP128 = new char[RANGE]; + static char[] goldCharM129 = new char[RANGE]; + static char[] goldCharP129 = new char[RANGE]; + static char[] goldCharM192 = new char[RANGE]; + static char[] goldCharP192 = new char[RANGE]; + static byte[] goldByteP0 = new byte[RANGE]; + static byte[] goldByteM1 = new byte[RANGE]; + static byte[] goldByteP1 = new byte[RANGE]; + static byte[] goldByteM2 = new byte[RANGE]; + static byte[] goldByteP2 = new byte[RANGE]; + static byte[] goldByteM3 = new byte[RANGE]; + static byte[] goldByteP3 = new byte[RANGE]; + static byte[] goldByteM4 = new byte[RANGE]; + static byte[] goldByteP4 = new byte[RANGE]; + static byte[] goldByteM7 = new byte[RANGE]; + static byte[] goldByteP7 = new byte[RANGE]; + static byte[] goldByteM8 = new byte[RANGE]; + static byte[] goldByteP8 = new byte[RANGE]; + static byte[] goldByteM14 = new byte[RANGE]; + static byte[] goldByteP14 = new byte[RANGE]; + static byte[] goldByteM16 = new byte[RANGE]; + static byte[] goldByteP16 = new byte[RANGE]; + static byte[] goldByteM18 = new byte[RANGE]; + static byte[] goldByteP18 = new byte[RANGE]; + static byte[] goldByteM20 = new byte[RANGE]; + static byte[] goldByteP20 = new byte[RANGE]; + static byte[] goldByteM31 = new byte[RANGE]; + static byte[] goldByteP31 = new byte[RANGE]; + static byte[] goldByteM32 = new byte[RANGE]; + static byte[] goldByteP32 = new byte[RANGE]; + static byte[] goldByteM63 = new byte[RANGE]; + static byte[] goldByteP63 = new byte[RANGE]; + static byte[] goldByteM64 = new byte[RANGE]; + static byte[] goldByteP64 = new byte[RANGE]; + static byte[] goldByteM65 = new byte[RANGE]; + static byte[] goldByteP65 = new byte[RANGE]; + static byte[] goldByteM128 = new byte[RANGE]; + static byte[] goldByteP128 = new byte[RANGE]; + static byte[] goldByteM129 = new byte[RANGE]; + static byte[] goldByteP129 = new byte[RANGE]; + static byte[] goldByteM192 = new byte[RANGE]; + static byte[] goldByteP192 = new byte[RANGE]; + static float[] goldFloatP0 = new float[RANGE]; + static float[] goldFloatM1 = new float[RANGE]; + static float[] goldFloatP1 = new float[RANGE]; + static float[] goldFloatM2 = new float[RANGE]; + static float[] goldFloatP2 = new float[RANGE]; + static float[] goldFloatM3 = new float[RANGE]; + static float[] goldFloatP3 = new float[RANGE]; + static float[] goldFloatM4 = new float[RANGE]; + static float[] goldFloatP4 = new float[RANGE]; + static float[] goldFloatM7 = new float[RANGE]; + static float[] goldFloatP7 = new float[RANGE]; + static float[] goldFloatM8 = new float[RANGE]; + static float[] goldFloatP8 = new float[RANGE]; + static float[] goldFloatM14 = new float[RANGE]; + static float[] goldFloatP14 = new float[RANGE]; + static float[] goldFloatM16 = new float[RANGE]; + static float[] goldFloatP16 = new float[RANGE]; + static float[] goldFloatM18 = new float[RANGE]; + static float[] goldFloatP18 = new float[RANGE]; + static float[] goldFloatM20 = new float[RANGE]; + static float[] goldFloatP20 = new float[RANGE]; + static float[] goldFloatM31 = new float[RANGE]; + static float[] goldFloatP31 = new float[RANGE]; + static float[] goldFloatM32 = new float[RANGE]; + static float[] goldFloatP32 = new float[RANGE]; + static float[] goldFloatM63 = new float[RANGE]; + static float[] goldFloatP63 = new float[RANGE]; + static float[] goldFloatM64 = new float[RANGE]; + static float[] goldFloatP64 = new float[RANGE]; + static float[] goldFloatM65 = new float[RANGE]; + static float[] goldFloatP65 = new float[RANGE]; + static float[] goldFloatM128 = new float[RANGE]; + static float[] goldFloatP128 = new float[RANGE]; + static float[] goldFloatM129 = new float[RANGE]; + static float[] goldFloatP129 = new float[RANGE]; + static float[] goldFloatM192 = new float[RANGE]; + static float[] goldFloatP192 = new float[RANGE]; + static double[] goldDoubleP0 = new double[RANGE]; + static double[] goldDoubleM1 = new double[RANGE]; + static double[] goldDoubleP1 = new double[RANGE]; + static double[] goldDoubleM2 = new double[RANGE]; + static double[] goldDoubleP2 = new double[RANGE]; + static double[] goldDoubleM3 = new double[RANGE]; + static double[] goldDoubleP3 = new double[RANGE]; + static double[] goldDoubleM4 = new double[RANGE]; + static double[] goldDoubleP4 = new double[RANGE]; + static double[] goldDoubleM7 = new double[RANGE]; + static double[] goldDoubleP7 = new double[RANGE]; + static double[] goldDoubleM8 = new double[RANGE]; + static double[] goldDoubleP8 = new double[RANGE]; + static double[] goldDoubleM14 = new double[RANGE]; + static double[] goldDoubleP14 = new double[RANGE]; + static double[] goldDoubleM16 = new double[RANGE]; + static double[] goldDoubleP16 = new double[RANGE]; + static double[] goldDoubleM18 = new double[RANGE]; + static double[] goldDoubleP18 = new double[RANGE]; + static double[] goldDoubleM20 = new double[RANGE]; + static double[] goldDoubleP20 = new double[RANGE]; + static double[] goldDoubleM31 = new double[RANGE]; + static double[] goldDoubleP31 = new double[RANGE]; + static double[] goldDoubleM32 = new double[RANGE]; + static double[] goldDoubleP32 = new double[RANGE]; + static double[] goldDoubleM63 = new double[RANGE]; + static double[] goldDoubleP63 = new double[RANGE]; + static double[] goldDoubleM64 = new double[RANGE]; + static double[] goldDoubleP64 = new double[RANGE]; + static double[] goldDoubleM65 = new double[RANGE]; + static double[] goldDoubleP65 = new double[RANGE]; + static double[] goldDoubleM128 = new double[RANGE]; + static double[] goldDoubleP128 = new double[RANGE]; + static double[] goldDoubleM129 = new double[RANGE]; + static double[] goldDoubleP129 = new double[RANGE]; + static double[] goldDoubleM192 = new double[RANGE]; + static double[] goldDoubleP192 = new double[RANGE]; + + static { + // compute the gold standard in interpreter mode + init(goldIntP0); + testIntP0(goldIntP0); + init(goldIntM1); + testIntM1(goldIntM1); + init(goldIntP1); + testIntP1(goldIntP1); + init(goldIntM2); + testIntM2(goldIntM2); + init(goldIntP2); + testIntP2(goldIntP2); + init(goldIntM3); + testIntM3(goldIntM3); + init(goldIntP3); + testIntP3(goldIntP3); + init(goldIntM4); + testIntM4(goldIntM4); + init(goldIntP4); + testIntP4(goldIntP4); + init(goldIntM7); + testIntM7(goldIntM7); + init(goldIntP7); + testIntP7(goldIntP7); + init(goldIntM8); + testIntM8(goldIntM8); + init(goldIntP8); + testIntP8(goldIntP8); + init(goldIntM14); + testIntM14(goldIntM14); + init(goldIntP14); + testIntP14(goldIntP14); + init(goldIntM16); + testIntM16(goldIntM16); + init(goldIntP16); + testIntP16(goldIntP16); + init(goldIntM18); + testIntM18(goldIntM18); + init(goldIntP18); + testIntP18(goldIntP18); + init(goldIntM20); + testIntM20(goldIntM20); + init(goldIntP20); + testIntP20(goldIntP20); + init(goldIntM31); + testIntM31(goldIntM31); + init(goldIntP31); + testIntP31(goldIntP31); + init(goldIntM32); + testIntM32(goldIntM32); + init(goldIntP32); + testIntP32(goldIntP32); + init(goldIntM63); + testIntM63(goldIntM63); + init(goldIntP63); + testIntP63(goldIntP63); + init(goldIntM64); + testIntM64(goldIntM64); + init(goldIntP64); + testIntP64(goldIntP64); + init(goldIntM65); + testIntM65(goldIntM65); + init(goldIntP65); + testIntP65(goldIntP65); + init(goldIntM128); + testIntM128(goldIntM128); + init(goldIntP128); + testIntP128(goldIntP128); + init(goldIntM129); + testIntM129(goldIntM129); + init(goldIntP129); + testIntP129(goldIntP129); + init(goldIntM192); + testIntM192(goldIntM192); + init(goldIntP192); + testIntP192(goldIntP192); + init(goldLongP0); + testLongP0(goldLongP0); + init(goldLongM1); + testLongM1(goldLongM1); + init(goldLongP1); + testLongP1(goldLongP1); + init(goldLongM2); + testLongM2(goldLongM2); + init(goldLongP2); + testLongP2(goldLongP2); + init(goldLongM3); + testLongM3(goldLongM3); + init(goldLongP3); + testLongP3(goldLongP3); + init(goldLongM4); + testLongM4(goldLongM4); + init(goldLongP4); + testLongP4(goldLongP4); + init(goldLongM7); + testLongM7(goldLongM7); + init(goldLongP7); + testLongP7(goldLongP7); + init(goldLongM8); + testLongM8(goldLongM8); + init(goldLongP8); + testLongP8(goldLongP8); + init(goldLongM14); + testLongM14(goldLongM14); + init(goldLongP14); + testLongP14(goldLongP14); + init(goldLongM16); + testLongM16(goldLongM16); + init(goldLongP16); + testLongP16(goldLongP16); + init(goldLongM18); + testLongM18(goldLongM18); + init(goldLongP18); + testLongP18(goldLongP18); + init(goldLongM20); + testLongM20(goldLongM20); + init(goldLongP20); + testLongP20(goldLongP20); + init(goldLongM31); + testLongM31(goldLongM31); + init(goldLongP31); + testLongP31(goldLongP31); + init(goldLongM32); + testLongM32(goldLongM32); + init(goldLongP32); + testLongP32(goldLongP32); + init(goldLongM63); + testLongM63(goldLongM63); + init(goldLongP63); + testLongP63(goldLongP63); + init(goldLongM64); + testLongM64(goldLongM64); + init(goldLongP64); + testLongP64(goldLongP64); + init(goldLongM65); + testLongM65(goldLongM65); + init(goldLongP65); + testLongP65(goldLongP65); + init(goldLongM128); + testLongM128(goldLongM128); + init(goldLongP128); + testLongP128(goldLongP128); + init(goldLongM129); + testLongM129(goldLongM129); + init(goldLongP129); + testLongP129(goldLongP129); + init(goldLongM192); + testLongM192(goldLongM192); + init(goldLongP192); + testLongP192(goldLongP192); + init(goldShortP0); + testShortP0(goldShortP0); + init(goldShortM1); + testShortM1(goldShortM1); + init(goldShortP1); + testShortP1(goldShortP1); + init(goldShortM2); + testShortM2(goldShortM2); + init(goldShortP2); + testShortP2(goldShortP2); + init(goldShortM3); + testShortM3(goldShortM3); + init(goldShortP3); + testShortP3(goldShortP3); + init(goldShortM4); + testShortM4(goldShortM4); + init(goldShortP4); + testShortP4(goldShortP4); + init(goldShortM7); + testShortM7(goldShortM7); + init(goldShortP7); + testShortP7(goldShortP7); + init(goldShortM8); + testShortM8(goldShortM8); + init(goldShortP8); + testShortP8(goldShortP8); + init(goldShortM14); + testShortM14(goldShortM14); + init(goldShortP14); + testShortP14(goldShortP14); + init(goldShortM16); + testShortM16(goldShortM16); + init(goldShortP16); + testShortP16(goldShortP16); + init(goldShortM18); + testShortM18(goldShortM18); + init(goldShortP18); + testShortP18(goldShortP18); + init(goldShortM20); + testShortM20(goldShortM20); + init(goldShortP20); + testShortP20(goldShortP20); + init(goldShortM31); + testShortM31(goldShortM31); + init(goldShortP31); + testShortP31(goldShortP31); + init(goldShortM32); + testShortM32(goldShortM32); + init(goldShortP32); + testShortP32(goldShortP32); + init(goldShortM63); + testShortM63(goldShortM63); + init(goldShortP63); + testShortP63(goldShortP63); + init(goldShortM64); + testShortM64(goldShortM64); + init(goldShortP64); + testShortP64(goldShortP64); + init(goldShortM65); + testShortM65(goldShortM65); + init(goldShortP65); + testShortP65(goldShortP65); + init(goldShortM128); + testShortM128(goldShortM128); + init(goldShortP128); + testShortP128(goldShortP128); + init(goldShortM129); + testShortM129(goldShortM129); + init(goldShortP129); + testShortP129(goldShortP129); + init(goldShortM192); + testShortM192(goldShortM192); + init(goldShortP192); + testShortP192(goldShortP192); + init(goldCharP0); + testCharP0(goldCharP0); + init(goldCharM1); + testCharM1(goldCharM1); + init(goldCharP1); + testCharP1(goldCharP1); + init(goldCharM2); + testCharM2(goldCharM2); + init(goldCharP2); + testCharP2(goldCharP2); + init(goldCharM3); + testCharM3(goldCharM3); + init(goldCharP3); + testCharP3(goldCharP3); + init(goldCharM4); + testCharM4(goldCharM4); + init(goldCharP4); + testCharP4(goldCharP4); + init(goldCharM7); + testCharM7(goldCharM7); + init(goldCharP7); + testCharP7(goldCharP7); + init(goldCharM8); + testCharM8(goldCharM8); + init(goldCharP8); + testCharP8(goldCharP8); + init(goldCharM14); + testCharM14(goldCharM14); + init(goldCharP14); + testCharP14(goldCharP14); + init(goldCharM16); + testCharM16(goldCharM16); + init(goldCharP16); + testCharP16(goldCharP16); + init(goldCharM18); + testCharM18(goldCharM18); + init(goldCharP18); + testCharP18(goldCharP18); + init(goldCharM20); + testCharM20(goldCharM20); + init(goldCharP20); + testCharP20(goldCharP20); + init(goldCharM31); + testCharM31(goldCharM31); + init(goldCharP31); + testCharP31(goldCharP31); + init(goldCharM32); + testCharM32(goldCharM32); + init(goldCharP32); + testCharP32(goldCharP32); + init(goldCharM63); + testCharM63(goldCharM63); + init(goldCharP63); + testCharP63(goldCharP63); + init(goldCharM64); + testCharM64(goldCharM64); + init(goldCharP64); + testCharP64(goldCharP64); + init(goldCharM65); + testCharM65(goldCharM65); + init(goldCharP65); + testCharP65(goldCharP65); + init(goldCharM128); + testCharM128(goldCharM128); + init(goldCharP128); + testCharP128(goldCharP128); + init(goldCharM129); + testCharM129(goldCharM129); + init(goldCharP129); + testCharP129(goldCharP129); + init(goldCharM192); + testCharM192(goldCharM192); + init(goldCharP192); + testCharP192(goldCharP192); + init(goldByteP0); + testByteP0(goldByteP0); + init(goldByteM1); + testByteM1(goldByteM1); + init(goldByteP1); + testByteP1(goldByteP1); + init(goldByteM2); + testByteM2(goldByteM2); + init(goldByteP2); + testByteP2(goldByteP2); + init(goldByteM3); + testByteM3(goldByteM3); + init(goldByteP3); + testByteP3(goldByteP3); + init(goldByteM4); + testByteM4(goldByteM4); + init(goldByteP4); + testByteP4(goldByteP4); + init(goldByteM7); + testByteM7(goldByteM7); + init(goldByteP7); + testByteP7(goldByteP7); + init(goldByteM8); + testByteM8(goldByteM8); + init(goldByteP8); + testByteP8(goldByteP8); + init(goldByteM14); + testByteM14(goldByteM14); + init(goldByteP14); + testByteP14(goldByteP14); + init(goldByteM16); + testByteM16(goldByteM16); + init(goldByteP16); + testByteP16(goldByteP16); + init(goldByteM18); + testByteM18(goldByteM18); + init(goldByteP18); + testByteP18(goldByteP18); + init(goldByteM20); + testByteM20(goldByteM20); + init(goldByteP20); + testByteP20(goldByteP20); + init(goldByteM31); + testByteM31(goldByteM31); + init(goldByteP31); + testByteP31(goldByteP31); + init(goldByteM32); + testByteM32(goldByteM32); + init(goldByteP32); + testByteP32(goldByteP32); + init(goldByteM63); + testByteM63(goldByteM63); + init(goldByteP63); + testByteP63(goldByteP63); + init(goldByteM64); + testByteM64(goldByteM64); + init(goldByteP64); + testByteP64(goldByteP64); + init(goldByteM65); + testByteM65(goldByteM65); + init(goldByteP65); + testByteP65(goldByteP65); + init(goldByteM128); + testByteM128(goldByteM128); + init(goldByteP128); + testByteP128(goldByteP128); + init(goldByteM129); + testByteM129(goldByteM129); + init(goldByteP129); + testByteP129(goldByteP129); + init(goldByteM192); + testByteM192(goldByteM192); + init(goldByteP192); + testByteP192(goldByteP192); + init(goldFloatP0); + testFloatP0(goldFloatP0); + init(goldFloatM1); + testFloatM1(goldFloatM1); + init(goldFloatP1); + testFloatP1(goldFloatP1); + init(goldFloatM2); + testFloatM2(goldFloatM2); + init(goldFloatP2); + testFloatP2(goldFloatP2); + init(goldFloatM3); + testFloatM3(goldFloatM3); + init(goldFloatP3); + testFloatP3(goldFloatP3); + init(goldFloatM4); + testFloatM4(goldFloatM4); + init(goldFloatP4); + testFloatP4(goldFloatP4); + init(goldFloatM7); + testFloatM7(goldFloatM7); + init(goldFloatP7); + testFloatP7(goldFloatP7); + init(goldFloatM8); + testFloatM8(goldFloatM8); + init(goldFloatP8); + testFloatP8(goldFloatP8); + init(goldFloatM14); + testFloatM14(goldFloatM14); + init(goldFloatP14); + testFloatP14(goldFloatP14); + init(goldFloatM16); + testFloatM16(goldFloatM16); + init(goldFloatP16); + testFloatP16(goldFloatP16); + init(goldFloatM18); + testFloatM18(goldFloatM18); + init(goldFloatP18); + testFloatP18(goldFloatP18); + init(goldFloatM20); + testFloatM20(goldFloatM20); + init(goldFloatP20); + testFloatP20(goldFloatP20); + init(goldFloatM31); + testFloatM31(goldFloatM31); + init(goldFloatP31); + testFloatP31(goldFloatP31); + init(goldFloatM32); + testFloatM32(goldFloatM32); + init(goldFloatP32); + testFloatP32(goldFloatP32); + init(goldFloatM63); + testFloatM63(goldFloatM63); + init(goldFloatP63); + testFloatP63(goldFloatP63); + init(goldFloatM64); + testFloatM64(goldFloatM64); + init(goldFloatP64); + testFloatP64(goldFloatP64); + init(goldFloatM65); + testFloatM65(goldFloatM65); + init(goldFloatP65); + testFloatP65(goldFloatP65); + init(goldFloatM128); + testFloatM128(goldFloatM128); + init(goldFloatP128); + testFloatP128(goldFloatP128); + init(goldFloatM129); + testFloatM129(goldFloatM129); + init(goldFloatP129); + testFloatP129(goldFloatP129); + init(goldFloatM192); + testFloatM192(goldFloatM192); + init(goldFloatP192); + testFloatP192(goldFloatP192); + init(goldDoubleP0); + testDoubleP0(goldDoubleP0); + init(goldDoubleM1); + testDoubleM1(goldDoubleM1); + init(goldDoubleP1); + testDoubleP1(goldDoubleP1); + init(goldDoubleM2); + testDoubleM2(goldDoubleM2); + init(goldDoubleP2); + testDoubleP2(goldDoubleP2); + init(goldDoubleM3); + testDoubleM3(goldDoubleM3); + init(goldDoubleP3); + testDoubleP3(goldDoubleP3); + init(goldDoubleM4); + testDoubleM4(goldDoubleM4); + init(goldDoubleP4); + testDoubleP4(goldDoubleP4); + init(goldDoubleM7); + testDoubleM7(goldDoubleM7); + init(goldDoubleP7); + testDoubleP7(goldDoubleP7); + init(goldDoubleM8); + testDoubleM8(goldDoubleM8); + init(goldDoubleP8); + testDoubleP8(goldDoubleP8); + init(goldDoubleM14); + testDoubleM14(goldDoubleM14); + init(goldDoubleP14); + testDoubleP14(goldDoubleP14); + init(goldDoubleM16); + testDoubleM16(goldDoubleM16); + init(goldDoubleP16); + testDoubleP16(goldDoubleP16); + init(goldDoubleM18); + testDoubleM18(goldDoubleM18); + init(goldDoubleP18); + testDoubleP18(goldDoubleP18); + init(goldDoubleM20); + testDoubleM20(goldDoubleM20); + init(goldDoubleP20); + testDoubleP20(goldDoubleP20); + init(goldDoubleM31); + testDoubleM31(goldDoubleM31); + init(goldDoubleP31); + testDoubleP31(goldDoubleP31); + init(goldDoubleM32); + testDoubleM32(goldDoubleM32); + init(goldDoubleP32); + testDoubleP32(goldDoubleP32); + init(goldDoubleM63); + testDoubleM63(goldDoubleM63); + init(goldDoubleP63); + testDoubleP63(goldDoubleP63); + init(goldDoubleM64); + testDoubleM64(goldDoubleM64); + init(goldDoubleP64); + testDoubleP64(goldDoubleP64); + init(goldDoubleM65); + testDoubleM65(goldDoubleM65); + init(goldDoubleP65); + testDoubleP65(goldDoubleP65); + init(goldDoubleM128); + testDoubleM128(goldDoubleM128); + init(goldDoubleP128); + testDoubleP128(goldDoubleP128); + init(goldDoubleM129); + testDoubleM129(goldDoubleM129); + init(goldDoubleP129); + testDoubleP129(goldDoubleP129); + init(goldDoubleM192); + testDoubleM192(goldDoubleM192); + init(goldDoubleP192); + testDoubleP192(goldDoubleP192); + } + + public static void main(String args[]) { + TestFramework framework = new TestFramework(TestDependencyOffsets.class); + framework.addFlags("-XX:-TieredCompilation", + "-XX:CompileCommand=option,compiler.loopopts.superword.TestDependencyOffsets::test*,Vectorize", + "-XX:CompileCommand=compileonly,compiler.loopopts.superword.TestDependencyOffsets::init", + "-XX:CompileCommand=compileonly,compiler.loopopts.superword.TestDependencyOffsets::test*", + "-XX:CompileCommand=compileonly,compiler.loopopts.superword.TestDependencyOffsets::verify", + "-XX:LoopUnrollLimit=250"); + + if (args.length != 1) { + throw new RuntimeException("Test requires exactly one argument!"); + } + + switch (args[0]) { + case "vanilla-A": + framework.addFlags("-XX:+AlignVector"); + break; + case "vanilla-U": + framework.addFlags("-XX:-AlignVector"); + break; + case "sse4-v016-A": + framework.addFlags("-XX:UseSSE=4", "-XX:MaxVectorSize=16", "-XX:+AlignVector"); + break; + case "sse4-v016-U": + framework.addFlags("-XX:UseSSE=4", "-XX:MaxVectorSize=16", "-XX:-AlignVector"); + break; + case "sse4-v008-A": + framework.addFlags("-XX:UseSSE=4", "-XX:MaxVectorSize=8", "-XX:+AlignVector"); + break; + case "sse4-v008-U": + framework.addFlags("-XX:UseSSE=4", "-XX:MaxVectorSize=8", "-XX:-AlignVector"); + break; + case "sse4-v004-A": + framework.addFlags("-XX:UseSSE=4", "-XX:MaxVectorSize=4", "-XX:+AlignVector"); + break; + case "sse4-v004-U": + framework.addFlags("-XX:UseSSE=4", "-XX:MaxVectorSize=4", "-XX:-AlignVector"); + break; + case "sse4-v002-A": + framework.addFlags("-XX:UseSSE=4", "-XX:MaxVectorSize=4", "-XX:+AlignVector"); + break; + case "sse4-v002-U": + framework.addFlags("-XX:UseSSE=4", "-XX:MaxVectorSize=4", "-XX:-AlignVector"); + break; + case "avx1-v032-A": + framework.addFlags("-XX:UseAVX=1", "-XX:MaxVectorSize=32", "-XX:+AlignVector"); + break; + case "avx1-v032-U": + framework.addFlags("-XX:UseAVX=1", "-XX:MaxVectorSize=32", "-XX:-AlignVector"); + break; + case "avx1-v016-A": + framework.addFlags("-XX:UseAVX=1", "-XX:MaxVectorSize=16", "-XX:+AlignVector"); + break; + case "avx1-v016-U": + framework.addFlags("-XX:UseAVX=1", "-XX:MaxVectorSize=16", "-XX:-AlignVector"); + break; + case "avx2-v032-A": + framework.addFlags("-XX:UseAVX=2", "-XX:MaxVectorSize=32", "-XX:+AlignVector"); + break; + case "avx2-v032-U": + framework.addFlags("-XX:UseAVX=2", "-XX:MaxVectorSize=32", "-XX:-AlignVector"); + break; + case "avx2-v016-A": + framework.addFlags("-XX:UseAVX=2", "-XX:MaxVectorSize=16", "-XX:+AlignVector"); + break; + case "avx2-v016-U": + framework.addFlags("-XX:UseAVX=2", "-XX:MaxVectorSize=16", "-XX:-AlignVector"); + break; + case "avx512-v064-A": + framework.addFlags("-XX:UseAVX=3", "-XX:+UseKNLSetting", "-XX:MaxVectorSize=64", "-XX:+AlignVector"); + break; + case "avx512-v064-U": + framework.addFlags("-XX:UseAVX=3", "-XX:+UseKNLSetting", "-XX:MaxVectorSize=64", "-XX:-AlignVector"); + break; + case "avx512-v032-A": + framework.addFlags("-XX:UseAVX=3", "-XX:+UseKNLSetting", "-XX:MaxVectorSize=32", "-XX:+AlignVector"); + break; + case "avx512-v032-U": + framework.addFlags("-XX:UseAVX=3", "-XX:+UseKNLSetting", "-XX:MaxVectorSize=32", "-XX:-AlignVector"); + break; + case "avx512bw-v064-A": + framework.addFlags("-XX:UseAVX=3", "-XX:MaxVectorSize=64", "-XX:+AlignVector"); + break; + case "avx512bw-v064-U": + framework.addFlags("-XX:UseAVX=3", "-XX:MaxVectorSize=64", "-XX:-AlignVector"); + break; + case "avx512bw-v032-A": + framework.addFlags("-XX:UseAVX=3", "-XX:MaxVectorSize=32", "-XX:+AlignVector"); + break; + case "avx512bw-v032-U": + framework.addFlags("-XX:UseAVX=3", "-XX:MaxVectorSize=32", "-XX:-AlignVector"); + break; + case "vec-v064-A": + framework.addFlags("-XX:MaxVectorSize=64", "-XX:+AlignVector"); + break; + case "vec-v064-U": + framework.addFlags("-XX:MaxVectorSize=64", "-XX:-AlignVector"); + break; + case "vec-v032-A": + framework.addFlags("-XX:MaxVectorSize=32", "-XX:+AlignVector"); + break; + case "vec-v032-U": + framework.addFlags("-XX:MaxVectorSize=32", "-XX:-AlignVector"); + break; + case "vec-v016-A": + framework.addFlags("-XX:MaxVectorSize=16", "-XX:+AlignVector"); + break; + case "vec-v016-U": + framework.addFlags("-XX:MaxVectorSize=16", "-XX:-AlignVector"); + break; + case "vec-v008-A": + framework.addFlags("-XX:MaxVectorSize=8", "-XX:+AlignVector"); + break; + case "vec-v008-U": + framework.addFlags("-XX:MaxVectorSize=8", "-XX:-AlignVector"); + break; + case "vec-v004-A": + framework.addFlags("-XX:MaxVectorSize=4", "-XX:+AlignVector"); + break; + case "vec-v004-U": + framework.addFlags("-XX:MaxVectorSize=4", "-XX:-AlignVector"); + break; + default: + throw new RuntimeException("Test argument not recognized: " + args[0]); + } + framework.start(); + } + + // ------------------- Tests ------------------- + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testIntP0(int[] data) { + for (int j = 0; j < RANGE; j++) { + data[j + 0] = (int)(data[j] * (int)-11); + } + } + + @Run(test = "testIntP0") + @Warmup(0) + public static void runIntP0() { + int[] data = new int[RANGE]; + init(data); + testIntP0(data); + verify("testIntP0", data, goldIntP0); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"avx512", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"asimd", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testIntM1(int[] data) { + for (int j = 1; j < RANGE; j++) { + data[j + -1] = (int)(data[j] * (int)-11); + } + } + + @Run(test = "testIntM1") + @Warmup(0) + public static void runIntM1() { + int[] data = new int[RANGE]; + init(data); + testIntM1(data); + verify("testIntM1", data, goldIntM1); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 4 + // positive byte_offset 4 can lead to cyclic dependency + // No positive IR rule: conditions impossible. + // CPU: avx2 -> vector_width: 32 -> elements in vector: 8 + // positive byte_offset 4 can lead to cyclic dependency + // No positive IR rule: conditions impossible. + // CPU: avx512 -> vector_width: 64 -> elements in vector: 16 + // positive byte_offset 4 can lead to cyclic dependency + // No positive IR rule: conditions impossible. + // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // positive byte_offset 4 can lead to cyclic dependency + // No positive IR rule: conditions impossible. + public static void testIntP1(int[] data) { + for (int j = 0; j < RANGE - 1; j++) { + data[j + 1] = (int)(data[j] * (int)-11); + } + } + + @Run(test = "testIntP1") + @Warmup(0) + public static void runIntP1() { + int[] data = new int[RANGE]; + init(data); + testIntP1(data); + verify("testIntP1", data, goldIntP1); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testIntM2(int[] data) { + for (int j = 2; j < RANGE; j++) { + data[j + -2] = (int)(data[j] * (int)-11); + } + } + + @Run(test = "testIntM2") + @Warmup(0) + public static void runIntM2() { + int[] data = new int[RANGE]; + init(data); + testIntM2(data); + verify("testIntM2", data, goldIntM2); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 4 + // positive byte_offset 8 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 8"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 -> vector_width: 32 -> elements in vector: 8 + // positive byte_offset 8 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 8"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 16 + // positive byte_offset 8 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 8"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // positive byte_offset 8 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 8"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testIntP2(int[] data) { + for (int j = 0; j < RANGE - 2; j++) { + data[j + 2] = (int)(data[j] * (int)-11); + } + } + + @Run(test = "testIntP2") + @Warmup(0) + public static void runIntP2() { + int[] data = new int[RANGE]; + init(data); + testIntP2(data); + verify("testIntP2", data, goldIntP2); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"avx512", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"asimd", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testIntM3(int[] data) { + for (int j = 3; j < RANGE; j++) { + data[j + -3] = (int)(data[j] * (int)-11); + } + } + + @Run(test = "testIntM3") + @Warmup(0) + public static void runIntM3() { + int[] data = new int[RANGE]; + init(data); + testIntM3(data); + verify("testIntM3", data, goldIntM3); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 4 + // positive byte_offset 12 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 12"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 -> vector_width: 32 -> elements in vector: 8 + // positive byte_offset 12 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 12"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 16 + // positive byte_offset 12 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 12"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // positive byte_offset 12 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 12"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testIntP3(int[] data) { + for (int j = 0; j < RANGE - 3; j++) { + data[j + 3] = (int)(data[j] * (int)-11); + } + } + + @Run(test = "testIntP3") + @Warmup(0) + public static void runIntP3() { + int[] data = new int[RANGE]; + init(data); + testIntP3(data); + verify("testIntP3", data, goldIntP3); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testIntM4(int[] data) { + for (int j = 4; j < RANGE; j++) { + data[j + -4] = (int)(data[j] * (int)-11); + } + } + + @Run(test = "testIntM4") + @Warmup(0) + public static void runIntM4() { + int[] data = new int[RANGE]; + init(data); + testIntM4(data); + verify("testIntM4", data, goldIntM4); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 -> vector_width: 32 -> elements in vector: 8 + // positive byte_offset 16 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 16"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 16 + // positive byte_offset 16 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 16"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // positive byte_offset 16 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testIntP4(int[] data) { + for (int j = 0; j < RANGE - 4; j++) { + data[j + 4] = (int)(data[j] * (int)-11); + } + } + + @Run(test = "testIntP4") + @Warmup(0) + public static void runIntP4() { + int[] data = new int[RANGE]; + init(data); + testIntP4(data); + verify("testIntP4", data, goldIntP4); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"avx512", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"asimd", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testIntM7(int[] data) { + for (int j = 7; j < RANGE; j++) { + data[j + -7] = (int)(data[j] * (int)-11); + } + } + + @Run(test = "testIntM7") + @Warmup(0) + public static void runIntM7() { + int[] data = new int[RANGE]; + init(data); + testIntM7(data); + verify("testIntM7", data, goldIntM7); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 -> vector_width: 32 -> elements in vector: 8 + // positive byte_offset 28 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 28"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 16 + // positive byte_offset 28 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 28"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // positive byte_offset 28 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 28"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testIntP7(int[] data) { + for (int j = 0; j < RANGE - 7; j++) { + data[j + 7] = (int)(data[j] * (int)-11); + } + } + + @Run(test = "testIntP7") + @Warmup(0) + public static void runIntP7() { + int[] data = new int[RANGE]; + init(data); + testIntP7(data); + verify("testIntP7", data, goldIntP7); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testIntM8(int[] data) { + for (int j = 8; j < RANGE; j++) { + data[j + -8] = (int)(data[j] * (int)-11); + } + } + + @Run(test = "testIntM8") + @Warmup(0) + public static void runIntM8() { + int[] data = new int[RANGE]; + init(data); + testIntM8(data); + verify("testIntM8", data, goldIntM8); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 16 + // positive byte_offset 32 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 32"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"asimd", "true"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testIntP8(int[] data) { + for (int j = 0; j < RANGE - 8; j++) { + data[j + 8] = (int)(data[j] * (int)-11); + } + } + + @Run(test = "testIntP8") + @Warmup(0) + public static void runIntP8() { + int[] data = new int[RANGE]; + init(data); + testIntP8(data); + verify("testIntP8", data, goldIntP8); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testIntM14(int[] data) { + for (int j = 14; j < RANGE; j++) { + data[j + -14] = (int)(data[j] * (int)-11); + } + } + + @Run(test = "testIntM14") + @Warmup(0) + public static void runIntM14() { + int[] data = new int[RANGE]; + init(data); + testIntM14(data); + verify("testIntM14", data, goldIntM14); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 16 + // positive byte_offset 56 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 56"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testIntP14(int[] data) { + for (int j = 0; j < RANGE - 14; j++) { + data[j + 14] = (int)(data[j] * (int)-11); + } + } + + @Run(test = "testIntP14") + @Warmup(0) + public static void runIntP14() { + int[] data = new int[RANGE]; + init(data); + testIntP14(data); + verify("testIntP14", data, goldIntP14); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testIntM16(int[] data) { + for (int j = 16; j < RANGE; j++) { + data[j + -16] = (int)(data[j] * (int)-11); + } + } + + @Run(test = "testIntM16") + @Warmup(0) + public static void runIntM16() { + int[] data = new int[RANGE]; + init(data); + testIntM16(data); + verify("testIntM16", data, goldIntM16); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"avx512", "true"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"asimd", "true"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testIntP16(int[] data) { + for (int j = 0; j < RANGE - 16; j++) { + data[j + 16] = (int)(data[j] * (int)-11); + } + } + + @Run(test = "testIntP16") + @Warmup(0) + public static void runIntP16() { + int[] data = new int[RANGE]; + init(data); + testIntP16(data); + verify("testIntP16", data, goldIntP16); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testIntM18(int[] data) { + for (int j = 18; j < RANGE; j++) { + data[j + -18] = (int)(data[j] * (int)-11); + } + } + + @Run(test = "testIntM18") + @Warmup(0) + public static void runIntM18() { + int[] data = new int[RANGE]; + init(data); + testIntM18(data); + verify("testIntM18", data, goldIntM18); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testIntP18(int[] data) { + for (int j = 0; j < RANGE - 18; j++) { + data[j + 18] = (int)(data[j] * (int)-11); + } + } + + @Run(test = "testIntP18") + @Warmup(0) + public static void runIntP18() { + int[] data = new int[RANGE]; + init(data); + testIntP18(data); + verify("testIntP18", data, goldIntP18); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testIntM20(int[] data) { + for (int j = 20; j < RANGE; j++) { + data[j + -20] = (int)(data[j] * (int)-11); + } + } + + @Run(test = "testIntM20") + @Warmup(0) + public static void runIntM20() { + int[] data = new int[RANGE]; + init(data); + testIntM20(data); + verify("testIntM20", data, goldIntM20); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testIntP20(int[] data) { + for (int j = 0; j < RANGE - 20; j++) { + data[j + 20] = (int)(data[j] * (int)-11); + } + } + + @Run(test = "testIntP20") + @Warmup(0) + public static void runIntP20() { + int[] data = new int[RANGE]; + init(data); + testIntP20(data); + verify("testIntP20", data, goldIntP20); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"avx512", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"asimd", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testIntM31(int[] data) { + for (int j = 31; j < RANGE; j++) { + data[j + -31] = (int)(data[j] * (int)-11); + } + } + + @Run(test = "testIntM31") + @Warmup(0) + public static void runIntM31() { + int[] data = new int[RANGE]; + init(data); + testIntM31(data); + verify("testIntM31", data, goldIntM31); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testIntP31(int[] data) { + for (int j = 0; j < RANGE - 31; j++) { + data[j + 31] = (int)(data[j] * (int)-11); + } + } + + @Run(test = "testIntP31") + @Warmup(0) + public static void runIntP31() { + int[] data = new int[RANGE]; + init(data); + testIntP31(data); + verify("testIntP31", data, goldIntP31); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testIntM32(int[] data) { + for (int j = 32; j < RANGE; j++) { + data[j + -32] = (int)(data[j] * (int)-11); + } + } + + @Run(test = "testIntM32") + @Warmup(0) + public static void runIntM32() { + int[] data = new int[RANGE]; + init(data); + testIntM32(data); + verify("testIntM32", data, goldIntM32); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"avx512", "true"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"asimd", "true"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testIntP32(int[] data) { + for (int j = 0; j < RANGE - 32; j++) { + data[j + 32] = (int)(data[j] * (int)-11); + } + } + + @Run(test = "testIntP32") + @Warmup(0) + public static void runIntP32() { + int[] data = new int[RANGE]; + init(data); + testIntP32(data); + verify("testIntP32", data, goldIntP32); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"avx512", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"asimd", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testIntM63(int[] data) { + for (int j = 63; j < RANGE; j++) { + data[j + -63] = (int)(data[j] * (int)-11); + } + } + + @Run(test = "testIntM63") + @Warmup(0) + public static void runIntM63() { + int[] data = new int[RANGE]; + init(data); + testIntM63(data); + verify("testIntM63", data, goldIntM63); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testIntP63(int[] data) { + for (int j = 0; j < RANGE - 63; j++) { + data[j + 63] = (int)(data[j] * (int)-11); + } + } + + @Run(test = "testIntP63") + @Warmup(0) + public static void runIntP63() { + int[] data = new int[RANGE]; + init(data); + testIntP63(data); + verify("testIntP63", data, goldIntP63); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testIntM64(int[] data) { + for (int j = 64; j < RANGE; j++) { + data[j + -64] = (int)(data[j] * (int)-11); + } + } + + @Run(test = "testIntM64") + @Warmup(0) + public static void runIntM64() { + int[] data = new int[RANGE]; + init(data); + testIntM64(data); + verify("testIntM64", data, goldIntM64); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"avx512", "true"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"asimd", "true"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testIntP64(int[] data) { + for (int j = 0; j < RANGE - 64; j++) { + data[j + 64] = (int)(data[j] * (int)-11); + } + } + + @Run(test = "testIntP64") + @Warmup(0) + public static void runIntP64() { + int[] data = new int[RANGE]; + init(data); + testIntP64(data); + verify("testIntP64", data, goldIntP64); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"avx512", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"asimd", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testIntM65(int[] data) { + for (int j = 65; j < RANGE; j++) { + data[j + -65] = (int)(data[j] * (int)-11); + } + } + + @Run(test = "testIntM65") + @Warmup(0) + public static void runIntM65() { + int[] data = new int[RANGE]; + init(data); + testIntM65(data); + verify("testIntM65", data, goldIntM65); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testIntP65(int[] data) { + for (int j = 0; j < RANGE - 65; j++) { + data[j + 65] = (int)(data[j] * (int)-11); + } + } + + @Run(test = "testIntP65") + @Warmup(0) + public static void runIntP65() { + int[] data = new int[RANGE]; + init(data); + testIntP65(data); + verify("testIntP65", data, goldIntP65); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testIntM128(int[] data) { + for (int j = 128; j < RANGE; j++) { + data[j + -128] = (int)(data[j] * (int)-11); + } + } + + @Run(test = "testIntM128") + @Warmup(0) + public static void runIntM128() { + int[] data = new int[RANGE]; + init(data); + testIntM128(data); + verify("testIntM128", data, goldIntM128); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"avx512", "true"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"asimd", "true"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testIntP128(int[] data) { + for (int j = 0; j < RANGE - 128; j++) { + data[j + 128] = (int)(data[j] * (int)-11); + } + } + + @Run(test = "testIntP128") + @Warmup(0) + public static void runIntP128() { + int[] data = new int[RANGE]; + init(data); + testIntP128(data); + verify("testIntP128", data, goldIntP128); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"avx512", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"asimd", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testIntM129(int[] data) { + for (int j = 129; j < RANGE; j++) { + data[j + -129] = (int)(data[j] * (int)-11); + } + } + + @Run(test = "testIntM129") + @Warmup(0) + public static void runIntM129() { + int[] data = new int[RANGE]; + init(data); + testIntM129(data); + verify("testIntM129", data, goldIntM129); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testIntP129(int[] data) { + for (int j = 0; j < RANGE - 129; j++) { + data[j + 129] = (int)(data[j] * (int)-11); + } + } + + @Run(test = "testIntP129") + @Warmup(0) + public static void runIntP129() { + int[] data = new int[RANGE]; + init(data); + testIntP129(data); + verify("testIntP129", data, goldIntP129); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testIntM192(int[] data) { + for (int j = 192; j < RANGE; j++) { + data[j + -192] = (int)(data[j] * (int)-11); + } + } + + @Run(test = "testIntM192") + @Warmup(0) + public static void runIntM192() { + int[] data = new int[RANGE]; + init(data); + testIntM192(data); + verify("testIntM192", data, goldIntM192); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"avx512", "true"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"asimd", "true"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testIntP192(int[] data) { + for (int j = 0; j < RANGE - 192; j++) { + data[j + 192] = (int)(data[j] * (int)-11); + } + } + + @Run(test = "testIntP192") + @Warmup(0) + public static void runIntP192() { + int[] data = new int[RANGE]; + init(data); + testIntP192(data); + verify("testIntP192", data, goldIntP192); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testLongP0(long[] data) { + for (int j = 0; j < RANGE; j++) { + data[j + 0] = (long)(data[j] + (long)-11); + } + } + + @Run(test = "testLongP0") + @Warmup(0) + public static void runLongP0() { + long[] data = new long[RANGE]; + init(data); + testLongP0(data); + verify("testLongP0", data, goldLongP0); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.ADD_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.ADD_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"avx512", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.ADD_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.ADD_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testLongM1(long[] data) { + for (int j = 1; j < RANGE; j++) { + data[j + -1] = (long)(data[j] + (long)-11); + } + } + + @Run(test = "testLongM1") + @Warmup(0) + public static void runLongM1() { + long[] data = new long[RANGE]; + init(data); + testLongM1(data); + verify("testLongM1", data, goldLongM1); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 2 + // positive byte_offset 8 can lead to cyclic dependency + // No positive IR rule: conditions impossible. + // CPU: avx2 -> vector_width: 32 -> elements in vector: 4 + // positive byte_offset 8 can lead to cyclic dependency + // No positive IR rule: conditions impossible. + // CPU: avx512 -> vector_width: 64 -> elements in vector: 8 + // positive byte_offset 8 can lead to cyclic dependency + // No positive IR rule: conditions impossible. + // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // positive byte_offset 8 can lead to cyclic dependency + // No positive IR rule: conditions impossible. + public static void testLongP1(long[] data) { + for (int j = 0; j < RANGE - 1; j++) { + data[j + 1] = (long)(data[j] + (long)-11); + } + } + + @Run(test = "testLongP1") + @Warmup(0) + public static void runLongP1() { + long[] data = new long[RANGE]; + init(data); + testLongP1(data); + verify("testLongP1", data, goldLongP1); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testLongM2(long[] data) { + for (int j = 2; j < RANGE; j++) { + data[j + -2] = (long)(data[j] + (long)-11); + } + } + + @Run(test = "testLongM2") + @Warmup(0) + public static void runLongM2() { + long[] data = new long[RANGE]; + init(data); + testLongM2(data); + verify("testLongM2", data, goldLongM2); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 -> vector_width: 32 -> elements in vector: 4 + // positive byte_offset 16 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16", "MaxVectorSize", "<= 16"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 8 + // positive byte_offset 16 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16", "MaxVectorSize", "<= 16"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // positive byte_offset 16 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16", "MaxVectorSize", "<= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testLongP2(long[] data) { + for (int j = 0; j < RANGE - 2; j++) { + data[j + 2] = (long)(data[j] + (long)-11); + } + } + + @Run(test = "testLongP2") + @Warmup(0) + public static void runLongP2() { + long[] data = new long[RANGE]; + init(data); + testLongP2(data); + verify("testLongP2", data, goldLongP2); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.ADD_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.ADD_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"avx512", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.ADD_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.ADD_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testLongM3(long[] data) { + for (int j = 3; j < RANGE; j++) { + data[j + -3] = (long)(data[j] + (long)-11); + } + } + + @Run(test = "testLongM3") + @Warmup(0) + public static void runLongM3() { + long[] data = new long[RANGE]; + init(data); + testLongM3(data); + verify("testLongM3", data, goldLongM3); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 -> vector_width: 32 -> elements in vector: 4 + // positive byte_offset 24 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16", "MaxVectorSize", "<= 24"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 8 + // positive byte_offset 24 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16", "MaxVectorSize", "<= 24"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // positive byte_offset 24 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16", "MaxVectorSize", "<= 24"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testLongP3(long[] data) { + for (int j = 0; j < RANGE - 3; j++) { + data[j + 3] = (long)(data[j] + (long)-11); + } + } + + @Run(test = "testLongP3") + @Warmup(0) + public static void runLongP3() { + long[] data = new long[RANGE]; + init(data); + testLongP3(data); + verify("testLongP3", data, goldLongP3); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testLongM4(long[] data) { + for (int j = 4; j < RANGE; j++) { + data[j + -4] = (long)(data[j] + (long)-11); + } + } + + @Run(test = "testLongM4") + @Warmup(0) + public static void runLongM4() { + long[] data = new long[RANGE]; + init(data); + testLongM4(data); + verify("testLongM4", data, goldLongM4); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 8 + // positive byte_offset 32 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16", "MaxVectorSize", "<= 32"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testLongP4(long[] data) { + for (int j = 0; j < RANGE - 4; j++) { + data[j + 4] = (long)(data[j] + (long)-11); + } + } + + @Run(test = "testLongP4") + @Warmup(0) + public static void runLongP4() { + long[] data = new long[RANGE]; + init(data); + testLongP4(data); + verify("testLongP4", data, goldLongP4); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.ADD_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.ADD_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"avx512", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.ADD_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.ADD_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testLongM7(long[] data) { + for (int j = 7; j < RANGE; j++) { + data[j + -7] = (long)(data[j] + (long)-11); + } + } + + @Run(test = "testLongM7") + @Warmup(0) + public static void runLongM7() { + long[] data = new long[RANGE]; + init(data); + testLongM7(data); + verify("testLongM7", data, goldLongM7); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 8 + // positive byte_offset 56 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16", "MaxVectorSize", "<= 56"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testLongP7(long[] data) { + for (int j = 0; j < RANGE - 7; j++) { + data[j + 7] = (long)(data[j] + (long)-11); + } + } + + @Run(test = "testLongP7") + @Warmup(0) + public static void runLongP7() { + long[] data = new long[RANGE]; + init(data); + testLongP7(data); + verify("testLongP7", data, goldLongP7); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testLongM8(long[] data) { + for (int j = 8; j < RANGE; j++) { + data[j + -8] = (long)(data[j] + (long)-11); + } + } + + @Run(test = "testLongM8") + @Warmup(0) + public static void runLongM8() { + long[] data = new long[RANGE]; + init(data); + testLongM8(data); + verify("testLongM8", data, goldLongM8); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"avx512", "true"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testLongP8(long[] data) { + for (int j = 0; j < RANGE - 8; j++) { + data[j + 8] = (long)(data[j] + (long)-11); + } + } + + @Run(test = "testLongP8") + @Warmup(0) + public static void runLongP8() { + long[] data = new long[RANGE]; + init(data); + testLongP8(data); + verify("testLongP8", data, goldLongP8); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testLongM14(long[] data) { + for (int j = 14; j < RANGE; j++) { + data[j + -14] = (long)(data[j] + (long)-11); + } + } + + @Run(test = "testLongM14") + @Warmup(0) + public static void runLongM14() { + long[] data = new long[RANGE]; + init(data); + testLongM14(data); + verify("testLongM14", data, goldLongM14); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testLongP14(long[] data) { + for (int j = 0; j < RANGE - 14; j++) { + data[j + 14] = (long)(data[j] + (long)-11); + } + } + + @Run(test = "testLongP14") + @Warmup(0) + public static void runLongP14() { + long[] data = new long[RANGE]; + init(data); + testLongP14(data); + verify("testLongP14", data, goldLongP14); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testLongM16(long[] data) { + for (int j = 16; j < RANGE; j++) { + data[j + -16] = (long)(data[j] + (long)-11); + } + } + + @Run(test = "testLongM16") + @Warmup(0) + public static void runLongM16() { + long[] data = new long[RANGE]; + init(data); + testLongM16(data); + verify("testLongM16", data, goldLongM16); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"avx512", "true"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testLongP16(long[] data) { + for (int j = 0; j < RANGE - 16; j++) { + data[j + 16] = (long)(data[j] + (long)-11); + } + } + + @Run(test = "testLongP16") + @Warmup(0) + public static void runLongP16() { + long[] data = new long[RANGE]; + init(data); + testLongP16(data); + verify("testLongP16", data, goldLongP16); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testLongM18(long[] data) { + for (int j = 18; j < RANGE; j++) { + data[j + -18] = (long)(data[j] + (long)-11); + } + } + + @Run(test = "testLongM18") + @Warmup(0) + public static void runLongM18() { + long[] data = new long[RANGE]; + init(data); + testLongM18(data); + verify("testLongM18", data, goldLongM18); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testLongP18(long[] data) { + for (int j = 0; j < RANGE - 18; j++) { + data[j + 18] = (long)(data[j] + (long)-11); + } + } + + @Run(test = "testLongP18") + @Warmup(0) + public static void runLongP18() { + long[] data = new long[RANGE]; + init(data); + testLongP18(data); + verify("testLongP18", data, goldLongP18); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testLongM20(long[] data) { + for (int j = 20; j < RANGE; j++) { + data[j + -20] = (long)(data[j] + (long)-11); + } + } + + @Run(test = "testLongM20") + @Warmup(0) + public static void runLongM20() { + long[] data = new long[RANGE]; + init(data); + testLongM20(data); + verify("testLongM20", data, goldLongM20); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testLongP20(long[] data) { + for (int j = 0; j < RANGE - 20; j++) { + data[j + 20] = (long)(data[j] + (long)-11); + } + } + + @Run(test = "testLongP20") + @Warmup(0) + public static void runLongP20() { + long[] data = new long[RANGE]; + init(data); + testLongP20(data); + verify("testLongP20", data, goldLongP20); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.ADD_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.ADD_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"avx512", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.ADD_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.ADD_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testLongM31(long[] data) { + for (int j = 31; j < RANGE; j++) { + data[j + -31] = (long)(data[j] + (long)-11); + } + } + + @Run(test = "testLongM31") + @Warmup(0) + public static void runLongM31() { + long[] data = new long[RANGE]; + init(data); + testLongM31(data); + verify("testLongM31", data, goldLongM31); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testLongP31(long[] data) { + for (int j = 0; j < RANGE - 31; j++) { + data[j + 31] = (long)(data[j] + (long)-11); + } + } + + @Run(test = "testLongP31") + @Warmup(0) + public static void runLongP31() { + long[] data = new long[RANGE]; + init(data); + testLongP31(data); + verify("testLongP31", data, goldLongP31); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testLongM32(long[] data) { + for (int j = 32; j < RANGE; j++) { + data[j + -32] = (long)(data[j] + (long)-11); + } + } + + @Run(test = "testLongM32") + @Warmup(0) + public static void runLongM32() { + long[] data = new long[RANGE]; + init(data); + testLongM32(data); + verify("testLongM32", data, goldLongM32); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"avx512", "true"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testLongP32(long[] data) { + for (int j = 0; j < RANGE - 32; j++) { + data[j + 32] = (long)(data[j] + (long)-11); + } + } + + @Run(test = "testLongP32") + @Warmup(0) + public static void runLongP32() { + long[] data = new long[RANGE]; + init(data); + testLongP32(data); + verify("testLongP32", data, goldLongP32); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.ADD_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.ADD_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"avx512", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.ADD_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.ADD_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testLongM63(long[] data) { + for (int j = 63; j < RANGE; j++) { + data[j + -63] = (long)(data[j] + (long)-11); + } + } + + @Run(test = "testLongM63") + @Warmup(0) + public static void runLongM63() { + long[] data = new long[RANGE]; + init(data); + testLongM63(data); + verify("testLongM63", data, goldLongM63); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testLongP63(long[] data) { + for (int j = 0; j < RANGE - 63; j++) { + data[j + 63] = (long)(data[j] + (long)-11); + } + } + + @Run(test = "testLongP63") + @Warmup(0) + public static void runLongP63() { + long[] data = new long[RANGE]; + init(data); + testLongP63(data); + verify("testLongP63", data, goldLongP63); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testLongM64(long[] data) { + for (int j = 64; j < RANGE; j++) { + data[j + -64] = (long)(data[j] + (long)-11); + } + } + + @Run(test = "testLongM64") + @Warmup(0) + public static void runLongM64() { + long[] data = new long[RANGE]; + init(data); + testLongM64(data); + verify("testLongM64", data, goldLongM64); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"avx512", "true"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testLongP64(long[] data) { + for (int j = 0; j < RANGE - 64; j++) { + data[j + 64] = (long)(data[j] + (long)-11); + } + } + + @Run(test = "testLongP64") + @Warmup(0) + public static void runLongP64() { + long[] data = new long[RANGE]; + init(data); + testLongP64(data); + verify("testLongP64", data, goldLongP64); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.ADD_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.ADD_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"avx512", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.ADD_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.ADD_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testLongM65(long[] data) { + for (int j = 65; j < RANGE; j++) { + data[j + -65] = (long)(data[j] + (long)-11); + } + } + + @Run(test = "testLongM65") + @Warmup(0) + public static void runLongM65() { + long[] data = new long[RANGE]; + init(data); + testLongM65(data); + verify("testLongM65", data, goldLongM65); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testLongP65(long[] data) { + for (int j = 0; j < RANGE - 65; j++) { + data[j + 65] = (long)(data[j] + (long)-11); + } + } + + @Run(test = "testLongP65") + @Warmup(0) + public static void runLongP65() { + long[] data = new long[RANGE]; + init(data); + testLongP65(data); + verify("testLongP65", data, goldLongP65); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testLongM128(long[] data) { + for (int j = 128; j < RANGE; j++) { + data[j + -128] = (long)(data[j] + (long)-11); + } + } + + @Run(test = "testLongM128") + @Warmup(0) + public static void runLongM128() { + long[] data = new long[RANGE]; + init(data); + testLongM128(data); + verify("testLongM128", data, goldLongM128); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"avx512", "true"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testLongP128(long[] data) { + for (int j = 0; j < RANGE - 128; j++) { + data[j + 128] = (long)(data[j] + (long)-11); + } + } + + @Run(test = "testLongP128") + @Warmup(0) + public static void runLongP128() { + long[] data = new long[RANGE]; + init(data); + testLongP128(data); + verify("testLongP128", data, goldLongP128); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.ADD_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.ADD_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"avx512", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.ADD_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.ADD_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testLongM129(long[] data) { + for (int j = 129; j < RANGE; j++) { + data[j + -129] = (long)(data[j] + (long)-11); + } + } + + @Run(test = "testLongM129") + @Warmup(0) + public static void runLongM129() { + long[] data = new long[RANGE]; + init(data); + testLongM129(data); + verify("testLongM129", data, goldLongM129); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testLongP129(long[] data) { + for (int j = 0; j < RANGE - 129; j++) { + data[j + 129] = (long)(data[j] + (long)-11); + } + } + + @Run(test = "testLongP129") + @Warmup(0) + public static void runLongP129() { + long[] data = new long[RANGE]; + init(data); + testLongP129(data); + verify("testLongP129", data, goldLongP129); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testLongM192(long[] data) { + for (int j = 192; j < RANGE; j++) { + data[j + -192] = (long)(data[j] + (long)-11); + } + } + + @Run(test = "testLongM192") + @Warmup(0) + public static void runLongM192() { + long[] data = new long[RANGE]; + init(data); + testLongM192(data); + verify("testLongM192", data, goldLongM192); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"avx512", "true"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testLongP192(long[] data) { + for (int j = 0; j < RANGE - 192; j++) { + data[j + 192] = (long)(data[j] + (long)-11); + } + } + + @Run(test = "testLongP192") + @Warmup(0) + public static void runLongP192() { + long[] data = new long[RANGE]; + init(data); + testLongP192(data); + verify("testLongP192", data, goldLongP192); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testShortP0(short[] data) { + for (int j = 0; j < RANGE; j++) { + data[j + 0] = (short)(data[j] * (short)-11); + } + } + + @Run(test = "testShortP0") + @Warmup(0) + public static void runShortP0() { + short[] data = new short[RANGE]; + init(data); + testShortP0(data); + verify("testShortP0", data, goldShortP0); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testShortM1(short[] data) { + for (int j = 1; j < RANGE; j++) { + data[j + -1] = (short)(data[j] * (short)-11); + } + } + + @Run(test = "testShortM1") + @Warmup(0) + public static void runShortM1() { + short[] data = new short[RANGE]; + init(data); + testShortM1(data); + verify("testShortM1", data, goldShortM1); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 8 + // positive byte_offset 2 can lead to cyclic dependency + // No positive IR rule: conditions impossible. + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 16 + // positive byte_offset 2 can lead to cyclic dependency + // No positive IR rule: conditions impossible. + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 32 + // positive byte_offset 2 can lead to cyclic dependency + // No positive IR rule: conditions impossible. + // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // positive byte_offset 2 can lead to cyclic dependency + // No positive IR rule: conditions impossible. + public static void testShortP1(short[] data) { + for (int j = 0; j < RANGE - 1; j++) { + data[j + 1] = (short)(data[j] * (short)-11); + } + } + + @Run(test = "testShortP1") + @Warmup(0) + public static void runShortP1() { + short[] data = new short[RANGE]; + init(data); + testShortP1(data); + verify("testShortP1", data, goldShortP1); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testShortM2(short[] data) { + for (int j = 2; j < RANGE; j++) { + data[j + -2] = (short)(data[j] * (short)-11); + } + } + + @Run(test = "testShortM2") + @Warmup(0) + public static void runShortM2() { + short[] data = new short[RANGE]; + init(data); + testShortM2(data); + verify("testShortM2", data, goldShortM2); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 8 + // positive byte_offset 4 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 16 + // positive byte_offset 4 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 32 + // positive byte_offset 4 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // positive byte_offset 4 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testShortP2(short[] data) { + for (int j = 0; j < RANGE - 2; j++) { + data[j + 2] = (short)(data[j] * (short)-11); + } + } + + @Run(test = "testShortP2") + @Warmup(0) + public static void runShortP2() { + short[] data = new short[RANGE]; + init(data); + testShortP2(data); + verify("testShortP2", data, goldShortP2); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testShortM3(short[] data) { + for (int j = 3; j < RANGE; j++) { + data[j + -3] = (short)(data[j] * (short)-11); + } + } + + @Run(test = "testShortM3") + @Warmup(0) + public static void runShortM3() { + short[] data = new short[RANGE]; + init(data); + testShortM3(data); + verify("testShortM3", data, goldShortM3); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 8 + // positive byte_offset 6 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 6"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 16 + // positive byte_offset 6 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 6"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 32 + // positive byte_offset 6 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 6"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // positive byte_offset 6 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 6"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testShortP3(short[] data) { + for (int j = 0; j < RANGE - 3; j++) { + data[j + 3] = (short)(data[j] * (short)-11); + } + } + + @Run(test = "testShortP3") + @Warmup(0) + public static void runShortP3() { + short[] data = new short[RANGE]; + init(data); + testShortP3(data); + verify("testShortP3", data, goldShortP3); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testShortM4(short[] data) { + for (int j = 4; j < RANGE; j++) { + data[j + -4] = (short)(data[j] * (short)-11); + } + } + + @Run(test = "testShortM4") + @Warmup(0) + public static void runShortM4() { + short[] data = new short[RANGE]; + init(data); + testShortM4(data); + verify("testShortM4", data, goldShortM4); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 8 + // positive byte_offset 8 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 8"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 16 + // positive byte_offset 8 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 8"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 32 + // positive byte_offset 8 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 8"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // positive byte_offset 8 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 8"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testShortP4(short[] data) { + for (int j = 0; j < RANGE - 4; j++) { + data[j + 4] = (short)(data[j] * (short)-11); + } + } + + @Run(test = "testShortP4") + @Warmup(0) + public static void runShortP4() { + short[] data = new short[RANGE]; + init(data); + testShortP4(data); + verify("testShortP4", data, goldShortP4); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testShortM7(short[] data) { + for (int j = 7; j < RANGE; j++) { + data[j + -7] = (short)(data[j] * (short)-11); + } + } + + @Run(test = "testShortM7") + @Warmup(0) + public static void runShortM7() { + short[] data = new short[RANGE]; + init(data); + testShortM7(data); + verify("testShortM7", data, goldShortM7); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 8 + // positive byte_offset 14 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 14"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 16 + // positive byte_offset 14 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 14"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 32 + // positive byte_offset 14 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 14"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // positive byte_offset 14 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 14"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testShortP7(short[] data) { + for (int j = 0; j < RANGE - 7; j++) { + data[j + 7] = (short)(data[j] * (short)-11); + } + } + + @Run(test = "testShortP7") + @Warmup(0) + public static void runShortP7() { + short[] data = new short[RANGE]; + init(data); + testShortP7(data); + verify("testShortP7", data, goldShortP7); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testShortM8(short[] data) { + for (int j = 8; j < RANGE; j++) { + data[j + -8] = (short)(data[j] * (short)-11); + } + } + + @Run(test = "testShortM8") + @Warmup(0) + public static void runShortM8() { + short[] data = new short[RANGE]; + init(data); + testShortM8(data); + verify("testShortM8", data, goldShortM8); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 16 + // positive byte_offset 16 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 16"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 32 + // positive byte_offset 16 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 16"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // positive byte_offset 16 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testShortP8(short[] data) { + for (int j = 0; j < RANGE - 8; j++) { + data[j + 8] = (short)(data[j] * (short)-11); + } + } + + @Run(test = "testShortP8") + @Warmup(0) + public static void runShortP8() { + short[] data = new short[RANGE]; + init(data); + testShortP8(data); + verify("testShortP8", data, goldShortP8); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testShortM14(short[] data) { + for (int j = 14; j < RANGE; j++) { + data[j + -14] = (short)(data[j] * (short)-11); + } + } + + @Run(test = "testShortM14") + @Warmup(0) + public static void runShortM14() { + short[] data = new short[RANGE]; + init(data); + testShortM14(data); + verify("testShortM14", data, goldShortM14); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 16 + // positive byte_offset 28 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 28"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 32 + // positive byte_offset 28 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 28"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // positive byte_offset 28 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 28"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testShortP14(short[] data) { + for (int j = 0; j < RANGE - 14; j++) { + data[j + 14] = (short)(data[j] * (short)-11); + } + } + + @Run(test = "testShortP14") + @Warmup(0) + public static void runShortP14() { + short[] data = new short[RANGE]; + init(data); + testShortP14(data); + verify("testShortP14", data, goldShortP14); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testShortM16(short[] data) { + for (int j = 16; j < RANGE; j++) { + data[j + -16] = (short)(data[j] * (short)-11); + } + } + + @Run(test = "testShortM16") + @Warmup(0) + public static void runShortM16() { + short[] data = new short[RANGE]; + init(data); + testShortM16(data); + verify("testShortM16", data, goldShortM16); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 32 + // positive byte_offset 32 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 32"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testShortP16(short[] data) { + for (int j = 0; j < RANGE - 16; j++) { + data[j + 16] = (short)(data[j] * (short)-11); + } + } + + @Run(test = "testShortP16") + @Warmup(0) + public static void runShortP16() { + short[] data = new short[RANGE]; + init(data); + testShortP16(data); + verify("testShortP16", data, goldShortP16); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testShortM18(short[] data) { + for (int j = 18; j < RANGE; j++) { + data[j + -18] = (short)(data[j] * (short)-11); + } + } + + @Run(test = "testShortM18") + @Warmup(0) + public static void runShortM18() { + short[] data = new short[RANGE]; + init(data); + testShortM18(data); + verify("testShortM18", data, goldShortM18); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 32 + // positive byte_offset 36 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 36"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testShortP18(short[] data) { + for (int j = 0; j < RANGE - 18; j++) { + data[j + 18] = (short)(data[j] * (short)-11); + } + } + + @Run(test = "testShortP18") + @Warmup(0) + public static void runShortP18() { + short[] data = new short[RANGE]; + init(data); + testShortP18(data); + verify("testShortP18", data, goldShortP18); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testShortM20(short[] data) { + for (int j = 20; j < RANGE; j++) { + data[j + -20] = (short)(data[j] * (short)-11); + } + } + + @Run(test = "testShortM20") + @Warmup(0) + public static void runShortM20() { + short[] data = new short[RANGE]; + init(data); + testShortM20(data); + verify("testShortM20", data, goldShortM20); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 32 + // positive byte_offset 40 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 40"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testShortP20(short[] data) { + for (int j = 0; j < RANGE - 20; j++) { + data[j + 20] = (short)(data[j] * (short)-11); + } + } + + @Run(test = "testShortP20") + @Warmup(0) + public static void runShortP20() { + short[] data = new short[RANGE]; + init(data); + testShortP20(data); + verify("testShortP20", data, goldShortP20); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testShortM31(short[] data) { + for (int j = 31; j < RANGE; j++) { + data[j + -31] = (short)(data[j] * (short)-11); + } + } + + @Run(test = "testShortM31") + @Warmup(0) + public static void runShortM31() { + short[] data = new short[RANGE]; + init(data); + testShortM31(data); + verify("testShortM31", data, goldShortM31); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 32 + // positive byte_offset 62 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 62"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testShortP31(short[] data) { + for (int j = 0; j < RANGE - 31; j++) { + data[j + 31] = (short)(data[j] * (short)-11); + } + } + + @Run(test = "testShortP31") + @Warmup(0) + public static void runShortP31() { + short[] data = new short[RANGE]; + init(data); + testShortP31(data); + verify("testShortP31", data, goldShortP31); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testShortM32(short[] data) { + for (int j = 32; j < RANGE; j++) { + data[j + -32] = (short)(data[j] * (short)-11); + } + } + + @Run(test = "testShortM32") + @Warmup(0) + public static void runShortM32() { + short[] data = new short[RANGE]; + init(data); + testShortM32(data); + verify("testShortM32", data, goldShortM32); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testShortP32(short[] data) { + for (int j = 0; j < RANGE - 32; j++) { + data[j + 32] = (short)(data[j] * (short)-11); + } + } + + @Run(test = "testShortP32") + @Warmup(0) + public static void runShortP32() { + short[] data = new short[RANGE]; + init(data); + testShortP32(data); + verify("testShortP32", data, goldShortP32); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testShortM63(short[] data) { + for (int j = 63; j < RANGE; j++) { + data[j + -63] = (short)(data[j] * (short)-11); + } + } + + @Run(test = "testShortM63") + @Warmup(0) + public static void runShortM63() { + short[] data = new short[RANGE]; + init(data); + testShortM63(data); + verify("testShortM63", data, goldShortM63); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testShortP63(short[] data) { + for (int j = 0; j < RANGE - 63; j++) { + data[j + 63] = (short)(data[j] * (short)-11); + } + } + + @Run(test = "testShortP63") + @Warmup(0) + public static void runShortP63() { + short[] data = new short[RANGE]; + init(data); + testShortP63(data); + verify("testShortP63", data, goldShortP63); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testShortM64(short[] data) { + for (int j = 64; j < RANGE; j++) { + data[j + -64] = (short)(data[j] * (short)-11); + } + } + + @Run(test = "testShortM64") + @Warmup(0) + public static void runShortM64() { + short[] data = new short[RANGE]; + init(data); + testShortM64(data); + verify("testShortM64", data, goldShortM64); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testShortP64(short[] data) { + for (int j = 0; j < RANGE - 64; j++) { + data[j + 64] = (short)(data[j] * (short)-11); + } + } + + @Run(test = "testShortP64") + @Warmup(0) + public static void runShortP64() { + short[] data = new short[RANGE]; + init(data); + testShortP64(data); + verify("testShortP64", data, goldShortP64); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testShortM65(short[] data) { + for (int j = 65; j < RANGE; j++) { + data[j + -65] = (short)(data[j] * (short)-11); + } + } + + @Run(test = "testShortM65") + @Warmup(0) + public static void runShortM65() { + short[] data = new short[RANGE]; + init(data); + testShortM65(data); + verify("testShortM65", data, goldShortM65); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testShortP65(short[] data) { + for (int j = 0; j < RANGE - 65; j++) { + data[j + 65] = (short)(data[j] * (short)-11); + } + } + + @Run(test = "testShortP65") + @Warmup(0) + public static void runShortP65() { + short[] data = new short[RANGE]; + init(data); + testShortP65(data); + verify("testShortP65", data, goldShortP65); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testShortM128(short[] data) { + for (int j = 128; j < RANGE; j++) { + data[j + -128] = (short)(data[j] * (short)-11); + } + } + + @Run(test = "testShortM128") + @Warmup(0) + public static void runShortM128() { + short[] data = new short[RANGE]; + init(data); + testShortM128(data); + verify("testShortM128", data, goldShortM128); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testShortP128(short[] data) { + for (int j = 0; j < RANGE - 128; j++) { + data[j + 128] = (short)(data[j] * (short)-11); + } + } + + @Run(test = "testShortP128") + @Warmup(0) + public static void runShortP128() { + short[] data = new short[RANGE]; + init(data); + testShortP128(data); + verify("testShortP128", data, goldShortP128); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testShortM129(short[] data) { + for (int j = 129; j < RANGE; j++) { + data[j + -129] = (short)(data[j] * (short)-11); + } + } + + @Run(test = "testShortM129") + @Warmup(0) + public static void runShortM129() { + short[] data = new short[RANGE]; + init(data); + testShortM129(data); + verify("testShortM129", data, goldShortM129); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testShortP129(short[] data) { + for (int j = 0; j < RANGE - 129; j++) { + data[j + 129] = (short)(data[j] * (short)-11); + } + } + + @Run(test = "testShortP129") + @Warmup(0) + public static void runShortP129() { + short[] data = new short[RANGE]; + init(data); + testShortP129(data); + verify("testShortP129", data, goldShortP129); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testShortM192(short[] data) { + for (int j = 192; j < RANGE; j++) { + data[j + -192] = (short)(data[j] * (short)-11); + } + } + + @Run(test = "testShortM192") + @Warmup(0) + public static void runShortM192() { + short[] data = new short[RANGE]; + init(data); + testShortM192(data); + verify("testShortM192", data, goldShortM192); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testShortP192(short[] data) { + for (int j = 0; j < RANGE - 192; j++) { + data[j + 192] = (short)(data[j] * (short)-11); + } + } + + @Run(test = "testShortP192") + @Warmup(0) + public static void runShortP192() { + short[] data = new short[RANGE]; + init(data); + testShortP192(data); + verify("testShortP192", data, goldShortP192); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testCharP0(char[] data) { + for (int j = 0; j < RANGE; j++) { + data[j + 0] = (char)(data[j] * (char)-11); + } + } + + @Run(test = "testCharP0") + @Warmup(0) + public static void runCharP0() { + char[] data = new char[RANGE]; + init(data); + testCharP0(data); + verify("testCharP0", data, goldCharP0); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testCharM1(char[] data) { + for (int j = 1; j < RANGE; j++) { + data[j + -1] = (char)(data[j] * (char)-11); + } + } + + @Run(test = "testCharM1") + @Warmup(0) + public static void runCharM1() { + char[] data = new char[RANGE]; + init(data); + testCharM1(data); + verify("testCharM1", data, goldCharM1); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 8 + // positive byte_offset 2 can lead to cyclic dependency + // No positive IR rule: conditions impossible. + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 16 + // positive byte_offset 2 can lead to cyclic dependency + // No positive IR rule: conditions impossible. + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 32 + // positive byte_offset 2 can lead to cyclic dependency + // No positive IR rule: conditions impossible. + // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // positive byte_offset 2 can lead to cyclic dependency + // No positive IR rule: conditions impossible. + public static void testCharP1(char[] data) { + for (int j = 0; j < RANGE - 1; j++) { + data[j + 1] = (char)(data[j] * (char)-11); + } + } + + @Run(test = "testCharP1") + @Warmup(0) + public static void runCharP1() { + char[] data = new char[RANGE]; + init(data); + testCharP1(data); + verify("testCharP1", data, goldCharP1); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testCharM2(char[] data) { + for (int j = 2; j < RANGE; j++) { + data[j + -2] = (char)(data[j] * (char)-11); + } + } + + @Run(test = "testCharM2") + @Warmup(0) + public static void runCharM2() { + char[] data = new char[RANGE]; + init(data); + testCharM2(data); + verify("testCharM2", data, goldCharM2); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 8 + // positive byte_offset 4 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 16 + // positive byte_offset 4 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 32 + // positive byte_offset 4 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // positive byte_offset 4 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testCharP2(char[] data) { + for (int j = 0; j < RANGE - 2; j++) { + data[j + 2] = (char)(data[j] * (char)-11); + } + } + + @Run(test = "testCharP2") + @Warmup(0) + public static void runCharP2() { + char[] data = new char[RANGE]; + init(data); + testCharP2(data); + verify("testCharP2", data, goldCharP2); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testCharM3(char[] data) { + for (int j = 3; j < RANGE; j++) { + data[j + -3] = (char)(data[j] * (char)-11); + } + } + + @Run(test = "testCharM3") + @Warmup(0) + public static void runCharM3() { + char[] data = new char[RANGE]; + init(data); + testCharM3(data); + verify("testCharM3", data, goldCharM3); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 8 + // positive byte_offset 6 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 6"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 16 + // positive byte_offset 6 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 6"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 32 + // positive byte_offset 6 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 6"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // positive byte_offset 6 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 6"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testCharP3(char[] data) { + for (int j = 0; j < RANGE - 3; j++) { + data[j + 3] = (char)(data[j] * (char)-11); + } + } + + @Run(test = "testCharP3") + @Warmup(0) + public static void runCharP3() { + char[] data = new char[RANGE]; + init(data); + testCharP3(data); + verify("testCharP3", data, goldCharP3); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testCharM4(char[] data) { + for (int j = 4; j < RANGE; j++) { + data[j + -4] = (char)(data[j] * (char)-11); + } + } + + @Run(test = "testCharM4") + @Warmup(0) + public static void runCharM4() { + char[] data = new char[RANGE]; + init(data); + testCharM4(data); + verify("testCharM4", data, goldCharM4); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 8 + // positive byte_offset 8 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 8"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 16 + // positive byte_offset 8 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 8"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 32 + // positive byte_offset 8 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 8"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // positive byte_offset 8 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 8"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testCharP4(char[] data) { + for (int j = 0; j < RANGE - 4; j++) { + data[j + 4] = (char)(data[j] * (char)-11); + } + } + + @Run(test = "testCharP4") + @Warmup(0) + public static void runCharP4() { + char[] data = new char[RANGE]; + init(data); + testCharP4(data); + verify("testCharP4", data, goldCharP4); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testCharM7(char[] data) { + for (int j = 7; j < RANGE; j++) { + data[j + -7] = (char)(data[j] * (char)-11); + } + } + + @Run(test = "testCharM7") + @Warmup(0) + public static void runCharM7() { + char[] data = new char[RANGE]; + init(data); + testCharM7(data); + verify("testCharM7", data, goldCharM7); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 8 + // positive byte_offset 14 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 14"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 16 + // positive byte_offset 14 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 14"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 32 + // positive byte_offset 14 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 14"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // positive byte_offset 14 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 14"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testCharP7(char[] data) { + for (int j = 0; j < RANGE - 7; j++) { + data[j + 7] = (char)(data[j] * (char)-11); + } + } + + @Run(test = "testCharP7") + @Warmup(0) + public static void runCharP7() { + char[] data = new char[RANGE]; + init(data); + testCharP7(data); + verify("testCharP7", data, goldCharP7); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testCharM8(char[] data) { + for (int j = 8; j < RANGE; j++) { + data[j + -8] = (char)(data[j] * (char)-11); + } + } + + @Run(test = "testCharM8") + @Warmup(0) + public static void runCharM8() { + char[] data = new char[RANGE]; + init(data); + testCharM8(data); + verify("testCharM8", data, goldCharM8); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 16 + // positive byte_offset 16 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 16"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 32 + // positive byte_offset 16 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 16"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // positive byte_offset 16 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testCharP8(char[] data) { + for (int j = 0; j < RANGE - 8; j++) { + data[j + 8] = (char)(data[j] * (char)-11); + } + } + + @Run(test = "testCharP8") + @Warmup(0) + public static void runCharP8() { + char[] data = new char[RANGE]; + init(data); + testCharP8(data); + verify("testCharP8", data, goldCharP8); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testCharM14(char[] data) { + for (int j = 14; j < RANGE; j++) { + data[j + -14] = (char)(data[j] * (char)-11); + } + } + + @Run(test = "testCharM14") + @Warmup(0) + public static void runCharM14() { + char[] data = new char[RANGE]; + init(data); + testCharM14(data); + verify("testCharM14", data, goldCharM14); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 16 + // positive byte_offset 28 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 28"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 32 + // positive byte_offset 28 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 28"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // positive byte_offset 28 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 28"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testCharP14(char[] data) { + for (int j = 0; j < RANGE - 14; j++) { + data[j + 14] = (char)(data[j] * (char)-11); + } + } + + @Run(test = "testCharP14") + @Warmup(0) + public static void runCharP14() { + char[] data = new char[RANGE]; + init(data); + testCharP14(data); + verify("testCharP14", data, goldCharP14); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testCharM16(char[] data) { + for (int j = 16; j < RANGE; j++) { + data[j + -16] = (char)(data[j] * (char)-11); + } + } + + @Run(test = "testCharM16") + @Warmup(0) + public static void runCharM16() { + char[] data = new char[RANGE]; + init(data); + testCharM16(data); + verify("testCharM16", data, goldCharM16); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 32 + // positive byte_offset 32 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 32"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testCharP16(char[] data) { + for (int j = 0; j < RANGE - 16; j++) { + data[j + 16] = (char)(data[j] * (char)-11); + } + } + + @Run(test = "testCharP16") + @Warmup(0) + public static void runCharP16() { + char[] data = new char[RANGE]; + init(data); + testCharP16(data); + verify("testCharP16", data, goldCharP16); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testCharM18(char[] data) { + for (int j = 18; j < RANGE; j++) { + data[j + -18] = (char)(data[j] * (char)-11); + } + } + + @Run(test = "testCharM18") + @Warmup(0) + public static void runCharM18() { + char[] data = new char[RANGE]; + init(data); + testCharM18(data); + verify("testCharM18", data, goldCharM18); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 32 + // positive byte_offset 36 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 36"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testCharP18(char[] data) { + for (int j = 0; j < RANGE - 18; j++) { + data[j + 18] = (char)(data[j] * (char)-11); + } + } + + @Run(test = "testCharP18") + @Warmup(0) + public static void runCharP18() { + char[] data = new char[RANGE]; + init(data); + testCharP18(data); + verify("testCharP18", data, goldCharP18); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testCharM20(char[] data) { + for (int j = 20; j < RANGE; j++) { + data[j + -20] = (char)(data[j] * (char)-11); + } + } + + @Run(test = "testCharM20") + @Warmup(0) + public static void runCharM20() { + char[] data = new char[RANGE]; + init(data); + testCharM20(data); + verify("testCharM20", data, goldCharM20); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 32 + // positive byte_offset 40 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 40"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testCharP20(char[] data) { + for (int j = 0; j < RANGE - 20; j++) { + data[j + 20] = (char)(data[j] * (char)-11); + } + } + + @Run(test = "testCharP20") + @Warmup(0) + public static void runCharP20() { + char[] data = new char[RANGE]; + init(data); + testCharP20(data); + verify("testCharP20", data, goldCharP20); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testCharM31(char[] data) { + for (int j = 31; j < RANGE; j++) { + data[j + -31] = (char)(data[j] * (char)-11); + } + } + + @Run(test = "testCharM31") + @Warmup(0) + public static void runCharM31() { + char[] data = new char[RANGE]; + init(data); + testCharM31(data); + verify("testCharM31", data, goldCharM31); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 32 + // positive byte_offset 62 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 62"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testCharP31(char[] data) { + for (int j = 0; j < RANGE - 31; j++) { + data[j + 31] = (char)(data[j] * (char)-11); + } + } + + @Run(test = "testCharP31") + @Warmup(0) + public static void runCharP31() { + char[] data = new char[RANGE]; + init(data); + testCharP31(data); + verify("testCharP31", data, goldCharP31); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testCharM32(char[] data) { + for (int j = 32; j < RANGE; j++) { + data[j + -32] = (char)(data[j] * (char)-11); + } + } + + @Run(test = "testCharM32") + @Warmup(0) + public static void runCharM32() { + char[] data = new char[RANGE]; + init(data); + testCharM32(data); + verify("testCharM32", data, goldCharM32); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testCharP32(char[] data) { + for (int j = 0; j < RANGE - 32; j++) { + data[j + 32] = (char)(data[j] * (char)-11); + } + } + + @Run(test = "testCharP32") + @Warmup(0) + public static void runCharP32() { + char[] data = new char[RANGE]; + init(data); + testCharP32(data); + verify("testCharP32", data, goldCharP32); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testCharM63(char[] data) { + for (int j = 63; j < RANGE; j++) { + data[j + -63] = (char)(data[j] * (char)-11); + } + } + + @Run(test = "testCharM63") + @Warmup(0) + public static void runCharM63() { + char[] data = new char[RANGE]; + init(data); + testCharM63(data); + verify("testCharM63", data, goldCharM63); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testCharP63(char[] data) { + for (int j = 0; j < RANGE - 63; j++) { + data[j + 63] = (char)(data[j] * (char)-11); + } + } + + @Run(test = "testCharP63") + @Warmup(0) + public static void runCharP63() { + char[] data = new char[RANGE]; + init(data); + testCharP63(data); + verify("testCharP63", data, goldCharP63); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testCharM64(char[] data) { + for (int j = 64; j < RANGE; j++) { + data[j + -64] = (char)(data[j] * (char)-11); + } + } + + @Run(test = "testCharM64") + @Warmup(0) + public static void runCharM64() { + char[] data = new char[RANGE]; + init(data); + testCharM64(data); + verify("testCharM64", data, goldCharM64); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testCharP64(char[] data) { + for (int j = 0; j < RANGE - 64; j++) { + data[j + 64] = (char)(data[j] * (char)-11); + } + } + + @Run(test = "testCharP64") + @Warmup(0) + public static void runCharP64() { + char[] data = new char[RANGE]; + init(data); + testCharP64(data); + verify("testCharP64", data, goldCharP64); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testCharM65(char[] data) { + for (int j = 65; j < RANGE; j++) { + data[j + -65] = (char)(data[j] * (char)-11); + } + } + + @Run(test = "testCharM65") + @Warmup(0) + public static void runCharM65() { + char[] data = new char[RANGE]; + init(data); + testCharM65(data); + verify("testCharM65", data, goldCharM65); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testCharP65(char[] data) { + for (int j = 0; j < RANGE - 65; j++) { + data[j + 65] = (char)(data[j] * (char)-11); + } + } + + @Run(test = "testCharP65") + @Warmup(0) + public static void runCharP65() { + char[] data = new char[RANGE]; + init(data); + testCharP65(data); + verify("testCharP65", data, goldCharP65); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testCharM128(char[] data) { + for (int j = 128; j < RANGE; j++) { + data[j + -128] = (char)(data[j] * (char)-11); + } + } + + @Run(test = "testCharM128") + @Warmup(0) + public static void runCharM128() { + char[] data = new char[RANGE]; + init(data); + testCharM128(data); + verify("testCharM128", data, goldCharM128); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testCharP128(char[] data) { + for (int j = 0; j < RANGE - 128; j++) { + data[j + 128] = (char)(data[j] * (char)-11); + } + } + + @Run(test = "testCharP128") + @Warmup(0) + public static void runCharP128() { + char[] data = new char[RANGE]; + init(data); + testCharP128(data); + verify("testCharP128", data, goldCharP128); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testCharM129(char[] data) { + for (int j = 129; j < RANGE; j++) { + data[j + -129] = (char)(data[j] * (char)-11); + } + } + + @Run(test = "testCharM129") + @Warmup(0) + public static void runCharM129() { + char[] data = new char[RANGE]; + init(data); + testCharM129(data); + verify("testCharM129", data, goldCharM129); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testCharP129(char[] data) { + for (int j = 0; j < RANGE - 129; j++) { + data[j + 129] = (char)(data[j] * (char)-11); + } + } + + @Run(test = "testCharP129") + @Warmup(0) + public static void runCharP129() { + char[] data = new char[RANGE]; + init(data); + testCharP129(data); + verify("testCharP129", data, goldCharP129); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testCharM192(char[] data) { + for (int j = 192; j < RANGE; j++) { + data[j + -192] = (char)(data[j] * (char)-11); + } + } + + @Run(test = "testCharM192") + @Warmup(0) + public static void runCharM192() { + char[] data = new char[RANGE]; + init(data); + testCharM192(data); + verify("testCharM192", data, goldCharM192); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testCharP192(char[] data) { + for (int j = 0; j < RANGE - 192; j++) { + data[j + 192] = (char)(data[j] * (char)-11); + } + } + + @Run(test = "testCharP192") + @Warmup(0) + public static void runCharP192() { + char[] data = new char[RANGE]; + init(data); + testCharP192(data); + verify("testCharP192", data, goldCharP192); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 64 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testByteP0(byte[] data) { + for (int j = 0; j < RANGE; j++) { + data[j + 0] = (byte)(data[j] * (byte)11); + } + } + + @Run(test = "testByteP0") + @Warmup(0) + public static void runByteP0() { + byte[] data = new byte[RANGE]; + init(data); + testByteP0(data); + verify("testByteP0", data, goldByteP0); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 64 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testByteM1(byte[] data) { + for (int j = 1; j < RANGE; j++) { + data[j + -1] = (byte)(data[j] * (byte)11); + } + } + + @Run(test = "testByteM1") + @Warmup(0) + public static void runByteM1() { + byte[] data = new byte[RANGE]; + init(data); + testByteM1(data); + verify("testByteM1", data, goldByteM1); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 16 + // positive byte_offset 1 can lead to cyclic dependency + // No positive IR rule: conditions impossible. + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 32 + // positive byte_offset 1 can lead to cyclic dependency + // No positive IR rule: conditions impossible. + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 64 + // positive byte_offset 1 can lead to cyclic dependency + // No positive IR rule: conditions impossible. + // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + // positive byte_offset 1 can lead to cyclic dependency + // No positive IR rule: conditions impossible. + public static void testByteP1(byte[] data) { + for (int j = 0; j < RANGE - 1; j++) { + data[j + 1] = (byte)(data[j] * (byte)11); + } + } + + @Run(test = "testByteP1") + @Warmup(0) + public static void runByteP1() { + byte[] data = new byte[RANGE]; + init(data); + testByteP1(data); + verify("testByteP1", data, goldByteP1); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 64 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testByteM2(byte[] data) { + for (int j = 2; j < RANGE; j++) { + data[j + -2] = (byte)(data[j] * (byte)11); + } + } + + @Run(test = "testByteM2") + @Warmup(0) + public static void runByteM2() { + byte[] data = new byte[RANGE]; + init(data); + testByteM2(data); + verify("testByteM2", data, goldByteM2); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 16 + // positive byte_offset 2 can lead to cyclic dependency + // No positive IR rule: conditions impossible. + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 32 + // positive byte_offset 2 can lead to cyclic dependency + // No positive IR rule: conditions impossible. + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 64 + // positive byte_offset 2 can lead to cyclic dependency + // No positive IR rule: conditions impossible. + // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + // positive byte_offset 2 can lead to cyclic dependency + // No positive IR rule: conditions impossible. + public static void testByteP2(byte[] data) { + for (int j = 0; j < RANGE - 2; j++) { + data[j + 2] = (byte)(data[j] * (byte)11); + } + } + + @Run(test = "testByteP2") + @Warmup(0) + public static void runByteP2() { + byte[] data = new byte[RANGE]; + init(data); + testByteP2(data); + verify("testByteP2", data, goldByteP2); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 64 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testByteM3(byte[] data) { + for (int j = 3; j < RANGE; j++) { + data[j + -3] = (byte)(data[j] * (byte)11); + } + } + + @Run(test = "testByteM3") + @Warmup(0) + public static void runByteM3() { + byte[] data = new byte[RANGE]; + init(data); + testByteM3(data); + verify("testByteM3", data, goldByteM3); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 16 + // positive byte_offset 3 can lead to cyclic dependency + // No positive IR rule: conditions impossible. + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 32 + // positive byte_offset 3 can lead to cyclic dependency + // No positive IR rule: conditions impossible. + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 64 + // positive byte_offset 3 can lead to cyclic dependency + // No positive IR rule: conditions impossible. + // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + // positive byte_offset 3 can lead to cyclic dependency + // No positive IR rule: conditions impossible. + public static void testByteP3(byte[] data) { + for (int j = 0; j < RANGE - 3; j++) { + data[j + 3] = (byte)(data[j] * (byte)11); + } + } + + @Run(test = "testByteP3") + @Warmup(0) + public static void runByteP3() { + byte[] data = new byte[RANGE]; + init(data); + testByteP3(data); + verify("testByteP3", data, goldByteP3); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 64 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testByteM4(byte[] data) { + for (int j = 4; j < RANGE; j++) { + data[j + -4] = (byte)(data[j] * (byte)11); + } + } + + @Run(test = "testByteM4") + @Warmup(0) + public static void runByteM4() { + byte[] data = new byte[RANGE]; + init(data); + testByteM4(data); + verify("testByteM4", data, goldByteM4); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 16 + // positive byte_offset 4 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 32 + // positive byte_offset 4 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 64 + // positive byte_offset 4 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + // positive byte_offset 4 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testByteP4(byte[] data) { + for (int j = 0; j < RANGE - 4; j++) { + data[j + 4] = (byte)(data[j] * (byte)11); + } + } + + @Run(test = "testByteP4") + @Warmup(0) + public static void runByteP4() { + byte[] data = new byte[RANGE]; + init(data); + testByteP4(data); + verify("testByteP4", data, goldByteP4); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 64 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testByteM7(byte[] data) { + for (int j = 7; j < RANGE; j++) { + data[j + -7] = (byte)(data[j] * (byte)11); + } + } + + @Run(test = "testByteM7") + @Warmup(0) + public static void runByteM7() { + byte[] data = new byte[RANGE]; + init(data); + testByteM7(data); + verify("testByteM7", data, goldByteM7); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 16 + // positive byte_offset 7 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 7"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 32 + // positive byte_offset 7 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 7"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 64 + // positive byte_offset 7 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 7"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + // positive byte_offset 7 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 7"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testByteP7(byte[] data) { + for (int j = 0; j < RANGE - 7; j++) { + data[j + 7] = (byte)(data[j] * (byte)11); + } + } + + @Run(test = "testByteP7") + @Warmup(0) + public static void runByteP7() { + byte[] data = new byte[RANGE]; + init(data); + testByteP7(data); + verify("testByteP7", data, goldByteP7); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 64 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testByteM8(byte[] data) { + for (int j = 8; j < RANGE; j++) { + data[j + -8] = (byte)(data[j] * (byte)11); + } + } + + @Run(test = "testByteM8") + @Warmup(0) + public static void runByteM8() { + byte[] data = new byte[RANGE]; + init(data); + testByteM8(data); + verify("testByteM8", data, goldByteM8); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 16 + // positive byte_offset 8 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 8"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 32 + // positive byte_offset 8 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 8"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 64 + // positive byte_offset 8 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 8"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + // positive byte_offset 8 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 8"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testByteP8(byte[] data) { + for (int j = 0; j < RANGE - 8; j++) { + data[j + 8] = (byte)(data[j] * (byte)11); + } + } + + @Run(test = "testByteP8") + @Warmup(0) + public static void runByteP8() { + byte[] data = new byte[RANGE]; + init(data); + testByteP8(data); + verify("testByteP8", data, goldByteP8); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 64 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testByteM14(byte[] data) { + for (int j = 14; j < RANGE; j++) { + data[j + -14] = (byte)(data[j] * (byte)11); + } + } + + @Run(test = "testByteM14") + @Warmup(0) + public static void runByteM14() { + byte[] data = new byte[RANGE]; + init(data); + testByteM14(data); + verify("testByteM14", data, goldByteM14); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 16 + // positive byte_offset 14 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 14"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 32 + // positive byte_offset 14 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 14"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 64 + // positive byte_offset 14 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 14"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + // positive byte_offset 14 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 14"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testByteP14(byte[] data) { + for (int j = 0; j < RANGE - 14; j++) { + data[j + 14] = (byte)(data[j] * (byte)11); + } + } + + @Run(test = "testByteP14") + @Warmup(0) + public static void runByteP14() { + byte[] data = new byte[RANGE]; + init(data); + testByteP14(data); + verify("testByteP14", data, goldByteP14); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 64 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testByteM16(byte[] data) { + for (int j = 16; j < RANGE; j++) { + data[j + -16] = (byte)(data[j] * (byte)11); + } + } + + @Run(test = "testByteM16") + @Warmup(0) + public static void runByteM16() { + byte[] data = new byte[RANGE]; + init(data); + testByteM16(data); + verify("testByteM16", data, goldByteM16); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 32 + // positive byte_offset 16 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 16"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 64 + // positive byte_offset 16 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 16"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + // positive byte_offset 16 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testByteP16(byte[] data) { + for (int j = 0; j < RANGE - 16; j++) { + data[j + 16] = (byte)(data[j] * (byte)11); + } + } + + @Run(test = "testByteP16") + @Warmup(0) + public static void runByteP16() { + byte[] data = new byte[RANGE]; + init(data); + testByteP16(data); + verify("testByteP16", data, goldByteP16); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 64 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testByteM18(byte[] data) { + for (int j = 18; j < RANGE; j++) { + data[j + -18] = (byte)(data[j] * (byte)11); + } + } + + @Run(test = "testByteM18") + @Warmup(0) + public static void runByteM18() { + byte[] data = new byte[RANGE]; + init(data); + testByteM18(data); + verify("testByteM18", data, goldByteM18); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 32 + // positive byte_offset 18 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 18"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 64 + // positive byte_offset 18 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 18"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + // positive byte_offset 18 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 18"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testByteP18(byte[] data) { + for (int j = 0; j < RANGE - 18; j++) { + data[j + 18] = (byte)(data[j] * (byte)11); + } + } + + @Run(test = "testByteP18") + @Warmup(0) + public static void runByteP18() { + byte[] data = new byte[RANGE]; + init(data); + testByteP18(data); + verify("testByteP18", data, goldByteP18); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 64 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testByteM20(byte[] data) { + for (int j = 20; j < RANGE; j++) { + data[j + -20] = (byte)(data[j] * (byte)11); + } + } + + @Run(test = "testByteM20") + @Warmup(0) + public static void runByteM20() { + byte[] data = new byte[RANGE]; + init(data); + testByteM20(data); + verify("testByteM20", data, goldByteM20); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 32 + // positive byte_offset 20 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 20"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 64 + // positive byte_offset 20 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 20"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + // positive byte_offset 20 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 20"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testByteP20(byte[] data) { + for (int j = 0; j < RANGE - 20; j++) { + data[j + 20] = (byte)(data[j] * (byte)11); + } + } + + @Run(test = "testByteP20") + @Warmup(0) + public static void runByteP20() { + byte[] data = new byte[RANGE]; + init(data); + testByteP20(data); + verify("testByteP20", data, goldByteP20); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 64 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testByteM31(byte[] data) { + for (int j = 31; j < RANGE; j++) { + data[j + -31] = (byte)(data[j] * (byte)11); + } + } + + @Run(test = "testByteM31") + @Warmup(0) + public static void runByteM31() { + byte[] data = new byte[RANGE]; + init(data); + testByteM31(data); + verify("testByteM31", data, goldByteM31); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 32 + // positive byte_offset 31 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 31"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 64 + // positive byte_offset 31 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 31"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + // positive byte_offset 31 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 31"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testByteP31(byte[] data) { + for (int j = 0; j < RANGE - 31; j++) { + data[j + 31] = (byte)(data[j] * (byte)11); + } + } + + @Run(test = "testByteP31") + @Warmup(0) + public static void runByteP31() { + byte[] data = new byte[RANGE]; + init(data); + testByteP31(data); + verify("testByteP31", data, goldByteP31); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 64 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testByteM32(byte[] data) { + for (int j = 32; j < RANGE; j++) { + data[j + -32] = (byte)(data[j] * (byte)11); + } + } + + @Run(test = "testByteM32") + @Warmup(0) + public static void runByteM32() { + byte[] data = new byte[RANGE]; + init(data); + testByteM32(data); + verify("testByteM32", data, goldByteM32); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 64 + // positive byte_offset 32 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 32"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testByteP32(byte[] data) { + for (int j = 0; j < RANGE - 32; j++) { + data[j + 32] = (byte)(data[j] * (byte)11); + } + } + + @Run(test = "testByteP32") + @Warmup(0) + public static void runByteP32() { + byte[] data = new byte[RANGE]; + init(data); + testByteP32(data); + verify("testByteP32", data, goldByteP32); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 64 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testByteM63(byte[] data) { + for (int j = 63; j < RANGE; j++) { + data[j + -63] = (byte)(data[j] * (byte)11); + } + } + + @Run(test = "testByteM63") + @Warmup(0) + public static void runByteM63() { + byte[] data = new byte[RANGE]; + init(data); + testByteM63(data); + verify("testByteM63", data, goldByteM63); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 64 + // positive byte_offset 63 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 63"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testByteP63(byte[] data) { + for (int j = 0; j < RANGE - 63; j++) { + data[j + 63] = (byte)(data[j] * (byte)11); + } + } + + @Run(test = "testByteP63") + @Warmup(0) + public static void runByteP63() { + byte[] data = new byte[RANGE]; + init(data); + testByteP63(data); + verify("testByteP63", data, goldByteP63); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 64 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testByteM64(byte[] data) { + for (int j = 64; j < RANGE; j++) { + data[j + -64] = (byte)(data[j] * (byte)11); + } + } + + @Run(test = "testByteM64") + @Warmup(0) + public static void runByteM64() { + byte[] data = new byte[RANGE]; + init(data); + testByteM64(data); + verify("testByteM64", data, goldByteM64); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 64 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testByteP64(byte[] data) { + for (int j = 0; j < RANGE - 64; j++) { + data[j + 64] = (byte)(data[j] * (byte)11); + } + } + + @Run(test = "testByteP64") + @Warmup(0) + public static void runByteP64() { + byte[] data = new byte[RANGE]; + init(data); + testByteP64(data); + verify("testByteP64", data, goldByteP64); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 64 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testByteM65(byte[] data) { + for (int j = 65; j < RANGE; j++) { + data[j + -65] = (byte)(data[j] * (byte)11); + } + } + + @Run(test = "testByteM65") + @Warmup(0) + public static void runByteM65() { + byte[] data = new byte[RANGE]; + init(data); + testByteM65(data); + verify("testByteM65", data, goldByteM65); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 64 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testByteP65(byte[] data) { + for (int j = 0; j < RANGE - 65; j++) { + data[j + 65] = (byte)(data[j] * (byte)11); + } + } + + @Run(test = "testByteP65") + @Warmup(0) + public static void runByteP65() { + byte[] data = new byte[RANGE]; + init(data); + testByteP65(data); + verify("testByteP65", data, goldByteP65); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 64 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testByteM128(byte[] data) { + for (int j = 128; j < RANGE; j++) { + data[j + -128] = (byte)(data[j] * (byte)11); + } + } + + @Run(test = "testByteM128") + @Warmup(0) + public static void runByteM128() { + byte[] data = new byte[RANGE]; + init(data); + testByteM128(data); + verify("testByteM128", data, goldByteM128); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 64 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testByteP128(byte[] data) { + for (int j = 0; j < RANGE - 128; j++) { + data[j + 128] = (byte)(data[j] * (byte)11); + } + } + + @Run(test = "testByteP128") + @Warmup(0) + public static void runByteP128() { + byte[] data = new byte[RANGE]; + init(data); + testByteP128(data); + verify("testByteP128", data, goldByteP128); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 64 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testByteM129(byte[] data) { + for (int j = 129; j < RANGE; j++) { + data[j + -129] = (byte)(data[j] * (byte)11); + } + } + + @Run(test = "testByteM129") + @Warmup(0) + public static void runByteM129() { + byte[] data = new byte[RANGE]; + init(data); + testByteM129(data); + verify("testByteM129", data, goldByteM129); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 64 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testByteP129(byte[] data) { + for (int j = 0; j < RANGE - 129; j++) { + data[j + 129] = (byte)(data[j] * (byte)11); + } + } + + @Run(test = "testByteP129") + @Warmup(0) + public static void runByteP129() { + byte[] data = new byte[RANGE]; + init(data); + testByteP129(data); + verify("testByteP129", data, goldByteP129); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 64 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testByteM192(byte[] data) { + for (int j = 192; j < RANGE; j++) { + data[j + -192] = (byte)(data[j] * (byte)11); + } + } + + @Run(test = "testByteM192") + @Warmup(0) + public static void runByteM192() { + byte[] data = new byte[RANGE]; + init(data); + testByteM192(data); + verify("testByteM192", data, goldByteM192); + } + + @Test + // CPU: sse4.1 to avx -> vector_width: 16 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx2", "false"}) + // CPU: avx2 to avx512 without avx512bw -> vector_width: 32 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"avx2", "true", "avx512bw", "false"}) + // CPU: avx512bw -> vector_width: 64 -> elements in vector: 64 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"avx512bw", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testByteP192(byte[] data) { + for (int j = 0; j < RANGE - 192; j++) { + data[j + 192] = (byte)(data[j] * (byte)11); + } + } + + @Run(test = "testByteP192") + @Warmup(0) + public static void runByteP192() { + byte[] data = new byte[RANGE]; + init(data); + testByteP192(data); + verify("testByteP192", data, goldByteP192); + } + + @Test + // CPU: sse4.1 -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // CPU: avx and avx2 -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testFloatP0(float[] data) { + for (int j = 0; j < RANGE; j++) { + data[j + 0] = (float)(data[j] * (float)1.001f); + } + } + + @Run(test = "testFloatP0") + @Warmup(0) + public static void runFloatP0() { + float[] data = new float[RANGE]; + init(data); + testFloatP0(data); + verify("testFloatP0", data, goldFloatP0); + } + + @Test + // CPU: sse4.1 -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // CPU: avx and avx2 -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"avx512", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"asimd", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testFloatM1(float[] data) { + for (int j = 1; j < RANGE; j++) { + data[j + -1] = (float)(data[j] * (float)1.001f); + } + } + + @Run(test = "testFloatM1") + @Warmup(0) + public static void runFloatM1() { + float[] data = new float[RANGE]; + init(data); + testFloatM1(data); + verify("testFloatM1", data, goldFloatM1); + } + + @Test + // CPU: sse4.1 -> vector_width: 16 -> elements in vector: 4 + // positive byte_offset 4 can lead to cyclic dependency + // No positive IR rule: conditions impossible. + // CPU: avx and avx2 -> vector_width: 32 -> elements in vector: 8 + // positive byte_offset 4 can lead to cyclic dependency + // No positive IR rule: conditions impossible. + // CPU: avx512 -> vector_width: 64 -> elements in vector: 16 + // positive byte_offset 4 can lead to cyclic dependency + // No positive IR rule: conditions impossible. + // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // positive byte_offset 4 can lead to cyclic dependency + // No positive IR rule: conditions impossible. + public static void testFloatP1(float[] data) { + for (int j = 0; j < RANGE - 1; j++) { + data[j + 1] = (float)(data[j] * (float)1.001f); + } + } + + @Run(test = "testFloatP1") + @Warmup(0) + public static void runFloatP1() { + float[] data = new float[RANGE]; + init(data); + testFloatP1(data); + verify("testFloatP1", data, goldFloatP1); + } + + @Test + // CPU: sse4.1 -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // CPU: avx and avx2 -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testFloatM2(float[] data) { + for (int j = 2; j < RANGE; j++) { + data[j + -2] = (float)(data[j] * (float)1.001f); + } + } + + @Run(test = "testFloatM2") + @Warmup(0) + public static void runFloatM2() { + float[] data = new float[RANGE]; + init(data); + testFloatM2(data); + verify("testFloatM2", data, goldFloatM2); + } + + @Test + // CPU: sse4.1 -> vector_width: 16 -> elements in vector: 4 + // positive byte_offset 8 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 8"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // CPU: avx and avx2 -> vector_width: 32 -> elements in vector: 8 + // positive byte_offset 8 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 8"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 16 + // positive byte_offset 8 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 8"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // positive byte_offset 8 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 8"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testFloatP2(float[] data) { + for (int j = 0; j < RANGE - 2; j++) { + data[j + 2] = (float)(data[j] * (float)1.001f); + } + } + + @Run(test = "testFloatP2") + @Warmup(0) + public static void runFloatP2() { + float[] data = new float[RANGE]; + init(data); + testFloatP2(data); + verify("testFloatP2", data, goldFloatP2); + } + + @Test + // CPU: sse4.1 -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // CPU: avx and avx2 -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"avx512", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"asimd", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testFloatM3(float[] data) { + for (int j = 3; j < RANGE; j++) { + data[j + -3] = (float)(data[j] * (float)1.001f); + } + } + + @Run(test = "testFloatM3") + @Warmup(0) + public static void runFloatM3() { + float[] data = new float[RANGE]; + init(data); + testFloatM3(data); + verify("testFloatM3", data, goldFloatM3); + } + + @Test + // CPU: sse4.1 -> vector_width: 16 -> elements in vector: 4 + // positive byte_offset 12 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 12"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // CPU: avx and avx2 -> vector_width: 32 -> elements in vector: 8 + // positive byte_offset 12 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 12"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 16 + // positive byte_offset 12 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 12"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // positive byte_offset 12 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 12"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testFloatP3(float[] data) { + for (int j = 0; j < RANGE - 3; j++) { + data[j + 3] = (float)(data[j] * (float)1.001f); + } + } + + @Run(test = "testFloatP3") + @Warmup(0) + public static void runFloatP3() { + float[] data = new float[RANGE]; + init(data); + testFloatP3(data); + verify("testFloatP3", data, goldFloatP3); + } + + @Test + // CPU: sse4.1 -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // CPU: avx and avx2 -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testFloatM4(float[] data) { + for (int j = 4; j < RANGE; j++) { + data[j + -4] = (float)(data[j] * (float)1.001f); + } + } + + @Run(test = "testFloatM4") + @Warmup(0) + public static void runFloatM4() { + float[] data = new float[RANGE]; + init(data); + testFloatM4(data); + verify("testFloatM4", data, goldFloatM4); + } + + @Test + // CPU: sse4.1 -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // CPU: avx and avx2 -> vector_width: 32 -> elements in vector: 8 + // positive byte_offset 16 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 16"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 16 + // positive byte_offset 16 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 16"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // positive byte_offset 16 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testFloatP4(float[] data) { + for (int j = 0; j < RANGE - 4; j++) { + data[j + 4] = (float)(data[j] * (float)1.001f); + } + } + + @Run(test = "testFloatP4") + @Warmup(0) + public static void runFloatP4() { + float[] data = new float[RANGE]; + init(data); + testFloatP4(data); + verify("testFloatP4", data, goldFloatP4); + } + + @Test + // CPU: sse4.1 -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // CPU: avx and avx2 -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"avx512", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"asimd", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testFloatM7(float[] data) { + for (int j = 7; j < RANGE; j++) { + data[j + -7] = (float)(data[j] * (float)1.001f); + } + } + + @Run(test = "testFloatM7") + @Warmup(0) + public static void runFloatM7() { + float[] data = new float[RANGE]; + init(data); + testFloatM7(data); + verify("testFloatM7", data, goldFloatM7); + } + + @Test + // CPU: sse4.1 -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // CPU: avx and avx2 -> vector_width: 32 -> elements in vector: 8 + // positive byte_offset 28 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 28"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 16 + // positive byte_offset 28 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 28"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // positive byte_offset 28 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 28"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testFloatP7(float[] data) { + for (int j = 0; j < RANGE - 7; j++) { + data[j + 7] = (float)(data[j] * (float)1.001f); + } + } + + @Run(test = "testFloatP7") + @Warmup(0) + public static void runFloatP7() { + float[] data = new float[RANGE]; + init(data); + testFloatP7(data); + verify("testFloatP7", data, goldFloatP7); + } + + @Test + // CPU: sse4.1 -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // CPU: avx and avx2 -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testFloatM8(float[] data) { + for (int j = 8; j < RANGE; j++) { + data[j + -8] = (float)(data[j] * (float)1.001f); + } + } + + @Run(test = "testFloatM8") + @Warmup(0) + public static void runFloatM8() { + float[] data = new float[RANGE]; + init(data); + testFloatM8(data); + verify("testFloatM8", data, goldFloatM8); + } + + @Test + // CPU: sse4.1 -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // CPU: avx and avx2 -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 16 + // positive byte_offset 32 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 32"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"asimd", "true"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testFloatP8(float[] data) { + for (int j = 0; j < RANGE - 8; j++) { + data[j + 8] = (float)(data[j] * (float)1.001f); + } + } + + @Run(test = "testFloatP8") + @Warmup(0) + public static void runFloatP8() { + float[] data = new float[RANGE]; + init(data); + testFloatP8(data); + verify("testFloatP8", data, goldFloatP8); + } + + @Test + // CPU: sse4.1 -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // CPU: avx and avx2 -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testFloatM14(float[] data) { + for (int j = 14; j < RANGE; j++) { + data[j + -14] = (float)(data[j] * (float)1.001f); + } + } + + @Run(test = "testFloatM14") + @Warmup(0) + public static void runFloatM14() { + float[] data = new float[RANGE]; + init(data); + testFloatM14(data); + verify("testFloatM14", data, goldFloatM14); + } + + @Test + // CPU: sse4.1 -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // CPU: avx and avx2 -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 16 + // positive byte_offset 56 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 56"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testFloatP14(float[] data) { + for (int j = 0; j < RANGE - 14; j++) { + data[j + 14] = (float)(data[j] * (float)1.001f); + } + } + + @Run(test = "testFloatP14") + @Warmup(0) + public static void runFloatP14() { + float[] data = new float[RANGE]; + init(data); + testFloatP14(data); + verify("testFloatP14", data, goldFloatP14); + } + + @Test + // CPU: sse4.1 -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // CPU: avx and avx2 -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testFloatM16(float[] data) { + for (int j = 16; j < RANGE; j++) { + data[j + -16] = (float)(data[j] * (float)1.001f); + } + } + + @Run(test = "testFloatM16") + @Warmup(0) + public static void runFloatM16() { + float[] data = new float[RANGE]; + init(data); + testFloatM16(data); + verify("testFloatM16", data, goldFloatM16); + } + + @Test + // CPU: sse4.1 -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // CPU: avx and avx2 -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"avx512", "true"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"asimd", "true"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testFloatP16(float[] data) { + for (int j = 0; j < RANGE - 16; j++) { + data[j + 16] = (float)(data[j] * (float)1.001f); + } + } + + @Run(test = "testFloatP16") + @Warmup(0) + public static void runFloatP16() { + float[] data = new float[RANGE]; + init(data); + testFloatP16(data); + verify("testFloatP16", data, goldFloatP16); + } + + @Test + // CPU: sse4.1 -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // CPU: avx and avx2 -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testFloatM18(float[] data) { + for (int j = 18; j < RANGE; j++) { + data[j + -18] = (float)(data[j] * (float)1.001f); + } + } + + @Run(test = "testFloatM18") + @Warmup(0) + public static void runFloatM18() { + float[] data = new float[RANGE]; + init(data); + testFloatM18(data); + verify("testFloatM18", data, goldFloatM18); + } + + @Test + // CPU: sse4.1 -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // CPU: avx and avx2 -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testFloatP18(float[] data) { + for (int j = 0; j < RANGE - 18; j++) { + data[j + 18] = (float)(data[j] * (float)1.001f); + } + } + + @Run(test = "testFloatP18") + @Warmup(0) + public static void runFloatP18() { + float[] data = new float[RANGE]; + init(data); + testFloatP18(data); + verify("testFloatP18", data, goldFloatP18); + } + + @Test + // CPU: sse4.1 -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // CPU: avx and avx2 -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testFloatM20(float[] data) { + for (int j = 20; j < RANGE; j++) { + data[j + -20] = (float)(data[j] * (float)1.001f); + } + } + + @Run(test = "testFloatM20") + @Warmup(0) + public static void runFloatM20() { + float[] data = new float[RANGE]; + init(data); + testFloatM20(data); + verify("testFloatM20", data, goldFloatM20); + } + + @Test + // CPU: sse4.1 -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // CPU: avx and avx2 -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testFloatP20(float[] data) { + for (int j = 0; j < RANGE - 20; j++) { + data[j + 20] = (float)(data[j] * (float)1.001f); + } + } + + @Run(test = "testFloatP20") + @Warmup(0) + public static void runFloatP20() { + float[] data = new float[RANGE]; + init(data); + testFloatP20(data); + verify("testFloatP20", data, goldFloatP20); + } + + @Test + // CPU: sse4.1 -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // CPU: avx and avx2 -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"avx512", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"asimd", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testFloatM31(float[] data) { + for (int j = 31; j < RANGE; j++) { + data[j + -31] = (float)(data[j] * (float)1.001f); + } + } + + @Run(test = "testFloatM31") + @Warmup(0) + public static void runFloatM31() { + float[] data = new float[RANGE]; + init(data); + testFloatM31(data); + verify("testFloatM31", data, goldFloatM31); + } + + @Test + // CPU: sse4.1 -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // CPU: avx and avx2 -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testFloatP31(float[] data) { + for (int j = 0; j < RANGE - 31; j++) { + data[j + 31] = (float)(data[j] * (float)1.001f); + } + } + + @Run(test = "testFloatP31") + @Warmup(0) + public static void runFloatP31() { + float[] data = new float[RANGE]; + init(data); + testFloatP31(data); + verify("testFloatP31", data, goldFloatP31); + } + + @Test + // CPU: sse4.1 -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // CPU: avx and avx2 -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testFloatM32(float[] data) { + for (int j = 32; j < RANGE; j++) { + data[j + -32] = (float)(data[j] * (float)1.001f); + } + } + + @Run(test = "testFloatM32") + @Warmup(0) + public static void runFloatM32() { + float[] data = new float[RANGE]; + init(data); + testFloatM32(data); + verify("testFloatM32", data, goldFloatM32); + } + + @Test + // CPU: sse4.1 -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // CPU: avx and avx2 -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"avx512", "true"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"asimd", "true"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testFloatP32(float[] data) { + for (int j = 0; j < RANGE - 32; j++) { + data[j + 32] = (float)(data[j] * (float)1.001f); + } + } + + @Run(test = "testFloatP32") + @Warmup(0) + public static void runFloatP32() { + float[] data = new float[RANGE]; + init(data); + testFloatP32(data); + verify("testFloatP32", data, goldFloatP32); + } + + @Test + // CPU: sse4.1 -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // CPU: avx and avx2 -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"avx512", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"asimd", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testFloatM63(float[] data) { + for (int j = 63; j < RANGE; j++) { + data[j + -63] = (float)(data[j] * (float)1.001f); + } + } + + @Run(test = "testFloatM63") + @Warmup(0) + public static void runFloatM63() { + float[] data = new float[RANGE]; + init(data); + testFloatM63(data); + verify("testFloatM63", data, goldFloatM63); + } + + @Test + // CPU: sse4.1 -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // CPU: avx and avx2 -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testFloatP63(float[] data) { + for (int j = 0; j < RANGE - 63; j++) { + data[j + 63] = (float)(data[j] * (float)1.001f); + } + } + + @Run(test = "testFloatP63") + @Warmup(0) + public static void runFloatP63() { + float[] data = new float[RANGE]; + init(data); + testFloatP63(data); + verify("testFloatP63", data, goldFloatP63); + } + + @Test + // CPU: sse4.1 -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // CPU: avx and avx2 -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testFloatM64(float[] data) { + for (int j = 64; j < RANGE; j++) { + data[j + -64] = (float)(data[j] * (float)1.001f); + } + } + + @Run(test = "testFloatM64") + @Warmup(0) + public static void runFloatM64() { + float[] data = new float[RANGE]; + init(data); + testFloatM64(data); + verify("testFloatM64", data, goldFloatM64); + } + + @Test + // CPU: sse4.1 -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // CPU: avx and avx2 -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"avx512", "true"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"asimd", "true"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testFloatP64(float[] data) { + for (int j = 0; j < RANGE - 64; j++) { + data[j + 64] = (float)(data[j] * (float)1.001f); + } + } + + @Run(test = "testFloatP64") + @Warmup(0) + public static void runFloatP64() { + float[] data = new float[RANGE]; + init(data); + testFloatP64(data); + verify("testFloatP64", data, goldFloatP64); + } + + @Test + // CPU: sse4.1 -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // CPU: avx and avx2 -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"avx512", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"asimd", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testFloatM65(float[] data) { + for (int j = 65; j < RANGE; j++) { + data[j + -65] = (float)(data[j] * (float)1.001f); + } + } + + @Run(test = "testFloatM65") + @Warmup(0) + public static void runFloatM65() { + float[] data = new float[RANGE]; + init(data); + testFloatM65(data); + verify("testFloatM65", data, goldFloatM65); + } + + @Test + // CPU: sse4.1 -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // CPU: avx and avx2 -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testFloatP65(float[] data) { + for (int j = 0; j < RANGE - 65; j++) { + data[j + 65] = (float)(data[j] * (float)1.001f); + } + } + + @Run(test = "testFloatP65") + @Warmup(0) + public static void runFloatP65() { + float[] data = new float[RANGE]; + init(data); + testFloatP65(data); + verify("testFloatP65", data, goldFloatP65); + } + + @Test + // CPU: sse4.1 -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // CPU: avx and avx2 -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testFloatM128(float[] data) { + for (int j = 128; j < RANGE; j++) { + data[j + -128] = (float)(data[j] * (float)1.001f); + } + } + + @Run(test = "testFloatM128") + @Warmup(0) + public static void runFloatM128() { + float[] data = new float[RANGE]; + init(data); + testFloatM128(data); + verify("testFloatM128", data, goldFloatM128); + } + + @Test + // CPU: sse4.1 -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // CPU: avx and avx2 -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"avx512", "true"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"asimd", "true"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testFloatP128(float[] data) { + for (int j = 0; j < RANGE - 128; j++) { + data[j + 128] = (float)(data[j] * (float)1.001f); + } + } + + @Run(test = "testFloatP128") + @Warmup(0) + public static void runFloatP128() { + float[] data = new float[RANGE]; + init(data); + testFloatP128(data); + verify("testFloatP128", data, goldFloatP128); + } + + @Test + // CPU: sse4.1 -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // CPU: avx and avx2 -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"avx512", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"asimd", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testFloatM129(float[] data) { + for (int j = 129; j < RANGE; j++) { + data[j + -129] = (float)(data[j] * (float)1.001f); + } + } + + @Run(test = "testFloatM129") + @Warmup(0) + public static void runFloatM129() { + float[] data = new float[RANGE]; + init(data); + testFloatM129(data); + verify("testFloatM129", data, goldFloatM129); + } + + @Test + // CPU: sse4.1 -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // CPU: avx and avx2 -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testFloatP129(float[] data) { + for (int j = 0; j < RANGE - 129; j++) { + data[j + 129] = (float)(data[j] * (float)1.001f); + } + } + + @Run(test = "testFloatP129") + @Warmup(0) + public static void runFloatP129() { + float[] data = new float[RANGE]; + init(data); + testFloatP129(data); + verify("testFloatP129", data, goldFloatP129); + } + + @Test + // CPU: sse4.1 -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // CPU: avx and avx2 -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testFloatM192(float[] data) { + for (int j = 192; j < RANGE; j++) { + data[j + -192] = (float)(data[j] * (float)1.001f); + } + } + + @Run(test = "testFloatM192") + @Warmup(0) + public static void runFloatM192() { + float[] data = new float[RANGE]; + init(data); + testFloatM192(data); + verify("testFloatM192", data, goldFloatM192); + } + + @Test + // CPU: sse4.1 -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // CPU: avx and avx2 -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"avx512", "true"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"asimd", "true"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testFloatP192(float[] data) { + for (int j = 0; j < RANGE - 192; j++) { + data[j + 192] = (float)(data[j] * (float)1.001f); + } + } + + @Run(test = "testFloatP192") + @Warmup(0) + public static void runFloatP192() { + float[] data = new float[RANGE]; + init(data); + testFloatP192(data); + verify("testFloatP192", data, goldFloatP192); + } + + @Test + // CPU: sse4.1 -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // CPU: avx and avx2 -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testDoubleP0(double[] data) { + for (int j = 0; j < RANGE; j++) { + data[j + 0] = (double)(data[j] * (double)1.001); + } + } + + @Run(test = "testDoubleP0") + @Warmup(0) + public static void runDoubleP0() { + double[] data = new double[RANGE]; + init(data); + testDoubleP0(data); + verify("testDoubleP0", data, goldDoubleP0); + } + + @Test + // CPU: sse4.1 -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // CPU: avx and avx2 -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"avx512", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testDoubleM1(double[] data) { + for (int j = 1; j < RANGE; j++) { + data[j + -1] = (double)(data[j] * (double)1.001); + } + } + + @Run(test = "testDoubleM1") + @Warmup(0) + public static void runDoubleM1() { + double[] data = new double[RANGE]; + init(data); + testDoubleM1(data); + verify("testDoubleM1", data, goldDoubleM1); + } + + @Test + // CPU: sse4.1 -> vector_width: 16 -> elements in vector: 2 + // positive byte_offset 8 can lead to cyclic dependency + // No positive IR rule: conditions impossible. + // CPU: avx and avx2 -> vector_width: 32 -> elements in vector: 4 + // positive byte_offset 8 can lead to cyclic dependency + // No positive IR rule: conditions impossible. + // CPU: avx512 -> vector_width: 64 -> elements in vector: 8 + // positive byte_offset 8 can lead to cyclic dependency + // No positive IR rule: conditions impossible. + // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // positive byte_offset 8 can lead to cyclic dependency + // No positive IR rule: conditions impossible. + public static void testDoubleP1(double[] data) { + for (int j = 0; j < RANGE - 1; j++) { + data[j + 1] = (double)(data[j] * (double)1.001); + } + } + + @Run(test = "testDoubleP1") + @Warmup(0) + public static void runDoubleP1() { + double[] data = new double[RANGE]; + init(data); + testDoubleP1(data); + verify("testDoubleP1", data, goldDoubleP1); + } + + @Test + // CPU: sse4.1 -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // CPU: avx and avx2 -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testDoubleM2(double[] data) { + for (int j = 2; j < RANGE; j++) { + data[j + -2] = (double)(data[j] * (double)1.001); + } + } + + @Run(test = "testDoubleM2") + @Warmup(0) + public static void runDoubleM2() { + double[] data = new double[RANGE]; + init(data); + testDoubleM2(data); + verify("testDoubleM2", data, goldDoubleM2); + } + + @Test + // CPU: sse4.1 -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // CPU: avx and avx2 -> vector_width: 32 -> elements in vector: 4 + // positive byte_offset 16 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16", "MaxVectorSize", "<= 16"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 8 + // positive byte_offset 16 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16", "MaxVectorSize", "<= 16"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // positive byte_offset 16 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16", "MaxVectorSize", "<= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testDoubleP2(double[] data) { + for (int j = 0; j < RANGE - 2; j++) { + data[j + 2] = (double)(data[j] * (double)1.001); + } + } + + @Run(test = "testDoubleP2") + @Warmup(0) + public static void runDoubleP2() { + double[] data = new double[RANGE]; + init(data); + testDoubleP2(data); + verify("testDoubleP2", data, goldDoubleP2); + } + + @Test + // CPU: sse4.1 -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // CPU: avx and avx2 -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"avx512", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testDoubleM3(double[] data) { + for (int j = 3; j < RANGE; j++) { + data[j + -3] = (double)(data[j] * (double)1.001); + } + } + + @Run(test = "testDoubleM3") + @Warmup(0) + public static void runDoubleM3() { + double[] data = new double[RANGE]; + init(data); + testDoubleM3(data); + verify("testDoubleM3", data, goldDoubleM3); + } + + @Test + // CPU: sse4.1 -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // CPU: avx and avx2 -> vector_width: 32 -> elements in vector: 4 + // positive byte_offset 24 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16", "MaxVectorSize", "<= 24"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 8 + // positive byte_offset 24 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16", "MaxVectorSize", "<= 24"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // positive byte_offset 24 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16", "MaxVectorSize", "<= 24"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testDoubleP3(double[] data) { + for (int j = 0; j < RANGE - 3; j++) { + data[j + 3] = (double)(data[j] * (double)1.001); + } + } + + @Run(test = "testDoubleP3") + @Warmup(0) + public static void runDoubleP3() { + double[] data = new double[RANGE]; + init(data); + testDoubleP3(data); + verify("testDoubleP3", data, goldDoubleP3); + } + + @Test + // CPU: sse4.1 -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // CPU: avx and avx2 -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testDoubleM4(double[] data) { + for (int j = 4; j < RANGE; j++) { + data[j + -4] = (double)(data[j] * (double)1.001); + } + } + + @Run(test = "testDoubleM4") + @Warmup(0) + public static void runDoubleM4() { + double[] data = new double[RANGE]; + init(data); + testDoubleM4(data); + verify("testDoubleM4", data, goldDoubleM4); + } + + @Test + // CPU: sse4.1 -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // CPU: avx and avx2 -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 8 + // positive byte_offset 32 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16", "MaxVectorSize", "<= 32"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testDoubleP4(double[] data) { + for (int j = 0; j < RANGE - 4; j++) { + data[j + 4] = (double)(data[j] * (double)1.001); + } + } + + @Run(test = "testDoubleP4") + @Warmup(0) + public static void runDoubleP4() { + double[] data = new double[RANGE]; + init(data); + testDoubleP4(data); + verify("testDoubleP4", data, goldDoubleP4); + } + + @Test + // CPU: sse4.1 -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // CPU: avx and avx2 -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"avx512", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testDoubleM7(double[] data) { + for (int j = 7; j < RANGE; j++) { + data[j + -7] = (double)(data[j] * (double)1.001); + } + } + + @Run(test = "testDoubleM7") + @Warmup(0) + public static void runDoubleM7() { + double[] data = new double[RANGE]; + init(data); + testDoubleM7(data); + verify("testDoubleM7", data, goldDoubleM7); + } + + @Test + // CPU: sse4.1 -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // CPU: avx and avx2 -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 8 + // positive byte_offset 56 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16", "MaxVectorSize", "<= 56"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testDoubleP7(double[] data) { + for (int j = 0; j < RANGE - 7; j++) { + data[j + 7] = (double)(data[j] * (double)1.001); + } + } + + @Run(test = "testDoubleP7") + @Warmup(0) + public static void runDoubleP7() { + double[] data = new double[RANGE]; + init(data); + testDoubleP7(data); + verify("testDoubleP7", data, goldDoubleP7); + } + + @Test + // CPU: sse4.1 -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // CPU: avx and avx2 -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testDoubleM8(double[] data) { + for (int j = 8; j < RANGE; j++) { + data[j + -8] = (double)(data[j] * (double)1.001); + } + } + + @Run(test = "testDoubleM8") + @Warmup(0) + public static void runDoubleM8() { + double[] data = new double[RANGE]; + init(data); + testDoubleM8(data); + verify("testDoubleM8", data, goldDoubleM8); + } + + @Test + // CPU: sse4.1 -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // CPU: avx and avx2 -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"avx512", "true"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testDoubleP8(double[] data) { + for (int j = 0; j < RANGE - 8; j++) { + data[j + 8] = (double)(data[j] * (double)1.001); + } + } + + @Run(test = "testDoubleP8") + @Warmup(0) + public static void runDoubleP8() { + double[] data = new double[RANGE]; + init(data); + testDoubleP8(data); + verify("testDoubleP8", data, goldDoubleP8); + } + + @Test + // CPU: sse4.1 -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // CPU: avx and avx2 -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testDoubleM14(double[] data) { + for (int j = 14; j < RANGE; j++) { + data[j + -14] = (double)(data[j] * (double)1.001); + } + } + + @Run(test = "testDoubleM14") + @Warmup(0) + public static void runDoubleM14() { + double[] data = new double[RANGE]; + init(data); + testDoubleM14(data); + verify("testDoubleM14", data, goldDoubleM14); + } + + @Test + // CPU: sse4.1 -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // CPU: avx and avx2 -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testDoubleP14(double[] data) { + for (int j = 0; j < RANGE - 14; j++) { + data[j + 14] = (double)(data[j] * (double)1.001); + } + } + + @Run(test = "testDoubleP14") + @Warmup(0) + public static void runDoubleP14() { + double[] data = new double[RANGE]; + init(data); + testDoubleP14(data); + verify("testDoubleP14", data, goldDoubleP14); + } + + @Test + // CPU: sse4.1 -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // CPU: avx and avx2 -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testDoubleM16(double[] data) { + for (int j = 16; j < RANGE; j++) { + data[j + -16] = (double)(data[j] * (double)1.001); + } + } + + @Run(test = "testDoubleM16") + @Warmup(0) + public static void runDoubleM16() { + double[] data = new double[RANGE]; + init(data); + testDoubleM16(data); + verify("testDoubleM16", data, goldDoubleM16); + } + + @Test + // CPU: sse4.1 -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // CPU: avx and avx2 -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"avx512", "true"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testDoubleP16(double[] data) { + for (int j = 0; j < RANGE - 16; j++) { + data[j + 16] = (double)(data[j] * (double)1.001); + } + } + + @Run(test = "testDoubleP16") + @Warmup(0) + public static void runDoubleP16() { + double[] data = new double[RANGE]; + init(data); + testDoubleP16(data); + verify("testDoubleP16", data, goldDoubleP16); + } + + @Test + // CPU: sse4.1 -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // CPU: avx and avx2 -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testDoubleM18(double[] data) { + for (int j = 18; j < RANGE; j++) { + data[j + -18] = (double)(data[j] * (double)1.001); + } + } + + @Run(test = "testDoubleM18") + @Warmup(0) + public static void runDoubleM18() { + double[] data = new double[RANGE]; + init(data); + testDoubleM18(data); + verify("testDoubleM18", data, goldDoubleM18); + } + + @Test + // CPU: sse4.1 -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // CPU: avx and avx2 -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testDoubleP18(double[] data) { + for (int j = 0; j < RANGE - 18; j++) { + data[j + 18] = (double)(data[j] * (double)1.001); + } + } + + @Run(test = "testDoubleP18") + @Warmup(0) + public static void runDoubleP18() { + double[] data = new double[RANGE]; + init(data); + testDoubleP18(data); + verify("testDoubleP18", data, goldDoubleP18); + } + + @Test + // CPU: sse4.1 -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // CPU: avx and avx2 -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testDoubleM20(double[] data) { + for (int j = 20; j < RANGE; j++) { + data[j + -20] = (double)(data[j] * (double)1.001); + } + } + + @Run(test = "testDoubleM20") + @Warmup(0) + public static void runDoubleM20() { + double[] data = new double[RANGE]; + init(data); + testDoubleM20(data); + verify("testDoubleM20", data, goldDoubleM20); + } + + @Test + // CPU: sse4.1 -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // CPU: avx and avx2 -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testDoubleP20(double[] data) { + for (int j = 0; j < RANGE - 20; j++) { + data[j + 20] = (double)(data[j] * (double)1.001); + } + } + + @Run(test = "testDoubleP20") + @Warmup(0) + public static void runDoubleP20() { + double[] data = new double[RANGE]; + init(data); + testDoubleP20(data); + verify("testDoubleP20", data, goldDoubleP20); + } + + @Test + // CPU: sse4.1 -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // CPU: avx and avx2 -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"avx512", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testDoubleM31(double[] data) { + for (int j = 31; j < RANGE; j++) { + data[j + -31] = (double)(data[j] * (double)1.001); + } + } + + @Run(test = "testDoubleM31") + @Warmup(0) + public static void runDoubleM31() { + double[] data = new double[RANGE]; + init(data); + testDoubleM31(data); + verify("testDoubleM31", data, goldDoubleM31); + } + + @Test + // CPU: sse4.1 -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // CPU: avx and avx2 -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testDoubleP31(double[] data) { + for (int j = 0; j < RANGE - 31; j++) { + data[j + 31] = (double)(data[j] * (double)1.001); + } + } + + @Run(test = "testDoubleP31") + @Warmup(0) + public static void runDoubleP31() { + double[] data = new double[RANGE]; + init(data); + testDoubleP31(data); + verify("testDoubleP31", data, goldDoubleP31); + } + + @Test + // CPU: sse4.1 -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // CPU: avx and avx2 -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testDoubleM32(double[] data) { + for (int j = 32; j < RANGE; j++) { + data[j + -32] = (double)(data[j] * (double)1.001); + } + } + + @Run(test = "testDoubleM32") + @Warmup(0) + public static void runDoubleM32() { + double[] data = new double[RANGE]; + init(data); + testDoubleM32(data); + verify("testDoubleM32", data, goldDoubleM32); + } + + @Test + // CPU: sse4.1 -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // CPU: avx and avx2 -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"avx512", "true"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testDoubleP32(double[] data) { + for (int j = 0; j < RANGE - 32; j++) { + data[j + 32] = (double)(data[j] * (double)1.001); + } + } + + @Run(test = "testDoubleP32") + @Warmup(0) + public static void runDoubleP32() { + double[] data = new double[RANGE]; + init(data); + testDoubleP32(data); + verify("testDoubleP32", data, goldDoubleP32); + } + + @Test + // CPU: sse4.1 -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // CPU: avx and avx2 -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"avx512", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testDoubleM63(double[] data) { + for (int j = 63; j < RANGE; j++) { + data[j + -63] = (double)(data[j] * (double)1.001); + } + } + + @Run(test = "testDoubleM63") + @Warmup(0) + public static void runDoubleM63() { + double[] data = new double[RANGE]; + init(data); + testDoubleM63(data); + verify("testDoubleM63", data, goldDoubleM63); + } + + @Test + // CPU: sse4.1 -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // CPU: avx and avx2 -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testDoubleP63(double[] data) { + for (int j = 0; j < RANGE - 63; j++) { + data[j + 63] = (double)(data[j] * (double)1.001); + } + } + + @Run(test = "testDoubleP63") + @Warmup(0) + public static void runDoubleP63() { + double[] data = new double[RANGE]; + init(data); + testDoubleP63(data); + verify("testDoubleP63", data, goldDoubleP63); + } + + @Test + // CPU: sse4.1 -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // CPU: avx and avx2 -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testDoubleM64(double[] data) { + for (int j = 64; j < RANGE; j++) { + data[j + -64] = (double)(data[j] * (double)1.001); + } + } + + @Run(test = "testDoubleM64") + @Warmup(0) + public static void runDoubleM64() { + double[] data = new double[RANGE]; + init(data); + testDoubleM64(data); + verify("testDoubleM64", data, goldDoubleM64); + } + + @Test + // CPU: sse4.1 -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // CPU: avx and avx2 -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"avx512", "true"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testDoubleP64(double[] data) { + for (int j = 0; j < RANGE - 64; j++) { + data[j + 64] = (double)(data[j] * (double)1.001); + } + } + + @Run(test = "testDoubleP64") + @Warmup(0) + public static void runDoubleP64() { + double[] data = new double[RANGE]; + init(data); + testDoubleP64(data); + verify("testDoubleP64", data, goldDoubleP64); + } + + @Test + // CPU: sse4.1 -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // CPU: avx and avx2 -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"avx512", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testDoubleM65(double[] data) { + for (int j = 65; j < RANGE; j++) { + data[j + -65] = (double)(data[j] * (double)1.001); + } + } + + @Run(test = "testDoubleM65") + @Warmup(0) + public static void runDoubleM65() { + double[] data = new double[RANGE]; + init(data); + testDoubleM65(data); + verify("testDoubleM65", data, goldDoubleM65); + } + + @Test + // CPU: sse4.1 -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // CPU: avx and avx2 -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testDoubleP65(double[] data) { + for (int j = 0; j < RANGE - 65; j++) { + data[j + 65] = (double)(data[j] * (double)1.001); + } + } + + @Run(test = "testDoubleP65") + @Warmup(0) + public static void runDoubleP65() { + double[] data = new double[RANGE]; + init(data); + testDoubleP65(data); + verify("testDoubleP65", data, goldDoubleP65); + } + + @Test + // CPU: sse4.1 -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // CPU: avx and avx2 -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testDoubleM128(double[] data) { + for (int j = 128; j < RANGE; j++) { + data[j + -128] = (double)(data[j] * (double)1.001); + } + } + + @Run(test = "testDoubleM128") + @Warmup(0) + public static void runDoubleM128() { + double[] data = new double[RANGE]; + init(data); + testDoubleM128(data); + verify("testDoubleM128", data, goldDoubleM128); + } + + @Test + // CPU: sse4.1 -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // CPU: avx and avx2 -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"avx512", "true"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testDoubleP128(double[] data) { + for (int j = 0; j < RANGE - 128; j++) { + data[j + 128] = (double)(data[j] * (double)1.001); + } + } + + @Run(test = "testDoubleP128") + @Warmup(0) + public static void runDoubleP128() { + double[] data = new double[RANGE]; + init(data); + testDoubleP128(data); + verify("testDoubleP128", data, goldDoubleP128); + } + + @Test + // CPU: sse4.1 -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // CPU: avx and avx2 -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"avx512", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR, IRNode.MUL_V, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testDoubleM129(double[] data) { + for (int j = 129; j < RANGE; j++) { + data[j + -129] = (double)(data[j] * (double)1.001); + } + } + + @Run(test = "testDoubleM129") + @Warmup(0) + public static void runDoubleM129() { + double[] data = new double[RANGE]; + init(data); + testDoubleM129(data); + verify("testDoubleM129", data, goldDoubleM129); + } + + @Test + // CPU: sse4.1 -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // CPU: avx and avx2 -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testDoubleP129(double[] data) { + for (int j = 0; j < RANGE - 129; j++) { + data[j + 129] = (double)(data[j] * (double)1.001); + } + } + + @Run(test = "testDoubleP129") + @Warmup(0) + public static void runDoubleP129() { + double[] data = new double[RANGE]; + init(data); + testDoubleP129(data); + verify("testDoubleP129", data, goldDoubleP129); + } + + @Test + // CPU: sse4.1 -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // CPU: avx and avx2 -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testDoubleM192(double[] data) { + for (int j = 192; j < RANGE; j++) { + data[j + -192] = (double)(data[j] * (double)1.001); + } + } + + @Run(test = "testDoubleM192") + @Warmup(0) + public static void runDoubleM192() { + double[] data = new double[RANGE]; + init(data); + testDoubleM192(data); + verify("testDoubleM192", data, goldDoubleM192); + } + + @Test + // CPU: sse4.1 -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"sse4.1", "true", "avx", "false"}) + // CPU: avx and avx2 -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"avx", "true", "avx512", "false"}) + // CPU: avx512 -> vector_width: 64 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"avx512", "true"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"avx512", "true"}) + // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.MUL_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"asimd", "true"}) + public static void testDoubleP192(double[] data) { + for (int j = 0; j < RANGE - 192; j++) { + data[j + 192] = (double)(data[j] * (double)1.001); + } + } + + @Run(test = "testDoubleP192") + @Warmup(0) + public static void runDoubleP192() { + double[] data = new double[RANGE]; + init(data); + testDoubleP192(data); + verify("testDoubleP192", data, goldDoubleP192); + } + + // ------------------- Initialization ------------------- + + static void init(int[] data) { + for (int j = 0; j < RANGE; j++) { + data[j] = (int)j; + } + } + + static void init(long[] data) { + for (int j = 0; j < RANGE; j++) { + data[j] = (long)j; + } + } + + static void init(short[] data) { + for (int j = 0; j < RANGE; j++) { + data[j] = (short)j; + } + } + + static void init(char[] data) { + for (int j = 0; j < RANGE; j++) { + data[j] = (char)j; + } + } + + static void init(byte[] data) { + for (int j = 0; j < RANGE; j++) { + data[j] = (byte)j; + } + } + + static void init(float[] data) { + for (int j = 0; j < RANGE; j++) { + data[j] = (float)j; + } + } + + static void init(double[] data) { + for (int j = 0; j < RANGE; j++) { + data[j] = (double)j; + } + } + + // ------------------- Verification ------------------- + + static void verify(String context, int[] data, int[] gold) { + for (int i = 0; i < RANGE; i++) { + if (data[i] != gold[i]) { + throw new RuntimeException(" Invalid " + context + " result: data[" + i + "]: " + data[i] + " != " + gold[i]); + } + } + } + static void verify(String context, long[] data, long[] gold) { + for (int i = 0; i < RANGE; i++) { + if (data[i] != gold[i]) { + throw new RuntimeException(" Invalid " + context + " result: data[" + i + "]: " + data[i] + " != " + gold[i]); + } + } + } + static void verify(String context, short[] data, short[] gold) { + for (int i = 0; i < RANGE; i++) { + if (data[i] != gold[i]) { + throw new RuntimeException(" Invalid " + context + " result: data[" + i + "]: " + data[i] + " != " + gold[i]); + } + } + } + static void verify(String context, char[] data, char[] gold) { + for (int i = 0; i < RANGE; i++) { + if (data[i] != gold[i]) { + throw new RuntimeException(" Invalid " + context + " result: data[" + i + "]: " + data[i] + " != " + gold[i]); + } + } + } + static void verify(String context, byte[] data, byte[] gold) { + for (int i = 0; i < RANGE; i++) { + if (data[i] != gold[i]) { + throw new RuntimeException(" Invalid " + context + " result: data[" + i + "]: " + data[i] + " != " + gold[i]); + } + } + } + static void verify(String context, float[] data, float[] gold) { + for (int i = 0; i < RANGE; i++) { + if (data[i] != gold[i]) { + throw new RuntimeException(" Invalid " + context + " result: data[" + i + "]: " + data[i] + " != " + gold[i]); + } + } + } + static void verify(String context, double[] data, double[] gold) { + for (int i = 0; i < RANGE; i++) { + if (data[i] != gold[i]) { + throw new RuntimeException(" Invalid " + context + " result: data[" + i + "]: " + data[i] + " != " + gold[i]); + } + } + } +} diff --git a/test/hotspot/jtreg/compiler/vectorization/TestForEachRem.java b/test/hotspot/jtreg/compiler/vectorization/TestForEachRem.java index 12c5e5ba9ad..368b0ae654a 100644 --- a/test/hotspot/jtreg/compiler/vectorization/TestForEachRem.java +++ b/test/hotspot/jtreg/compiler/vectorization/TestForEachRem.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2023, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -27,10 +27,12 @@ * @summary Test vectorization of Streams$RangeIntSpliterator::forEachRemaining * @requires vm.compiler2.enabled & vm.compMode != "Xint" * - * @run main compiler.vectorization.TestForEachRem test1 - * @run main compiler.vectorization.TestForEachRem test2 - * @run main compiler.vectorization.TestForEachRem test3 - * @run main compiler.vectorization.TestForEachRem test4 + * @run main/othervm -Xbatch compiler.vectorization.TestForEachRem test1 + * @run main/othervm -Xbatch compiler.vectorization.TestForEachRem test2 + * @run main/othervm -Xbatch compiler.vectorization.TestForEachRem test3 + * @run main/othervm -Xbatch compiler.vectorization.TestForEachRem test4 + * @run main/othervm -Xbatch compiler.vectorization.TestForEachRem test5 + * @run main/othervm -Xbatch compiler.vectorization.TestForEachRem test6 */ package compiler.vectorization; @@ -65,6 +67,25 @@ public class TestForEachRem { }); } + static void test5(int[] data) { + IntStream.range(0, RANGE - 2).forEach(j -> { + data[j + 2] = data[j]; + }); + } + + static void initByte(byte[] data) { + IntStream.range(0, RANGE).parallel().forEach(j -> { + data[j] = (byte)j; + }); + } + + static void test6(byte[] data) { + // 2-byte offset -> can only vectorize if alignment not required by hardware + IntStream.range(0, RANGE - 2).forEach(j -> { + data[j] = data[j + 2]; + }); + } + static void verify(String name, int[] data, int[] gold) { for (int i = 0; i < RANGE; i++) { if (data[i] != gold[i]) { @@ -73,12 +94,22 @@ public class TestForEachRem { } } + static void verify(String name, byte[] data, byte[] gold) { + for (int i = 0; i < RANGE; i++) { + if (data[i] != gold[i]) { + throw new RuntimeException(" Invalid " + name + " result: data[" + i + "]: " + data[i] + " != " + gold[i]); + } + } + } + public static void main(String[] args) { int[] data = new int[RANGE]; int[] gold = new int[RANGE]; + byte[] dataB = new byte[RANGE]; + byte[] goldB = new byte[RANGE]; if (args.length == 0) { - throw new RuntimeException(" Missing test name: test1, test2, test3, test4"); + throw new RuntimeException(" Missing test name: test1, test2, test3, test4, test5"); } if (args[0].equals("test1")) { @@ -126,5 +157,30 @@ public class TestForEachRem { verify("test4", data, gold); System.out.println(" Finished test4."); } + + if (args[0].equals("test5")) { + System.out.println(" Run test5 ..."); + test1(gold); // reset + test5(gold); + for (int i = 0; i < ITER; i++) { + test1(data); // reset + test5(data); + } + verify("test5", data, gold); + System.out.println(" Finished test5."); + } + + if (args[0].equals("test6")) { + System.out.println(" Run test6 ..."); + initByte(goldB); // reset + test6(goldB); + for (int i = 0; i < ITER; i++) { + initByte(dataB); // reset + test6(dataB); + } + verify("test6", dataB, goldB); + System.out.println(" Finished test6."); + } + } } diff --git a/test/hotspot/jtreg/compiler/vectorization/TestOptionVectorizeIR.java b/test/hotspot/jtreg/compiler/vectorization/TestOptionVectorizeIR.java new file mode 100644 index 00000000000..8c5217c1d51 --- /dev/null +++ b/test/hotspot/jtreg/compiler/vectorization/TestOptionVectorizeIR.java @@ -0,0 +1,803 @@ +/* + * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/* + * @test + * @bug 8298935 + * @summary Test forced vectorization, and check IR for vector instructions + * @requires vm.compiler2.enabled + * @library /test/lib / + * @run driver compiler.vectorization.TestOptionVectorizeIR + */ + +package compiler.vectorization; +import compiler.lib.ir_framework.*; + +public class TestOptionVectorizeIR { + static final int RANGE = 512; + static final int ITER = 100; + int[] gold1 = new int[RANGE]; + int[] gold2 = new int[RANGE]; + int[] gold3 = new int[RANGE]; + int[] gold4 = new int[RANGE]; + int[] gold5 = new int[RANGE]; + int[] gold6 = new int[RANGE]; + + long[] gold10 = new long[RANGE]; + long[] gold11 = new long[RANGE]; + long[] gold12 = new long[RANGE]; + long[] gold13 = new long[RANGE]; + + short[] gold20 = new short[RANGE]; + short[] gold21 = new short[RANGE]; + short[] gold22 = new short[RANGE]; + short[] gold23 = new short[RANGE]; + + byte[] gold30 = new byte[RANGE]; + byte[] gold31 = new byte[RANGE]; + byte[] gold32 = new byte[RANGE]; + byte[] gold33 = new byte[RANGE]; + + char[] gold40 = new char[RANGE]; + char[] gold41 = new char[RANGE]; + char[] gold42 = new char[RANGE]; + char[] gold43 = new char[RANGE]; + + float[] gold50 = new float[RANGE]; + float[] gold51 = new float[RANGE]; + float[] gold52 = new float[RANGE]; + float[] gold53 = new float[RANGE]; + + double[] gold60 = new double[RANGE]; + double[] gold61 = new double[RANGE]; + double[] gold62 = new double[RANGE]; + double[] gold63 = new double[RANGE]; + + public static void main(String args[]) { + TestFramework.runWithFlags("-XX:CompileCommand=option,compiler.vectorization.TestOptionVectorizeIR::test*,Vectorize"); + } + + TestOptionVectorizeIR() { + // compute the gold standard in interpreter mode + // test1 + test1(gold1); + // test2 + test1(gold2); + test2(gold2); + // test3 + test1(gold3); + test3(gold3, 2, 3); + // test4 + test1(gold4); + test4(gold4); + // test5 + test1(gold5); + test5(gold5); + // test6 + test1(gold6); + test6(gold6); + + // long + init(gold10); + test10(gold10); + init(gold11); + test11(gold11); + init(gold12); + test12(gold12); + init(gold13); + test13(gold13); + + // short + init(gold20); + test20(gold20); + init(gold21); + test21(gold21); + init(gold22); + test22(gold22); + init(gold23); + test23(gold23); + + // byte + init(gold30); + test30(gold30); + init(gold31); + test31(gold31); + init(gold32); + test32(gold32); + init(gold33); + test33(gold33); + + // char + init(gold40); + test40(gold40); + init(gold41); + test41(gold41); + init(gold42); + test42(gold42); + init(gold43); + test43(gold43); + + // float + init(gold50); + test50(gold50); + init(gold51); + test51(gold51); + init(gold52); + test52(gold52); + init(gold53); + test53(gold53); + + // double + init(gold60); + test60(gold60); + init(gold61); + test61(gold61); + init(gold62); + test62(gold62); + init(gold63); + test63(gold63); + } + + @Run(test = "test1") + @Warmup(100) + public void runTest1() { + int[] data = new int[RANGE]; + test1(data); + verify("test1", data, gold1); + } + + @Run(test = "test2") + @Warmup(100) + public void runTest2() { + int[] data = new int[RANGE]; + test1(data); + test2(data); + verify("test2", data, gold2); + } + + @Run(test = "test3") + @Warmup(100) + public void runTest3() { + int[] data = new int[RANGE]; + test1(data); + test3(data, 2, 3); + verify("test3", data, gold3); + } + + @Run(test = "test4") + @Warmup(100) + public void runTest4() { + int[] data = new int[RANGE]; + test1(data); + test4(data); + verify("test4", data, gold4); + } + + @Run(test = "test5") + @Warmup(100) + public void runTest5() { + int[] data = new int[RANGE]; + test1(data); + test5(data); + verify("test5", data, gold5); + } + + @Run(test = "test6") + @Warmup(100) + public void runTest6() { + int[] data = new int[RANGE]; + test1(data); + test6(data); + verify("test6", data, gold6); + } + + @Test + static void test1(int[] data) { + for (int j = 0; j < RANGE; j++) { + // Vectorizes even if it is not forced + data[j] = j; + } + } + + @Test + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIf = {"AlignVector", "false"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) + static void test2(int[] data) { + for (int j = 0; j < RANGE - 1; j++) { + // Only vectorizes if forced, because of offset by 1 + data[j] = data[j] + data[j + 1]; + } + } + + @Test + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.REPLICATE_I, "> 0", IRNode.ADD_VI, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIf = {"AlignVector", "false"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) + static void test3(int[] data, int A, int B) { + for (int j = 0; j < RANGE - 1; j++) { + // Only vectorizes if forced, because of offset by 1 + data[j] = A * data[j] + B * data[j + 1]; + } + } + + @Test + static void test4(int[] data) { + for (int j = 0; j < RANGE - 1; j++) { + // write forward -> cyclic dependency -> cannot vectorize + // independent(s1, s2) for adjacent loads should detect this + data[j + 1] = data[j]; + } + } + + @Test + static void test5(int[] data) { + for (int j = 0; j < RANGE - 3; j++) { + // write forward -> cyclic dependency -> cannot vectorize + // independent(s1, s2) for adjacent loads cannot detect this + // Checks with memory_alignment are disabled via compile option + data[j + 2] = data[j]; + } + } + + @Test + static void test6(int[] data) { + for (int j = 0; j < RANGE - 3; j++) { + // write forward -> cyclic dependency -> cannot vectorize + // independent(s1, s2) for adjacent loads cannot detect this + // Checks with memory_alignment are disabled via compile option + data[j + 3] = data[j]; + } + } + + // ------------------------- Long ----------------------------- + + @Test + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIf = {"AlignVector", "false"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) + static void test10(long[] data) { + for (int j = 2; j < RANGE - 2; j++) { + data[j] += data[j + 2]; + } + } + + @Test + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIf = {"AlignVector", "false"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) + static void test11(long[] data) { + for (int j = 2; j < RANGE - 2; j++) { + data[j] += data[j + 1]; + } + } + + @Test + static void test12(long[] data) { + for (int j = 2; j < RANGE - 2; j++) { + data[j] += data[j - 1]; + } + } + + @Test + static void test13(long[] data) { + // 128-bit vectors -> can vectorize because only 2 elements + for (int j = 2; j < RANGE - 2; j++) { + data[j] += data[j - 2]; + } + } + + @Run(test = "test10") + @Warmup(100) + public void runTest10() { + long[] data = new long[RANGE]; + init(data); + test10(data); + verify("test10", data, gold10); + } + + @Run(test = "test11") + @Warmup(100) + public void runTest11() { + long[] data = new long[RANGE]; + init(data); + test11(data); + verify("test11", data, gold11); + } + + @Run(test = "test12") + @Warmup(100) + public void runTest12() { + long[] data = new long[RANGE]; + init(data); + test12(data); + verify("test12", data, gold12); + } + + @Run(test = "test13") + @Warmup(100) + public void runTest13() { + long[] data = new long[RANGE]; + init(data); + test13(data); + verify("test13", data, gold13); + } + + + // ------------------------- Short ----------------------------- + + @Test + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIf = {"AlignVector", "false"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) + static void test20(short[] data) { + for (int j = 2; j < RANGE - 2; j++) { + data[j] += data[j + 2]; + } + } + + @Test + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIf = {"AlignVector", "false"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) + static void test21(short[] data) { + for (int j = 2; j < RANGE - 2; j++) { + data[j] += data[j + 1]; + } + } + + @Test + static void test22(short[] data) { + for (int j = 2; j < RANGE - 2; j++) { + data[j] += data[j - 1]; + } + } + + @Test + static void test23(short[] data) { + for (int j = 2; j < RANGE - 2; j++) { + data[j] += data[j - 2]; + } + } + + @Run(test = "test20") + @Warmup(100) + public void runTest20() { + short[] data = new short[RANGE]; + init(data); + test20(data); + verify("test20", data, gold20); + } + + @Run(test = "test21") + @Warmup(100) + public void runTest21() { + short[] data = new short[RANGE]; + init(data); + test21(data); + verify("test21", data, gold21); + } + + @Run(test = "test22") + @Warmup(100) + public void runTest22() { + short[] data = new short[RANGE]; + init(data); + test22(data); + verify("test22", data, gold22); + } + + @Run(test = "test23") + @Warmup(100) + public void runTest23() { + short[] data = new short[RANGE]; + init(data); + test23(data); + verify("test23", data, gold23); + } + + + // ------------------------- Byte ----------------------------- + + @Test + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIf = {"AlignVector", "false"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) + static void test30(byte[] data) { + for (int j = 2; j < RANGE - 2; j++) { + data[j] += data[j + 2]; + } + } + + @Test + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIf = {"AlignVector", "false"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) + static void test31(byte[] data) { + for (int j = 2; j < RANGE - 2; j++) { + data[j] += data[j + 1]; + } + } + + @Test + static void test32(byte[] data) { + for (int j = 2; j < RANGE - 2; j++) { + data[j] += data[j - 1]; + } + } + + @Test + static void test33(byte[] data) { + for (int j = 2; j < RANGE - 2; j++) { + data[j] += data[j - 2]; + } + } + + @Run(test = "test30") + @Warmup(100) + public void runTest30() { + byte[] data = new byte[RANGE]; + init(data); + test30(data); + verify("test30", data, gold30); + } + + @Run(test = "test31") + @Warmup(100) + public void runTest31() { + byte[] data = new byte[RANGE]; + init(data); + test31(data); + verify("test31", data, gold31); + } + + @Run(test = "test32") + @Warmup(100) + public void runTest32() { + byte[] data = new byte[RANGE]; + init(data); + test32(data); + verify("test32", data, gold32); + } + + @Run(test = "test33") + @Warmup(100) + public void runTest33() { + byte[] data = new byte[RANGE]; + init(data); + test33(data); + verify("test33", data, gold33); + } + + + // ------------------------- Char ----------------------------- + + @Test + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIf = {"AlignVector", "false"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) + static void test40(char[] data) { + for (int j = 2; j < RANGE - 2; j++) { + data[j] += data[j + 2]; + } + } + + @Test + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIf = {"AlignVector", "false"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) + static void test41(char[] data) { + for (int j = 2; j < RANGE - 2; j++) { + data[j] += data[j + 1]; + } + } + + @Test + static void test42(char[] data) { + for (int j = 2; j < RANGE - 2; j++) { + data[j] += data[j - 1]; + } + } + + @Test + static void test43(char[] data) { + for (int j = 2; j < RANGE - 2; j++) { + data[j] += data[j - 2]; + } + } + + @Run(test = "test40") + @Warmup(100) + public void runTest40() { + char[] data = new char[RANGE]; + init(data); + test40(data); + verify("test40", data, gold40); + } + + @Run(test = "test41") + @Warmup(100) + public void runTest41() { + char[] data = new char[RANGE]; + init(data); + test41(data); + verify("test41", data, gold41); + } + + @Run(test = "test42") + @Warmup(100) + public void runTest42() { + char[] data = new char[RANGE]; + init(data); + test42(data); + verify("test42", data, gold42); + } + + @Run(test = "test43") + @Warmup(100) + public void runTest43() { + char[] data = new char[RANGE]; + init(data); + test43(data); + verify("test43", data, gold43); + } + + // ------------------------- Float ----------------------------- + + @Test + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIf = {"AlignVector", "false"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) + static void test50(float[] data) { + for (int j = 2; j < RANGE - 2; j++) { + data[j] += data[j + 2]; + } + } + + @Test + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIf = {"AlignVector", "false"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) + static void test51(float[] data) { + for (int j = 2; j < RANGE - 2; j++) { + data[j] += data[j + 1]; + } + } + + @Test + static void test52(float[] data) { + for (int j = 2; j < RANGE - 2; j++) { + data[j] += data[j - 1]; + } + } + + @Test + static void test53(float[] data) { + for (int j = 2; j < RANGE - 2; j++) { + data[j] += data[j - 2]; + } + } + + @Run(test = "test50") + @Warmup(100) + public void runTest50() { + float[] data = new float[RANGE]; + init(data); + test50(data); + verify("test50", data, gold50); + } + + @Run(test = "test51") + @Warmup(100) + public void runTest51() { + float[] data = new float[RANGE]; + init(data); + test51(data); + verify("test51", data, gold51); + } + + @Run(test = "test52") + @Warmup(100) + public void runTest52() { + float[] data = new float[RANGE]; + init(data); + test52(data); + verify("test52", data, gold52); + } + + @Run(test = "test53") + @Warmup(100) + public void runTest53() { + float[] data = new float[RANGE]; + init(data); + test53(data); + verify("test53", data, gold53); + } + + // ------------------------- Double ----------------------------- + + @Test + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIf = {"AlignVector", "false"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) + static void test60(double[] data) { + for (int j = 2; j < RANGE - 2; j++) { + data[j] += data[j + 2]; + } + } + + @Test + @IR(counts = {IRNode.LOAD_VECTOR, "> 0", IRNode.ADD_V, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIf = {"AlignVector", "false"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) + static void test61(double[] data) { + for (int j = 2; j < RANGE - 2; j++) { + data[j] += data[j + 1]; + } + } + + @Test + static void test62(double[] data) { + for (int j = 2; j < RANGE - 2; j++) { + data[j] += data[j - 1]; + } + } + + @Test + static void test63(double[] data) { + // 128-bit vectors -> can vectorize because only 2 elements + for (int j = 2; j < RANGE - 2; j++) { + data[j] += data[j - 2]; + } + } + + @Run(test = "test60") + @Warmup(100) + public void runTest60() { + double[] data = new double[RANGE]; + init(data); + test60(data); + verify("test60", data, gold60); + } + + @Run(test = "test61") + @Warmup(100) + public void runTest61() { + double[] data = new double[RANGE]; + init(data); + test61(data); + verify("test61", data, gold61); + } + + @Run(test = "test62") + @Warmup(100) + public void runTest62() { + double[] data = new double[RANGE]; + init(data); + test62(data); + verify("test62", data, gold62); + } + + @Run(test = "test63") + @Warmup(100) + public void runTest63() { + double[] data = new double[RANGE]; + init(data); + test63(data); + verify("test63", data, gold63); + } + + static void init(long[] data) { + for (int j = 0; j < RANGE; j++) { + data[j] = j; + } + } + + static void init(short[] data) { + for (int j = 0; j < RANGE; j++) { + data[j] = (short)j; + } + } + + static void init(byte[] data) { + for (int j = 0; j < RANGE; j++) { + data[j] = (byte)j; + } + } + + static void init(char[] data) { + for (int j = 0; j < RANGE; j++) { + data[j] = (char)j; + } + } + + + static void init(float[] data) { + for (int j = 0; j < RANGE; j++) { + data[j] = j; + } + } + + + static void init(double[] data) { + for (int j = 0; j < RANGE; j++) { + data[j] = j; + } + } + + static void verify(String name, int[] data, int[] gold) { + for (int i = 0; i < RANGE; i++) { + if (data[i] != gold[i]) { + throw new RuntimeException(" Invalid " + name + " result: data[" + i + "]: " + data[i] + " != " + gold[i]); + } + } + } + + static void verify(String name, long[] data, long[] gold) { + for (int i = 0; i < RANGE; i++) { + if (data[i] != gold[i]) { + throw new RuntimeException(" Invalid " + name + " result: data[" + i + "]: " + data[i] + " != " + gold[i]); + } + } + } + + static void verify(String name, short[] data, short[] gold) { + for (int i = 0; i < RANGE; i++) { + if (data[i] != gold[i]) { + throw new RuntimeException(" Invalid " + name + " result: data[" + i + "]: " + data[i] + " != " + gold[i]); + } + } + } + + static void verify(String name, byte[] data, byte[] gold) { + for (int i = 0; i < RANGE; i++) { + if (data[i] != gold[i]) { + throw new RuntimeException(" Invalid " + name + " result: data[" + i + "]: " + data[i] + " != " + gold[i]); + } + } + } + + static void verify(String name, char[] data, char[] gold) { + for (int i = 0; i < RANGE; i++) { + if (data[i] != gold[i]) { + throw new RuntimeException(" Invalid " + name + " result: data[" + i + "]: " + data[i] + " != " + gold[i]); + } + } + } + + static void verify(String name, float[] data, float[] gold) { + for (int i = 0; i < RANGE; i++) { + if (data[i] != gold[i]) { + throw new RuntimeException(" Invalid " + name + " result: data[" + i + "]: " + data[i] + " != " + gold[i]); + } + } + } + + static void verify(String name, double[] data, double[] gold) { + for (int i = 0; i < RANGE; i++) { + if (data[i] != gold[i]) { + throw new RuntimeException(" Invalid " + name + " result: data[" + i + "]: " + data[i] + " != " + gold[i]); + } + } + } +}