From beb75e651f1e4a9bd21f611f9abc7ca28afbae31 Mon Sep 17 00:00:00 2001 From: Emanuel Peter Date: Wed, 24 May 2023 07:00:27 +0000 Subject: [PATCH] 8306302: C2 Superword fix: use VectorMaskCmp and VectorBlend instead of CMoveVF/D Reviewed-by: fgao, jbhateja --- src/hotspot/cpu/aarch64/aarch64_vector.ad | 43 -- src/hotspot/cpu/aarch64/aarch64_vector_ad.m4 | 43 -- src/hotspot/cpu/x86/x86.ad | 75 -- src/hotspot/share/adlc/formssel.cpp | 1 - src/hotspot/share/opto/classes.hpp | 2 - src/hotspot/share/opto/matcher.cpp | 14 - src/hotspot/share/opto/superword.cpp | 455 ++++-------- src/hotspot/share/opto/superword.hpp | 27 +- src/hotspot/share/opto/vectornode.cpp | 10 +- src/hotspot/share/opto/vectornode.hpp | 16 - src/hotspot/share/runtime/vmStructs.cpp | 2 - .../c2/irTests/TestVectorConditionalMove.java | 700 +++++++++++++++++- .../compiler/lib/ir_framework/IRNode.java | 15 +- 13 files changed, 820 insertions(+), 583 deletions(-) diff --git a/src/hotspot/cpu/aarch64/aarch64_vector.ad b/src/hotspot/cpu/aarch64/aarch64_vector.ad index 8633626fd0a..138eabb7553 100644 --- a/src/hotspot/cpu/aarch64/aarch64_vector.ad +++ b/src/hotspot/cpu/aarch64/aarch64_vector.ad @@ -5992,49 +5992,6 @@ instruct vblend_sve(vReg dst, vReg src1, vReg src2, pReg pg) %{ ins_pipe(pipe_slow); %} -// ------------------------- Vector conditional move -------------------------- - -instruct vcmove_neon(vReg dst, vReg src1, vReg src2, immI cond, cmpOp copnd) %{ - predicate(UseSVE == 0 || - (VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n)) && - n->in(1)->in(2)->get_int() != BoolTest::ne)); - match(Set dst (CMoveVF (Binary copnd cond) (Binary src1 src2))); - match(Set dst (CMoveVD (Binary copnd cond) (Binary src1 src2))); - effect(TEMP_DEF dst); - format %{ "vcmove_neon.$copnd $dst, $src1, $src2\t# vector conditional move fp" %} - ins_encode %{ - Assembler::Condition condition = to_assembler_cond((BoolTest::mask)$cond$$constant); - BasicType bt = Matcher::vector_element_basic_type(this); - 
uint length_in_bytes = Matcher::vector_length_in_bytes(this); - assert(length_in_bytes == 8 || length_in_bytes == 16, "must be"); - __ neon_compare($dst$$FloatRegister, bt, $src1$$FloatRegister, - $src2$$FloatRegister, condition, /* isQ */ length_in_bytes == 16); - __ bsl($dst$$FloatRegister, length_in_bytes == 16 ? __ T16B : __ T8B, - $src2$$FloatRegister, $src1$$FloatRegister); - %} - ins_pipe(pipe_slow); -%} - -instruct vcmove_sve(vReg dst, vReg src1, vReg src2, immI cond, cmpOp copnd, pRegGov pgtmp) %{ - predicate(!VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n)) || - (UseSVE > 0 && n->in(1)->in(2)->get_int() == BoolTest::ne)); - match(Set dst (CMoveVF (Binary copnd cond) (Binary src1 src2))); - match(Set dst (CMoveVD (Binary copnd cond) (Binary src1 src2))); - effect(TEMP pgtmp); - format %{ "vcmove_sve.$copnd $dst, $src1, $src2\t# vector conditional move fp. KILL $pgtmp" %} - ins_encode %{ - assert(UseSVE > 0, "must be sve"); - Assembler::Condition condition = to_assembler_cond((BoolTest::mask)$cond$$constant); - BasicType bt = Matcher::vector_element_basic_type(this); - uint length_in_bytes = Matcher::vector_length_in_bytes(this); - __ sve_compare($pgtmp$$PRegister, bt, ptrue, $src1$$FloatRegister, - $src2$$FloatRegister, condition); - __ sve_sel($dst$$FloatRegister, __ elemType_to_regVariant(bt), - $pgtmp$$PRegister, $src2$$FloatRegister, $src1$$FloatRegister); - %} - ins_pipe(pipe_slow); -%} - // ------------------------------ Vector round --------------------------------- // vector Math.round diff --git a/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4 b/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4 index 53a224a2cbe..1c91ed0d1c9 100644 --- a/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4 +++ b/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4 @@ -4258,49 +4258,6 @@ instruct vblend_sve(vReg dst, vReg src1, vReg src2, pReg pg) %{ ins_pipe(pipe_slow); %} -// ------------------------- Vector conditional move -------------------------- - -instruct 
vcmove_neon(vReg dst, vReg src1, vReg src2, immI cond, cmpOp copnd) %{ - predicate(UseSVE == 0 || - (VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n)) && - n->in(1)->in(2)->get_int() != BoolTest::ne)); - match(Set dst (CMoveVF (Binary copnd cond) (Binary src1 src2))); - match(Set dst (CMoveVD (Binary copnd cond) (Binary src1 src2))); - effect(TEMP_DEF dst); - format %{ "vcmove_neon.$copnd $dst, $src1, $src2\t# vector conditional move fp" %} - ins_encode %{ - Assembler::Condition condition = to_assembler_cond((BoolTest::mask)$cond$$constant); - BasicType bt = Matcher::vector_element_basic_type(this); - uint length_in_bytes = Matcher::vector_length_in_bytes(this); - assert(length_in_bytes == 8 || length_in_bytes == 16, "must be"); - __ neon_compare($dst$$FloatRegister, bt, $src1$$FloatRegister, - $src2$$FloatRegister, condition, /* isQ */ length_in_bytes == 16); - __ bsl($dst$$FloatRegister, length_in_bytes == 16 ? __ T16B : __ T8B, - $src2$$FloatRegister, $src1$$FloatRegister); - %} - ins_pipe(pipe_slow); -%} - -instruct vcmove_sve(vReg dst, vReg src1, vReg src2, immI cond, cmpOp copnd, pRegGov pgtmp) %{ - predicate(!VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n)) || - (UseSVE > 0 && n->in(1)->in(2)->get_int() == BoolTest::ne)); - match(Set dst (CMoveVF (Binary copnd cond) (Binary src1 src2))); - match(Set dst (CMoveVD (Binary copnd cond) (Binary src1 src2))); - effect(TEMP pgtmp); - format %{ "vcmove_sve.$copnd $dst, $src1, $src2\t# vector conditional move fp. 
KILL $pgtmp" %} - ins_encode %{ - assert(UseSVE > 0, "must be sve"); - Assembler::Condition condition = to_assembler_cond((BoolTest::mask)$cond$$constant); - BasicType bt = Matcher::vector_element_basic_type(this); - uint length_in_bytes = Matcher::vector_length_in_bytes(this); - __ sve_compare($pgtmp$$PRegister, bt, ptrue, $src1$$FloatRegister, - $src2$$FloatRegister, condition); - __ sve_sel($dst$$FloatRegister, __ elemType_to_regVariant(bt), - $pgtmp$$PRegister, $src2$$FloatRegister, $src1$$FloatRegister); - %} - ins_pipe(pipe_slow); -%} - // ------------------------------ Vector round --------------------------------- // vector Math.round diff --git a/src/hotspot/cpu/x86/x86.ad b/src/hotspot/cpu/x86/x86.ad index b1b90093a95..4d63afa7303 100644 --- a/src/hotspot/cpu/x86/x86.ad +++ b/src/hotspot/cpu/x86/x86.ad @@ -1504,12 +1504,6 @@ const bool Matcher::match_rule_supported(int opcode) { return false; } break; - case Op_CMoveVF: - case Op_CMoveVD: - if (UseAVX < 1) { // enabled for AVX only - return false; - } - break; case Op_StrIndexOf: if (!UseSSE42Intrinsics) { return false; @@ -1740,11 +1734,6 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType return false; // 512bit vpmullq, vandpd and vxorpd are not available } break; - case Op_CMoveVF: - if (vlen != 8) { - return false; // implementation limitation (only vcmov8F_reg is present) - } - break; case Op_RotateRightV: case Op_RotateLeftV: if (bt != T_INT && bt != T_LONG) { @@ -1772,11 +1761,6 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType return false; } break; - case Op_CMoveVD: - if (vlen != 4) { - return false; // implementation limitation (only vcmov4D_reg is present) - } - break; case Op_MaxV: case Op_MinV: if (UseSSE < 4 && is_integral_type(bt)) { @@ -2947,29 +2931,6 @@ operand legVecZ() %{ interface(REG_INTER); %} -// Comparison Code for FP conditional move -operand cmpOp_vcmppd() %{ - match(Bool); - - predicate(n->as_Bool()->_test._test 
!= BoolTest::overflow && - n->as_Bool()->_test._test != BoolTest::no_overflow); - format %{ "" %} - interface(COND_INTER) %{ - equal (0x0, "eq"); - less (0x1, "lt"); - less_equal (0x2, "le"); - not_equal (0xC, "ne"); - greater_equal(0xD, "ge"); - greater (0xE, "gt"); - //TODO cannot compile (adlc breaks) without two next lines with error: - // x86_64.ad(13987) Syntax Error: :In operand cmpOp_vcmppd: Do not support this encode constant: ' %{ - // equal' for overflow. - overflow (0x20, "o"); // not really supported by the instruction - no_overflow (0x21, "no"); // not really supported by the instruction - %} -%} - - // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit) // ============================================================================ @@ -5983,42 +5944,6 @@ instruct vmulD_mem(vec dst, vec src, memory mem) %{ ins_pipe( pipe_slow ); %} -instruct vcmov8F_reg(legVec dst, legVec src1, legVec src2, immI8 cop, cmpOp_vcmppd copnd) %{ - predicate(Matcher::vector_length(n) == 8); - match(Set dst (CMoveVF (Binary copnd cop) (Binary src1 src2))); - effect(TEMP dst, USE src1, USE src2); - format %{ "cmpps.$copnd $dst, $src1, $src2 ! vcmovevf, cond=$cop\n\t" - "blendvps $dst,$src1,$src2,$dst ! vcmovevf\n\t" - %} - ins_encode %{ - assert(UseAVX > 0, "required"); - - int vlen_enc = Assembler::AVX_256bit; - int cond = (Assembler::Condition)($copnd$$cmpcode); - __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vlen_enc); - __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vlen_enc); - %} - ins_pipe( pipe_slow ); -%} - -instruct vcmov4D_reg(legVec dst, legVec src1, legVec src2, immI8 cop, cmpOp_vcmppd copnd) %{ - predicate(Matcher::vector_length(n) == 4); - match(Set dst (CMoveVD (Binary copnd cop) (Binary src1 src2))); - effect(TEMP dst, USE src1, USE src2); - format %{ "cmppd.$copnd $dst, $src1, $src2 ! vcmovevd, cond=$cop\n\t" - "vblendvpd $dst,$src1,$src2,$dst ! 
vcmovevd\n\t" - %} - ins_encode %{ - assert(UseAVX > 0, "required"); - - int vlen_enc = Assembler::AVX_256bit; - int cond = (Assembler::Condition)($copnd$$cmpcode); - __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vlen_enc); - __ vblendvpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vlen_enc); - %} - ins_pipe( pipe_slow ); -%} - // --------------------------------- DIV -------------------------------------- // Floats vector div diff --git a/src/hotspot/share/adlc/formssel.cpp b/src/hotspot/share/adlc/formssel.cpp index 316fb06b2db..8a5d6ed6174 100644 --- a/src/hotspot/share/adlc/formssel.cpp +++ b/src/hotspot/share/adlc/formssel.cpp @@ -4203,7 +4203,6 @@ bool MatchRule::is_vector() const { "AddVB","AddVS","AddVI","AddVL","AddVF","AddVD", "SubVB","SubVS","SubVI","SubVL","SubVF","SubVD", "MulVB","MulVS","MulVI","MulVL","MulVF","MulVD", - "CMoveVD", "CMoveVF", "DivVF","DivVD", "AbsVB","AbsVS","AbsVI","AbsVL","AbsVF","AbsVD", "NegVF","NegVD","NegVI","NegVL", diff --git a/src/hotspot/share/opto/classes.hpp b/src/hotspot/share/opto/classes.hpp index 4531b0ecd8f..ca8d0614def 100644 --- a/src/hotspot/share/opto/classes.hpp +++ b/src/hotspot/share/opto/classes.hpp @@ -81,9 +81,7 @@ macro(CompressBitsV) macro(ExpandBitsV) macro(ConstraintCast) macro(CMoveD) -macro(CMoveVD) macro(CMoveF) -macro(CMoveVF) macro(CMoveI) macro(CMoveL) macro(CMoveP) diff --git a/src/hotspot/share/opto/matcher.cpp b/src/hotspot/share/opto/matcher.cpp index 15cea8a2555..dad482b9b4d 100644 --- a/src/hotspot/share/opto/matcher.cpp +++ b/src/hotspot/share/opto/matcher.cpp @@ -2385,20 +2385,6 @@ void Matcher::find_shared_post_visit(Node* n, uint opcode) { n->del_req(3); break; } - case Op_CMoveVF: - case Op_CMoveVD: { - // Restructure into a binary tree for Matching: - // CMoveVF (Binary bool mask) (Binary src1 src2) - Node* in_cc = n->in(1); - assert(in_cc->is_Con(), "The condition input of cmove vector node must be a constant."); - Node* 
bol = new BoolNode(in_cc, (BoolTest::mask)in_cc->get_int()); - Node* pair1 = new BinaryNode(bol, in_cc); - n->set_req(1, pair1); - Node* pair2 = new BinaryNode(n->in(2), n->in(3)); - n->set_req(2, pair2); - n->del_req(3); - break; - } case Op_MacroLogicV: { Node* pair1 = new BinaryNode(n->in(1), n->in(2)); Node* pair2 = new BinaryNode(n->in(3), n->in(4)); diff --git a/src/hotspot/share/opto/superword.cpp b/src/hotspot/share/opto/superword.cpp index fd1b6c35afd..a3625afe815 100644 --- a/src/hotspot/share/opto/superword.cpp +++ b/src/hotspot/share/opto/superword.cpp @@ -60,7 +60,6 @@ SuperWord::SuperWord(PhaseIdealLoop* phase) : _mem_slice_tail(arena(), 8, 0, nullptr), // memory slice tails _node_info(arena(), 8, 0, SWNodeInfo::initial), // info needed per node _clone_map(phase->C->clone_map()), // map of nodes created in cloning - _cmovev_kit(_arena, this), // map to facilitate CMoveV creation _align_to_ref(nullptr), // memory reference to align vectors to _disjoint_ptrs(arena(), 8, 0, OrderedPair::initial), // runtime disambiguated pointer pairs _dg(_arena), // dependence graph @@ -619,9 +618,6 @@ bool SuperWord::SLP_extract() { combine_packs(); construct_my_pack_map(); - if (UseVectorCmov) { - merge_packs_to_cmove(); - } filter_packs(); @@ -1580,18 +1576,6 @@ void SuperWord::set_alignment(Node* s1, Node* s2, int align) { //------------------------------data_size--------------------------- int SuperWord::data_size(Node* s) { - Node* use = nullptr; //test if the node is a candidate for CMoveV optimization, then return the size of CMov - if (UseVectorCmov) { - use = _cmovev_kit.is_Bool_candidate(s); - if (use != nullptr) { - return data_size(use); - } - use = _cmovev_kit.is_Cmp_candidate(s); - if (use != nullptr) { - return data_size(use); - } - } - int bsize = type2aelembytes(velt_basic_type(s)); assert(bsize != 0, "valid size"); return bsize; @@ -2052,213 +2036,6 @@ void SuperWord::filter_packs() { #endif } 
-//------------------------------merge_packs_to_cmove--------------------------- -// Merge qualified CMove into new vector-nodes -// We want to catch this pattern and subsume Cmp and Bool into CMove -// -// Sub Con -// / | / -// / | / / -// / | / / -// / | / / -// / / / -// / / | / -// v / | / -// Cmp | / -// | | / -// v | / -// Bool | / -// \ | / -// \ | / -// \ | / -// \ | / -// \ v / -// CMove -// - -void SuperWord::merge_packs_to_cmove() { - for (int i = _packset.length() - 1; i >= 0; i--) { - Node_List* pk = _packset.at(i); - if (_cmovev_kit.can_merge_cmove_pack(pk)) { - _cmovev_kit.make_cmove_pack(pk); - } - } - - #ifndef PRODUCT - if (TraceSuperWord) { - tty->print_cr("\nSuperWord::merge_packs_to_cmove(): After merge"); - print_packset(); - tty->cr(); - } - #endif -} - -Node* CMoveKit::is_Bool_candidate(Node* def) const { - Node* use = nullptr; - if (!def->is_Bool() || def->in(0) != nullptr || def->outcnt() != 1) { - return nullptr; - } - for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) { - use = def->fast_out(j); - if (!_sw->same_generation(def, use) || !use->is_CMove()) { - return nullptr; - } - } - return use; -} - -Node* CMoveKit::is_Cmp_candidate(Node* def) const { - Node* use = nullptr; - if (!def->is_Cmp() || def->in(0) != nullptr || def->outcnt() != 1) { - return nullptr; - } - for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) { - use = def->fast_out(j); - if (!_sw->same_generation(def, use) || (use = is_Bool_candidate(use)) == nullptr || !_sw->same_generation(def, use)) { - return nullptr; - } - } - return use; -} - -// Determine if the current pack is an ideal cmove pack, and if its related packs, -// i.e. bool node pack and cmp node pack, can be successfully merged for vectorization. 
-bool CMoveKit::can_merge_cmove_pack(Node_List* cmove_pk) { - Node* cmove = cmove_pk->at(0); - - if (!SuperWord::is_cmove_fp_opcode(cmove->Opcode()) || - pack(cmove) != nullptr /* already in the cmove pack */) { - return false; - } - - if (cmove->in(0) != nullptr) { - NOT_PRODUCT(if(_sw->is_trace_cmov()) {tty->print("CMoveKit::can_merge_cmove_pack: CMove %d has control flow, escaping...", cmove->_idx); cmove->dump();}) - return false; - } - - Node* bol = cmove->as_CMove()->in(CMoveNode::Condition); - if (!bol->is_Bool() || - bol->outcnt() != 1 || - !_sw->same_generation(bol, cmove) || - bol->in(0) != nullptr || // Bool node has control flow!! - _sw->my_pack(bol) == nullptr) { - NOT_PRODUCT(if(_sw->is_trace_cmov()) {tty->print("CMoveKit::can_merge_cmove_pack: Bool %d does not fit CMove %d for building vector, escaping...", bol->_idx, cmove->_idx); bol->dump();}) - return false; - } - Node_List* bool_pk = _sw->my_pack(bol); - if (bool_pk->size() != cmove_pk->size() ) { - return false; - } - - Node* cmp = bol->in(1); - if (!cmp->is_Cmp() || - cmp->outcnt() != 1 || - !_sw->same_generation(cmp, cmove) || - cmp->in(0) != nullptr || // Cmp node has control flow!! - _sw->my_pack(cmp) == nullptr) { - NOT_PRODUCT(if(_sw->is_trace_cmov()) {tty->print("CMoveKit::can_merge_cmove_pack: Cmp %d does not fit CMove %d for building vector, escaping...", cmp->_idx, cmove->_idx); cmp->dump();}) - return false; - } - Node_List* cmp_pk = _sw->my_pack(cmp); - if (cmp_pk->size() != cmove_pk->size() ) { - return false; - } - - if (!test_cmp_pack(cmp_pk, cmove_pk)) { - NOT_PRODUCT(if(_sw->is_trace_cmov()) {tty->print("CMoveKit::can_merge_cmove_pack: cmp pack for Cmp %d failed vectorization test", cmp->_idx); cmp->dump();}) - return false; - } - - return true; -} - -// Create a new cmove pack to substitute the old one, map all info to the -// new pack and delete the old cmove pack and related packs from the packset. 
-void CMoveKit::make_cmove_pack(Node_List* cmove_pk) { - Node* cmove = cmove_pk->at(0); - Node* bol = cmove->as_CMove()->in(CMoveNode::Condition); - Node_List* bool_pk = _sw->my_pack(bol); - Node* cmp = bol->in(1); - Node_List* cmp_pk = _sw->my_pack(cmp); - - Node_List* new_cmove_pk = new Node_List(); - uint sz = cmove_pk->size() - 1; - for (uint i = 0; i <= sz; ++i) { - Node* cmov = cmove_pk->at(i); - Node* bol = bool_pk->at(i); - Node* cmp = cmp_pk->at(i); - - new_cmove_pk->insert(i, cmov); - - map(cmov, new_cmove_pk); - map(bol, new_cmove_pk); - map(cmp, new_cmove_pk); - - _sw->set_my_pack(cmov, new_cmove_pk); // and keep old packs for cmp and bool - } - _sw->_packset.remove(cmove_pk); - _sw->_packset.remove(bool_pk); - _sw->_packset.remove(cmp_pk); - _sw->_packset.append(new_cmove_pk); - NOT_PRODUCT(if(_sw->is_trace_cmov()) {tty->print_cr("CMoveKit::make_cmove_pack: added syntactic CMove pack"); _sw->print_pack(new_cmove_pk);}) -} - -bool CMoveKit::test_cmp_pack(Node_List* cmp_pk, Node_List* cmove_pk) { - Node* cmp0 = cmp_pk->at(0); - assert(cmp0->is_Cmp(), "CMoveKit::test_cmp_pack: should be Cmp Node"); - assert(cmove_pk->at(0)->is_CMove(), "CMoveKit::test_cmp_pack: should be CMove"); - assert(cmp_pk->size() == cmove_pk->size(), "CMoveKit::test_cmp_pack: should be same size"); - Node* in1 = cmp0->in(1); - Node* in2 = cmp0->in(2); - Node_List* in1_pk = _sw->my_pack(in1); - Node_List* in2_pk = _sw->my_pack(in2); - - if ( (in1_pk != nullptr && in1_pk->size() != cmp_pk->size()) - || (in2_pk != nullptr && in2_pk->size() != cmp_pk->size()) ) { - return false; - } - - // test if "all" in1 are in the same pack or the same node - if (in1_pk == nullptr) { - for (uint j = 1; j < cmp_pk->size(); j++) { - if (cmp_pk->at(j)->in(1) != in1) { - return false; - } - }//for: in1_pk is not pack but all Cmp nodes in the pack have the same in(1) - } - // test if "all" in2 are in the same pack or the same node - if (in2_pk == nullptr) { - for (uint j = 1; j < cmp_pk->size(); j++) { 
- if (cmp_pk->at(j)->in(2) != in2) { - return false; - } - }//for: in2_pk is not pack but all Cmp nodes in the pack have the same in(2) - } - //now check if cmp_pk may be subsumed in vector built for cmove_pk - int cmove_ind1, cmove_ind2; - if (cmp_pk->at(0)->in(1) == cmove_pk->at(0)->as_CMove()->in(CMoveNode::IfFalse) - && cmp_pk->at(0)->in(2) == cmove_pk->at(0)->as_CMove()->in(CMoveNode::IfTrue)) { - cmove_ind1 = CMoveNode::IfFalse; - cmove_ind2 = CMoveNode::IfTrue; - } else if (cmp_pk->at(0)->in(2) == cmove_pk->at(0)->as_CMove()->in(CMoveNode::IfFalse) - && cmp_pk->at(0)->in(1) == cmove_pk->at(0)->as_CMove()->in(CMoveNode::IfTrue)) { - cmove_ind2 = CMoveNode::IfFalse; - cmove_ind1 = CMoveNode::IfTrue; - } - else { - return false; - } - - for (uint j = 1; j < cmp_pk->size(); j++) { - if (cmp_pk->at(j)->in(1) != cmove_pk->at(j)->as_CMove()->in(cmove_ind1) - || cmp_pk->at(j)->in(2) != cmove_pk->at(j)->as_CMove()->in(cmove_ind2)) { - return false; - }//if - } - NOT_PRODUCT(if(_sw->is_trace_cmov()) { tty->print("CMoveKit::test_cmp_pack: cmp pack for 1st Cmp %d is OK for vectorization: ", cmp0->_idx); cmp0->dump(); }) - return true; -} - //------------------------------implemented--------------------------- // Can code be generated for pack p? bool SuperWord::implemented(Node_List* p) { @@ -2283,9 +2060,9 @@ bool SuperWord::implemented(Node_List* p) { // integer subword types with superword vectorization. // See JDK-8294816 for miscompilation issues with shorts. 
return false; - } else if (is_cmove_fp_opcode(opc)) { - retValue = is_cmov_pack(p) && VectorNode::implemented(opc, size, velt_basic_type(p0)); - NOT_PRODUCT(if(retValue && is_trace_cmov()) {tty->print_cr("SWPointer::implemented: found cmove pack"); print_pack(p);}) + } else if (p0->is_Cmp()) { + // Cmp -> Bool -> Cmove + retValue = UseVectorCmov; } else if (requires_long_to_int_conversion(opc)) { // Java API for Long.bitCount/numberOfLeadingZeros/numberOfTrailingZeros // returns int type, but Vector API for them returns long type. To unify @@ -2308,10 +2085,6 @@ bool SuperWord::implemented(Node_List* p) { return retValue; } -bool SuperWord::is_cmov_pack(Node_List* p) { - return _cmovev_kit.pack(p->at(0)) != nullptr; -} - bool SuperWord::requires_long_to_int_conversion(int opc) { switch(opc) { case Op_PopCountL: @@ -2385,9 +2158,6 @@ bool SuperWord::profitable(Node_List* p) { // just the ones outside the block.) for (uint i = 0; i < p->size(); i++) { Node* def = p->at(i); - if (is_cmov_pack_internal_node(p, def)) { - continue; - } for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) { Node* use = def->fast_out(j); for (uint k = 0; k < use->req(); k++) { @@ -2408,11 +2178,30 @@ bool SuperWord::profitable(Node_List* p) { } } } + if (p0->is_Cmp()) { + // Verify that Cmp pack only has Bool pack uses + for (DUIterator_Fast jmax, j = p0->fast_outs(jmax); j < jmax; j++) { + Node* bol = p0->fast_out(j); + if (!bol->is_Bool() || bol->in(0) != nullptr || !is_vector_use(bol, 1)) { + return false; + } + } + } + if (p0->is_Bool()) { + // Verify that Bool pack only has CMove pack uses + for (DUIterator_Fast jmax, j = p0->fast_outs(jmax); j < jmax; j++) { + Node* cmove = p0->fast_out(j); + if (!cmove->is_CMove() || cmove->in(0) != nullptr || !is_vector_use(cmove, 1)) { + return false; + } + } + } return true; } #ifdef ASSERT void SuperWord::verify_packs() { + // Verify independence at pack level. 
for (int i = 0; i < _packset.length(); i++) { Node_List* p = _packset.at(i); Node* dependence = find_dependence(p); @@ -2431,6 +2220,27 @@ void SuperWord::verify_packs() { } assert(dependence == nullptr, "all nodes in pack must be mutually independent"); } + + // Verify all nodes in packset have my_pack set correctly. + Unique_Node_List processed; + for (int i = 0; i < _packset.length(); i++) { + Node_List* p = _packset.at(i); + for (uint k = 0; k < p->size(); k++) { + Node* n = p->at(k); + assert(in_bb(n), "only nodes in bb can be in packset"); + assert(!processed.member(n), "node should only occur once in packset"); + assert(my_pack(n) == p, "n has consistent packset info"); + processed.push(n); + } + } + + // Check that no other node has my_pack set. + for (int i = 0; i < _block.length(); i++) { + Node* n = _block.at(i); + if (!processed.member(n)) { + assert(my_pack(n) == nullptr, "should not have pack if not in packset"); + } + } } #endif @@ -2535,7 +2345,7 @@ public: if (pid == 0) { pid = new_pid(); set_pid(n, pid); - assert(_slp->my_pack(n) == nullptr || UseVectorCmov, "no packset"); + assert(_slp->my_pack(n) == nullptr, "no packset"); } } @@ -2953,7 +2763,89 @@ bool SuperWord::output() { Node* one = vector_opd(p, 3); vn = VectorNode::make(opc, in, zero, one, vlen, velt_basic_type(n)); vlen_in_bytes = vn->as_Vector()->length_in_bytes(); - } else if (n->req() == 3 && !is_cmov_pack(p)) { + } else if (n->is_Cmp()) { + // Bool + Cmp + CMove -> VectorMaskCmp + VectorBlend + continue; + } else if (n->is_Bool()) { + // Bool + Cmp + CMove -> VectorMaskCmp + VectorBlend + continue; + } else if (n->is_CMove()) { + // Bool + Cmp + CMove -> VectorMaskCmp + VectorBlend + + BoolNode* bol = n->in(1)->as_Bool(); + assert(bol != nullptr, "must have Bool above CMove"); + BoolTest::mask bol_test = bol->_test._test; + assert(bol_test == BoolTest::eq || + bol_test == BoolTest::ne || + bol_test == BoolTest::ge || + bol_test == BoolTest::gt || + bol_test == BoolTest::lt || +
bol_test == BoolTest::le, + "CMove bool should be one of: eq,ne,ge,gt,lt,le"); + Node_List* p_bol = my_pack(bol); + assert(p_bol != nullptr, "CMove must have matching Bool pack"); + + CmpNode* cmp = bol->in(1)->as_Cmp(); + assert(cmp != nullptr, "must have cmp above CMove"); + Node_List* p_cmp = my_pack(cmp); + assert(p_cmp != nullptr, "Bool must have matching Cmp pack"); + + Node* cmp_in1 = vector_opd(p_cmp, 1); + Node* cmp_in2 = vector_opd(p_cmp, 2); + + Node* blend_in1 = vector_opd(p, 2); + Node* blend_in2 = vector_opd(p, 3); + + if (cmp->Opcode() == Op_CmpF || cmp->Opcode() == Op_CmpD) { + // If we have a Float or Double comparison, we must be careful with + // handling NaN's correctly. CmpF and CmpD have a return code, as + // they are based on the java bytecodes fcmpl/dcmpl: + // -1: cmp_in1 < cmp_in2, or at least one of the two is a NaN + // 0: cmp_in1 == cmp_in2 (no NaN) + // 1: cmp_in1 > cmp_in2 (no NaN) + // + // The "bol_test" selects which of the [-1, 0, 1] cases lead to "true". + // + // Note: ordered (O) comparison returns "false" if either input is NaN. + // unordered (U) comparison returns "true" if either input is NaN. + // + // The VectorMaskCmpNode does a comparison directly on in1 and in2, in the java + // standard way (all comparisons are ordered, except NEQ is unordered).
+ // + // In the following, "bol_test" already matches the cmp code for VectorMaskCmpNode: + // BoolTest::eq: Case 0 -> EQ_O + // BoolTest::ne: Case -1, 1 -> NEQ_U + // BoolTest::ge: Case 0, 1 -> GE_O + // BoolTest::gt: Case 1 -> GT_O + // + // But the lt and le comparisons must be converted from unordered to ordered: + // BoolTest::lt: Case -1 -> LT_U -> VectorMaskCmp would interpret lt as LT_O + // BoolTest::le: Case -1, 0 -> LE_U -> VectorMaskCmp would interpret le as LE_O + // + if (bol_test == BoolTest::lt || bol_test == BoolTest::le) { + // Negating the bol_test and swapping the blend-inputs leaves all non-NaN cases equal, + // but converts the unordered (U) to an ordered (O) comparison. + // VectorBlend(VectorMaskCmp(LT_U, in1_cmp, in2_cmp), in1_blend, in2_blend) + // <==> VectorBlend(VectorMaskCmp(GE_O, in1_cmp, in2_cmp), in2_blend, in1_blend) + // VectorBlend(VectorMaskCmp(LE_U, in1_cmp, in2_cmp), in1_blend, in2_blend) + // <==> VectorBlend(VectorMaskCmp(GT_O, in1_cmp, in2_cmp), in2_blend, in1_blend) + bol_test = bol->_test.negate(); + swap(blend_in1, blend_in2); + } + } + + // VectorMaskCmp + ConINode* bol_test_node = _igvn.intcon((int)bol_test); + BasicType bt = velt_basic_type(cmp); + const TypeVect* vt = TypeVect::make(bt, vlen); + VectorNode* mask = new VectorMaskCmpNode(bol_test, cmp_in1, cmp_in2, bol_test_node, vt); + _igvn.register_new_node_with_optimizer(mask); + _phase->set_ctrl(mask, _phase->get_ctrl(p->at(0))); + _igvn._worklist.push(mask); + + // VectorBlend + vn = new VectorBlendNode(blend_in1, blend_in2, mask); + } else if (n->req() == 3) { // Promote operands to vector Node* in1 = nullptr; bool node_isa_reduction = is_marked_reduction(n); @@ -3037,85 +2929,6 @@ bool SuperWord::output() { int vopc = VectorCastNode::opcode(opc, in->bottom_type()->is_vect()->element_basic_type()); vn = VectorCastNode::make(vopc, in, bt, vlen); vlen_in_bytes = vn->as_Vector()->length_in_bytes(); - } else if (is_cmov_pack(p)) { - if (cl->is_rce_post_loop()) { - 
// do not refactor of flow in post loop context - return false; - } - if (!n->is_CMove()) { - continue; - } - // place here CMoveVDNode - NOT_PRODUCT(if(is_trace_cmov()) {tty->print_cr("SWPointer::output: print before CMove vectorization"); print_loop(false);}) - Node* bol = n->in(CMoveNode::Condition); - if (!bol->is_Bool() && bol->Opcode() == Op_ExtractI && bol->req() > 1 ) { - NOT_PRODUCT(if(is_trace_cmov()) {tty->print_cr("SWPointer::output: %d is not Bool node, trying its in(1) node %d", bol->_idx, bol->in(1)->_idx); bol->dump(); bol->in(1)->dump();}) - bol = bol->in(1); //may be ExtractNode - } - - assert(bol->is_Bool(), "should be BoolNode - too late to bail out!"); - if (!bol->is_Bool()) { - if (do_reserve_copy()) { - NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("SWPointer::output: expected %d bool node, exiting SuperWord", bol->_idx); bol->dump();}) - return false; //and reverse to backup IG - } - ShouldNotReachHere(); - } - - BoolTest boltest = bol->as_Bool()->_test; - BoolTest::mask cond = boltest._test; - Node* cmp = bol->in(1); - // When the src order of cmp node and cmove node are the same: - // cmp: CmpD src1 src2 - // bool: Bool cmp mask - // cmove: CMoveD bool scr1 src2 - // =====> vectorized, equivalent to - // cmovev: CMoveVD mask src_vector1 src_vector2 - // - // When the src order of cmp node and cmove node are different: - // cmp: CmpD src2 src1 - // bool: Bool cmp mask - // cmove: CMoveD bool scr1 src2 - // =====> equivalent to - // cmp: CmpD src1 src2 - // bool: Bool cmp negate(mask) - // cmove: CMoveD bool scr1 src2 - // (Note: when mask is ne or eq, we don't need to negate it even after swapping.) 
- // =====> vectorized, equivalent to - // cmovev: CMoveVD negate(mask) src_vector1 src_vector2 - if (cmp->in(2) == n->in(CMoveNode::IfFalse) && cond != BoolTest::ne && cond != BoolTest::eq) { - assert(cmp->in(1) == n->in(CMoveNode::IfTrue), "cmpnode and cmovenode don't share the same inputs."); - cond = boltest.negate(); - } - Node* cc = _igvn.intcon((int)cond); - NOT_PRODUCT(if(is_trace_cmov()) {tty->print("SWPointer::output: created intcon in_cc node %d", cc->_idx); cc->dump();}) - - Node* src1 = vector_opd(p, 2); //2=CMoveNode::IfFalse - if (src1 == nullptr) { - if (do_reserve_copy()) { - NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("SWPointer::output: src1 should not be null, exiting SuperWord");}) - return false; //and reverse to backup IG - } - ShouldNotReachHere(); - } - Node* src2 = vector_opd(p, 3); //3=CMoveNode::IfTrue - if (src2 == nullptr) { - if (do_reserve_copy()) { - NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("SWPointer::output: src2 should not be null, exiting SuperWord");}) - return false; //and reverse to backup IG - } - ShouldNotReachHere(); - } - BasicType bt = velt_basic_type(n); - const TypeVect* vt = TypeVect::make(bt, vlen); - assert(bt == T_FLOAT || bt == T_DOUBLE, "Only vectorization for FP cmovs is supported"); - if (bt == T_FLOAT) { - vn = new CMoveVFNode(cc, src1, src2, vt); - } else { - assert(bt == T_DOUBLE, "Expected double"); - vn = new CMoveVDNode(cc, src1, src2, vt); - } - NOT_PRODUCT(if(is_trace_cmov()) {tty->print("SWPointer::output: created new CMove node %d: ", vn->_idx); vn->dump();}) } else if (opc == Op_FmaD || opc == Op_FmaF) { // Promote operands to vector Node* in1 = vector_opd(p, 1); @@ -3455,7 +3268,7 @@ void SuperWord::insert_extracts(Node_List* p) { Node* n = use->in(k); if (def == n) { Node_List* u_pk = my_pack(use); - if ((u_pk == nullptr || !is_cmov_pack(u_pk) || use->is_CMove()) && !is_vector_use(use, k)) { + if ((u_pk == nullptr || use->is_CMove()) && 
!is_vector_use(use, k)) { _n_idx_list.push(use, k); } } @@ -3886,6 +3699,18 @@ void SuperWord::compute_vector_element_type() { } } } + for (int i = 0; i < _block.length(); i++) { + Node* n = _block.at(i); + Node* nn = n; + if (nn->is_Bool() && nn->in(0) == nullptr) { + nn = nn->in(1); + assert(nn->is_Cmp(), "always have Cmp above Bool"); + } + if (nn->is_Cmp() && nn->in(0) == nullptr) { + nn = nn->in(1); + set_velt_type(n, velt_type(nn)); + } + } #ifndef PRODUCT if (TraceSuperWord && Verbose) { for (int i = 0; i < _block.length(); i++) { diff --git a/src/hotspot/share/opto/superword.hpp b/src/hotspot/share/opto/superword.hpp index 3ad9722d4d9..6e2689b19ad 100644 --- a/src/hotspot/share/opto/superword.hpp +++ b/src/hotspot/share/opto/superword.hpp @@ -203,24 +203,6 @@ class SWNodeInfo { }; class SuperWord; -class CMoveKit { - friend class SuperWord; - private: - SuperWord* _sw; - Dict* _dict; - CMoveKit(Arena* a, SuperWord* sw) : _sw(sw) {_dict = new Dict(cmpkey, hashkey, a);} - void* _2p(Node* key) const { return (void*)(intptr_t)key; } // 2 conversion functions to make gcc happy - Dict* dict() const { return _dict; } - void map(Node* key, Node_List* val) { assert(_dict->operator[](_2p(key)) == nullptr, "key existed"); _dict->Insert(_2p(key), (void*)val); } - void unmap(Node* key) { _dict->Delete(_2p(key)); } - Node_List* pack(Node* key) const { return (Node_List*)_dict->operator[](_2p(key)); } - Node* is_Bool_candidate(Node* nd) const; // if it is the right candidate return corresponding CMove* , - Node* is_Cmp_candidate(Node* nd) const; // otherwise return null - // Determine if the current pack is a cmove candidate that can be vectorized. 
- bool can_merge_cmove_pack(Node_List* cmove_pk); - void make_cmove_pack(Node_List* cmove_pk); - bool test_cmp_pack(Node_List* cmp_pk, Node_List* cmove_pk); -};//class CMoveKit // JVMCI: OrderedPair is moved up to deal with compilation issues on Windows //------------------------------OrderedPair--------------------------- @@ -309,7 +291,6 @@ class SuperWord : public ResourceObj { GrowableArray _mem_slice_tail; // Memory slice tail nodes GrowableArray _node_info; // Info needed per node CloneMap& _clone_map; // map of nodes created in cloning - CMoveKit _cmovev_kit; // support for vectorization of CMov MemNode* _align_to_ref; // Memory reference that pre-loop will align to GrowableArray _disjoint_ptrs; // runtime disambiguated pointer pairs @@ -458,9 +439,6 @@ class SuperWord : public ResourceObj { private: void set_my_pack(Node* n, Node_List* p) { int i = bb_idx(n); grow_node_info(i); _node_info.adr_at(i)->_my_pack = p; } // is pack good for converting into one vector node replacing bunches of Cmp, Bool, CMov nodes. - bool is_cmov_pack(Node_List* p); - bool is_cmov_pack_internal_node(Node_List* p, Node* nd) { return is_cmov_pack(p) && !nd->is_CMove(); } - static bool is_cmove_fp_opcode(int opc) { return (opc == Op_CMoveF || opc == Op_CMoveD); } static bool requires_long_to_int_conversion(int opc); // For pack p, are all idx operands the same? bool same_inputs(Node_List* p, int idx); @@ -595,9 +573,8 @@ private: void construct_my_pack_map(); // Remove packs that are not implemented or not profitable. void filter_packs(); - // Merge CMove into new vector-nodes - void merge_packs_to_cmove(); - // Verify that for every pack, all nodes are mutually independent + // Verify that for every pack, all nodes are mutually independent. + // Also verify that packset and my_pack are consistent. 
DEBUG_ONLY(void verify_packs();) // Adjust the memory graph for the packed operations void schedule(); diff --git a/src/hotspot/share/opto/vectornode.cpp b/src/hotspot/share/opto/vectornode.cpp index 52078b21b46..f8a28f5af04 100644 --- a/src/hotspot/share/opto/vectornode.cpp +++ b/src/hotspot/share/opto/vectornode.cpp @@ -82,9 +82,11 @@ int VectorNode::opcode(int sopc, BasicType bt) { case Op_FmaF: return (bt == T_FLOAT ? Op_FmaVF : 0); case Op_CMoveF: - return (bt == T_FLOAT ? Op_CMoveVF : 0); + return (bt == T_FLOAT ? Op_VectorBlend : 0); case Op_CMoveD: - return (bt == T_DOUBLE ? Op_CMoveVD : 0); + return (bt == T_DOUBLE ? Op_VectorBlend : 0); + case Op_Bool: + return Op_VectorMaskCmp; case Op_DivF: return (bt == T_FLOAT ? Op_DivVF : 0); case Op_DivD: @@ -683,10 +685,6 @@ void VectorNode::vector_operands(Node* n, uint* start, uint* end) { *start = 1; *end = 3; // 2 vector operands break; - case Op_CMoveI: case Op_CMoveL: case Op_CMoveF: case Op_CMoveD: - *start = 2; - *end = n->req(); - break; case Op_FmaD: case Op_FmaF: *start = 1; diff --git a/src/hotspot/share/opto/vectornode.hpp b/src/hotspot/share/opto/vectornode.hpp index 8efa5b40dde..ffa6a6f4e85 100644 --- a/src/hotspot/share/opto/vectornode.hpp +++ b/src/hotspot/share/opto/vectornode.hpp @@ -392,22 +392,6 @@ public: virtual int Opcode() const; }; -//------------------------------CMoveVFNode-------------------------------------- -// Vector float conditional move -class CMoveVFNode : public VectorNode { -public: - CMoveVFNode(Node* in1, Node* in2, Node* in3, const TypeVect* vt) : VectorNode(in1, in2, in3, vt) {} - virtual int Opcode() const; -}; - -//------------------------------CMoveVDNode-------------------------------------- -// Vector double conditional move -class CMoveVDNode : public VectorNode { -public: - CMoveVDNode(Node* in1, Node* in2, Node* in3, const TypeVect* vt) : VectorNode(in1, in2, in3, vt) {} - virtual int Opcode() const; -}; - 
//------------------------------MulReductionVINode-------------------------------------- // Vector multiply byte, short and int as a reduction class MulReductionVINode : public UnorderedReductionNode { diff --git a/src/hotspot/share/runtime/vmStructs.cpp b/src/hotspot/share/runtime/vmStructs.cpp index e36846c6b3b..5761b0a2ba2 100644 --- a/src/hotspot/share/runtime/vmStructs.cpp +++ b/src/hotspot/share/runtime/vmStructs.cpp @@ -1768,8 +1768,6 @@ declare_c2_type(NegVDNode, NegVNode) \ declare_c2_type(FmaVDNode, VectorNode) \ declare_c2_type(FmaVFNode, VectorNode) \ - declare_c2_type(CMoveVFNode, VectorNode) \ - declare_c2_type(CMoveVDNode, VectorNode) \ declare_c2_type(CompressVNode, VectorNode) \ declare_c2_type(CompressMNode, VectorNode) \ declare_c2_type(ExpandVNode, VectorNode) \ diff --git a/test/hotspot/jtreg/compiler/c2/irTests/TestVectorConditionalMove.java b/test/hotspot/jtreg/compiler/c2/irTests/TestVectorConditionalMove.java index c685d51a96d..7f94e5a2d1f 100644 --- a/test/hotspot/jtreg/compiler/c2/irTests/TestVectorConditionalMove.java +++ b/test/hotspot/jtreg/compiler/c2/irTests/TestVectorConditionalMove.java @@ -30,30 +30,25 @@ import jdk.test.lib.Utils; /* * @test - * @bug 8289422 + * @bug 8289422 8306088 * @key randomness - * @summary Auto-vectorization enhancement to support vector conditional move on AArch64 - * @requires os.arch=="aarch64" + * @summary Auto-vectorization enhancement to support vector conditional move. 
* @library /test/lib / * @run driver compiler.c2.irTests.TestVectorConditionalMove */ public class TestVectorConditionalMove { - final private static int SIZE = 3000; + final private static int SIZE = 1024; private static final Random RANDOM = Utils.getRandomInstance(); - private static float[] floata = new float[SIZE]; - private static float[] floatb = new float[SIZE]; - private static float[] floatc = new float[SIZE]; - private static double[] doublea = new double[SIZE]; - private static double[] doubleb = new double[SIZE]; - private static double[] doublec = new double[SIZE]; - public static void main(String[] args) { - TestFramework.runWithFlags("-Xcomp", "-XX:-TieredCompilation", "-XX:+UseCMoveUnconditionally", - "-XX:+UseVectorCmov", "-XX:CompileCommand=exclude,*.cmove*"); + TestFramework.runWithFlags("-XX:-TieredCompilation", + "-XX:+UseCMoveUnconditionally", + "-XX:+UseVectorCmov", + "-XX:CompileCommand=compileonly,*.TestVectorConditionalMove.test*"); } + // Compare 2 values, and pick one of them private float cmoveFloatGT(float a, float b) { return (a > b) ? a : b; } @@ -94,8 +89,124 @@ public class TestVectorConditionalMove { return (a != b) ? a : b; } + // Extensions: compare 2 values, and pick from 2 consts + private float cmoveFGTforFConst(float a, float b) { + return (a > b) ? 0.1f : -0.1f; + } + + private float cmoveFGEforFConst(float a, float b) { + return (a >= b) ? 0.1f : -0.1f; + } + + private float cmoveFLTforFConst(float a, float b) { + return (a < b) ? 0.1f : -0.1f; + } + + private float cmoveFLEforFConst(float a, float b) { + return (a <= b) ? 0.1f : -0.1f; + } + + private float cmoveFEQforFConst(float a, float b) { + return (a == b) ? 0.1f : -0.1f; + } + + private float cmoveFNEQforFConst(float a, float b) { + return (a != b) ? 0.1f : -0.1f; + } + + private double cmoveDGTforDConst(double a, double b) { + return (a > b) ? 0.1 : -0.1; + } + + private double cmoveDGEforDConst(double a, double b) { + return (a >= b) ? 
0.1 : -0.1; + } + + private double cmoveDLTforDConst(double a, double b) { + return (a < b) ? 0.1 : -0.1; + } + + private double cmoveDLEforDConst(double a, double b) { + return (a <= b) ? 0.1 : -0.1; + } + + private double cmoveDEQforDConst(double a, double b) { + return (a == b) ? 0.1 : -0.1; + } + + private double cmoveDNEQforDConst(double a, double b) { + return (a != b) ? 0.1 : -0.1; + } + + // Extension: Compare 2 ILFD values, and pick from 2 ILFD values + private int cmoveIGTforI(int a, int b, int c, int d) { + return (a > b) ? c : d; + } + + private long cmoveIGTforL(int a, int b, long c, long d) { + return (a > b) ? c : d; + } + + private float cmoveIGTforF(int a, int b, float c, float d) { + return (a > b) ? c : d; + } + + private double cmoveIGTforD(int a, int b, double c, double d) { + return (a > b) ? c : d; + } + + private int cmoveLGTforI(long a, long b, int c, int d) { + return (a > b) ? c : d; + } + + private long cmoveLGTforL(long a, long b, long c, long d) { + return (a > b) ? c : d; + } + + private float cmoveLGTforF(long a, long b, float c, float d) { + return (a > b) ? c : d; + } + + private double cmoveLGTforD(long a, long b, double c, double d) { + return (a > b) ? c : d; + } + + private int cmoveFGTforI(float a, float b, int c, int d) { + return (a > b) ? c : d; + } + + private long cmoveFGTforL(float a, float b, long c, long d) { + return (a > b) ? c : d; + } + + private float cmoveFGTforF(float a, float b, float c, float d) { + return (a > b) ? c : d; + } + + private double cmoveFGTforD(float a, float b, double c, double d) { + return (a > b) ? c : d; + } + + private int cmoveDGTforI(double a, double b, int c, int d) { + return (a > b) ? c : d; + } + + private long cmoveDGTforL(double a, double b, long c, long d) { + return (a > b) ? c : d; + } + + private float cmoveDGTforF(double a, double b, float c, float d) { + return (a > b) ? c : d; + } + + private double cmoveDGTforD(double a, double b, double c, double d) { + return (a > b) ? 
c : d; + } + + // Compare 2 values, and pick one of them @Test - @IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.CMOVE_VF, ">0", IRNode.STORE_VECTOR, ">0"}) + @IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"}, + applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"}) private static void testCMoveVFGT(float[] a, float[] b, float[] c) { for (int i = 0; i < a.length; i++) { c[i] = (a[i] > b[i]) ? a[i] : b[i]; @@ -103,7 +214,8 @@ public class TestVectorConditionalMove { } @Test - @IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.CMOVE_VF, ">0", IRNode.STORE_VECTOR, ">0"}) + @IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"}, + applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"}) private static void testCMoveVFGTSwap(float[] a, float[] b, float[] c) { for (int i = 0; i < a.length; i++) { c[i] = (b[i] > a[i]) ? a[i] : b[i]; @@ -111,7 +223,8 @@ public class TestVectorConditionalMove { } @Test - @IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.CMOVE_VF, ">0", IRNode.STORE_VECTOR, ">0"}) + @IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"}, + applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"}) private static void testCMoveVFLT(float[] a, float[] b, float[] c) { for (int i = 0; i < a.length; i++) { c[i] = (a[i] < b[i]) ? a[i] : b[i]; @@ -119,7 +232,8 @@ public class TestVectorConditionalMove { } @Test - @IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.CMOVE_VF, ">0", IRNode.STORE_VECTOR, ">0"}) + @IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"}, + applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"}) private static void testCMoveVFLTSwap(float[] a, float[] b, float[] c) { for (int i = 0; i < a.length; i++) { c[i] = (b[i] < a[i]) ? 
a[i] : b[i]; @@ -127,7 +241,8 @@ public class TestVectorConditionalMove { } @Test - @IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.CMOVE_VF, ">0", IRNode.STORE_VECTOR, ">0"}) + @IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"}, + applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"}) private static void testCMoveVFEQ(float[] a, float[] b, float[] c) { for (int i = 0; i < a.length; i++) { c[i] = (a[i] == b[i]) ? a[i] : b[i]; @@ -135,7 +250,8 @@ public class TestVectorConditionalMove { } @Test - @IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.CMOVE_VD, ">0", IRNode.STORE_VECTOR, ">0"}) + @IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"}, + applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"}) private static void testCMoveVDLE(double[] a, double[] b, double[] c) { for (int i = 0; i < a.length; i++) { c[i] = (a[i] <= b[i]) ? a[i] : b[i]; @@ -143,7 +259,8 @@ public class TestVectorConditionalMove { } @Test - @IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.CMOVE_VD, ">0", IRNode.STORE_VECTOR, ">0"}) + @IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"}, + applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"}) private static void testCMoveVDLESwap(double[] a, double[] b, double[] c) { for (int i = 0; i < a.length; i++) { c[i] = (b[i] <= a[i]) ? a[i] : b[i]; @@ -151,7 +268,8 @@ public class TestVectorConditionalMove { } @Test - @IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.CMOVE_VD, ">0", IRNode.STORE_VECTOR, ">0"}) + @IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"}, + applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"}) private static void testCMoveVDGE(double[] a, double[] b, double[] c) { for (int i = 0; i < a.length; i++) { c[i] = (a[i] >= b[i]) ? 
a[i] : b[i]; @@ -159,7 +277,8 @@ public class TestVectorConditionalMove { } @Test - @IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.CMOVE_VD, ">0", IRNode.STORE_VECTOR, ">0"}) + @IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"}, + applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"}) private static void testCMoveVDGESwap(double[] a, double[] b, double[] c) { for (int i = 0; i < a.length; i++) { c[i] = (b[i] >= a[i]) ? a[i] : b[i]; @@ -167,31 +286,339 @@ public class TestVectorConditionalMove { } @Test - @IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.CMOVE_VD, ">0", IRNode.STORE_VECTOR, ">0"}) + @IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"}, + applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"}) private static void testCMoveVDNE(double[] a, double[] b, double[] c) { for (int i = 0; i < a.length; i++) { c[i] = (a[i] != b[i]) ? a[i] : b[i]; } } + // Extensions: compare 2 values, and pick from 2 consts @Test - @IR(failOn = {IRNode.CMOVE_VD}) + @IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"}, + applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"}) + private static void testCMoveFGTforFConst(float[] a, float[] b, float[] c) { + for (int i = 0; i < a.length; i++) { + c[i] = (a[i] > b[i]) ? 0.1f : -0.1f; + } + } + + @Test + @IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"}, + applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"}) + private static void testCMoveFGEforFConst(float[] a, float[] b, float[] c) { + for (int i = 0; i < a.length; i++) { + c[i] = (a[i] >= b[i]) ? 
0.1f : -0.1f; + } + } + + @Test + @IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"}, + applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"}) + private static void testCMoveFLTforFConst(float[] a, float[] b, float[] c) { + for (int i = 0; i < a.length; i++) { + c[i] = (a[i] < b[i]) ? 0.1f : -0.1f; + } + } + + @Test + @IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"}, + applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"}) + private static void testCMoveFLEforFConst(float[] a, float[] b, float[] c) { + for (int i = 0; i < a.length; i++) { + c[i] = (a[i] <= b[i]) ? 0.1f : -0.1f; + } + } + + @Test + @IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"}, + applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"}) + private static void testCMoveFEQforFConst(float[] a, float[] b, float[] c) { + for (int i = 0; i < a.length; i++) { + c[i] = (a[i] == b[i]) ? 0.1f : -0.1f; + } + } + + @Test + @IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"}, + applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"}) + private static void testCMoveFNEQforFConst(float[] a, float[] b, float[] c) { + for (int i = 0; i < a.length; i++) { + c[i] = (a[i] != b[i]) ? 0.1f : -0.1f; + } + } + + @Test + @IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"}, + applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"}) + private static void testCMoveDGTforDConst(double[] a, double[] b, double[] c) { + for (int i = 0; i < a.length; i++) { + c[i] = (a[i] > b[i]) ? 
0.1 : -0.1; + } + } + + @Test + @IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"}, + applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"}) + private static void testCMoveDGEforDConst(double[] a, double[] b, double[] c) { + for (int i = 0; i < a.length; i++) { + c[i] = (a[i] >= b[i]) ? 0.1 : -0.1; + } + } + + @Test + @IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"}, + applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"}) + private static void testCMoveDLTforDConst(double[] a, double[] b, double[] c) { + for (int i = 0; i < a.length; i++) { + c[i] = (a[i] < b[i]) ? 0.1 : -0.1; + } + } + + @Test + @IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"}, + applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"}) + private static void testCMoveDLEforDConst(double[] a, double[] b, double[] c) { + for (int i = 0; i < a.length; i++) { + c[i] = (a[i] <= b[i]) ? 0.1 : -0.1; + } + } + + @Test + @IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"}, + applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"}) + private static void testCMoveDEQforDConst(double[] a, double[] b, double[] c) { + for (int i = 0; i < a.length; i++) { + c[i] = (a[i] == b[i]) ? 0.1 : -0.1; + } + } + + @Test + @IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"}, + applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"}) + private static void testCMoveDNEQforDConst(double[] a, double[] b, double[] c) { + for (int i = 0; i < a.length; i++) { + c[i] = (a[i] != b[i]) ? 
0.1 : -0.1; + } + } + + // Extension: Compare 2 ILFD values, and pick from 2 ILFD values + // Note: + // To guarantee that CMove is introduced, I need to perform the loads before the branch. To ensure they + // do not float down into the branches, I compute a value, and store it to r2 (same as r, except that the + // compilation does not know that). + // So far, vectorization only works for CMoveF/D, with same data-width comparison (F/I for F, D/L for D). + @Test + @IR(failOn = {IRNode.VECTOR_MASK_CMP, IRNode.VECTOR_BLEND}) + private static void testCMoveIGTforI(int[] a, int[] b, int[] c, int[] d, int[] r, int[] r2) { + for (int i = 0; i < a.length; i++) { + int cc = c[i]; + int dd = d[i]; + r2[i] = cc + dd; + r[i] = (a[i] > b[i]) ? cc : dd; + } + } + + @Test + @IR(failOn = {IRNode.VECTOR_MASK_CMP, IRNode.VECTOR_BLEND}) + private static void testCMoveIGTforL(int[] a, int[] b, long[] c, long[] d, long[] r, long[] r2) { + for (int i = 0; i < a.length; i++) { + long cc = c[i]; + long dd = d[i]; + r2[i] = cc + dd; + r[i] = (a[i] > b[i]) ? cc : dd; + } + } + + @Test + @IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"}, + applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"}) + private static void testCMoveIGTforF(int[] a, int[] b, float[] c, float[] d, float[] r, float[] r2) { + for (int i = 0; i < a.length; i++) { + float cc = c[i]; + float dd = d[i]; + r2[i] = cc + dd; + r[i] = (a[i] > b[i]) ? cc : dd; + } + } + + @Test + @IR(failOn = {IRNode.VECTOR_MASK_CMP, IRNode.VECTOR_BLEND}) + private static void testCMoveIGTforD(int[] a, int[] b, double[] c, double[] d, double[] r, double[] r2) { + for (int i = 0; i < a.length; i++) { + double cc = c[i]; + double dd = d[i]; + r2[i] = cc + dd; + r[i] = (a[i] > b[i]) ? 
cc : dd; + } + } + + @Test + @IR(failOn = {IRNode.VECTOR_MASK_CMP, IRNode.VECTOR_BLEND}) + private static void testCMoveLGTforI(long[] a, long[] b, int[] c, int[] d, int[] r, int[] r2) { + for (int i = 0; i < a.length; i++) { + int cc = c[i]; + int dd = d[i]; + r2[i] = cc + dd; + r[i] = (a[i] > b[i]) ? cc : dd; + } + } + + @Test + @IR(failOn = {IRNode.VECTOR_MASK_CMP, IRNode.VECTOR_BLEND}) + private static void testCMoveLGTforL(long[] a, long[] b, long[] c, long[] d, long[] r, long[] r2) { + for (int i = 0; i < a.length; i++) { + long cc = c[i]; + long dd = d[i]; + r2[i] = cc + dd; + r[i] = (a[i] > b[i]) ? cc : dd; + } + } + + @Test + @IR(failOn = {IRNode.VECTOR_MASK_CMP, IRNode.VECTOR_BLEND}) + private static void testCMoveLGTforF(long[] a, long[] b, float[] c, float[] d, float[] r, float[] r2) { + for (int i = 0; i < a.length; i++) { + float cc = c[i]; + float dd = d[i]; + r2[i] = cc + dd; + r[i] = (a[i] > b[i]) ? cc : dd; + } + } + + @Test + @IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"}, + applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"}) + // Requires avx2, else L is restricted to 16 byte, and D has 32. That leads to a vector elements mismatch of 2 to 4. + private static void testCMoveLGTforD(long[] a, long[] b, double[] c, double[] d, double[] r, double[] r2) { + for (int i = 0; i < a.length; i++) { + double cc = c[i]; + double dd = d[i]; + r2[i] = cc + dd; + r[i] = (a[i] > b[i]) ? cc : dd; + } + } + + @Test + @IR(failOn = {IRNode.VECTOR_MASK_CMP, IRNode.VECTOR_BLEND}) + private static void testCMoveFGTforI(float[] a, float[] b, int[] c, int[] d, int[] r, int[] r2) { + for (int i = 0; i < a.length; i++) { + int cc = c[i]; + int dd = d[i]; + r2[i] = cc + dd; + r[i] = (a[i] > b[i]) ? 
cc : dd; + } + } + + @Test + @IR(failOn = {IRNode.VECTOR_MASK_CMP, IRNode.VECTOR_BLEND}) + private static void testCMoveFGTforL(float[] a, float[] b, long[] c, long[] d, long[] r, long[] r2) { + for (int i = 0; i < a.length; i++) { + long cc = c[i]; + long dd = d[i]; + r2[i] = cc + dd; + r[i] = (a[i] > b[i]) ? cc : dd; + } + } + + @Test + @IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"}, + applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"}) + private static void testCMoveFGTforF(float[] a, float[] b, float[] c, float[] d, float[] r, float[] r2) { + for (int i = 0; i < a.length; i++) { + float cc = c[i]; + float dd = d[i]; + r2[i] = cc + dd; + r[i] = (a[i] > b[i]) ? cc : dd; + } + } + + @Test + @IR(failOn = {IRNode.VECTOR_MASK_CMP, IRNode.VECTOR_BLEND}) + private static void testCMoveFGTforD(float[] a, float[] b, double[] c, double[] d, double[] r, double[] r2) { + for (int i = 0; i < a.length; i++) { + double cc = c[i]; + double dd = d[i]; + r2[i] = cc + dd; + r[i] = (a[i] > b[i]) ? cc : dd; + } + } + + @Test + @IR(failOn = {IRNode.VECTOR_MASK_CMP, IRNode.VECTOR_BLEND}) + private static void testCMoveDGTforI(double[] a, double[] b, int[] c, int[] d, int[] r, int[] r2) { + for (int i = 0; i < a.length; i++) { + int cc = c[i]; + int dd = d[i]; + r2[i] = cc + dd; + r[i] = (a[i] > b[i]) ? cc : dd; + } + } + + @Test + @IR(failOn = {IRNode.VECTOR_MASK_CMP, IRNode.VECTOR_BLEND}) + private static void testCMoveDGTforL(double[] a, double[] b, long[] c, long[] d, long[] r, long[] r2) { + for (int i = 0; i < a.length; i++) { + long cc = c[i]; + long dd = d[i]; + r2[i] = cc + dd; + r[i] = (a[i] > b[i]) ? 
cc : dd; + } + } + + @Test + @IR(failOn = {IRNode.VECTOR_MASK_CMP, IRNode.VECTOR_BLEND}) + private static void testCMoveDGTforF(double[] a, double[] b, float[] c, float[] d, float[] r, float[] r2) { + for (int i = 0; i < a.length; i++) { + float cc = c[i]; + float dd = d[i]; + r2[i] = cc + dd; + r[i] = (a[i] > b[i]) ? cc : dd; + } + } + + @Test + @IR(counts = {IRNode.LOAD_VECTOR, ">0", IRNode.VECTOR_MASK_CMP, ">0", IRNode.VECTOR_BLEND, ">0", IRNode.STORE_VECTOR, ">0"}, + applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"}) + private static void testCMoveDGTforD(double[] a, double[] b, double[] c, double[] d, double[] r, double[] r2) { + for (int i = 0; i < a.length; i++) { + double cc = c[i]; + double dd = d[i]; + r2[i] = cc + dd; + r[i] = (a[i] > b[i]) ? cc : dd; + } + } + + @Test + @IR(failOn = {IRNode.VECTOR_MASK_CMP, IRNode.VECTOR_BLEND}) private static void testCMoveVDUnsupported() { + double[] doublec = new double[SIZE]; int seed = 1001; for (int i = 0; i < doublec.length; i++) { doublec[i] = (i % 2 == 0) ? 
seed + i : seed - i; } } + @Warmup(0) @Run(test = {"testCMoveVFGT", "testCMoveVFLT","testCMoveVDLE", "testCMoveVDGE", "testCMoveVFEQ", "testCMoveVDNE", - "testCMoveVFGTSwap", "testCMoveVFLTSwap","testCMoveVDLESwap", "testCMoveVDGESwap"}) + "testCMoveVFGTSwap", "testCMoveVFLTSwap","testCMoveVDLESwap", "testCMoveVDGESwap", + "testCMoveFGTforFConst", "testCMoveFGEforFConst", "testCMoveFLTforFConst", + "testCMoveFLEforFConst", "testCMoveFEQforFConst", "testCMoveFNEQforFConst", + "testCMoveDGTforDConst", "testCMoveDGEforDConst", "testCMoveDLTforDConst", + "testCMoveDLEforDConst", "testCMoveDEQforDConst", "testCMoveDNEQforDConst"}) private void testCMove_runner() { - for (int i = 0; i < SIZE; i++) { - floata[i] = RANDOM.nextFloat(); - floatb[i] = RANDOM.nextFloat(); - doublea[i] = RANDOM.nextDouble(); - doubleb[i] = RANDOM.nextDouble(); - } + float[] floata = new float[SIZE]; + float[] floatb = new float[SIZE]; + float[] floatc = new float[SIZE]; + double[] doublea = new double[SIZE]; + double[] doubleb = new double[SIZE]; + double[] doublec = new double[SIZE]; + + init(floata); + init(floatb); + init(doublea); + init(doubleb); testCMoveVFGT(floata, floatb, floatc); testCMoveVDLE(doublea, doubleb, doublec); @@ -207,6 +634,7 @@ public class TestVectorConditionalMove { Asserts.assertEquals(doublec[i], cmoveDoubleGE(doublea[i], doubleb[i])); } + // Ensure we frequently have equals for (int i = 0; i < SIZE; i++) { if (i % 3 == 0) { floatb[i] = floata[i]; @@ -234,5 +662,215 @@ public class TestVectorConditionalMove { Asserts.assertEquals(floatc[i], cmoveFloatLTSwap(floata[i], floatb[i])); Asserts.assertEquals(doublec[i], cmoveDoubleGESwap(doublea[i], doubleb[i])); } + + // Extensions: compare 2 values, and pick from 2 consts + testCMoveFGTforFConst(floata, floatb, floatc); + testCMoveDGTforDConst(doublea, doubleb, doublec); + for (int i = 0; i < SIZE; i++) { + Asserts.assertEquals(floatc[i], cmoveFGTforFConst(floata[i], floatb[i])); + Asserts.assertEquals(doublec[i], 
cmoveDGTforDConst(doublea[i], doubleb[i])); + } + + testCMoveFGEforFConst(floata, floatb, floatc); + testCMoveDGEforDConst(doublea, doubleb, doublec); + for (int i = 0; i < SIZE; i++) { + Asserts.assertEquals(floatc[i], cmoveFGEforFConst(floata[i], floatb[i])); + Asserts.assertEquals(doublec[i], cmoveDGEforDConst(doublea[i], doubleb[i])); + } + + testCMoveFLTforFConst(floata, floatb, floatc); + testCMoveDLTforDConst(doublea, doubleb, doublec); + for (int i = 0; i < SIZE; i++) { + Asserts.assertEquals(floatc[i], cmoveFLTforFConst(floata[i], floatb[i])); + Asserts.assertEquals(doublec[i], cmoveDLTforDConst(doublea[i], doubleb[i])); + } + + testCMoveFLEforFConst(floata, floatb, floatc); + testCMoveDLEforDConst(doublea, doubleb, doublec); + for (int i = 0; i < SIZE; i++) { + Asserts.assertEquals(floatc[i], cmoveFLEforFConst(floata[i], floatb[i])); + Asserts.assertEquals(doublec[i], cmoveDLEforDConst(doublea[i], doubleb[i])); + } + + testCMoveFEQforFConst(floata, floatb, floatc); + testCMoveDEQforDConst(doublea, doubleb, doublec); + for (int i = 0; i < SIZE; i++) { + Asserts.assertEquals(floatc[i], cmoveFEQforFConst(floata[i], floatb[i])); + Asserts.assertEquals(doublec[i], cmoveDEQforDConst(doublea[i], doubleb[i])); + } + + testCMoveFNEQforFConst(floata, floatb, floatc); + testCMoveDNEQforDConst(doublea, doubleb, doublec); + for (int i = 0; i < SIZE; i++) { + Asserts.assertEquals(floatc[i], cmoveFNEQforFConst(floata[i], floatb[i])); + Asserts.assertEquals(doublec[i], cmoveDNEQforDConst(doublea[i], doubleb[i])); + } + } + + @Warmup(0) + @Run(test = {"testCMoveIGTforI", + "testCMoveIGTforL", + "testCMoveIGTforF", + "testCMoveIGTforD", + "testCMoveLGTforI", + "testCMoveLGTforL", + "testCMoveLGTforF", + "testCMoveLGTforD", + "testCMoveFGTforI", + "testCMoveFGTforL", + "testCMoveFGTforF", + "testCMoveFGTforD", + "testCMoveDGTforI", + "testCMoveDGTforL", + "testCMoveDGTforF", + "testCMoveDGTforD"}) + private void testCMove_runner_two() { + int[] aI = new int[SIZE]; + int[] 
bI = new int[SIZE]; + int[] cI = new int[SIZE]; + int[] dI = new int[SIZE]; + int[] rI = new int[SIZE]; + long[] aL = new long[SIZE]; + long[] bL = new long[SIZE]; + long[] cL = new long[SIZE]; + long[] dL = new long[SIZE]; + long[] rL = new long[SIZE]; + float[] aF = new float[SIZE]; + float[] bF = new float[SIZE]; + float[] cF = new float[SIZE]; + float[] dF = new float[SIZE]; + float[] rF = new float[SIZE]; + double[] aD = new double[SIZE]; + double[] bD = new double[SIZE]; + double[] cD = new double[SIZE]; + double[] dD = new double[SIZE]; + double[] rD = new double[SIZE]; + + init(aI); + init(bI); + init(cI); + init(dI); + init(aL); + init(bL); + init(cL); + init(dL); + init(aF); + init(bF); + init(cF); + init(dF); + init(aD); + init(bD); + init(cD); + init(dD); + + testCMoveIGTforI(aI, bI, cI, dI, rI, rI); + for (int i = 0; i < SIZE; i++) { + Asserts.assertEquals(rI[i], cmoveIGTforI(aI[i], bI[i], cI[i], dI[i])); + } + + testCMoveIGTforL(aI, bI, cL, dL, rL, rL); + for (int i = 0; i < SIZE; i++) { + Asserts.assertEquals(rL[i], cmoveIGTforL(aI[i], bI[i], cL[i], dL[i])); + } + + testCMoveIGTforF(aI, bI, cF, dF, rF, rF); + for (int i = 0; i < SIZE; i++) { + Asserts.assertEquals(rF[i], cmoveIGTforF(aI[i], bI[i], cF[i], dF[i])); + } + + testCMoveIGTforD(aI, bI, cD, dD, rD, rD); + for (int i = 0; i < SIZE; i++) { + Asserts.assertEquals(rD[i], cmoveIGTforD(aI[i], bI[i], cD[i], dD[i])); + } + + testCMoveLGTforI(aL, bL, cI, dI, rI, rI); + for (int i = 0; i < SIZE; i++) { + Asserts.assertEquals(rI[i], cmoveLGTforI(aL[i], bL[i], cI[i], dI[i])); + } + + testCMoveLGTforL(aL, bL, cL, dL, rL, rL); + for (int i = 0; i < SIZE; i++) { + Asserts.assertEquals(rL[i], cmoveLGTforL(aL[i], bL[i], cL[i], dL[i])); + } + + testCMoveLGTforF(aL, bL, cF, dF, rF, rF); + for (int i = 0; i < SIZE; i++) { + Asserts.assertEquals(rF[i], cmoveLGTforF(aL[i], bL[i], cF[i], dF[i])); + } + + testCMoveLGTforD(aL, bL, cD, dD, rD, rD); + for (int i = 0; i < SIZE; i++) { + Asserts.assertEquals(rD[i], 
cmoveLGTforD(aL[i], bL[i], cD[i], dD[i])); + } + + testCMoveFGTforI(aF, bF, cI, dI, rI, rI); + for (int i = 0; i < SIZE; i++) { + Asserts.assertEquals(rI[i], cmoveFGTforI(aF[i], bF[i], cI[i], dI[i])); + } + + testCMoveFGTforL(aF, bF, cL, dL, rL, rL); + for (int i = 0; i < SIZE; i++) { + Asserts.assertEquals(rL[i], cmoveFGTforL(aF[i], bF[i], cL[i], dL[i])); + } + + testCMoveFGTforF(aF, bF, cF, dF, rF, rF); + for (int i = 0; i < SIZE; i++) { + Asserts.assertEquals(rF[i], cmoveFGTforF(aF[i], bF[i], cF[i], dF[i])); + } + + testCMoveFGTforD(aF, bF, cD, dD, rD, rD); + for (int i = 0; i < SIZE; i++) { + Asserts.assertEquals(rD[i], cmoveFGTforD(aF[i], bF[i], cD[i], dD[i])); + } + + testCMoveDGTforI(aD, bD, cI, dI, rI, rI); + for (int i = 0; i < SIZE; i++) { + Asserts.assertEquals(rI[i], cmoveDGTforI(aD[i], bD[i], cI[i], dI[i])); + } + + testCMoveDGTforL(aD, bD, cL, dL, rL, rL); + for (int i = 0; i < SIZE; i++) { + Asserts.assertEquals(rL[i], cmoveDGTforL(aD[i], bD[i], cL[i], dL[i])); + } + + testCMoveDGTforF(aD, bD, cF, dF, rF, rF); + for (int i = 0; i < SIZE; i++) { + Asserts.assertEquals(rF[i], cmoveDGTforF(aD[i], bD[i], cF[i], dF[i])); + } + + testCMoveDGTforD(aD, bD, cD, dD, rD, rD); + for (int i = 0; i < SIZE; i++) { + Asserts.assertEquals(rD[i], cmoveDGTforD(aD[i], bD[i], cD[i], dD[i])); + } + } + + private static void init(int[] a) { + for (int i = 0; i < SIZE; i++) { + a[i] = RANDOM.nextInt(); + } + } + + private static void init(long[] a) { + for (int i = 0; i < SIZE; i++) { + a[i] = RANDOM.nextLong(); + } + } + + private static void init(float[] a) { + for (int i = 0; i < SIZE; i++) { + a[i] = RANDOM.nextFloat(); + if (RANDOM.nextInt() % 20 == 0) { + a[i] = Float.NaN; + } + } + } + + private static void init(double[] a) { + for (int i = 0; i < SIZE; i++) { + a[i] = RANDOM.nextDouble(); + if (RANDOM.nextInt() % 20 == 0) { + a[i] = Double.NaN; + } + } } } diff --git a/test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java 
b/test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java index def34bb4ac6..69baddba601 100644 --- a/test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java +++ b/test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java @@ -304,16 +304,6 @@ public class IRNode { beforeMatchingNameRegex(CMOVE_I, "CMoveI"); } - public static final String CMOVE_VD = PREFIX + "CMOVE_VD" + POSTFIX; - static { - superWordNodes(CMOVE_VD, "CMoveVD"); - } - - public static final String CMOVE_VF = PREFIX + "CMOVE_VF" + POSTFIX; - static { - superWordNodes(CMOVE_VF, "CMoveVF"); - } - public static final String CMP_I = PREFIX + "CMP_I" + POSTFIX; static { beforeMatchingNameRegex(CMP_I, "CmpI"); @@ -1278,6 +1268,11 @@ public class IRNode { beforeMatchingNameRegex(VECTOR_BLEND, "VectorBlend"); } + public static final String VECTOR_MASK_CMP = PREFIX + "VECTOR_MASK_CMP" + POSTFIX; + static { + beforeMatchingNameRegex(VECTOR_MASK_CMP, "VectorMaskCmp"); + } + public static final String VECTOR_CAST_B2X = PREFIX + "VECTOR_CAST_B2X" + POSTFIX; static { beforeMatchingNameRegex(VECTOR_CAST_B2X, "VectorCastB2X");