diff --git a/src/hotspot/share/opto/vectorIntrinsics.cpp b/src/hotspot/share/opto/vectorIntrinsics.cpp index a2d8ffaccd5..76529a95ed7 100644 --- a/src/hotspot/share/opto/vectorIntrinsics.cpp +++ b/src/hotspot/share/opto/vectorIntrinsics.cpp @@ -592,16 +592,12 @@ bool LibraryCallKit::inline_vector_shuffle_iota() { const TypeInt* step_val = gvn().type(argument(5))->isa_int(); const TypeInt* wrap = gvn().type(argument(6))->isa_int(); - Node* start = argument(4); - Node* step = argument(5); - - if (shuffle_klass == nullptr || vlen == nullptr || start_val == nullptr || step_val == nullptr || wrap == nullptr) { - return false; // dead code - } - if (!vlen->is_con() || !is_power_of_2(vlen->get_con()) || - shuffle_klass->const_oop() == nullptr || !wrap->is_con()) { + if (shuffle_klass == nullptr || shuffle_klass->const_oop() == nullptr || + vlen == nullptr || !vlen->is_con() || start_val == nullptr || step_val == nullptr || + wrap == nullptr || !wrap->is_con()) { return false; // not enough info for intrinsification } + if (!is_klass_initialized(shuffle_klass)) { if (C->print_intrinsics()) { tty->print_cr(" ** klass argument not initialized"); @@ -613,33 +609,50 @@ bool LibraryCallKit::inline_vector_shuffle_iota() { int num_elem = vlen->get_con(); BasicType elem_bt = T_BYTE; - if (!arch_supports_vector(VectorNode::replicate_opcode(elem_bt), num_elem, elem_bt, VecMaskNotUsed)) { + bool effective_indices_in_range = false; + if (start_val->is_con() && step_val->is_con()) { + int effective_min_index = start_val->get_con(); + int effective_max_index = start_val->get_con() + step_val->get_con() * (num_elem - 1); + effective_indices_in_range = effective_max_index >= effective_min_index && effective_min_index >= -128 && effective_max_index <= 127; + } + + if (!do_wrap && !effective_indices_in_range) { + // Disable instrinsification for unwrapped shuffle iota if start/step + // values are non-constant OR if intermediate result overflows byte value range. return false; } - if (!arch_supports_vector(Op_AddVB, num_elem, elem_bt, VecMaskNotUsed)) { + + if (!arch_supports_vector(Op_AddVB, num_elem, elem_bt, VecMaskNotUsed) || + !arch_supports_vector(Op_AndV, num_elem, elem_bt, VecMaskNotUsed) || + !arch_supports_vector(Op_VectorLoadConst, num_elem, elem_bt, VecMaskNotUsed) || + !arch_supports_vector(VectorNode::replicate_opcode(elem_bt), num_elem, elem_bt, VecMaskNotUsed)) { return false; } - if (!arch_supports_vector(Op_AndV, num_elem, elem_bt, VecMaskNotUsed)) { + + if (!do_wrap && + (!arch_supports_vector(Op_SubVB, num_elem, elem_bt, VecMaskNotUsed) || + !arch_supports_vector(Op_VectorBlend, num_elem, elem_bt, VecMaskNotUsed) || + !arch_supports_vector(Op_VectorMaskCmp, num_elem, elem_bt, VecMaskNotUsed))) { return false; } - if (!arch_supports_vector(Op_VectorLoadConst, num_elem, elem_bt, VecMaskNotUsed)) { - return false; - } - if (!arch_supports_vector(Op_VectorBlend, num_elem, elem_bt, VecMaskUseLoad)) { - return false; - } - if (!arch_supports_vector(Op_VectorMaskCmp, num_elem, elem_bt, VecMaskUseStore)) { + + bool step_multiply = !step_val->is_con() || !is_power_of_2(step_val->get_con()); + if ((step_multiply && !arch_supports_vector(Op_MulVB, num_elem, elem_bt, VecMaskNotUsed)) || + (!step_multiply && !arch_supports_vector(Op_LShiftVB, num_elem, elem_bt, VecMaskNotUsed))) { return false; } const Type * type_bt = Type::get_const_basic_type(elem_bt); const TypeVect * vt = TypeVect::make(type_bt, num_elem); - Node* res = gvn().transform(new VectorLoadConstNode(gvn().makecon(TypeInt::ZERO), vt)); + Node* res = gvn().transform(new VectorLoadConstNode(gvn().makecon(TypeInt::ZERO), vt)); - if(!step_val->is_con() || !is_power_of_2(step_val->get_con())) { + Node* start = argument(4); + Node* step = argument(5); + + if (step_multiply) { Node* bcast_step = gvn().transform(VectorNode::scalar2vector(step, num_elem, type_bt)); - res = gvn().transform(VectorNode::make(Op_MulI, res, bcast_step, num_elem, elem_bt)); + res = gvn().transform(VectorNode::make(Op_MulVB, res, bcast_step, vt)); } else if (step_val->get_con() > 1) { Node* cnt = gvn().makecon(TypeInt::make(log2i_exact(step_val->get_con()))); Node* shift_cnt = vector_shift_count(cnt, Op_LShiftI, elem_bt, num_elem); @@ -648,24 +661,25 @@ bool LibraryCallKit::inline_vector_shuffle_iota() { if (!start_val->is_con() || start_val->get_con() != 0) { Node* bcast_start = gvn().transform(VectorNode::scalar2vector(start, num_elem, type_bt)); - res = gvn().transform(VectorNode::make(Op_AddI, res, bcast_start, num_elem, elem_bt)); + res = gvn().transform(VectorNode::make(Op_AddVB, res, bcast_start, vt)); } Node * mod_val = gvn().makecon(TypeInt::make(num_elem-1)); Node * bcast_mod = gvn().transform(VectorNode::scalar2vector(mod_val, num_elem, type_bt)); - if(do_wrap) { + + if (do_wrap) { // Wrap the indices greater than lane count. - res = gvn().transform(VectorNode::make(Op_AndI, res, bcast_mod, num_elem, elem_bt)); + res = gvn().transform(VectorNode::make(Op_AndV, res, bcast_mod, vt)); } else { - ConINode* pred_node = (ConINode*)gvn().makecon(TypeInt::make(BoolTest::ge)); + ConINode* pred_node = (ConINode*)gvn().makecon(TypeInt::make(BoolTest::ugt)); Node * lane_cnt = gvn().makecon(TypeInt::make(num_elem)); Node * bcast_lane_cnt = gvn().transform(VectorNode::scalar2vector(lane_cnt, num_elem, type_bt)); const TypeVect* vmask_type = TypeVect::makemask(elem_bt, num_elem); - Node* mask = gvn().transform(new VectorMaskCmpNode(BoolTest::ge, bcast_lane_cnt, res, pred_node, vmask_type)); + Node* mask = gvn().transform(new VectorMaskCmpNode(BoolTest::ugt, bcast_lane_cnt, res, pred_node, vmask_type)); - // Make the indices greater than lane count as -ve values. This matches the java side implementation. - res = gvn().transform(VectorNode::make(Op_AndI, res, bcast_mod, num_elem, elem_bt)); - Node * biased_val = gvn().transform(VectorNode::make(Op_SubI, res, bcast_lane_cnt, num_elem, elem_bt)); + // Make the indices greater than lane count as -ve values to match the java side implementation. + res = gvn().transform(VectorNode::make(Op_AndV, res, bcast_mod, vt)); + Node * biased_val = gvn().transform(VectorNode::make(Op_SubVB, res, bcast_lane_cnt, vt)); res = gvn().transform(new VectorBlendNode(biased_val, res, mask)); } diff --git a/test/hotspot/jtreg/compiler/vectorapi/TestVectorShuffleIota.java b/test/hotspot/jtreg/compiler/vectorapi/TestVectorShuffleIota.java index 79829775db4..d76ef889636 100644 --- a/test/hotspot/jtreg/compiler/vectorapi/TestVectorShuffleIota.java +++ b/test/hotspot/jtreg/compiler/vectorapi/TestVectorShuffleIota.java @@ -1,5 +1,6 @@ /* * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -23,7 +24,9 @@ package compiler.vectorapi; +import jdk.incubator.vector.ByteVector; import jdk.incubator.vector.IntVector; +import jdk.incubator.vector.ShortVector; import jdk.incubator.vector.VectorSpecies; import jdk.incubator.vector.VectorShuffle; @@ -31,16 +34,94 @@ import jdk.incubator.vector.VectorShuffle; * @test * @bug 8265907 * @modules jdk.incubator.vector - * @run main/othervm compiler.vectorapi.TestVectorShuffleIota + * @run main/othervm -XX:CompileThresholdScaling=0.3 -XX:-TieredCompilation compiler.vectorapi.TestVectorShuffleIota */ public class TestVectorShuffleIota { static final VectorSpecies SPECIESi = IntVector.SPECIES_128; + static final VectorSpecies SPECIESs = ShortVector.SPECIES_128; + static final VectorSpecies SPECIESb = ByteVector.SPECIES_128; - static final int INVOC_COUNT = 50000; + static final int INVOC_COUNT = 5000; static int[] ai = {87, 65, 78, 71}; + static long expected_value(VectorSpecies SPECIES, int start, int step, boolean wrap) { + long res = 0; + int lanesM1 = SPECIES.length() - 1; + if (wrap) { + res = (lanesM1 & (start + step * lanesM1)); + } else { + int effective_index = start + step * lanesM1; + int wrapped_effective_index = effective_index & lanesM1; + res = (effective_index == wrapped_effective_index ? + wrapped_effective_index : + -SPECIES.length() + wrapped_effective_index); + } + return res; + } + + static void validateTests(long actual, VectorSpecies SPECIES, int start, int step, boolean wrap) { + long expected = expected_value(SPECIES, start, step, wrap); + if (actual != expected) { + throw new AssertionError("Result Mismatch!, actual = " + actual + " expected = " + expected); + } + } + + static void testShuffleIotaB128(int start, int step, boolean wrap) { + long res = SPECIESb.iotaShuffle(start, step, wrap) + .laneSource(SPECIESb.length()-1); + validateTests(res, SPECIESb, start, step, wrap); + } + + static void testShuffleIotaS128(int start, int step, boolean wrap) { + long res = SPECIESs.iotaShuffle(start, step, wrap) + .laneSource(SPECIESs.length()-1); + validateTests(res, SPECIESs, start, step, wrap); + } + + static void testShuffleIotaI128(int start, int step, boolean wrap) { + long res = SPECIESi.iotaShuffle(start, step, wrap) + .laneSource(SPECIESi.length()-1); + validateTests(res, SPECIESi, start, step, wrap); + } + + static void testShuffleIotaConst0B128() { + long res = SPECIESb.iotaShuffle(-32, 1, false) + .laneSource(SPECIESb.length()-1); + validateTests(res, SPECIESb, -32, 1, false); + } + + static void testShuffleIotaConst0S128() { + long res = SPECIESs.iotaShuffle(-32, 1, false) + .laneSource(SPECIESs.length()-1); + validateTests(res, SPECIESs, -32, 1, false); + } + + static void testShuffleIotaConst0I128() { + long res = SPECIESi.iotaShuffle(-32, 1, false) + .laneSource(SPECIESi.length()-1); + validateTests(res, SPECIESi, -32, 1, false); + } + + static void testShuffleIotaConst1B128() { + long res = SPECIESb.iotaShuffle(-32, 1, true) + .laneSource(SPECIESb.length()-1); + validateTests(res, SPECIESb, -32, 1, true); + } + + static void testShuffleIotaConst1S128() { + long res = SPECIESs.iotaShuffle(-32, 1, true) + .laneSource(SPECIESs.length()-1); + validateTests(res, SPECIESs, -32, 1, true); + } + + static void testShuffleIotaConst1I128() { + long res = SPECIESi.iotaShuffle(-32, 1, true) + .laneSource(SPECIESi.length()-1); + validateTests(res, SPECIESi, -32, 1, true); + } + static void testShuffleI() { IntVector iv = (IntVector) VectorShuffle.iota(SPECIESi, 0, 2, false).toVector(); iv.intoArray(ai, 0); @@ -53,6 +134,34 @@ public class TestVectorShuffleIota { for (int i = 0; i < ai.length; i++) { System.out.print(ai[i] + ", "); } + for (int i = 0; i < INVOC_COUNT; i++) { + testShuffleIotaI128(128, 1, true); + testShuffleIotaI128(128, 1, false); + testShuffleIotaI128(-128, 1, true); + testShuffleIotaI128(-128, 1, false); + testShuffleIotaI128(1, 1, true); + testShuffleIotaI128(1, 1, false); + + testShuffleIotaS128(128, 1, true); + testShuffleIotaS128(128, 1, false); + testShuffleIotaS128(-128, 1, true); + testShuffleIotaS128(-128, 1, false); + testShuffleIotaS128(1, 1, true); + testShuffleIotaS128(1, 1, false); + + testShuffleIotaB128(128, 1, true); + testShuffleIotaB128(128, 1, false); + testShuffleIotaB128(-128, 1, true); + testShuffleIotaB128(-128, 1, false); + testShuffleIotaB128(1, 1, true); + testShuffleIotaB128(1, 1, false); + testShuffleIotaConst0B128(); + testShuffleIotaConst0S128(); + testShuffleIotaConst0I128(); + testShuffleIotaConst1B128(); + testShuffleIotaConst1S128(); + testShuffleIotaConst1I128(); + } System.out.println(); } }