diff --git a/src/hotspot/cpu/aarch64/aarch64_vector.ad b/src/hotspot/cpu/aarch64/aarch64_vector.ad index 0d3a240cecf..d217fb7e949 100644 --- a/src/hotspot/cpu/aarch64/aarch64_vector.ad +++ b/src/hotspot/cpu/aarch64/aarch64_vector.ad @@ -320,6 +320,10 @@ source %{ } } + bool Matcher::vector_rearrange_requires_load_shuffle(BasicType elem_bt, int vlen) { + return false; + } + // Assert that the given node is not a variable shift. bool assert_not_var_shift(const Node* n) { assert(!n->as_ShiftV()->is_var_shift(), "illegal variable shift"); @@ -6150,41 +6154,6 @@ instruct vtest_alltrue_sve(rFlagsReg cr, pReg src1, pReg src2, pReg ptmp) %{ ins_pipe(pipe_slow); %} -// ------------------------------ Vector shuffle ------------------------------- - -instruct loadshuffle(vReg dst, vReg src) %{ - match(Set dst (VectorLoadShuffle src)); - format %{ "loadshuffle $dst, $src" %} - ins_encode %{ - BasicType bt = Matcher::vector_element_basic_type(this); - uint length_in_bytes = Matcher::vector_length_in_bytes(this); - if (bt == T_BYTE) { - if ($dst$$FloatRegister != $src$$FloatRegister) { - if (VM_Version::use_neon_for_vector(length_in_bytes)) { - __ orr($dst$$FloatRegister, length_in_bytes == 16 ? 
__ T16B : __ T8B, - $src$$FloatRegister, $src$$FloatRegister); - } else { - assert(UseSVE > 0, "must be sve"); - __ sve_orr($dst$$FloatRegister, $src$$FloatRegister, $src$$FloatRegister); - } - } - } else { - if (VM_Version::use_neon_for_vector(length_in_bytes)) { - // 4S/8S, 4I, 4F - __ uxtl($dst$$FloatRegister, __ T8H, $src$$FloatRegister, __ T8B); - if (type2aelembytes(bt) == 4) { - __ uxtl($dst$$FloatRegister, __ T4S, $dst$$FloatRegister, __ T4H); - } - } else { - assert(UseSVE > 0, "must be sve"); - __ sve_vector_extend($dst$$FloatRegister, __ elemType_to_regVariant(bt), - $src$$FloatRegister, __ B); - } - } - %} - ins_pipe(pipe_slow); -%} - // ------------------------------ Vector rearrange ----------------------------- // Here is an example that rearranges a NEON vector with 4 ints: @@ -6207,6 +6176,7 @@ instruct loadshuffle(vReg dst, vReg src) %{ // need to lookup 2/4 bytes as a group. For VectorRearrange long, we use bsl // to implement rearrange. +// Maybe move the shuffle preparation to VectorLoadShuffle instruct rearrange_HS_neon(vReg dst, vReg src, vReg shuffle, vReg tmp1, vReg tmp2) %{ predicate(UseSVE == 0 && (Matcher::vector_element_basic_type(n) == T_SHORT || diff --git a/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4 b/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4 index 99708e9ef31..422e98d9b68 100644 --- a/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4 +++ b/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4 @@ -310,6 +310,10 @@ source %{ } } + bool Matcher::vector_rearrange_requires_load_shuffle(BasicType elem_bt, int vlen) { + return false; + } + // Assert that the given node is not a variable shift. 
bool assert_not_var_shift(const Node* n) { assert(!n->as_ShiftV()->is_var_shift(), "illegal variable shift"); @@ -4397,41 +4401,6 @@ instruct vtest_alltrue_sve(rFlagsReg cr, pReg src1, pReg src2, pReg ptmp) %{ ins_pipe(pipe_slow); %} -// ------------------------------ Vector shuffle ------------------------------- - -instruct loadshuffle(vReg dst, vReg src) %{ - match(Set dst (VectorLoadShuffle src)); - format %{ "loadshuffle $dst, $src" %} - ins_encode %{ - BasicType bt = Matcher::vector_element_basic_type(this); - uint length_in_bytes = Matcher::vector_length_in_bytes(this); - if (bt == T_BYTE) { - if ($dst$$FloatRegister != $src$$FloatRegister) { - if (VM_Version::use_neon_for_vector(length_in_bytes)) { - __ orr($dst$$FloatRegister, length_in_bytes == 16 ? __ T16B : __ T8B, - $src$$FloatRegister, $src$$FloatRegister); - } else { - assert(UseSVE > 0, "must be sve"); - __ sve_orr($dst$$FloatRegister, $src$$FloatRegister, $src$$FloatRegister); - } - } - } else { - if (VM_Version::use_neon_for_vector(length_in_bytes)) { - // 4S/8S, 4I, 4F - __ uxtl($dst$$FloatRegister, __ T8H, $src$$FloatRegister, __ T8B); - if (type2aelembytes(bt) == 4) { - __ uxtl($dst$$FloatRegister, __ T4S, $dst$$FloatRegister, __ T4H); - } - } else { - assert(UseSVE > 0, "must be sve"); - __ sve_vector_extend($dst$$FloatRegister, __ elemType_to_regVariant(bt), - $src$$FloatRegister, __ B); - } - } - %} - ins_pipe(pipe_slow); -%} - // ------------------------------ Vector rearrange ----------------------------- // Here is an example that rearranges a NEON vector with 4 ints: @@ -4454,6 +4423,7 @@ instruct loadshuffle(vReg dst, vReg src) %{ // need to lookup 2/4 bytes as a group. For VectorRearrange long, we use bsl // to implement rearrange. 
+// Maybe move the shuffle preparation to VectorLoadShuffle instruct rearrange_HS_neon(vReg dst, vReg src, vReg shuffle, vReg tmp1, vReg tmp2) %{ predicate(UseSVE == 0 && (Matcher::vector_element_basic_type(n) == T_SHORT || diff --git a/src/hotspot/cpu/arm/arm.ad b/src/hotspot/cpu/arm/arm.ad index bfca986f350..a3db5c0619c 100644 --- a/src/hotspot/cpu/arm/arm.ad +++ b/src/hotspot/cpu/arm/arm.ad @@ -999,6 +999,10 @@ bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) { return false; } +bool Matcher::vector_rearrange_requires_load_shuffle(BasicType elem_bt, int vlen) { + return false; +} + const RegMask* Matcher::predicate_reg_mask(void) { return nullptr; } diff --git a/src/hotspot/cpu/ppc/ppc.ad b/src/hotspot/cpu/ppc/ppc.ad index 6979052c307..b023ccae86d 100644 --- a/src/hotspot/cpu/ppc/ppc.ad +++ b/src/hotspot/cpu/ppc/ppc.ad @@ -2153,6 +2153,10 @@ bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) { return false; } +bool Matcher::vector_rearrange_requires_load_shuffle(BasicType elem_bt, int vlen) { + return false; +} + const RegMask* Matcher::predicate_reg_mask(void) { return nullptr; } diff --git a/src/hotspot/cpu/riscv/riscv_v.ad b/src/hotspot/cpu/riscv/riscv_v.ad index 6894f3ce9fd..4b30a911152 100644 --- a/src/hotspot/cpu/riscv/riscv_v.ad +++ b/src/hotspot/cpu/riscv/riscv_v.ad @@ -116,6 +116,9 @@ source %{ return false; } + bool Matcher::vector_rearrange_requires_load_shuffle(BasicType elem_bt, int vlen) { + return false; + } %} definitions %{ @@ -4422,41 +4425,6 @@ instruct vmask_reinterpret_diff_esize(vRegMask dst, vRegMask_V0 src, vReg tmp) % ins_pipe(pipe_slow); %} -// ------------------------------ Vector shuffle ------------------------------- - -instruct loadshuffleB(vReg dst) %{ - predicate(Matcher::vector_element_basic_type(n) == T_BYTE); - match(Set dst (VectorLoadShuffle dst)); - format %{ "loadshuffleB $dst, $dst" %} - ins_encode %{ - // For T_BYTE, no need to do anything - %} - 
ins_pipe(pipe_class_empty); -%} - -instruct loadshuffleX(vReg dst, vReg src) %{ - predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE || - Matcher::vector_element_basic_type(n) == T_LONG || - Matcher::vector_element_basic_type(n) == T_FLOAT || - Matcher::vector_element_basic_type(n) == T_INT || - Matcher::vector_element_basic_type(n) == T_SHORT); - match(Set dst (VectorLoadShuffle src)); - effect(TEMP_DEF dst); - format %{ "loadshuffleX $dst, $src" %} - ins_encode %{ - BasicType bt = Matcher::vector_element_basic_type(this); - __ vsetvli_helper(bt, Matcher::vector_length(this)); - if (bt == T_SHORT) { - __ vzext_vf2(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); - } else if (bt == T_FLOAT || bt == T_INT) { - __ vzext_vf4(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); - } else { // bt == T_DOUBLE || bt == T_LONG - __ vzext_vf8(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); - } - %} - ins_pipe(pipe_slow); -%} - // ------------------------------ Vector rearrange ----------------------------- instruct rearrange(vReg dst, vReg src, vReg shuffle) %{ diff --git a/src/hotspot/cpu/s390/s390.ad b/src/hotspot/cpu/s390/s390.ad index 0f1d98d54b9..6fb73a0e64c 100644 --- a/src/hotspot/cpu/s390/s390.ad +++ b/src/hotspot/cpu/s390/s390.ad @@ -1755,6 +1755,10 @@ bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) { return false; } +bool Matcher::vector_rearrange_requires_load_shuffle(BasicType elem_bt, int vlen) { + return false; +} + const RegMask* Matcher::predicate_reg_mask(void) { return nullptr; } diff --git a/src/hotspot/cpu/x86/x86.ad b/src/hotspot/cpu/x86/x86.ad index 49efbc03a4d..95b761ad44e 100644 --- a/src/hotspot/cpu/x86/x86.ad +++ b/src/hotspot/cpu/x86/x86.ad @@ -2211,6 +2211,19 @@ bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) { return false; } +// Return true if Vector::rearrange needs preparation of the shuffle argument +bool 
Matcher::vector_rearrange_requires_load_shuffle(BasicType elem_bt, int vlen) { + switch (elem_bt) { + case T_BYTE: return false; + case T_SHORT: return !VM_Version::supports_avx512bw(); + case T_INT: return !VM_Version::supports_avx(); + case T_LONG: return vlen < 8 && !VM_Version::supports_avx512vl(); + default: + ShouldNotReachHere(); + return false; + } +} + MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) { assert(Matcher::is_generic_vector(generic_opnd), "not generic"); bool legacy = (generic_opnd->opcode() == LEGVEC); @@ -8860,17 +8873,6 @@ instruct VectorPopulateLIndex(vec dst, rRegL src1, immI_1 src2, vec vtmp) %{ //-------------------------------- Rearrange ---------------------------------- // LoadShuffle/Rearrange for Byte - -instruct loadShuffleB(vec dst) %{ - predicate(Matcher::vector_element_basic_type(n) == T_BYTE); - match(Set dst (VectorLoadShuffle dst)); - format %{ "vector_load_shuffle $dst, $dst" %} - ins_encode %{ - // empty - %} - ins_pipe( pipe_slow ); -%} - instruct rearrangeB(vec dst, vec shuffle) %{ predicate(Matcher::vector_element_basic_type(n) == T_BYTE && Matcher::vector_length(n) < 32); @@ -8937,7 +8939,7 @@ instruct rearrangeB_evex_vbmi(vec dst, vec src, vec shuffle) %{ instruct loadShuffleS(vec dst, vec src, vec vtmp) %{ predicate(Matcher::vector_element_basic_type(n) == T_SHORT && - Matcher::vector_length(n) <= 16 && !VM_Version::supports_avx512bw()); // NB! aligned with rearrangeS + !VM_Version::supports_avx512bw()); match(Set dst (VectorLoadShuffle src)); effect(TEMP dst, TEMP vtmp); format %{ "vector_load_shuffle $dst, $src\t! 
using $vtmp as TEMP" %} @@ -8948,7 +8950,7 @@ instruct loadShuffleS(vec dst, vec src, vec vtmp) %{ if (UseAVX == 0) { assert(vlen_in_bytes <= 16, "required"); // Multiply each shuffle by two to get byte index - __ pmovzxbw($vtmp$$XMMRegister, $src$$XMMRegister); + __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister); __ psllw($vtmp$$XMMRegister, 1); // Duplicate to create 2 copies of byte index @@ -8963,8 +8965,7 @@ instruct loadShuffleS(vec dst, vec src, vec vtmp) %{ assert(UseAVX > 1 || vlen_in_bytes <= 16, "required"); int vlen_enc = vector_length_encoding(this); // Multiply each shuffle by two to get byte index - __ vpmovzxbw($vtmp$$XMMRegister, $src$$XMMRegister, vlen_enc); - __ vpsllw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 1, vlen_enc); + __ vpsllw($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc); // Duplicate to create 2 copies of byte index __ vpsllw($dst$$XMMRegister, $vtmp$$XMMRegister, 8, vlen_enc); @@ -9011,21 +9012,6 @@ instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVe ins_pipe( pipe_slow ); %} -instruct loadShuffleS_evex(vec dst, vec src) %{ - predicate(Matcher::vector_element_basic_type(n) == T_SHORT && - VM_Version::supports_avx512bw()); - match(Set dst (VectorLoadShuffle src)); - format %{ "vector_load_shuffle $dst, $src" %} - ins_encode %{ - int vlen_enc = vector_length_encoding(this); - if (!VM_Version::supports_avx512vl()) { - vlen_enc = Assembler::AVX_512bit; - } - __ vpmovzxbw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); - %} - ins_pipe( pipe_slow ); -%} - instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{ predicate(Matcher::vector_element_basic_type(n) == T_SHORT && VM_Version::supports_avx512bw()); @@ -9056,7 +9042,7 @@ instruct loadShuffleI(vec dst, vec src, vec vtmp) %{ // only byte shuffle instruction available on these platforms // Duplicate and multiply each shuffle by 4 - __ pmovzxbd($vtmp$$XMMRegister, $src$$XMMRegister); + __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister); __ 
pshuflw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0); __ pshufhw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0); __ psllw($vtmp$$XMMRegister, 2); @@ -9085,18 +9071,6 @@ instruct rearrangeI(vec dst, vec shuffle) %{ ins_pipe( pipe_slow ); %} -instruct loadShuffleI_avx(vec dst, vec src) %{ - predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) && - UseAVX > 0); - match(Set dst (VectorLoadShuffle src)); - format %{ "vector_load_shuffle $dst, $src" %} - ins_encode %{ - int vlen_enc = vector_length_encoding(this); - __ vpmovzxbd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); - %} - ins_pipe( pipe_slow ); -%} - instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{ predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) && UseAVX > 0); @@ -9126,8 +9100,7 @@ instruct loadShuffleL(vec dst, vec src, vec vtmp) %{ // only double word shuffle instruction available on these platforms // Multiply each shuffle by two to get double word index - __ vpmovzxbq($vtmp$$XMMRegister, $src$$XMMRegister, vlen_enc); - __ vpsllq($vtmp$$XMMRegister, $vtmp$$XMMRegister, 1, vlen_enc); + __ vpsllq($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc); // Duplicate each double word shuffle __ vpsllq($dst$$XMMRegister, $vtmp$$XMMRegister, 32, vlen_enc); @@ -9153,20 +9126,6 @@ instruct rearrangeL(vec dst, vec src, vec shuffle) %{ ins_pipe( pipe_slow ); %} -instruct loadShuffleL_evex(vec dst, vec src) %{ - predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE - (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl())); - match(Set dst (VectorLoadShuffle src)); - format %{ "vector_load_shuffle $dst, $src" %} - ins_encode %{ - assert(UseAVX > 2, "required"); - - int vlen_enc = vector_length_encoding(this); - __ vpmovzxbq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); - %} - ins_pipe( pipe_slow ); -%} - instruct rearrangeL_evex(vec dst, 
vec src, vec shuffle) %{ predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl())); diff --git a/src/hotspot/share/classfile/vmIntrinsics.hpp b/src/hotspot/share/classfile/vmIntrinsics.hpp index 68121c56c32..8ff494d687e 100644 --- a/src/hotspot/share/classfile/vmIntrinsics.hpp +++ b/src/hotspot/share/classfile/vmIntrinsics.hpp @@ -1002,33 +1002,6 @@ class methodHandle; "Ljdk/internal/vm/vector/VectorSupport$VectorPayload;") \ do_name(vector_frombits_coerced_name, "fromBitsCoerced") \ \ - do_intrinsic(_VectorShuffleIota, jdk_internal_vm_vector_VectorSupport, vector_shuffle_step_iota_name, vector_shuffle_step_iota_sig, F_S) \ - do_signature(vector_shuffle_step_iota_sig, "(Ljava/lang/Class;" \ - "Ljava/lang/Class;" \ - "Ljdk/internal/vm/vector/VectorSupport$VectorSpecies;" \ - "IIII" \ - "Ljdk/internal/vm/vector/VectorSupport$ShuffleIotaOperation;)" \ - "Ljdk/internal/vm/vector/VectorSupport$VectorShuffle;") \ - do_name(vector_shuffle_step_iota_name, "shuffleIota") \ - \ - do_intrinsic(_VectorShuffleToVector, jdk_internal_vm_vector_VectorSupport, vector_shuffle_to_vector_name, vector_shuffle_to_vector_sig, F_S) \ - do_signature(vector_shuffle_to_vector_sig, "(Ljava/lang/Class;" \ - "Ljava/lang/Class;" \ - "Ljava/lang/Class;" \ - "Ljdk/internal/vm/vector/VectorSupport$VectorShuffle;" \ - "ILjdk/internal/vm/vector/VectorSupport$ShuffleToVectorOperation;)" \ - "Ljdk/internal/vm/vector/VectorSupport$Vector;") \ - do_name(vector_shuffle_to_vector_name, "shuffleToVector") \ - \ - do_intrinsic(_VectorWrapShuffleIndexes, jdk_internal_vm_vector_VectorSupport, vector_wrap_shuffle_indexes_name, \ - vector_wrap_shuffle_indexes_sig, F_S) \ - do_signature(vector_wrap_shuffle_indexes_sig, "(Ljava/lang/Class;" \ - "Ljava/lang/Class;" \ - "Ljdk/internal/vm/vector/VectorSupport$VectorShuffle;" \ - "ILjdk/internal/vm/vector/VectorSupport$WrapShuffleIndexesOperation;)" \ - 
"Ljdk/internal/vm/vector/VectorSupport$VectorShuffle;") \ - do_name(vector_wrap_shuffle_indexes_name, "wrapShuffleIndexes") \ - \ do_intrinsic(_VectorLoadOp, jdk_internal_vm_vector_VectorSupport, vector_load_op_name, vector_load_op_sig, F_S) \ do_signature(vector_load_op_sig, "(Ljava/lang/Class;" \ "Ljava/lang/Class;" \ diff --git a/src/hotspot/share/opto/c2compiler.cpp b/src/hotspot/share/opto/c2compiler.cpp index 138fd38bfa4..c6744d0c7bd 100644 --- a/src/hotspot/share/opto/c2compiler.cpp +++ b/src/hotspot/share/opto/c2compiler.cpp @@ -811,9 +811,6 @@ bool C2Compiler::is_intrinsic_supported(vmIntrinsics::ID id) { case vmIntrinsics::_VectorBinaryOp: case vmIntrinsics::_VectorTernaryOp: case vmIntrinsics::_VectorFromBitsCoerced: - case vmIntrinsics::_VectorShuffleIota: - case vmIntrinsics::_VectorShuffleToVector: - case vmIntrinsics::_VectorWrapShuffleIndexes: case vmIntrinsics::_VectorLoadOp: case vmIntrinsics::_VectorLoadMaskedOp: case vmIntrinsics::_VectorStoreOp: diff --git a/src/hotspot/share/opto/graphKit.hpp b/src/hotspot/share/opto/graphKit.hpp index 223f2ce83f5..b0150df04ed 100644 --- a/src/hotspot/share/opto/graphKit.hpp +++ b/src/hotspot/share/opto/graphKit.hpp @@ -870,7 +870,7 @@ class GraphKit : public Phase { // Vector API support (implemented in vectorIntrinsics.cpp) Node* box_vector(Node* in, const TypeInstPtr* vbox_type, BasicType elem_bt, int num_elem, bool deoptimize_on_exception = false); - Node* unbox_vector(Node* in, const TypeInstPtr* vbox_type, BasicType elem_bt, int num_elem, bool shuffle_to_vector = false); + Node* unbox_vector(Node* in, const TypeInstPtr* vbox_type, BasicType elem_bt, int num_elem); Node* vector_shift_count(Node* cnt, int shift_op, BasicType bt, int num_elem); }; diff --git a/src/hotspot/share/opto/library_call.cpp b/src/hotspot/share/opto/library_call.cpp index 1072a5d6a24..096f0a20bd1 100644 --- a/src/hotspot/share/opto/library_call.cpp +++ b/src/hotspot/share/opto/library_call.cpp @@ -712,14 +712,8 @@ bool 
LibraryCallKit::try_to_inline(int predicate) { return inline_vector_nary_operation(3); case vmIntrinsics::_VectorFromBitsCoerced: return inline_vector_frombits_coerced(); - case vmIntrinsics::_VectorShuffleIota: - return inline_vector_shuffle_iota(); case vmIntrinsics::_VectorMaskOp: return inline_vector_mask_operation(); - case vmIntrinsics::_VectorShuffleToVector: - return inline_vector_shuffle_to_vector(); - case vmIntrinsics::_VectorWrapShuffleIndexes: - return inline_vector_wrap_shuffle_indexes(); case vmIntrinsics::_VectorLoadOp: return inline_vector_mem_operation(/*is_store=*/false); case vmIntrinsics::_VectorLoadMaskedOp: diff --git a/src/hotspot/share/opto/library_call.hpp b/src/hotspot/share/opto/library_call.hpp index c5437e3bf73..f629f757df2 100644 --- a/src/hotspot/share/opto/library_call.hpp +++ b/src/hotspot/share/opto/library_call.hpp @@ -353,10 +353,6 @@ class LibraryCallKit : public GraphKit { // Vector API support bool inline_vector_nary_operation(int n); bool inline_vector_frombits_coerced(); - bool inline_vector_shuffle_to_vector(); - bool inline_vector_wrap_shuffle_indexes(); - bool inline_vector_shuffle_iota(); - Node* partially_wrap_indexes(Node* index_vec, int num_elem, BasicType type_bt); bool inline_vector_mask_operation(); bool inline_vector_mem_operation(bool is_store); bool inline_vector_mem_masked_operation(bool is_store); diff --git a/src/hotspot/share/opto/matcher.hpp b/src/hotspot/share/opto/matcher.hpp index 0faba23d983..baf43b0d538 100644 --- a/src/hotspot/share/opto/matcher.hpp +++ b/src/hotspot/share/opto/matcher.hpp @@ -341,6 +341,8 @@ public: static bool vector_needs_partial_operations(Node* node, const TypeVect* vt); + static bool vector_rearrange_requires_load_shuffle(BasicType elem_bt, int vlen); + static const RegMask* predicate_reg_mask(void); // Vector width in bytes diff --git a/src/hotspot/share/opto/vector.cpp b/src/hotspot/share/opto/vector.cpp index 99b4c62fec7..73f9b9e74ba 100644 --- 
a/src/hotspot/share/opto/vector.cpp +++ b/src/hotspot/share/opto/vector.cpp @@ -36,11 +36,6 @@ static bool is_vector_mask(ciKlass* klass) { return klass->is_subclass_of(ciEnv::current()->vector_VectorMask_klass()); } -static bool is_vector_shuffle(ciKlass* klass) { - return klass->is_subclass_of(ciEnv::current()->vector_VectorShuffle_klass()); -} - - void PhaseVector::optimize_vector_boxes() { Compile::TracePhase tp(_t_vector_elimination); @@ -460,8 +455,6 @@ void PhaseVector::expand_vunbox_node(VectorUnboxNode* vec_unbox) { if (is_vector_mask(from_kls)) { bt = T_BOOLEAN; - } else if (is_vector_shuffle(from_kls)) { - bt = T_BYTE; } ciField* field = ciEnv::current()->vector_VectorPayload_klass()->get_field_by_name(ciSymbols::payload_name(), @@ -506,9 +499,6 @@ void PhaseVector::expand_vunbox_node(VectorUnboxNode* vec_unbox) { if (is_vector_mask(from_kls)) { vec_val_load = gvn.transform(new VectorLoadMaskNode(vec_val_load, TypeVect::makemask(masktype, num_elem))); - } else if (is_vector_shuffle(from_kls) && !vec_unbox->is_shuffle_to_vector()) { - assert(vec_unbox->bottom_type()->is_vect()->element_basic_type() == masktype, "expect shuffle type consistency"); - vec_val_load = gvn.transform(new VectorLoadShuffleNode(vec_val_load, TypeVect::make(masktype, num_elem))); } gvn.hash_delete(vec_unbox); diff --git a/src/hotspot/share/opto/vectorIntrinsics.cpp b/src/hotspot/share/opto/vectorIntrinsics.cpp index 3d20b22a175..ce37e5604fa 100644 --- a/src/hotspot/share/opto/vectorIntrinsics.cpp +++ b/src/hotspot/share/opto/vectorIntrinsics.cpp @@ -74,10 +74,6 @@ static bool is_vector_mask(ciKlass* klass) { return klass->is_subclass_of(ciEnv::current()->vector_VectorMask_klass()); } -static bool is_vector_shuffle(ciKlass* klass) { - return klass->is_subclass_of(ciEnv::current()->vector_VectorShuffle_klass()); -} - bool LibraryCallKit::arch_supports_vector_rotate(int opc, int num_elem, BasicType elem_bt, VectorMaskUseType mask_use_type, bool has_scalar_args) { bool is_supported = 
true; @@ -162,7 +158,7 @@ Node* GraphKit::box_vector(Node* vector, const TypeInstPtr* vbox_type, BasicType return gvn().transform(vbox); } -Node* GraphKit::unbox_vector(Node* v, const TypeInstPtr* vbox_type, BasicType elem_bt, int num_elem, bool shuffle_to_vector) { +Node* GraphKit::unbox_vector(Node* v, const TypeInstPtr* vbox_type, BasicType elem_bt, int num_elem) { assert(EnableVectorSupport, ""); const TypeInstPtr* vbox_type_v = gvn().type(v)->isa_instptr(); if (vbox_type_v == nullptr || vbox_type->instance_klass() != vbox_type_v->instance_klass()) { @@ -173,7 +169,7 @@ Node* GraphKit::unbox_vector(Node* v, const TypeInstPtr* vbox_type, BasicType el } assert(check_vbox(vbox_type), ""); const TypeVect* vt = TypeVect::make(elem_bt, num_elem, is_vector_mask(vbox_type->instance_klass())); - Node* unbox = gvn().transform(new VectorUnboxNode(C, vt, v, merged_memory(), shuffle_to_vector)); + Node* unbox = gvn().transform(new VectorUnboxNode(C, vt, v, merged_memory())); return unbox; } @@ -514,135 +510,7 @@ bool LibraryCallKit::inline_vector_nary_operation(int n) { return true; } -// Following routine generates IR corresponding to AbstractShuffle::partiallyWrapIndex method, -// which partially wraps index by modulo VEC_LENGTH and generates a negative index value if original -// index is out of valid index range [0, VEC_LENGTH) -// -// wrapped_index = (VEC_LENGTH - 1) & index -// if (index u> VEC_LENGTH) { -// wrapped_index -= VEC_LENGTH; -// -// Note: Unsigned greater than comparison treat both <0 and >VEC_LENGTH indices as out-of-bound -// indexes. 
-Node* LibraryCallKit::partially_wrap_indexes(Node* index_vec, int num_elem, BasicType elem_bt) { - assert(elem_bt == T_BYTE, "Shuffles use byte array based backing storage."); - const TypeVect* vt = TypeVect::make(elem_bt, num_elem); - - Node* mod_mask = gvn().makecon(TypeInt::make(num_elem-1)); - Node* bcast_mod_mask = gvn().transform(VectorNode::scalar2vector(mod_mask, num_elem, elem_bt)); - - BoolTest::mask pred = BoolTest::ugt; - ConINode* pred_node = (ConINode*)gvn().makecon(TypeInt::make(pred)); - Node* lane_cnt = gvn().makecon(TypeInt::make(num_elem)); - Node* bcast_lane_cnt = gvn().transform(VectorNode::scalar2vector(lane_cnt, num_elem, elem_bt)); - const TypeVect* vmask_type = TypeVect::makemask(elem_bt, num_elem); - Node* mask = gvn().transform(new VectorMaskCmpNode(pred, bcast_lane_cnt, index_vec, pred_node, vmask_type)); - - // Make the indices greater than lane count as -ve values to match the java side implementation. - index_vec = gvn().transform(VectorNode::make(Op_AndV, index_vec, bcast_mod_mask, vt)); - Node* biased_val = gvn().transform(VectorNode::make(Op_SubVB, index_vec, bcast_lane_cnt, vt)); - return gvn().transform(new VectorBlendNode(biased_val, index_vec, mask)); -} - -// , E> -// Sh ShuffleIota(Class E, Class shuffleClass, Vector.Species s, int length, -// int start, int step, int wrap, ShuffleIotaOperation defaultImpl) -bool LibraryCallKit::inline_vector_shuffle_iota() { - const TypeInstPtr* shuffle_klass = gvn().type(argument(1))->isa_instptr(); - const TypeInt* vlen = gvn().type(argument(3))->isa_int(); - const TypeInt* start_val = gvn().type(argument(4))->isa_int(); - const TypeInt* step_val = gvn().type(argument(5))->isa_int(); - const TypeInt* wrap = gvn().type(argument(6))->isa_int(); - - if (shuffle_klass == nullptr || shuffle_klass->const_oop() == nullptr || - vlen == nullptr || !vlen->is_con() || start_val == nullptr || step_val == nullptr || - wrap == nullptr || !wrap->is_con()) { - return false; // not enough info for 
intrinsification - } - - if (!is_klass_initialized(shuffle_klass)) { - log_if_needed(" ** klass argument not initialized"); - return false; - } - - int do_wrap = wrap->get_con(); - int num_elem = vlen->get_con(); - BasicType elem_bt = T_BYTE; - - bool effective_indices_in_range = false; - if (start_val->is_con() && step_val->is_con()) { - int effective_min_index = start_val->get_con(); - int effective_max_index = start_val->get_con() + step_val->get_con() * (num_elem - 1); - effective_indices_in_range = effective_max_index >= effective_min_index && effective_min_index >= -128 && effective_max_index <= 127; - } - - if (!do_wrap && !effective_indices_in_range) { - // Disable instrinsification for unwrapped shuffle iota if start/step - // values are non-constant OR if intermediate result overflows byte value range. - return false; - } - - if (!arch_supports_vector(Op_AddVB, num_elem, elem_bt, VecMaskNotUsed) || - !arch_supports_vector(Op_AndV, num_elem, elem_bt, VecMaskNotUsed) || - !arch_supports_vector(Op_VectorLoadConst, num_elem, elem_bt, VecMaskNotUsed) || - !arch_supports_vector(Op_Replicate, num_elem, elem_bt, VecMaskNotUsed)) { - return false; - } - - if (!do_wrap && - (!arch_supports_vector(Op_SubVB, num_elem, elem_bt, VecMaskNotUsed) || - !arch_supports_vector(Op_VectorBlend, num_elem, elem_bt, VecMaskNotUsed) || - !arch_supports_vector(Op_VectorMaskCmp, num_elem, elem_bt, VecMaskNotUsed))) { - return false; - } - - bool step_multiply = !step_val->is_con() || !is_power_of_2(step_val->get_con()); - if ((step_multiply && !arch_supports_vector(Op_MulVB, num_elem, elem_bt, VecMaskNotUsed)) || - (!step_multiply && !arch_supports_vector(Op_LShiftVB, num_elem, elem_bt, VecMaskNotUsed))) { - return false; - } - - const TypeVect* vt = TypeVect::make(elem_bt, num_elem); - - Node* res = gvn().transform(new VectorLoadConstNode(gvn().makecon(TypeInt::ZERO), vt)); - - Node* start = argument(4); - Node* step = argument(5); - - if (step_multiply) { - Node* bcast_step = 
gvn().transform(VectorNode::scalar2vector(step, num_elem, elem_bt)); - res = gvn().transform(VectorNode::make(Op_MulVB, res, bcast_step, vt)); - } else if (step_val->get_con() > 1) { - Node* cnt = gvn().makecon(TypeInt::make(log2i_exact(step_val->get_con()))); - Node* shift_cnt = vector_shift_count(cnt, Op_LShiftI, elem_bt, num_elem); - res = gvn().transform(VectorNode::make(Op_LShiftVB, res, shift_cnt, vt)); - } - - if (!start_val->is_con() || start_val->get_con() != 0) { - Node* bcast_start = gvn().transform(VectorNode::scalar2vector(start, num_elem, elem_bt)); - res = gvn().transform(VectorNode::make(Op_AddVB, res, bcast_start, vt)); - } - - Node* mod_val = gvn().makecon(TypeInt::make(num_elem-1)); - Node* bcast_mod = gvn().transform(VectorNode::scalar2vector(mod_val, num_elem, elem_bt)); - - if (do_wrap) { - // Wrap the indices greater than lane count. - res = gvn().transform(VectorNode::make(Op_AndV, res, bcast_mod, vt)); - } else { - res = partially_wrap_indexes(res, num_elem, elem_bt); - } - - ciKlass* sbox_klass = shuffle_klass->const_oop()->as_instance()->java_lang_Class_klass(); - const TypeInstPtr* shuffle_box_type = TypeInstPtr::make_exact(TypePtr::NotNull, sbox_klass); - - // Wrap it up in VectorBox to keep object type information. 
- res = box_vector(res, shuffle_box_type, elem_bt, num_elem); - set_result(res); - C->set_max_vector_size(MAX2(C->max_vector_size(), (uint)(num_elem * type2aelembytes(elem_bt)))); - return true; -} - +// public static // // long maskReductionCoerced(int oper, Class maskClass, Class elemClass, // int length, M m, VectorMaskOp defaultImpl) @@ -673,10 +541,9 @@ bool LibraryCallKit::inline_vector_mask_operation() { return false; // not supported } - const Type* elem_ty = Type::get_const_basic_type(elem_bt); ciKlass* mbox_klass = mask_klass->const_oop()->as_instance()->java_lang_Class_klass(); const TypeInstPtr* mask_box_type = TypeInstPtr::make_exact(TypePtr::NotNull, mbox_klass); - Node* mask_vec = unbox_vector(mask, mask_box_type, elem_bt, num_elem, true); + Node* mask_vec = unbox_vector(mask, mask_box_type, elem_bt, num_elem); if (mask_vec == nullptr) { log_if_needed(" ** unbox failed mask=%s", NodeClassNames[argument(4)->Opcode()]); @@ -697,124 +564,6 @@ bool LibraryCallKit::inline_vector_mask_operation() { return true; } -// public static -// , -// E> -// V shuffleToVector(Class> vclass, Class elementType, -// Class shuffleClass, Sh s, int length, -// ShuffleToVectorOperation defaultImpl) -bool LibraryCallKit::inline_vector_shuffle_to_vector() { - const TypeInstPtr* vector_klass = gvn().type(argument(0))->isa_instptr(); - const TypeInstPtr* elem_klass = gvn().type(argument(1))->isa_instptr(); - const TypeInstPtr* shuffle_klass = gvn().type(argument(2))->isa_instptr(); - Node* shuffle = argument(3); - const TypeInt* vlen = gvn().type(argument(4))->isa_int(); - - if (vector_klass == nullptr || elem_klass == nullptr || shuffle_klass == nullptr || shuffle->is_top() || vlen == nullptr) { - return false; // dead code - } - if (!vlen->is_con() || vector_klass->const_oop() == nullptr || shuffle_klass->const_oop() == nullptr) { - return false; // not enough info for intrinsification - } - if (!is_klass_initialized(shuffle_klass) || !is_klass_initialized(vector_klass) ) { - 
log_if_needed(" ** klass argument not initialized"); - return false; - } - - int num_elem = vlen->get_con(); - ciType* elem_type = elem_klass->const_oop()->as_instance()->java_mirror_type(); - BasicType elem_bt = elem_type->basic_type(); - - if (num_elem < 4) { - return false; - } - - int cast_vopc = VectorCastNode::opcode(-1, T_BYTE); // from shuffle of type T_BYTE - // Make sure that cast is implemented to particular type/size combination. - if (!arch_supports_vector(cast_vopc, num_elem, elem_bt, VecMaskNotUsed)) { - log_if_needed(" ** not supported: arity=1 op=cast#%d/3 vlen2=%d etype2=%s", - cast_vopc, num_elem, type2name(elem_bt)); - return false; - } - - ciKlass* sbox_klass = shuffle_klass->const_oop()->as_instance()->java_lang_Class_klass(); - const TypeInstPtr* shuffle_box_type = TypeInstPtr::make_exact(TypePtr::NotNull, sbox_klass); - - // Unbox shuffle with true flag to indicate its load shuffle to vector - // shuffle is a byte array - Node* shuffle_vec = unbox_vector(shuffle, shuffle_box_type, T_BYTE, num_elem, true); - - // cast byte to target element type - shuffle_vec = gvn().transform(VectorCastNode::make(cast_vopc, shuffle_vec, elem_bt, num_elem)); - - ciKlass* vbox_klass = vector_klass->const_oop()->as_instance()->java_lang_Class_klass(); - const TypeInstPtr* vec_box_type = TypeInstPtr::make_exact(TypePtr::NotNull, vbox_klass); - - // Box vector - Node* res = box_vector(shuffle_vec, vec_box_type, elem_bt, num_elem); - set_result(res); - C->set_max_vector_size(MAX2(C->max_vector_size(), (uint)(num_elem * type2aelembytes(elem_bt)))); - return true; -} - -// public static -// > -// SH wrapShuffleIndexes(Class eClass, Class shClass, SH sh, int length, -// ShuffleWrapIndexesOperation defaultImpl) -bool LibraryCallKit::inline_vector_wrap_shuffle_indexes() { - const TypeInstPtr* elem_klass = gvn().type(argument(0))->isa_instptr(); - const TypeInstPtr* shuffle_klass = gvn().type(argument(1))->isa_instptr(); - Node* shuffle = argument(2); - const TypeInt* 
vlen = gvn().type(argument(3))->isa_int(); - - if (elem_klass == nullptr || shuffle_klass == nullptr || shuffle->is_top() || vlen == nullptr || - !vlen->is_con() || shuffle_klass->const_oop() == nullptr) { - // not enough info for intrinsification - return false; - } - - if (!is_klass_initialized(shuffle_klass)) { - log_if_needed(" ** klass argument not initialized"); - return false; - } - - int num_elem = vlen->get_con(); - if ((num_elem < 4) || !is_power_of_2(num_elem)) { - log_if_needed(" ** vlen < 4 or not power of two=%d", num_elem); - return false; - } - - // Shuffles use byte array based backing storage - BasicType shuffle_bt = T_BYTE; - if (!arch_supports_vector(Op_AndV, num_elem, shuffle_bt, VecMaskNotUsed) || - !arch_supports_vector(Op_Replicate, num_elem, shuffle_bt, VecMaskNotUsed)) { - log_if_needed(" ** not supported: op=wrapShuffleIndexes vlen=%d etype=%s", - num_elem, type2name(shuffle_bt)); - return false; - } - - ciKlass* sbox_klass = shuffle_klass->const_oop()->as_instance()->java_lang_Class_klass(); - const TypeInstPtr* shuffle_box_type = TypeInstPtr::make_exact(TypePtr::NotNull, sbox_klass); - - // Unbox shuffle with true flag to indicate its load shuffle to vector - // shuffle is a byte array - Node* shuffle_vec = unbox_vector(shuffle, shuffle_box_type, shuffle_bt, num_elem, true); - - const TypeVect* vt = TypeVect::make(shuffle_bt, num_elem); - Node* mod_mask = gvn().makecon(TypeInt::make(num_elem - 1)); - Node* bcast_mod_mask = gvn().transform(VectorNode::scalar2vector(mod_mask, num_elem, shuffle_bt)); - // Wrap the indices greater than lane count. - Node* res = gvn().transform(VectorNode::make(Op_AndV, shuffle_vec, bcast_mod_mask, vt)); - - // Wrap it up in VectorBox to keep object type information. 
- res = box_vector(res, shuffle_box_type, shuffle_bt, num_elem); - set_result(res); - C->set_max_vector_size(MAX2(C->max_vector_size(), (uint)(num_elem * type2aelembytes(shuffle_bt)))); - return true; -} - // public static // , @@ -1995,13 +1744,23 @@ bool LibraryCallKit::inline_vector_rearrange() { log_if_needed(" ** not a primitive bt=%d", elem_type->basic_type()); return false; // should be primitive type } + BasicType elem_bt = elem_type->basic_type(); BasicType shuffle_bt = elem_bt; - int num_elem = vlen->get_con(); + if (shuffle_bt == T_FLOAT) { + shuffle_bt = T_INT; + } else if (shuffle_bt == T_DOUBLE) { + shuffle_bt = T_LONG; + } - if (!arch_supports_vector(Op_VectorLoadShuffle, num_elem, elem_bt, VecMaskNotUsed)) { - log_if_needed(" ** not supported: arity=0 op=load/shuffle vlen=%d etype=%s ismask=no", - num_elem, type2name(elem_bt)); + int num_elem = vlen->get_con(); + bool need_load_shuffle = Matcher::vector_rearrange_requires_load_shuffle(shuffle_bt, num_elem); + + if (need_load_shuffle && !arch_supports_vector(Op_VectorLoadShuffle, num_elem, shuffle_bt, VecMaskNotUsed)) { + if (C->print_intrinsics()) { + tty->print_cr(" ** not supported: arity=0 op=load/shuffle vlen=%d etype=%s ismask=no", + num_elem, type2name(shuffle_bt)); + } return false; // not supported } @@ -2013,6 +1772,11 @@ bool LibraryCallKit::inline_vector_rearrange() { !is_klass_initialized(mask_klass))) { log_if_needed(" ** mask_klass argument not initialized"); } + if (!arch_supports_vector(Op_AndV, num_elem, elem_bt, VecMaskNotUsed)) { + log_if_needed(" ** not supported: arity=2 op=and vlen=%d etype=%s ismask=no", + num_elem, type2name(elem_bt)); + return false; + } VectorMaskUseType checkFlags = (VectorMaskUseType)(is_masked_op ? 
(VecMaskUseLoad | VecMaskUsePred) : VecMaskNotUsed); if (!arch_supports_vector(Op_VectorRearrange, num_elem, elem_bt, checkFlags)) { use_predicate = false; @@ -2033,11 +1797,17 @@ bool LibraryCallKit::inline_vector_rearrange() { Node* v1 = unbox_vector(argument(5), vbox_type, elem_bt, num_elem); Node* shuffle = unbox_vector(argument(6), shbox_type, shuffle_bt, num_elem); + const TypeVect* st = TypeVect::make(shuffle_bt, num_elem); if (v1 == nullptr || shuffle == nullptr) { return false; // operand unboxing failed } + assert(is_power_of_2(num_elem), "wrapping invalid"); + Node* wrapping_mask_elem = gvn().makecon(TypeInteger::make(num_elem - 1, num_elem - 1, Type::WidenMin, shuffle_bt == T_LONG ? T_LONG : T_INT)); + Node* wrapping_mask = gvn().transform(VectorNode::scalar2vector(wrapping_mask_elem, num_elem, shuffle_bt)); + shuffle = gvn().transform(new AndVNode(shuffle, wrapping_mask, st)); + Node* mask = nullptr; if (is_masked_op) { ciKlass* mbox_klass = mask_klass->const_oop()->as_instance()->java_lang_Class_klass(); @@ -2050,13 +1820,16 @@ bool LibraryCallKit::inline_vector_rearrange() { } } + if (need_load_shuffle) { + shuffle = gvn().transform(new VectorLoadShuffleNode(shuffle, st)); + } + Node* rearrange = new VectorRearrangeNode(v1, shuffle); if (is_masked_op) { if (use_predicate) { rearrange->add_req(mask); rearrange->add_flag(Node::Flag_is_predicated_vector); } else { - const TypeVect* vt = v1->bottom_type()->is_vect(); rearrange = gvn().transform(rearrange); Node* zero = gvn().makecon(Type::get_zero_type(elem_bt)); Node* zerovec = gvn().transform(VectorNode::scalar2vector(zero, num_elem, elem_bt)); @@ -2154,11 +1927,19 @@ bool LibraryCallKit::inline_vector_select_from() { return false; } + BasicType shuffle_bt = elem_bt; + if (shuffle_bt == T_FLOAT) { + shuffle_bt = T_INT; + } else if (shuffle_bt == T_DOUBLE) { + shuffle_bt = T_LONG; + } + bool need_load_shuffle = Matcher::vector_rearrange_requires_load_shuffle(shuffle_bt, num_elem); + int cast_vopc = 
VectorCastNode::opcode(-1, elem_bt); // from vector of type elem_bt - if (!arch_supports_vector(Op_VectorLoadShuffle, num_elem, elem_bt, VecMaskNotUsed)|| - !arch_supports_vector(Op_AndV, num_elem, T_BYTE, VecMaskNotUsed) || - !arch_supports_vector(Op_Replicate, num_elem, T_BYTE, VecMaskNotUsed) || - !arch_supports_vector(cast_vopc, num_elem, T_BYTE, VecMaskNotUsed)) { + if ((need_load_shuffle && !arch_supports_vector(Op_VectorLoadShuffle, num_elem, elem_bt, VecMaskNotUsed)) || + (elem_bt != shuffle_bt && !arch_supports_vector(cast_vopc, num_elem, shuffle_bt, VecMaskNotUsed)) || + !arch_supports_vector(Op_AndV, num_elem, shuffle_bt, VecMaskNotUsed) || + !arch_supports_vector(Op_Replicate, num_elem, shuffle_bt, VecMaskNotUsed)) { log_if_needed(" ** not supported: arity=0 op=selectFrom vlen=%d etype=%s ismask=no", num_elem, type2name(elem_bt)); return false; // not supported @@ -2215,21 +1996,26 @@ bool LibraryCallKit::inline_vector_select_from() { } // cast index vector from elem_bt vector to byte vector - const TypeVect* byte_vt = TypeVect::make(T_BYTE, num_elem); - Node* byte_shuffle = gvn().transform(VectorCastNode::make(cast_vopc, v1, T_BYTE, num_elem)); + const TypeVect* shuffle_vt = TypeVect::make(shuffle_bt, num_elem); + Node* shuffle = v1; + + if (shuffle_bt != elem_bt) { + shuffle = gvn().transform(VectorCastNode::make(cast_vopc, v1, shuffle_bt, num_elem)); + } // wrap the byte vector lanes to (num_elem - 1) to form the shuffle vector where num_elem is vector length // this is a simple AND operation as we come here only for power of two vector length - Node* mod_val = gvn().makecon(TypeInt::make(num_elem-1)); - Node* bcast_mod = gvn().transform(VectorNode::scalar2vector(mod_val, num_elem, T_BYTE)); - byte_shuffle = gvn().transform(VectorNode::make(Op_AndV, byte_shuffle, bcast_mod, byte_vt)); + Node* mod_val = gvn().makecon(TypeInteger::make(num_elem - 1, num_elem - 1, Type::WidenMin, shuffle_bt == T_LONG ? 
T_LONG : T_INT)); + Node* bcast_mod = gvn().transform(VectorNode::scalar2vector(mod_val, num_elem, shuffle_bt)); + shuffle = gvn().transform(VectorNode::make(Op_AndV, shuffle, bcast_mod, shuffle_vt)); // load the shuffle to use in rearrange - const TypeVect* shuffle_vt = TypeVect::make(elem_bt, num_elem); - Node* load_shuffle = gvn().transform(new VectorLoadShuffleNode(byte_shuffle, shuffle_vt)); + if (need_load_shuffle) { + shuffle = gvn().transform(new VectorLoadShuffleNode(shuffle, shuffle_vt)); + } // and finally rearrange - Node* rearrange = new VectorRearrangeNode(v2, load_shuffle); + Node* rearrange = new VectorRearrangeNode(v2, shuffle); if (is_masked_op) { if (use_predicate) { // masked rearrange is supported so use that directly @@ -2482,9 +2268,7 @@ bool LibraryCallKit::inline_vector_convert() { ciKlass* vbox_klass_from = vector_klass_from->const_oop()->as_instance()->java_lang_Class_klass(); ciKlass* vbox_klass_to = vector_klass_to->const_oop()->as_instance()->java_lang_Class_klass(); - if (is_vector_shuffle(vbox_klass_from)) { - return false; // vector shuffles aren't supported - } + bool is_mask = is_vector_mask(vbox_klass_from); ciType* elem_type_from = elem_klass_from->const_oop()->as_instance()->java_mirror_type(); @@ -2524,18 +2308,6 @@ bool LibraryCallKit::inline_vector_convert() { return false; } - - if (is_vector_shuffle(vbox_klass_to) && - (!arch_supports_vector(Op_SubVB, num_elem_to, elem_bt_to, VecMaskNotUsed) || - !arch_supports_vector(Op_VectorBlend, num_elem_to, elem_bt_to, VecMaskNotUsed) || - !arch_supports_vector(Op_VectorMaskCmp, num_elem_to, elem_bt_to, VecMaskNotUsed) || - !arch_supports_vector(Op_AndV, num_elem_to, elem_bt_to, VecMaskNotUsed) || - !arch_supports_vector(Op_Replicate, num_elem_to, elem_bt_to, VecMaskNotUsed))) { - log_if_needed(" ** not supported: arity=1 op=shuffle_index_wrap vlen2=%d etype2=%s", - num_elem_to, type2name(elem_bt_to)); - return false; - } - // At this point, we know that both input and output vector 
registers are supported // by the architecture. Next check if the casted type is simply to same type - which means // that it is actually a resize and not a cast. @@ -2633,10 +2405,6 @@ bool LibraryCallKit::inline_vector_convert() { op = gvn().transform(new VectorReinterpretNode(op, src_type, dst_type)); } - if (is_vector_shuffle(vbox_klass_to)) { - op = partially_wrap_indexes(op, num_elem_to, elem_bt_to); - } - const TypeInstPtr* vbox_type_to = TypeInstPtr::make_exact(TypePtr::NotNull, vbox_klass_to); Node* vbox = box_vector(op, vbox_type_to, elem_bt_to, num_elem_to); set_result(vbox); @@ -2864,45 +2632,49 @@ static Node* LowerSelectFromTwoVectorOperation(PhaseGVN& phase, Node* index_vec, // boxing penalties. // - const TypeVect* index_vect_type = index_vec->bottom_type()->is_vect(); - BasicType index_elem_bt = index_vect_type->element_basic_type(); + BasicType shuffle_bt = elem_bt; + if (shuffle_bt == T_FLOAT) { + shuffle_bt = T_INT; + } else if (shuffle_bt == T_DOUBLE) { + shuffle_bt = T_LONG; + } + const TypeVect* st = TypeVect::make(shuffle_bt, num_elem); - // Downcast index vector to a type agnostic shuffle representation, shuffle - // indices are held in a byte vector which are later transformed to target - // specific permutation index format by subsequent VectorLoadShuffle. - int cast_vopc = VectorCastNode::opcode(0, index_elem_bt, true); - Node* index_byte_vec = phase.transform(VectorCastNode::make(cast_vopc, index_vec, T_BYTE, num_elem)); + // Cast index vector to the corresponding bit type + if (elem_bt != shuffle_bt) { + int cast_vopc = VectorCastNode::opcode(0, elem_bt, true); + index_vec = phase.transform(VectorCastNode::make(cast_vopc, index_vec, shuffle_bt, num_elem)); + } // Wrap indexes into two vector index range [0, VLEN * 2) - Node* two_vect_lane_cnt_m1 = phase.makecon(TypeInt::make(2 * num_elem - 1)); + Node* two_vect_lane_cnt_m1 = phase.makecon(TypeInteger::make(2 * num_elem - 1, 2 * num_elem - 1, Type::WidenMin, shuffle_bt == T_LONG ? 
T_LONG : T_INT)); Node* bcast_two_vect_lane_cnt_m1_vec = phase.transform(VectorNode::scalar2vector(two_vect_lane_cnt_m1, num_elem, - T_BYTE, false)); - index_byte_vec = phase.transform(VectorNode::make(Op_AndV, index_byte_vec, bcast_two_vect_lane_cnt_m1_vec, - index_byte_vec->bottom_type()->is_vect())); + shuffle_bt, false)); + index_vec = phase.transform(VectorNode::make(Op_AndV, index_vec, bcast_two_vect_lane_cnt_m1_vec, st)); // Compute the blend mask for merging two independently permitted vectors // using shuffle index in two vector index range [0, VLEN * 2). BoolTest::mask pred = BoolTest::le; ConINode* pred_node = phase.makecon(TypeInt::make(pred))->as_ConI(); - const TypeVect* vmask_type = TypeVect::makemask(T_BYTE, num_elem); - Node* lane_cnt_m1 = phase.makecon(TypeInt::make(num_elem - 1)); - Node* bcast_lane_cnt_m1_vec = phase.transform(VectorNode::scalar2vector(lane_cnt_m1, num_elem, - T_BYTE, false)); - Node* mask = phase.transform(new VectorMaskCmpNode(pred, index_byte_vec, bcast_lane_cnt_m1_vec, pred_node, vmask_type)); + const TypeVect* vmask_type = TypeVect::makemask(shuffle_bt, num_elem); + Node* lane_cnt_m1 = phase.makecon(TypeInteger::make(num_elem - 1, num_elem - 1, Type::WidenMin, shuffle_bt == T_LONG ? T_LONG : T_INT)); + Node* bcast_lane_cnt_m1_vec = phase.transform(VectorNode::scalar2vector(lane_cnt_m1, num_elem, shuffle_bt, false)); + Node* mask = phase.transform(new VectorMaskCmpNode(pred, index_vec, bcast_lane_cnt_m1_vec, pred_node, vmask_type)); // Rearrange expects the indexes to lie within single vector index range [0, VLEN). - index_byte_vec = phase.transform(VectorNode::make(Op_AndV, index_byte_vec, bcast_lane_cnt_m1_vec, - index_byte_vec->bottom_type()->is_vect())); + Node* wrapped_index_vec = phase.transform(VectorNode::make(Op_AndV, index_vec, bcast_lane_cnt_m1_vec, st)); // Load indexes from byte vector and appropriately transform them to target // specific permutation index format. 
- index_vec = phase.transform(new VectorLoadShuffleNode(index_byte_vec, index_vect_type)); + if (Matcher::vector_rearrange_requires_load_shuffle(shuffle_bt, num_elem)) { + wrapped_index_vec = phase.transform(new VectorLoadShuffleNode(wrapped_index_vec, st)); + } vmask_type = TypeVect::makemask(elem_bt, num_elem); mask = phase.transform(new VectorMaskCastNode(mask, vmask_type)); - Node* p1 = phase.transform(new VectorRearrangeNode(src1, index_vec)); - Node* p2 = phase.transform(new VectorRearrangeNode(src2, index_vec)); + Node* p1 = phase.transform(new VectorRearrangeNode(src1, wrapped_index_vec)); + Node* p2 = phase.transform(new VectorRearrangeNode(src2, wrapped_index_vec)); return new VectorBlendNode(p2, p1, mask); } @@ -2955,14 +2727,14 @@ bool LibraryCallKit::inline_vector_select_from_two_vectors() { bool lowerSelectFromOp = false; if (!arch_supports_vector(Op_SelectFromTwoVector, num_elem, elem_bt, VecMaskNotUsed)) { int cast_vopc = VectorCastNode::opcode(-1, elem_bt, true); - if (!arch_supports_vector(Op_VectorMaskCmp, num_elem, T_BYTE, VecMaskNotUsed) || - !arch_supports_vector(Op_AndV, num_elem, T_BYTE, VecMaskNotUsed) || + if ((elem_bt != index_elem_bt && !arch_supports_vector(cast_vopc, num_elem, index_elem_bt, VecMaskNotUsed)) || + !arch_supports_vector(Op_VectorMaskCmp, num_elem, index_elem_bt, VecMaskNotUsed) || + !arch_supports_vector(Op_AndV, num_elem, index_elem_bt, VecMaskNotUsed) || !arch_supports_vector(Op_VectorMaskCast, num_elem, elem_bt, VecMaskNotUsed) || !arch_supports_vector(Op_VectorBlend, num_elem, elem_bt, VecMaskUseLoad) || !arch_supports_vector(Op_VectorRearrange, num_elem, elem_bt, VecMaskNotUsed) || - !arch_supports_vector(cast_vopc, num_elem, T_BYTE, VecMaskNotUsed) || !arch_supports_vector(Op_VectorLoadShuffle, num_elem, index_elem_bt, VecMaskNotUsed) || - !arch_supports_vector(Op_Replicate, num_elem, T_BYTE, VecMaskNotUsed)) { + !arch_supports_vector(Op_Replicate, num_elem, index_elem_bt, VecMaskNotUsed)) { log_if_needed(" ** not 
supported: opc=%d vlen=%d etype=%s ismask=useload", Op_SelectFromTwoVector, num_elem, type2name(elem_bt)); return false; // not supported diff --git a/src/hotspot/share/opto/vectornode.cpp b/src/hotspot/share/opto/vectornode.cpp index 03e1e7ef247..33429dfacad 100644 --- a/src/hotspot/share/opto/vectornode.cpp +++ b/src/hotspot/share/opto/vectornode.cpp @@ -1731,19 +1731,13 @@ Node* VectorUnboxNode::Ideal(PhaseGVN* phase, bool can_reshape) { if (in_vt->length() == out_vt->length()) { Node* value = vbox->in(VectorBoxNode::Value); - bool is_vector_mask = vbox_klass->is_subclass_of(ciEnv::current()->vector_VectorMask_klass()); - bool is_vector_shuffle = vbox_klass->is_subclass_of(ciEnv::current()->vector_VectorShuffle_klass()); + bool is_vector_mask = vbox_klass->is_subclass_of(ciEnv::current()->vector_VectorMask_klass()); if (is_vector_mask) { // VectorUnbox (VectorBox vmask) ==> VectorMaskCast vmask const TypeVect* vmask_type = TypeVect::makemask(out_vt->element_basic_type(), out_vt->length()); return new VectorMaskCastNode(value, vmask_type); - } else if (is_vector_shuffle) { - if (!is_shuffle_to_vector()) { - // VectorUnbox (VectorBox vshuffle) ==> VectorLoadShuffle vshuffle - return new VectorLoadShuffleNode(value, out_vt); - } } else { - // Vector type mismatch is only supported for masks and shuffles, but sometimes it happens in pathological cases. + // Vector type mismatch is only supported for masks, but sometimes it happens in pathological cases. } } else { // Vector length mismatch. diff --git a/src/hotspot/share/opto/vectornode.hpp b/src/hotspot/share/opto/vectornode.hpp index 3f737e6e881..f5574ba79e3 100644 --- a/src/hotspot/share/opto/vectornode.hpp +++ b/src/hotspot/share/opto/vectornode.hpp @@ -1688,15 +1688,18 @@ public: virtual int Opcode() const; }; - +// The target may not directly support the rearrange operation for an element type. In those cases, +// we can transform the rearrange into a different element type. 
For example, on x86 before AVX512, +// there is no rearrange instruction for short elements, what we will then do is to transform the +// shuffle vector into one that we can do byte rearrange such that it would provide the same +// result. This could have been done in VectorRearrangeNode during code emission but we eagerly +// expand this out because it is often the case that an index vector is reused in many rearrange +// operations. This allows the index preparation to be GVN-ed as well as hoisted out of loops, etc. class VectorLoadShuffleNode : public VectorNode { public: VectorLoadShuffleNode(Node* in, const TypeVect* vt) - : VectorNode(in, vt) { - assert(in->bottom_type()->is_vect()->element_basic_type() == T_BYTE, "must be BYTE"); - } + : VectorNode(in, vt) {} - int GetOutShuffleSize() const { return type2aelembytes(vect_type()->element_basic_type()); } virtual int Opcode() const; }; @@ -1935,14 +1938,11 @@ class VectorBoxAllocateNode : public CallStaticJavaNode { }; class VectorUnboxNode : public VectorNode { - private: - bool _shuffle_to_vector; protected: uint size_of() const { return sizeof(*this); } public: - VectorUnboxNode(Compile* C, const TypeVect* vec_type, Node* obj, Node* mem, bool shuffle_to_vector) + VectorUnboxNode(Compile* C, const TypeVect* vec_type, Node* obj, Node* mem) : VectorNode(mem, obj, vec_type) { - _shuffle_to_vector = shuffle_to_vector; init_class_id(Class_VectorUnbox); init_flags(Flag_is_macro); C->add_macro_node(this); @@ -1953,7 +1953,6 @@ class VectorUnboxNode : public VectorNode { Node* mem() const { return in(1); } virtual Node* Identity(PhaseGVN* phase); Node* Ideal(PhaseGVN* phase, bool can_reshape); - bool is_shuffle_to_vector() { return _shuffle_to_vector; } }; class RotateRightVNode : public VectorNode { diff --git a/src/hotspot/share/prims/vectorSupport.cpp b/src/hotspot/share/prims/vectorSupport.cpp index 9eb0b46131b..21acaa06348 100644 --- a/src/hotspot/share/prims/vectorSupport.cpp +++ 
b/src/hotspot/share/prims/vectorSupport.cpp @@ -73,10 +73,6 @@ bool VectorSupport::is_vector_mask(Klass* klass) { return klass->is_subclass_of(vmClasses::vector_VectorMask_klass()); } -bool VectorSupport::is_vector_shuffle(Klass* klass) { - return klass->is_subclass_of(vmClasses::vector_VectorShuffle_klass()); -} - BasicType VectorSupport::klass2bt(InstanceKlass* ik) { assert(ik->is_subclass_of(vmClasses::vector_VectorPayload_klass()), "%s not a VectorPayload", ik->name()->as_C_string()); fieldDescriptor fd; // find_field initializes fd if found @@ -87,9 +83,7 @@ BasicType VectorSupport::klass2bt(InstanceKlass* ik) { assert(fd.is_static(), ""); assert(fd.offset() > 0, ""); - if (is_vector_shuffle(ik)) { - return T_BYTE; - } else if (is_vector_mask(ik)) { + if (is_vector_mask(ik)) { return T_BOOLEAN; } else { // vector and mask oop value = ik->java_mirror()->obj_field(fd.offset()); diff --git a/src/hotspot/share/prims/vectorSupport.hpp b/src/hotspot/share/prims/vectorSupport.hpp index 001484874e2..688fb595099 100644 --- a/src/hotspot/share/prims/vectorSupport.hpp +++ b/src/hotspot/share/prims/vectorSupport.hpp @@ -157,6 +157,5 @@ class VectorSupport : AllStatic { static bool is_vector(Klass* klass); static bool is_vector_mask(Klass* klass); - static bool is_vector_shuffle(Klass* klass); }; #endif // SHARE_PRIMS_VECTORSUPPORT_HPP diff --git a/src/java.base/share/classes/jdk/internal/vm/vector/VectorSupport.java b/src/java.base/share/classes/jdk/internal/vm/vector/VectorSupport.java index 341a7125e6c..6e1c363f3d9 100644 --- a/src/java.base/share/classes/jdk/internal/vm/vector/VectorSupport.java +++ b/src/java.base/share/classes/jdk/internal/vm/vector/VectorSupport.java @@ -234,56 +234,6 @@ public class VectorSupport { return defaultImpl.apply(offset, limit); } - /* ============================================================================ */ - public interface ShuffleIotaOperation, - SH extends VectorShuffle> { - SH apply(int length, int start, int step, S s); - } - 
- @IntrinsicCandidate - public static - , - SH extends VectorShuffle> - SH shuffleIota(Class eClass, Class shClass, S s, - int length, - int start, int step, int wrap, - ShuffleIotaOperation defaultImpl) { - assert isNonCapturingLambda(defaultImpl) : defaultImpl; - return defaultImpl.apply(length, start, step, s); - } - - public interface ShuffleToVectorOperation, - SH extends VectorShuffle> { - V apply(SH sh); - } - - @IntrinsicCandidate - public static - , - SH extends VectorShuffle, - E> - V shuffleToVector(Class> vClass, Class eClass, Class shClass, SH sh, - int length, - ShuffleToVectorOperation defaultImpl) { - assert isNonCapturingLambda(defaultImpl) : defaultImpl; - return defaultImpl.apply(sh); - } - - public interface WrapShuffleIndexesOperation> { - SH apply(SH sh); - } - - @IntrinsicCandidate - public static - > - SH wrapShuffleIndexes(Class eClass, Class shClass, SH sh, int length, - WrapShuffleIndexesOperation defaultImpl) { - assert isNonCapturingLambda(defaultImpl) : defaultImpl; - return defaultImpl.apply(sh); - } - /* ============================================================================ */ public interface IndexOperation, S extends VectorSpecies> { diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/AbstractShuffle.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/AbstractShuffle.java index 81be152ddae..155853a86f9 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/AbstractShuffle.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/AbstractShuffle.java @@ -26,53 +26,16 @@ package jdk.incubator.vector; import java.util.function.IntUnaryOperator; import jdk.internal.vm.annotation.ForceInline; +import jdk.internal.vm.vector.VectorSupport; abstract class AbstractShuffle extends VectorShuffle { static final IntUnaryOperator IDENTITY = i -> i; - // Internal representation allows for a maximum index of 256 + // Internal representation allows for a maximum index of 
E.MAX_VALUE - 1 // Values are clipped to [-VLENGTH..VLENGTH-1]. - AbstractShuffle(int length, byte[] reorder) { - super(reorder); - assert(length == reorder.length); - assert(indexesInRange(reorder)); - } - - AbstractShuffle(int length, int[] reorder) { - this(length, reorder, 0); - } - - AbstractShuffle(int length, int[] reorder, int offset) { - super(prepare(length, reorder, offset)); - } - - AbstractShuffle(int length, IntUnaryOperator f) { - super(prepare(length, f)); - } - - private static byte[] prepare(int length, int[] reorder, int offset) { - byte[] a = new byte[length]; - for (int i = 0; i < length; i++) { - int si = reorder[offset + i]; - si = partiallyWrapIndex(si, length); - a[i] = (byte) si; - } - return a; - } - - private static byte[] prepare(int length, IntUnaryOperator f) { - byte[] a = new byte[length]; - for (int i = 0; i < a.length; i++) { - int si = f.applyAsInt(i); - si = partiallyWrapIndex(si, length); - a[i] = (byte) si; - } - return a; - } - - byte[] reorder() { - return (byte[])getPayload(); + AbstractShuffle(Object indices) { + super(indices); } /*package-private*/ @@ -84,91 +47,55 @@ abstract class AbstractShuffle extends VectorShuffle { return vspecies(); } - @Override - @ForceInline - public void intoArray(int[] a, int offset) { - byte[] reorder = reorder(); - int vlen = reorder.length; - for (int i = 0; i < vlen; i++) { - int sourceIndex = reorder[i]; - assert(sourceIndex >= -vlen && sourceIndex < vlen); - a[offset + i] = sourceIndex; - } - } - - @Override - @ForceInline - public int[] toArray() { - byte[] reorder = reorder(); - int[] a = new int[reorder.length]; - intoArray(a, 0); - return a; - } - /*package-private*/ + abstract AbstractVector toBitsVector(); + @ForceInline - final - AbstractVector - toVectorTemplate() { - // Note that the values produced by laneSource - // are already clipped. At this point we convert - // them from internal ints (or bytes) into the ETYPE. - // FIXME: Use a conversion intrinsic for this operation. 
- // https://bugs.openjdk.org/browse/JDK-8225740 - return (AbstractVector) vspecies().fromIntValues(toArray()); + final AbstractVector toBitsVectorTemplate() { + AbstractSpecies dsp = vspecies().asIntegral(); + Class etype = dsp.elementType(); + Class rvtype = dsp.dummyVector().getClass(); + return VectorSupport.convert(VectorSupport.VECTOR_OP_REINTERPRET, + getClass(), etype, length(), + rvtype, etype, length(), + this, dsp, + (v, s) -> v.toBitsVector0()); } + abstract AbstractVector toBitsVector0(); + + @Override + @ForceInline + public final int[] toArray() { + int[] res = new int[length()]; + intoArray(res, 0); + return res; + } + + @Override + @ForceInline + public final VectorShuffle cast(VectorSpecies s) { + if (length() != s.length()) { + throw new IllegalArgumentException("VectorShuffle length and species length differ"); + } + return toBitsVector().bitsToShuffle((AbstractSpecies) s); + } + + @Override @ForceInline public final VectorShuffle checkIndexes() { if (VectorIntrinsics.VECTOR_ACCESS_OOB_CHECK == 0) { return this; } - Vector shufvec = this.toVector(); - VectorMask vecmask = shufvec.compare(VectorOperators.LT, vspecies().zero()); + Vector shufvec = this.toBitsVector(); + VectorMask vecmask = shufvec.compare(VectorOperators.LT, 0); if (vecmask.anyTrue()) { - byte[] reorder = reorder(); - throw checkIndexFailed(reorder[vecmask.firstTrue()], length()); + int[] indices = toArray(); + throw checkIndexFailed(indices[vecmask.firstTrue()], length()); } return this; } - @ForceInline - public final VectorShuffle wrapIndexesTemplate() { - Vector shufvec = this.toVector(); - VectorMask vecmask = shufvec.compare(VectorOperators.LT, vspecies().zero()); - if (vecmask.anyTrue()) { - // FIXME: vectorize this - byte[] reorder = reorder(); - return wrapAndRebuild(reorder); - } - return this; - } - - @ForceInline - public final VectorShuffle wrapAndRebuild(byte[] oldReorder) { - int length = oldReorder.length; - byte[] reorder = new byte[length]; - for (int i = 0; i < 
length; i++) { - int si = oldReorder[i]; - // FIXME: This does not work unless it's a power of 2. - if ((length & (length - 1)) == 0) { - si += si & length; // power-of-two optimization - } else if (si < 0) { - // non-POT code requires a conditional add - si += length; - } - assert(si >= 0 && si < length); - reorder[i] = (byte) si; - } - return vspecies().dummyVector().shuffleFromBytes(reorder); - } - - @ForceInline - public final VectorMask laneIsValid() { - Vector shufvec = this.toVector(); - return shufvec.compare(VectorOperators.GE, vspecies().zero()); - } - @Override @ForceInline @SuppressWarnings("unchecked") @@ -220,21 +147,4 @@ abstract class AbstractShuffle extends VectorShuffle { String msg = "required an index in [0.."+max+"] but found "+index; return new IndexOutOfBoundsException(msg); } - - static boolean indexesInRange(byte[] reorder) { - int length = reorder.length; - for (byte si : reorder) { - if (si >= length || si < -length) { - boolean assertsEnabled = false; - assert(assertsEnabled = true); - if (assertsEnabled) { - String msg = ("index "+si+"out of range ["+length+"] in "+ - java.util.Arrays.toString(reorder)); - throw new AssertionError(msg); - } - return false; - } - } - return true; - } } diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/AbstractSpecies.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/AbstractSpecies.java index 0ff4830ded5..bdc72c64ce5 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/AbstractSpecies.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/AbstractSpecies.java @@ -48,6 +48,8 @@ abstract class AbstractSpecies extends jdk.internal.vm.vector.VectorSupport.V @Stable final Class> maskType; @Stable + final Class> shuffleType; + @Stable final Function> vectorFactory; @Stable @@ -61,11 +63,13 @@ abstract class AbstractSpecies extends jdk.internal.vm.vector.VectorSupport.V LaneType laneType, Class> vectorType, Class> maskType, + Class> 
shuffleType, Function> vectorFactory) { this.vectorShape = vectorShape; this.laneType = laneType; this.vectorType = vectorType; this.maskType = maskType; + this.shuffleType = shuffleType; this.vectorFactory = vectorFactory; // derived values: @@ -162,6 +166,11 @@ abstract class AbstractSpecies extends jdk.internal.vm.vector.VectorSupport.V return maskType; } + @ForceInline + final Class> shuffleType() { + return shuffleType; + } + @Override @ForceInline public final int elementSize() { diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/AbstractVector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/AbstractVector.java index 64d681e4aee..867d74dd6b3 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/AbstractVector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/AbstractVector.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, 2022, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2019, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -194,12 +194,43 @@ abstract class AbstractVector extends Vector { abstract AbstractMask maskFromArray(boolean[] bits); + abstract VectorShuffle bitsToShuffle(AbstractSpecies dsp); + + /*package-private*/ + @ForceInline + final VectorShuffle bitsToShuffleTemplate(AbstractSpecies dsp) { + Class etype = vspecies().elementType(); + Class dvtype = dsp.shuffleType(); + Class dtype = dsp.asIntegral().elementType(); + int dlength = dsp.dummyVector().length(); + return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST, + getClass(), etype, length(), + dvtype, dtype, dlength, + this, dsp, + AbstractVector::bitsToShuffle0); + } + + abstract VectorShuffle bitsToShuffle0(AbstractSpecies dsp); + + abstract VectorShuffle toShuffle(AbstractSpecies dsp, boolean wrap); + abstract AbstractShuffle iotaShuffle(); abstract AbstractShuffle iotaShuffle(int start, int step, boolean wrap); - /*do not alias this byte array*/ - abstract AbstractShuffle shuffleFromBytes(byte[] reorder); + @ForceInline + final VectorShuffle iotaShuffleTemplate(int start, int step, boolean wrap) { + if ((length() & (length() - 1)) != 0) { + // Uncommon path, the length is not a power of 2 + return wrap ? 
shuffleFromOp(i -> (VectorIntrinsics.wrapToRange(i * step + start, length()))) + : shuffleFromOp(i -> i * step + start); + } + + AbstractVector iota = vspecies().asIntegral().iota(); + iota = (AbstractVector) iota.lanewise(VectorOperators.MUL, step) + .lanewise(VectorOperators.ADD, start); + return iota.toShuffle(vspecies(), wrap); + } abstract AbstractShuffle shuffleFromArray(int[] indexes, int i); diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Byte128Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Byte128Vector.java index 3cf25d46f44..a71a6212295 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Byte128Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Byte128Vector.java @@ -141,24 +141,15 @@ final class Byte128Vector extends ByteVector { @ForceInline Byte128Shuffle iotaShuffle() { return Byte128Shuffle.IOTA; } + @Override @ForceInline Byte128Shuffle iotaShuffle(int start, int step, boolean wrap) { - if (wrap) { - return (Byte128Shuffle)VectorSupport.shuffleIota(ETYPE, Byte128Shuffle.class, VSPECIES, VLENGTH, start, step, 1, - (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (VectorIntrinsics.wrapToRange(i*lstep + lstart, l)))); - } else { - return (Byte128Shuffle)VectorSupport.shuffleIota(ETYPE, Byte128Shuffle.class, VSPECIES, VLENGTH, start, step, 0, - (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (i*lstep + lstart))); - } + return (Byte128Shuffle) iotaShuffleTemplate((byte) start, (byte) step, wrap); } @Override @ForceInline - Byte128Shuffle shuffleFromBytes(byte[] reorder) { return new Byte128Shuffle(reorder); } - - @Override - @ForceInline - Byte128Shuffle shuffleFromArray(int[] indexes, int i) { return new Byte128Shuffle(indexes, i); } + Byte128Shuffle shuffleFromArray(int[] indices, int i) { return new Byte128Shuffle(indices, i); } @Override @ForceInline @@ -357,9 +348,16 @@ final class Byte128Vector extends ByteVector { return (long) 
super.reduceLanesTemplate(op, Byte128Mask.class, (Byte128Mask) m); // specialized } + @Override @ForceInline - public VectorShuffle toShuffle() { - return super.toShuffleTemplate(Byte128Shuffle.class); // specialize + final VectorShuffle bitsToShuffle(AbstractSpecies dsp) { + return bitsToShuffleTemplate(dsp); + } + + @Override + @ForceInline + public final Byte128Shuffle toShuffle() { + return (Byte128Shuffle) toShuffle(vspecies(), false); } // Specialized unary testing @@ -821,23 +819,26 @@ final class Byte128Vector extends ByteVector { static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM static final Class ETYPE = byte.class; // used by the JVM - Byte128Shuffle(byte[] reorder) { - super(VLENGTH, reorder); + Byte128Shuffle(byte[] indices) { + super(indices); + assert(VLENGTH == indices.length); + assert(indicesInRange(indices)); } - public Byte128Shuffle(int[] reorder) { - super(VLENGTH, reorder); + Byte128Shuffle(int[] indices, int i) { + this(prepare(indices, i)); } - public Byte128Shuffle(int[] reorder, int i) { - super(VLENGTH, reorder, i); + Byte128Shuffle(IntUnaryOperator fn) { + this(prepare(fn)); } - public Byte128Shuffle(IntUnaryOperator fn) { - super(VLENGTH, fn); + byte[] indices() { + return (byte[])getPayload(); } @Override + @ForceInline public ByteSpecies vspecies() { return VSPECIES; } @@ -853,39 +854,103 @@ final class Byte128Vector extends ByteVector { @Override @ForceInline public Byte128Vector toVector() { - return VectorSupport.shuffleToVector(VCLASS, ETYPE, Byte128Shuffle.class, this, VLENGTH, - (s) -> ((Byte128Vector)(((AbstractShuffle)(s)).toVectorTemplate()))); + return toBitsVector(); } @Override @ForceInline - public VectorShuffle cast(VectorSpecies s) { - AbstractSpecies species = (AbstractSpecies) s; - if (length() != species.laneCount()) - throw new IllegalArgumentException("VectorShuffle length and species length differ"); - int[] shuffleArray = toArray(); - return s.shuffleFromArray(shuffleArray, 0).check(s); + 
Byte128Vector toBitsVector() { + return (Byte128Vector) super.toBitsVectorTemplate(); + } + + @Override + Byte128Vector toBitsVector0() { + return ((Byte128Vector) vspecies().asIntegral().dummyVector()).vectorFactory(indices()); } @Override @ForceInline - public Byte128Shuffle wrapIndexes() { - return VectorSupport.wrapShuffleIndexes(ETYPE, Byte128Shuffle.class, this, VLENGTH, - (s) -> ((Byte128Shuffle)(((AbstractShuffle)(s)).wrapIndexesTemplate()))); + public int laneSource(int i) { + return (int)toBitsVector().lane(i); + } + + @Override + @ForceInline + public void intoArray(int[] a, int offset) { + VectorSpecies species = IntVector.SPECIES_128; + Vector v = toBitsVector(); + v.convertShape(VectorOperators.B2I, species, 0) + .reinterpretAsInts() + .intoArray(a, offset); + v.convertShape(VectorOperators.B2I, species, 1) + .reinterpretAsInts() + .intoArray(a, offset + species.length()); + v.convertShape(VectorOperators.B2I, species, 2) + .reinterpretAsInts() + .intoArray(a, offset + species.length() * 2); + v.convertShape(VectorOperators.B2I, species, 3) + .reinterpretAsInts() + .intoArray(a, offset + species.length() * 3); + } + + @Override + @ForceInline + public final Byte128Mask laneIsValid() { + return (Byte128Mask) toBitsVector().compare(VectorOperators.GE, 0) + .cast(vspecies()); } @ForceInline @Override - public Byte128Shuffle rearrange(VectorShuffle shuffle) { - Byte128Shuffle s = (Byte128Shuffle) shuffle; - byte[] reorder1 = reorder(); - byte[] reorder2 = s.reorder(); - byte[] r = new byte[reorder1.length]; - for (int i = 0; i < reorder1.length; i++) { - int ssi = reorder2[i]; - r[i] = reorder1[ssi]; // throws on exceptional index + public final Byte128Shuffle rearrange(VectorShuffle shuffle) { + Byte128Shuffle concreteShuffle = (Byte128Shuffle) shuffle; + return (Byte128Shuffle) toBitsVector().rearrange(concreteShuffle) + .toShuffle(vspecies(), false); + } + + @ForceInline + @Override + public final Byte128Shuffle wrapIndexes() { + Byte128Vector v = 
toBitsVector(); + if ((length() & (length() - 1)) == 0) { + v = (Byte128Vector) v.lanewise(VectorOperators.AND, length() - 1); + } else { + v = (Byte128Vector) v.blend(v.lanewise(VectorOperators.ADD, length()), + v.compare(VectorOperators.LT, 0)); } - return new Byte128Shuffle(r); + return (Byte128Shuffle) v.toShuffle(vspecies(), false); + } + + private static byte[] prepare(int[] indices, int offset) { + byte[] a = new byte[VLENGTH]; + for (int i = 0; i < VLENGTH; i++) { + int si = indices[offset + i]; + si = partiallyWrapIndex(si, VLENGTH); + a[i] = (byte)si; + } + return a; + } + + private static byte[] prepare(IntUnaryOperator f) { + byte[] a = new byte[VLENGTH]; + for (int i = 0; i < VLENGTH; i++) { + int si = f.applyAsInt(i); + si = partiallyWrapIndex(si, VLENGTH); + a[i] = (byte)si; + } + return a; + } + + private static boolean indicesInRange(byte[] indices) { + int length = indices.length; + for (byte si : indices) { + if (si >= (byte)length || si < (byte)(-length)) { + String msg = ("index "+si+"out of range ["+length+"] in "+ + java.util.Arrays.toString(indices)); + throw new AssertionError(msg); + } + } + return true; } } diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Byte256Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Byte256Vector.java index cb9f2679dca..828c8bcb905 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Byte256Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Byte256Vector.java @@ -141,24 +141,15 @@ final class Byte256Vector extends ByteVector { @ForceInline Byte256Shuffle iotaShuffle() { return Byte256Shuffle.IOTA; } + @Override @ForceInline Byte256Shuffle iotaShuffle(int start, int step, boolean wrap) { - if (wrap) { - return (Byte256Shuffle)VectorSupport.shuffleIota(ETYPE, Byte256Shuffle.class, VSPECIES, VLENGTH, start, step, 1, - (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (VectorIntrinsics.wrapToRange(i*lstep + lstart, l)))); - 
} else { - return (Byte256Shuffle)VectorSupport.shuffleIota(ETYPE, Byte256Shuffle.class, VSPECIES, VLENGTH, start, step, 0, - (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (i*lstep + lstart))); - } + return (Byte256Shuffle) iotaShuffleTemplate((byte) start, (byte) step, wrap); } @Override @ForceInline - Byte256Shuffle shuffleFromBytes(byte[] reorder) { return new Byte256Shuffle(reorder); } - - @Override - @ForceInline - Byte256Shuffle shuffleFromArray(int[] indexes, int i) { return new Byte256Shuffle(indexes, i); } + Byte256Shuffle shuffleFromArray(int[] indices, int i) { return new Byte256Shuffle(indices, i); } @Override @ForceInline @@ -357,9 +348,16 @@ final class Byte256Vector extends ByteVector { return (long) super.reduceLanesTemplate(op, Byte256Mask.class, (Byte256Mask) m); // specialized } + @Override @ForceInline - public VectorShuffle toShuffle() { - return super.toShuffleTemplate(Byte256Shuffle.class); // specialize + final VectorShuffle bitsToShuffle(AbstractSpecies dsp) { + return bitsToShuffleTemplate(dsp); + } + + @Override + @ForceInline + public final Byte256Shuffle toShuffle() { + return (Byte256Shuffle) toShuffle(vspecies(), false); } // Specialized unary testing @@ -853,23 +851,26 @@ final class Byte256Vector extends ByteVector { static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM static final Class ETYPE = byte.class; // used by the JVM - Byte256Shuffle(byte[] reorder) { - super(VLENGTH, reorder); + Byte256Shuffle(byte[] indices) { + super(indices); + assert(VLENGTH == indices.length); + assert(indicesInRange(indices)); } - public Byte256Shuffle(int[] reorder) { - super(VLENGTH, reorder); + Byte256Shuffle(int[] indices, int i) { + this(prepare(indices, i)); } - public Byte256Shuffle(int[] reorder, int i) { - super(VLENGTH, reorder, i); + Byte256Shuffle(IntUnaryOperator fn) { + this(prepare(fn)); } - public Byte256Shuffle(IntUnaryOperator fn) { - super(VLENGTH, fn); + byte[] indices() { + return (byte[])getPayload(); } @Override 
+ @ForceInline public ByteSpecies vspecies() { return VSPECIES; } @@ -885,39 +886,103 @@ final class Byte256Vector extends ByteVector { @Override @ForceInline public Byte256Vector toVector() { - return VectorSupport.shuffleToVector(VCLASS, ETYPE, Byte256Shuffle.class, this, VLENGTH, - (s) -> ((Byte256Vector)(((AbstractShuffle)(s)).toVectorTemplate()))); + return toBitsVector(); } @Override @ForceInline - public VectorShuffle cast(VectorSpecies s) { - AbstractSpecies species = (AbstractSpecies) s; - if (length() != species.laneCount()) - throw new IllegalArgumentException("VectorShuffle length and species length differ"); - int[] shuffleArray = toArray(); - return s.shuffleFromArray(shuffleArray, 0).check(s); + Byte256Vector toBitsVector() { + return (Byte256Vector) super.toBitsVectorTemplate(); + } + + @Override + Byte256Vector toBitsVector0() { + return ((Byte256Vector) vspecies().asIntegral().dummyVector()).vectorFactory(indices()); } @Override @ForceInline - public Byte256Shuffle wrapIndexes() { - return VectorSupport.wrapShuffleIndexes(ETYPE, Byte256Shuffle.class, this, VLENGTH, - (s) -> ((Byte256Shuffle)(((AbstractShuffle)(s)).wrapIndexesTemplate()))); + public int laneSource(int i) { + return (int)toBitsVector().lane(i); + } + + @Override + @ForceInline + public void intoArray(int[] a, int offset) { + VectorSpecies species = IntVector.SPECIES_256; + Vector v = toBitsVector(); + v.convertShape(VectorOperators.B2I, species, 0) + .reinterpretAsInts() + .intoArray(a, offset); + v.convertShape(VectorOperators.B2I, species, 1) + .reinterpretAsInts() + .intoArray(a, offset + species.length()); + v.convertShape(VectorOperators.B2I, species, 2) + .reinterpretAsInts() + .intoArray(a, offset + species.length() * 2); + v.convertShape(VectorOperators.B2I, species, 3) + .reinterpretAsInts() + .intoArray(a, offset + species.length() * 3); + } + + @Override + @ForceInline + public final Byte256Mask laneIsValid() { + return (Byte256Mask) 
toBitsVector().compare(VectorOperators.GE, 0) + .cast(vspecies()); } @ForceInline @Override - public Byte256Shuffle rearrange(VectorShuffle shuffle) { - Byte256Shuffle s = (Byte256Shuffle) shuffle; - byte[] reorder1 = reorder(); - byte[] reorder2 = s.reorder(); - byte[] r = new byte[reorder1.length]; - for (int i = 0; i < reorder1.length; i++) { - int ssi = reorder2[i]; - r[i] = reorder1[ssi]; // throws on exceptional index + public final Byte256Shuffle rearrange(VectorShuffle shuffle) { + Byte256Shuffle concreteShuffle = (Byte256Shuffle) shuffle; + return (Byte256Shuffle) toBitsVector().rearrange(concreteShuffle) + .toShuffle(vspecies(), false); + } + + @ForceInline + @Override + public final Byte256Shuffle wrapIndexes() { + Byte256Vector v = toBitsVector(); + if ((length() & (length() - 1)) == 0) { + v = (Byte256Vector) v.lanewise(VectorOperators.AND, length() - 1); + } else { + v = (Byte256Vector) v.blend(v.lanewise(VectorOperators.ADD, length()), + v.compare(VectorOperators.LT, 0)); } - return new Byte256Shuffle(r); + return (Byte256Shuffle) v.toShuffle(vspecies(), false); + } + + private static byte[] prepare(int[] indices, int offset) { + byte[] a = new byte[VLENGTH]; + for (int i = 0; i < VLENGTH; i++) { + int si = indices[offset + i]; + si = partiallyWrapIndex(si, VLENGTH); + a[i] = (byte)si; + } + return a; + } + + private static byte[] prepare(IntUnaryOperator f) { + byte[] a = new byte[VLENGTH]; + for (int i = 0; i < VLENGTH; i++) { + int si = f.applyAsInt(i); + si = partiallyWrapIndex(si, VLENGTH); + a[i] = (byte)si; + } + return a; + } + + private static boolean indicesInRange(byte[] indices) { + int length = indices.length; + for (byte si : indices) { + if (si >= (byte)length || si < (byte)(-length)) { + String msg = ("index "+si+"out of range ["+length+"] in "+ + java.util.Arrays.toString(indices)); + throw new AssertionError(msg); + } + } + return true; } } diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Byte512Vector.java 
b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Byte512Vector.java index f5ff0575703..a5c32ff6b1f 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Byte512Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Byte512Vector.java @@ -141,24 +141,15 @@ final class Byte512Vector extends ByteVector { @ForceInline Byte512Shuffle iotaShuffle() { return Byte512Shuffle.IOTA; } + @Override @ForceInline Byte512Shuffle iotaShuffle(int start, int step, boolean wrap) { - if (wrap) { - return (Byte512Shuffle)VectorSupport.shuffleIota(ETYPE, Byte512Shuffle.class, VSPECIES, VLENGTH, start, step, 1, - (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (VectorIntrinsics.wrapToRange(i*lstep + lstart, l)))); - } else { - return (Byte512Shuffle)VectorSupport.shuffleIota(ETYPE, Byte512Shuffle.class, VSPECIES, VLENGTH, start, step, 0, - (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (i*lstep + lstart))); - } + return (Byte512Shuffle) iotaShuffleTemplate((byte) start, (byte) step, wrap); } @Override @ForceInline - Byte512Shuffle shuffleFromBytes(byte[] reorder) { return new Byte512Shuffle(reorder); } - - @Override - @ForceInline - Byte512Shuffle shuffleFromArray(int[] indexes, int i) { return new Byte512Shuffle(indexes, i); } + Byte512Shuffle shuffleFromArray(int[] indices, int i) { return new Byte512Shuffle(indices, i); } @Override @ForceInline @@ -357,9 +348,16 @@ final class Byte512Vector extends ByteVector { return (long) super.reduceLanesTemplate(op, Byte512Mask.class, (Byte512Mask) m); // specialized } + @Override @ForceInline - public VectorShuffle toShuffle() { - return super.toShuffleTemplate(Byte512Shuffle.class); // specialize + final VectorShuffle bitsToShuffle(AbstractSpecies dsp) { + return bitsToShuffleTemplate(dsp); + } + + @Override + @ForceInline + public final Byte512Shuffle toShuffle() { + return (Byte512Shuffle) toShuffle(vspecies(), false); } // Specialized unary testing @@ -917,23 +915,26 @@ final class 
Byte512Vector extends ByteVector { static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM static final Class ETYPE = byte.class; // used by the JVM - Byte512Shuffle(byte[] reorder) { - super(VLENGTH, reorder); + Byte512Shuffle(byte[] indices) { + super(indices); + assert(VLENGTH == indices.length); + assert(indicesInRange(indices)); } - public Byte512Shuffle(int[] reorder) { - super(VLENGTH, reorder); + Byte512Shuffle(int[] indices, int i) { + this(prepare(indices, i)); } - public Byte512Shuffle(int[] reorder, int i) { - super(VLENGTH, reorder, i); + Byte512Shuffle(IntUnaryOperator fn) { + this(prepare(fn)); } - public Byte512Shuffle(IntUnaryOperator fn) { - super(VLENGTH, fn); + byte[] indices() { + return (byte[])getPayload(); } @Override + @ForceInline public ByteSpecies vspecies() { return VSPECIES; } @@ -949,39 +950,103 @@ final class Byte512Vector extends ByteVector { @Override @ForceInline public Byte512Vector toVector() { - return VectorSupport.shuffleToVector(VCLASS, ETYPE, Byte512Shuffle.class, this, VLENGTH, - (s) -> ((Byte512Vector)(((AbstractShuffle)(s)).toVectorTemplate()))); + return toBitsVector(); } @Override @ForceInline - public VectorShuffle cast(VectorSpecies s) { - AbstractSpecies species = (AbstractSpecies) s; - if (length() != species.laneCount()) - throw new IllegalArgumentException("VectorShuffle length and species length differ"); - int[] shuffleArray = toArray(); - return s.shuffleFromArray(shuffleArray, 0).check(s); + Byte512Vector toBitsVector() { + return (Byte512Vector) super.toBitsVectorTemplate(); + } + + @Override + Byte512Vector toBitsVector0() { + return ((Byte512Vector) vspecies().asIntegral().dummyVector()).vectorFactory(indices()); } @Override @ForceInline - public Byte512Shuffle wrapIndexes() { - return VectorSupport.wrapShuffleIndexes(ETYPE, Byte512Shuffle.class, this, VLENGTH, - (s) -> ((Byte512Shuffle)(((AbstractShuffle)(s)).wrapIndexesTemplate()))); + public int laneSource(int i) { + return 
(int)toBitsVector().lane(i); + } + + @Override + @ForceInline + public void intoArray(int[] a, int offset) { + VectorSpecies species = IntVector.SPECIES_512; + Vector v = toBitsVector(); + v.convertShape(VectorOperators.B2I, species, 0) + .reinterpretAsInts() + .intoArray(a, offset); + v.convertShape(VectorOperators.B2I, species, 1) + .reinterpretAsInts() + .intoArray(a, offset + species.length()); + v.convertShape(VectorOperators.B2I, species, 2) + .reinterpretAsInts() + .intoArray(a, offset + species.length() * 2); + v.convertShape(VectorOperators.B2I, species, 3) + .reinterpretAsInts() + .intoArray(a, offset + species.length() * 3); + } + + @Override + @ForceInline + public final Byte512Mask laneIsValid() { + return (Byte512Mask) toBitsVector().compare(VectorOperators.GE, 0) + .cast(vspecies()); } @ForceInline @Override - public Byte512Shuffle rearrange(VectorShuffle shuffle) { - Byte512Shuffle s = (Byte512Shuffle) shuffle; - byte[] reorder1 = reorder(); - byte[] reorder2 = s.reorder(); - byte[] r = new byte[reorder1.length]; - for (int i = 0; i < reorder1.length; i++) { - int ssi = reorder2[i]; - r[i] = reorder1[ssi]; // throws on exceptional index + public final Byte512Shuffle rearrange(VectorShuffle shuffle) { + Byte512Shuffle concreteShuffle = (Byte512Shuffle) shuffle; + return (Byte512Shuffle) toBitsVector().rearrange(concreteShuffle) + .toShuffle(vspecies(), false); + } + + @ForceInline + @Override + public final Byte512Shuffle wrapIndexes() { + Byte512Vector v = toBitsVector(); + if ((length() & (length() - 1)) == 0) { + v = (Byte512Vector) v.lanewise(VectorOperators.AND, length() - 1); + } else { + v = (Byte512Vector) v.blend(v.lanewise(VectorOperators.ADD, length()), + v.compare(VectorOperators.LT, 0)); } - return new Byte512Shuffle(r); + return (Byte512Shuffle) v.toShuffle(vspecies(), false); + } + + private static byte[] prepare(int[] indices, int offset) { + byte[] a = new byte[VLENGTH]; + for (int i = 0; i < VLENGTH; i++) { + int si = indices[offset 
+ i]; + si = partiallyWrapIndex(si, VLENGTH); + a[i] = (byte)si; + } + return a; + } + + private static byte[] prepare(IntUnaryOperator f) { + byte[] a = new byte[VLENGTH]; + for (int i = 0; i < VLENGTH; i++) { + int si = f.applyAsInt(i); + si = partiallyWrapIndex(si, VLENGTH); + a[i] = (byte)si; + } + return a; + } + + private static boolean indicesInRange(byte[] indices) { + int length = indices.length; + for (byte si : indices) { + if (si >= (byte)length || si < (byte)(-length)) { + String msg = ("index "+si+"out of range ["+length+"] in "+ + java.util.Arrays.toString(indices)); + throw new AssertionError(msg); + } + } + return true; } } diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Byte64Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Byte64Vector.java index 37e8978d7e8..222e6ec5e39 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Byte64Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Byte64Vector.java @@ -141,24 +141,15 @@ final class Byte64Vector extends ByteVector { @ForceInline Byte64Shuffle iotaShuffle() { return Byte64Shuffle.IOTA; } + @Override @ForceInline Byte64Shuffle iotaShuffle(int start, int step, boolean wrap) { - if (wrap) { - return (Byte64Shuffle)VectorSupport.shuffleIota(ETYPE, Byte64Shuffle.class, VSPECIES, VLENGTH, start, step, 1, - (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (VectorIntrinsics.wrapToRange(i*lstep + lstart, l)))); - } else { - return (Byte64Shuffle)VectorSupport.shuffleIota(ETYPE, Byte64Shuffle.class, VSPECIES, VLENGTH, start, step, 0, - (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (i*lstep + lstart))); - } + return (Byte64Shuffle) iotaShuffleTemplate((byte) start, (byte) step, wrap); } @Override @ForceInline - Byte64Shuffle shuffleFromBytes(byte[] reorder) { return new Byte64Shuffle(reorder); } - - @Override - @ForceInline - Byte64Shuffle shuffleFromArray(int[] indexes, int i) { return new Byte64Shuffle(indexes, i); } 
+ Byte64Shuffle shuffleFromArray(int[] indices, int i) { return new Byte64Shuffle(indices, i); } @Override @ForceInline @@ -357,9 +348,16 @@ final class Byte64Vector extends ByteVector { return (long) super.reduceLanesTemplate(op, Byte64Mask.class, (Byte64Mask) m); // specialized } + @Override @ForceInline - public VectorShuffle toShuffle() { - return super.toShuffleTemplate(Byte64Shuffle.class); // specialize + final VectorShuffle bitsToShuffle(AbstractSpecies dsp) { + return bitsToShuffleTemplate(dsp); + } + + @Override + @ForceInline + public final Byte64Shuffle toShuffle() { + return (Byte64Shuffle) toShuffle(vspecies(), false); } // Specialized unary testing @@ -805,23 +803,26 @@ final class Byte64Vector extends ByteVector { static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM static final Class ETYPE = byte.class; // used by the JVM - Byte64Shuffle(byte[] reorder) { - super(VLENGTH, reorder); + Byte64Shuffle(byte[] indices) { + super(indices); + assert(VLENGTH == indices.length); + assert(indicesInRange(indices)); } - public Byte64Shuffle(int[] reorder) { - super(VLENGTH, reorder); + Byte64Shuffle(int[] indices, int i) { + this(prepare(indices, i)); } - public Byte64Shuffle(int[] reorder, int i) { - super(VLENGTH, reorder, i); + Byte64Shuffle(IntUnaryOperator fn) { + this(prepare(fn)); } - public Byte64Shuffle(IntUnaryOperator fn) { - super(VLENGTH, fn); + byte[] indices() { + return (byte[])getPayload(); } @Override + @ForceInline public ByteSpecies vspecies() { return VSPECIES; } @@ -837,39 +838,103 @@ final class Byte64Vector extends ByteVector { @Override @ForceInline public Byte64Vector toVector() { - return VectorSupport.shuffleToVector(VCLASS, ETYPE, Byte64Shuffle.class, this, VLENGTH, - (s) -> ((Byte64Vector)(((AbstractShuffle)(s)).toVectorTemplate()))); + return toBitsVector(); } @Override @ForceInline - public VectorShuffle cast(VectorSpecies s) { - AbstractSpecies species = (AbstractSpecies) s; - if (length() != species.laneCount()) 
- throw new IllegalArgumentException("VectorShuffle length and species length differ"); - int[] shuffleArray = toArray(); - return s.shuffleFromArray(shuffleArray, 0).check(s); + Byte64Vector toBitsVector() { + return (Byte64Vector) super.toBitsVectorTemplate(); + } + + @Override + Byte64Vector toBitsVector0() { + return ((Byte64Vector) vspecies().asIntegral().dummyVector()).vectorFactory(indices()); } @Override @ForceInline - public Byte64Shuffle wrapIndexes() { - return VectorSupport.wrapShuffleIndexes(ETYPE, Byte64Shuffle.class, this, VLENGTH, - (s) -> ((Byte64Shuffle)(((AbstractShuffle)(s)).wrapIndexesTemplate()))); + public int laneSource(int i) { + return (int)toBitsVector().lane(i); + } + + @Override + @ForceInline + public void intoArray(int[] a, int offset) { + VectorSpecies species = IntVector.SPECIES_64; + Vector v = toBitsVector(); + v.convertShape(VectorOperators.B2I, species, 0) + .reinterpretAsInts() + .intoArray(a, offset); + v.convertShape(VectorOperators.B2I, species, 1) + .reinterpretAsInts() + .intoArray(a, offset + species.length()); + v.convertShape(VectorOperators.B2I, species, 2) + .reinterpretAsInts() + .intoArray(a, offset + species.length() * 2); + v.convertShape(VectorOperators.B2I, species, 3) + .reinterpretAsInts() + .intoArray(a, offset + species.length() * 3); + } + + @Override + @ForceInline + public final Byte64Mask laneIsValid() { + return (Byte64Mask) toBitsVector().compare(VectorOperators.GE, 0) + .cast(vspecies()); } @ForceInline @Override - public Byte64Shuffle rearrange(VectorShuffle shuffle) { - Byte64Shuffle s = (Byte64Shuffle) shuffle; - byte[] reorder1 = reorder(); - byte[] reorder2 = s.reorder(); - byte[] r = new byte[reorder1.length]; - for (int i = 0; i < reorder1.length; i++) { - int ssi = reorder2[i]; - r[i] = reorder1[ssi]; // throws on exceptional index + public final Byte64Shuffle rearrange(VectorShuffle shuffle) { + Byte64Shuffle concreteShuffle = (Byte64Shuffle) shuffle; + return (Byte64Shuffle) 
toBitsVector().rearrange(concreteShuffle) + .toShuffle(vspecies(), false); + } + + @ForceInline + @Override + public final Byte64Shuffle wrapIndexes() { + Byte64Vector v = toBitsVector(); + if ((length() & (length() - 1)) == 0) { + v = (Byte64Vector) v.lanewise(VectorOperators.AND, length() - 1); + } else { + v = (Byte64Vector) v.blend(v.lanewise(VectorOperators.ADD, length()), + v.compare(VectorOperators.LT, 0)); } - return new Byte64Shuffle(r); + return (Byte64Shuffle) v.toShuffle(vspecies(), false); + } + + private static byte[] prepare(int[] indices, int offset) { + byte[] a = new byte[VLENGTH]; + for (int i = 0; i < VLENGTH; i++) { + int si = indices[offset + i]; + si = partiallyWrapIndex(si, VLENGTH); + a[i] = (byte)si; + } + return a; + } + + private static byte[] prepare(IntUnaryOperator f) { + byte[] a = new byte[VLENGTH]; + for (int i = 0; i < VLENGTH; i++) { + int si = f.applyAsInt(i); + si = partiallyWrapIndex(si, VLENGTH); + a[i] = (byte)si; + } + return a; + } + + private static boolean indicesInRange(byte[] indices) { + int length = indices.length; + for (byte si : indices) { + if (si >= (byte)length || si < (byte)(-length)) { + String msg = ("index "+si+"out of range ["+length+"] in "+ + java.util.Arrays.toString(indices)); + throw new AssertionError(msg); + } + } + return true; } } diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ByteMaxVector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ByteMaxVector.java index 17dcf193ceb..774b351bebb 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ByteMaxVector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ByteMaxVector.java @@ -141,24 +141,15 @@ final class ByteMaxVector extends ByteVector { @ForceInline ByteMaxShuffle iotaShuffle() { return ByteMaxShuffle.IOTA; } + @Override @ForceInline ByteMaxShuffle iotaShuffle(int start, int step, boolean wrap) { - if (wrap) { - return 
(ByteMaxShuffle)VectorSupport.shuffleIota(ETYPE, ByteMaxShuffle.class, VSPECIES, VLENGTH, start, step, 1, - (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (VectorIntrinsics.wrapToRange(i*lstep + lstart, l)))); - } else { - return (ByteMaxShuffle)VectorSupport.shuffleIota(ETYPE, ByteMaxShuffle.class, VSPECIES, VLENGTH, start, step, 0, - (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (i*lstep + lstart))); - } + return (ByteMaxShuffle) iotaShuffleTemplate((byte) start, (byte) step, wrap); } @Override @ForceInline - ByteMaxShuffle shuffleFromBytes(byte[] reorder) { return new ByteMaxShuffle(reorder); } - - @Override - @ForceInline - ByteMaxShuffle shuffleFromArray(int[] indexes, int i) { return new ByteMaxShuffle(indexes, i); } + ByteMaxShuffle shuffleFromArray(int[] indices, int i) { return new ByteMaxShuffle(indices, i); } @Override @ForceInline @@ -357,9 +348,16 @@ final class ByteMaxVector extends ByteVector { return (long) super.reduceLanesTemplate(op, ByteMaxMask.class, (ByteMaxMask) m); // specialized } + @Override @ForceInline - public VectorShuffle toShuffle() { - return super.toShuffleTemplate(ByteMaxShuffle.class); // specialize + final VectorShuffle bitsToShuffle(AbstractSpecies dsp) { + return bitsToShuffleTemplate(dsp); + } + + @Override + @ForceInline + public final ByteMaxShuffle toShuffle() { + return (ByteMaxShuffle) toShuffle(vspecies(), false); } // Specialized unary testing @@ -791,23 +789,26 @@ final class ByteMaxVector extends ByteVector { static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM static final Class ETYPE = byte.class; // used by the JVM - ByteMaxShuffle(byte[] reorder) { - super(VLENGTH, reorder); + ByteMaxShuffle(byte[] indices) { + super(indices); + assert(VLENGTH == indices.length); + assert(indicesInRange(indices)); } - public ByteMaxShuffle(int[] reorder) { - super(VLENGTH, reorder); + ByteMaxShuffle(int[] indices, int i) { + this(prepare(indices, i)); } - public ByteMaxShuffle(int[] reorder, int i) { - 
super(VLENGTH, reorder, i); + ByteMaxShuffle(IntUnaryOperator fn) { + this(prepare(fn)); } - public ByteMaxShuffle(IntUnaryOperator fn) { - super(VLENGTH, fn); + byte[] indices() { + return (byte[])getPayload(); } @Override + @ForceInline public ByteSpecies vspecies() { return VSPECIES; } @@ -823,39 +824,103 @@ final class ByteMaxVector extends ByteVector { @Override @ForceInline public ByteMaxVector toVector() { - return VectorSupport.shuffleToVector(VCLASS, ETYPE, ByteMaxShuffle.class, this, VLENGTH, - (s) -> ((ByteMaxVector)(((AbstractShuffle)(s)).toVectorTemplate()))); + return toBitsVector(); } @Override @ForceInline - public VectorShuffle cast(VectorSpecies s) { - AbstractSpecies species = (AbstractSpecies) s; - if (length() != species.laneCount()) - throw new IllegalArgumentException("VectorShuffle length and species length differ"); - int[] shuffleArray = toArray(); - return s.shuffleFromArray(shuffleArray, 0).check(s); + ByteMaxVector toBitsVector() { + return (ByteMaxVector) super.toBitsVectorTemplate(); + } + + @Override + ByteMaxVector toBitsVector0() { + return ((ByteMaxVector) vspecies().asIntegral().dummyVector()).vectorFactory(indices()); } @Override @ForceInline - public ByteMaxShuffle wrapIndexes() { - return VectorSupport.wrapShuffleIndexes(ETYPE, ByteMaxShuffle.class, this, VLENGTH, - (s) -> ((ByteMaxShuffle)(((AbstractShuffle)(s)).wrapIndexesTemplate()))); + public int laneSource(int i) { + return (int)toBitsVector().lane(i); + } + + @Override + @ForceInline + public void intoArray(int[] a, int offset) { + VectorSpecies species = IntVector.SPECIES_MAX; + Vector v = toBitsVector(); + v.convertShape(VectorOperators.B2I, species, 0) + .reinterpretAsInts() + .intoArray(a, offset); + v.convertShape(VectorOperators.B2I, species, 1) + .reinterpretAsInts() + .intoArray(a, offset + species.length()); + v.convertShape(VectorOperators.B2I, species, 2) + .reinterpretAsInts() + .intoArray(a, offset + species.length() * 2); + 
v.convertShape(VectorOperators.B2I, species, 3) + .reinterpretAsInts() + .intoArray(a, offset + species.length() * 3); + } + + @Override + @ForceInline + public final ByteMaxMask laneIsValid() { + return (ByteMaxMask) toBitsVector().compare(VectorOperators.GE, 0) + .cast(vspecies()); } @ForceInline @Override - public ByteMaxShuffle rearrange(VectorShuffle shuffle) { - ByteMaxShuffle s = (ByteMaxShuffle) shuffle; - byte[] reorder1 = reorder(); - byte[] reorder2 = s.reorder(); - byte[] r = new byte[reorder1.length]; - for (int i = 0; i < reorder1.length; i++) { - int ssi = reorder2[i]; - r[i] = reorder1[ssi]; // throws on exceptional index + public final ByteMaxShuffle rearrange(VectorShuffle shuffle) { + ByteMaxShuffle concreteShuffle = (ByteMaxShuffle) shuffle; + return (ByteMaxShuffle) toBitsVector().rearrange(concreteShuffle) + .toShuffle(vspecies(), false); + } + + @ForceInline + @Override + public final ByteMaxShuffle wrapIndexes() { + ByteMaxVector v = toBitsVector(); + if ((length() & (length() - 1)) == 0) { + v = (ByteMaxVector) v.lanewise(VectorOperators.AND, length() - 1); + } else { + v = (ByteMaxVector) v.blend(v.lanewise(VectorOperators.ADD, length()), + v.compare(VectorOperators.LT, 0)); } - return new ByteMaxShuffle(r); + return (ByteMaxShuffle) v.toShuffle(vspecies(), false); + } + + private static byte[] prepare(int[] indices, int offset) { + byte[] a = new byte[VLENGTH]; + for (int i = 0; i < VLENGTH; i++) { + int si = indices[offset + i]; + si = partiallyWrapIndex(si, VLENGTH); + a[i] = (byte)si; + } + return a; + } + + private static byte[] prepare(IntUnaryOperator f) { + byte[] a = new byte[VLENGTH]; + for (int i = 0; i < VLENGTH; i++) { + int si = f.applyAsInt(i); + si = partiallyWrapIndex(si, VLENGTH); + a[i] = (byte)si; + } + return a; + } + + private static boolean indicesInRange(byte[] indices) { + int length = indices.length; + for (byte si : indices) { + if (si >= (byte)length || si < (byte)(-length)) { + String msg = ("index "+si+"out of 
range ["+length+"] in "+ + java.util.Arrays.toString(indices)); + throw new AssertionError(msg); + } + } + return true; } } diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ByteVector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ByteVector.java index 346b00eda5e..eab5d5c15ae 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ByteVector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ByteVector.java @@ -1096,7 +1096,7 @@ public abstract class ByteVector extends AbstractVector { // and broadcast, but it would be more surprising not to continue // the obvious pattern started by unary and binary. - /** + /** * {@inheritDoc} * @see #lanewise(VectorOperators.Ternary,byte,byte,VectorMask) * @see #lanewise(VectorOperators.Ternary,Vector,byte,VectorMask) @@ -2308,9 +2308,10 @@ public abstract class ByteVector extends AbstractVector { ByteVector that = (ByteVector) v1; that.check(this); Objects.checkIndex(origin, length() + 1); - VectorShuffle iota = iotaShuffle(); - VectorMask blendMask = iota.toVector().compare(VectorOperators.LT, (broadcast((byte)(length() - origin)))); - iota = iotaShuffle(origin, 1, true); + ByteVector iotaVector = (ByteVector) iotaShuffle().toBitsVector(); + ByteVector filter = broadcast((byte)(length() - origin)); + VectorMask blendMask = iotaVector.compare(VectorOperators.LT, filter); + AbstractShuffle iota = iotaShuffle(origin, 1, true); return that.rearrange(iota).blend(this.rearrange(iota), blendMask); } @@ -2338,9 +2339,10 @@ public abstract class ByteVector extends AbstractVector { @ForceInline ByteVector sliceTemplate(int origin) { Objects.checkIndex(origin, length() + 1); - VectorShuffle iota = iotaShuffle(); - VectorMask blendMask = iota.toVector().compare(VectorOperators.LT, (broadcast((byte)(length() - origin)))); - iota = iotaShuffle(origin, 1, true); + ByteVector iotaVector = (ByteVector) iotaShuffle().toBitsVector(); + ByteVector filter = 
broadcast((byte)(length() - origin)); + VectorMask blendMask = iotaVector.compare(VectorOperators.LT, filter); + AbstractShuffle iota = iotaShuffle(origin, 1, true); return vspecies().zero().blend(this.rearrange(iota), blendMask); } @@ -2359,10 +2361,10 @@ public abstract class ByteVector extends AbstractVector { ByteVector that = (ByteVector) w; that.check(this); Objects.checkIndex(origin, length() + 1); - VectorShuffle iota = iotaShuffle(); - VectorMask blendMask = iota.toVector().compare((part == 0) ? VectorOperators.GE : VectorOperators.LT, - (broadcast((byte)(origin)))); - iota = iotaShuffle(-origin, 1, true); + ByteVector iotaVector = (ByteVector) iotaShuffle().toBitsVector(); + ByteVector filter = broadcast((byte)origin); + VectorMask blendMask = iotaVector.compare((part == 0) ? VectorOperators.GE : VectorOperators.LT, filter); + AbstractShuffle iota = iotaShuffle(-origin, 1, true); return that.blend(this.rearrange(iota), blendMask); } @@ -2399,10 +2401,10 @@ public abstract class ByteVector extends AbstractVector { ByteVector unsliceTemplate(int origin) { Objects.checkIndex(origin, length() + 1); - VectorShuffle iota = iotaShuffle(); - VectorMask blendMask = iota.toVector().compare(VectorOperators.GE, - (broadcast((byte)(origin)))); - iota = iotaShuffle(-origin, 1, true); + ByteVector iotaVector = (ByteVector) iotaShuffle().toBitsVector(); + ByteVector filter = broadcast((byte)origin); + VectorMask blendMask = iotaVector.compare(VectorOperators.GE, filter); + AbstractShuffle iota = iotaShuffle(-origin, 1, true); return vspecies().zero().blend(this.rearrange(iota), blendMask); } @@ -2425,13 +2427,11 @@ public abstract class ByteVector extends AbstractVector { final > ByteVector rearrangeTemplate(Class shuffletype, S shuffle) { - @SuppressWarnings("unchecked") - S ws = (S) shuffle.wrapIndexes(); return VectorSupport.rearrangeOp( getClass(), shuffletype, null, byte.class, length(), - this, ws, null, + this, shuffle, null, (v1, s_, m_) -> v1.uOp((i, a) -> { - 
int ei = s_.laneSource(i); + int ei = Integer.remainderUnsigned(s_.laneSource(i), v1.length()); return v1.lane(ei); })); } @@ -2454,13 +2454,11 @@ public abstract class ByteVector extends AbstractVector { M m) { m.check(masktype, this); - @SuppressWarnings("unchecked") - S ws = (S) shuffle.wrapIndexes(); return VectorSupport.rearrangeOp( getClass(), shuffletype, masktype, byte.class, length(), - this, ws, m, + this, shuffle, m, (v1, s_, m_) -> v1.uOp((i, a) -> { - int ei = s_.laneSource(i); + int ei = Integer.remainderUnsigned(s_.laneSource(i), v1.length()); return !m_.laneIsSet(i) ? 0 : v1.lane(ei); })); } @@ -2481,30 +2479,29 @@ public abstract class ByteVector extends AbstractVector { S shuffle, ByteVector v) { VectorMask valid = shuffle.laneIsValid(); - @SuppressWarnings("unchecked") - S ws = (S) shuffle.wrapIndexes(); ByteVector r0 = VectorSupport.rearrangeOp( getClass(), shuffletype, null, byte.class, length(), - this, ws, null, + this, shuffle, null, (v0, s_, m_) -> v0.uOp((i, a) -> { - int ei = s_.laneSource(i); + int ei = Integer.remainderUnsigned(s_.laneSource(i), v0.length()); return v0.lane(ei); })); ByteVector r1 = VectorSupport.rearrangeOp( getClass(), shuffletype, null, byte.class, length(), - v, ws, null, + v, shuffle, null, (v1, s_, m_) -> v1.uOp((i, a) -> { - int ei = s_.laneSource(i); + int ei = Integer.remainderUnsigned(s_.laneSource(i), v1.length()); return v1.lane(ei); })); return r1.blend(r0, valid); } + @Override @ForceInline - private final - VectorShuffle toShuffle0(ByteSpecies dsp) { + final VectorShuffle bitsToShuffle0(AbstractSpecies dsp) { + assert(dsp.length() == vspecies().length()); byte[] a = toArray(); int[] sa = new int[a.length]; for (int i = 0; i < a.length; i++) { @@ -2513,16 +2510,18 @@ public abstract class ByteVector extends AbstractVector { return VectorShuffle.fromArray(dsp, sa, 0); } - /*package-private*/ @ForceInline - final - VectorShuffle toShuffleTemplate(Class shuffleType) { - ByteSpecies vsp = vspecies(); - return 
VectorSupport.convert(VectorSupport.VECTOR_OP_CAST, - getClass(), byte.class, length(), - shuffleType, byte.class, length(), - this, vsp, - ByteVector::toShuffle0); + final + VectorShuffle toShuffle(AbstractSpecies dsp, boolean wrap) { + assert(dsp.elementSize() == vspecies().elementSize()); + ByteVector idx = this; + ByteVector wrapped = idx.lanewise(VectorOperators.AND, length() - 1); + if (!wrap) { + ByteVector wrappedEx = wrapped.lanewise(VectorOperators.SUB, length()); + VectorMask inBound = wrapped.compare(VectorOperators.EQ, idx); + wrapped = wrappedEx.blend(wrapped, inBound); + } + return wrapped.bitsToShuffle(dsp); } /** @@ -4216,9 +4215,10 @@ public abstract class ByteVector extends AbstractVector { private ByteSpecies(VectorShape shape, Class vectorType, Class> maskType, + Class> shuffleType, Function vectorFactory) { super(shape, LaneType.of(byte.class), - vectorType, maskType, + vectorType, maskType, shuffleType, vectorFactory); assert(this.elementSize() == Byte.SIZE); } @@ -4504,6 +4504,7 @@ public abstract class ByteVector extends AbstractVector { = new ByteSpecies(VectorShape.S_64_BIT, Byte64Vector.class, Byte64Vector.Byte64Mask.class, + Byte64Vector.Byte64Shuffle.class, Byte64Vector::new); /** Species representing {@link ByteVector}s of {@link VectorShape#S_128_BIT VectorShape.S_128_BIT}. */ @@ -4511,6 +4512,7 @@ public abstract class ByteVector extends AbstractVector { = new ByteSpecies(VectorShape.S_128_BIT, Byte128Vector.class, Byte128Vector.Byte128Mask.class, + Byte128Vector.Byte128Shuffle.class, Byte128Vector::new); /** Species representing {@link ByteVector}s of {@link VectorShape#S_256_BIT VectorShape.S_256_BIT}. 
*/ @@ -4518,6 +4520,7 @@ public abstract class ByteVector extends AbstractVector { = new ByteSpecies(VectorShape.S_256_BIT, Byte256Vector.class, Byte256Vector.Byte256Mask.class, + Byte256Vector.Byte256Shuffle.class, Byte256Vector::new); /** Species representing {@link ByteVector}s of {@link VectorShape#S_512_BIT VectorShape.S_512_BIT}. */ @@ -4525,6 +4528,7 @@ public abstract class ByteVector extends AbstractVector { = new ByteSpecies(VectorShape.S_512_BIT, Byte512Vector.class, Byte512Vector.Byte512Mask.class, + Byte512Vector.Byte512Shuffle.class, Byte512Vector::new); /** Species representing {@link ByteVector}s of {@link VectorShape#S_Max_BIT VectorShape.S_Max_BIT}. */ @@ -4532,6 +4536,7 @@ public abstract class ByteVector extends AbstractVector { = new ByteSpecies(VectorShape.S_Max_BIT, ByteMaxVector.class, ByteMaxVector.ByteMaxMask.class, + ByteMaxVector.ByteMaxShuffle.class, ByteMaxVector::new); /** diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Double128Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Double128Vector.java index 37607492645..7d343d4e244 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Double128Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Double128Vector.java @@ -141,24 +141,15 @@ final class Double128Vector extends DoubleVector { @ForceInline Double128Shuffle iotaShuffle() { return Double128Shuffle.IOTA; } + @Override @ForceInline Double128Shuffle iotaShuffle(int start, int step, boolean wrap) { - if (wrap) { - return (Double128Shuffle)VectorSupport.shuffleIota(ETYPE, Double128Shuffle.class, VSPECIES, VLENGTH, start, step, 1, - (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (VectorIntrinsics.wrapToRange(i*lstep + lstart, l)))); - } else { - return (Double128Shuffle)VectorSupport.shuffleIota(ETYPE, Double128Shuffle.class, VSPECIES, VLENGTH, start, step, 0, - (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (i*lstep + lstart))); - } + return 
(Double128Shuffle) iotaShuffleTemplate(start, step, wrap); } @Override @ForceInline - Double128Shuffle shuffleFromBytes(byte[] reorder) { return new Double128Shuffle(reorder); } - - @Override - @ForceInline - Double128Shuffle shuffleFromArray(int[] indexes, int i) { return new Double128Shuffle(indexes, i); } + Double128Shuffle shuffleFromArray(int[] indices, int i) { return new Double128Shuffle(indices, i); } @Override @ForceInline @@ -344,9 +335,16 @@ final class Double128Vector extends DoubleVector { return (long) super.reduceLanesTemplate(op, Double128Mask.class, (Double128Mask) m); // specialized } + @Override @ForceInline - public VectorShuffle toShuffle() { - return super.toShuffleTemplate(Double128Shuffle.class); // specialize + final VectorShuffle bitsToShuffle(AbstractSpecies dsp) { + throw new AssertionError(); + } + + @Override + @ForceInline + public final Double128Shuffle toShuffle() { + return (Double128Shuffle) toShuffle(vspecies(), false); } // Specialized unary testing @@ -780,25 +778,28 @@ final class Double128Vector extends DoubleVector { static final class Double128Shuffle extends AbstractShuffle { static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM - static final Class ETYPE = double.class; // used by the JVM + static final Class ETYPE = long.class; // used by the JVM - Double128Shuffle(byte[] reorder) { - super(VLENGTH, reorder); + Double128Shuffle(long[] indices) { + super(indices); + assert(VLENGTH == indices.length); + assert(indicesInRange(indices)); } - public Double128Shuffle(int[] reorder) { - super(VLENGTH, reorder); + Double128Shuffle(int[] indices, int i) { + this(prepare(indices, i)); } - public Double128Shuffle(int[] reorder, int i) { - super(VLENGTH, reorder, i); + Double128Shuffle(IntUnaryOperator fn) { + this(prepare(fn)); } - public Double128Shuffle(IntUnaryOperator fn) { - super(VLENGTH, fn); + long[] indices() { + return (long[])getPayload(); } @Override + @ForceInline public DoubleSpecies vspecies() { return 
VSPECIES; } @@ -806,47 +807,122 @@ final class Double128Vector extends DoubleVector { static { // There must be enough bits in the shuffle lanes to encode // VLENGTH valid indexes and VLENGTH exceptional ones. - assert(VLENGTH < Byte.MAX_VALUE); - assert(Byte.MIN_VALUE <= -VLENGTH); + assert(VLENGTH < Long.MAX_VALUE); + assert(Long.MIN_VALUE <= -VLENGTH); } static final Double128Shuffle IOTA = new Double128Shuffle(IDENTITY); @Override @ForceInline public Double128Vector toVector() { - return VectorSupport.shuffleToVector(VCLASS, ETYPE, Double128Shuffle.class, this, VLENGTH, - (s) -> ((Double128Vector)(((AbstractShuffle)(s)).toVectorTemplate()))); + return (Double128Vector) toBitsVector().castShape(vspecies(), 0); } @Override @ForceInline - public VectorShuffle cast(VectorSpecies s) { - AbstractSpecies species = (AbstractSpecies) s; - if (length() != species.laneCount()) - throw new IllegalArgumentException("VectorShuffle length and species length differ"); - int[] shuffleArray = toArray(); - return s.shuffleFromArray(shuffleArray, 0).check(s); + Long128Vector toBitsVector() { + return (Long128Vector) super.toBitsVectorTemplate(); + } + + @Override + Long128Vector toBitsVector0() { + return ((Long128Vector) vspecies().asIntegral().dummyVector()).vectorFactory(indices()); } @Override @ForceInline - public Double128Shuffle wrapIndexes() { - return VectorSupport.wrapShuffleIndexes(ETYPE, Double128Shuffle.class, this, VLENGTH, - (s) -> ((Double128Shuffle)(((AbstractShuffle)(s)).wrapIndexesTemplate()))); + public int laneSource(int i) { + return (int)toBitsVector().lane(i); } - @ForceInline @Override - public Double128Shuffle rearrange(VectorShuffle shuffle) { - Double128Shuffle s = (Double128Shuffle) shuffle; - byte[] reorder1 = reorder(); - byte[] reorder2 = s.reorder(); - byte[] r = new byte[reorder1.length]; - for (int i = 0; i < reorder1.length; i++) { - int ssi = reorder2[i]; - r[i] = reorder1[ssi]; // throws on exceptional index + @ForceInline + public void 
intoArray(int[] a, int offset) { + switch (length()) { + case 1 -> a[offset] = laneSource(0); + case 2 -> toBitsVector() + .convertShape(VectorOperators.L2I, IntVector.SPECIES_64, 0) + .reinterpretAsInts() + .intoArray(a, offset); + case 4 -> toBitsVector() + .convertShape(VectorOperators.L2I, IntVector.SPECIES_128, 0) + .reinterpretAsInts() + .intoArray(a, offset); + case 8 -> toBitsVector() + .convertShape(VectorOperators.L2I, IntVector.SPECIES_256, 0) + .reinterpretAsInts() + .intoArray(a, offset); + case 16 -> toBitsVector() + .convertShape(VectorOperators.L2I, IntVector.SPECIES_512, 0) + .reinterpretAsInts() + .intoArray(a, offset); + default -> { + VectorIntrinsics.checkFromIndexSize(offset, length(), a.length); + for (int i = 0; i < length(); i++) { + a[offset + i] = laneSource(i); + } + } } - return new Double128Shuffle(r); + } + + @Override + @ForceInline + public final Double128Mask laneIsValid() { + return (Double128Mask) toBitsVector().compare(VectorOperators.GE, 0) + .cast(vspecies()); + } + + @ForceInline + @Override + public final Double128Shuffle rearrange(VectorShuffle shuffle) { + Double128Shuffle concreteShuffle = (Double128Shuffle) shuffle; + return (Double128Shuffle) toBitsVector().rearrange(concreteShuffle.cast(LongVector.SPECIES_128)) + .toShuffle(vspecies(), false); + } + + @ForceInline + @Override + public final Double128Shuffle wrapIndexes() { + Long128Vector v = toBitsVector(); + if ((length() & (length() - 1)) == 0) { + v = (Long128Vector) v.lanewise(VectorOperators.AND, length() - 1); + } else { + v = (Long128Vector) v.blend(v.lanewise(VectorOperators.ADD, length()), + v.compare(VectorOperators.LT, 0)); + } + return (Double128Shuffle) v.toShuffle(vspecies(), false); + } + + private static long[] prepare(int[] indices, int offset) { + long[] a = new long[VLENGTH]; + for (int i = 0; i < VLENGTH; i++) { + int si = indices[offset + i]; + si = partiallyWrapIndex(si, VLENGTH); + a[i] = (long)si; + } + return a; + } + + private static long[] 
prepare(IntUnaryOperator f) { + long[] a = new long[VLENGTH]; + for (int i = 0; i < VLENGTH; i++) { + int si = f.applyAsInt(i); + si = partiallyWrapIndex(si, VLENGTH); + a[i] = (long)si; + } + return a; + } + + private static boolean indicesInRange(long[] indices) { + int length = indices.length; + for (long si : indices) { + if (si >= (long)length || si < (long)(-length)) { + String msg = ("index "+si+" out of range ["+length+"] in "+ + java.util.Arrays.toString(indices)); + throw new AssertionError(msg); + } + } + return true; } } diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Double256Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Double256Vector.java index 2e31a802550..a8645badc27 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Double256Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Double256Vector.java @@ -141,24 +141,15 @@ final class Double256Vector extends DoubleVector { @ForceInline Double256Shuffle iotaShuffle() { return Double256Shuffle.IOTA; } + @Override @ForceInline Double256Shuffle iotaShuffle(int start, int step, boolean wrap) { - if (wrap) { - return (Double256Shuffle)VectorSupport.shuffleIota(ETYPE, Double256Shuffle.class, VSPECIES, VLENGTH, start, step, 1, - (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (VectorIntrinsics.wrapToRange(i*lstep + lstart, l)))); - } else { - return (Double256Shuffle)VectorSupport.shuffleIota(ETYPE, Double256Shuffle.class, VSPECIES, VLENGTH, start, step, 0, - (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (i*lstep + lstart))); - } + return (Double256Shuffle) iotaShuffleTemplate(start, step, wrap); } @Override @ForceInline - Double256Shuffle shuffleFromBytes(byte[] reorder) { return new Double256Shuffle(reorder); } - - @Override - @ForceInline - Double256Shuffle shuffleFromArray(int[] indexes, int i) { return new Double256Shuffle(indexes, i); } + Double256Shuffle shuffleFromArray(int[] indices, int i) { return new 
Double256Shuffle(indices, i); } @Override @ForceInline @@ -344,9 +335,16 @@ final class Double256Vector extends DoubleVector { return (long) super.reduceLanesTemplate(op, Double256Mask.class, (Double256Mask) m); // specialized } + @Override @ForceInline - public VectorShuffle toShuffle() { - return super.toShuffleTemplate(Double256Shuffle.class); // specialize + final VectorShuffle bitsToShuffle(AbstractSpecies dsp) { + throw new AssertionError(); + } + + @Override + @ForceInline + public final Double256Shuffle toShuffle() { + return (Double256Shuffle) toShuffle(vspecies(), false); } // Specialized unary testing @@ -784,25 +782,28 @@ final class Double256Vector extends DoubleVector { static final class Double256Shuffle extends AbstractShuffle { static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM - static final Class ETYPE = double.class; // used by the JVM + static final Class ETYPE = long.class; // used by the JVM - Double256Shuffle(byte[] reorder) { - super(VLENGTH, reorder); + Double256Shuffle(long[] indices) { + super(indices); + assert(VLENGTH == indices.length); + assert(indicesInRange(indices)); } - public Double256Shuffle(int[] reorder) { - super(VLENGTH, reorder); + Double256Shuffle(int[] indices, int i) { + this(prepare(indices, i)); } - public Double256Shuffle(int[] reorder, int i) { - super(VLENGTH, reorder, i); + Double256Shuffle(IntUnaryOperator fn) { + this(prepare(fn)); } - public Double256Shuffle(IntUnaryOperator fn) { - super(VLENGTH, fn); + long[] indices() { + return (long[])getPayload(); } @Override + @ForceInline public DoubleSpecies vspecies() { return VSPECIES; } @@ -810,47 +811,122 @@ final class Double256Vector extends DoubleVector { static { // There must be enough bits in the shuffle lanes to encode // VLENGTH valid indexes and VLENGTH exceptional ones. 
- assert(VLENGTH < Byte.MAX_VALUE); - assert(Byte.MIN_VALUE <= -VLENGTH); + assert(VLENGTH < Long.MAX_VALUE); + assert(Long.MIN_VALUE <= -VLENGTH); } static final Double256Shuffle IOTA = new Double256Shuffle(IDENTITY); @Override @ForceInline public Double256Vector toVector() { - return VectorSupport.shuffleToVector(VCLASS, ETYPE, Double256Shuffle.class, this, VLENGTH, - (s) -> ((Double256Vector)(((AbstractShuffle)(s)).toVectorTemplate()))); + return (Double256Vector) toBitsVector().castShape(vspecies(), 0); } @Override @ForceInline - public VectorShuffle cast(VectorSpecies s) { - AbstractSpecies species = (AbstractSpecies) s; - if (length() != species.laneCount()) - throw new IllegalArgumentException("VectorShuffle length and species length differ"); - int[] shuffleArray = toArray(); - return s.shuffleFromArray(shuffleArray, 0).check(s); + Long256Vector toBitsVector() { + return (Long256Vector) super.toBitsVectorTemplate(); + } + + @Override + Long256Vector toBitsVector0() { + return ((Long256Vector) vspecies().asIntegral().dummyVector()).vectorFactory(indices()); } @Override @ForceInline - public Double256Shuffle wrapIndexes() { - return VectorSupport.wrapShuffleIndexes(ETYPE, Double256Shuffle.class, this, VLENGTH, - (s) -> ((Double256Shuffle)(((AbstractShuffle)(s)).wrapIndexesTemplate()))); + public int laneSource(int i) { + return (int)toBitsVector().lane(i); } - @ForceInline @Override - public Double256Shuffle rearrange(VectorShuffle shuffle) { - Double256Shuffle s = (Double256Shuffle) shuffle; - byte[] reorder1 = reorder(); - byte[] reorder2 = s.reorder(); - byte[] r = new byte[reorder1.length]; - for (int i = 0; i < reorder1.length; i++) { - int ssi = reorder2[i]; - r[i] = reorder1[ssi]; // throws on exceptional index + @ForceInline + public void intoArray(int[] a, int offset) { + switch (length()) { + case 1 -> a[offset] = laneSource(0); + case 2 -> toBitsVector() + .convertShape(VectorOperators.L2I, IntVector.SPECIES_64, 0) + .reinterpretAsInts() + 
.intoArray(a, offset); + case 4 -> toBitsVector() + .convertShape(VectorOperators.L2I, IntVector.SPECIES_128, 0) + .reinterpretAsInts() + .intoArray(a, offset); + case 8 -> toBitsVector() + .convertShape(VectorOperators.L2I, IntVector.SPECIES_256, 0) + .reinterpretAsInts() + .intoArray(a, offset); + case 16 -> toBitsVector() + .convertShape(VectorOperators.L2I, IntVector.SPECIES_512, 0) + .reinterpretAsInts() + .intoArray(a, offset); + default -> { + VectorIntrinsics.checkFromIndexSize(offset, length(), a.length); + for (int i = 0; i < length(); i++) { + a[offset + i] = laneSource(i); + } + } } - return new Double256Shuffle(r); + } + + @Override + @ForceInline + public final Double256Mask laneIsValid() { + return (Double256Mask) toBitsVector().compare(VectorOperators.GE, 0) + .cast(vspecies()); + } + + @ForceInline + @Override + public final Double256Shuffle rearrange(VectorShuffle shuffle) { + Double256Shuffle concreteShuffle = (Double256Shuffle) shuffle; + return (Double256Shuffle) toBitsVector().rearrange(concreteShuffle.cast(LongVector.SPECIES_256)) + .toShuffle(vspecies(), false); + } + + @ForceInline + @Override + public final Double256Shuffle wrapIndexes() { + Long256Vector v = toBitsVector(); + if ((length() & (length() - 1)) == 0) { + v = (Long256Vector) v.lanewise(VectorOperators.AND, length() - 1); + } else { + v = (Long256Vector) v.blend(v.lanewise(VectorOperators.ADD, length()), + v.compare(VectorOperators.LT, 0)); + } + return (Double256Shuffle) v.toShuffle(vspecies(), false); + } + + private static long[] prepare(int[] indices, int offset) { + long[] a = new long[VLENGTH]; + for (int i = 0; i < VLENGTH; i++) { + int si = indices[offset + i]; + si = partiallyWrapIndex(si, VLENGTH); + a[i] = (long)si; + } + return a; + } + + private static long[] prepare(IntUnaryOperator f) { + long[] a = new long[VLENGTH]; + for (int i = 0; i < VLENGTH; i++) { + int si = f.applyAsInt(i); + si = partiallyWrapIndex(si, VLENGTH); + a[i] = (long)si; + } + return a; + } + 
+ private static boolean indicesInRange(long[] indices) { + int length = indices.length; + for (long si : indices) { + if (si >= (long)length || si < (long)(-length)) { + String msg = ("index "+si+" out of range ["+length+"] in "+ + java.util.Arrays.toString(indices)); + throw new AssertionError(msg); + } + } + return true; } } diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Double512Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Double512Vector.java index 6ed3dd7325c..64535c67b19 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Double512Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Double512Vector.java @@ -141,24 +141,15 @@ final class Double512Vector extends DoubleVector { @ForceInline Double512Shuffle iotaShuffle() { return Double512Shuffle.IOTA; } + @Override @ForceInline Double512Shuffle iotaShuffle(int start, int step, boolean wrap) { - if (wrap) { - return (Double512Shuffle)VectorSupport.shuffleIota(ETYPE, Double512Shuffle.class, VSPECIES, VLENGTH, start, step, 1, - (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (VectorIntrinsics.wrapToRange(i*lstep + lstart, l)))); - } else { - return (Double512Shuffle)VectorSupport.shuffleIota(ETYPE, Double512Shuffle.class, VSPECIES, VLENGTH, start, step, 0, - (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (i*lstep + lstart))); - } + return (Double512Shuffle) iotaShuffleTemplate(start, step, wrap); } @Override @ForceInline - Double512Shuffle shuffleFromBytes(byte[] reorder) { return new Double512Shuffle(reorder); } - - @Override - @ForceInline - Double512Shuffle shuffleFromArray(int[] indexes, int i) { return new Double512Shuffle(indexes, i); } + Double512Shuffle shuffleFromArray(int[] indices, int i) { return new Double512Shuffle(indices, i); } @Override @ForceInline @@ -344,9 +335,16 @@ final class Double512Vector extends DoubleVector { return (long) super.reduceLanesTemplate(op, Double512Mask.class, (Double512Mask) 
m); // specialized } + @Override @ForceInline - public VectorShuffle toShuffle() { - return super.toShuffleTemplate(Double512Shuffle.class); // specialize + final VectorShuffle bitsToShuffle(AbstractSpecies dsp) { + throw new AssertionError(); + } + + @Override + @ForceInline + public final Double512Shuffle toShuffle() { + return (Double512Shuffle) toShuffle(vspecies(), false); } // Specialized unary testing @@ -792,25 +790,28 @@ final class Double512Vector extends DoubleVector { static final class Double512Shuffle extends AbstractShuffle { static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM - static final Class ETYPE = double.class; // used by the JVM + static final Class ETYPE = long.class; // used by the JVM - Double512Shuffle(byte[] reorder) { - super(VLENGTH, reorder); + Double512Shuffle(long[] indices) { + super(indices); + assert(VLENGTH == indices.length); + assert(indicesInRange(indices)); } - public Double512Shuffle(int[] reorder) { - super(VLENGTH, reorder); + Double512Shuffle(int[] indices, int i) { + this(prepare(indices, i)); } - public Double512Shuffle(int[] reorder, int i) { - super(VLENGTH, reorder, i); + Double512Shuffle(IntUnaryOperator fn) { + this(prepare(fn)); } - public Double512Shuffle(IntUnaryOperator fn) { - super(VLENGTH, fn); + long[] indices() { + return (long[])getPayload(); } @Override + @ForceInline public DoubleSpecies vspecies() { return VSPECIES; } @@ -818,47 +819,122 @@ final class Double512Vector extends DoubleVector { static { // There must be enough bits in the shuffle lanes to encode // VLENGTH valid indexes and VLENGTH exceptional ones. 
- assert(VLENGTH < Byte.MAX_VALUE); - assert(Byte.MIN_VALUE <= -VLENGTH); + assert(VLENGTH < Long.MAX_VALUE); + assert(Long.MIN_VALUE <= -VLENGTH); } static final Double512Shuffle IOTA = new Double512Shuffle(IDENTITY); @Override @ForceInline public Double512Vector toVector() { - return VectorSupport.shuffleToVector(VCLASS, ETYPE, Double512Shuffle.class, this, VLENGTH, - (s) -> ((Double512Vector)(((AbstractShuffle)(s)).toVectorTemplate()))); + return (Double512Vector) toBitsVector().castShape(vspecies(), 0); } @Override @ForceInline - public VectorShuffle cast(VectorSpecies s) { - AbstractSpecies species = (AbstractSpecies) s; - if (length() != species.laneCount()) - throw new IllegalArgumentException("VectorShuffle length and species length differ"); - int[] shuffleArray = toArray(); - return s.shuffleFromArray(shuffleArray, 0).check(s); + Long512Vector toBitsVector() { + return (Long512Vector) super.toBitsVectorTemplate(); + } + + @Override + Long512Vector toBitsVector0() { + return ((Long512Vector) vspecies().asIntegral().dummyVector()).vectorFactory(indices()); } @Override @ForceInline - public Double512Shuffle wrapIndexes() { - return VectorSupport.wrapShuffleIndexes(ETYPE, Double512Shuffle.class, this, VLENGTH, - (s) -> ((Double512Shuffle)(((AbstractShuffle)(s)).wrapIndexesTemplate()))); + public int laneSource(int i) { + return (int)toBitsVector().lane(i); } - @ForceInline @Override - public Double512Shuffle rearrange(VectorShuffle shuffle) { - Double512Shuffle s = (Double512Shuffle) shuffle; - byte[] reorder1 = reorder(); - byte[] reorder2 = s.reorder(); - byte[] r = new byte[reorder1.length]; - for (int i = 0; i < reorder1.length; i++) { - int ssi = reorder2[i]; - r[i] = reorder1[ssi]; // throws on exceptional index + @ForceInline + public void intoArray(int[] a, int offset) { + switch (length()) { + case 1 -> a[offset] = laneSource(0); + case 2 -> toBitsVector() + .convertShape(VectorOperators.L2I, IntVector.SPECIES_64, 0) + .reinterpretAsInts() + 
.intoArray(a, offset); + case 4 -> toBitsVector() + .convertShape(VectorOperators.L2I, IntVector.SPECIES_128, 0) + .reinterpretAsInts() + .intoArray(a, offset); + case 8 -> toBitsVector() + .convertShape(VectorOperators.L2I, IntVector.SPECIES_256, 0) + .reinterpretAsInts() + .intoArray(a, offset); + case 16 -> toBitsVector() + .convertShape(VectorOperators.L2I, IntVector.SPECIES_512, 0) + .reinterpretAsInts() + .intoArray(a, offset); + default -> { + VectorIntrinsics.checkFromIndexSize(offset, length(), a.length); + for (int i = 0; i < length(); i++) { + a[offset + i] = laneSource(i); + } + } } - return new Double512Shuffle(r); + } + + @Override + @ForceInline + public final Double512Mask laneIsValid() { + return (Double512Mask) toBitsVector().compare(VectorOperators.GE, 0) + .cast(vspecies()); + } + + @ForceInline + @Override + public final Double512Shuffle rearrange(VectorShuffle shuffle) { + Double512Shuffle concreteShuffle = (Double512Shuffle) shuffle; + return (Double512Shuffle) toBitsVector().rearrange(concreteShuffle.cast(LongVector.SPECIES_512)) + .toShuffle(vspecies(), false); + } + + @ForceInline + @Override + public final Double512Shuffle wrapIndexes() { + Long512Vector v = toBitsVector(); + if ((length() & (length() - 1)) == 0) { + v = (Long512Vector) v.lanewise(VectorOperators.AND, length() - 1); + } else { + v = (Long512Vector) v.blend(v.lanewise(VectorOperators.ADD, length()), + v.compare(VectorOperators.LT, 0)); + } + return (Double512Shuffle) v.toShuffle(vspecies(), false); + } + + private static long[] prepare(int[] indices, int offset) { + long[] a = new long[VLENGTH]; + for (int i = 0; i < VLENGTH; i++) { + int si = indices[offset + i]; + si = partiallyWrapIndex(si, VLENGTH); + a[i] = (long)si; + } + return a; + } + + private static long[] prepare(IntUnaryOperator f) { + long[] a = new long[VLENGTH]; + for (int i = 0; i < VLENGTH; i++) { + int si = f.applyAsInt(i); + si = partiallyWrapIndex(si, VLENGTH); + a[i] = (long)si; + } + return a; + } + 
+ private static boolean indicesInRange(long[] indices) { + int length = indices.length; + for (long si : indices) { + if (si >= (long)length || si < (long)(-length)) { + String msg = ("index "+si+" out of range ["+length+"] in "+ + java.util.Arrays.toString(indices)); + throw new AssertionError(msg); + } + } + return true; } } diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Double64Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Double64Vector.java index 2e1b2135001..45b91ee88bf 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Double64Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Double64Vector.java @@ -141,24 +141,15 @@ final class Double64Vector extends DoubleVector { @ForceInline Double64Shuffle iotaShuffle() { return Double64Shuffle.IOTA; } + @Override @ForceInline Double64Shuffle iotaShuffle(int start, int step, boolean wrap) { - if (wrap) { - return (Double64Shuffle)VectorSupport.shuffleIota(ETYPE, Double64Shuffle.class, VSPECIES, VLENGTH, start, step, 1, - (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (VectorIntrinsics.wrapToRange(i*lstep + lstart, l)))); - } else { - return (Double64Shuffle)VectorSupport.shuffleIota(ETYPE, Double64Shuffle.class, VSPECIES, VLENGTH, start, step, 0, - (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (i*lstep + lstart))); - } + return (Double64Shuffle) iotaShuffleTemplate(start, step, wrap); } @Override @ForceInline - Double64Shuffle shuffleFromBytes(byte[] reorder) { return new Double64Shuffle(reorder); } - - @Override - @ForceInline - Double64Shuffle shuffleFromArray(int[] indexes, int i) { return new Double64Shuffle(indexes, i); } + Double64Shuffle shuffleFromArray(int[] indices, int i) { return new Double64Shuffle(indices, i); } @Override @ForceInline @@ -344,9 +335,16 @@ final class Double64Vector extends DoubleVector { return (long) super.reduceLanesTemplate(op, Double64Mask.class, (Double64Mask) m); // specialized } + 
@Override @ForceInline - public VectorShuffle toShuffle() { - return super.toShuffleTemplate(Double64Shuffle.class); // specialize + final VectorShuffle bitsToShuffle(AbstractSpecies dsp) { + throw new AssertionError(); + } + + @Override + @ForceInline + public final Double64Shuffle toShuffle() { + return (Double64Shuffle) toShuffle(vspecies(), false); } // Specialized unary testing @@ -778,25 +776,28 @@ final class Double64Vector extends DoubleVector { static final class Double64Shuffle extends AbstractShuffle { static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM - static final Class ETYPE = double.class; // used by the JVM + static final Class ETYPE = long.class; // used by the JVM - Double64Shuffle(byte[] reorder) { - super(VLENGTH, reorder); + Double64Shuffle(long[] indices) { + super(indices); + assert(VLENGTH == indices.length); + assert(indicesInRange(indices)); } - public Double64Shuffle(int[] reorder) { - super(VLENGTH, reorder); + Double64Shuffle(int[] indices, int i) { + this(prepare(indices, i)); } - public Double64Shuffle(int[] reorder, int i) { - super(VLENGTH, reorder, i); + Double64Shuffle(IntUnaryOperator fn) { + this(prepare(fn)); } - public Double64Shuffle(IntUnaryOperator fn) { - super(VLENGTH, fn); + long[] indices() { + return (long[])getPayload(); } @Override + @ForceInline public DoubleSpecies vspecies() { return VSPECIES; } @@ -804,47 +805,122 @@ final class Double64Vector extends DoubleVector { static { // There must be enough bits in the shuffle lanes to encode // VLENGTH valid indexes and VLENGTH exceptional ones. 
- assert(VLENGTH < Byte.MAX_VALUE); - assert(Byte.MIN_VALUE <= -VLENGTH); + assert(VLENGTH < Long.MAX_VALUE); + assert(Long.MIN_VALUE <= -VLENGTH); } static final Double64Shuffle IOTA = new Double64Shuffle(IDENTITY); @Override @ForceInline public Double64Vector toVector() { - return VectorSupport.shuffleToVector(VCLASS, ETYPE, Double64Shuffle.class, this, VLENGTH, - (s) -> ((Double64Vector)(((AbstractShuffle)(s)).toVectorTemplate()))); + return (Double64Vector) toBitsVector().castShape(vspecies(), 0); } @Override @ForceInline - public VectorShuffle cast(VectorSpecies s) { - AbstractSpecies species = (AbstractSpecies) s; - if (length() != species.laneCount()) - throw new IllegalArgumentException("VectorShuffle length and species length differ"); - int[] shuffleArray = toArray(); - return s.shuffleFromArray(shuffleArray, 0).check(s); + Long64Vector toBitsVector() { + return (Long64Vector) super.toBitsVectorTemplate(); + } + + @Override + Long64Vector toBitsVector0() { + return ((Long64Vector) vspecies().asIntegral().dummyVector()).vectorFactory(indices()); } @Override @ForceInline - public Double64Shuffle wrapIndexes() { - return VectorSupport.wrapShuffleIndexes(ETYPE, Double64Shuffle.class, this, VLENGTH, - (s) -> ((Double64Shuffle)(((AbstractShuffle)(s)).wrapIndexesTemplate()))); + public int laneSource(int i) { + return (int)toBitsVector().lane(i); } - @ForceInline @Override - public Double64Shuffle rearrange(VectorShuffle shuffle) { - Double64Shuffle s = (Double64Shuffle) shuffle; - byte[] reorder1 = reorder(); - byte[] reorder2 = s.reorder(); - byte[] r = new byte[reorder1.length]; - for (int i = 0; i < reorder1.length; i++) { - int ssi = reorder2[i]; - r[i] = reorder1[ssi]; // throws on exceptional index + @ForceInline + public void intoArray(int[] a, int offset) { + switch (length()) { + case 1 -> a[offset] = laneSource(0); + case 2 -> toBitsVector() + .convertShape(VectorOperators.L2I, IntVector.SPECIES_64, 0) + .reinterpretAsInts() + .intoArray(a, offset); + 
case 4 -> toBitsVector() + .convertShape(VectorOperators.L2I, IntVector.SPECIES_128, 0) + .reinterpretAsInts() + .intoArray(a, offset); + case 8 -> toBitsVector() + .convertShape(VectorOperators.L2I, IntVector.SPECIES_256, 0) + .reinterpretAsInts() + .intoArray(a, offset); + case 16 -> toBitsVector() + .convertShape(VectorOperators.L2I, IntVector.SPECIES_512, 0) + .reinterpretAsInts() + .intoArray(a, offset); + default -> { + VectorIntrinsics.checkFromIndexSize(offset, length(), a.length); + for (int i = 0; i < length(); i++) { + a[offset + i] = laneSource(i); + } + } } - return new Double64Shuffle(r); + } + + @Override + @ForceInline + public final Double64Mask laneIsValid() { + return (Double64Mask) toBitsVector().compare(VectorOperators.GE, 0) + .cast(vspecies()); + } + + @ForceInline + @Override + public final Double64Shuffle rearrange(VectorShuffle shuffle) { + Double64Shuffle concreteShuffle = (Double64Shuffle) shuffle; + return (Double64Shuffle) toBitsVector().rearrange(concreteShuffle.cast(LongVector.SPECIES_64)) + .toShuffle(vspecies(), false); + } + + @ForceInline + @Override + public final Double64Shuffle wrapIndexes() { + Long64Vector v = toBitsVector(); + if ((length() & (length() - 1)) == 0) { + v = (Long64Vector) v.lanewise(VectorOperators.AND, length() - 1); + } else { + v = (Long64Vector) v.blend(v.lanewise(VectorOperators.ADD, length()), + v.compare(VectorOperators.LT, 0)); + } + return (Double64Shuffle) v.toShuffle(vspecies(), false); + } + + private static long[] prepare(int[] indices, int offset) { + long[] a = new long[VLENGTH]; + for (int i = 0; i < VLENGTH; i++) { + int si = indices[offset + i]; + si = partiallyWrapIndex(si, VLENGTH); + a[i] = (long)si; + } + return a; + } + + private static long[] prepare(IntUnaryOperator f) { + long[] a = new long[VLENGTH]; + for (int i = 0; i < VLENGTH; i++) { + int si = f.applyAsInt(i); + si = partiallyWrapIndex(si, VLENGTH); + a[i] = (long)si; + } + return a; + } + + private static boolean 
indicesInRange(long[] indices) { + int length = indices.length; + for (long si : indices) { + if (si >= (long)length || si < (long)(-length)) { + String msg = ("index "+si+"out of range ["+length+"] in "+ + java.util.Arrays.toString(indices)); + throw new AssertionError(msg); + } + } + return true; } } diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/DoubleMaxVector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/DoubleMaxVector.java index 8d69b6fcbc7..03f9448f1ab 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/DoubleMaxVector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/DoubleMaxVector.java @@ -141,24 +141,15 @@ final class DoubleMaxVector extends DoubleVector { @ForceInline DoubleMaxShuffle iotaShuffle() { return DoubleMaxShuffle.IOTA; } + @Override @ForceInline DoubleMaxShuffle iotaShuffle(int start, int step, boolean wrap) { - if (wrap) { - return (DoubleMaxShuffle)VectorSupport.shuffleIota(ETYPE, DoubleMaxShuffle.class, VSPECIES, VLENGTH, start, step, 1, - (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (VectorIntrinsics.wrapToRange(i*lstep + lstart, l)))); - } else { - return (DoubleMaxShuffle)VectorSupport.shuffleIota(ETYPE, DoubleMaxShuffle.class, VSPECIES, VLENGTH, start, step, 0, - (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (i*lstep + lstart))); - } + return (DoubleMaxShuffle) iotaShuffleTemplate(start, step, wrap); } @Override @ForceInline - DoubleMaxShuffle shuffleFromBytes(byte[] reorder) { return new DoubleMaxShuffle(reorder); } - - @Override - @ForceInline - DoubleMaxShuffle shuffleFromArray(int[] indexes, int i) { return new DoubleMaxShuffle(indexes, i); } + DoubleMaxShuffle shuffleFromArray(int[] indices, int i) { return new DoubleMaxShuffle(indices, i); } @Override @ForceInline @@ -344,9 +335,16 @@ final class DoubleMaxVector extends DoubleVector { return (long) super.reduceLanesTemplate(op, DoubleMaxMask.class, (DoubleMaxMask) m); // specialized } + 
@Override @ForceInline - public VectorShuffle toShuffle() { - return super.toShuffleTemplate(DoubleMaxShuffle.class); // specialize + final VectorShuffle bitsToShuffle(AbstractSpecies dsp) { + throw new AssertionError(); + } + + @Override + @ForceInline + public final DoubleMaxShuffle toShuffle() { + return (DoubleMaxShuffle) toShuffle(vspecies(), false); } // Specialized unary testing @@ -777,25 +775,28 @@ final class DoubleMaxVector extends DoubleVector { static final class DoubleMaxShuffle extends AbstractShuffle { static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM - static final Class ETYPE = double.class; // used by the JVM + static final Class ETYPE = long.class; // used by the JVM - DoubleMaxShuffle(byte[] reorder) { - super(VLENGTH, reorder); + DoubleMaxShuffle(long[] indices) { + super(indices); + assert(VLENGTH == indices.length); + assert(indicesInRange(indices)); } - public DoubleMaxShuffle(int[] reorder) { - super(VLENGTH, reorder); + DoubleMaxShuffle(int[] indices, int i) { + this(prepare(indices, i)); } - public DoubleMaxShuffle(int[] reorder, int i) { - super(VLENGTH, reorder, i); + DoubleMaxShuffle(IntUnaryOperator fn) { + this(prepare(fn)); } - public DoubleMaxShuffle(IntUnaryOperator fn) { - super(VLENGTH, fn); + long[] indices() { + return (long[])getPayload(); } @Override + @ForceInline public DoubleSpecies vspecies() { return VSPECIES; } @@ -803,47 +804,122 @@ final class DoubleMaxVector extends DoubleVector { static { // There must be enough bits in the shuffle lanes to encode // VLENGTH valid indexes and VLENGTH exceptional ones. 
- assert(VLENGTH < Byte.MAX_VALUE); - assert(Byte.MIN_VALUE <= -VLENGTH); + assert(VLENGTH < Long.MAX_VALUE); + assert(Long.MIN_VALUE <= -VLENGTH); } static final DoubleMaxShuffle IOTA = new DoubleMaxShuffle(IDENTITY); @Override @ForceInline public DoubleMaxVector toVector() { - return VectorSupport.shuffleToVector(VCLASS, ETYPE, DoubleMaxShuffle.class, this, VLENGTH, - (s) -> ((DoubleMaxVector)(((AbstractShuffle)(s)).toVectorTemplate()))); + return (DoubleMaxVector) toBitsVector().castShape(vspecies(), 0); } @Override @ForceInline - public VectorShuffle cast(VectorSpecies s) { - AbstractSpecies species = (AbstractSpecies) s; - if (length() != species.laneCount()) - throw new IllegalArgumentException("VectorShuffle length and species length differ"); - int[] shuffleArray = toArray(); - return s.shuffleFromArray(shuffleArray, 0).check(s); + LongMaxVector toBitsVector() { + return (LongMaxVector) super.toBitsVectorTemplate(); + } + + @Override + LongMaxVector toBitsVector0() { + return ((LongMaxVector) vspecies().asIntegral().dummyVector()).vectorFactory(indices()); } @Override @ForceInline - public DoubleMaxShuffle wrapIndexes() { - return VectorSupport.wrapShuffleIndexes(ETYPE, DoubleMaxShuffle.class, this, VLENGTH, - (s) -> ((DoubleMaxShuffle)(((AbstractShuffle)(s)).wrapIndexesTemplate()))); + public int laneSource(int i) { + return (int)toBitsVector().lane(i); } - @ForceInline @Override - public DoubleMaxShuffle rearrange(VectorShuffle shuffle) { - DoubleMaxShuffle s = (DoubleMaxShuffle) shuffle; - byte[] reorder1 = reorder(); - byte[] reorder2 = s.reorder(); - byte[] r = new byte[reorder1.length]; - for (int i = 0; i < reorder1.length; i++) { - int ssi = reorder2[i]; - r[i] = reorder1[ssi]; // throws on exceptional index + @ForceInline + public void intoArray(int[] a, int offset) { + switch (length()) { + case 1 -> a[offset] = laneSource(0); + case 2 -> toBitsVector() + .convertShape(VectorOperators.L2I, IntVector.SPECIES_64, 0) + .reinterpretAsInts() + 
.intoArray(a, offset); + case 4 -> toBitsVector() + .convertShape(VectorOperators.L2I, IntVector.SPECIES_128, 0) + .reinterpretAsInts() + .intoArray(a, offset); + case 8 -> toBitsVector() + .convertShape(VectorOperators.L2I, IntVector.SPECIES_256, 0) + .reinterpretAsInts() + .intoArray(a, offset); + case 16 -> toBitsVector() + .convertShape(VectorOperators.L2I, IntVector.SPECIES_512, 0) + .reinterpretAsInts() + .intoArray(a, offset); + default -> { + VectorIntrinsics.checkFromIndexSize(offset, length(), a.length); + for (int i = 0; i < length(); i++) { + a[offset + i] = laneSource(i); + } + } } - return new DoubleMaxShuffle(r); + } + + @Override + @ForceInline + public final DoubleMaxMask laneIsValid() { + return (DoubleMaxMask) toBitsVector().compare(VectorOperators.GE, 0) + .cast(vspecies()); + } + + @ForceInline + @Override + public final DoubleMaxShuffle rearrange(VectorShuffle shuffle) { + DoubleMaxShuffle concreteShuffle = (DoubleMaxShuffle) shuffle; + return (DoubleMaxShuffle) toBitsVector().rearrange(concreteShuffle.cast(LongVector.SPECIES_MAX)) + .toShuffle(vspecies(), false); + } + + @ForceInline + @Override + public final DoubleMaxShuffle wrapIndexes() { + LongMaxVector v = toBitsVector(); + if ((length() & (length() - 1)) == 0) { + v = (LongMaxVector) v.lanewise(VectorOperators.AND, length() - 1); + } else { + v = (LongMaxVector) v.blend(v.lanewise(VectorOperators.ADD, length()), + v.compare(VectorOperators.LT, 0)); + } + return (DoubleMaxShuffle) v.toShuffle(vspecies(), false); + } + + private static long[] prepare(int[] indices, int offset) { + long[] a = new long[VLENGTH]; + for (int i = 0; i < VLENGTH; i++) { + int si = indices[offset + i]; + si = partiallyWrapIndex(si, VLENGTH); + a[i] = (long)si; + } + return a; + } + + private static long[] prepare(IntUnaryOperator f) { + long[] a = new long[VLENGTH]; + for (int i = 0; i < VLENGTH; i++) { + int si = f.applyAsInt(i); + si = partiallyWrapIndex(si, VLENGTH); + a[i] = (long)si; + } + return a; + } + 
+ private static boolean indicesInRange(long[] indices) { + int length = indices.length; + for (long si : indices) { + if (si >= (long)length || si < (long)(-length)) { + String msg = ("index "+si+"out of range ["+length+"] in "+ + java.util.Arrays.toString(indices)); + throw new AssertionError(msg); + } + } + return true; } } diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/DoubleVector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/DoubleVector.java index 5b3a25baa58..8065bf915a5 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/DoubleVector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/DoubleVector.java @@ -966,7 +966,7 @@ public abstract class DoubleVector extends AbstractVector { // and broadcast, but it would be more surprising not to continue // the obvious pattern started by unary and binary. - /** + /** * {@inheritDoc} * @see #lanewise(VectorOperators.Ternary,double,double,VectorMask) * @see #lanewise(VectorOperators.Ternary,Vector,double,VectorMask) @@ -2138,9 +2138,10 @@ public abstract class DoubleVector extends AbstractVector { DoubleVector that = (DoubleVector) v1; that.check(this); Objects.checkIndex(origin, length() + 1); - VectorShuffle iota = iotaShuffle(); - VectorMask blendMask = iota.toVector().compare(VectorOperators.LT, (broadcast((double)(length() - origin)))); - iota = iotaShuffle(origin, 1, true); + LongVector iotaVector = (LongVector) iotaShuffle().toBitsVector(); + LongVector filter = LongVector.broadcast((LongVector.LongSpecies) vspecies().asIntegral(), (long)(length() - origin)); + VectorMask blendMask = iotaVector.compare(VectorOperators.LT, filter).cast(vspecies()); + AbstractShuffle iota = iotaShuffle(origin, 1, true); return that.rearrange(iota).blend(this.rearrange(iota), blendMask); } @@ -2168,9 +2169,10 @@ public abstract class DoubleVector extends AbstractVector { @ForceInline DoubleVector sliceTemplate(int origin) { 
Objects.checkIndex(origin, length() + 1); - VectorShuffle iota = iotaShuffle(); - VectorMask blendMask = iota.toVector().compare(VectorOperators.LT, (broadcast((double)(length() - origin)))); - iota = iotaShuffle(origin, 1, true); + LongVector iotaVector = (LongVector) iotaShuffle().toBitsVector(); + LongVector filter = LongVector.broadcast((LongVector.LongSpecies) vspecies().asIntegral(), (long)(length() - origin)); + VectorMask blendMask = iotaVector.compare(VectorOperators.LT, filter).cast(vspecies()); + AbstractShuffle iota = iotaShuffle(origin, 1, true); return vspecies().zero().blend(this.rearrange(iota), blendMask); } @@ -2189,10 +2191,10 @@ public abstract class DoubleVector extends AbstractVector { DoubleVector that = (DoubleVector) w; that.check(this); Objects.checkIndex(origin, length() + 1); - VectorShuffle iota = iotaShuffle(); - VectorMask blendMask = iota.toVector().compare((part == 0) ? VectorOperators.GE : VectorOperators.LT, - (broadcast((double)(origin)))); - iota = iotaShuffle(-origin, 1, true); + LongVector iotaVector = (LongVector) iotaShuffle().toBitsVector(); + LongVector filter = LongVector.broadcast((LongVector.LongSpecies) vspecies().asIntegral(), (long)origin); + VectorMask blendMask = iotaVector.compare((part == 0) ? 
VectorOperators.GE : VectorOperators.LT, filter).cast(vspecies()); + AbstractShuffle iota = iotaShuffle(-origin, 1, true); return that.blend(this.rearrange(iota), blendMask); } @@ -2229,10 +2231,10 @@ public abstract class DoubleVector extends AbstractVector { DoubleVector unsliceTemplate(int origin) { Objects.checkIndex(origin, length() + 1); - VectorShuffle iota = iotaShuffle(); - VectorMask blendMask = iota.toVector().compare(VectorOperators.GE, - (broadcast((double)(origin)))); - iota = iotaShuffle(-origin, 1, true); + LongVector iotaVector = (LongVector) iotaShuffle().toBitsVector(); + LongVector filter = LongVector.broadcast((LongVector.LongSpecies) vspecies().asIntegral(), (long)origin); + VectorMask blendMask = iotaVector.compare(VectorOperators.GE, filter).cast(vspecies()); + AbstractShuffle iota = iotaShuffle(-origin, 1, true); return vspecies().zero().blend(this.rearrange(iota), blendMask); } @@ -2255,13 +2257,11 @@ public abstract class DoubleVector extends AbstractVector { final > DoubleVector rearrangeTemplate(Class shuffletype, S shuffle) { - @SuppressWarnings("unchecked") - S ws = (S) shuffle.wrapIndexes(); return VectorSupport.rearrangeOp( getClass(), shuffletype, null, double.class, length(), - this, ws, null, + this, shuffle, null, (v1, s_, m_) -> v1.uOp((i, a) -> { - int ei = s_.laneSource(i); + int ei = Integer.remainderUnsigned(s_.laneSource(i), v1.length()); return v1.lane(ei); })); } @@ -2284,13 +2284,11 @@ public abstract class DoubleVector extends AbstractVector { M m) { m.check(masktype, this); - @SuppressWarnings("unchecked") - S ws = (S) shuffle.wrapIndexes(); return VectorSupport.rearrangeOp( getClass(), shuffletype, masktype, double.class, length(), - this, ws, m, + this, shuffle, m, (v1, s_, m_) -> v1.uOp((i, a) -> { - int ei = s_.laneSource(i); + int ei = Integer.remainderUnsigned(s_.laneSource(i), v1.length()); return !m_.laneIsSet(i) ? 
0 : v1.lane(ei); })); } @@ -2311,48 +2309,43 @@ public abstract class DoubleVector extends AbstractVector { S shuffle, DoubleVector v) { VectorMask valid = shuffle.laneIsValid(); - @SuppressWarnings("unchecked") - S ws = (S) shuffle.wrapIndexes(); DoubleVector r0 = VectorSupport.rearrangeOp( getClass(), shuffletype, null, double.class, length(), - this, ws, null, + this, shuffle, null, (v0, s_, m_) -> v0.uOp((i, a) -> { - int ei = s_.laneSource(i); + int ei = Integer.remainderUnsigned(s_.laneSource(i), v0.length()); return v0.lane(ei); })); DoubleVector r1 = VectorSupport.rearrangeOp( getClass(), shuffletype, null, double.class, length(), - v, ws, null, + v, shuffle, null, (v1, s_, m_) -> v1.uOp((i, a) -> { - int ei = s_.laneSource(i); + int ei = Integer.remainderUnsigned(s_.laneSource(i), v1.length()); return v1.lane(ei); })); return r1.blend(r0, valid); } + @Override @ForceInline - private final - VectorShuffle toShuffle0(DoubleSpecies dsp) { - double[] a = toArray(); - int[] sa = new int[a.length]; - for (int i = 0; i < a.length; i++) { - sa[i] = (int) a[i]; - } - return VectorShuffle.fromArray(dsp, sa, 0); + final VectorShuffle bitsToShuffle0(AbstractSpecies dsp) { + throw new AssertionError(); } - /*package-private*/ @ForceInline - final - VectorShuffle toShuffleTemplate(Class shuffleType) { - DoubleSpecies vsp = vspecies(); - return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST, - getClass(), double.class, length(), - shuffleType, byte.class, length(), - this, vsp, - DoubleVector::toShuffle0); + final + VectorShuffle toShuffle(AbstractSpecies dsp, boolean wrap) { + assert(dsp.elementSize() == vspecies().elementSize()); + LongVector idx = convert(VectorOperators.D2L, 0).reinterpretAsLongs(); + LongVector wrapped = idx.lanewise(VectorOperators.AND, length() - 1); + if (!wrap) { + LongVector wrappedEx = wrapped.lanewise(VectorOperators.SUB, length()); + VectorMask inBound = wrapped.compare(VectorOperators.EQ, idx); + wrapped = wrappedEx.blend(wrapped, 
inBound); + } + return wrapped.bitsToShuffle(dsp); } /** @@ -3731,9 +3724,10 @@ public abstract class DoubleVector extends AbstractVector { private DoubleSpecies(VectorShape shape, Class vectorType, Class> maskType, + Class> shuffleType, Function vectorFactory) { super(shape, LaneType.of(double.class), - vectorType, maskType, + vectorType, maskType, shuffleType, vectorFactory); assert(this.elementSize() == Double.SIZE); } @@ -4019,6 +4013,7 @@ public abstract class DoubleVector extends AbstractVector { = new DoubleSpecies(VectorShape.S_64_BIT, Double64Vector.class, Double64Vector.Double64Mask.class, + Double64Vector.Double64Shuffle.class, Double64Vector::new); /** Species representing {@link DoubleVector}s of {@link VectorShape#S_128_BIT VectorShape.S_128_BIT}. */ @@ -4026,6 +4021,7 @@ public abstract class DoubleVector extends AbstractVector { = new DoubleSpecies(VectorShape.S_128_BIT, Double128Vector.class, Double128Vector.Double128Mask.class, + Double128Vector.Double128Shuffle.class, Double128Vector::new); /** Species representing {@link DoubleVector}s of {@link VectorShape#S_256_BIT VectorShape.S_256_BIT}. */ @@ -4033,6 +4029,7 @@ public abstract class DoubleVector extends AbstractVector { = new DoubleSpecies(VectorShape.S_256_BIT, Double256Vector.class, Double256Vector.Double256Mask.class, + Double256Vector.Double256Shuffle.class, Double256Vector::new); /** Species representing {@link DoubleVector}s of {@link VectorShape#S_512_BIT VectorShape.S_512_BIT}. */ @@ -4040,6 +4037,7 @@ public abstract class DoubleVector extends AbstractVector { = new DoubleSpecies(VectorShape.S_512_BIT, Double512Vector.class, Double512Vector.Double512Mask.class, + Double512Vector.Double512Shuffle.class, Double512Vector::new); /** Species representing {@link DoubleVector}s of {@link VectorShape#S_Max_BIT VectorShape.S_Max_BIT}. 
*/ @@ -4047,6 +4045,7 @@ public abstract class DoubleVector extends AbstractVector { = new DoubleSpecies(VectorShape.S_Max_BIT, DoubleMaxVector.class, DoubleMaxVector.DoubleMaxMask.class, + DoubleMaxVector.DoubleMaxShuffle.class, DoubleMaxVector::new); /** diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float128Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float128Vector.java index 79239532cc6..77301e4c773 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float128Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float128Vector.java @@ -141,24 +141,15 @@ final class Float128Vector extends FloatVector { @ForceInline Float128Shuffle iotaShuffle() { return Float128Shuffle.IOTA; } + @Override @ForceInline Float128Shuffle iotaShuffle(int start, int step, boolean wrap) { - if (wrap) { - return (Float128Shuffle)VectorSupport.shuffleIota(ETYPE, Float128Shuffle.class, VSPECIES, VLENGTH, start, step, 1, - (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (VectorIntrinsics.wrapToRange(i*lstep + lstart, l)))); - } else { - return (Float128Shuffle)VectorSupport.shuffleIota(ETYPE, Float128Shuffle.class, VSPECIES, VLENGTH, start, step, 0, - (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (i*lstep + lstart))); - } + return (Float128Shuffle) iotaShuffleTemplate(start, step, wrap); } @Override @ForceInline - Float128Shuffle shuffleFromBytes(byte[] reorder) { return new Float128Shuffle(reorder); } - - @Override - @ForceInline - Float128Shuffle shuffleFromArray(int[] indexes, int i) { return new Float128Shuffle(indexes, i); } + Float128Shuffle shuffleFromArray(int[] indices, int i) { return new Float128Shuffle(indices, i); } @Override @ForceInline @@ -344,9 +335,16 @@ final class Float128Vector extends FloatVector { return (long) super.reduceLanesTemplate(op, Float128Mask.class, (Float128Mask) m); // specialized } + @Override @ForceInline - public VectorShuffle toShuffle() { - return 
super.toShuffleTemplate(Float128Shuffle.class); // specialize + final VectorShuffle bitsToShuffle(AbstractSpecies dsp) { + throw new AssertionError(); + } + + @Override + @ForceInline + public final Float128Shuffle toShuffle() { + return (Float128Shuffle) toShuffle(vspecies(), false); } // Specialized unary testing @@ -784,25 +782,28 @@ final class Float128Vector extends FloatVector { static final class Float128Shuffle extends AbstractShuffle { static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM - static final Class ETYPE = float.class; // used by the JVM + static final Class ETYPE = int.class; // used by the JVM - Float128Shuffle(byte[] reorder) { - super(VLENGTH, reorder); + Float128Shuffle(int[] indices) { + super(indices); + assert(VLENGTH == indices.length); + assert(indicesInRange(indices)); } - public Float128Shuffle(int[] reorder) { - super(VLENGTH, reorder); + Float128Shuffle(int[] indices, int i) { + this(prepare(indices, i)); } - public Float128Shuffle(int[] reorder, int i) { - super(VLENGTH, reorder, i); + Float128Shuffle(IntUnaryOperator fn) { + this(prepare(fn)); } - public Float128Shuffle(IntUnaryOperator fn) { - super(VLENGTH, fn); + int[] indices() { + return (int[])getPayload(); } @Override + @ForceInline public FloatSpecies vspecies() { return VSPECIES; } @@ -810,47 +811,98 @@ final class Float128Vector extends FloatVector { static { // There must be enough bits in the shuffle lanes to encode // VLENGTH valid indexes and VLENGTH exceptional ones. 
- assert(VLENGTH < Byte.MAX_VALUE); - assert(Byte.MIN_VALUE <= -VLENGTH); + assert(VLENGTH < Integer.MAX_VALUE); + assert(Integer.MIN_VALUE <= -VLENGTH); } static final Float128Shuffle IOTA = new Float128Shuffle(IDENTITY); @Override @ForceInline public Float128Vector toVector() { - return VectorSupport.shuffleToVector(VCLASS, ETYPE, Float128Shuffle.class, this, VLENGTH, - (s) -> ((Float128Vector)(((AbstractShuffle)(s)).toVectorTemplate()))); + return (Float128Vector) toBitsVector().castShape(vspecies(), 0); } @Override @ForceInline - public VectorShuffle cast(VectorSpecies s) { - AbstractSpecies species = (AbstractSpecies) s; - if (length() != species.laneCount()) - throw new IllegalArgumentException("VectorShuffle length and species length differ"); - int[] shuffleArray = toArray(); - return s.shuffleFromArray(shuffleArray, 0).check(s); + Int128Vector toBitsVector() { + return (Int128Vector) super.toBitsVectorTemplate(); + } + + @Override + Int128Vector toBitsVector0() { + return ((Int128Vector) vspecies().asIntegral().dummyVector()).vectorFactory(indices()); } @Override @ForceInline - public Float128Shuffle wrapIndexes() { - return VectorSupport.wrapShuffleIndexes(ETYPE, Float128Shuffle.class, this, VLENGTH, - (s) -> ((Float128Shuffle)(((AbstractShuffle)(s)).wrapIndexesTemplate()))); + public int laneSource(int i) { + return (int)toBitsVector().lane(i); + } + + @Override + @ForceInline + public void intoArray(int[] a, int offset) { + toBitsVector().intoArray(a, offset); + } + + @Override + @ForceInline + public final Float128Mask laneIsValid() { + return (Float128Mask) toBitsVector().compare(VectorOperators.GE, 0) + .cast(vspecies()); } @ForceInline @Override - public Float128Shuffle rearrange(VectorShuffle shuffle) { - Float128Shuffle s = (Float128Shuffle) shuffle; - byte[] reorder1 = reorder(); - byte[] reorder2 = s.reorder(); - byte[] r = new byte[reorder1.length]; - for (int i = 0; i < reorder1.length; i++) { - int ssi = reorder2[i]; - r[i] = reorder1[ssi]; 
// throws on exceptional index + public final Float128Shuffle rearrange(VectorShuffle shuffle) { + Float128Shuffle concreteShuffle = (Float128Shuffle) shuffle; + return (Float128Shuffle) toBitsVector().rearrange(concreteShuffle.cast(IntVector.SPECIES_128)) + .toShuffle(vspecies(), false); + } + + @ForceInline + @Override + public final Float128Shuffle wrapIndexes() { + Int128Vector v = toBitsVector(); + if ((length() & (length() - 1)) == 0) { + v = (Int128Vector) v.lanewise(VectorOperators.AND, length() - 1); + } else { + v = (Int128Vector) v.blend(v.lanewise(VectorOperators.ADD, length()), + v.compare(VectorOperators.LT, 0)); } - return new Float128Shuffle(r); + return (Float128Shuffle) v.toShuffle(vspecies(), false); + } + + private static int[] prepare(int[] indices, int offset) { + int[] a = new int[VLENGTH]; + for (int i = 0; i < VLENGTH; i++) { + int si = indices[offset + i]; + si = partiallyWrapIndex(si, VLENGTH); + a[i] = (int)si; + } + return a; + } + + private static int[] prepare(IntUnaryOperator f) { + int[] a = new int[VLENGTH]; + for (int i = 0; i < VLENGTH; i++) { + int si = f.applyAsInt(i); + si = partiallyWrapIndex(si, VLENGTH); + a[i] = (int)si; + } + return a; + } + + private static boolean indicesInRange(int[] indices) { + int length = indices.length; + for (int si : indices) { + if (si >= (int)length || si < (int)(-length)) { + String msg = ("index "+si+"out of range ["+length+"] in "+ + java.util.Arrays.toString(indices)); + throw new AssertionError(msg); + } + } + return true; } } diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float256Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float256Vector.java index 5f5a26fd316..f81a9adda42 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float256Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float256Vector.java @@ -141,24 +141,15 @@ final class Float256Vector extends FloatVector { @ForceInline 
Float256Shuffle iotaShuffle() { return Float256Shuffle.IOTA; } + @Override @ForceInline Float256Shuffle iotaShuffle(int start, int step, boolean wrap) { - if (wrap) { - return (Float256Shuffle)VectorSupport.shuffleIota(ETYPE, Float256Shuffle.class, VSPECIES, VLENGTH, start, step, 1, - (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (VectorIntrinsics.wrapToRange(i*lstep + lstart, l)))); - } else { - return (Float256Shuffle)VectorSupport.shuffleIota(ETYPE, Float256Shuffle.class, VSPECIES, VLENGTH, start, step, 0, - (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (i*lstep + lstart))); - } + return (Float256Shuffle) iotaShuffleTemplate(start, step, wrap); } @Override @ForceInline - Float256Shuffle shuffleFromBytes(byte[] reorder) { return new Float256Shuffle(reorder); } - - @Override - @ForceInline - Float256Shuffle shuffleFromArray(int[] indexes, int i) { return new Float256Shuffle(indexes, i); } + Float256Shuffle shuffleFromArray(int[] indices, int i) { return new Float256Shuffle(indices, i); } @Override @ForceInline @@ -344,9 +335,16 @@ final class Float256Vector extends FloatVector { return (long) super.reduceLanesTemplate(op, Float256Mask.class, (Float256Mask) m); // specialized } + @Override @ForceInline - public VectorShuffle toShuffle() { - return super.toShuffleTemplate(Float256Shuffle.class); // specialize + final VectorShuffle bitsToShuffle(AbstractSpecies dsp) { + throw new AssertionError(); + } + + @Override + @ForceInline + public final Float256Shuffle toShuffle() { + return (Float256Shuffle) toShuffle(vspecies(), false); } // Specialized unary testing @@ -792,25 +790,28 @@ final class Float256Vector extends FloatVector { static final class Float256Shuffle extends AbstractShuffle { static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM - static final Class ETYPE = float.class; // used by the JVM + static final Class ETYPE = int.class; // used by the JVM - Float256Shuffle(byte[] reorder) { - super(VLENGTH, reorder); + Float256Shuffle(int[] 
indices) { + super(indices); + assert(VLENGTH == indices.length); + assert(indicesInRange(indices)); } - public Float256Shuffle(int[] reorder) { - super(VLENGTH, reorder); + Float256Shuffle(int[] indices, int i) { + this(prepare(indices, i)); } - public Float256Shuffle(int[] reorder, int i) { - super(VLENGTH, reorder, i); + Float256Shuffle(IntUnaryOperator fn) { + this(prepare(fn)); } - public Float256Shuffle(IntUnaryOperator fn) { - super(VLENGTH, fn); + int[] indices() { + return (int[])getPayload(); } @Override + @ForceInline public FloatSpecies vspecies() { return VSPECIES; } @@ -818,47 +819,98 @@ final class Float256Vector extends FloatVector { static { // There must be enough bits in the shuffle lanes to encode // VLENGTH valid indexes and VLENGTH exceptional ones. - assert(VLENGTH < Byte.MAX_VALUE); - assert(Byte.MIN_VALUE <= -VLENGTH); + assert(VLENGTH < Integer.MAX_VALUE); + assert(Integer.MIN_VALUE <= -VLENGTH); } static final Float256Shuffle IOTA = new Float256Shuffle(IDENTITY); @Override @ForceInline public Float256Vector toVector() { - return VectorSupport.shuffleToVector(VCLASS, ETYPE, Float256Shuffle.class, this, VLENGTH, - (s) -> ((Float256Vector)(((AbstractShuffle)(s)).toVectorTemplate()))); + return (Float256Vector) toBitsVector().castShape(vspecies(), 0); } @Override @ForceInline - public VectorShuffle cast(VectorSpecies s) { - AbstractSpecies species = (AbstractSpecies) s; - if (length() != species.laneCount()) - throw new IllegalArgumentException("VectorShuffle length and species length differ"); - int[] shuffleArray = toArray(); - return s.shuffleFromArray(shuffleArray, 0).check(s); + Int256Vector toBitsVector() { + return (Int256Vector) super.toBitsVectorTemplate(); + } + + @Override + Int256Vector toBitsVector0() { + return ((Int256Vector) vspecies().asIntegral().dummyVector()).vectorFactory(indices()); } @Override @ForceInline - public Float256Shuffle wrapIndexes() { - return VectorSupport.wrapShuffleIndexes(ETYPE, Float256Shuffle.class, 
this, VLENGTH, - (s) -> ((Float256Shuffle)(((AbstractShuffle)(s)).wrapIndexesTemplate()))); + public int laneSource(int i) { + return (int)toBitsVector().lane(i); + } + + @Override + @ForceInline + public void intoArray(int[] a, int offset) { + toBitsVector().intoArray(a, offset); + } + + @Override + @ForceInline + public final Float256Mask laneIsValid() { + return (Float256Mask) toBitsVector().compare(VectorOperators.GE, 0) + .cast(vspecies()); } @ForceInline @Override - public Float256Shuffle rearrange(VectorShuffle shuffle) { - Float256Shuffle s = (Float256Shuffle) shuffle; - byte[] reorder1 = reorder(); - byte[] reorder2 = s.reorder(); - byte[] r = new byte[reorder1.length]; - for (int i = 0; i < reorder1.length; i++) { - int ssi = reorder2[i]; - r[i] = reorder1[ssi]; // throws on exceptional index + public final Float256Shuffle rearrange(VectorShuffle shuffle) { + Float256Shuffle concreteShuffle = (Float256Shuffle) shuffle; + return (Float256Shuffle) toBitsVector().rearrange(concreteShuffle.cast(IntVector.SPECIES_256)) + .toShuffle(vspecies(), false); + } + + @ForceInline + @Override + public final Float256Shuffle wrapIndexes() { + Int256Vector v = toBitsVector(); + if ((length() & (length() - 1)) == 0) { + v = (Int256Vector) v.lanewise(VectorOperators.AND, length() - 1); + } else { + v = (Int256Vector) v.blend(v.lanewise(VectorOperators.ADD, length()), + v.compare(VectorOperators.LT, 0)); } - return new Float256Shuffle(r); + return (Float256Shuffle) v.toShuffle(vspecies(), false); + } + + private static int[] prepare(int[] indices, int offset) { + int[] a = new int[VLENGTH]; + for (int i = 0; i < VLENGTH; i++) { + int si = indices[offset + i]; + si = partiallyWrapIndex(si, VLENGTH); + a[i] = (int)si; + } + return a; + } + + private static int[] prepare(IntUnaryOperator f) { + int[] a = new int[VLENGTH]; + for (int i = 0; i < VLENGTH; i++) { + int si = f.applyAsInt(i); + si = partiallyWrapIndex(si, VLENGTH); + a[i] = (int)si; + } + return a; + } + + private 
static boolean indicesInRange(int[] indices) { + int length = indices.length; + for (int si : indices) { + if (si >= (int)length || si < (int)(-length)) { + String msg = ("index "+si+"out of range ["+length+"] in "+ + java.util.Arrays.toString(indices)); + throw new AssertionError(msg); + } + } + return true; } } diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float512Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float512Vector.java index f8c191ea016..1cf6afca4b4 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float512Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float512Vector.java @@ -141,24 +141,15 @@ final class Float512Vector extends FloatVector { @ForceInline Float512Shuffle iotaShuffle() { return Float512Shuffle.IOTA; } + @Override @ForceInline Float512Shuffle iotaShuffle(int start, int step, boolean wrap) { - if (wrap) { - return (Float512Shuffle)VectorSupport.shuffleIota(ETYPE, Float512Shuffle.class, VSPECIES, VLENGTH, start, step, 1, - (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (VectorIntrinsics.wrapToRange(i*lstep + lstart, l)))); - } else { - return (Float512Shuffle)VectorSupport.shuffleIota(ETYPE, Float512Shuffle.class, VSPECIES, VLENGTH, start, step, 0, - (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (i*lstep + lstart))); - } + return (Float512Shuffle) iotaShuffleTemplate(start, step, wrap); } @Override @ForceInline - Float512Shuffle shuffleFromBytes(byte[] reorder) { return new Float512Shuffle(reorder); } - - @Override - @ForceInline - Float512Shuffle shuffleFromArray(int[] indexes, int i) { return new Float512Shuffle(indexes, i); } + Float512Shuffle shuffleFromArray(int[] indices, int i) { return new Float512Shuffle(indices, i); } @Override @ForceInline @@ -344,9 +335,16 @@ final class Float512Vector extends FloatVector { return (long) super.reduceLanesTemplate(op, Float512Mask.class, (Float512Mask) m); // specialized } + @Override 
@ForceInline - public VectorShuffle toShuffle() { - return super.toShuffleTemplate(Float512Shuffle.class); // specialize + final VectorShuffle bitsToShuffle(AbstractSpecies dsp) { + throw new AssertionError(); + } + + @Override + @ForceInline + public final Float512Shuffle toShuffle() { + return (Float512Shuffle) toShuffle(vspecies(), false); } // Specialized unary testing @@ -808,25 +806,28 @@ final class Float512Vector extends FloatVector { static final class Float512Shuffle extends AbstractShuffle { static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM - static final Class ETYPE = float.class; // used by the JVM + static final Class ETYPE = int.class; // used by the JVM - Float512Shuffle(byte[] reorder) { - super(VLENGTH, reorder); + Float512Shuffle(int[] indices) { + super(indices); + assert(VLENGTH == indices.length); + assert(indicesInRange(indices)); } - public Float512Shuffle(int[] reorder) { - super(VLENGTH, reorder); + Float512Shuffle(int[] indices, int i) { + this(prepare(indices, i)); } - public Float512Shuffle(int[] reorder, int i) { - super(VLENGTH, reorder, i); + Float512Shuffle(IntUnaryOperator fn) { + this(prepare(fn)); } - public Float512Shuffle(IntUnaryOperator fn) { - super(VLENGTH, fn); + int[] indices() { + return (int[])getPayload(); } @Override + @ForceInline public FloatSpecies vspecies() { return VSPECIES; } @@ -834,47 +835,98 @@ final class Float512Vector extends FloatVector { static { // There must be enough bits in the shuffle lanes to encode // VLENGTH valid indexes and VLENGTH exceptional ones. 
- assert(VLENGTH < Byte.MAX_VALUE); - assert(Byte.MIN_VALUE <= -VLENGTH); + assert(VLENGTH < Integer.MAX_VALUE); + assert(Integer.MIN_VALUE <= -VLENGTH); } static final Float512Shuffle IOTA = new Float512Shuffle(IDENTITY); @Override @ForceInline public Float512Vector toVector() { - return VectorSupport.shuffleToVector(VCLASS, ETYPE, Float512Shuffle.class, this, VLENGTH, - (s) -> ((Float512Vector)(((AbstractShuffle)(s)).toVectorTemplate()))); + return (Float512Vector) toBitsVector().castShape(vspecies(), 0); } @Override @ForceInline - public VectorShuffle cast(VectorSpecies s) { - AbstractSpecies species = (AbstractSpecies) s; - if (length() != species.laneCount()) - throw new IllegalArgumentException("VectorShuffle length and species length differ"); - int[] shuffleArray = toArray(); - return s.shuffleFromArray(shuffleArray, 0).check(s); + Int512Vector toBitsVector() { + return (Int512Vector) super.toBitsVectorTemplate(); + } + + @Override + Int512Vector toBitsVector0() { + return ((Int512Vector) vspecies().asIntegral().dummyVector()).vectorFactory(indices()); } @Override @ForceInline - public Float512Shuffle wrapIndexes() { - return VectorSupport.wrapShuffleIndexes(ETYPE, Float512Shuffle.class, this, VLENGTH, - (s) -> ((Float512Shuffle)(((AbstractShuffle)(s)).wrapIndexesTemplate()))); + public int laneSource(int i) { + return (int)toBitsVector().lane(i); + } + + @Override + @ForceInline + public void intoArray(int[] a, int offset) { + toBitsVector().intoArray(a, offset); + } + + @Override + @ForceInline + public final Float512Mask laneIsValid() { + return (Float512Mask) toBitsVector().compare(VectorOperators.GE, 0) + .cast(vspecies()); } @ForceInline @Override - public Float512Shuffle rearrange(VectorShuffle shuffle) { - Float512Shuffle s = (Float512Shuffle) shuffle; - byte[] reorder1 = reorder(); - byte[] reorder2 = s.reorder(); - byte[] r = new byte[reorder1.length]; - for (int i = 0; i < reorder1.length; i++) { - int ssi = reorder2[i]; - r[i] = reorder1[ssi]; 
// throws on exceptional index + public final Float512Shuffle rearrange(VectorShuffle shuffle) { + Float512Shuffle concreteShuffle = (Float512Shuffle) shuffle; + return (Float512Shuffle) toBitsVector().rearrange(concreteShuffle.cast(IntVector.SPECIES_512)) + .toShuffle(vspecies(), false); + } + + @ForceInline + @Override + public final Float512Shuffle wrapIndexes() { + Int512Vector v = toBitsVector(); + if ((length() & (length() - 1)) == 0) { + v = (Int512Vector) v.lanewise(VectorOperators.AND, length() - 1); + } else { + v = (Int512Vector) v.blend(v.lanewise(VectorOperators.ADD, length()), + v.compare(VectorOperators.LT, 0)); } - return new Float512Shuffle(r); + return (Float512Shuffle) v.toShuffle(vspecies(), false); + } + + private static int[] prepare(int[] indices, int offset) { + int[] a = new int[VLENGTH]; + for (int i = 0; i < VLENGTH; i++) { + int si = indices[offset + i]; + si = partiallyWrapIndex(si, VLENGTH); + a[i] = (int)si; + } + return a; + } + + private static int[] prepare(IntUnaryOperator f) { + int[] a = new int[VLENGTH]; + for (int i = 0; i < VLENGTH; i++) { + int si = f.applyAsInt(i); + si = partiallyWrapIndex(si, VLENGTH); + a[i] = (int)si; + } + return a; + } + + private static boolean indicesInRange(int[] indices) { + int length = indices.length; + for (int si : indices) { + if (si >= (int)length || si < (int)(-length)) { + String msg = ("index "+si+"out of range ["+length+"] in "+ + java.util.Arrays.toString(indices)); + throw new AssertionError(msg); + } + } + return true; } } diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float64Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float64Vector.java index 9496e598868..1974b93d0ae 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float64Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float64Vector.java @@ -141,24 +141,15 @@ final class Float64Vector extends FloatVector { @ForceInline 
Float64Shuffle iotaShuffle() { return Float64Shuffle.IOTA; } + @Override @ForceInline Float64Shuffle iotaShuffle(int start, int step, boolean wrap) { - if (wrap) { - return (Float64Shuffle)VectorSupport.shuffleIota(ETYPE, Float64Shuffle.class, VSPECIES, VLENGTH, start, step, 1, - (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (VectorIntrinsics.wrapToRange(i*lstep + lstart, l)))); - } else { - return (Float64Shuffle)VectorSupport.shuffleIota(ETYPE, Float64Shuffle.class, VSPECIES, VLENGTH, start, step, 0, - (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (i*lstep + lstart))); - } + return (Float64Shuffle) iotaShuffleTemplate(start, step, wrap); } @Override @ForceInline - Float64Shuffle shuffleFromBytes(byte[] reorder) { return new Float64Shuffle(reorder); } - - @Override - @ForceInline - Float64Shuffle shuffleFromArray(int[] indexes, int i) { return new Float64Shuffle(indexes, i); } + Float64Shuffle shuffleFromArray(int[] indices, int i) { return new Float64Shuffle(indices, i); } @Override @ForceInline @@ -344,9 +335,16 @@ final class Float64Vector extends FloatVector { return (long) super.reduceLanesTemplate(op, Float64Mask.class, (Float64Mask) m); // specialized } + @Override @ForceInline - public VectorShuffle toShuffle() { - return super.toShuffleTemplate(Float64Shuffle.class); // specialize + final VectorShuffle bitsToShuffle(AbstractSpecies dsp) { + throw new AssertionError(); + } + + @Override + @ForceInline + public final Float64Shuffle toShuffle() { + return (Float64Shuffle) toShuffle(vspecies(), false); } // Specialized unary testing @@ -780,25 +778,28 @@ final class Float64Vector extends FloatVector { static final class Float64Shuffle extends AbstractShuffle { static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM - static final Class ETYPE = float.class; // used by the JVM + static final Class ETYPE = int.class; // used by the JVM - Float64Shuffle(byte[] reorder) { - super(VLENGTH, reorder); + Float64Shuffle(int[] indices) { + super(indices); + 
assert(VLENGTH == indices.length); + assert(indicesInRange(indices)); } - public Float64Shuffle(int[] reorder) { - super(VLENGTH, reorder); + Float64Shuffle(int[] indices, int i) { + this(prepare(indices, i)); } - public Float64Shuffle(int[] reorder, int i) { - super(VLENGTH, reorder, i); + Float64Shuffle(IntUnaryOperator fn) { + this(prepare(fn)); } - public Float64Shuffle(IntUnaryOperator fn) { - super(VLENGTH, fn); + int[] indices() { + return (int[])getPayload(); } @Override + @ForceInline public FloatSpecies vspecies() { return VSPECIES; } @@ -806,47 +807,98 @@ final class Float64Vector extends FloatVector { static { // There must be enough bits in the shuffle lanes to encode // VLENGTH valid indexes and VLENGTH exceptional ones. - assert(VLENGTH < Byte.MAX_VALUE); - assert(Byte.MIN_VALUE <= -VLENGTH); + assert(VLENGTH < Integer.MAX_VALUE); + assert(Integer.MIN_VALUE <= -VLENGTH); } static final Float64Shuffle IOTA = new Float64Shuffle(IDENTITY); @Override @ForceInline public Float64Vector toVector() { - return VectorSupport.shuffleToVector(VCLASS, ETYPE, Float64Shuffle.class, this, VLENGTH, - (s) -> ((Float64Vector)(((AbstractShuffle)(s)).toVectorTemplate()))); + return (Float64Vector) toBitsVector().castShape(vspecies(), 0); } @Override @ForceInline - public VectorShuffle cast(VectorSpecies s) { - AbstractSpecies species = (AbstractSpecies) s; - if (length() != species.laneCount()) - throw new IllegalArgumentException("VectorShuffle length and species length differ"); - int[] shuffleArray = toArray(); - return s.shuffleFromArray(shuffleArray, 0).check(s); + Int64Vector toBitsVector() { + return (Int64Vector) super.toBitsVectorTemplate(); + } + + @Override + Int64Vector toBitsVector0() { + return ((Int64Vector) vspecies().asIntegral().dummyVector()).vectorFactory(indices()); } @Override @ForceInline - public Float64Shuffle wrapIndexes() { - return VectorSupport.wrapShuffleIndexes(ETYPE, Float64Shuffle.class, this, VLENGTH, - (s) -> 
((Float64Shuffle)(((AbstractShuffle)(s)).wrapIndexesTemplate()))); + public int laneSource(int i) { + return (int)toBitsVector().lane(i); + } + + @Override + @ForceInline + public void intoArray(int[] a, int offset) { + toBitsVector().intoArray(a, offset); + } + + @Override + @ForceInline + public final Float64Mask laneIsValid() { + return (Float64Mask) toBitsVector().compare(VectorOperators.GE, 0) + .cast(vspecies()); } @ForceInline @Override - public Float64Shuffle rearrange(VectorShuffle shuffle) { - Float64Shuffle s = (Float64Shuffle) shuffle; - byte[] reorder1 = reorder(); - byte[] reorder2 = s.reorder(); - byte[] r = new byte[reorder1.length]; - for (int i = 0; i < reorder1.length; i++) { - int ssi = reorder2[i]; - r[i] = reorder1[ssi]; // throws on exceptional index + public final Float64Shuffle rearrange(VectorShuffle shuffle) { + Float64Shuffle concreteShuffle = (Float64Shuffle) shuffle; + return (Float64Shuffle) toBitsVector().rearrange(concreteShuffle.cast(IntVector.SPECIES_64)) + .toShuffle(vspecies(), false); + } + + @ForceInline + @Override + public final Float64Shuffle wrapIndexes() { + Int64Vector v = toBitsVector(); + if ((length() & (length() - 1)) == 0) { + v = (Int64Vector) v.lanewise(VectorOperators.AND, length() - 1); + } else { + v = (Int64Vector) v.blend(v.lanewise(VectorOperators.ADD, length()), + v.compare(VectorOperators.LT, 0)); } - return new Float64Shuffle(r); + return (Float64Shuffle) v.toShuffle(vspecies(), false); + } + + private static int[] prepare(int[] indices, int offset) { + int[] a = new int[VLENGTH]; + for (int i = 0; i < VLENGTH; i++) { + int si = indices[offset + i]; + si = partiallyWrapIndex(si, VLENGTH); + a[i] = (int)si; + } + return a; + } + + private static int[] prepare(IntUnaryOperator f) { + int[] a = new int[VLENGTH]; + for (int i = 0; i < VLENGTH; i++) { + int si = f.applyAsInt(i); + si = partiallyWrapIndex(si, VLENGTH); + a[i] = (int)si; + } + return a; + } + + private static boolean indicesInRange(int[] 
indices) { + int length = indices.length; + for (int si : indices) { + if (si >= (int)length || si < (int)(-length)) { + String msg = ("index "+si+"out of range ["+length+"] in "+ + java.util.Arrays.toString(indices)); + throw new AssertionError(msg); + } + } + return true; } } diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/FloatMaxVector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/FloatMaxVector.java index 6f093957262..14981d02151 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/FloatMaxVector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/FloatMaxVector.java @@ -141,24 +141,15 @@ final class FloatMaxVector extends FloatVector { @ForceInline FloatMaxShuffle iotaShuffle() { return FloatMaxShuffle.IOTA; } + @Override @ForceInline FloatMaxShuffle iotaShuffle(int start, int step, boolean wrap) { - if (wrap) { - return (FloatMaxShuffle)VectorSupport.shuffleIota(ETYPE, FloatMaxShuffle.class, VSPECIES, VLENGTH, start, step, 1, - (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (VectorIntrinsics.wrapToRange(i*lstep + lstart, l)))); - } else { - return (FloatMaxShuffle)VectorSupport.shuffleIota(ETYPE, FloatMaxShuffle.class, VSPECIES, VLENGTH, start, step, 0, - (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (i*lstep + lstart))); - } + return (FloatMaxShuffle) iotaShuffleTemplate(start, step, wrap); } @Override @ForceInline - FloatMaxShuffle shuffleFromBytes(byte[] reorder) { return new FloatMaxShuffle(reorder); } - - @Override - @ForceInline - FloatMaxShuffle shuffleFromArray(int[] indexes, int i) { return new FloatMaxShuffle(indexes, i); } + FloatMaxShuffle shuffleFromArray(int[] indices, int i) { return new FloatMaxShuffle(indices, i); } @Override @ForceInline @@ -344,9 +335,16 @@ final class FloatMaxVector extends FloatVector { return (long) super.reduceLanesTemplate(op, FloatMaxMask.class, (FloatMaxMask) m); // specialized } + @Override @ForceInline - public VectorShuffle 
toShuffle() { - return super.toShuffleTemplate(FloatMaxShuffle.class); // specialize + final VectorShuffle bitsToShuffle(AbstractSpecies dsp) { + throw new AssertionError(); + } + + @Override + @ForceInline + public final FloatMaxShuffle toShuffle() { + return (FloatMaxShuffle) toShuffle(vspecies(), false); } // Specialized unary testing @@ -777,25 +775,28 @@ final class FloatMaxVector extends FloatVector { static final class FloatMaxShuffle extends AbstractShuffle { static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM - static final Class ETYPE = float.class; // used by the JVM + static final Class ETYPE = int.class; // used by the JVM - FloatMaxShuffle(byte[] reorder) { - super(VLENGTH, reorder); + FloatMaxShuffle(int[] indices) { + super(indices); + assert(VLENGTH == indices.length); + assert(indicesInRange(indices)); } - public FloatMaxShuffle(int[] reorder) { - super(VLENGTH, reorder); + FloatMaxShuffle(int[] indices, int i) { + this(prepare(indices, i)); } - public FloatMaxShuffle(int[] reorder, int i) { - super(VLENGTH, reorder, i); + FloatMaxShuffle(IntUnaryOperator fn) { + this(prepare(fn)); } - public FloatMaxShuffle(IntUnaryOperator fn) { - super(VLENGTH, fn); + int[] indices() { + return (int[])getPayload(); } @Override + @ForceInline public FloatSpecies vspecies() { return VSPECIES; } @@ -803,47 +804,98 @@ final class FloatMaxVector extends FloatVector { static { // There must be enough bits in the shuffle lanes to encode // VLENGTH valid indexes and VLENGTH exceptional ones. 
- assert(VLENGTH < Byte.MAX_VALUE); - assert(Byte.MIN_VALUE <= -VLENGTH); + assert(VLENGTH < Integer.MAX_VALUE); + assert(Integer.MIN_VALUE <= -VLENGTH); } static final FloatMaxShuffle IOTA = new FloatMaxShuffle(IDENTITY); @Override @ForceInline public FloatMaxVector toVector() { - return VectorSupport.shuffleToVector(VCLASS, ETYPE, FloatMaxShuffle.class, this, VLENGTH, - (s) -> ((FloatMaxVector)(((AbstractShuffle)(s)).toVectorTemplate()))); + return (FloatMaxVector) toBitsVector().castShape(vspecies(), 0); } @Override @ForceInline - public VectorShuffle cast(VectorSpecies s) { - AbstractSpecies species = (AbstractSpecies) s; - if (length() != species.laneCount()) - throw new IllegalArgumentException("VectorShuffle length and species length differ"); - int[] shuffleArray = toArray(); - return s.shuffleFromArray(shuffleArray, 0).check(s); + IntMaxVector toBitsVector() { + return (IntMaxVector) super.toBitsVectorTemplate(); + } + + @Override + IntMaxVector toBitsVector0() { + return ((IntMaxVector) vspecies().asIntegral().dummyVector()).vectorFactory(indices()); } @Override @ForceInline - public FloatMaxShuffle wrapIndexes() { - return VectorSupport.wrapShuffleIndexes(ETYPE, FloatMaxShuffle.class, this, VLENGTH, - (s) -> ((FloatMaxShuffle)(((AbstractShuffle)(s)).wrapIndexesTemplate()))); + public int laneSource(int i) { + return (int)toBitsVector().lane(i); + } + + @Override + @ForceInline + public void intoArray(int[] a, int offset) { + toBitsVector().intoArray(a, offset); + } + + @Override + @ForceInline + public final FloatMaxMask laneIsValid() { + return (FloatMaxMask) toBitsVector().compare(VectorOperators.GE, 0) + .cast(vspecies()); } @ForceInline @Override - public FloatMaxShuffle rearrange(VectorShuffle shuffle) { - FloatMaxShuffle s = (FloatMaxShuffle) shuffle; - byte[] reorder1 = reorder(); - byte[] reorder2 = s.reorder(); - byte[] r = new byte[reorder1.length]; - for (int i = 0; i < reorder1.length; i++) { - int ssi = reorder2[i]; - r[i] = reorder1[ssi]; 
// throws on exceptional index + public final FloatMaxShuffle rearrange(VectorShuffle shuffle) { + FloatMaxShuffle concreteShuffle = (FloatMaxShuffle) shuffle; + return (FloatMaxShuffle) toBitsVector().rearrange(concreteShuffle.cast(IntVector.SPECIES_MAX)) + .toShuffle(vspecies(), false); + } + + @ForceInline + @Override + public final FloatMaxShuffle wrapIndexes() { + IntMaxVector v = toBitsVector(); + if ((length() & (length() - 1)) == 0) { + v = (IntMaxVector) v.lanewise(VectorOperators.AND, length() - 1); + } else { + v = (IntMaxVector) v.blend(v.lanewise(VectorOperators.ADD, length()), + v.compare(VectorOperators.LT, 0)); } - return new FloatMaxShuffle(r); + return (FloatMaxShuffle) v.toShuffle(vspecies(), false); + } + + private static int[] prepare(int[] indices, int offset) { + int[] a = new int[VLENGTH]; + for (int i = 0; i < VLENGTH; i++) { + int si = indices[offset + i]; + si = partiallyWrapIndex(si, VLENGTH); + a[i] = (int)si; + } + return a; + } + + private static int[] prepare(IntUnaryOperator f) { + int[] a = new int[VLENGTH]; + for (int i = 0; i < VLENGTH; i++) { + int si = f.applyAsInt(i); + si = partiallyWrapIndex(si, VLENGTH); + a[i] = (int)si; + } + return a; + } + + private static boolean indicesInRange(int[] indices) { + int length = indices.length; + for (int si : indices) { + if (si >= (int)length || si < (int)(-length)) { + String msg = ("index "+si+"out of range ["+length+"] in "+ + java.util.Arrays.toString(indices)); + throw new AssertionError(msg); + } + } + return true; } } diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/FloatVector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/FloatVector.java index a3686d467a7..ab056d1c397 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/FloatVector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/FloatVector.java @@ -966,7 +966,7 @@ public abstract class FloatVector extends AbstractVector { // and broadcast, 
but it would be more surprising not to continue // the obvious pattern started by unary and binary. - /** + /** * {@inheritDoc} * @see #lanewise(VectorOperators.Ternary,float,float,VectorMask) * @see #lanewise(VectorOperators.Ternary,Vector,float,VectorMask) @@ -2150,9 +2150,10 @@ public abstract class FloatVector extends AbstractVector { FloatVector that = (FloatVector) v1; that.check(this); Objects.checkIndex(origin, length() + 1); - VectorShuffle iota = iotaShuffle(); - VectorMask blendMask = iota.toVector().compare(VectorOperators.LT, (broadcast((float)(length() - origin)))); - iota = iotaShuffle(origin, 1, true); + IntVector iotaVector = (IntVector) iotaShuffle().toBitsVector(); + IntVector filter = IntVector.broadcast((IntVector.IntSpecies) vspecies().asIntegral(), (int)(length() - origin)); + VectorMask blendMask = iotaVector.compare(VectorOperators.LT, filter).cast(vspecies()); + AbstractShuffle iota = iotaShuffle(origin, 1, true); return that.rearrange(iota).blend(this.rearrange(iota), blendMask); } @@ -2180,9 +2181,10 @@ public abstract class FloatVector extends AbstractVector { @ForceInline FloatVector sliceTemplate(int origin) { Objects.checkIndex(origin, length() + 1); - VectorShuffle iota = iotaShuffle(); - VectorMask blendMask = iota.toVector().compare(VectorOperators.LT, (broadcast((float)(length() - origin)))); - iota = iotaShuffle(origin, 1, true); + IntVector iotaVector = (IntVector) iotaShuffle().toBitsVector(); + IntVector filter = IntVector.broadcast((IntVector.IntSpecies) vspecies().asIntegral(), (int)(length() - origin)); + VectorMask blendMask = iotaVector.compare(VectorOperators.LT, filter).cast(vspecies()); + AbstractShuffle iota = iotaShuffle(origin, 1, true); return vspecies().zero().blend(this.rearrange(iota), blendMask); } @@ -2201,10 +2203,10 @@ public abstract class FloatVector extends AbstractVector { FloatVector that = (FloatVector) w; that.check(this); Objects.checkIndex(origin, length() + 1); - VectorShuffle iota = 
iotaShuffle(); - VectorMask blendMask = iota.toVector().compare((part == 0) ? VectorOperators.GE : VectorOperators.LT, - (broadcast((float)(origin)))); - iota = iotaShuffle(-origin, 1, true); + IntVector iotaVector = (IntVector) iotaShuffle().toBitsVector(); + IntVector filter = IntVector.broadcast((IntVector.IntSpecies) vspecies().asIntegral(), (int)origin); + VectorMask blendMask = iotaVector.compare((part == 0) ? VectorOperators.GE : VectorOperators.LT, filter).cast(vspecies()); + AbstractShuffle iota = iotaShuffle(-origin, 1, true); return that.blend(this.rearrange(iota), blendMask); } @@ -2241,10 +2243,10 @@ public abstract class FloatVector extends AbstractVector { FloatVector unsliceTemplate(int origin) { Objects.checkIndex(origin, length() + 1); - VectorShuffle iota = iotaShuffle(); - VectorMask blendMask = iota.toVector().compare(VectorOperators.GE, - (broadcast((float)(origin)))); - iota = iotaShuffle(-origin, 1, true); + IntVector iotaVector = (IntVector) iotaShuffle().toBitsVector(); + IntVector filter = IntVector.broadcast((IntVector.IntSpecies) vspecies().asIntegral(), (int)origin); + VectorMask blendMask = iotaVector.compare(VectorOperators.GE, filter).cast(vspecies()); + AbstractShuffle iota = iotaShuffle(-origin, 1, true); return vspecies().zero().blend(this.rearrange(iota), blendMask); } @@ -2267,13 +2269,11 @@ public abstract class FloatVector extends AbstractVector { final > FloatVector rearrangeTemplate(Class shuffletype, S shuffle) { - @SuppressWarnings("unchecked") - S ws = (S) shuffle.wrapIndexes(); return VectorSupport.rearrangeOp( getClass(), shuffletype, null, float.class, length(), - this, ws, null, + this, shuffle, null, (v1, s_, m_) -> v1.uOp((i, a) -> { - int ei = s_.laneSource(i); + int ei = Integer.remainderUnsigned(s_.laneSource(i), v1.length()); return v1.lane(ei); })); } @@ -2296,13 +2296,11 @@ public abstract class FloatVector extends AbstractVector { M m) { m.check(masktype, this); - @SuppressWarnings("unchecked") - S ws = (S) 
shuffle.wrapIndexes(); return VectorSupport.rearrangeOp( getClass(), shuffletype, masktype, float.class, length(), - this, ws, m, + this, shuffle, m, (v1, s_, m_) -> v1.uOp((i, a) -> { - int ei = s_.laneSource(i); + int ei = Integer.remainderUnsigned(s_.laneSource(i), v1.length()); return !m_.laneIsSet(i) ? 0 : v1.lane(ei); })); } @@ -2323,48 +2321,43 @@ public abstract class FloatVector extends AbstractVector { S shuffle, FloatVector v) { VectorMask valid = shuffle.laneIsValid(); - @SuppressWarnings("unchecked") - S ws = (S) shuffle.wrapIndexes(); FloatVector r0 = VectorSupport.rearrangeOp( getClass(), shuffletype, null, float.class, length(), - this, ws, null, + this, shuffle, null, (v0, s_, m_) -> v0.uOp((i, a) -> { - int ei = s_.laneSource(i); + int ei = Integer.remainderUnsigned(s_.laneSource(i), v0.length()); return v0.lane(ei); })); FloatVector r1 = VectorSupport.rearrangeOp( getClass(), shuffletype, null, float.class, length(), - v, ws, null, + v, shuffle, null, (v1, s_, m_) -> v1.uOp((i, a) -> { - int ei = s_.laneSource(i); + int ei = Integer.remainderUnsigned(s_.laneSource(i), v1.length()); return v1.lane(ei); })); return r1.blend(r0, valid); } + @Override @ForceInline - private final - VectorShuffle toShuffle0(FloatSpecies dsp) { - float[] a = toArray(); - int[] sa = new int[a.length]; - for (int i = 0; i < a.length; i++) { - sa[i] = (int) a[i]; - } - return VectorShuffle.fromArray(dsp, sa, 0); + final VectorShuffle bitsToShuffle0(AbstractSpecies dsp) { + throw new AssertionError(); } - /*package-private*/ @ForceInline - final - VectorShuffle toShuffleTemplate(Class shuffleType) { - FloatSpecies vsp = vspecies(); - return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST, - getClass(), float.class, length(), - shuffleType, byte.class, length(), - this, vsp, - FloatVector::toShuffle0); + final + VectorShuffle toShuffle(AbstractSpecies dsp, boolean wrap) { + assert(dsp.elementSize() == vspecies().elementSize()); + IntVector idx = 
convert(VectorOperators.F2I, 0).reinterpretAsInts(); + IntVector wrapped = idx.lanewise(VectorOperators.AND, length() - 1); + if (!wrap) { + IntVector wrappedEx = wrapped.lanewise(VectorOperators.SUB, length()); + VectorMask inBound = wrapped.compare(VectorOperators.EQ, idx); + wrapped = wrappedEx.blend(wrapped, inBound); + } + return wrapped.bitsToShuffle(dsp); } /** @@ -3681,9 +3674,10 @@ public abstract class FloatVector extends AbstractVector { private FloatSpecies(VectorShape shape, Class vectorType, Class> maskType, + Class> shuffleType, Function vectorFactory) { super(shape, LaneType.of(float.class), - vectorType, maskType, + vectorType, maskType, shuffleType, vectorFactory); assert(this.elementSize() == Float.SIZE); } @@ -3969,6 +3963,7 @@ public abstract class FloatVector extends AbstractVector { = new FloatSpecies(VectorShape.S_64_BIT, Float64Vector.class, Float64Vector.Float64Mask.class, + Float64Vector.Float64Shuffle.class, Float64Vector::new); /** Species representing {@link FloatVector}s of {@link VectorShape#S_128_BIT VectorShape.S_128_BIT}. */ @@ -3976,6 +3971,7 @@ public abstract class FloatVector extends AbstractVector { = new FloatSpecies(VectorShape.S_128_BIT, Float128Vector.class, Float128Vector.Float128Mask.class, + Float128Vector.Float128Shuffle.class, Float128Vector::new); /** Species representing {@link FloatVector}s of {@link VectorShape#S_256_BIT VectorShape.S_256_BIT}. */ @@ -3983,6 +3979,7 @@ public abstract class FloatVector extends AbstractVector { = new FloatSpecies(VectorShape.S_256_BIT, Float256Vector.class, Float256Vector.Float256Mask.class, + Float256Vector.Float256Shuffle.class, Float256Vector::new); /** Species representing {@link FloatVector}s of {@link VectorShape#S_512_BIT VectorShape.S_512_BIT}. 
*/ @@ -3990,6 +3987,7 @@ public abstract class FloatVector extends AbstractVector { = new FloatSpecies(VectorShape.S_512_BIT, Float512Vector.class, Float512Vector.Float512Mask.class, + Float512Vector.Float512Shuffle.class, Float512Vector::new); /** Species representing {@link FloatVector}s of {@link VectorShape#S_Max_BIT VectorShape.S_Max_BIT}. */ @@ -3997,6 +3995,7 @@ public abstract class FloatVector extends AbstractVector { = new FloatSpecies(VectorShape.S_Max_BIT, FloatMaxVector.class, FloatMaxVector.FloatMaxMask.class, + FloatMaxVector.FloatMaxShuffle.class, FloatMaxVector::new); /** diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Int128Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Int128Vector.java index 4aa1e8044b0..a4660dd94c6 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Int128Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Int128Vector.java @@ -141,24 +141,15 @@ final class Int128Vector extends IntVector { @ForceInline Int128Shuffle iotaShuffle() { return Int128Shuffle.IOTA; } + @Override @ForceInline Int128Shuffle iotaShuffle(int start, int step, boolean wrap) { - if (wrap) { - return (Int128Shuffle)VectorSupport.shuffleIota(ETYPE, Int128Shuffle.class, VSPECIES, VLENGTH, start, step, 1, - (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (VectorIntrinsics.wrapToRange(i*lstep + lstart, l)))); - } else { - return (Int128Shuffle)VectorSupport.shuffleIota(ETYPE, Int128Shuffle.class, VSPECIES, VLENGTH, start, step, 0, - (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (i*lstep + lstart))); - } + return (Int128Shuffle) iotaShuffleTemplate(start, step, wrap); } @Override @ForceInline - Int128Shuffle shuffleFromBytes(byte[] reorder) { return new Int128Shuffle(reorder); } - - @Override - @ForceInline - Int128Shuffle shuffleFromArray(int[] indexes, int i) { return new Int128Shuffle(indexes, i); } + Int128Shuffle shuffleFromArray(int[] indices, int i) { return new 
Int128Shuffle(indices, i); } @Override @ForceInline @@ -357,9 +348,16 @@ final class Int128Vector extends IntVector { return (long) super.reduceLanesTemplate(op, Int128Mask.class, (Int128Mask) m); // specialized } + @Override @ForceInline - public VectorShuffle toShuffle() { - return super.toShuffleTemplate(Int128Shuffle.class); // specialize + final VectorShuffle bitsToShuffle(AbstractSpecies dsp) { + return bitsToShuffleTemplate(dsp); + } + + @Override + @ForceInline + public final Int128Shuffle toShuffle() { + return (Int128Shuffle) toShuffle(vspecies(), false); } // Specialized unary testing @@ -797,23 +795,26 @@ final class Int128Vector extends IntVector { static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM static final Class ETYPE = int.class; // used by the JVM - Int128Shuffle(byte[] reorder) { - super(VLENGTH, reorder); + Int128Shuffle(int[] indices) { + super(indices); + assert(VLENGTH == indices.length); + assert(indicesInRange(indices)); } - public Int128Shuffle(int[] reorder) { - super(VLENGTH, reorder); + Int128Shuffle(int[] indices, int i) { + this(prepare(indices, i)); } - public Int128Shuffle(int[] reorder, int i) { - super(VLENGTH, reorder, i); + Int128Shuffle(IntUnaryOperator fn) { + this(prepare(fn)); } - public Int128Shuffle(IntUnaryOperator fn) { - super(VLENGTH, fn); + int[] indices() { + return (int[])getPayload(); } @Override + @ForceInline public IntSpecies vspecies() { return VSPECIES; } @@ -821,47 +822,98 @@ final class Int128Vector extends IntVector { static { // There must be enough bits in the shuffle lanes to encode // VLENGTH valid indexes and VLENGTH exceptional ones. 
- assert(VLENGTH < Byte.MAX_VALUE); - assert(Byte.MIN_VALUE <= -VLENGTH); + assert(VLENGTH < Integer.MAX_VALUE); + assert(Integer.MIN_VALUE <= -VLENGTH); } static final Int128Shuffle IOTA = new Int128Shuffle(IDENTITY); @Override @ForceInline public Int128Vector toVector() { - return VectorSupport.shuffleToVector(VCLASS, ETYPE, Int128Shuffle.class, this, VLENGTH, - (s) -> ((Int128Vector)(((AbstractShuffle)(s)).toVectorTemplate()))); + return toBitsVector(); } @Override @ForceInline - public VectorShuffle cast(VectorSpecies s) { - AbstractSpecies species = (AbstractSpecies) s; - if (length() != species.laneCount()) - throw new IllegalArgumentException("VectorShuffle length and species length differ"); - int[] shuffleArray = toArray(); - return s.shuffleFromArray(shuffleArray, 0).check(s); + Int128Vector toBitsVector() { + return (Int128Vector) super.toBitsVectorTemplate(); + } + + @Override + Int128Vector toBitsVector0() { + return ((Int128Vector) vspecies().asIntegral().dummyVector()).vectorFactory(indices()); } @Override @ForceInline - public Int128Shuffle wrapIndexes() { - return VectorSupport.wrapShuffleIndexes(ETYPE, Int128Shuffle.class, this, VLENGTH, - (s) -> ((Int128Shuffle)(((AbstractShuffle)(s)).wrapIndexesTemplate()))); + public int laneSource(int i) { + return (int)toBitsVector().lane(i); + } + + @Override + @ForceInline + public void intoArray(int[] a, int offset) { + toBitsVector().intoArray(a, offset); + } + + @Override + @ForceInline + public final Int128Mask laneIsValid() { + return (Int128Mask) toBitsVector().compare(VectorOperators.GE, 0) + .cast(vspecies()); } @ForceInline @Override - public Int128Shuffle rearrange(VectorShuffle shuffle) { - Int128Shuffle s = (Int128Shuffle) shuffle; - byte[] reorder1 = reorder(); - byte[] reorder2 = s.reorder(); - byte[] r = new byte[reorder1.length]; - for (int i = 0; i < reorder1.length; i++) { - int ssi = reorder2[i]; - r[i] = reorder1[ssi]; // throws on exceptional index + public final Int128Shuffle 
rearrange(VectorShuffle shuffle) { + Int128Shuffle concreteShuffle = (Int128Shuffle) shuffle; + return (Int128Shuffle) toBitsVector().rearrange(concreteShuffle) + .toShuffle(vspecies(), false); + } + + @ForceInline + @Override + public final Int128Shuffle wrapIndexes() { + Int128Vector v = toBitsVector(); + if ((length() & (length() - 1)) == 0) { + v = (Int128Vector) v.lanewise(VectorOperators.AND, length() - 1); + } else { + v = (Int128Vector) v.blend(v.lanewise(VectorOperators.ADD, length()), + v.compare(VectorOperators.LT, 0)); } - return new Int128Shuffle(r); + return (Int128Shuffle) v.toShuffle(vspecies(), false); + } + + private static int[] prepare(int[] indices, int offset) { + int[] a = new int[VLENGTH]; + for (int i = 0; i < VLENGTH; i++) { + int si = indices[offset + i]; + si = partiallyWrapIndex(si, VLENGTH); + a[i] = (int)si; + } + return a; + } + + private static int[] prepare(IntUnaryOperator f) { + int[] a = new int[VLENGTH]; + for (int i = 0; i < VLENGTH; i++) { + int si = f.applyAsInt(i); + si = partiallyWrapIndex(si, VLENGTH); + a[i] = (int)si; + } + return a; + } + + private static boolean indicesInRange(int[] indices) { + int length = indices.length; + for (int si : indices) { + if (si >= (int)length || si < (int)(-length)) { + String msg = ("index "+si+"out of range ["+length+"] in "+ + java.util.Arrays.toString(indices)); + throw new AssertionError(msg); + } + } + return true; } } diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Int256Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Int256Vector.java index 753f96f216f..284ee1cebca 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Int256Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Int256Vector.java @@ -141,24 +141,15 @@ final class Int256Vector extends IntVector { @ForceInline Int256Shuffle iotaShuffle() { return Int256Shuffle.IOTA; } + @Override @ForceInline Int256Shuffle iotaShuffle(int 
start, int step, boolean wrap) { - if (wrap) { - return (Int256Shuffle)VectorSupport.shuffleIota(ETYPE, Int256Shuffle.class, VSPECIES, VLENGTH, start, step, 1, - (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (VectorIntrinsics.wrapToRange(i*lstep + lstart, l)))); - } else { - return (Int256Shuffle)VectorSupport.shuffleIota(ETYPE, Int256Shuffle.class, VSPECIES, VLENGTH, start, step, 0, - (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (i*lstep + lstart))); - } + return (Int256Shuffle) iotaShuffleTemplate(start, step, wrap); } @Override @ForceInline - Int256Shuffle shuffleFromBytes(byte[] reorder) { return new Int256Shuffle(reorder); } - - @Override - @ForceInline - Int256Shuffle shuffleFromArray(int[] indexes, int i) { return new Int256Shuffle(indexes, i); } + Int256Shuffle shuffleFromArray(int[] indices, int i) { return new Int256Shuffle(indices, i); } @Override @ForceInline @@ -357,9 +348,16 @@ final class Int256Vector extends IntVector { return (long) super.reduceLanesTemplate(op, Int256Mask.class, (Int256Mask) m); // specialized } + @Override @ForceInline - public VectorShuffle toShuffle() { - return super.toShuffleTemplate(Int256Shuffle.class); // specialize + final VectorShuffle bitsToShuffle(AbstractSpecies dsp) { + return bitsToShuffleTemplate(dsp); + } + + @Override + @ForceInline + public final Int256Shuffle toShuffle() { + return (Int256Shuffle) toShuffle(vspecies(), false); } // Specialized unary testing @@ -805,23 +803,26 @@ final class Int256Vector extends IntVector { static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM static final Class ETYPE = int.class; // used by the JVM - Int256Shuffle(byte[] reorder) { - super(VLENGTH, reorder); + Int256Shuffle(int[] indices) { + super(indices); + assert(VLENGTH == indices.length); + assert(indicesInRange(indices)); } - public Int256Shuffle(int[] reorder) { - super(VLENGTH, reorder); + Int256Shuffle(int[] indices, int i) { + this(prepare(indices, i)); } - public Int256Shuffle(int[] reorder, int i) { 
- super(VLENGTH, reorder, i); + Int256Shuffle(IntUnaryOperator fn) { + this(prepare(fn)); } - public Int256Shuffle(IntUnaryOperator fn) { - super(VLENGTH, fn); + int[] indices() { + return (int[])getPayload(); } @Override + @ForceInline public IntSpecies vspecies() { return VSPECIES; } @@ -829,47 +830,98 @@ final class Int256Vector extends IntVector { static { // There must be enough bits in the shuffle lanes to encode // VLENGTH valid indexes and VLENGTH exceptional ones. - assert(VLENGTH < Byte.MAX_VALUE); - assert(Byte.MIN_VALUE <= -VLENGTH); + assert(VLENGTH < Integer.MAX_VALUE); + assert(Integer.MIN_VALUE <= -VLENGTH); } static final Int256Shuffle IOTA = new Int256Shuffle(IDENTITY); @Override @ForceInline public Int256Vector toVector() { - return VectorSupport.shuffleToVector(VCLASS, ETYPE, Int256Shuffle.class, this, VLENGTH, - (s) -> ((Int256Vector)(((AbstractShuffle)(s)).toVectorTemplate()))); + return toBitsVector(); } @Override @ForceInline - public VectorShuffle cast(VectorSpecies s) { - AbstractSpecies species = (AbstractSpecies) s; - if (length() != species.laneCount()) - throw new IllegalArgumentException("VectorShuffle length and species length differ"); - int[] shuffleArray = toArray(); - return s.shuffleFromArray(shuffleArray, 0).check(s); + Int256Vector toBitsVector() { + return (Int256Vector) super.toBitsVectorTemplate(); + } + + @Override + Int256Vector toBitsVector0() { + return ((Int256Vector) vspecies().asIntegral().dummyVector()).vectorFactory(indices()); } @Override @ForceInline - public Int256Shuffle wrapIndexes() { - return VectorSupport.wrapShuffleIndexes(ETYPE, Int256Shuffle.class, this, VLENGTH, - (s) -> ((Int256Shuffle)(((AbstractShuffle)(s)).wrapIndexesTemplate()))); + public int laneSource(int i) { + return (int)toBitsVector().lane(i); + } + + @Override + @ForceInline + public void intoArray(int[] a, int offset) { + toBitsVector().intoArray(a, offset); + } + + @Override + @ForceInline + public final Int256Mask laneIsValid() { + 
return (Int256Mask) toBitsVector().compare(VectorOperators.GE, 0) + .cast(vspecies()); } @ForceInline @Override - public Int256Shuffle rearrange(VectorShuffle shuffle) { - Int256Shuffle s = (Int256Shuffle) shuffle; - byte[] reorder1 = reorder(); - byte[] reorder2 = s.reorder(); - byte[] r = new byte[reorder1.length]; - for (int i = 0; i < reorder1.length; i++) { - int ssi = reorder2[i]; - r[i] = reorder1[ssi]; // throws on exceptional index + public final Int256Shuffle rearrange(VectorShuffle shuffle) { + Int256Shuffle concreteShuffle = (Int256Shuffle) shuffle; + return (Int256Shuffle) toBitsVector().rearrange(concreteShuffle) + .toShuffle(vspecies(), false); + } + + @ForceInline + @Override + public final Int256Shuffle wrapIndexes() { + Int256Vector v = toBitsVector(); + if ((length() & (length() - 1)) == 0) { + v = (Int256Vector) v.lanewise(VectorOperators.AND, length() - 1); + } else { + v = (Int256Vector) v.blend(v.lanewise(VectorOperators.ADD, length()), + v.compare(VectorOperators.LT, 0)); } - return new Int256Shuffle(r); + return (Int256Shuffle) v.toShuffle(vspecies(), false); + } + + private static int[] prepare(int[] indices, int offset) { + int[] a = new int[VLENGTH]; + for (int i = 0; i < VLENGTH; i++) { + int si = indices[offset + i]; + si = partiallyWrapIndex(si, VLENGTH); + a[i] = (int)si; + } + return a; + } + + private static int[] prepare(IntUnaryOperator f) { + int[] a = new int[VLENGTH]; + for (int i = 0; i < VLENGTH; i++) { + int si = f.applyAsInt(i); + si = partiallyWrapIndex(si, VLENGTH); + a[i] = (int)si; + } + return a; + } + + private static boolean indicesInRange(int[] indices) { + int length = indices.length; + for (int si : indices) { + if (si >= (int)length || si < (int)(-length)) { + String msg = ("index "+si+"out of range ["+length+"] in "+ + java.util.Arrays.toString(indices)); + throw new AssertionError(msg); + } + } + return true; } } diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Int512Vector.java 
b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Int512Vector.java index 8e6ed6fc882..f0c8ddabfae 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Int512Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Int512Vector.java @@ -141,24 +141,15 @@ final class Int512Vector extends IntVector { @ForceInline Int512Shuffle iotaShuffle() { return Int512Shuffle.IOTA; } + @Override @ForceInline Int512Shuffle iotaShuffle(int start, int step, boolean wrap) { - if (wrap) { - return (Int512Shuffle)VectorSupport.shuffleIota(ETYPE, Int512Shuffle.class, VSPECIES, VLENGTH, start, step, 1, - (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (VectorIntrinsics.wrapToRange(i*lstep + lstart, l)))); - } else { - return (Int512Shuffle)VectorSupport.shuffleIota(ETYPE, Int512Shuffle.class, VSPECIES, VLENGTH, start, step, 0, - (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (i*lstep + lstart))); - } + return (Int512Shuffle) iotaShuffleTemplate(start, step, wrap); } @Override @ForceInline - Int512Shuffle shuffleFromBytes(byte[] reorder) { return new Int512Shuffle(reorder); } - - @Override - @ForceInline - Int512Shuffle shuffleFromArray(int[] indexes, int i) { return new Int512Shuffle(indexes, i); } + Int512Shuffle shuffleFromArray(int[] indices, int i) { return new Int512Shuffle(indices, i); } @Override @ForceInline @@ -357,9 +348,16 @@ final class Int512Vector extends IntVector { return (long) super.reduceLanesTemplate(op, Int512Mask.class, (Int512Mask) m); // specialized } + @Override @ForceInline - public VectorShuffle toShuffle() { - return super.toShuffleTemplate(Int512Shuffle.class); // specialize + final VectorShuffle bitsToShuffle(AbstractSpecies dsp) { + return bitsToShuffleTemplate(dsp); + } + + @Override + @ForceInline + public final Int512Shuffle toShuffle() { + return (Int512Shuffle) toShuffle(vspecies(), false); } // Specialized unary testing @@ -821,23 +819,26 @@ final class Int512Vector extends IntVector { static final int 
VLENGTH = VSPECIES.laneCount(); // used by the JVM static final Class ETYPE = int.class; // used by the JVM - Int512Shuffle(byte[] reorder) { - super(VLENGTH, reorder); + Int512Shuffle(int[] indices) { + super(indices); + assert(VLENGTH == indices.length); + assert(indicesInRange(indices)); } - public Int512Shuffle(int[] reorder) { - super(VLENGTH, reorder); + Int512Shuffle(int[] indices, int i) { + this(prepare(indices, i)); } - public Int512Shuffle(int[] reorder, int i) { - super(VLENGTH, reorder, i); + Int512Shuffle(IntUnaryOperator fn) { + this(prepare(fn)); } - public Int512Shuffle(IntUnaryOperator fn) { - super(VLENGTH, fn); + int[] indices() { + return (int[])getPayload(); } @Override + @ForceInline public IntSpecies vspecies() { return VSPECIES; } @@ -845,47 +846,98 @@ final class Int512Vector extends IntVector { static { // There must be enough bits in the shuffle lanes to encode // VLENGTH valid indexes and VLENGTH exceptional ones. - assert(VLENGTH < Byte.MAX_VALUE); - assert(Byte.MIN_VALUE <= -VLENGTH); + assert(VLENGTH < Integer.MAX_VALUE); + assert(Integer.MIN_VALUE <= -VLENGTH); } static final Int512Shuffle IOTA = new Int512Shuffle(IDENTITY); @Override @ForceInline public Int512Vector toVector() { - return VectorSupport.shuffleToVector(VCLASS, ETYPE, Int512Shuffle.class, this, VLENGTH, - (s) -> ((Int512Vector)(((AbstractShuffle)(s)).toVectorTemplate()))); + return toBitsVector(); } @Override @ForceInline - public VectorShuffle cast(VectorSpecies s) { - AbstractSpecies species = (AbstractSpecies) s; - if (length() != species.laneCount()) - throw new IllegalArgumentException("VectorShuffle length and species length differ"); - int[] shuffleArray = toArray(); - return s.shuffleFromArray(shuffleArray, 0).check(s); + Int512Vector toBitsVector() { + return (Int512Vector) super.toBitsVectorTemplate(); + } + + @Override + Int512Vector toBitsVector0() { + return ((Int512Vector) vspecies().asIntegral().dummyVector()).vectorFactory(indices()); } @Override 
@ForceInline - public Int512Shuffle wrapIndexes() { - return VectorSupport.wrapShuffleIndexes(ETYPE, Int512Shuffle.class, this, VLENGTH, - (s) -> ((Int512Shuffle)(((AbstractShuffle)(s)).wrapIndexesTemplate()))); + public int laneSource(int i) { + return (int)toBitsVector().lane(i); + } + + @Override + @ForceInline + public void intoArray(int[] a, int offset) { + toBitsVector().intoArray(a, offset); + } + + @Override + @ForceInline + public final Int512Mask laneIsValid() { + return (Int512Mask) toBitsVector().compare(VectorOperators.GE, 0) + .cast(vspecies()); } @ForceInline @Override - public Int512Shuffle rearrange(VectorShuffle shuffle) { - Int512Shuffle s = (Int512Shuffle) shuffle; - byte[] reorder1 = reorder(); - byte[] reorder2 = s.reorder(); - byte[] r = new byte[reorder1.length]; - for (int i = 0; i < reorder1.length; i++) { - int ssi = reorder2[i]; - r[i] = reorder1[ssi]; // throws on exceptional index + public final Int512Shuffle rearrange(VectorShuffle shuffle) { + Int512Shuffle concreteShuffle = (Int512Shuffle) shuffle; + return (Int512Shuffle) toBitsVector().rearrange(concreteShuffle) + .toShuffle(vspecies(), false); + } + + @ForceInline + @Override + public final Int512Shuffle wrapIndexes() { + Int512Vector v = toBitsVector(); + if ((length() & (length() - 1)) == 0) { + v = (Int512Vector) v.lanewise(VectorOperators.AND, length() - 1); + } else { + v = (Int512Vector) v.blend(v.lanewise(VectorOperators.ADD, length()), + v.compare(VectorOperators.LT, 0)); } - return new Int512Shuffle(r); + return (Int512Shuffle) v.toShuffle(vspecies(), false); + } + + private static int[] prepare(int[] indices, int offset) { + int[] a = new int[VLENGTH]; + for (int i = 0; i < VLENGTH; i++) { + int si = indices[offset + i]; + si = partiallyWrapIndex(si, VLENGTH); + a[i] = (int)si; + } + return a; + } + + private static int[] prepare(IntUnaryOperator f) { + int[] a = new int[VLENGTH]; + for (int i = 0; i < VLENGTH; i++) { + int si = f.applyAsInt(i); + si = 
partiallyWrapIndex(si, VLENGTH); + a[i] = (int)si; + } + return a; + } + + private static boolean indicesInRange(int[] indices) { + int length = indices.length; + for (int si : indices) { + if (si >= (int)length || si < (int)(-length)) { + String msg = ("index "+si+"out of range ["+length+"] in "+ + java.util.Arrays.toString(indices)); + throw new AssertionError(msg); + } + } + return true; } } diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Int64Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Int64Vector.java index 98cd39d9beb..27963859a09 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Int64Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Int64Vector.java @@ -141,24 +141,15 @@ final class Int64Vector extends IntVector { @ForceInline Int64Shuffle iotaShuffle() { return Int64Shuffle.IOTA; } + @Override @ForceInline Int64Shuffle iotaShuffle(int start, int step, boolean wrap) { - if (wrap) { - return (Int64Shuffle)VectorSupport.shuffleIota(ETYPE, Int64Shuffle.class, VSPECIES, VLENGTH, start, step, 1, - (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (VectorIntrinsics.wrapToRange(i*lstep + lstart, l)))); - } else { - return (Int64Shuffle)VectorSupport.shuffleIota(ETYPE, Int64Shuffle.class, VSPECIES, VLENGTH, start, step, 0, - (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (i*lstep + lstart))); - } + return (Int64Shuffle) iotaShuffleTemplate(start, step, wrap); } @Override @ForceInline - Int64Shuffle shuffleFromBytes(byte[] reorder) { return new Int64Shuffle(reorder); } - - @Override - @ForceInline - Int64Shuffle shuffleFromArray(int[] indexes, int i) { return new Int64Shuffle(indexes, i); } + Int64Shuffle shuffleFromArray(int[] indices, int i) { return new Int64Shuffle(indices, i); } @Override @ForceInline @@ -357,9 +348,16 @@ final class Int64Vector extends IntVector { return (long) super.reduceLanesTemplate(op, Int64Mask.class, (Int64Mask) m); // specialized } + 
@Override @ForceInline - public VectorShuffle toShuffle() { - return super.toShuffleTemplate(Int64Shuffle.class); // specialize + final VectorShuffle bitsToShuffle(AbstractSpecies dsp) { + return bitsToShuffleTemplate(dsp); + } + + @Override + @ForceInline + public final Int64Shuffle toShuffle() { + return (Int64Shuffle) toShuffle(vspecies(), false); } // Specialized unary testing @@ -793,23 +791,26 @@ final class Int64Vector extends IntVector { static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM static final Class ETYPE = int.class; // used by the JVM - Int64Shuffle(byte[] reorder) { - super(VLENGTH, reorder); + Int64Shuffle(int[] indices) { + super(indices); + assert(VLENGTH == indices.length); + assert(indicesInRange(indices)); } - public Int64Shuffle(int[] reorder) { - super(VLENGTH, reorder); + Int64Shuffle(int[] indices, int i) { + this(prepare(indices, i)); } - public Int64Shuffle(int[] reorder, int i) { - super(VLENGTH, reorder, i); + Int64Shuffle(IntUnaryOperator fn) { + this(prepare(fn)); } - public Int64Shuffle(IntUnaryOperator fn) { - super(VLENGTH, fn); + int[] indices() { + return (int[])getPayload(); } @Override + @ForceInline public IntSpecies vspecies() { return VSPECIES; } @@ -817,47 +818,98 @@ final class Int64Vector extends IntVector { static { // There must be enough bits in the shuffle lanes to encode // VLENGTH valid indexes and VLENGTH exceptional ones. 
- assert(VLENGTH < Byte.MAX_VALUE); - assert(Byte.MIN_VALUE <= -VLENGTH); + assert(VLENGTH < Integer.MAX_VALUE); + assert(Integer.MIN_VALUE <= -VLENGTH); } static final Int64Shuffle IOTA = new Int64Shuffle(IDENTITY); @Override @ForceInline public Int64Vector toVector() { - return VectorSupport.shuffleToVector(VCLASS, ETYPE, Int64Shuffle.class, this, VLENGTH, - (s) -> ((Int64Vector)(((AbstractShuffle)(s)).toVectorTemplate()))); + return toBitsVector(); } @Override @ForceInline - public VectorShuffle cast(VectorSpecies s) { - AbstractSpecies species = (AbstractSpecies) s; - if (length() != species.laneCount()) - throw new IllegalArgumentException("VectorShuffle length and species length differ"); - int[] shuffleArray = toArray(); - return s.shuffleFromArray(shuffleArray, 0).check(s); + Int64Vector toBitsVector() { + return (Int64Vector) super.toBitsVectorTemplate(); + } + + @Override + Int64Vector toBitsVector0() { + return ((Int64Vector) vspecies().asIntegral().dummyVector()).vectorFactory(indices()); } @Override @ForceInline - public Int64Shuffle wrapIndexes() { - return VectorSupport.wrapShuffleIndexes(ETYPE, Int64Shuffle.class, this, VLENGTH, - (s) -> ((Int64Shuffle)(((AbstractShuffle)(s)).wrapIndexesTemplate()))); + public int laneSource(int i) { + return (int)toBitsVector().lane(i); + } + + @Override + @ForceInline + public void intoArray(int[] a, int offset) { + toBitsVector().intoArray(a, offset); + } + + @Override + @ForceInline + public final Int64Mask laneIsValid() { + return (Int64Mask) toBitsVector().compare(VectorOperators.GE, 0) + .cast(vspecies()); } @ForceInline @Override - public Int64Shuffle rearrange(VectorShuffle shuffle) { - Int64Shuffle s = (Int64Shuffle) shuffle; - byte[] reorder1 = reorder(); - byte[] reorder2 = s.reorder(); - byte[] r = new byte[reorder1.length]; - for (int i = 0; i < reorder1.length; i++) { - int ssi = reorder2[i]; - r[i] = reorder1[ssi]; // throws on exceptional index + public final Int64Shuffle rearrange(VectorShuffle 
shuffle) { + Int64Shuffle concreteShuffle = (Int64Shuffle) shuffle; + return (Int64Shuffle) toBitsVector().rearrange(concreteShuffle) + .toShuffle(vspecies(), false); + } + + @ForceInline + @Override + public final Int64Shuffle wrapIndexes() { + Int64Vector v = toBitsVector(); + if ((length() & (length() - 1)) == 0) { + v = (Int64Vector) v.lanewise(VectorOperators.AND, length() - 1); + } else { + v = (Int64Vector) v.blend(v.lanewise(VectorOperators.ADD, length()), + v.compare(VectorOperators.LT, 0)); } - return new Int64Shuffle(r); + return (Int64Shuffle) v.toShuffle(vspecies(), false); + } + + private static int[] prepare(int[] indices, int offset) { + int[] a = new int[VLENGTH]; + for (int i = 0; i < VLENGTH; i++) { + int si = indices[offset + i]; + si = partiallyWrapIndex(si, VLENGTH); + a[i] = (int)si; + } + return a; + } + + private static int[] prepare(IntUnaryOperator f) { + int[] a = new int[VLENGTH]; + for (int i = 0; i < VLENGTH; i++) { + int si = f.applyAsInt(i); + si = partiallyWrapIndex(si, VLENGTH); + a[i] = (int)si; + } + return a; + } + + private static boolean indicesInRange(int[] indices) { + int length = indices.length; + for (int si : indices) { + if (si >= (int)length || si < (int)(-length)) { + String msg = ("index "+si+"out of range ["+length+"] in "+ + java.util.Arrays.toString(indices)); + throw new AssertionError(msg); + } + } + return true; } } diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/IntMaxVector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/IntMaxVector.java index f301161b980..d5140567ca0 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/IntMaxVector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/IntMaxVector.java @@ -141,24 +141,15 @@ final class IntMaxVector extends IntVector { @ForceInline IntMaxShuffle iotaShuffle() { return IntMaxShuffle.IOTA; } + @Override @ForceInline IntMaxShuffle iotaShuffle(int start, int step, boolean wrap) { - 
if (wrap) { - return (IntMaxShuffle)VectorSupport.shuffleIota(ETYPE, IntMaxShuffle.class, VSPECIES, VLENGTH, start, step, 1, - (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (VectorIntrinsics.wrapToRange(i*lstep + lstart, l)))); - } else { - return (IntMaxShuffle)VectorSupport.shuffleIota(ETYPE, IntMaxShuffle.class, VSPECIES, VLENGTH, start, step, 0, - (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (i*lstep + lstart))); - } + return (IntMaxShuffle) iotaShuffleTemplate(start, step, wrap); } @Override @ForceInline - IntMaxShuffle shuffleFromBytes(byte[] reorder) { return new IntMaxShuffle(reorder); } - - @Override - @ForceInline - IntMaxShuffle shuffleFromArray(int[] indexes, int i) { return new IntMaxShuffle(indexes, i); } + IntMaxShuffle shuffleFromArray(int[] indices, int i) { return new IntMaxShuffle(indices, i); } @Override @ForceInline @@ -357,9 +348,16 @@ final class IntMaxVector extends IntVector { return (long) super.reduceLanesTemplate(op, IntMaxMask.class, (IntMaxMask) m); // specialized } + @Override @ForceInline - public VectorShuffle toShuffle() { - return super.toShuffleTemplate(IntMaxShuffle.class); // specialize + final VectorShuffle bitsToShuffle(AbstractSpecies dsp) { + return bitsToShuffleTemplate(dsp); + } + + @Override + @ForceInline + public final IntMaxShuffle toShuffle() { + return (IntMaxShuffle) toShuffle(vspecies(), false); } // Specialized unary testing @@ -802,23 +800,26 @@ final class IntMaxVector extends IntVector { static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM static final Class ETYPE = int.class; // used by the JVM - IntMaxShuffle(byte[] reorder) { - super(VLENGTH, reorder); + IntMaxShuffle(int[] indices) { + super(indices); + assert(VLENGTH == indices.length); + assert(indicesInRange(indices)); } - public IntMaxShuffle(int[] reorder) { - super(VLENGTH, reorder); + IntMaxShuffle(int[] indices, int i) { + this(prepare(indices, i)); } - public IntMaxShuffle(int[] reorder, int i) { - super(VLENGTH, reorder, i); + 
IntMaxShuffle(IntUnaryOperator fn) { + this(prepare(fn)); } - public IntMaxShuffle(IntUnaryOperator fn) { - super(VLENGTH, fn); + int[] indices() { + return (int[])getPayload(); } @Override + @ForceInline public IntSpecies vspecies() { return VSPECIES; } @@ -826,47 +827,98 @@ final class IntMaxVector extends IntVector { static { // There must be enough bits in the shuffle lanes to encode // VLENGTH valid indexes and VLENGTH exceptional ones. - assert(VLENGTH < Byte.MAX_VALUE); - assert(Byte.MIN_VALUE <= -VLENGTH); + assert(VLENGTH < Integer.MAX_VALUE); + assert(Integer.MIN_VALUE <= -VLENGTH); } static final IntMaxShuffle IOTA = new IntMaxShuffle(IDENTITY); @Override @ForceInline public IntMaxVector toVector() { - return VectorSupport.shuffleToVector(VCLASS, ETYPE, IntMaxShuffle.class, this, VLENGTH, - (s) -> ((IntMaxVector)(((AbstractShuffle)(s)).toVectorTemplate()))); + return toBitsVector(); } @Override @ForceInline - public VectorShuffle cast(VectorSpecies s) { - AbstractSpecies species = (AbstractSpecies) s; - if (length() != species.laneCount()) - throw new IllegalArgumentException("VectorShuffle length and species length differ"); - int[] shuffleArray = toArray(); - return s.shuffleFromArray(shuffleArray, 0).check(s); + IntMaxVector toBitsVector() { + return (IntMaxVector) super.toBitsVectorTemplate(); + } + + @Override + IntMaxVector toBitsVector0() { + return ((IntMaxVector) vspecies().asIntegral().dummyVector()).vectorFactory(indices()); } @Override @ForceInline - public IntMaxShuffle wrapIndexes() { - return VectorSupport.wrapShuffleIndexes(ETYPE, IntMaxShuffle.class, this, VLENGTH, - (s) -> ((IntMaxShuffle)(((AbstractShuffle)(s)).wrapIndexesTemplate()))); + public int laneSource(int i) { + return (int)toBitsVector().lane(i); + } + + @Override + @ForceInline + public void intoArray(int[] a, int offset) { + toBitsVector().intoArray(a, offset); + } + + @Override + @ForceInline + public final IntMaxMask laneIsValid() { + return (IntMaxMask) 
toBitsVector().compare(VectorOperators.GE, 0) + .cast(vspecies()); } @ForceInline @Override - public IntMaxShuffle rearrange(VectorShuffle shuffle) { - IntMaxShuffle s = (IntMaxShuffle) shuffle; - byte[] reorder1 = reorder(); - byte[] reorder2 = s.reorder(); - byte[] r = new byte[reorder1.length]; - for (int i = 0; i < reorder1.length; i++) { - int ssi = reorder2[i]; - r[i] = reorder1[ssi]; // throws on exceptional index + public final IntMaxShuffle rearrange(VectorShuffle shuffle) { + IntMaxShuffle concreteShuffle = (IntMaxShuffle) shuffle; + return (IntMaxShuffle) toBitsVector().rearrange(concreteShuffle) + .toShuffle(vspecies(), false); + } + + @ForceInline + @Override + public final IntMaxShuffle wrapIndexes() { + IntMaxVector v = toBitsVector(); + if ((length() & (length() - 1)) == 0) { + v = (IntMaxVector) v.lanewise(VectorOperators.AND, length() - 1); + } else { + v = (IntMaxVector) v.blend(v.lanewise(VectorOperators.ADD, length()), + v.compare(VectorOperators.LT, 0)); } - return new IntMaxShuffle(r); + return (IntMaxShuffle) v.toShuffle(vspecies(), false); + } + + private static int[] prepare(int[] indices, int offset) { + int[] a = new int[VLENGTH]; + for (int i = 0; i < VLENGTH; i++) { + int si = indices[offset + i]; + si = partiallyWrapIndex(si, VLENGTH); + a[i] = (int)si; + } + return a; + } + + private static int[] prepare(IntUnaryOperator f) { + int[] a = new int[VLENGTH]; + for (int i = 0; i < VLENGTH; i++) { + int si = f.applyAsInt(i); + si = partiallyWrapIndex(si, VLENGTH); + a[i] = (int)si; + } + return a; + } + + private static boolean indicesInRange(int[] indices) { + int length = indices.length; + for (int si : indices) { + if (si >= (int)length || si < (int)(-length)) { + String msg = ("index "+si+"out of range ["+length+"] in "+ + java.util.Arrays.toString(indices)); + throw new AssertionError(msg); + } + } + return true; } } diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/IntVector.java 
b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/IntVector.java index 390c8026083..063b0c0d8e8 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/IntVector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/IntVector.java @@ -1099,7 +1099,7 @@ public abstract class IntVector extends AbstractVector { // and broadcast, but it would be more surprising not to continue // the obvious pattern started by unary and binary. - /** + /** * {@inheritDoc} * @see #lanewise(VectorOperators.Ternary,int,int,VectorMask) * @see #lanewise(VectorOperators.Ternary,Vector,int,VectorMask) @@ -2293,9 +2293,10 @@ public abstract class IntVector extends AbstractVector { IntVector that = (IntVector) v1; that.check(this); Objects.checkIndex(origin, length() + 1); - VectorShuffle iota = iotaShuffle(); - VectorMask blendMask = iota.toVector().compare(VectorOperators.LT, (broadcast((int)(length() - origin)))); - iota = iotaShuffle(origin, 1, true); + IntVector iotaVector = (IntVector) iotaShuffle().toBitsVector(); + IntVector filter = broadcast((int)(length() - origin)); + VectorMask blendMask = iotaVector.compare(VectorOperators.LT, filter); + AbstractShuffle iota = iotaShuffle(origin, 1, true); return that.rearrange(iota).blend(this.rearrange(iota), blendMask); } @@ -2323,9 +2324,10 @@ public abstract class IntVector extends AbstractVector { @ForceInline IntVector sliceTemplate(int origin) { Objects.checkIndex(origin, length() + 1); - VectorShuffle iota = iotaShuffle(); - VectorMask blendMask = iota.toVector().compare(VectorOperators.LT, (broadcast((int)(length() - origin)))); - iota = iotaShuffle(origin, 1, true); + IntVector iotaVector = (IntVector) iotaShuffle().toBitsVector(); + IntVector filter = broadcast((int)(length() - origin)); + VectorMask blendMask = iotaVector.compare(VectorOperators.LT, filter); + AbstractShuffle iota = iotaShuffle(origin, 1, true); return vspecies().zero().blend(this.rearrange(iota), blendMask); } @@ -2344,10 
+2346,10 @@ public abstract class IntVector extends AbstractVector { IntVector that = (IntVector) w; that.check(this); Objects.checkIndex(origin, length() + 1); - VectorShuffle iota = iotaShuffle(); - VectorMask blendMask = iota.toVector().compare((part == 0) ? VectorOperators.GE : VectorOperators.LT, - (broadcast((int)(origin)))); - iota = iotaShuffle(-origin, 1, true); + IntVector iotaVector = (IntVector) iotaShuffle().toBitsVector(); + IntVector filter = broadcast((int)origin); + VectorMask blendMask = iotaVector.compare((part == 0) ? VectorOperators.GE : VectorOperators.LT, filter); + AbstractShuffle iota = iotaShuffle(-origin, 1, true); return that.blend(this.rearrange(iota), blendMask); } @@ -2384,10 +2386,10 @@ public abstract class IntVector extends AbstractVector { IntVector unsliceTemplate(int origin) { Objects.checkIndex(origin, length() + 1); - VectorShuffle iota = iotaShuffle(); - VectorMask blendMask = iota.toVector().compare(VectorOperators.GE, - (broadcast((int)(origin)))); - iota = iotaShuffle(-origin, 1, true); + IntVector iotaVector = (IntVector) iotaShuffle().toBitsVector(); + IntVector filter = broadcast((int)origin); + VectorMask blendMask = iotaVector.compare(VectorOperators.GE, filter); + AbstractShuffle iota = iotaShuffle(-origin, 1, true); return vspecies().zero().blend(this.rearrange(iota), blendMask); } @@ -2410,13 +2412,11 @@ public abstract class IntVector extends AbstractVector { final > IntVector rearrangeTemplate(Class shuffletype, S shuffle) { - @SuppressWarnings("unchecked") - S ws = (S) shuffle.wrapIndexes(); return VectorSupport.rearrangeOp( getClass(), shuffletype, null, int.class, length(), - this, ws, null, + this, shuffle, null, (v1, s_, m_) -> v1.uOp((i, a) -> { - int ei = s_.laneSource(i); + int ei = Integer.remainderUnsigned(s_.laneSource(i), v1.length()); return v1.lane(ei); })); } @@ -2439,13 +2439,11 @@ public abstract class IntVector extends AbstractVector { M m) { m.check(masktype, this); - 
@SuppressWarnings("unchecked") - S ws = (S) shuffle.wrapIndexes(); return VectorSupport.rearrangeOp( getClass(), shuffletype, masktype, int.class, length(), - this, ws, m, + this, shuffle, m, (v1, s_, m_) -> v1.uOp((i, a) -> { - int ei = s_.laneSource(i); + int ei = Integer.remainderUnsigned(s_.laneSource(i), v1.length()); return !m_.laneIsSet(i) ? 0 : v1.lane(ei); })); } @@ -2466,30 +2464,29 @@ public abstract class IntVector extends AbstractVector { S shuffle, IntVector v) { VectorMask valid = shuffle.laneIsValid(); - @SuppressWarnings("unchecked") - S ws = (S) shuffle.wrapIndexes(); IntVector r0 = VectorSupport.rearrangeOp( getClass(), shuffletype, null, int.class, length(), - this, ws, null, + this, shuffle, null, (v0, s_, m_) -> v0.uOp((i, a) -> { - int ei = s_.laneSource(i); + int ei = Integer.remainderUnsigned(s_.laneSource(i), v0.length()); return v0.lane(ei); })); IntVector r1 = VectorSupport.rearrangeOp( getClass(), shuffletype, null, int.class, length(), - v, ws, null, + v, shuffle, null, (v1, s_, m_) -> v1.uOp((i, a) -> { - int ei = s_.laneSource(i); + int ei = Integer.remainderUnsigned(s_.laneSource(i), v1.length()); return v1.lane(ei); })); return r1.blend(r0, valid); } + @Override @ForceInline - private final - VectorShuffle toShuffle0(IntSpecies dsp) { + final VectorShuffle bitsToShuffle0(AbstractSpecies dsp) { + assert(dsp.length() == vspecies().length()); int[] a = toArray(); int[] sa = new int[a.length]; for (int i = 0; i < a.length; i++) { @@ -2498,16 +2495,18 @@ public abstract class IntVector extends AbstractVector { return VectorShuffle.fromArray(dsp, sa, 0); } - /*package-private*/ @ForceInline - final - VectorShuffle toShuffleTemplate(Class shuffleType) { - IntSpecies vsp = vspecies(); - return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST, - getClass(), int.class, length(), - shuffleType, byte.class, length(), - this, vsp, - IntVector::toShuffle0); + final + VectorShuffle toShuffle(AbstractSpecies dsp, boolean wrap) { + 
assert(dsp.elementSize() == vspecies().elementSize()); + IntVector idx = this; + IntVector wrapped = idx.lanewise(VectorOperators.AND, length() - 1); + if (!wrap) { + IntVector wrappedEx = wrapped.lanewise(VectorOperators.SUB, length()); + VectorMask inBound = wrapped.compare(VectorOperators.EQ, idx); + wrapped = wrappedEx.blend(wrapped, inBound); + } + return wrapped.bitsToShuffle(dsp); } /** @@ -3849,9 +3848,10 @@ public abstract class IntVector extends AbstractVector { private IntSpecies(VectorShape shape, Class vectorType, Class> maskType, + Class> shuffleType, Function vectorFactory) { super(shape, LaneType.of(int.class), - vectorType, maskType, + vectorType, maskType, shuffleType, vectorFactory); assert(this.elementSize() == Integer.SIZE); } @@ -4137,6 +4137,7 @@ public abstract class IntVector extends AbstractVector { = new IntSpecies(VectorShape.S_64_BIT, Int64Vector.class, Int64Vector.Int64Mask.class, + Int64Vector.Int64Shuffle.class, Int64Vector::new); /** Species representing {@link IntVector}s of {@link VectorShape#S_128_BIT VectorShape.S_128_BIT}. */ @@ -4144,6 +4145,7 @@ public abstract class IntVector extends AbstractVector { = new IntSpecies(VectorShape.S_128_BIT, Int128Vector.class, Int128Vector.Int128Mask.class, + Int128Vector.Int128Shuffle.class, Int128Vector::new); /** Species representing {@link IntVector}s of {@link VectorShape#S_256_BIT VectorShape.S_256_BIT}. */ @@ -4151,6 +4153,7 @@ public abstract class IntVector extends AbstractVector { = new IntSpecies(VectorShape.S_256_BIT, Int256Vector.class, Int256Vector.Int256Mask.class, + Int256Vector.Int256Shuffle.class, Int256Vector::new); /** Species representing {@link IntVector}s of {@link VectorShape#S_512_BIT VectorShape.S_512_BIT}. 
*/ @@ -4158,6 +4161,7 @@ public abstract class IntVector extends AbstractVector { = new IntSpecies(VectorShape.S_512_BIT, Int512Vector.class, Int512Vector.Int512Mask.class, + Int512Vector.Int512Shuffle.class, Int512Vector::new); /** Species representing {@link IntVector}s of {@link VectorShape#S_Max_BIT VectorShape.S_Max_BIT}. */ @@ -4165,6 +4169,7 @@ public abstract class IntVector extends AbstractVector { = new IntSpecies(VectorShape.S_Max_BIT, IntMaxVector.class, IntMaxVector.IntMaxMask.class, + IntMaxVector.IntMaxShuffle.class, IntMaxVector::new); /** diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Long128Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Long128Vector.java index c65816a4d6c..8b57f1c498c 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Long128Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Long128Vector.java @@ -136,24 +136,15 @@ final class Long128Vector extends LongVector { @ForceInline Long128Shuffle iotaShuffle() { return Long128Shuffle.IOTA; } + @Override @ForceInline Long128Shuffle iotaShuffle(int start, int step, boolean wrap) { - if (wrap) { - return (Long128Shuffle)VectorSupport.shuffleIota(ETYPE, Long128Shuffle.class, VSPECIES, VLENGTH, start, step, 1, - (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (VectorIntrinsics.wrapToRange(i*lstep + lstart, l)))); - } else { - return (Long128Shuffle)VectorSupport.shuffleIota(ETYPE, Long128Shuffle.class, VSPECIES, VLENGTH, start, step, 0, - (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (i*lstep + lstart))); - } + return (Long128Shuffle) iotaShuffleTemplate(start, step, wrap); } @Override @ForceInline - Long128Shuffle shuffleFromBytes(byte[] reorder) { return new Long128Shuffle(reorder); } - - @Override - @ForceInline - Long128Shuffle shuffleFromArray(int[] indexes, int i) { return new Long128Shuffle(indexes, i); } + Long128Shuffle shuffleFromArray(int[] indices, int i) { return new 
Long128Shuffle(indices, i); } @Override @ForceInline @@ -352,9 +343,16 @@ final class Long128Vector extends LongVector { return (long) super.reduceLanesTemplate(op, Long128Mask.class, (Long128Mask) m); // specialized } + @Override @ForceInline - public VectorShuffle toShuffle() { - return super.toShuffleTemplate(Long128Shuffle.class); // specialize + final VectorShuffle bitsToShuffle(AbstractSpecies dsp) { + return bitsToShuffleTemplate(dsp); + } + + @Override + @ForceInline + public final Long128Shuffle toShuffle() { + return (Long128Shuffle) toShuffle(vspecies(), false); } // Specialized unary testing @@ -783,23 +781,26 @@ final class Long128Vector extends LongVector { static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM static final Class ETYPE = long.class; // used by the JVM - Long128Shuffle(byte[] reorder) { - super(VLENGTH, reorder); + Long128Shuffle(long[] indices) { + super(indices); + assert(VLENGTH == indices.length); + assert(indicesInRange(indices)); } - public Long128Shuffle(int[] reorder) { - super(VLENGTH, reorder); + Long128Shuffle(int[] indices, int i) { + this(prepare(indices, i)); } - public Long128Shuffle(int[] reorder, int i) { - super(VLENGTH, reorder, i); + Long128Shuffle(IntUnaryOperator fn) { + this(prepare(fn)); } - public Long128Shuffle(IntUnaryOperator fn) { - super(VLENGTH, fn); + long[] indices() { + return (long[])getPayload(); } @Override + @ForceInline public LongSpecies vspecies() { return VSPECIES; } @@ -807,47 +808,122 @@ final class Long128Vector extends LongVector { static { // There must be enough bits in the shuffle lanes to encode // VLENGTH valid indexes and VLENGTH exceptional ones. 
- assert(VLENGTH < Byte.MAX_VALUE); - assert(Byte.MIN_VALUE <= -VLENGTH); + assert(VLENGTH < Long.MAX_VALUE); + assert(Long.MIN_VALUE <= -VLENGTH); } static final Long128Shuffle IOTA = new Long128Shuffle(IDENTITY); @Override @ForceInline public Long128Vector toVector() { - return VectorSupport.shuffleToVector(VCLASS, ETYPE, Long128Shuffle.class, this, VLENGTH, - (s) -> ((Long128Vector)(((AbstractShuffle)(s)).toVectorTemplate()))); + return toBitsVector(); } @Override @ForceInline - public VectorShuffle cast(VectorSpecies s) { - AbstractSpecies species = (AbstractSpecies) s; - if (length() != species.laneCount()) - throw new IllegalArgumentException("VectorShuffle length and species length differ"); - int[] shuffleArray = toArray(); - return s.shuffleFromArray(shuffleArray, 0).check(s); + Long128Vector toBitsVector() { + return (Long128Vector) super.toBitsVectorTemplate(); + } + + @Override + Long128Vector toBitsVector0() { + return ((Long128Vector) vspecies().asIntegral().dummyVector()).vectorFactory(indices()); } @Override @ForceInline - public Long128Shuffle wrapIndexes() { - return VectorSupport.wrapShuffleIndexes(ETYPE, Long128Shuffle.class, this, VLENGTH, - (s) -> ((Long128Shuffle)(((AbstractShuffle)(s)).wrapIndexesTemplate()))); + public int laneSource(int i) { + return (int)toBitsVector().lane(i); } - @ForceInline @Override - public Long128Shuffle rearrange(VectorShuffle shuffle) { - Long128Shuffle s = (Long128Shuffle) shuffle; - byte[] reorder1 = reorder(); - byte[] reorder2 = s.reorder(); - byte[] r = new byte[reorder1.length]; - for (int i = 0; i < reorder1.length; i++) { - int ssi = reorder2[i]; - r[i] = reorder1[ssi]; // throws on exceptional index + @ForceInline + public void intoArray(int[] a, int offset) { + switch (length()) { + case 1 -> a[offset] = laneSource(0); + case 2 -> toBitsVector() + .convertShape(VectorOperators.L2I, IntVector.SPECIES_64, 0) + .reinterpretAsInts() + .intoArray(a, offset); + case 4 -> toBitsVector() + 
.convertShape(VectorOperators.L2I, IntVector.SPECIES_128, 0) + .reinterpretAsInts() + .intoArray(a, offset); + case 8 -> toBitsVector() + .convertShape(VectorOperators.L2I, IntVector.SPECIES_256, 0) + .reinterpretAsInts() + .intoArray(a, offset); + case 16 -> toBitsVector() + .convertShape(VectorOperators.L2I, IntVector.SPECIES_512, 0) + .reinterpretAsInts() + .intoArray(a, offset); + default -> { + VectorIntrinsics.checkFromIndexSize(offset, length(), a.length); + for (int i = 0; i < length(); i++) { + a[offset + i] = laneSource(i); + } + } } - return new Long128Shuffle(r); + } + + @Override + @ForceInline + public final Long128Mask laneIsValid() { + return (Long128Mask) toBitsVector().compare(VectorOperators.GE, 0) + .cast(vspecies()); + } + + @ForceInline + @Override + public final Long128Shuffle rearrange(VectorShuffle shuffle) { + Long128Shuffle concreteShuffle = (Long128Shuffle) shuffle; + return (Long128Shuffle) toBitsVector().rearrange(concreteShuffle) + .toShuffle(vspecies(), false); + } + + @ForceInline + @Override + public final Long128Shuffle wrapIndexes() { + Long128Vector v = toBitsVector(); + if ((length() & (length() - 1)) == 0) { + v = (Long128Vector) v.lanewise(VectorOperators.AND, length() - 1); + } else { + v = (Long128Vector) v.blend(v.lanewise(VectorOperators.ADD, length()), + v.compare(VectorOperators.LT, 0)); + } + return (Long128Shuffle) v.toShuffle(vspecies(), false); + } + + private static long[] prepare(int[] indices, int offset) { + long[] a = new long[VLENGTH]; + for (int i = 0; i < VLENGTH; i++) { + int si = indices[offset + i]; + si = partiallyWrapIndex(si, VLENGTH); + a[i] = (long)si; + } + return a; + } + + private static long[] prepare(IntUnaryOperator f) { + long[] a = new long[VLENGTH]; + for (int i = 0; i < VLENGTH; i++) { + int si = f.applyAsInt(i); + si = partiallyWrapIndex(si, VLENGTH); + a[i] = (long)si; + } + return a; + } + + private static boolean indicesInRange(long[] indices) { + int length = indices.length; + for 
(long si : indices) { + if (si >= (long)length || si < (long)(-length)) { + String msg = ("index "+si+"out of range ["+length+"] in "+ + java.util.Arrays.toString(indices)); + throw new AssertionError(msg); + } + } + return true; } } diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Long256Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Long256Vector.java index 7ca3e43e92b..3f808fe1a2f 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Long256Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Long256Vector.java @@ -136,24 +136,15 @@ final class Long256Vector extends LongVector { @ForceInline Long256Shuffle iotaShuffle() { return Long256Shuffle.IOTA; } + @Override @ForceInline Long256Shuffle iotaShuffle(int start, int step, boolean wrap) { - if (wrap) { - return (Long256Shuffle)VectorSupport.shuffleIota(ETYPE, Long256Shuffle.class, VSPECIES, VLENGTH, start, step, 1, - (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (VectorIntrinsics.wrapToRange(i*lstep + lstart, l)))); - } else { - return (Long256Shuffle)VectorSupport.shuffleIota(ETYPE, Long256Shuffle.class, VSPECIES, VLENGTH, start, step, 0, - (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (i*lstep + lstart))); - } + return (Long256Shuffle) iotaShuffleTemplate(start, step, wrap); } @Override @ForceInline - Long256Shuffle shuffleFromBytes(byte[] reorder) { return new Long256Shuffle(reorder); } - - @Override - @ForceInline - Long256Shuffle shuffleFromArray(int[] indexes, int i) { return new Long256Shuffle(indexes, i); } + Long256Shuffle shuffleFromArray(int[] indices, int i) { return new Long256Shuffle(indices, i); } @Override @ForceInline @@ -352,9 +343,16 @@ final class Long256Vector extends LongVector { return (long) super.reduceLanesTemplate(op, Long256Mask.class, (Long256Mask) m); // specialized } + @Override @ForceInline - public VectorShuffle toShuffle() { - return super.toShuffleTemplate(Long256Shuffle.class); // 
specialize + final VectorShuffle bitsToShuffle(AbstractSpecies dsp) { + return bitsToShuffleTemplate(dsp); + } + + @Override + @ForceInline + public final Long256Shuffle toShuffle() { + return (Long256Shuffle) toShuffle(vspecies(), false); } // Specialized unary testing @@ -787,23 +785,26 @@ final class Long256Vector extends LongVector { static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM static final Class ETYPE = long.class; // used by the JVM - Long256Shuffle(byte[] reorder) { - super(VLENGTH, reorder); + Long256Shuffle(long[] indices) { + super(indices); + assert(VLENGTH == indices.length); + assert(indicesInRange(indices)); } - public Long256Shuffle(int[] reorder) { - super(VLENGTH, reorder); + Long256Shuffle(int[] indices, int i) { + this(prepare(indices, i)); } - public Long256Shuffle(int[] reorder, int i) { - super(VLENGTH, reorder, i); + Long256Shuffle(IntUnaryOperator fn) { + this(prepare(fn)); } - public Long256Shuffle(IntUnaryOperator fn) { - super(VLENGTH, fn); + long[] indices() { + return (long[])getPayload(); } @Override + @ForceInline public LongSpecies vspecies() { return VSPECIES; } @@ -811,47 +812,122 @@ final class Long256Vector extends LongVector { static { // There must be enough bits in the shuffle lanes to encode // VLENGTH valid indexes and VLENGTH exceptional ones. 
- assert(VLENGTH < Byte.MAX_VALUE); - assert(Byte.MIN_VALUE <= -VLENGTH); + assert(VLENGTH < Long.MAX_VALUE); + assert(Long.MIN_VALUE <= -VLENGTH); } static final Long256Shuffle IOTA = new Long256Shuffle(IDENTITY); @Override @ForceInline public Long256Vector toVector() { - return VectorSupport.shuffleToVector(VCLASS, ETYPE, Long256Shuffle.class, this, VLENGTH, - (s) -> ((Long256Vector)(((AbstractShuffle)(s)).toVectorTemplate()))); + return toBitsVector(); } @Override @ForceInline - public VectorShuffle cast(VectorSpecies s) { - AbstractSpecies species = (AbstractSpecies) s; - if (length() != species.laneCount()) - throw new IllegalArgumentException("VectorShuffle length and species length differ"); - int[] shuffleArray = toArray(); - return s.shuffleFromArray(shuffleArray, 0).check(s); + Long256Vector toBitsVector() { + return (Long256Vector) super.toBitsVectorTemplate(); + } + + @Override + Long256Vector toBitsVector0() { + return ((Long256Vector) vspecies().asIntegral().dummyVector()).vectorFactory(indices()); } @Override @ForceInline - public Long256Shuffle wrapIndexes() { - return VectorSupport.wrapShuffleIndexes(ETYPE, Long256Shuffle.class, this, VLENGTH, - (s) -> ((Long256Shuffle)(((AbstractShuffle)(s)).wrapIndexesTemplate()))); + public int laneSource(int i) { + return (int)toBitsVector().lane(i); } - @ForceInline @Override - public Long256Shuffle rearrange(VectorShuffle shuffle) { - Long256Shuffle s = (Long256Shuffle) shuffle; - byte[] reorder1 = reorder(); - byte[] reorder2 = s.reorder(); - byte[] r = new byte[reorder1.length]; - for (int i = 0; i < reorder1.length; i++) { - int ssi = reorder2[i]; - r[i] = reorder1[ssi]; // throws on exceptional index + @ForceInline + public void intoArray(int[] a, int offset) { + switch (length()) { + case 1 -> a[offset] = laneSource(0); + case 2 -> toBitsVector() + .convertShape(VectorOperators.L2I, IntVector.SPECIES_64, 0) + .reinterpretAsInts() + .intoArray(a, offset); + case 4 -> toBitsVector() + 
.convertShape(VectorOperators.L2I, IntVector.SPECIES_128, 0) + .reinterpretAsInts() + .intoArray(a, offset); + case 8 -> toBitsVector() + .convertShape(VectorOperators.L2I, IntVector.SPECIES_256, 0) + .reinterpretAsInts() + .intoArray(a, offset); + case 16 -> toBitsVector() + .convertShape(VectorOperators.L2I, IntVector.SPECIES_512, 0) + .reinterpretAsInts() + .intoArray(a, offset); + default -> { + VectorIntrinsics.checkFromIndexSize(offset, length(), a.length); + for (int i = 0; i < length(); i++) { + a[offset + i] = laneSource(i); + } + } } - return new Long256Shuffle(r); + } + + @Override + @ForceInline + public final Long256Mask laneIsValid() { + return (Long256Mask) toBitsVector().compare(VectorOperators.GE, 0) + .cast(vspecies()); + } + + @ForceInline + @Override + public final Long256Shuffle rearrange(VectorShuffle shuffle) { + Long256Shuffle concreteShuffle = (Long256Shuffle) shuffle; + return (Long256Shuffle) toBitsVector().rearrange(concreteShuffle) + .toShuffle(vspecies(), false); + } + + @ForceInline + @Override + public final Long256Shuffle wrapIndexes() { + Long256Vector v = toBitsVector(); + if ((length() & (length() - 1)) == 0) { + v = (Long256Vector) v.lanewise(VectorOperators.AND, length() - 1); + } else { + v = (Long256Vector) v.blend(v.lanewise(VectorOperators.ADD, length()), + v.compare(VectorOperators.LT, 0)); + } + return (Long256Shuffle) v.toShuffle(vspecies(), false); + } + + private static long[] prepare(int[] indices, int offset) { + long[] a = new long[VLENGTH]; + for (int i = 0; i < VLENGTH; i++) { + int si = indices[offset + i]; + si = partiallyWrapIndex(si, VLENGTH); + a[i] = (long)si; + } + return a; + } + + private static long[] prepare(IntUnaryOperator f) { + long[] a = new long[VLENGTH]; + for (int i = 0; i < VLENGTH; i++) { + int si = f.applyAsInt(i); + si = partiallyWrapIndex(si, VLENGTH); + a[i] = (long)si; + } + return a; + } + + private static boolean indicesInRange(long[] indices) { + int length = indices.length; + for 
(long si : indices) { + if (si >= (long)length || si < (long)(-length)) { + String msg = ("index "+si+"out of range ["+length+"] in "+ + java.util.Arrays.toString(indices)); + throw new AssertionError(msg); + } + } + return true; } } diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Long512Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Long512Vector.java index 317cac1f110..3d5e00b09c6 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Long512Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Long512Vector.java @@ -136,24 +136,15 @@ final class Long512Vector extends LongVector { @ForceInline Long512Shuffle iotaShuffle() { return Long512Shuffle.IOTA; } + @Override @ForceInline Long512Shuffle iotaShuffle(int start, int step, boolean wrap) { - if (wrap) { - return (Long512Shuffle)VectorSupport.shuffleIota(ETYPE, Long512Shuffle.class, VSPECIES, VLENGTH, start, step, 1, - (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (VectorIntrinsics.wrapToRange(i*lstep + lstart, l)))); - } else { - return (Long512Shuffle)VectorSupport.shuffleIota(ETYPE, Long512Shuffle.class, VSPECIES, VLENGTH, start, step, 0, - (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (i*lstep + lstart))); - } + return (Long512Shuffle) iotaShuffleTemplate(start, step, wrap); } @Override @ForceInline - Long512Shuffle shuffleFromBytes(byte[] reorder) { return new Long512Shuffle(reorder); } - - @Override - @ForceInline - Long512Shuffle shuffleFromArray(int[] indexes, int i) { return new Long512Shuffle(indexes, i); } + Long512Shuffle shuffleFromArray(int[] indices, int i) { return new Long512Shuffle(indices, i); } @Override @ForceInline @@ -352,9 +343,16 @@ final class Long512Vector extends LongVector { return (long) super.reduceLanesTemplate(op, Long512Mask.class, (Long512Mask) m); // specialized } + @Override @ForceInline - public VectorShuffle toShuffle() { - return super.toShuffleTemplate(Long512Shuffle.class); // 
specialize + final VectorShuffle bitsToShuffle(AbstractSpecies dsp) { + return bitsToShuffleTemplate(dsp); + } + + @Override + @ForceInline + public final Long512Shuffle toShuffle() { + return (Long512Shuffle) toShuffle(vspecies(), false); } // Specialized unary testing @@ -795,23 +793,26 @@ final class Long512Vector extends LongVector { static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM static final Class ETYPE = long.class; // used by the JVM - Long512Shuffle(byte[] reorder) { - super(VLENGTH, reorder); + Long512Shuffle(long[] indices) { + super(indices); + assert(VLENGTH == indices.length); + assert(indicesInRange(indices)); } - public Long512Shuffle(int[] reorder) { - super(VLENGTH, reorder); + Long512Shuffle(int[] indices, int i) { + this(prepare(indices, i)); } - public Long512Shuffle(int[] reorder, int i) { - super(VLENGTH, reorder, i); + Long512Shuffle(IntUnaryOperator fn) { + this(prepare(fn)); } - public Long512Shuffle(IntUnaryOperator fn) { - super(VLENGTH, fn); + long[] indices() { + return (long[])getPayload(); } @Override + @ForceInline public LongSpecies vspecies() { return VSPECIES; } @@ -819,47 +820,122 @@ final class Long512Vector extends LongVector { static { // There must be enough bits in the shuffle lanes to encode // VLENGTH valid indexes and VLENGTH exceptional ones. 
- assert(VLENGTH < Byte.MAX_VALUE); - assert(Byte.MIN_VALUE <= -VLENGTH); + assert(VLENGTH < Long.MAX_VALUE); + assert(Long.MIN_VALUE <= -VLENGTH); } static final Long512Shuffle IOTA = new Long512Shuffle(IDENTITY); @Override @ForceInline public Long512Vector toVector() { - return VectorSupport.shuffleToVector(VCLASS, ETYPE, Long512Shuffle.class, this, VLENGTH, - (s) -> ((Long512Vector)(((AbstractShuffle)(s)).toVectorTemplate()))); + return toBitsVector(); } @Override @ForceInline - public VectorShuffle cast(VectorSpecies s) { - AbstractSpecies species = (AbstractSpecies) s; - if (length() != species.laneCount()) - throw new IllegalArgumentException("VectorShuffle length and species length differ"); - int[] shuffleArray = toArray(); - return s.shuffleFromArray(shuffleArray, 0).check(s); + Long512Vector toBitsVector() { + return (Long512Vector) super.toBitsVectorTemplate(); + } + + @Override + Long512Vector toBitsVector0() { + return ((Long512Vector) vspecies().asIntegral().dummyVector()).vectorFactory(indices()); } @Override @ForceInline - public Long512Shuffle wrapIndexes() { - return VectorSupport.wrapShuffleIndexes(ETYPE, Long512Shuffle.class, this, VLENGTH, - (s) -> ((Long512Shuffle)(((AbstractShuffle)(s)).wrapIndexesTemplate()))); + public int laneSource(int i) { + return (int)toBitsVector().lane(i); } - @ForceInline @Override - public Long512Shuffle rearrange(VectorShuffle shuffle) { - Long512Shuffle s = (Long512Shuffle) shuffle; - byte[] reorder1 = reorder(); - byte[] reorder2 = s.reorder(); - byte[] r = new byte[reorder1.length]; - for (int i = 0; i < reorder1.length; i++) { - int ssi = reorder2[i]; - r[i] = reorder1[ssi]; // throws on exceptional index + @ForceInline + public void intoArray(int[] a, int offset) { + switch (length()) { + case 1 -> a[offset] = laneSource(0); + case 2 -> toBitsVector() + .convertShape(VectorOperators.L2I, IntVector.SPECIES_64, 0) + .reinterpretAsInts() + .intoArray(a, offset); + case 4 -> toBitsVector() + 
.convertShape(VectorOperators.L2I, IntVector.SPECIES_128, 0) + .reinterpretAsInts() + .intoArray(a, offset); + case 8 -> toBitsVector() + .convertShape(VectorOperators.L2I, IntVector.SPECIES_256, 0) + .reinterpretAsInts() + .intoArray(a, offset); + case 16 -> toBitsVector() + .convertShape(VectorOperators.L2I, IntVector.SPECIES_512, 0) + .reinterpretAsInts() + .intoArray(a, offset); + default -> { + VectorIntrinsics.checkFromIndexSize(offset, length(), a.length); + for (int i = 0; i < length(); i++) { + a[offset + i] = laneSource(i); + } + } } - return new Long512Shuffle(r); + } + + @Override + @ForceInline + public final Long512Mask laneIsValid() { + return (Long512Mask) toBitsVector().compare(VectorOperators.GE, 0) + .cast(vspecies()); + } + + @ForceInline + @Override + public final Long512Shuffle rearrange(VectorShuffle shuffle) { + Long512Shuffle concreteShuffle = (Long512Shuffle) shuffle; + return (Long512Shuffle) toBitsVector().rearrange(concreteShuffle) + .toShuffle(vspecies(), false); + } + + @ForceInline + @Override + public final Long512Shuffle wrapIndexes() { + Long512Vector v = toBitsVector(); + if ((length() & (length() - 1)) == 0) { + v = (Long512Vector) v.lanewise(VectorOperators.AND, length() - 1); + } else { + v = (Long512Vector) v.blend(v.lanewise(VectorOperators.ADD, length()), + v.compare(VectorOperators.LT, 0)); + } + return (Long512Shuffle) v.toShuffle(vspecies(), false); + } + + private static long[] prepare(int[] indices, int offset) { + long[] a = new long[VLENGTH]; + for (int i = 0; i < VLENGTH; i++) { + int si = indices[offset + i]; + si = partiallyWrapIndex(si, VLENGTH); + a[i] = (long)si; + } + return a; + } + + private static long[] prepare(IntUnaryOperator f) { + long[] a = new long[VLENGTH]; + for (int i = 0; i < VLENGTH; i++) { + int si = f.applyAsInt(i); + si = partiallyWrapIndex(si, VLENGTH); + a[i] = (long)si; + } + return a; + } + + private static boolean indicesInRange(long[] indices) { + int length = indices.length; + for 
(long si : indices) { + if (si >= (long)length || si < (long)(-length)) { + String msg = ("index "+si+"out of range ["+length+"] in "+ + java.util.Arrays.toString(indices)); + throw new AssertionError(msg); + } + } + return true; } } diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Long64Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Long64Vector.java index b13712595db..d34905dbce0 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Long64Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Long64Vector.java @@ -136,24 +136,15 @@ final class Long64Vector extends LongVector { @ForceInline Long64Shuffle iotaShuffle() { return Long64Shuffle.IOTA; } + @Override @ForceInline Long64Shuffle iotaShuffle(int start, int step, boolean wrap) { - if (wrap) { - return (Long64Shuffle)VectorSupport.shuffleIota(ETYPE, Long64Shuffle.class, VSPECIES, VLENGTH, start, step, 1, - (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (VectorIntrinsics.wrapToRange(i*lstep + lstart, l)))); - } else { - return (Long64Shuffle)VectorSupport.shuffleIota(ETYPE, Long64Shuffle.class, VSPECIES, VLENGTH, start, step, 0, - (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (i*lstep + lstart))); - } + return (Long64Shuffle) iotaShuffleTemplate(start, step, wrap); } @Override @ForceInline - Long64Shuffle shuffleFromBytes(byte[] reorder) { return new Long64Shuffle(reorder); } - - @Override - @ForceInline - Long64Shuffle shuffleFromArray(int[] indexes, int i) { return new Long64Shuffle(indexes, i); } + Long64Shuffle shuffleFromArray(int[] indices, int i) { return new Long64Shuffle(indices, i); } @Override @ForceInline @@ -352,9 +343,16 @@ final class Long64Vector extends LongVector { return (long) super.reduceLanesTemplate(op, Long64Mask.class, (Long64Mask) m); // specialized } + @Override @ForceInline - public VectorShuffle toShuffle() { - return super.toShuffleTemplate(Long64Shuffle.class); // specialize + final 
VectorShuffle bitsToShuffle(AbstractSpecies dsp) { + return bitsToShuffleTemplate(dsp); + } + + @Override + @ForceInline + public final Long64Shuffle toShuffle() { + return (Long64Shuffle) toShuffle(vspecies(), false); } // Specialized unary testing @@ -781,23 +779,26 @@ final class Long64Vector extends LongVector { static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM static final Class ETYPE = long.class; // used by the JVM - Long64Shuffle(byte[] reorder) { - super(VLENGTH, reorder); + Long64Shuffle(long[] indices) { + super(indices); + assert(VLENGTH == indices.length); + assert(indicesInRange(indices)); } - public Long64Shuffle(int[] reorder) { - super(VLENGTH, reorder); + Long64Shuffle(int[] indices, int i) { + this(prepare(indices, i)); } - public Long64Shuffle(int[] reorder, int i) { - super(VLENGTH, reorder, i); + Long64Shuffle(IntUnaryOperator fn) { + this(prepare(fn)); } - public Long64Shuffle(IntUnaryOperator fn) { - super(VLENGTH, fn); + long[] indices() { + return (long[])getPayload(); } @Override + @ForceInline public LongSpecies vspecies() { return VSPECIES; } @@ -805,47 +806,122 @@ final class Long64Vector extends LongVector { static { // There must be enough bits in the shuffle lanes to encode // VLENGTH valid indexes and VLENGTH exceptional ones. 
- assert(VLENGTH < Byte.MAX_VALUE); - assert(Byte.MIN_VALUE <= -VLENGTH); + assert(VLENGTH < Long.MAX_VALUE); + assert(Long.MIN_VALUE <= -VLENGTH); } static final Long64Shuffle IOTA = new Long64Shuffle(IDENTITY); @Override @ForceInline public Long64Vector toVector() { - return VectorSupport.shuffleToVector(VCLASS, ETYPE, Long64Shuffle.class, this, VLENGTH, - (s) -> ((Long64Vector)(((AbstractShuffle)(s)).toVectorTemplate()))); + return toBitsVector(); } @Override @ForceInline - public VectorShuffle cast(VectorSpecies s) { - AbstractSpecies species = (AbstractSpecies) s; - if (length() != species.laneCount()) - throw new IllegalArgumentException("VectorShuffle length and species length differ"); - int[] shuffleArray = toArray(); - return s.shuffleFromArray(shuffleArray, 0).check(s); + Long64Vector toBitsVector() { + return (Long64Vector) super.toBitsVectorTemplate(); + } + + @Override + Long64Vector toBitsVector0() { + return ((Long64Vector) vspecies().asIntegral().dummyVector()).vectorFactory(indices()); } @Override @ForceInline - public Long64Shuffle wrapIndexes() { - return VectorSupport.wrapShuffleIndexes(ETYPE, Long64Shuffle.class, this, VLENGTH, - (s) -> ((Long64Shuffle)(((AbstractShuffle)(s)).wrapIndexesTemplate()))); + public int laneSource(int i) { + return (int)toBitsVector().lane(i); } - @ForceInline @Override - public Long64Shuffle rearrange(VectorShuffle shuffle) { - Long64Shuffle s = (Long64Shuffle) shuffle; - byte[] reorder1 = reorder(); - byte[] reorder2 = s.reorder(); - byte[] r = new byte[reorder1.length]; - for (int i = 0; i < reorder1.length; i++) { - int ssi = reorder2[i]; - r[i] = reorder1[ssi]; // throws on exceptional index + @ForceInline + public void intoArray(int[] a, int offset) { + switch (length()) { + case 1 -> a[offset] = laneSource(0); + case 2 -> toBitsVector() + .convertShape(VectorOperators.L2I, IntVector.SPECIES_64, 0) + .reinterpretAsInts() + .intoArray(a, offset); + case 4 -> toBitsVector() + .convertShape(VectorOperators.L2I, 
IntVector.SPECIES_128, 0) + .reinterpretAsInts() + .intoArray(a, offset); + case 8 -> toBitsVector() + .convertShape(VectorOperators.L2I, IntVector.SPECIES_256, 0) + .reinterpretAsInts() + .intoArray(a, offset); + case 16 -> toBitsVector() + .convertShape(VectorOperators.L2I, IntVector.SPECIES_512, 0) + .reinterpretAsInts() + .intoArray(a, offset); + default -> { + VectorIntrinsics.checkFromIndexSize(offset, length(), a.length); + for (int i = 0; i < length(); i++) { + a[offset + i] = laneSource(i); + } + } } - return new Long64Shuffle(r); + } + + @Override + @ForceInline + public final Long64Mask laneIsValid() { + return (Long64Mask) toBitsVector().compare(VectorOperators.GE, 0) + .cast(vspecies()); + } + + @ForceInline + @Override + public final Long64Shuffle rearrange(VectorShuffle shuffle) { + Long64Shuffle concreteShuffle = (Long64Shuffle) shuffle; + return (Long64Shuffle) toBitsVector().rearrange(concreteShuffle) + .toShuffle(vspecies(), false); + } + + @ForceInline + @Override + public final Long64Shuffle wrapIndexes() { + Long64Vector v = toBitsVector(); + if ((length() & (length() - 1)) == 0) { + v = (Long64Vector) v.lanewise(VectorOperators.AND, length() - 1); + } else { + v = (Long64Vector) v.blend(v.lanewise(VectorOperators.ADD, length()), + v.compare(VectorOperators.LT, 0)); + } + return (Long64Shuffle) v.toShuffle(vspecies(), false); + } + + private static long[] prepare(int[] indices, int offset) { + long[] a = new long[VLENGTH]; + for (int i = 0; i < VLENGTH; i++) { + int si = indices[offset + i]; + si = partiallyWrapIndex(si, VLENGTH); + a[i] = (long)si; + } + return a; + } + + private static long[] prepare(IntUnaryOperator f) { + long[] a = new long[VLENGTH]; + for (int i = 0; i < VLENGTH; i++) { + int si = f.applyAsInt(i); + si = partiallyWrapIndex(si, VLENGTH); + a[i] = (long)si; + } + return a; + } + + private static boolean indicesInRange(long[] indices) { + int length = indices.length; + for (long si : indices) { + if (si >= (long)length || 
si < (long)(-length)) { + String msg = ("index "+si+"out of range ["+length+"] in "+ + java.util.Arrays.toString(indices)); + throw new AssertionError(msg); + } + } + return true; } } diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/LongMaxVector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/LongMaxVector.java index 9edc442be88..84ff9d17444 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/LongMaxVector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/LongMaxVector.java @@ -136,24 +136,15 @@ final class LongMaxVector extends LongVector { @ForceInline LongMaxShuffle iotaShuffle() { return LongMaxShuffle.IOTA; } + @Override @ForceInline LongMaxShuffle iotaShuffle(int start, int step, boolean wrap) { - if (wrap) { - return (LongMaxShuffle)VectorSupport.shuffleIota(ETYPE, LongMaxShuffle.class, VSPECIES, VLENGTH, start, step, 1, - (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (VectorIntrinsics.wrapToRange(i*lstep + lstart, l)))); - } else { - return (LongMaxShuffle)VectorSupport.shuffleIota(ETYPE, LongMaxShuffle.class, VSPECIES, VLENGTH, start, step, 0, - (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (i*lstep + lstart))); - } + return (LongMaxShuffle) iotaShuffleTemplate(start, step, wrap); } @Override @ForceInline - LongMaxShuffle shuffleFromBytes(byte[] reorder) { return new LongMaxShuffle(reorder); } - - @Override - @ForceInline - LongMaxShuffle shuffleFromArray(int[] indexes, int i) { return new LongMaxShuffle(indexes, i); } + LongMaxShuffle shuffleFromArray(int[] indices, int i) { return new LongMaxShuffle(indices, i); } @Override @ForceInline @@ -352,9 +343,16 @@ final class LongMaxVector extends LongVector { return (long) super.reduceLanesTemplate(op, LongMaxMask.class, (LongMaxMask) m); // specialized } + @Override @ForceInline - public VectorShuffle toShuffle() { - return super.toShuffleTemplate(LongMaxShuffle.class); // specialize + final VectorShuffle 
bitsToShuffle(AbstractSpecies dsp) { + return bitsToShuffleTemplate(dsp); + } + + @Override + @ForceInline + public final LongMaxShuffle toShuffle() { + return (LongMaxShuffle) toShuffle(vspecies(), false); } // Specialized unary testing @@ -781,23 +779,26 @@ final class LongMaxVector extends LongVector { static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM static final Class ETYPE = long.class; // used by the JVM - LongMaxShuffle(byte[] reorder) { - super(VLENGTH, reorder); + LongMaxShuffle(long[] indices) { + super(indices); + assert(VLENGTH == indices.length); + assert(indicesInRange(indices)); } - public LongMaxShuffle(int[] reorder) { - super(VLENGTH, reorder); + LongMaxShuffle(int[] indices, int i) { + this(prepare(indices, i)); } - public LongMaxShuffle(int[] reorder, int i) { - super(VLENGTH, reorder, i); + LongMaxShuffle(IntUnaryOperator fn) { + this(prepare(fn)); } - public LongMaxShuffle(IntUnaryOperator fn) { - super(VLENGTH, fn); + long[] indices() { + return (long[])getPayload(); } @Override + @ForceInline public LongSpecies vspecies() { return VSPECIES; } @@ -805,47 +806,122 @@ final class LongMaxVector extends LongVector { static { // There must be enough bits in the shuffle lanes to encode // VLENGTH valid indexes and VLENGTH exceptional ones. 
- assert(VLENGTH < Byte.MAX_VALUE); - assert(Byte.MIN_VALUE <= -VLENGTH); + assert(VLENGTH < Long.MAX_VALUE); + assert(Long.MIN_VALUE <= -VLENGTH); } static final LongMaxShuffle IOTA = new LongMaxShuffle(IDENTITY); @Override @ForceInline public LongMaxVector toVector() { - return VectorSupport.shuffleToVector(VCLASS, ETYPE, LongMaxShuffle.class, this, VLENGTH, - (s) -> ((LongMaxVector)(((AbstractShuffle)(s)).toVectorTemplate()))); + return toBitsVector(); } @Override @ForceInline - public VectorShuffle cast(VectorSpecies s) { - AbstractSpecies species = (AbstractSpecies) s; - if (length() != species.laneCount()) - throw new IllegalArgumentException("VectorShuffle length and species length differ"); - int[] shuffleArray = toArray(); - return s.shuffleFromArray(shuffleArray, 0).check(s); + LongMaxVector toBitsVector() { + return (LongMaxVector) super.toBitsVectorTemplate(); + } + + @Override + LongMaxVector toBitsVector0() { + return ((LongMaxVector) vspecies().asIntegral().dummyVector()).vectorFactory(indices()); } @Override @ForceInline - public LongMaxShuffle wrapIndexes() { - return VectorSupport.wrapShuffleIndexes(ETYPE, LongMaxShuffle.class, this, VLENGTH, - (s) -> ((LongMaxShuffle)(((AbstractShuffle)(s)).wrapIndexesTemplate()))); + public int laneSource(int i) { + return (int)toBitsVector().lane(i); } - @ForceInline @Override - public LongMaxShuffle rearrange(VectorShuffle shuffle) { - LongMaxShuffle s = (LongMaxShuffle) shuffle; - byte[] reorder1 = reorder(); - byte[] reorder2 = s.reorder(); - byte[] r = new byte[reorder1.length]; - for (int i = 0; i < reorder1.length; i++) { - int ssi = reorder2[i]; - r[i] = reorder1[ssi]; // throws on exceptional index + @ForceInline + public void intoArray(int[] a, int offset) { + switch (length()) { + case 1 -> a[offset] = laneSource(0); + case 2 -> toBitsVector() + .convertShape(VectorOperators.L2I, IntVector.SPECIES_64, 0) + .reinterpretAsInts() + .intoArray(a, offset); + case 4 -> toBitsVector() + 
.convertShape(VectorOperators.L2I, IntVector.SPECIES_128, 0) + .reinterpretAsInts() + .intoArray(a, offset); + case 8 -> toBitsVector() + .convertShape(VectorOperators.L2I, IntVector.SPECIES_256, 0) + .reinterpretAsInts() + .intoArray(a, offset); + case 16 -> toBitsVector() + .convertShape(VectorOperators.L2I, IntVector.SPECIES_512, 0) + .reinterpretAsInts() + .intoArray(a, offset); + default -> { + VectorIntrinsics.checkFromIndexSize(offset, length(), a.length); + for (int i = 0; i < length(); i++) { + a[offset + i] = laneSource(i); + } + } } - return new LongMaxShuffle(r); + } + + @Override + @ForceInline + public final LongMaxMask laneIsValid() { + return (LongMaxMask) toBitsVector().compare(VectorOperators.GE, 0) + .cast(vspecies()); + } + + @ForceInline + @Override + public final LongMaxShuffle rearrange(VectorShuffle shuffle) { + LongMaxShuffle concreteShuffle = (LongMaxShuffle) shuffle; + return (LongMaxShuffle) toBitsVector().rearrange(concreteShuffle) + .toShuffle(vspecies(), false); + } + + @ForceInline + @Override + public final LongMaxShuffle wrapIndexes() { + LongMaxVector v = toBitsVector(); + if ((length() & (length() - 1)) == 0) { + v = (LongMaxVector) v.lanewise(VectorOperators.AND, length() - 1); + } else { + v = (LongMaxVector) v.blend(v.lanewise(VectorOperators.ADD, length()), + v.compare(VectorOperators.LT, 0)); + } + return (LongMaxShuffle) v.toShuffle(vspecies(), false); + } + + private static long[] prepare(int[] indices, int offset) { + long[] a = new long[VLENGTH]; + for (int i = 0; i < VLENGTH; i++) { + int si = indices[offset + i]; + si = partiallyWrapIndex(si, VLENGTH); + a[i] = (long)si; + } + return a; + } + + private static long[] prepare(IntUnaryOperator f) { + long[] a = new long[VLENGTH]; + for (int i = 0; i < VLENGTH; i++) { + int si = f.applyAsInt(i); + si = partiallyWrapIndex(si, VLENGTH); + a[i] = (long)si; + } + return a; + } + + private static boolean indicesInRange(long[] indices) { + int length = indices.length; + for 
(long si : indices) { + if (si >= (long)length || si < (long)(-length)) { + String msg = ("index "+si+"out of range ["+length+"] in "+ + java.util.Arrays.toString(indices)); + throw new AssertionError(msg); + } + } + return true; } } diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/LongVector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/LongVector.java index 43fedc2693b..cde68c8fb38 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/LongVector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/LongVector.java @@ -1012,7 +1012,7 @@ public abstract class LongVector extends AbstractVector { // and broadcast, but it would be more surprising not to continue // the obvious pattern started by unary and binary. - /** + /** * {@inheritDoc} * @see #lanewise(VectorOperators.Ternary,long,long,VectorMask) * @see #lanewise(VectorOperators.Ternary,Vector,long,VectorMask) @@ -2159,9 +2159,10 @@ public abstract class LongVector extends AbstractVector { LongVector that = (LongVector) v1; that.check(this); Objects.checkIndex(origin, length() + 1); - VectorShuffle iota = iotaShuffle(); - VectorMask blendMask = iota.toVector().compare(VectorOperators.LT, (broadcast((long)(length() - origin)))); - iota = iotaShuffle(origin, 1, true); + LongVector iotaVector = (LongVector) iotaShuffle().toBitsVector(); + LongVector filter = broadcast((long)(length() - origin)); + VectorMask blendMask = iotaVector.compare(VectorOperators.LT, filter); + AbstractShuffle iota = iotaShuffle(origin, 1, true); return that.rearrange(iota).blend(this.rearrange(iota), blendMask); } @@ -2189,9 +2190,10 @@ public abstract class LongVector extends AbstractVector { @ForceInline LongVector sliceTemplate(int origin) { Objects.checkIndex(origin, length() + 1); - VectorShuffle iota = iotaShuffle(); - VectorMask blendMask = iota.toVector().compare(VectorOperators.LT, (broadcast((long)(length() - origin)))); - iota = iotaShuffle(origin, 
1, true); + LongVector iotaVector = (LongVector) iotaShuffle().toBitsVector(); + LongVector filter = broadcast((long)(length() - origin)); + VectorMask blendMask = iotaVector.compare(VectorOperators.LT, filter); + AbstractShuffle iota = iotaShuffle(origin, 1, true); return vspecies().zero().blend(this.rearrange(iota), blendMask); } @@ -2210,10 +2212,10 @@ public abstract class LongVector extends AbstractVector { LongVector that = (LongVector) w; that.check(this); Objects.checkIndex(origin, length() + 1); - VectorShuffle iota = iotaShuffle(); - VectorMask blendMask = iota.toVector().compare((part == 0) ? VectorOperators.GE : VectorOperators.LT, - (broadcast((long)(origin)))); - iota = iotaShuffle(-origin, 1, true); + LongVector iotaVector = (LongVector) iotaShuffle().toBitsVector(); + LongVector filter = broadcast((long)origin); + VectorMask blendMask = iotaVector.compare((part == 0) ? VectorOperators.GE : VectorOperators.LT, filter); + AbstractShuffle iota = iotaShuffle(-origin, 1, true); return that.blend(this.rearrange(iota), blendMask); } @@ -2250,10 +2252,10 @@ public abstract class LongVector extends AbstractVector { LongVector unsliceTemplate(int origin) { Objects.checkIndex(origin, length() + 1); - VectorShuffle iota = iotaShuffle(); - VectorMask blendMask = iota.toVector().compare(VectorOperators.GE, - (broadcast((long)(origin)))); - iota = iotaShuffle(-origin, 1, true); + LongVector iotaVector = (LongVector) iotaShuffle().toBitsVector(); + LongVector filter = broadcast((long)origin); + VectorMask blendMask = iotaVector.compare(VectorOperators.GE, filter); + AbstractShuffle iota = iotaShuffle(-origin, 1, true); return vspecies().zero().blend(this.rearrange(iota), blendMask); } @@ -2276,13 +2278,11 @@ public abstract class LongVector extends AbstractVector { final > LongVector rearrangeTemplate(Class shuffletype, S shuffle) { - @SuppressWarnings("unchecked") - S ws = (S) shuffle.wrapIndexes(); return VectorSupport.rearrangeOp( getClass(), shuffletype, null, 
long.class, length(), - this, ws, null, + this, shuffle, null, (v1, s_, m_) -> v1.uOp((i, a) -> { - int ei = s_.laneSource(i); + int ei = Integer.remainderUnsigned(s_.laneSource(i), v1.length()); return v1.lane(ei); })); } @@ -2305,13 +2305,11 @@ public abstract class LongVector extends AbstractVector { M m) { m.check(masktype, this); - @SuppressWarnings("unchecked") - S ws = (S) shuffle.wrapIndexes(); return VectorSupport.rearrangeOp( getClass(), shuffletype, masktype, long.class, length(), - this, ws, m, + this, shuffle, m, (v1, s_, m_) -> v1.uOp((i, a) -> { - int ei = s_.laneSource(i); + int ei = Integer.remainderUnsigned(s_.laneSource(i), v1.length()); return !m_.laneIsSet(i) ? 0 : v1.lane(ei); })); } @@ -2332,30 +2330,29 @@ public abstract class LongVector extends AbstractVector { S shuffle, LongVector v) { VectorMask valid = shuffle.laneIsValid(); - @SuppressWarnings("unchecked") - S ws = (S) shuffle.wrapIndexes(); LongVector r0 = VectorSupport.rearrangeOp( getClass(), shuffletype, null, long.class, length(), - this, ws, null, + this, shuffle, null, (v0, s_, m_) -> v0.uOp((i, a) -> { - int ei = s_.laneSource(i); + int ei = Integer.remainderUnsigned(s_.laneSource(i), v0.length()); return v0.lane(ei); })); LongVector r1 = VectorSupport.rearrangeOp( getClass(), shuffletype, null, long.class, length(), - v, ws, null, + v, shuffle, null, (v1, s_, m_) -> v1.uOp((i, a) -> { - int ei = s_.laneSource(i); + int ei = Integer.remainderUnsigned(s_.laneSource(i), v1.length()); return v1.lane(ei); })); return r1.blend(r0, valid); } + @Override @ForceInline - private final - VectorShuffle toShuffle0(LongSpecies dsp) { + final VectorShuffle bitsToShuffle0(AbstractSpecies dsp) { + assert(dsp.length() == vspecies().length()); long[] a = toArray(); int[] sa = new int[a.length]; for (int i = 0; i < a.length; i++) { @@ -2364,16 +2361,18 @@ public abstract class LongVector extends AbstractVector { return VectorShuffle.fromArray(dsp, sa, 0); } - /*package-private*/ @ForceInline - 
final - VectorShuffle toShuffleTemplate(Class shuffleType) { - LongSpecies vsp = vspecies(); - return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST, - getClass(), long.class, length(), - shuffleType, byte.class, length(), - this, vsp, - LongVector::toShuffle0); + final + VectorShuffle toShuffle(AbstractSpecies dsp, boolean wrap) { + assert(dsp.elementSize() == vspecies().elementSize()); + LongVector idx = this; + LongVector wrapped = idx.lanewise(VectorOperators.AND, length() - 1); + if (!wrap) { + LongVector wrappedEx = wrapped.lanewise(VectorOperators.SUB, length()); + VectorMask inBound = wrapped.compare(VectorOperators.EQ, idx); + wrapped = wrappedEx.blend(wrapped, inBound); + } + return wrapped.bitsToShuffle(dsp); } /** @@ -3784,9 +3783,10 @@ public abstract class LongVector extends AbstractVector { private LongSpecies(VectorShape shape, Class vectorType, Class> maskType, + Class> shuffleType, Function vectorFactory) { super(shape, LaneType.of(long.class), - vectorType, maskType, + vectorType, maskType, shuffleType, vectorFactory); assert(this.elementSize() == Long.SIZE); } @@ -4063,6 +4063,7 @@ public abstract class LongVector extends AbstractVector { = new LongSpecies(VectorShape.S_64_BIT, Long64Vector.class, Long64Vector.Long64Mask.class, + Long64Vector.Long64Shuffle.class, Long64Vector::new); /** Species representing {@link LongVector}s of {@link VectorShape#S_128_BIT VectorShape.S_128_BIT}. */ @@ -4070,6 +4071,7 @@ public abstract class LongVector extends AbstractVector { = new LongSpecies(VectorShape.S_128_BIT, Long128Vector.class, Long128Vector.Long128Mask.class, + Long128Vector.Long128Shuffle.class, Long128Vector::new); /** Species representing {@link LongVector}s of {@link VectorShape#S_256_BIT VectorShape.S_256_BIT}. 
*/ @@ -4077,6 +4079,7 @@ public abstract class LongVector extends AbstractVector { = new LongSpecies(VectorShape.S_256_BIT, Long256Vector.class, Long256Vector.Long256Mask.class, + Long256Vector.Long256Shuffle.class, Long256Vector::new); /** Species representing {@link LongVector}s of {@link VectorShape#S_512_BIT VectorShape.S_512_BIT}. */ @@ -4084,6 +4087,7 @@ public abstract class LongVector extends AbstractVector { = new LongSpecies(VectorShape.S_512_BIT, Long512Vector.class, Long512Vector.Long512Mask.class, + Long512Vector.Long512Shuffle.class, Long512Vector::new); /** Species representing {@link LongVector}s of {@link VectorShape#S_Max_BIT VectorShape.S_Max_BIT}. */ @@ -4091,6 +4095,7 @@ public abstract class LongVector extends AbstractVector { = new LongSpecies(VectorShape.S_Max_BIT, LongMaxVector.class, LongMaxVector.LongMaxMask.class, + LongMaxVector.LongMaxShuffle.class, LongMaxVector::new); /** diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Short128Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Short128Vector.java index b013e4b2825..17ec4f72d2e 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Short128Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Short128Vector.java @@ -141,24 +141,15 @@ final class Short128Vector extends ShortVector { @ForceInline Short128Shuffle iotaShuffle() { return Short128Shuffle.IOTA; } + @Override @ForceInline Short128Shuffle iotaShuffle(int start, int step, boolean wrap) { - if (wrap) { - return (Short128Shuffle)VectorSupport.shuffleIota(ETYPE, Short128Shuffle.class, VSPECIES, VLENGTH, start, step, 1, - (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (VectorIntrinsics.wrapToRange(i*lstep + lstart, l)))); - } else { - return (Short128Shuffle)VectorSupport.shuffleIota(ETYPE, Short128Shuffle.class, VSPECIES, VLENGTH, start, step, 0, - (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (i*lstep + lstart))); - } + return 
(Short128Shuffle) iotaShuffleTemplate((short) start, (short) step, wrap); } @Override @ForceInline - Short128Shuffle shuffleFromBytes(byte[] reorder) { return new Short128Shuffle(reorder); } - - @Override - @ForceInline - Short128Shuffle shuffleFromArray(int[] indexes, int i) { return new Short128Shuffle(indexes, i); } + Short128Shuffle shuffleFromArray(int[] indices, int i) { return new Short128Shuffle(indices, i); } @Override @ForceInline @@ -357,9 +348,16 @@ final class Short128Vector extends ShortVector { return (long) super.reduceLanesTemplate(op, Short128Mask.class, (Short128Mask) m); // specialized } + @Override @ForceInline - public VectorShuffle toShuffle() { - return super.toShuffleTemplate(Short128Shuffle.class); // specialize + final VectorShuffle bitsToShuffle(AbstractSpecies dsp) { + return bitsToShuffleTemplate(dsp); + } + + @Override + @ForceInline + public final Short128Shuffle toShuffle() { + return (Short128Shuffle) toShuffle(vspecies(), false); } // Specialized unary testing @@ -805,23 +803,26 @@ final class Short128Vector extends ShortVector { static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM static final Class ETYPE = short.class; // used by the JVM - Short128Shuffle(byte[] reorder) { - super(VLENGTH, reorder); + Short128Shuffle(short[] indices) { + super(indices); + assert(VLENGTH == indices.length); + assert(indicesInRange(indices)); } - public Short128Shuffle(int[] reorder) { - super(VLENGTH, reorder); + Short128Shuffle(int[] indices, int i) { + this(prepare(indices, i)); } - public Short128Shuffle(int[] reorder, int i) { - super(VLENGTH, reorder, i); + Short128Shuffle(IntUnaryOperator fn) { + this(prepare(fn)); } - public Short128Shuffle(IntUnaryOperator fn) { - super(VLENGTH, fn); + short[] indices() { + return (short[])getPayload(); } @Override + @ForceInline public ShortSpecies vspecies() { return VSPECIES; } @@ -829,47 +830,105 @@ final class Short128Vector extends ShortVector { static { // There must be enough bits 
in the shuffle lanes to encode // VLENGTH valid indexes and VLENGTH exceptional ones. - assert(VLENGTH < Byte.MAX_VALUE); - assert(Byte.MIN_VALUE <= -VLENGTH); + assert(VLENGTH < Short.MAX_VALUE); + assert(Short.MIN_VALUE <= -VLENGTH); } static final Short128Shuffle IOTA = new Short128Shuffle(IDENTITY); @Override @ForceInline public Short128Vector toVector() { - return VectorSupport.shuffleToVector(VCLASS, ETYPE, Short128Shuffle.class, this, VLENGTH, - (s) -> ((Short128Vector)(((AbstractShuffle)(s)).toVectorTemplate()))); + return toBitsVector(); } @Override @ForceInline - public VectorShuffle cast(VectorSpecies s) { - AbstractSpecies species = (AbstractSpecies) s; - if (length() != species.laneCount()) - throw new IllegalArgumentException("VectorShuffle length and species length differ"); - int[] shuffleArray = toArray(); - return s.shuffleFromArray(shuffleArray, 0).check(s); + Short128Vector toBitsVector() { + return (Short128Vector) super.toBitsVectorTemplate(); + } + + @Override + Short128Vector toBitsVector0() { + return ((Short128Vector) vspecies().asIntegral().dummyVector()).vectorFactory(indices()); } @Override @ForceInline - public Short128Shuffle wrapIndexes() { - return VectorSupport.wrapShuffleIndexes(ETYPE, Short128Shuffle.class, this, VLENGTH, - (s) -> ((Short128Shuffle)(((AbstractShuffle)(s)).wrapIndexesTemplate()))); + public int laneSource(int i) { + return (int)toBitsVector().lane(i); + } + + @Override + @ForceInline + public void intoArray(int[] a, int offset) { + VectorSpecies species = IntVector.SPECIES_128; + Vector v = toBitsVector(); + v.convertShape(VectorOperators.S2I, species, 0) + .reinterpretAsInts() + .intoArray(a, offset); + v.convertShape(VectorOperators.S2I, species, 1) + .reinterpretAsInts() + .intoArray(a, offset + species.length()); + } + + @Override + @ForceInline + public final Short128Mask laneIsValid() { + return (Short128Mask) toBitsVector().compare(VectorOperators.GE, 0) + .cast(vspecies()); } @ForceInline @Override - 
public Short128Shuffle rearrange(VectorShuffle shuffle) { - Short128Shuffle s = (Short128Shuffle) shuffle; - byte[] reorder1 = reorder(); - byte[] reorder2 = s.reorder(); - byte[] r = new byte[reorder1.length]; - for (int i = 0; i < reorder1.length; i++) { - int ssi = reorder2[i]; - r[i] = reorder1[ssi]; // throws on exceptional index + public final Short128Shuffle rearrange(VectorShuffle shuffle) { + Short128Shuffle concreteShuffle = (Short128Shuffle) shuffle; + return (Short128Shuffle) toBitsVector().rearrange(concreteShuffle) + .toShuffle(vspecies(), false); + } + + @ForceInline + @Override + public final Short128Shuffle wrapIndexes() { + Short128Vector v = toBitsVector(); + if ((length() & (length() - 1)) == 0) { + v = (Short128Vector) v.lanewise(VectorOperators.AND, length() - 1); + } else { + v = (Short128Vector) v.blend(v.lanewise(VectorOperators.ADD, length()), + v.compare(VectorOperators.LT, 0)); } - return new Short128Shuffle(r); + return (Short128Shuffle) v.toShuffle(vspecies(), false); + } + + private static short[] prepare(int[] indices, int offset) { + short[] a = new short[VLENGTH]; + for (int i = 0; i < VLENGTH; i++) { + int si = indices[offset + i]; + si = partiallyWrapIndex(si, VLENGTH); + a[i] = (short)si; + } + return a; + } + + private static short[] prepare(IntUnaryOperator f) { + short[] a = new short[VLENGTH]; + for (int i = 0; i < VLENGTH; i++) { + int si = f.applyAsInt(i); + si = partiallyWrapIndex(si, VLENGTH); + a[i] = (short)si; + } + return a; + } + + private static boolean indicesInRange(short[] indices) { + int length = indices.length; + for (short si : indices) { + if (si >= (short)length || si < (short)(-length)) { + String msg = ("index "+si+"out of range ["+length+"] in "+ + java.util.Arrays.toString(indices)); + throw new AssertionError(msg); + } + } + return true; } } diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Short256Vector.java 
b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Short256Vector.java index af4c862eaf3..e7505779f18 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Short256Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Short256Vector.java @@ -141,24 +141,15 @@ final class Short256Vector extends ShortVector { @ForceInline Short256Shuffle iotaShuffle() { return Short256Shuffle.IOTA; } + @Override @ForceInline Short256Shuffle iotaShuffle(int start, int step, boolean wrap) { - if (wrap) { - return (Short256Shuffle)VectorSupport.shuffleIota(ETYPE, Short256Shuffle.class, VSPECIES, VLENGTH, start, step, 1, - (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (VectorIntrinsics.wrapToRange(i*lstep + lstart, l)))); - } else { - return (Short256Shuffle)VectorSupport.shuffleIota(ETYPE, Short256Shuffle.class, VSPECIES, VLENGTH, start, step, 0, - (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (i*lstep + lstart))); - } + return (Short256Shuffle) iotaShuffleTemplate((short) start, (short) step, wrap); } @Override @ForceInline - Short256Shuffle shuffleFromBytes(byte[] reorder) { return new Short256Shuffle(reorder); } - - @Override - @ForceInline - Short256Shuffle shuffleFromArray(int[] indexes, int i) { return new Short256Shuffle(indexes, i); } + Short256Shuffle shuffleFromArray(int[] indices, int i) { return new Short256Shuffle(indices, i); } @Override @ForceInline @@ -357,9 +348,16 @@ final class Short256Vector extends ShortVector { return (long) super.reduceLanesTemplate(op, Short256Mask.class, (Short256Mask) m); // specialized } + @Override @ForceInline - public VectorShuffle toShuffle() { - return super.toShuffleTemplate(Short256Shuffle.class); // specialize + final VectorShuffle bitsToShuffle(AbstractSpecies dsp) { + return bitsToShuffleTemplate(dsp); + } + + @Override + @ForceInline + public final Short256Shuffle toShuffle() { + return (Short256Shuffle) toShuffle(vspecies(), false); } // Specialized unary testing @@ -821,23 
+819,26 @@ final class Short256Vector extends ShortVector { static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM static final Class ETYPE = short.class; // used by the JVM - Short256Shuffle(byte[] reorder) { - super(VLENGTH, reorder); + Short256Shuffle(short[] indices) { + super(indices); + assert(VLENGTH == indices.length); + assert(indicesInRange(indices)); } - public Short256Shuffle(int[] reorder) { - super(VLENGTH, reorder); + Short256Shuffle(int[] indices, int i) { + this(prepare(indices, i)); } - public Short256Shuffle(int[] reorder, int i) { - super(VLENGTH, reorder, i); + Short256Shuffle(IntUnaryOperator fn) { + this(prepare(fn)); } - public Short256Shuffle(IntUnaryOperator fn) { - super(VLENGTH, fn); + short[] indices() { + return (short[])getPayload(); } @Override + @ForceInline public ShortSpecies vspecies() { return VSPECIES; } @@ -845,47 +846,105 @@ final class Short256Vector extends ShortVector { static { // There must be enough bits in the shuffle lanes to encode // VLENGTH valid indexes and VLENGTH exceptional ones. 
- assert(VLENGTH < Byte.MAX_VALUE); - assert(Byte.MIN_VALUE <= -VLENGTH); + assert(VLENGTH < Short.MAX_VALUE); + assert(Short.MIN_VALUE <= -VLENGTH); } static final Short256Shuffle IOTA = new Short256Shuffle(IDENTITY); @Override @ForceInline public Short256Vector toVector() { - return VectorSupport.shuffleToVector(VCLASS, ETYPE, Short256Shuffle.class, this, VLENGTH, - (s) -> ((Short256Vector)(((AbstractShuffle)(s)).toVectorTemplate()))); + return toBitsVector(); } @Override @ForceInline - public VectorShuffle cast(VectorSpecies s) { - AbstractSpecies species = (AbstractSpecies) s; - if (length() != species.laneCount()) - throw new IllegalArgumentException("VectorShuffle length and species length differ"); - int[] shuffleArray = toArray(); - return s.shuffleFromArray(shuffleArray, 0).check(s); + Short256Vector toBitsVector() { + return (Short256Vector) super.toBitsVectorTemplate(); + } + + @Override + Short256Vector toBitsVector0() { + return ((Short256Vector) vspecies().asIntegral().dummyVector()).vectorFactory(indices()); } @Override @ForceInline - public Short256Shuffle wrapIndexes() { - return VectorSupport.wrapShuffleIndexes(ETYPE, Short256Shuffle.class, this, VLENGTH, - (s) -> ((Short256Shuffle)(((AbstractShuffle)(s)).wrapIndexesTemplate()))); + public int laneSource(int i) { + return (int)toBitsVector().lane(i); + } + + @Override + @ForceInline + public void intoArray(int[] a, int offset) { + VectorSpecies species = IntVector.SPECIES_256; + Vector v = toBitsVector(); + v.convertShape(VectorOperators.S2I, species, 0) + .reinterpretAsInts() + .intoArray(a, offset); + v.convertShape(VectorOperators.S2I, species, 1) + .reinterpretAsInts() + .intoArray(a, offset + species.length()); + } + + @Override + @ForceInline + public final Short256Mask laneIsValid() { + return (Short256Mask) toBitsVector().compare(VectorOperators.GE, 0) + .cast(vspecies()); } @ForceInline @Override - public Short256Shuffle rearrange(VectorShuffle shuffle) { - Short256Shuffle s = 
(Short256Shuffle) shuffle; - byte[] reorder1 = reorder(); - byte[] reorder2 = s.reorder(); - byte[] r = new byte[reorder1.length]; - for (int i = 0; i < reorder1.length; i++) { - int ssi = reorder2[i]; - r[i] = reorder1[ssi]; // throws on exceptional index + public final Short256Shuffle rearrange(VectorShuffle shuffle) { + Short256Shuffle concreteShuffle = (Short256Shuffle) shuffle; + return (Short256Shuffle) toBitsVector().rearrange(concreteShuffle) + .toShuffle(vspecies(), false); + } + + @ForceInline + @Override + public final Short256Shuffle wrapIndexes() { + Short256Vector v = toBitsVector(); + if ((length() & (length() - 1)) == 0) { + v = (Short256Vector) v.lanewise(VectorOperators.AND, length() - 1); + } else { + v = (Short256Vector) v.blend(v.lanewise(VectorOperators.ADD, length()), + v.compare(VectorOperators.LT, 0)); } - return new Short256Shuffle(r); + return (Short256Shuffle) v.toShuffle(vspecies(), false); + } + + private static short[] prepare(int[] indices, int offset) { + short[] a = new short[VLENGTH]; + for (int i = 0; i < VLENGTH; i++) { + int si = indices[offset + i]; + si = partiallyWrapIndex(si, VLENGTH); + a[i] = (short)si; + } + return a; + } + + private static short[] prepare(IntUnaryOperator f) { + short[] a = new short[VLENGTH]; + for (int i = 0; i < VLENGTH; i++) { + int si = f.applyAsInt(i); + si = partiallyWrapIndex(si, VLENGTH); + a[i] = (short)si; + } + return a; + } + + private static boolean indicesInRange(short[] indices) { + int length = indices.length; + for (short si : indices) { + if (si >= (short)length || si < (short)(-length)) { + String msg = ("index "+si+"out of range ["+length+"] in "+ + java.util.Arrays.toString(indices)); + throw new AssertionError(msg); + } + } + return true; } } diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Short512Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Short512Vector.java index 3bb019f3b7c..7ca0d00b37a 100644 --- 
a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Short512Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Short512Vector.java @@ -141,24 +141,15 @@ final class Short512Vector extends ShortVector { @ForceInline Short512Shuffle iotaShuffle() { return Short512Shuffle.IOTA; } + @Override @ForceInline Short512Shuffle iotaShuffle(int start, int step, boolean wrap) { - if (wrap) { - return (Short512Shuffle)VectorSupport.shuffleIota(ETYPE, Short512Shuffle.class, VSPECIES, VLENGTH, start, step, 1, - (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (VectorIntrinsics.wrapToRange(i*lstep + lstart, l)))); - } else { - return (Short512Shuffle)VectorSupport.shuffleIota(ETYPE, Short512Shuffle.class, VSPECIES, VLENGTH, start, step, 0, - (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (i*lstep + lstart))); - } + return (Short512Shuffle) iotaShuffleTemplate((short) start, (short) step, wrap); } @Override @ForceInline - Short512Shuffle shuffleFromBytes(byte[] reorder) { return new Short512Shuffle(reorder); } - - @Override - @ForceInline - Short512Shuffle shuffleFromArray(int[] indexes, int i) { return new Short512Shuffle(indexes, i); } + Short512Shuffle shuffleFromArray(int[] indices, int i) { return new Short512Shuffle(indices, i); } @Override @ForceInline @@ -357,9 +348,16 @@ final class Short512Vector extends ShortVector { return (long) super.reduceLanesTemplate(op, Short512Mask.class, (Short512Mask) m); // specialized } + @Override @ForceInline - public VectorShuffle toShuffle() { - return super.toShuffleTemplate(Short512Shuffle.class); // specialize + final VectorShuffle bitsToShuffle(AbstractSpecies dsp) { + return bitsToShuffleTemplate(dsp); + } + + @Override + @ForceInline + public final Short512Shuffle toShuffle() { + return (Short512Shuffle) toShuffle(vspecies(), false); } // Specialized unary testing @@ -853,23 +851,26 @@ final class Short512Vector extends ShortVector { static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM 
static final Class ETYPE = short.class; // used by the JVM - Short512Shuffle(byte[] reorder) { - super(VLENGTH, reorder); + Short512Shuffle(short[] indices) { + super(indices); + assert(VLENGTH == indices.length); + assert(indicesInRange(indices)); } - public Short512Shuffle(int[] reorder) { - super(VLENGTH, reorder); + Short512Shuffle(int[] indices, int i) { + this(prepare(indices, i)); } - public Short512Shuffle(int[] reorder, int i) { - super(VLENGTH, reorder, i); + Short512Shuffle(IntUnaryOperator fn) { + this(prepare(fn)); } - public Short512Shuffle(IntUnaryOperator fn) { - super(VLENGTH, fn); + short[] indices() { + return (short[])getPayload(); } @Override + @ForceInline public ShortSpecies vspecies() { return VSPECIES; } @@ -877,47 +878,105 @@ final class Short512Vector extends ShortVector { static { // There must be enough bits in the shuffle lanes to encode // VLENGTH valid indexes and VLENGTH exceptional ones. - assert(VLENGTH < Byte.MAX_VALUE); - assert(Byte.MIN_VALUE <= -VLENGTH); + assert(VLENGTH < Short.MAX_VALUE); + assert(Short.MIN_VALUE <= -VLENGTH); } static final Short512Shuffle IOTA = new Short512Shuffle(IDENTITY); @Override @ForceInline public Short512Vector toVector() { - return VectorSupport.shuffleToVector(VCLASS, ETYPE, Short512Shuffle.class, this, VLENGTH, - (s) -> ((Short512Vector)(((AbstractShuffle)(s)).toVectorTemplate()))); + return toBitsVector(); } @Override @ForceInline - public VectorShuffle cast(VectorSpecies s) { - AbstractSpecies species = (AbstractSpecies) s; - if (length() != species.laneCount()) - throw new IllegalArgumentException("VectorShuffle length and species length differ"); - int[] shuffleArray = toArray(); - return s.shuffleFromArray(shuffleArray, 0).check(s); + Short512Vector toBitsVector() { + return (Short512Vector) super.toBitsVectorTemplate(); + } + + @Override + Short512Vector toBitsVector0() { + return ((Short512Vector) vspecies().asIntegral().dummyVector()).vectorFactory(indices()); } @Override @ForceInline 
- public Short512Shuffle wrapIndexes() { - return VectorSupport.wrapShuffleIndexes(ETYPE, Short512Shuffle.class, this, VLENGTH, - (s) -> ((Short512Shuffle)(((AbstractShuffle)(s)).wrapIndexesTemplate()))); + public int laneSource(int i) { + return (int)toBitsVector().lane(i); + } + + @Override + @ForceInline + public void intoArray(int[] a, int offset) { + VectorSpecies species = IntVector.SPECIES_512; + Vector v = toBitsVector(); + v.convertShape(VectorOperators.S2I, species, 0) + .reinterpretAsInts() + .intoArray(a, offset); + v.convertShape(VectorOperators.S2I, species, 1) + .reinterpretAsInts() + .intoArray(a, offset + species.length()); + } + + @Override + @ForceInline + public final Short512Mask laneIsValid() { + return (Short512Mask) toBitsVector().compare(VectorOperators.GE, 0) + .cast(vspecies()); } @ForceInline @Override - public Short512Shuffle rearrange(VectorShuffle shuffle) { - Short512Shuffle s = (Short512Shuffle) shuffle; - byte[] reorder1 = reorder(); - byte[] reorder2 = s.reorder(); - byte[] r = new byte[reorder1.length]; - for (int i = 0; i < reorder1.length; i++) { - int ssi = reorder2[i]; - r[i] = reorder1[ssi]; // throws on exceptional index + public final Short512Shuffle rearrange(VectorShuffle shuffle) { + Short512Shuffle concreteShuffle = (Short512Shuffle) shuffle; + return (Short512Shuffle) toBitsVector().rearrange(concreteShuffle) + .toShuffle(vspecies(), false); + } + + @ForceInline + @Override + public final Short512Shuffle wrapIndexes() { + Short512Vector v = toBitsVector(); + if ((length() & (length() - 1)) == 0) { + v = (Short512Vector) v.lanewise(VectorOperators.AND, length() - 1); + } else { + v = (Short512Vector) v.blend(v.lanewise(VectorOperators.ADD, length()), + v.compare(VectorOperators.LT, 0)); } - return new Short512Shuffle(r); + return (Short512Shuffle) v.toShuffle(vspecies(), false); + } + + private static short[] prepare(int[] indices, int offset) { + short[] a = new short[VLENGTH]; + for (int i = 0; i < VLENGTH; i++) { + 
int si = indices[offset + i]; + si = partiallyWrapIndex(si, VLENGTH); + a[i] = (short)si; + } + return a; + } + + private static short[] prepare(IntUnaryOperator f) { + short[] a = new short[VLENGTH]; + for (int i = 0; i < VLENGTH; i++) { + int si = f.applyAsInt(i); + si = partiallyWrapIndex(si, VLENGTH); + a[i] = (short)si; + } + return a; + } + + private static boolean indicesInRange(short[] indices) { + int length = indices.length; + for (short si : indices) { + if (si >= (short)length || si < (short)(-length)) { + String msg = ("index "+si+"out of range ["+length+"] in "+ + java.util.Arrays.toString(indices)); + throw new AssertionError(msg); + } + } + return true; } } diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Short64Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Short64Vector.java index 905e313e95c..4837bd3d687 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Short64Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Short64Vector.java @@ -141,24 +141,15 @@ final class Short64Vector extends ShortVector { @ForceInline Short64Shuffle iotaShuffle() { return Short64Shuffle.IOTA; } + @Override @ForceInline Short64Shuffle iotaShuffle(int start, int step, boolean wrap) { - if (wrap) { - return (Short64Shuffle)VectorSupport.shuffleIota(ETYPE, Short64Shuffle.class, VSPECIES, VLENGTH, start, step, 1, - (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (VectorIntrinsics.wrapToRange(i*lstep + lstart, l)))); - } else { - return (Short64Shuffle)VectorSupport.shuffleIota(ETYPE, Short64Shuffle.class, VSPECIES, VLENGTH, start, step, 0, - (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (i*lstep + lstart))); - } + return (Short64Shuffle) iotaShuffleTemplate((short) start, (short) step, wrap); } @Override @ForceInline - Short64Shuffle shuffleFromBytes(byte[] reorder) { return new Short64Shuffle(reorder); } - - @Override - @ForceInline - Short64Shuffle shuffleFromArray(int[] 
indexes, int i) { return new Short64Shuffle(indexes, i); } + Short64Shuffle shuffleFromArray(int[] indices, int i) { return new Short64Shuffle(indices, i); } @Override @ForceInline @@ -357,9 +348,16 @@ final class Short64Vector extends ShortVector { return (long) super.reduceLanesTemplate(op, Short64Mask.class, (Short64Mask) m); // specialized } + @Override @ForceInline - public VectorShuffle toShuffle() { - return super.toShuffleTemplate(Short64Shuffle.class); // specialize + final VectorShuffle bitsToShuffle(AbstractSpecies dsp) { + return bitsToShuffleTemplate(dsp); + } + + @Override + @ForceInline + public final Short64Shuffle toShuffle() { + return (Short64Shuffle) toShuffle(vspecies(), false); } // Specialized unary testing @@ -797,23 +795,26 @@ final class Short64Vector extends ShortVector { static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM static final Class ETYPE = short.class; // used by the JVM - Short64Shuffle(byte[] reorder) { - super(VLENGTH, reorder); + Short64Shuffle(short[] indices) { + super(indices); + assert(VLENGTH == indices.length); + assert(indicesInRange(indices)); } - public Short64Shuffle(int[] reorder) { - super(VLENGTH, reorder); + Short64Shuffle(int[] indices, int i) { + this(prepare(indices, i)); } - public Short64Shuffle(int[] reorder, int i) { - super(VLENGTH, reorder, i); + Short64Shuffle(IntUnaryOperator fn) { + this(prepare(fn)); } - public Short64Shuffle(IntUnaryOperator fn) { - super(VLENGTH, fn); + short[] indices() { + return (short[])getPayload(); } @Override + @ForceInline public ShortSpecies vspecies() { return VSPECIES; } @@ -821,47 +822,105 @@ final class Short64Vector extends ShortVector { static { // There must be enough bits in the shuffle lanes to encode // VLENGTH valid indexes and VLENGTH exceptional ones. 
- assert(VLENGTH < Byte.MAX_VALUE); - assert(Byte.MIN_VALUE <= -VLENGTH); + assert(VLENGTH < Short.MAX_VALUE); + assert(Short.MIN_VALUE <= -VLENGTH); } static final Short64Shuffle IOTA = new Short64Shuffle(IDENTITY); @Override @ForceInline public Short64Vector toVector() { - return VectorSupport.shuffleToVector(VCLASS, ETYPE, Short64Shuffle.class, this, VLENGTH, - (s) -> ((Short64Vector)(((AbstractShuffle)(s)).toVectorTemplate()))); + return toBitsVector(); } @Override @ForceInline - public VectorShuffle cast(VectorSpecies s) { - AbstractSpecies species = (AbstractSpecies) s; - if (length() != species.laneCount()) - throw new IllegalArgumentException("VectorShuffle length and species length differ"); - int[] shuffleArray = toArray(); - return s.shuffleFromArray(shuffleArray, 0).check(s); + Short64Vector toBitsVector() { + return (Short64Vector) super.toBitsVectorTemplate(); + } + + @Override + Short64Vector toBitsVector0() { + return ((Short64Vector) vspecies().asIntegral().dummyVector()).vectorFactory(indices()); } @Override @ForceInline - public Short64Shuffle wrapIndexes() { - return VectorSupport.wrapShuffleIndexes(ETYPE, Short64Shuffle.class, this, VLENGTH, - (s) -> ((Short64Shuffle)(((AbstractShuffle)(s)).wrapIndexesTemplate()))); + public int laneSource(int i) { + return (int)toBitsVector().lane(i); + } + + @Override + @ForceInline + public void intoArray(int[] a, int offset) { + VectorSpecies species = IntVector.SPECIES_64; + Vector v = toBitsVector(); + v.convertShape(VectorOperators.S2I, species, 0) + .reinterpretAsInts() + .intoArray(a, offset); + v.convertShape(VectorOperators.S2I, species, 1) + .reinterpretAsInts() + .intoArray(a, offset + species.length()); + } + + @Override + @ForceInline + public final Short64Mask laneIsValid() { + return (Short64Mask) toBitsVector().compare(VectorOperators.GE, 0) + .cast(vspecies()); } @ForceInline @Override - public Short64Shuffle rearrange(VectorShuffle shuffle) { - Short64Shuffle s = (Short64Shuffle) shuffle; - 
byte[] reorder1 = reorder(); - byte[] reorder2 = s.reorder(); - byte[] r = new byte[reorder1.length]; - for (int i = 0; i < reorder1.length; i++) { - int ssi = reorder2[i]; - r[i] = reorder1[ssi]; // throws on exceptional index + public final Short64Shuffle rearrange(VectorShuffle shuffle) { + Short64Shuffle concreteShuffle = (Short64Shuffle) shuffle; + return (Short64Shuffle) toBitsVector().rearrange(concreteShuffle) + .toShuffle(vspecies(), false); + } + + @ForceInline + @Override + public final Short64Shuffle wrapIndexes() { + Short64Vector v = toBitsVector(); + if ((length() & (length() - 1)) == 0) { + v = (Short64Vector) v.lanewise(VectorOperators.AND, length() - 1); + } else { + v = (Short64Vector) v.blend(v.lanewise(VectorOperators.ADD, length()), + v.compare(VectorOperators.LT, 0)); } - return new Short64Shuffle(r); + return (Short64Shuffle) v.toShuffle(vspecies(), false); + } + + private static short[] prepare(int[] indices, int offset) { + short[] a = new short[VLENGTH]; + for (int i = 0; i < VLENGTH; i++) { + int si = indices[offset + i]; + si = partiallyWrapIndex(si, VLENGTH); + a[i] = (short)si; + } + return a; + } + + private static short[] prepare(IntUnaryOperator f) { + short[] a = new short[VLENGTH]; + for (int i = 0; i < VLENGTH; i++) { + int si = f.applyAsInt(i); + si = partiallyWrapIndex(si, VLENGTH); + a[i] = (short)si; + } + return a; + } + + private static boolean indicesInRange(short[] indices) { + int length = indices.length; + for (short si : indices) { + if (si >= (short)length || si < (short)(-length)) { + String msg = ("index "+si+"out of range ["+length+"] in "+ + java.util.Arrays.toString(indices)); + throw new AssertionError(msg); + } + } + return true; } } diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ShortMaxVector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ShortMaxVector.java index 5bb1beee6ed..9b90756c826 100644 --- 
a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ShortMaxVector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ShortMaxVector.java @@ -141,24 +141,15 @@ final class ShortMaxVector extends ShortVector { @ForceInline ShortMaxShuffle iotaShuffle() { return ShortMaxShuffle.IOTA; } + @Override @ForceInline ShortMaxShuffle iotaShuffle(int start, int step, boolean wrap) { - if (wrap) { - return (ShortMaxShuffle)VectorSupport.shuffleIota(ETYPE, ShortMaxShuffle.class, VSPECIES, VLENGTH, start, step, 1, - (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (VectorIntrinsics.wrapToRange(i*lstep + lstart, l)))); - } else { - return (ShortMaxShuffle)VectorSupport.shuffleIota(ETYPE, ShortMaxShuffle.class, VSPECIES, VLENGTH, start, step, 0, - (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (i*lstep + lstart))); - } + return (ShortMaxShuffle) iotaShuffleTemplate((short) start, (short) step, wrap); } @Override @ForceInline - ShortMaxShuffle shuffleFromBytes(byte[] reorder) { return new ShortMaxShuffle(reorder); } - - @Override - @ForceInline - ShortMaxShuffle shuffleFromArray(int[] indexes, int i) { return new ShortMaxShuffle(indexes, i); } + ShortMaxShuffle shuffleFromArray(int[] indices, int i) { return new ShortMaxShuffle(indices, i); } @Override @ForceInline @@ -357,9 +348,16 @@ final class ShortMaxVector extends ShortVector { return (long) super.reduceLanesTemplate(op, ShortMaxMask.class, (ShortMaxMask) m); // specialized } + @Override @ForceInline - public VectorShuffle toShuffle() { - return super.toShuffleTemplate(ShortMaxShuffle.class); // specialize + final VectorShuffle bitsToShuffle(AbstractSpecies dsp) { + return bitsToShuffleTemplate(dsp); + } + + @Override + @ForceInline + public final ShortMaxShuffle toShuffle() { + return (ShortMaxShuffle) toShuffle(vspecies(), false); } // Specialized unary testing @@ -791,23 +789,26 @@ final class ShortMaxVector extends ShortVector { static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM 
static final Class ETYPE = short.class; // used by the JVM - ShortMaxShuffle(byte[] reorder) { - super(VLENGTH, reorder); + ShortMaxShuffle(short[] indices) { + super(indices); + assert(VLENGTH == indices.length); + assert(indicesInRange(indices)); } - public ShortMaxShuffle(int[] reorder) { - super(VLENGTH, reorder); + ShortMaxShuffle(int[] indices, int i) { + this(prepare(indices, i)); } - public ShortMaxShuffle(int[] reorder, int i) { - super(VLENGTH, reorder, i); + ShortMaxShuffle(IntUnaryOperator fn) { + this(prepare(fn)); } - public ShortMaxShuffle(IntUnaryOperator fn) { - super(VLENGTH, fn); + short[] indices() { + return (short[])getPayload(); } @Override + @ForceInline public ShortSpecies vspecies() { return VSPECIES; } @@ -815,47 +816,105 @@ final class ShortMaxVector extends ShortVector { static { // There must be enough bits in the shuffle lanes to encode // VLENGTH valid indexes and VLENGTH exceptional ones. - assert(VLENGTH < Byte.MAX_VALUE); - assert(Byte.MIN_VALUE <= -VLENGTH); + assert(VLENGTH < Short.MAX_VALUE); + assert(Short.MIN_VALUE <= -VLENGTH); } static final ShortMaxShuffle IOTA = new ShortMaxShuffle(IDENTITY); @Override @ForceInline public ShortMaxVector toVector() { - return VectorSupport.shuffleToVector(VCLASS, ETYPE, ShortMaxShuffle.class, this, VLENGTH, - (s) -> ((ShortMaxVector)(((AbstractShuffle)(s)).toVectorTemplate()))); + return toBitsVector(); } @Override @ForceInline - public VectorShuffle cast(VectorSpecies s) { - AbstractSpecies species = (AbstractSpecies) s; - if (length() != species.laneCount()) - throw new IllegalArgumentException("VectorShuffle length and species length differ"); - int[] shuffleArray = toArray(); - return s.shuffleFromArray(shuffleArray, 0).check(s); + ShortMaxVector toBitsVector() { + return (ShortMaxVector) super.toBitsVectorTemplate(); + } + + @Override + ShortMaxVector toBitsVector0() { + return ((ShortMaxVector) vspecies().asIntegral().dummyVector()).vectorFactory(indices()); } @Override @ForceInline 
- public ShortMaxShuffle wrapIndexes() { - return VectorSupport.wrapShuffleIndexes(ETYPE, ShortMaxShuffle.class, this, VLENGTH, - (s) -> ((ShortMaxShuffle)(((AbstractShuffle)(s)).wrapIndexesTemplate()))); + public int laneSource(int i) { + return (int)toBitsVector().lane(i); + } + + @Override + @ForceInline + public void intoArray(int[] a, int offset) { + VectorSpecies species = IntVector.SPECIES_MAX; + Vector v = toBitsVector(); + v.convertShape(VectorOperators.S2I, species, 0) + .reinterpretAsInts() + .intoArray(a, offset); + v.convertShape(VectorOperators.S2I, species, 1) + .reinterpretAsInts() + .intoArray(a, offset + species.length()); + } + + @Override + @ForceInline + public final ShortMaxMask laneIsValid() { + return (ShortMaxMask) toBitsVector().compare(VectorOperators.GE, 0) + .cast(vspecies()); } @ForceInline @Override - public ShortMaxShuffle rearrange(VectorShuffle shuffle) { - ShortMaxShuffle s = (ShortMaxShuffle) shuffle; - byte[] reorder1 = reorder(); - byte[] reorder2 = s.reorder(); - byte[] r = new byte[reorder1.length]; - for (int i = 0; i < reorder1.length; i++) { - int ssi = reorder2[i]; - r[i] = reorder1[ssi]; // throws on exceptional index + public final ShortMaxShuffle rearrange(VectorShuffle shuffle) { + ShortMaxShuffle concreteShuffle = (ShortMaxShuffle) shuffle; + return (ShortMaxShuffle) toBitsVector().rearrange(concreteShuffle) + .toShuffle(vspecies(), false); + } + + @ForceInline + @Override + public final ShortMaxShuffle wrapIndexes() { + ShortMaxVector v = toBitsVector(); + if ((length() & (length() - 1)) == 0) { + v = (ShortMaxVector) v.lanewise(VectorOperators.AND, length() - 1); + } else { + v = (ShortMaxVector) v.blend(v.lanewise(VectorOperators.ADD, length()), + v.compare(VectorOperators.LT, 0)); } - return new ShortMaxShuffle(r); + return (ShortMaxShuffle) v.toShuffle(vspecies(), false); + } + + private static short[] prepare(int[] indices, int offset) { + short[] a = new short[VLENGTH]; + for (int i = 0; i < VLENGTH; i++) { + 
int si = indices[offset + i]; + si = partiallyWrapIndex(si, VLENGTH); + a[i] = (short)si; + } + return a; + } + + private static short[] prepare(IntUnaryOperator f) { + short[] a = new short[VLENGTH]; + for (int i = 0; i < VLENGTH; i++) { + int si = f.applyAsInt(i); + si = partiallyWrapIndex(si, VLENGTH); + a[i] = (short)si; + } + return a; + } + + private static boolean indicesInRange(short[] indices) { + int length = indices.length; + for (short si : indices) { + if (si >= (short)length || si < (short)(-length)) { + String msg = ("index "+si+"out of range ["+length+"] in "+ + java.util.Arrays.toString(indices)); + throw new AssertionError(msg); + } + } + return true; } } diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ShortVector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ShortVector.java index 552967d82e7..3cf848f31d0 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ShortVector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ShortVector.java @@ -1096,7 +1096,7 @@ public abstract class ShortVector extends AbstractVector { // and broadcast, but it would be more surprising not to continue // the obvious pattern started by unary and binary. 
- /** + /** * {@inheritDoc} * @see #lanewise(VectorOperators.Ternary,short,short,VectorMask) * @see #lanewise(VectorOperators.Ternary,Vector,short,VectorMask) @@ -2309,9 +2309,10 @@ public abstract class ShortVector extends AbstractVector { ShortVector that = (ShortVector) v1; that.check(this); Objects.checkIndex(origin, length() + 1); - VectorShuffle iota = iotaShuffle(); - VectorMask blendMask = iota.toVector().compare(VectorOperators.LT, (broadcast((short)(length() - origin)))); - iota = iotaShuffle(origin, 1, true); + ShortVector iotaVector = (ShortVector) iotaShuffle().toBitsVector(); + ShortVector filter = broadcast((short)(length() - origin)); + VectorMask blendMask = iotaVector.compare(VectorOperators.LT, filter); + AbstractShuffle iota = iotaShuffle(origin, 1, true); return that.rearrange(iota).blend(this.rearrange(iota), blendMask); } @@ -2339,9 +2340,10 @@ public abstract class ShortVector extends AbstractVector { @ForceInline ShortVector sliceTemplate(int origin) { Objects.checkIndex(origin, length() + 1); - VectorShuffle iota = iotaShuffle(); - VectorMask blendMask = iota.toVector().compare(VectorOperators.LT, (broadcast((short)(length() - origin)))); - iota = iotaShuffle(origin, 1, true); + ShortVector iotaVector = (ShortVector) iotaShuffle().toBitsVector(); + ShortVector filter = broadcast((short)(length() - origin)); + VectorMask blendMask = iotaVector.compare(VectorOperators.LT, filter); + AbstractShuffle iota = iotaShuffle(origin, 1, true); return vspecies().zero().blend(this.rearrange(iota), blendMask); } @@ -2360,10 +2362,10 @@ public abstract class ShortVector extends AbstractVector { ShortVector that = (ShortVector) w; that.check(this); Objects.checkIndex(origin, length() + 1); - VectorShuffle iota = iotaShuffle(); - VectorMask blendMask = iota.toVector().compare((part == 0) ? 
VectorOperators.GE : VectorOperators.LT, - (broadcast((short)(origin)))); - iota = iotaShuffle(-origin, 1, true); + ShortVector iotaVector = (ShortVector) iotaShuffle().toBitsVector(); + ShortVector filter = broadcast((short)origin); + VectorMask blendMask = iotaVector.compare((part == 0) ? VectorOperators.GE : VectorOperators.LT, filter); + AbstractShuffle iota = iotaShuffle(-origin, 1, true); return that.blend(this.rearrange(iota), blendMask); } @@ -2400,10 +2402,10 @@ public abstract class ShortVector extends AbstractVector { ShortVector unsliceTemplate(int origin) { Objects.checkIndex(origin, length() + 1); - VectorShuffle iota = iotaShuffle(); - VectorMask blendMask = iota.toVector().compare(VectorOperators.GE, - (broadcast((short)(origin)))); - iota = iotaShuffle(-origin, 1, true); + ShortVector iotaVector = (ShortVector) iotaShuffle().toBitsVector(); + ShortVector filter = broadcast((short)origin); + VectorMask blendMask = iotaVector.compare(VectorOperators.GE, filter); + AbstractShuffle iota = iotaShuffle(-origin, 1, true); return vspecies().zero().blend(this.rearrange(iota), blendMask); } @@ -2426,13 +2428,11 @@ public abstract class ShortVector extends AbstractVector { final > ShortVector rearrangeTemplate(Class shuffletype, S shuffle) { - @SuppressWarnings("unchecked") - S ws = (S) shuffle.wrapIndexes(); return VectorSupport.rearrangeOp( getClass(), shuffletype, null, short.class, length(), - this, ws, null, + this, shuffle, null, (v1, s_, m_) -> v1.uOp((i, a) -> { - int ei = s_.laneSource(i); + int ei = Integer.remainderUnsigned(s_.laneSource(i), v1.length()); return v1.lane(ei); })); } @@ -2455,13 +2455,11 @@ public abstract class ShortVector extends AbstractVector { M m) { m.check(masktype, this); - @SuppressWarnings("unchecked") - S ws = (S) shuffle.wrapIndexes(); return VectorSupport.rearrangeOp( getClass(), shuffletype, masktype, short.class, length(), - this, ws, m, + this, shuffle, m, (v1, s_, m_) -> v1.uOp((i, a) -> { - int ei = 
s_.laneSource(i); + int ei = Integer.remainderUnsigned(s_.laneSource(i), v1.length()); return !m_.laneIsSet(i) ? 0 : v1.lane(ei); })); } @@ -2482,30 +2480,29 @@ public abstract class ShortVector extends AbstractVector { S shuffle, ShortVector v) { VectorMask valid = shuffle.laneIsValid(); - @SuppressWarnings("unchecked") - S ws = (S) shuffle.wrapIndexes(); ShortVector r0 = VectorSupport.rearrangeOp( getClass(), shuffletype, null, short.class, length(), - this, ws, null, + this, shuffle, null, (v0, s_, m_) -> v0.uOp((i, a) -> { - int ei = s_.laneSource(i); + int ei = Integer.remainderUnsigned(s_.laneSource(i), v0.length()); return v0.lane(ei); })); ShortVector r1 = VectorSupport.rearrangeOp( getClass(), shuffletype, null, short.class, length(), - v, ws, null, + v, shuffle, null, (v1, s_, m_) -> v1.uOp((i, a) -> { - int ei = s_.laneSource(i); + int ei = Integer.remainderUnsigned(s_.laneSource(i), v1.length()); return v1.lane(ei); })); return r1.blend(r0, valid); } + @Override @ForceInline - private final - VectorShuffle toShuffle0(ShortSpecies dsp) { + final VectorShuffle bitsToShuffle0(AbstractSpecies dsp) { + assert(dsp.length() == vspecies().length()); short[] a = toArray(); int[] sa = new int[a.length]; for (int i = 0; i < a.length; i++) { @@ -2514,16 +2511,18 @@ public abstract class ShortVector extends AbstractVector { return VectorShuffle.fromArray(dsp, sa, 0); } - /*package-private*/ @ForceInline - final - VectorShuffle toShuffleTemplate(Class shuffleType) { - ShortSpecies vsp = vspecies(); - return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST, - getClass(), short.class, length(), - shuffleType, byte.class, length(), - this, vsp, - ShortVector::toShuffle0); + final + VectorShuffle toShuffle(AbstractSpecies dsp, boolean wrap) { + assert(dsp.elementSize() == vspecies().elementSize()); + ShortVector idx = this; + ShortVector wrapped = idx.lanewise(VectorOperators.AND, length() - 1); + if (!wrap) { + ShortVector wrappedEx = 
wrapped.lanewise(VectorOperators.SUB, length()); + VectorMask inBound = wrapped.compare(VectorOperators.EQ, idx); + wrapped = wrappedEx.blend(wrapped, inBound); + } + return wrapped.bitsToShuffle(dsp); } /** @@ -4210,9 +4209,10 @@ public abstract class ShortVector extends AbstractVector { private ShortSpecies(VectorShape shape, Class vectorType, Class> maskType, + Class> shuffleType, Function vectorFactory) { super(shape, LaneType.of(short.class), - vectorType, maskType, + vectorType, maskType, shuffleType, vectorFactory); assert(this.elementSize() == Short.SIZE); } @@ -4498,6 +4498,7 @@ public abstract class ShortVector extends AbstractVector { = new ShortSpecies(VectorShape.S_64_BIT, Short64Vector.class, Short64Vector.Short64Mask.class, + Short64Vector.Short64Shuffle.class, Short64Vector::new); /** Species representing {@link ShortVector}s of {@link VectorShape#S_128_BIT VectorShape.S_128_BIT}. */ @@ -4505,6 +4506,7 @@ public abstract class ShortVector extends AbstractVector { = new ShortSpecies(VectorShape.S_128_BIT, Short128Vector.class, Short128Vector.Short128Mask.class, + Short128Vector.Short128Shuffle.class, Short128Vector::new); /** Species representing {@link ShortVector}s of {@link VectorShape#S_256_BIT VectorShape.S_256_BIT}. */ @@ -4512,6 +4514,7 @@ public abstract class ShortVector extends AbstractVector { = new ShortSpecies(VectorShape.S_256_BIT, Short256Vector.class, Short256Vector.Short256Mask.class, + Short256Vector.Short256Shuffle.class, Short256Vector::new); /** Species representing {@link ShortVector}s of {@link VectorShape#S_512_BIT VectorShape.S_512_BIT}. */ @@ -4519,6 +4522,7 @@ public abstract class ShortVector extends AbstractVector { = new ShortSpecies(VectorShape.S_512_BIT, Short512Vector.class, Short512Vector.Short512Mask.class, + Short512Vector.Short512Shuffle.class, Short512Vector::new); /** Species representing {@link ShortVector}s of {@link VectorShape#S_Max_BIT VectorShape.S_Max_BIT}. 
*/ @@ -4526,6 +4530,7 @@ public abstract class ShortVector extends AbstractVector { = new ShortSpecies(VectorShape.S_Max_BIT, ShortMaxVector.class, ShortMaxVector.ShortMaxMask.class, + ShortMaxVector.ShortMaxShuffle.class, ShortMaxVector::new); /** diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/VectorShape.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/VectorShape.java index 1ffbcef821a..89e0d38bcb0 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/VectorShape.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/VectorShape.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2017, 2023, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -124,6 +124,7 @@ public enum VectorShape { * @throws IllegalArgumentException if no such vector shape exists * @see #vectorBitSize() */ + @ForceInline public static VectorShape forBitSize(int bitSize) { switch (bitSize) { case 64: diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/VectorShuffle.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/VectorShuffle.java index b046874c390..bc1780a81ac 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/VectorShuffle.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/VectorShuffle.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2017, 2023, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -133,8 +133,8 @@ import java.util.function.IntUnaryOperator; */ @SuppressWarnings("exports") public abstract class VectorShuffle extends jdk.internal.vm.vector.VectorSupport.VectorShuffle { - VectorShuffle(byte[] reorder) { - super(reorder); + VectorShuffle(Object indices) { + super(indices); } /** @@ -556,7 +556,7 @@ public abstract class VectorShuffle extends jdk.internal.vm.vector.VectorSupp * @param i the lane index * @return the {@code int} lane element at lane index {@code i} */ - public int laneSource(int i) { return toArray()[i]; } + public abstract int laneSource(int i); /** * Rearranges the lane elements of this shuffle selecting lane indexes diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/VectorSpecies.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/VectorSpecies.java index 6cfe5f6d9d9..03cb0ae559e 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/VectorSpecies.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/VectorSpecies.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017, 2022, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2017, 2023, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -24,6 +24,8 @@ */ package jdk.incubator.vector; +import jdk.internal.vm.annotation.ForceInline; + import java.lang.foreign.MemorySegment; import java.nio.ByteOrder; @@ -342,6 +344,7 @@ public interface VectorSpecies { * @see #withLanes(Class) * @see #withShape(VectorShape) */ + @ForceInline static VectorSpecies of(Class elementType, VectorShape shape) { LaneType laneType = LaneType.of(elementType); return AbstractSpecies.findSpecies(elementType, laneType, shape); @@ -367,6 +370,7 @@ public interface VectorSpecies { * or if the given type is not a valid {@code ETYPE} * @see VectorSpecies#ofPreferred(Class) */ + @ForceInline static VectorSpecies ofLargestShape(Class etype) { return VectorSpecies.of(etype, VectorShape.largestShapeFor(etype)); } @@ -410,6 +414,7 @@ public interface VectorSpecies { * @see VectorShape#preferredShape() * @see VectorSpecies#ofLargestShape(Class) */ + @ForceInline public static VectorSpecies ofPreferred(Class etype) { return of(etype, VectorShape.preferredShape()); } @@ -432,6 +437,7 @@ public interface VectorSpecies { * if the given {@code elementType} argument is not * a valid vector {@code ETYPE} */ + @ForceInline static int elementSize(Class elementType) { return LaneType.of(elementType).elementSize; } diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/X-Vector.java.template b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/X-Vector.java.template index 7eb1d9810b1..835868e61cd 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/X-Vector.java.template +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/X-Vector.java.template @@ -1229,7 +1229,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { // and broadcast, but it would be more surprising not to continue // the obvious pattern started by unary and binary. 
- /** + /** * {@inheritDoc} * @see #lanewise(VectorOperators.Ternary,$type$,$type$,VectorMask) * @see #lanewise(VectorOperators.Ternary,Vector,$type$,VectorMask) @@ -2685,9 +2685,15 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { $abstractvectortype$ that = ($abstractvectortype$) v1; that.check(this); Objects.checkIndex(origin, length() + 1); - VectorShuffle<$Boxtype$> iota = iotaShuffle(); - VectorMask<$Boxtype$> blendMask = iota.toVector().compare(VectorOperators.LT, (broadcast(($type$)(length() - origin)))); - iota = iotaShuffle(origin, 1, true); + $Bitstype$Vector iotaVector = ($Bitstype$Vector) iotaShuffle().toBitsVector(); +#if[FP] + $Bitstype$Vector filter = $Bitstype$Vector.broadcast(($Bitstype$Vector.$Bitstype$Species) vspecies().asIntegral(), ($bitstype$)(length() - origin)); + VectorMask<$Boxtype$> blendMask = iotaVector.compare(VectorOperators.LT, filter).cast(vspecies()); +#else[FP] + $abstractvectortype$ filter = broadcast(($type$)(length() - origin)); + VectorMask<$Boxtype$> blendMask = iotaVector.compare(VectorOperators.LT, filter); +#end[FP] + AbstractShuffle<$Boxtype$> iota = iotaShuffle(origin, 1, true); return that.rearrange(iota).blend(this.rearrange(iota), blendMask); } @@ -2715,9 +2721,15 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { @ForceInline $abstractvectortype$ sliceTemplate(int origin) { Objects.checkIndex(origin, length() + 1); - VectorShuffle<$Boxtype$> iota = iotaShuffle(); - VectorMask<$Boxtype$> blendMask = iota.toVector().compare(VectorOperators.LT, (broadcast(($type$)(length() - origin)))); - iota = iotaShuffle(origin, 1, true); + $Bitstype$Vector iotaVector = ($Bitstype$Vector) iotaShuffle().toBitsVector(); +#if[FP] + $Bitstype$Vector filter = $Bitstype$Vector.broadcast(($Bitstype$Vector.$Bitstype$Species) vspecies().asIntegral(), ($bitstype$)(length() - origin)); + VectorMask<$Boxtype$> blendMask = iotaVector.compare(VectorOperators.LT, 
filter).cast(vspecies()); +#else[FP] + $abstractvectortype$ filter = broadcast(($type$)(length() - origin)); + VectorMask<$Boxtype$> blendMask = iotaVector.compare(VectorOperators.LT, filter); +#end[FP] + AbstractShuffle<$Boxtype$> iota = iotaShuffle(origin, 1, true); return vspecies().zero().blend(this.rearrange(iota), blendMask); } @@ -2736,10 +2748,15 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { $abstractvectortype$ that = ($abstractvectortype$) w; that.check(this); Objects.checkIndex(origin, length() + 1); - VectorShuffle<$Boxtype$> iota = iotaShuffle(); - VectorMask<$Boxtype$> blendMask = iota.toVector().compare((part == 0) ? VectorOperators.GE : VectorOperators.LT, - (broadcast(($type$)(origin)))); - iota = iotaShuffle(-origin, 1, true); + $Bitstype$Vector iotaVector = ($Bitstype$Vector) iotaShuffle().toBitsVector(); +#if[FP] + $Bitstype$Vector filter = $Bitstype$Vector.broadcast(($Bitstype$Vector.$Bitstype$Species) vspecies().asIntegral(), ($bitstype$)origin); + VectorMask<$Boxtype$> blendMask = iotaVector.compare((part == 0) ? VectorOperators.GE : VectorOperators.LT, filter).cast(vspecies()); +#else[FP] + $abstractvectortype$ filter = broadcast(($type$)origin); + VectorMask<$Boxtype$> blendMask = iotaVector.compare((part == 0) ? 
VectorOperators.GE : VectorOperators.LT, filter); +#end[FP] + AbstractShuffle<$Boxtype$> iota = iotaShuffle(-origin, 1, true); return that.blend(this.rearrange(iota), blendMask); } @@ -2776,10 +2793,15 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { $abstractvectortype$ unsliceTemplate(int origin) { Objects.checkIndex(origin, length() + 1); - VectorShuffle<$Boxtype$> iota = iotaShuffle(); - VectorMask<$Boxtype$> blendMask = iota.toVector().compare(VectorOperators.GE, - (broadcast(($type$)(origin)))); - iota = iotaShuffle(-origin, 1, true); + $Bitstype$Vector iotaVector = ($Bitstype$Vector) iotaShuffle().toBitsVector(); +#if[FP] + $Bitstype$Vector filter = $Bitstype$Vector.broadcast(($Bitstype$Vector.$Bitstype$Species) vspecies().asIntegral(), ($bitstype$)origin); + VectorMask<$Boxtype$> blendMask = iotaVector.compare(VectorOperators.GE, filter).cast(vspecies()); +#else[FP] + $abstractvectortype$ filter = broadcast(($type$)origin); + VectorMask<$Boxtype$> blendMask = iotaVector.compare(VectorOperators.GE, filter); +#end[FP] + AbstractShuffle<$Boxtype$> iota = iotaShuffle(-origin, 1, true); return vspecies().zero().blend(this.rearrange(iota), blendMask); } @@ -2802,13 +2824,11 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { final > $abstractvectortype$ rearrangeTemplate(Class shuffletype, S shuffle) { - @SuppressWarnings("unchecked") - S ws = (S) shuffle.wrapIndexes(); return VectorSupport.rearrangeOp( getClass(), shuffletype, null, $type$.class, length(), - this, ws, null, + this, shuffle, null, (v1, s_, m_) -> v1.uOp((i, a) -> { - int ei = s_.laneSource(i); + int ei = Integer.remainderUnsigned(s_.laneSource(i), v1.length()); return v1.lane(ei); })); } @@ -2831,13 +2851,11 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { M m) { m.check(masktype, this); - @SuppressWarnings("unchecked") - S ws = (S) shuffle.wrapIndexes(); return VectorSupport.rearrangeOp( getClass(), 
shuffletype, masktype, $type$.class, length(), - this, ws, m, + this, shuffle, m, (v1, s_, m_) -> v1.uOp((i, a) -> { - int ei = s_.laneSource(i); + int ei = Integer.remainderUnsigned(s_.laneSource(i), v1.length()); return !m_.laneIsSet(i) ? 0 : v1.lane(ei); })); } @@ -2858,48 +2876,61 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { S shuffle, $abstractvectortype$ v) { VectorMask<$Boxtype$> valid = shuffle.laneIsValid(); - @SuppressWarnings("unchecked") - S ws = (S) shuffle.wrapIndexes(); $abstractvectortype$ r0 = VectorSupport.rearrangeOp( getClass(), shuffletype, null, $type$.class, length(), - this, ws, null, + this, shuffle, null, (v0, s_, m_) -> v0.uOp((i, a) -> { - int ei = s_.laneSource(i); + int ei = Integer.remainderUnsigned(s_.laneSource(i), v0.length()); return v0.lane(ei); })); $abstractvectortype$ r1 = VectorSupport.rearrangeOp( getClass(), shuffletype, null, $type$.class, length(), - v, ws, null, + v, shuffle, null, (v1, s_, m_) -> v1.uOp((i, a) -> { - int ei = s_.laneSource(i); + int ei = Integer.remainderUnsigned(s_.laneSource(i), v1.length()); return v1.lane(ei); })); return r1.blend(r0, valid); } + @Override @ForceInline - private final - VectorShuffle<$Boxtype$> toShuffle0($Type$Species dsp) { + final VectorShuffle bitsToShuffle0(AbstractSpecies dsp) { +#if[FP] + throw new AssertionError(); +#else[FP] + assert(dsp.length() == vspecies().length()); $type$[] a = toArray(); int[] sa = new int[a.length]; for (int i = 0; i < a.length; i++) { sa[i] = (int) a[i]; } return VectorShuffle.fromArray(dsp, sa, 0); +#end[FP] } - /*package-private*/ @ForceInline - final - VectorShuffle<$Boxtype$> toShuffleTemplate(Class shuffleType) { - $Type$Species vsp = vspecies(); - return VectorSupport.convert(VectorSupport.VECTOR_OP_CAST, - getClass(), $type$.class, length(), - shuffleType, byte.class, length(), - this, vsp, - $Type$Vector::toShuffle0); + final + VectorShuffle toShuffle(AbstractSpecies dsp, boolean wrap) { + 
assert(dsp.elementSize() == vspecies().elementSize()); +#if[float] + IntVector idx = convert(VectorOperators.F2I, 0).reinterpretAsInts(); +#end[float] +#if[double] + LongVector idx = convert(VectorOperators.D2L, 0).reinterpretAsLongs(); +#end[double] +#if[!FP] + $Type$Vector idx = this; +#end[!FP] + $Bitstype$Vector wrapped = idx.lanewise(VectorOperators.AND, length() - 1); + if (!wrap) { + $Bitstype$Vector wrappedEx = wrapped.lanewise(VectorOperators.SUB, length()); + VectorMask<$Boxbitstype$> inBound = wrapped.compare(VectorOperators.EQ, idx); + wrapped = wrappedEx.blend(wrapped, inBound); + } + return wrapped.bitsToShuffle(dsp); } /** @@ -5447,9 +5478,10 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { private $Type$Species(VectorShape shape, Class vectorType, Class> maskType, + Class> shuffleType, Function vectorFactory) { super(shape, LaneType.of($type$.class), - vectorType, maskType, + vectorType, maskType, shuffleType, vectorFactory); assert(this.elementSize() == $Boxtype$.SIZE); } @@ -5742,6 +5774,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { = new $Type$Species(VectorShape.S_64_BIT, $Type$64Vector.class, $Type$64Vector.$Type$64Mask.class, + $Type$64Vector.$Type$64Shuffle.class, $Type$64Vector::new); /** Species representing {@link $Type$Vector}s of {@link VectorShape#S_128_BIT VectorShape.S_128_BIT}. */ @@ -5749,6 +5782,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { = new $Type$Species(VectorShape.S_128_BIT, $Type$128Vector.class, $Type$128Vector.$Type$128Mask.class, + $Type$128Vector.$Type$128Shuffle.class, $Type$128Vector::new); /** Species representing {@link $Type$Vector}s of {@link VectorShape#S_256_BIT VectorShape.S_256_BIT}. 
*/ @@ -5756,6 +5790,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { = new $Type$Species(VectorShape.S_256_BIT, $Type$256Vector.class, $Type$256Vector.$Type$256Mask.class, + $Type$256Vector.$Type$256Shuffle.class, $Type$256Vector::new); /** Species representing {@link $Type$Vector}s of {@link VectorShape#S_512_BIT VectorShape.S_512_BIT}. */ @@ -5763,6 +5798,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { = new $Type$Species(VectorShape.S_512_BIT, $Type$512Vector.class, $Type$512Vector.$Type$512Mask.class, + $Type$512Vector.$Type$512Shuffle.class, $Type$512Vector::new); /** Species representing {@link $Type$Vector}s of {@link VectorShape#S_Max_BIT VectorShape.S_Max_BIT}. */ @@ -5770,6 +5806,7 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { = new $Type$Species(VectorShape.S_Max_BIT, $Type$MaxVector.class, $Type$MaxVector.$Type$MaxMask.class, + $Type$MaxVector.$Type$MaxShuffle.class, $Type$MaxVector::new); /** diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/X-VectorBits.java.template b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/X-VectorBits.java.template index 9752a795ea7..216f28fde87 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/X-VectorBits.java.template +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/X-VectorBits.java.template @@ -143,24 +143,23 @@ final class $vectortype$ extends $abstractvectortype$ { @ForceInline $shuffletype$ iotaShuffle() { return $shuffletype$.IOTA; } + @Override @ForceInline $shuffletype$ iotaShuffle(int start, int step, boolean wrap) { - if (wrap) { - return ($shuffletype$)VectorSupport.shuffleIota(ETYPE, $shuffletype$.class, VSPECIES, VLENGTH, start, step, 1, - (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (VectorIntrinsics.wrapToRange(i*lstep + lstart, l)))); - } else { - return ($shuffletype$)VectorSupport.shuffleIota(ETYPE, $shuffletype$.class, 
VSPECIES, VLENGTH, start, step, 0, - (l, lstart, lstep, s) -> s.shuffleFromOp(i -> (i*lstep + lstart))); - } +#if[byte] + return ($shuffletype$) iotaShuffleTemplate((byte) start, (byte) step, wrap); +#end[byte] +#if[short] + return ($shuffletype$) iotaShuffleTemplate((short) start, (short) step, wrap); +#end[short] +#if[!byteOrShort] + return ($shuffletype$) iotaShuffleTemplate(start, step, wrap); +#end[!byteOrShort] } @Override @ForceInline - $shuffletype$ shuffleFromBytes(byte[] reorder) { return new $shuffletype$(reorder); } - - @Override - @ForceInline - $shuffletype$ shuffleFromArray(int[] indexes, int i) { return new $shuffletype$(indexes, i); } + $shuffletype$ shuffleFromArray(int[] indices, int i) { return new $shuffletype$(indices, i); } @Override @ForceInline @@ -361,9 +360,20 @@ final class $vectortype$ extends $abstractvectortype$ { return (long) super.reduceLanesTemplate(op, $masktype$.class, ($masktype$) m); // specialized } + @Override @ForceInline - public VectorShuffle<$Boxtype$> toShuffle() { - return super.toShuffleTemplate($shuffletype$.class); // specialize + final VectorShuffle bitsToShuffle(AbstractSpecies dsp) { +#if[FP] + throw new AssertionError(); +#else[FP] + return bitsToShuffleTemplate(dsp); +#end[FP] + } + + @Override + @ForceInline + public final $shuffletype$ toShuffle() { + return ($shuffletype$) toShuffle(vspecies(), false); } // Specialized unary testing @@ -1077,25 +1087,28 @@ final class $vectortype$ extends $abstractvectortype$ { static final class $shuffletype$ extends AbstractShuffle<$Boxtype$> { static final int VLENGTH = VSPECIES.laneCount(); // used by the JVM - static final Class<$Boxtype$> ETYPE = $type$.class; // used by the JVM + static final Class<$Boxbitstype$> ETYPE = $bitstype$.class; // used by the JVM - $shuffletype$(byte[] reorder) { - super(VLENGTH, reorder); + $shuffletype$($bitstype$[] indices) { + super(indices); + assert(VLENGTH == indices.length); + assert(indicesInRange(indices)); } - public 
$shuffletype$(int[] reorder) { - super(VLENGTH, reorder); + $shuffletype$(int[] indices, int i) { + this(prepare(indices, i)); } - public $shuffletype$(int[] reorder, int i) { - super(VLENGTH, reorder, i); + $shuffletype$(IntUnaryOperator fn) { + this(prepare(fn)); } - public $shuffletype$(IntUnaryOperator fn) { - super(VLENGTH, fn); + $bitstype$[] indices() { + return ($bitstype$[])getPayload(); } @Override + @ForceInline public $Type$Species vspecies() { return VSPECIES; } @@ -1103,47 +1116,166 @@ final class $vectortype$ extends $abstractvectortype$ { static { // There must be enough bits in the shuffle lanes to encode // VLENGTH valid indexes and VLENGTH exceptional ones. - assert(VLENGTH < Byte.MAX_VALUE); - assert(Byte.MIN_VALUE <= -VLENGTH); + assert(VLENGTH < $Boxbitstype$.MAX_VALUE); + assert($Boxbitstype$.MIN_VALUE <= -VLENGTH); } static final $shuffletype$ IOTA = new $shuffletype$(IDENTITY); +#if[FP] @Override @ForceInline public $vectortype$ toVector() { - return VectorSupport.shuffleToVector(VCLASS, ETYPE, $shuffletype$.class, this, VLENGTH, - (s) -> (($vectortype$)(((AbstractShuffle<$Boxtype$>)(s)).toVectorTemplate()))); + return ($vectortype$) toBitsVector().castShape(vspecies(), 0); + } +#else[FP] + @Override + @ForceInline + public $vectortype$ toVector() { + return toBitsVector(); + } +#end[FP] + + @Override + @ForceInline + $bitsvectortype$ toBitsVector() { + return ($bitsvectortype$) super.toBitsVectorTemplate(); + } + + @Override + $bitsvectortype$ toBitsVector0() { + return (($bitsvectortype$) vspecies().asIntegral().dummyVector()).vectorFactory(indices()); } @Override @ForceInline - public VectorShuffle cast(VectorSpecies s) { - AbstractSpecies species = (AbstractSpecies) s; - if (length() != species.laneCount()) - throw new IllegalArgumentException("VectorShuffle length and species length differ"); - int[] shuffleArray = toArray(); - return s.shuffleFromArray(shuffleArray, 0).check(s); + public int laneSource(int i) { + return 
(int)toBitsVector().lane(i); } @Override @ForceInline - public $shuffletype$ wrapIndexes() { - return VectorSupport.wrapShuffleIndexes(ETYPE, $shuffletype$.class, this, VLENGTH, - (s) -> (($shuffletype$)(((AbstractShuffle<$Boxtype$>)(s)).wrapIndexesTemplate()))); - } - - @ForceInline - @Override - public $shuffletype$ rearrange(VectorShuffle<$Boxtype$> shuffle) { - $shuffletype$ s = ($shuffletype$) shuffle; - byte[] reorder1 = reorder(); - byte[] reorder2 = s.reorder(); - byte[] r = new byte[reorder1.length]; - for (int i = 0; i < reorder1.length; i++) { - int ssi = reorder2[i]; - r[i] = reorder1[ssi]; // throws on exceptional index + public void intoArray(int[] a, int offset) { +#if[byte] + VectorSpecies species = IntVector.SPECIES_$BITS$; + Vector v = toBitsVector(); + v.convertShape(VectorOperators.B2I, species, 0) + .reinterpretAsInts() + .intoArray(a, offset); + v.convertShape(VectorOperators.B2I, species, 1) + .reinterpretAsInts() + .intoArray(a, offset + species.length()); + v.convertShape(VectorOperators.B2I, species, 2) + .reinterpretAsInts() + .intoArray(a, offset + species.length() * 2); + v.convertShape(VectorOperators.B2I, species, 3) + .reinterpretAsInts() + .intoArray(a, offset + species.length() * 3); +#end[byte] +#if[short] + VectorSpecies species = IntVector.SPECIES_$BITS$; + Vector v = toBitsVector(); + v.convertShape(VectorOperators.S2I, species, 0) + .reinterpretAsInts() + .intoArray(a, offset); + v.convertShape(VectorOperators.S2I, species, 1) + .reinterpretAsInts() + .intoArray(a, offset + species.length()); +#end[short] +#if[intOrFloat] + toBitsVector().intoArray(a, offset); +#end[intOrFloat] +#if[longOrDouble] + switch (length()) { + case 1 -> a[offset] = laneSource(0); + case 2 -> toBitsVector() + .convertShape(VectorOperators.L2I, IntVector.SPECIES_64, 0) + .reinterpretAsInts() + .intoArray(a, offset); + case 4 -> toBitsVector() + .convertShape(VectorOperators.L2I, IntVector.SPECIES_128, 0) + .reinterpretAsInts() + .intoArray(a, offset); 
+ case 8 -> toBitsVector() + .convertShape(VectorOperators.L2I, IntVector.SPECIES_256, 0) + .reinterpretAsInts() + .intoArray(a, offset); + case 16 -> toBitsVector() + .convertShape(VectorOperators.L2I, IntVector.SPECIES_512, 0) + .reinterpretAsInts() + .intoArray(a, offset); + default -> { + VectorIntrinsics.checkFromIndexSize(offset, length(), a.length); + for (int i = 0; i < length(); i++) { + a[offset + i] = laneSource(i); + } + } } - return new $shuffletype$(r); +#end[longOrDouble] + } + + @Override + @ForceInline + public final $masktype$ laneIsValid() { + return ($masktype$) toBitsVector().compare(VectorOperators.GE, 0) + .cast(vspecies()); + } + + @ForceInline + @Override + public final $shuffletype$ rearrange(VectorShuffle<$Boxtype$> shuffle) { + $shuffletype$ concreteShuffle = ($shuffletype$) shuffle; +#if[FP] + return ($shuffletype$) toBitsVector().rearrange(concreteShuffle.cast($Bitstype$Vector.SPECIES_$BITS$)) + .toShuffle(vspecies(), false); +#else[FP] + return ($shuffletype$) toBitsVector().rearrange(concreteShuffle) + .toShuffle(vspecies(), false); +#end[FP] + } + + @ForceInline + @Override + public final $shuffletype$ wrapIndexes() { + $bitsvectortype$ v = toBitsVector(); + if ((length() & (length() - 1)) == 0) { + v = ($bitsvectortype$) v.lanewise(VectorOperators.AND, length() - 1); + } else { + v = ($bitsvectortype$) v.blend(v.lanewise(VectorOperators.ADD, length()), + v.compare(VectorOperators.LT, 0)); + } + return ($shuffletype$) v.toShuffle(vspecies(), false); + } + + private static $bitstype$[] prepare(int[] indices, int offset) { + $bitstype$[] a = new $bitstype$[VLENGTH]; + for (int i = 0; i < VLENGTH; i++) { + int si = indices[offset + i]; + si = partiallyWrapIndex(si, VLENGTH); + a[i] = ($bitstype$)si; + } + return a; + } + + private static $bitstype$[] prepare(IntUnaryOperator f) { + $bitstype$[] a = new $bitstype$[VLENGTH]; + for (int i = 0; i < VLENGTH; i++) { + int si = f.applyAsInt(i); + si = partiallyWrapIndex(si, VLENGTH); + a[i] = 
($bitstype$)si;
+            }
+            return a;
+        }
+
+        private static boolean indicesInRange($bitstype$[] indices) { // assert-only helper: returns true, or throws AssertionError naming the first bad index
+            int length = indices.length;
+            for ($bitstype$ si : indices) {
+                if (si >= ($bitstype$)length || si < ($bitstype$)(-length)) { // accepted lanes are [-length, length); negative values encode exceptional indexes
+                    String msg = ("index "+si+" out of range ["+length+"] in "+
+                                  java.util.Arrays.toString(indices));
+                    throw new AssertionError(msg);
+                }
+            }
+            return true;
         }
     }