diff --git a/src/hotspot/cpu/aarch64/aarch64_vector.ad b/src/hotspot/cpu/aarch64/aarch64_vector.ad
index ebf587f2121..210efa0b760 100644
--- a/src/hotspot/cpu/aarch64/aarch64_vector.ad
+++ b/src/hotspot/cpu/aarch64/aarch64_vector.ad
@@ -3742,6 +3742,68 @@ instruct reinterpret_resize_gt128b(vReg dst, vReg src, pReg ptmp, rFlagsReg cr)
   ins_pipe(pipe_slow);
 %}
 
+// ---------------------------- Vector zero extend --------------------------------
+
+instruct vzeroExtBtoX(vReg dst, vReg src) %{
+  match(Set dst (VectorUCastB2X src));
+  format %{ "vzeroExtBtoX $dst, $src" %}
+  ins_encode %{
+    BasicType bt = Matcher::vector_element_basic_type(this);
+    assert(bt == T_SHORT || bt == T_INT || bt == T_LONG, "must be");
+    uint length_in_bytes = Matcher::vector_length_in_bytes(this);
+    if (VM_Version::use_neon_for_vector(length_in_bytes)) {
+      // 4B to 4S/4I, 8B to 8S
+      __ neon_vector_extend($dst$$FloatRegister, bt, length_in_bytes,
+                            $src$$FloatRegister, T_BYTE, /* is_unsigned */ true);
+    } else {
+      assert(UseSVE > 0, "must be sve");
+      __ sve_vector_extend($dst$$FloatRegister, __ elemType_to_regVariant(bt),
+                           $src$$FloatRegister, __ B, /* is_unsigned */ true);
+    }
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vzeroExtStoX(vReg dst, vReg src) %{
+  match(Set dst (VectorUCastS2X src));
+  format %{ "vzeroExtStoX $dst, $src" %}
+  ins_encode %{
+    BasicType bt = Matcher::vector_element_basic_type(this);
+    assert(bt == T_INT || bt == T_LONG, "must be");
+    uint length_in_bytes = Matcher::vector_length_in_bytes(this);
+    if (VM_Version::use_neon_for_vector(length_in_bytes)) {
+      // 4S to 4I
+      __ neon_vector_extend($dst$$FloatRegister, T_INT, length_in_bytes,
+                            $src$$FloatRegister, T_SHORT, /* is_unsigned */ true);
+    } else {
+      assert(UseSVE > 0, "must be sve");
+      __ sve_vector_extend($dst$$FloatRegister, __ elemType_to_regVariant(bt),
+                           $src$$FloatRegister, __ H, /* is_unsigned */ true);
+    }
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct vzeroExtItoX(vReg dst, vReg src) %{
+  match(Set dst (VectorUCastI2X src));
+  format %{ "vzeroExtItoX $dst, $src" %}
+  ins_encode %{
+    BasicType bt = Matcher::vector_element_basic_type(this);
+    assert(bt == T_LONG, "must be");
+    uint length_in_bytes = Matcher::vector_length_in_bytes(this);
+    if (VM_Version::use_neon_for_vector(length_in_bytes)) {
+      // 2I to 2L
+      __ neon_vector_extend($dst$$FloatRegister, T_LONG, length_in_bytes,
+                            $src$$FloatRegister, T_INT, /* is_unsigned */ true);
+    } else {
+      assert(UseSVE > 0, "must be sve");
+      __ sve_vector_extend($dst$$FloatRegister, __ D,
+                           $src$$FloatRegister, __ S, /* is_unsigned */ true);
+    }
+  %}
+  ins_pipe(pipe_slow);
+%}
+
 // ------------------------------ Vector cast ----------------------------------
 
 // VectorCastB2X
diff --git a/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4 b/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4
index e27ca993e60..3f4ed020f55 100644
--- a/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4
+++ b/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4
@@ -2276,6 +2276,33 @@ instruct reinterpret_resize_gt128b(vReg dst, vReg src, pReg ptmp, rFlagsReg cr)
   ins_pipe(pipe_slow);
 %}
 
+// ---------------------------- Vector zero extend --------------------------------
+dnl VECTOR_ZERO_EXTEND($1,      $2,     $3,     $4,       $5,       $6,        $7          )
+dnl VECTOR_ZERO_EXTEND(op_name, dst_bt, src_bt, dst_size, src_size, assertion, neon_comment)
+define(`VECTOR_ZERO_EXTEND', `
+instruct vzeroExt$1toX(vReg dst, vReg src) %{
+  match(Set dst (VectorUCast`$1'2X src));
+  format %{ "vzeroExt$1toX $dst, $src" %}
+  ins_encode %{
+    BasicType bt = Matcher::vector_element_basic_type(this);
+    assert($6, "must be");
+    uint length_in_bytes = Matcher::vector_length_in_bytes(this);
+    if (VM_Version::use_neon_for_vector(length_in_bytes)) {
+      // $7
+      __ neon_vector_extend($dst$$FloatRegister, $2, length_in_bytes,
+                            $src$$FloatRegister, $3, /* is_unsigned */ true);
+    } else {
+      assert(UseSVE > 0, "must be sve");
+      __ sve_vector_extend($dst$$FloatRegister, __ $4,
+                           $src$$FloatRegister, __ $5, /* is_unsigned */ true);
+    }
+  %}
+  ins_pipe(pipe_slow);
+%}')dnl
+VECTOR_ZERO_EXTEND(B, bt, T_BYTE, elemType_to_regVariant(bt), B, bt == T_SHORT || bt == T_INT || bt == T_LONG, `4B to 4S/4I, 8B to 8S')
+VECTOR_ZERO_EXTEND(S, T_INT, T_SHORT, elemType_to_regVariant(bt), H, bt == T_INT || bt == T_LONG, `4S to 4I')
+VECTOR_ZERO_EXTEND(I, T_LONG, T_INT, D, S, bt == T_LONG, `2I to 2L')
+
 // ------------------------------ Vector cast ----------------------------------
 
 // VectorCastB2X
diff --git a/src/hotspot/cpu/aarch64/assembler_aarch64.hpp b/src/hotspot/cpu/aarch64/assembler_aarch64.hpp
index 2899319f3e8..187c1209303 100644
--- a/src/hotspot/cpu/aarch64/assembler_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/assembler_aarch64.hpp
@@ -2196,41 +2196,20 @@ public:
 
 #undef INSN
 
-  enum sign_kind { SIGNED, UNSIGNED };
-
 private:
-  void _xcvtf_scalar_integer(sign_kind sign, unsigned sz,
-                             FloatRegister Rd, FloatRegister Rn) {
-    starti;
-    f(0b01, 31, 30), f(sign == SIGNED ? 0 : 1, 29);
-    f(0b111100, 27, 23), f((sz >> 1) & 1, 22), f(0b100001110110, 21, 10);
-    rf(Rn, 5), rf(Rd, 0);
-  }
-
-public:
-#define INSN(NAME, sign, sz) \
-  void NAME(FloatRegister Rd, FloatRegister Rn) { \
-    _xcvtf_scalar_integer(sign, sz, Rd, Rn); \
-  }
-
-  INSN(scvtfs, SIGNED, 0);
-  INSN(scvtfd, SIGNED, 1);
-
-#undef INSN
-
-private:
-  void _xcvtf_vector_integer(sign_kind sign, SIMD_Arrangement T,
+  void _xcvtf_vector_integer(bool is_unsigned, SIMD_Arrangement T,
                              FloatRegister Rd, FloatRegister Rn) {
     assert(T == T2S || T == T4S || T == T2D, "invalid arrangement");
     starti;
-    f(0, 31), f(T & 1, 30), f(sign == SIGNED ? 0 : 1, 29);
+    f(0, 31), f(T & 1, 30), f(is_unsigned ? 1 : 0, 29);
     f(0b011100, 28, 23), f((T >> 1) & 1, 22), f(0b100001110110, 21, 10);
     rf(Rn, 5), rf(Rd, 0);
   }
 
 public:
+
   void scvtfv(SIMD_Arrangement T, FloatRegister Rd, FloatRegister Rn) {
-    _xcvtf_vector_integer(SIGNED, T, Rd, Rn);
+    _xcvtf_vector_integer(/* is_unsigned */ false, T, Rd, Rn);
   }
 
   // Floating-point compare
@@ -2991,18 +2970,18 @@ template
 
 #undef INSN
 
-private:
-  void _xshll(sign_kind sign, FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb, int shift) {
+protected:
+  void _xshll(bool is_unsigned, FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb, int shift) {
     starti;
     /* The encodings for the immh:immb fields (bits 22:16) are
      * 0001 xxx 8H, 8B/16B shift = xxx
      * 001x xxx 4S, 4H/8H shift = xxxx
      * 01xx xxx 2D, 2S/4S shift = xxxxx
      * 1xxx xxx RESERVED
      */
    assert((Tb >> 1) + 1 == (Ta >> 1), "Incompatible arrangement");
    assert((1 << ((Tb>>1)+3)) > shift, "Invalid shift value");
-    f(0, 31), f(Tb & 1, 30), f(sign == SIGNED ? 0 : 1, 29), f(0b011110, 28, 23);
+    f(0, 31), f(Tb & 1, 30), f(is_unsigned ? 1 : 0, 29), f(0b011110, 28, 23);
     f((1 << ((Tb>>1)+3))|shift, 22, 16);
     f(0b101001, 15, 10), rf(Vn, 5), rf(Vd, 0);
   }
@@ -3010,12 +2989,12 @@ private:
 public:
   void ushll(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb, int shift) {
     assert(Tb == T8B || Tb == T4H || Tb == T2S, "invalid arrangement");
-    _xshll(UNSIGNED, Vd, Ta, Vn, Tb, shift);
+    _xshll(/* is_unsigned */ true, Vd, Ta, Vn, Tb, shift);
   }
 
   void ushll2(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb, int shift) {
     assert(Tb == T16B || Tb == T8H || Tb == T4S, "invalid arrangement");
-    _xshll(UNSIGNED, Vd, Ta, Vn, Tb, shift);
+    _xshll(/* is_unsigned */ true, Vd, Ta, Vn, Tb, shift);
   }
 
   void uxtl(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb) {
@@ -3024,12 +3003,12 @@ public:
 
   void sshll(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb, int shift) {
     assert(Tb == T8B || Tb == T4H || Tb == T2S, "invalid arrangement");
-    _xshll(SIGNED, Vd, Ta, Vn, Tb, shift);
+    _xshll(/* is_unsigned */ false, Vd, Ta, Vn, Tb, shift);
   }
 
   void sshll2(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb, int shift) {
     assert(Tb == T16B || Tb == T8H || Tb == T4S, "invalid arrangement");
-    _xshll(SIGNED, Vd, Ta, Vn, Tb, shift);
+    _xshll(/* is_unsigned */ false, Vd, Ta, Vn, Tb, shift);
   }
 
   void sxtl(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb) {
@@ -3862,18 +3841,25 @@ public:
   }
 
   // SVE unpack vector elements
-#define INSN(NAME, op) \
-  void NAME(FloatRegister Zd, SIMD_RegVariant T, FloatRegister Zn) { \
-    starti; \
-    assert(T != B && T != Q, "invalid size"); \
-    f(0b00000101, 31, 24), f(T, 23, 22), f(0b1100, 21, 18); \
-    f(op, 17, 16), f(0b001110, 15, 10), rf(Zn, 5), rf(Zd, 0); \
+protected:
+  void _sve_xunpk(bool is_unsigned, bool is_high, FloatRegister Zd, SIMD_RegVariant T, FloatRegister Zn) {
+    starti;
+    assert(T != B && T != Q, "invalid size");
+    f(0b00000101, 31, 24), f(T, 23, 22), f(0b1100, 21, 18);
+    f(is_unsigned ? 1 : 0, 17), f(is_high ? 1 : 0, 16),
+    f(0b001110, 15, 10), rf(Zn, 5), rf(Zd, 0);
   }
-  INSN(sve_uunpkhi, 0b11); // Signed unpack and extend half of vector - high half
-  INSN(sve_uunpklo, 0b10); // Signed unpack and extend half of vector - low half
-  INSN(sve_sunpkhi, 0b01); // Unsigned unpack and extend half of vector - high half
-  INSN(sve_sunpklo, 0b00); // Unsigned unpack and extend half of vector - low half
+public:
+#define INSN(NAME, is_unsigned, is_high) \
+  void NAME(FloatRegister Zd, SIMD_RegVariant T, FloatRegister Zn) { \
+    _sve_xunpk(is_unsigned, is_high, Zd, T, Zn); \
+  }
+
+  INSN(sve_uunpkhi, true, true ); // Unsigned unpack and extend half of vector - high half
+  INSN(sve_uunpklo, true, false); // Unsigned unpack and extend half of vector - low half
+  INSN(sve_sunpkhi, false, true ); // Signed unpack and extend half of vector - high half
+  INSN(sve_sunpklo, false, false); // Signed unpack and extend half of vector - low half
 
 #undef INSN
 
   // SVE unpack predicate elements
diff --git a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp
index 5338eff0134..7b9784ec47a 100644
--- a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp
@@ -1340,26 +1340,25 @@ void C2_MacroAssembler::sve_vmask_lasttrue(Register dst, BasicType bt, PRegister
 // Extend integer vector src to dst with the same lane count
 // but larger element size, e.g. 4B -> 4I
 void C2_MacroAssembler::neon_vector_extend(FloatRegister dst, BasicType dst_bt, unsigned dst_vlen_in_bytes,
-                                           FloatRegister src, BasicType src_bt) {
+                                           FloatRegister src, BasicType src_bt, bool is_unsigned) {
   if (src_bt == T_BYTE) {
     if (dst_bt == T_SHORT) {
       // 4B/8B to 4S/8S
-      assert(dst_vlen_in_bytes == 8 || dst_vlen_in_bytes == 16, "unsupported");
-      sxtl(dst, T8H, src, T8B);
+      _xshll(is_unsigned, dst, T8H, src, T8B, 0);
     } else {
       // 4B to 4I
       assert(dst_vlen_in_bytes == 16 && dst_bt == T_INT, "unsupported");
-      sxtl(dst, T8H, src, T8B);
-      sxtl(dst, T4S, dst, T4H);
+      _xshll(is_unsigned, dst, T8H, src, T8B, 0);
+      _xshll(is_unsigned, dst, T4S, dst, T4H, 0);
     }
   } else if (src_bt == T_SHORT) {
     // 4S to 4I
     assert(dst_vlen_in_bytes == 16 && dst_bt == T_INT, "unsupported");
-    sxtl(dst, T4S, src, T4H);
+    _xshll(is_unsigned, dst, T4S, src, T4H, 0);
   } else if (src_bt == T_INT) {
     // 2I to 2L
     assert(dst_vlen_in_bytes == 16 && dst_bt == T_LONG, "unsupported");
-    sxtl(dst, T2D, src, T2S);
+    _xshll(is_unsigned, dst, T2D, src, T2S, 0);
   } else {
     ShouldNotReachHere();
   }
@@ -1393,34 +1392,36 @@ void C2_MacroAssembler::neon_vector_narrow(FloatRegister dst, BasicType dst_bt,
 }
 
 void C2_MacroAssembler::sve_vector_extend(FloatRegister dst, SIMD_RegVariant dst_size,
-                                          FloatRegister src, SIMD_RegVariant src_size) {
+                                          FloatRegister src, SIMD_RegVariant src_size,
+                                          bool is_unsigned) {
   assert(dst_size > src_size && dst_size <= D && src_size <= S, "invalid element size");
+
   if (src_size == B) {
     switch (dst_size) {
     case H:
-      sve_sunpklo(dst, H, src);
+      _sve_xunpk(is_unsigned, /* is_high */ false, dst, H, src);
       break;
     case S:
-      sve_sunpklo(dst, H, src);
-      sve_sunpklo(dst, S, dst);
+      _sve_xunpk(is_unsigned, /* is_high */ false, dst, H, src);
+      _sve_xunpk(is_unsigned, /* is_high */ false, dst, S, dst);
      break;
    case D:
-      sve_sunpklo(dst, H, src);
-      sve_sunpklo(dst, S, dst);
-      sve_sunpklo(dst, D, dst);
+      _sve_xunpk(is_unsigned, /* is_high */ false, dst, H, src);
+      _sve_xunpk(is_unsigned, /* is_high */ false, dst, S, dst);
+      _sve_xunpk(is_unsigned, /* is_high */ false, dst, D, dst);
      break;
    default:
      ShouldNotReachHere();
    }
  } else if (src_size == H) {
    if (dst_size == S) {
-      sve_sunpklo(dst, S, src);
+      _sve_xunpk(is_unsigned, /* is_high */ false, dst, S, src);
    } else { // D
-      sve_sunpklo(dst, S, src);
-      sve_sunpklo(dst, D, dst);
+      _sve_xunpk(is_unsigned, /* is_high */ false, dst, S, src);
+      _sve_xunpk(is_unsigned, /* is_high */ false, dst, D, dst);
    }
  } else if (src_size == S) {
-    sve_sunpklo(dst, D, src);
+    _sve_xunpk(is_unsigned, /* is_high */ false, dst, D, src);
  }
 }
 
diff --git a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.hpp
index f342ca3c977..dfa7d88cb93 100644
--- a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.hpp
@@ -94,13 +94,13 @@
 
   // Vector cast
   void neon_vector_extend(FloatRegister dst, BasicType dst_bt, unsigned dst_vlen_in_bytes,
-                          FloatRegister src, BasicType src_bt);
+                          FloatRegister src, BasicType src_bt, bool is_unsigned = false);
 
   void neon_vector_narrow(FloatRegister dst, BasicType dst_bt,
                           FloatRegister src, BasicType src_bt, unsigned src_vlen_in_bytes);
 
   void sve_vector_extend(FloatRegister dst, SIMD_RegVariant dst_size,
-                         FloatRegister src, SIMD_RegVariant src_size);
+                         FloatRegister src, SIMD_RegVariant src_size, bool is_unsigned = false);
 
   void sve_vector_narrow(FloatRegister dst, SIMD_RegVariant dst_size,
                          FloatRegister src, SIMD_RegVariant src_size, FloatRegister tmp);
diff --git a/src/hotspot/share/opto/vectornode.hpp b/src/hotspot/share/opto/vectornode.hpp
index 2557da40e0d..6992f3516aa 100644
--- a/src/hotspot/share/opto/vectornode.hpp
+++ b/src/hotspot/share/opto/vectornode.hpp
@@ -1573,38 +1573,6 @@ class VectorCastD2XNode : public VectorCastNode {
   virtual int Opcode() const;
 };
 
-class RoundVFNode : public VectorNode {
- public:
-  RoundVFNode(Node* in, const TypeVect* vt) :VectorNode(in, vt) {
-    assert(in->bottom_type()->is_vect()->element_basic_type() == T_FLOAT, "must be float");
-  }
-  virtual int Opcode() const;
-};
-
-class VectorUCastB2XNode : public VectorCastNode {
- public:
-  VectorUCastB2XNode(Node* in, const TypeVect* vt) : VectorCastNode(in, vt) {
-    assert(in->bottom_type()->is_vect()->element_basic_type() == T_BYTE, "must be byte");
-  }
-  virtual int Opcode() const;
-};
-
-class RoundVDNode : public VectorNode {
- public:
-  RoundVDNode(Node* in, const TypeVect* vt) : VectorNode(in, vt) {
-    assert(in->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE, "must be double");
-  }
-  virtual int Opcode() const;
-};
-
-class VectorUCastS2XNode : public VectorCastNode {
- public:
-  VectorUCastS2XNode(Node* in, const TypeVect* vt) : VectorCastNode(in, vt) {
-    assert(in->bottom_type()->is_vect()->element_basic_type() == T_SHORT, "must be short");
-  }
-  virtual int Opcode() const;
-};
-
 class VectorCastHF2FNode : public VectorCastNode {
  public:
   VectorCastHF2FNode(Node* in, const TypeVect* vt) : VectorCastNode(in, vt) {
@@ -1621,10 +1589,50 @@ class VectorCastF2HFNode : public VectorCastNode {
   virtual int Opcode() const;
 };
 
+// So far, VectorUCastNode can only be used in Vector API unsigned extensions
+// between integral types. E.g., extending byte to float is not supported now.
+class VectorUCastB2XNode : public VectorCastNode {
+ public:
+  VectorUCastB2XNode(Node* in, const TypeVect* vt) : VectorCastNode(in, vt) {
+    assert(in->bottom_type()->is_vect()->element_basic_type() == T_BYTE, "must be byte");
+    assert(vt->element_basic_type() == T_SHORT ||
+           vt->element_basic_type() == T_INT ||
+           vt->element_basic_type() == T_LONG, "must be");
+  }
+  virtual int Opcode() const;
+};
+
+class VectorUCastS2XNode : public VectorCastNode {
+ public:
+  VectorUCastS2XNode(Node* in, const TypeVect* vt) : VectorCastNode(in, vt) {
+    assert(in->bottom_type()->is_vect()->element_basic_type() == T_SHORT, "must be short");
+    assert(vt->element_basic_type() == T_INT ||
+           vt->element_basic_type() == T_LONG, "must be");
+  }
+  virtual int Opcode() const;
+};
+
 class VectorUCastI2XNode : public VectorCastNode {
  public:
   VectorUCastI2XNode(Node* in, const TypeVect* vt) : VectorCastNode(in, vt) {
     assert(in->bottom_type()->is_vect()->element_basic_type() == T_INT, "must be int");
+    assert(vt->element_basic_type() == T_LONG, "must be");
+  }
+  virtual int Opcode() const;
+};
+
+class RoundVFNode : public VectorNode {
+ public:
+  RoundVFNode(Node* in, const TypeVect* vt) :VectorNode(in, vt) {
+    assert(in->bottom_type()->is_vect()->element_basic_type() == T_FLOAT, "must be float");
+  }
+  virtual int Opcode() const;
+};
+
+class RoundVDNode : public VectorNode {
+ public:
+  RoundVDNode(Node* in, const TypeVect* vt) : VectorNode(in, vt) {
+    assert(in->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE, "must be double");
   }
   virtual int Opcode() const;
 };
diff --git a/test/hotspot/jtreg/compiler/vectorapi/reshape/utils/TestCastMethods.java b/test/hotspot/jtreg/compiler/vectorapi/reshape/utils/TestCastMethods.java
index 2c1c765f4bf..032ed40de98 100644
--- a/test/hotspot/jtreg/compiler/vectorapi/reshape/utils/TestCastMethods.java
+++ b/test/hotspot/jtreg/compiler/vectorapi/reshape/utils/TestCastMethods.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, 2022, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2021, 2023, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -228,7 +228,74 @@ public class TestCastMethods {
             makePair(DSPEC512, LSPEC512),
             makePair(DSPEC128, FSPEC64),
             makePair(DSPEC256, FSPEC128),
-            makePair(DSPEC512, FSPEC256)
+            makePair(DSPEC512, FSPEC256),
+
+            makePair(BSPEC64, SSPEC64, true),
+            makePair(BSPEC64, SSPEC128, true),
+            makePair(BSPEC64, SSPEC256, true),
+            makePair(BSPEC64, SSPEC512, true),
+            makePair(BSPEC64, ISPEC128, true),
+            makePair(BSPEC64, ISPEC256, true),
+            makePair(BSPEC64, ISPEC512, true),
+            makePair(BSPEC64, LSPEC256, true),
+            makePair(BSPEC64, LSPEC512, true),
+            makePair(BSPEC128, SSPEC64, true),
+            makePair(BSPEC128, SSPEC128, true),
+            makePair(BSPEC128, SSPEC256, true),
+            makePair(BSPEC128, SSPEC512, true),
+            makePair(BSPEC128, ISPEC128, true),
+            makePair(BSPEC128, ISPEC256, true),
+            makePair(BSPEC128, ISPEC512, true),
+            makePair(BSPEC128, LSPEC256, true),
+            makePair(BSPEC128, LSPEC512, true),
+            makePair(BSPEC256, SSPEC64, true),
+            makePair(BSPEC256, SSPEC128, true),
+            makePair(BSPEC256, SSPEC256, true),
+            makePair(BSPEC256, SSPEC512, true),
+            makePair(BSPEC256, ISPEC128, true),
+            makePair(BSPEC256, ISPEC256, true),
+            makePair(BSPEC256, ISPEC512, true),
+            makePair(BSPEC256, LSPEC256, true),
+            makePair(BSPEC256, LSPEC512, true),
+            makePair(BSPEC512, SSPEC64, true),
+            makePair(BSPEC512, SSPEC128, true),
+            makePair(BSPEC512, SSPEC256, true),
+            makePair(BSPEC512, SSPEC512, true),
+            makePair(BSPEC512, ISPEC128, true),
+            makePair(BSPEC512, ISPEC256, true),
+            makePair(BSPEC512, ISPEC512, true),
+            makePair(BSPEC512, LSPEC256, true),
+            makePair(BSPEC512, LSPEC512, true),
+
+            makePair(SSPEC64, ISPEC128, true),
+            makePair(SSPEC64, ISPEC256, true),
+            makePair(SSPEC64, ISPEC512, true),
+            makePair(SSPEC64, LSPEC256, true),
+            makePair(SSPEC64, LSPEC512, true),
+            makePair(SSPEC128, ISPEC128, true),
+            makePair(SSPEC128, ISPEC256, true),
+            makePair(SSPEC128, ISPEC512, true),
+            makePair(SSPEC128, LSPEC256, true),
+            makePair(SSPEC128, LSPEC512, true),
+            makePair(SSPEC256, ISPEC128, true),
+            makePair(SSPEC256, ISPEC256, true),
+            makePair(SSPEC256, ISPEC512, true),
+            makePair(SSPEC256, LSPEC256, true),
+            makePair(SSPEC256, LSPEC512, true),
+            makePair(SSPEC512, ISPEC128, true),
+            makePair(SSPEC512, ISPEC256, true),
+            makePair(SSPEC512, ISPEC512, true),
+            makePair(SSPEC512, LSPEC256, true),
+            makePair(SSPEC512, LSPEC512, true),
+
+            makePair(ISPEC64, LSPEC128, true),
+            makePair(ISPEC64, LSPEC256, true),
+            makePair(ISPEC128, LSPEC128, true),
+            makePair(ISPEC128, LSPEC256, true),
+            makePair(ISPEC256, LSPEC128, true),
+            makePair(ISPEC256, LSPEC256, true),
+            makePair(ISPEC512, LSPEC128, true),
+            makePair(ISPEC512, LSPEC256, true)
     );
 
     public static final List NEON_CAST_TESTS = List.of(
@@ -257,6 +324,16 @@ public class TestCastMethods {
             makePair(FSPEC64, DSPEC128),
             makePair(DSPEC128, ISPEC64),
             makePair(DSPEC128, LSPEC128),
-            makePair(DSPEC128, FSPEC64)
+            makePair(DSPEC128, FSPEC64),
+
+            makePair(BSPEC64, SSPEC64, true),
+            makePair(BSPEC64, SSPEC128, true),
+            makePair(BSPEC64, ISPEC128, true),
+            makePair(BSPEC128, SSPEC64, true),
+            makePair(BSPEC128, SSPEC128, true),
+            makePair(BSPEC128, ISPEC128, true),
+            makePair(SSPEC64, ISPEC128, true),
+            makePair(SSPEC128, ISPEC128, true),
+            makePair(ISPEC64, LSPEC128, true)
     );
 }
diff --git a/test/micro/org/openjdk/bench/jdk/incubator/vector/VectorZeroExtend.java b/test/micro/org/openjdk/bench/jdk/incubator/vector/VectorZeroExtend.java
new file mode 100644
index 00000000000..503d80356da
--- /dev/null
+++ b/test/micro/org/openjdk/bench/jdk/incubator/vector/VectorZeroExtend.java
@@ -0,0 +1,115 @@
+//
+// Copyright (c) 2023, Arm Limited. All rights reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+//
+// This code is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License version 2 only, as
+// published by the Free Software Foundation.
+//
+// This code is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+// version 2 for more details (a copy is included in the LICENSE file that
+// accompanied this code).
+//
+// You should have received a copy of the GNU General Public License version
+// 2 along with this work; if not, write to the Free Software Foundation,
+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+//
+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+// or visit www.oracle.com if you need additional information or have any
+// questions.
+//
+//
+package org.openjdk.bench.jdk.incubator.vector;
+
+import java.util.concurrent.TimeUnit;
+import java.util.Random;
+import jdk.incubator.vector.*;
+import org.openjdk.jmh.annotations.*;
+
+import static jdk.incubator.vector.VectorOperators.*;
+
+@OutputTimeUnit(TimeUnit.MILLISECONDS)
+@State(Scope.Thread)
+@Fork(jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
+public class VectorZeroExtend {
+    private static final VectorSpecies<Byte> B_SPECIES = ByteVector.SPECIES_PREFERRED;
+    private static final VectorSpecies<Short> S_SPECIES = ShortVector.SPECIES_PREFERRED;
+    private static final VectorSpecies<Integer> I_SPECIES = IntVector.SPECIES_PREFERRED;
+    private static final VectorSpecies<Long> L_SPECIES = LongVector.SPECIES_PREFERRED;
+    private static final int LENGTH = 128;
+    private static final Random RD = new Random();
+    private static byte[] ba;
+    private static short[] sa;
+    private static int[] ia;
+    private static long[] la;
+
+    static {
+        ba = new byte[LENGTH];
+        sa = new short[LENGTH];
+        ia = new int[LENGTH];
+        la = new long[LENGTH];
+
+        for (int i = 0; i < LENGTH; i++) {
+            ba[i] = (byte) RD.nextInt();
+            sa[i] = (short) RD.nextInt();
+            ia[i] = RD.nextInt();
+            la[i] = RD.nextLong();
+        }
+    }
+
+    @Benchmark
+    public void byte2Short() {
+        for (int i = 0; i < B_SPECIES.loopBound(LENGTH); i += B_SPECIES.length()) {
+            ByteVector va = ByteVector.fromArray(B_SPECIES, ba, i);
+            ShortVector vb = (ShortVector) va.convertShape(ZERO_EXTEND_B2S, S_SPECIES, 0);
+            vb.intoArray(sa, 0);
+        }
+    }
+
+    @Benchmark
+    public void byte2Int() {
+        for (int i = 0; i < B_SPECIES.loopBound(LENGTH); i += B_SPECIES.length()) {
+            ByteVector va = ByteVector.fromArray(B_SPECIES, ba, i);
+            IntVector vb = (IntVector) va.convertShape(ZERO_EXTEND_B2I, I_SPECIES, 0);
+            vb.intoArray(ia, 0);
+        }
+    }
+
+    @Benchmark
+    public void byte2Long() {
+        for (int i = 0; i < B_SPECIES.loopBound(LENGTH); i += B_SPECIES.length()) {
+            ByteVector va = ByteVector.fromArray(B_SPECIES, ba, i);
+            LongVector vb = (LongVector) va.convertShape(ZERO_EXTEND_B2L, L_SPECIES, 0);
+            vb.intoArray(la, 0);
+        }
+    }
+
+    @Benchmark
+    public void short2Int() {
+        for (int i = 0; i < S_SPECIES.loopBound(LENGTH); i += S_SPECIES.length()) {
+            ShortVector va = ShortVector.fromArray(S_SPECIES, sa, i);
+            IntVector vb = (IntVector) va.convertShape(ZERO_EXTEND_S2I, I_SPECIES, 0);
+            vb.intoArray(ia, 0);
+        }
+    }
+
+    @Benchmark
+    public void short2Long() {
+        for (int i = 0; i < S_SPECIES.loopBound(LENGTH); i += S_SPECIES.length()) {
+            ShortVector va = ShortVector.fromArray(S_SPECIES, sa, i);
+            LongVector vb = (LongVector) va.convertShape(ZERO_EXTEND_S2L, L_SPECIES, 0);
+            vb.intoArray(la, 0);
+        }
+    }
+
+    @Benchmark
+    public void int2Long() {
+        for (int i = 0; i < I_SPECIES.loopBound(LENGTH); i += I_SPECIES.length()) {
+            IntVector va = IntVector.fromArray(I_SPECIES, ia, i);
+            LongVector vb = (LongVector) va.convertShape(ZERO_EXTEND_I2L, L_SPECIES, 0);
+            vb.intoArray(la, 0);
+        }
+    }
+}
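Note for reviewers: the conversions exercised by the benchmark above are the Vector API's unsigned (zero-extending) widenings, which this patch lowers on AArch64 through the new VectorUCast* rules (NEON ushll/uxtl with is_unsigned = true, or SVE uunpklo). The stand-alone snippet below is not part of the patch; it is a minimal sketch (the class name, species choices and sample values are illustrative) contrasting the signed B2I conversion with ZERO_EXTEND_B2I on lanes whose sign bit is set:

import jdk.incubator.vector.ByteVector;
import jdk.incubator.vector.IntVector;
import jdk.incubator.vector.VectorSpecies;
import static jdk.incubator.vector.VectorOperators.B2I;
import static jdk.incubator.vector.VectorOperators.ZERO_EXTEND_B2I;

public class ZeroExtendDemo {
    // 8 byte lanes widen exactly into 8 int lanes (64 -> 256 bits), so part 0 covers all lanes.
    private static final VectorSpecies<Byte> B64 = ByteVector.SPECIES_64;
    private static final VectorSpecies<Integer> I256 = IntVector.SPECIES_256;

    public static void main(String[] args) {
        byte[] src = {(byte) 0x80, (byte) 0xFF, 0x01, 0x7F, 0, 0, 0, 0};
        ByteVector vb = ByteVector.fromArray(B64, src, 0);

        // Signed widening: (byte) 0xFF becomes -1.
        IntVector signed = (IntVector) vb.convertShape(B2I, I256, 0);
        // Unsigned widening: (byte) 0xFF becomes 255; this is the conversion the new
        // VectorUCastB2X rule implements (NEON ushll/uxtl or SVE uunpklo on AArch64).
        IntVector unsigned = (IntVector) vb.convertShape(ZERO_EXTEND_B2I, I256, 0);

        System.out.println(java.util.Arrays.toString(signed.toArray()));   // [-128, -1, 1, 127, 0, 0, 0, 0]
        System.out.println(java.util.Arrays.toString(unsigned.toArray())); // [128, 255, 1, 127, 0, 0, 0, 0]
    }
}

Run it with "java --add-modules jdk.incubator.vector ZeroExtendDemo.java"; the two printed lines differ only in the lanes where the source byte is negative.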