diff --git a/src/hotspot/cpu/aarch64/aarch64.ad b/src/hotspot/cpu/aarch64/aarch64.ad index 681b14ab068..404ab8d9ba4 100644 --- a/src/hotspot/cpu/aarch64/aarch64.ad +++ b/src/hotspot/cpu/aarch64/aarch64.ad @@ -2362,17 +2362,34 @@ int Matcher::max_vector_size(const BasicType bt) { } int Matcher::min_vector_size(const BasicType bt) { - int max_size = max_vector_size(bt); - // Limit the min vector size to 8 bytes. - int size = 8 / type2aelembytes(bt); - if (bt == T_BYTE) { - // To support vector api shuffle/rearrange. - size = 4; - } else if (bt == T_BOOLEAN) { - // To support vector api load/store mask. - size = 2; + // Usually, the shortest vector length supported by AArch64 ISA and + // Vector API species is 64 bits. However, we allow 32-bit or 16-bit + // vectors in a few special cases. + int size; + switch(bt) { + case T_BOOLEAN: + // Load/store a vector mask with only 2 elements for vector types + // such as "2I/2F/2L/2D". + size = 2; + break; + case T_BYTE: + // Generate a "4B" vector, to support vector cast between "8B/16B" + // and "4S/4I/4L/4F/4D". + size = 4; + break; + case T_SHORT: + // Generate a "2S" vector, to support vector cast between "4S/8S" + // and "2I/2L/2F/2D". + size = 2; + break; + default: + // Limit the min vector length to 64-bit. + size = 8 / type2aelembytes(bt); + // The number of elements in a vector should be at least 2. + size = MAX2(size, 2); } - if (size < 2) size = 2; + + int max_size = max_vector_size(bt); return MIN2(size, max_size); } diff --git a/src/hotspot/cpu/aarch64/aarch64_vector.ad b/src/hotspot/cpu/aarch64/aarch64_vector.ad index b4e6d79347f..1b6296ddd8b 100644 --- a/src/hotspot/cpu/aarch64/aarch64_vector.ad +++ b/src/hotspot/cpu/aarch64/aarch64_vector.ad @@ -131,7 +131,7 @@ source %{ // These operations are not profitable to be vectorized on NEON, because no direct // NEON instructions support them. But the match rule support for them is profitable for // Vector API intrinsics. 
- if ((opcode == Op_VectorCastD2X && bt == T_INT) || + if ((opcode == Op_VectorCastD2X && (bt == T_INT || bt == T_SHORT)) || (opcode == Op_VectorCastL2X && bt == T_FLOAT) || (opcode == Op_CountLeadingZerosV && bt == T_LONG) || (opcode == Op_CountTrailingZerosV && bt == T_LONG) || @@ -189,6 +189,18 @@ source %{ return false; } break; + case Op_AddReductionVI: + case Op_AndReductionV: + case Op_OrReductionV: + case Op_XorReductionV: + case Op_MinReductionV: + case Op_MaxReductionV: + // Reductions with less than 8 bytes vector length are + // not supported. + if (length_in_bytes < 8) { + return false; + } + break; case Op_MulReductionVD: case Op_MulReductionVF: case Op_MulReductionVI: @@ -4244,8 +4256,8 @@ instruct vzeroExtStoX(vReg dst, vReg src) %{ assert(bt == T_INT || bt == T_LONG, "must be"); uint length_in_bytes = Matcher::vector_length_in_bytes(this); if (VM_Version::use_neon_for_vector(length_in_bytes)) { - // 4S to 4I - __ neon_vector_extend($dst$$FloatRegister, T_INT, length_in_bytes, + // 2S to 2I/2L, 4S to 4I + __ neon_vector_extend($dst$$FloatRegister, bt, length_in_bytes, $src$$FloatRegister, T_SHORT, /* is_unsigned */ true); } else { assert(UseSVE > 0, "must be sve"); @@ -4265,11 +4277,11 @@ instruct vzeroExtItoX(vReg dst, vReg src) %{ uint length_in_bytes = Matcher::vector_length_in_bytes(this); if (VM_Version::use_neon_for_vector(length_in_bytes)) { // 2I to 2L - __ neon_vector_extend($dst$$FloatRegister, T_LONG, length_in_bytes, + __ neon_vector_extend($dst$$FloatRegister, bt, length_in_bytes, $src$$FloatRegister, T_INT, /* is_unsigned */ true); } else { assert(UseSVE > 0, "must be sve"); - __ sve_vector_extend($dst$$FloatRegister, __ D, + __ sve_vector_extend($dst$$FloatRegister, __ elemType_to_regVariant(bt), $src$$FloatRegister, __ S, /* is_unsigned */ true); } %} @@ -4343,11 +4355,15 @@ instruct vcvtStoX_extend(vReg dst, vReg src) %{ BasicType bt = Matcher::vector_element_basic_type(this); uint length_in_bytes = 
Matcher::vector_length_in_bytes(this); if (VM_Version::use_neon_for_vector(length_in_bytes)) { - // 4S to 4I/4F - __ neon_vector_extend($dst$$FloatRegister, T_INT, length_in_bytes, - $src$$FloatRegister, T_SHORT); - if (bt == T_FLOAT) { - __ scvtfv(__ T4S, $dst$$FloatRegister, $dst$$FloatRegister); + if (is_floating_point_type(bt)) { + // 2S to 2F/2D, 4S to 4F + __ neon_vector_extend($dst$$FloatRegister, bt == T_FLOAT ? T_INT : T_LONG, + length_in_bytes, $src$$FloatRegister, T_SHORT); + __ scvtfv(get_arrangement(this), $dst$$FloatRegister, $dst$$FloatRegister); + } else { + // 2S to 2I/2L, 4S to 4I + __ neon_vector_extend($dst$$FloatRegister, bt, length_in_bytes, + $src$$FloatRegister, T_SHORT); } } else { assert(UseSVE > 0, "must be sve"); @@ -4371,7 +4387,7 @@ instruct vcvtItoX_narrow_neon(vReg dst, vReg src) %{ effect(TEMP_DEF dst); format %{ "vcvtItoX_narrow_neon $dst, $src" %} ins_encode %{ - // 4I to 4B/4S + // 2I to 2S, 4I to 4B/4S BasicType bt = Matcher::vector_element_basic_type(this); uint length_in_bytes = Matcher::vector_length_in_bytes(this, $src); __ neon_vector_narrow($dst$$FloatRegister, bt, @@ -4434,28 +4450,29 @@ instruct vcvtItoX(vReg dst, vReg src) %{ // VectorCastL2X -instruct vcvtLtoI_neon(vReg dst, vReg src) %{ - predicate(Matcher::vector_element_basic_type(n) == T_INT && +instruct vcvtLtoX_narrow_neon(vReg dst, vReg src) %{ + predicate((Matcher::vector_element_basic_type(n) == T_INT || + Matcher::vector_element_basic_type(n) == T_SHORT) && VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n->in(1)))); match(Set dst (VectorCastL2X src)); - format %{ "vcvtLtoI_neon $dst, $src" %} + format %{ "vcvtLtoX_narrow_neon $dst, $src" %} ins_encode %{ - // 2L to 2I + // 2L to 2S/2I + BasicType bt = Matcher::vector_element_basic_type(this); uint length_in_bytes = Matcher::vector_length_in_bytes(this, $src); - __ neon_vector_narrow($dst$$FloatRegister, T_INT, + __ neon_vector_narrow($dst$$FloatRegister, bt, $src$$FloatRegister, T_LONG, 
length_in_bytes); %} ins_pipe(pipe_slow); %} -instruct vcvtLtoI_sve(vReg dst, vReg src, vReg tmp) %{ - predicate((Matcher::vector_element_basic_type(n) == T_INT && - !VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n->in(1)))) || - Matcher::vector_element_basic_type(n) == T_BYTE || - Matcher::vector_element_basic_type(n) == T_SHORT); +instruct vcvtLtoX_narrow_sve(vReg dst, vReg src, vReg tmp) %{ + predicate(!VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n->in(1))) && + !is_floating_point_type(Matcher::vector_element_basic_type(n)) && + type2aelembytes(Matcher::vector_element_basic_type(n)) <= 4); match(Set dst (VectorCastL2X src)); effect(TEMP_DEF dst, TEMP tmp); - format %{ "vcvtLtoI_sve $dst, $src\t# KILL $tmp" %} + format %{ "vcvtLtoX_narrow_sve $dst, $src\t# KILL $tmp" %} ins_encode %{ assert(UseSVE > 0, "must be sve"); BasicType bt = Matcher::vector_element_basic_type(this); @@ -4521,10 +4538,11 @@ instruct vcvtFtoX_narrow_neon(vReg dst, vReg src) %{ effect(TEMP_DEF dst); format %{ "vcvtFtoX_narrow_neon $dst, $src" %} ins_encode %{ - // 4F to 4B/4S + // 2F to 2S, 4F to 4B/4S BasicType bt = Matcher::vector_element_basic_type(this); uint length_in_bytes = Matcher::vector_length_in_bytes(this, $src); - __ fcvtzs($dst$$FloatRegister, __ T4S, $src$$FloatRegister); + __ fcvtzs($dst$$FloatRegister, length_in_bytes == 16 ? 
__ T4S : __ T2S, + $src$$FloatRegister); __ neon_vector_narrow($dst$$FloatRegister, bt, $dst$$FloatRegister, T_INT, length_in_bytes); %} @@ -4590,12 +4608,14 @@ instruct vcvtFtoX(vReg dst, vReg src) %{ // VectorCastD2X instruct vcvtDtoI_neon(vReg dst, vReg src) %{ - predicate(UseSVE == 0 && Matcher::vector_element_basic_type(n) == T_INT); + predicate(UseSVE == 0 && + (Matcher::vector_element_basic_type(n) == T_INT || + Matcher::vector_element_basic_type(n) == T_SHORT)); match(Set dst (VectorCastD2X src)); effect(TEMP_DEF dst); - format %{ "vcvtDtoI_neon $dst, $src\t# 2D to 2I" %} + format %{ "vcvtDtoI_neon $dst, $src\t# 2D to 2S/2I" %} ins_encode %{ - // 2D to 2I + // 2D to 2S/2I __ ins($dst$$FloatRegister, __ D, $src$$FloatRegister, 0, 1); // We can't use fcvtzs(vector, integer) instruction here because we need // saturation arithmetic. See JDK-8276151. @@ -4603,6 +4623,10 @@ instruct vcvtDtoI_neon(vReg dst, vReg src) %{ __ fcvtzdw(rscratch2, $dst$$FloatRegister); __ fmovs($dst$$FloatRegister, rscratch1); __ mov($dst$$FloatRegister, __ S, 1, rscratch2); + if (Matcher::vector_element_basic_type(this) == T_SHORT) { + __ neon_vector_narrow($dst$$FloatRegister, T_SHORT, + $dst$$FloatRegister, T_INT, 8); + } %} ins_pipe(pipe_slow); %} @@ -4676,7 +4700,7 @@ instruct vcvtHFtoF(vReg dst, vReg src) %{ ins_encode %{ uint length_in_bytes = Matcher::vector_length_in_bytes(this); if (VM_Version::use_neon_for_vector(length_in_bytes)) { - // 4HF to 4F + // 2HF to 2F, 4HF to 4F __ fcvtl($dst$$FloatRegister, __ T4S, $src$$FloatRegister, __ T4H); } else { assert(UseSVE > 0, "must be sve"); @@ -4692,9 +4716,9 @@ instruct vcvtHFtoF(vReg dst, vReg src) %{ instruct vcvtFtoHF_neon(vReg dst, vReg src) %{ predicate(VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n->in(1)))); match(Set dst (VectorCastF2HF src)); - format %{ "vcvtFtoHF_neon $dst, $src\t# 4F to 4HF" %} + format %{ "vcvtFtoHF_neon $dst, $src\t# 2F/4F to 2HF/4HF" %} ins_encode %{ - // 4F to 4HF + // 2F to 2HF, 
4F to 4HF __ fcvtn($dst$$FloatRegister, __ T4H, $src$$FloatRegister, __ T4S); %} ins_pipe(pipe_slow); @@ -6396,14 +6420,12 @@ instruct vpopcountI(vReg dst, vReg src) %{ } else { assert(bt == T_SHORT || bt == T_INT, "unsupported"); if (UseSVE == 0) { - assert(length_in_bytes == 8 || length_in_bytes == 16, "unsupported"); - __ cnt($dst$$FloatRegister, length_in_bytes == 16 ? __ T16B : __ T8B, - $src$$FloatRegister); - __ uaddlp($dst$$FloatRegister, length_in_bytes == 16 ? __ T16B : __ T8B, - $dst$$FloatRegister); + assert(length_in_bytes <= 16, "unsupported"); + bool isQ = length_in_bytes == 16; + __ cnt($dst$$FloatRegister, isQ ? __ T16B : __ T8B, $src$$FloatRegister); + __ uaddlp($dst$$FloatRegister, isQ ? __ T16B : __ T8B, $dst$$FloatRegister); if (bt == T_INT) { - __ uaddlp($dst$$FloatRegister, length_in_bytes == 16 ? __ T8H : __ T4H, - $dst$$FloatRegister); + __ uaddlp($dst$$FloatRegister, isQ ? __ T8H : __ T4H, $dst$$FloatRegister); } } else { __ sve_cnt($dst$$FloatRegister, __ elemType_to_regVariant(bt), @@ -6465,7 +6487,7 @@ instruct vblend_neon(vReg dst, vReg src1, vReg src2) %{ format %{ "vblend_neon $dst, $src1, $src2" %} ins_encode %{ uint length_in_bytes = Matcher::vector_length_in_bytes(this); - assert(length_in_bytes == 8 || length_in_bytes == 16, "must be"); + assert(length_in_bytes <= 16, "must be"); __ bsl($dst$$FloatRegister, length_in_bytes == 16 ? 
__ T16B : __ T8B, $src2$$FloatRegister, $src1$$FloatRegister); %} @@ -6852,7 +6874,7 @@ instruct vcountTrailingZeros(vReg dst, vReg src) %{ } else { assert(bt == T_SHORT || bt == T_INT || bt == T_LONG, "unsupported type"); if (UseSVE == 0) { - assert(length_in_bytes == 8 || length_in_bytes == 16, "unsupported"); + assert(length_in_bytes <= 16, "unsupported"); __ neon_reverse_bits($dst$$FloatRegister, $src$$FloatRegister, bt, /* isQ */ length_in_bytes == 16); if (bt != T_LONG) { @@ -6911,7 +6933,7 @@ instruct vreverse(vReg dst, vReg src) %{ } else { assert(bt == T_SHORT || bt == T_INT || bt == T_LONG, "unsupported type"); if (UseSVE == 0) { - assert(length_in_bytes == 8 || length_in_bytes == 16, "unsupported"); + assert(length_in_bytes <= 16, "unsupported"); __ neon_reverse_bits($dst$$FloatRegister, $src$$FloatRegister, bt, /* isQ */ length_in_bytes == 16); } else { @@ -6947,7 +6969,7 @@ instruct vreverseBytes(vReg dst, vReg src) %{ BasicType bt = Matcher::vector_element_basic_type(this); uint length_in_bytes = Matcher::vector_length_in_bytes(this); if (VM_Version::use_neon_for_vector(length_in_bytes)) { - assert(length_in_bytes == 8 || length_in_bytes == 16, "unsupported"); + assert(length_in_bytes <= 16, "unsupported"); if (bt == T_BYTE) { if ($dst$$FloatRegister != $src$$FloatRegister) { __ orr($dst$$FloatRegister, length_in_bytes == 16 ? __ T16B : __ T8B, diff --git a/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4 b/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4 index cc07e0e4076..efefbf692bd 100644 --- a/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4 +++ b/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4 @@ -121,7 +121,7 @@ source %{ // These operations are not profitable to be vectorized on NEON, because no direct // NEON instructions support them. But the match rule support for them is profitable for // Vector API intrinsics. 
- if ((opcode == Op_VectorCastD2X && bt == T_INT) || + if ((opcode == Op_VectorCastD2X && (bt == T_INT || bt == T_SHORT)) || (opcode == Op_VectorCastL2X && bt == T_FLOAT) || (opcode == Op_CountLeadingZerosV && bt == T_LONG) || (opcode == Op_CountTrailingZerosV && bt == T_LONG) || @@ -179,6 +179,18 @@ source %{ return false; } break; + case Op_AddReductionVI: + case Op_AndReductionV: + case Op_OrReductionV: + case Op_XorReductionV: + case Op_MinReductionV: + case Op_MaxReductionV: + // Reductions with less than 8 bytes vector length are + // not supported. + if (length_in_bytes < 8) { + return false; + } + break; case Op_MulReductionVD: case Op_MulReductionVF: case Op_MulReductionVI: @@ -2502,31 +2514,31 @@ instruct reinterpret_resize_gt128b(vReg dst, vReg src, pReg ptmp, rFlagsReg cr) %} // ---------------------------- Vector zero extend -------------------------------- -dnl VECTOR_ZERO_EXTEND($1, $2, $3, $4, $5 $6, $7, ) -dnl VECTOR_ZERO_EXTEND(op_name, dst_bt, src_bt, dst_size, src_size, assertion, neon_comment) +dnl VECTOR_ZERO_EXTEND($1, $2, $3, $4, $5, ) +dnl VECTOR_ZERO_EXTEND(op_name, src_bt, src_size, assertion, neon_comment) define(`VECTOR_ZERO_EXTEND', ` instruct vzeroExt$1toX(vReg dst, vReg src) %{ match(Set dst (VectorUCast`$1'2X src)); format %{ "vzeroExt$1toX $dst, $src" %} ins_encode %{ BasicType bt = Matcher::vector_element_basic_type(this); - assert($6, "must be"); + assert($4, "must be"); uint length_in_bytes = Matcher::vector_length_in_bytes(this); if (VM_Version::use_neon_for_vector(length_in_bytes)) { - // $7 - __ neon_vector_extend($dst$$FloatRegister, $2, length_in_bytes, - $src$$FloatRegister, $3, /* is_unsigned */ true); + // $5 + __ neon_vector_extend($dst$$FloatRegister, bt, length_in_bytes, + $src$$FloatRegister, $2, /* is_unsigned */ true); } else { assert(UseSVE > 0, "must be sve"); - __ sve_vector_extend($dst$$FloatRegister, __ $4, - $src$$FloatRegister, __ $5, /* is_unsigned */ true); + __ sve_vector_extend($dst$$FloatRegister, __ 
elemType_to_regVariant(bt), + $src$$FloatRegister, __ $3, /* is_unsigned */ true); } %} ins_pipe(pipe_slow); %}')dnl -VECTOR_ZERO_EXTEND(B, bt, T_BYTE, elemType_to_regVariant(bt), B, bt == T_SHORT || bt == T_INT || bt == T_LONG, `4B to 4S/4I, 8B to 8S') -VECTOR_ZERO_EXTEND(S, T_INT, T_SHORT, elemType_to_regVariant(bt), H, bt == T_INT || bt == T_LONG, `4S to 4I') -VECTOR_ZERO_EXTEND(I, T_LONG, T_INT, D, S, bt == T_LONG, `2I to 2L') +VECTOR_ZERO_EXTEND(B, T_BYTE, B, bt == T_SHORT || bt == T_INT || bt == T_LONG, `4B to 4S/4I, 8B to 8S') +VECTOR_ZERO_EXTEND(S, T_SHORT, H, bt == T_INT || bt == T_LONG, `2S to 2I/2L, 4S to 4I') +VECTOR_ZERO_EXTEND(I, T_INT, S, bt == T_LONG, `2I to 2L') // ------------------------------ Vector cast ---------------------------------- @@ -2595,11 +2607,15 @@ instruct vcvtStoX_extend(vReg dst, vReg src) %{ BasicType bt = Matcher::vector_element_basic_type(this); uint length_in_bytes = Matcher::vector_length_in_bytes(this); if (VM_Version::use_neon_for_vector(length_in_bytes)) { - // 4S to 4I/4F - __ neon_vector_extend($dst$$FloatRegister, T_INT, length_in_bytes, - $src$$FloatRegister, T_SHORT); - if (bt == T_FLOAT) { - __ scvtfv(__ T4S, $dst$$FloatRegister, $dst$$FloatRegister); + if (is_floating_point_type(bt)) { + // 2S to 2F/2D, 4S to 4F + __ neon_vector_extend($dst$$FloatRegister, bt == T_FLOAT ? 
T_INT : T_LONG, + length_in_bytes, $src$$FloatRegister, T_SHORT); + __ scvtfv(get_arrangement(this), $dst$$FloatRegister, $dst$$FloatRegister); + } else { + // 2S to 2I/2L, 4S to 4I + __ neon_vector_extend($dst$$FloatRegister, bt, length_in_bytes, + $src$$FloatRegister, T_SHORT); } } else { assert(UseSVE > 0, "must be sve"); @@ -2623,7 +2639,7 @@ instruct vcvtItoX_narrow_neon(vReg dst, vReg src) %{ effect(TEMP_DEF dst); format %{ "vcvtItoX_narrow_neon $dst, $src" %} ins_encode %{ - // 4I to 4B/4S + // 2I to 2S, 4I to 4B/4S BasicType bt = Matcher::vector_element_basic_type(this); uint length_in_bytes = Matcher::vector_length_in_bytes(this, $src); __ neon_vector_narrow($dst$$FloatRegister, bt, @@ -2686,28 +2702,29 @@ instruct vcvtItoX(vReg dst, vReg src) %{ // VectorCastL2X -instruct vcvtLtoI_neon(vReg dst, vReg src) %{ - predicate(Matcher::vector_element_basic_type(n) == T_INT && +instruct vcvtLtoX_narrow_neon(vReg dst, vReg src) %{ + predicate((Matcher::vector_element_basic_type(n) == T_INT || + Matcher::vector_element_basic_type(n) == T_SHORT) && VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n->in(1)))); match(Set dst (VectorCastL2X src)); - format %{ "vcvtLtoI_neon $dst, $src" %} + format %{ "vcvtLtoX_narrow_neon $dst, $src" %} ins_encode %{ - // 2L to 2I + // 2L to 2S/2I + BasicType bt = Matcher::vector_element_basic_type(this); uint length_in_bytes = Matcher::vector_length_in_bytes(this, $src); - __ neon_vector_narrow($dst$$FloatRegister, T_INT, + __ neon_vector_narrow($dst$$FloatRegister, bt, $src$$FloatRegister, T_LONG, length_in_bytes); %} ins_pipe(pipe_slow); %} -instruct vcvtLtoI_sve(vReg dst, vReg src, vReg tmp) %{ - predicate((Matcher::vector_element_basic_type(n) == T_INT && - !VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n->in(1)))) || - Matcher::vector_element_basic_type(n) == T_BYTE || - Matcher::vector_element_basic_type(n) == T_SHORT); +instruct vcvtLtoX_narrow_sve(vReg dst, vReg src, vReg tmp) %{ + 
predicate(!VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n->in(1))) && + !is_floating_point_type(Matcher::vector_element_basic_type(n)) && + type2aelembytes(Matcher::vector_element_basic_type(n)) <= 4); match(Set dst (VectorCastL2X src)); effect(TEMP_DEF dst, TEMP tmp); - format %{ "vcvtLtoI_sve $dst, $src\t# KILL $tmp" %} + format %{ "vcvtLtoX_narrow_sve $dst, $src\t# KILL $tmp" %} ins_encode %{ assert(UseSVE > 0, "must be sve"); BasicType bt = Matcher::vector_element_basic_type(this); @@ -2773,10 +2790,11 @@ instruct vcvtFtoX_narrow_neon(vReg dst, vReg src) %{ effect(TEMP_DEF dst); format %{ "vcvtFtoX_narrow_neon $dst, $src" %} ins_encode %{ - // 4F to 4B/4S + // 2F to 2S, 4F to 4B/4S BasicType bt = Matcher::vector_element_basic_type(this); uint length_in_bytes = Matcher::vector_length_in_bytes(this, $src); - __ fcvtzs($dst$$FloatRegister, __ T4S, $src$$FloatRegister); + __ fcvtzs($dst$$FloatRegister, length_in_bytes == 16 ? __ T4S : __ T2S, + $src$$FloatRegister); __ neon_vector_narrow($dst$$FloatRegister, bt, $dst$$FloatRegister, T_INT, length_in_bytes); %} @@ -2842,12 +2860,14 @@ instruct vcvtFtoX(vReg dst, vReg src) %{ // VectorCastD2X instruct vcvtDtoI_neon(vReg dst, vReg src) %{ - predicate(UseSVE == 0 && Matcher::vector_element_basic_type(n) == T_INT); + predicate(UseSVE == 0 && + (Matcher::vector_element_basic_type(n) == T_INT || + Matcher::vector_element_basic_type(n) == T_SHORT)); match(Set dst (VectorCastD2X src)); effect(TEMP_DEF dst); - format %{ "vcvtDtoI_neon $dst, $src\t# 2D to 2I" %} + format %{ "vcvtDtoI_neon $dst, $src\t# 2D to 2S/2I" %} ins_encode %{ - // 2D to 2I + // 2D to 2S/2I __ ins($dst$$FloatRegister, __ D, $src$$FloatRegister, 0, 1); // We can't use fcvtzs(vector, integer) instruction here because we need // saturation arithmetic. See JDK-8276151. 
@@ -2855,6 +2875,10 @@ instruct vcvtDtoI_neon(vReg dst, vReg src) %{ __ fcvtzdw(rscratch2, $dst$$FloatRegister); __ fmovs($dst$$FloatRegister, rscratch1); __ mov($dst$$FloatRegister, __ S, 1, rscratch2); + if (Matcher::vector_element_basic_type(this) == T_SHORT) { + __ neon_vector_narrow($dst$$FloatRegister, T_SHORT, + $dst$$FloatRegister, T_INT, 8); + } %} ins_pipe(pipe_slow); %} @@ -2928,7 +2952,7 @@ instruct vcvtHFtoF(vReg dst, vReg src) %{ ins_encode %{ uint length_in_bytes = Matcher::vector_length_in_bytes(this); if (VM_Version::use_neon_for_vector(length_in_bytes)) { - // 4HF to 4F + // 2HF to 2F, 4HF to 4F __ fcvtl($dst$$FloatRegister, __ T4S, $src$$FloatRegister, __ T4H); } else { assert(UseSVE > 0, "must be sve"); @@ -2944,9 +2968,9 @@ instruct vcvtHFtoF(vReg dst, vReg src) %{ instruct vcvtFtoHF_neon(vReg dst, vReg src) %{ predicate(VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n->in(1)))); match(Set dst (VectorCastF2HF src)); - format %{ "vcvtFtoHF_neon $dst, $src\t# 4F to 4HF" %} + format %{ "vcvtFtoHF_neon $dst, $src\t# 2F/4F to 2HF/4HF" %} ins_encode %{ - // 4F to 4HF + // 2F to 2HF, 4F to 4HF __ fcvtn($dst$$FloatRegister, __ T4H, $src$$FloatRegister, __ T4S); %} ins_pipe(pipe_slow); @@ -4417,14 +4441,12 @@ instruct vpopcountI(vReg dst, vReg src) %{ } else { assert(bt == T_SHORT || bt == T_INT, "unsupported"); if (UseSVE == 0) { - assert(length_in_bytes == 8 || length_in_bytes == 16, "unsupported"); - __ cnt($dst$$FloatRegister, length_in_bytes == 16 ? __ T16B : __ T8B, - $src$$FloatRegister); - __ uaddlp($dst$$FloatRegister, length_in_bytes == 16 ? __ T16B : __ T8B, - $dst$$FloatRegister); + assert(length_in_bytes <= 16, "unsupported"); + bool isQ = length_in_bytes == 16; + __ cnt($dst$$FloatRegister, isQ ? __ T16B : __ T8B, $src$$FloatRegister); + __ uaddlp($dst$$FloatRegister, isQ ? __ T16B : __ T8B, $dst$$FloatRegister); if (bt == T_INT) { - __ uaddlp($dst$$FloatRegister, length_in_bytes == 16 ? 
__ T8H : __ T4H, - $dst$$FloatRegister); + __ uaddlp($dst$$FloatRegister, isQ ? __ T8H : __ T4H, $dst$$FloatRegister); } } else { __ sve_cnt($dst$$FloatRegister, __ elemType_to_regVariant(bt), @@ -4475,7 +4497,7 @@ instruct vblend_neon(vReg dst, vReg src1, vReg src2) %{ format %{ "vblend_neon $dst, $src1, $src2" %} ins_encode %{ uint length_in_bytes = Matcher::vector_length_in_bytes(this); - assert(length_in_bytes == 8 || length_in_bytes == 16, "must be"); + assert(length_in_bytes <= 16, "must be"); __ bsl($dst$$FloatRegister, length_in_bytes == 16 ? __ T16B : __ T8B, $src2$$FloatRegister, $src1$$FloatRegister); %} @@ -4851,7 +4873,7 @@ instruct vcountTrailingZeros(vReg dst, vReg src) %{ } else { assert(bt == T_SHORT || bt == T_INT || bt == T_LONG, "unsupported type"); if (UseSVE == 0) { - assert(length_in_bytes == 8 || length_in_bytes == 16, "unsupported"); + assert(length_in_bytes <= 16, "unsupported"); __ neon_reverse_bits($dst$$FloatRegister, $src$$FloatRegister, bt, /* isQ */ length_in_bytes == 16); if (bt != T_LONG) { @@ -4910,7 +4932,7 @@ instruct vreverse(vReg dst, vReg src) %{ } else { assert(bt == T_SHORT || bt == T_INT || bt == T_LONG, "unsupported type"); if (UseSVE == 0) { - assert(length_in_bytes == 8 || length_in_bytes == 16, "unsupported"); + assert(length_in_bytes <= 16, "unsupported"); __ neon_reverse_bits($dst$$FloatRegister, $src$$FloatRegister, bt, /* isQ */ length_in_bytes == 16); } else { @@ -4935,7 +4957,7 @@ instruct vreverseBytes(vReg dst, vReg src) %{ BasicType bt = Matcher::vector_element_basic_type(this); uint length_in_bytes = Matcher::vector_length_in_bytes(this); if (VM_Version::use_neon_for_vector(length_in_bytes)) { - assert(length_in_bytes == 8 || length_in_bytes == 16, "unsupported"); + assert(length_in_bytes <= 16, "unsupported"); if (bt == T_BYTE) { if ($dst$$FloatRegister != $src$$FloatRegister) { __ orr($dst$$FloatRegister, length_in_bytes == 16 ? 
__ T16B : __ T8B, diff --git a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp index 914967e4009..a4ecd56af08 100644 --- a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp @@ -1778,19 +1778,21 @@ void C2_MacroAssembler::sve_vmask_lasttrue(Register dst, BasicType bt, PRegister void C2_MacroAssembler::neon_vector_extend(FloatRegister dst, BasicType dst_bt, unsigned dst_vlen_in_bytes, FloatRegister src, BasicType src_bt, bool is_unsigned) { if (src_bt == T_BYTE) { - if (dst_bt == T_SHORT) { - // 4B/8B to 4S/8S - _xshll(is_unsigned, dst, T8H, src, T8B, 0); - } else { - // 4B to 4I - assert(dst_vlen_in_bytes == 16 && dst_bt == T_INT, "unsupported"); - _xshll(is_unsigned, dst, T8H, src, T8B, 0); + // 4B to 4S/4I, 8B to 8S + assert(dst_vlen_in_bytes == 8 || dst_vlen_in_bytes == 16, "unsupported"); + assert(dst_bt == T_SHORT || dst_bt == T_INT, "unsupported"); + _xshll(is_unsigned, dst, T8H, src, T8B, 0); + if (dst_bt == T_INT) { _xshll(is_unsigned, dst, T4S, dst, T4H, 0); } } else if (src_bt == T_SHORT) { - // 4S to 4I - assert(dst_vlen_in_bytes == 16 && dst_bt == T_INT, "unsupported"); + // 2S to 2I/2L, 4S to 4I + assert(dst_vlen_in_bytes == 8 || dst_vlen_in_bytes == 16, "unsupported"); + assert(dst_bt == T_INT || dst_bt == T_LONG, "unsupported"); _xshll(is_unsigned, dst, T4S, src, T4H, 0); + if (dst_bt == T_LONG) { + _xshll(is_unsigned, dst, T2D, dst, T2S, 0); + } } else if (src_bt == T_INT) { // 2I to 2L assert(dst_vlen_in_bytes == 16 && dst_bt == T_LONG, "unsupported"); @@ -1810,18 +1812,21 @@ void C2_MacroAssembler::neon_vector_narrow(FloatRegister dst, BasicType dst_bt, assert(dst_bt == T_BYTE, "unsupported"); xtn(dst, T8B, src, T8H); } else if (src_bt == T_INT) { - // 4I to 4B/4S - assert(src_vlen_in_bytes == 16, "unsupported"); + // 2I to 2S, 4I to 4B/4S + assert(src_vlen_in_bytes == 8 || src_vlen_in_bytes == 16, "unsupported"); 
assert(dst_bt == T_BYTE || dst_bt == T_SHORT, "unsupported"); xtn(dst, T4H, src, T4S); if (dst_bt == T_BYTE) { xtn(dst, T8B, dst, T8H); } } else if (src_bt == T_LONG) { - // 2L to 2I + // 2L to 2S/2I assert(src_vlen_in_bytes == 16, "unsupported"); - assert(dst_bt == T_INT, "unsupported"); + assert(dst_bt == T_INT || dst_bt == T_SHORT, "unsupported"); xtn(dst, T2S, src, T2D); + if (dst_bt == T_SHORT) { + xtn(dst, T4H, dst, T4S); + } } else { ShouldNotReachHere(); } diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/TestDependencyOffsets.java b/test/hotspot/jtreg/compiler/loopopts/superword/TestDependencyOffsets.java index cfa19ce385a..24a8581434f 100644 --- a/test/hotspot/jtreg/compiler/loopopts/superword/TestDependencyOffsets.java +++ b/test/hotspot/jtreg/compiler/loopopts/superword/TestDependencyOffsets.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, 2024, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2023, 2025, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -597,8 +597,7 @@ public class TestDependencyOffsets { case "byte" -> new CPUMinVectorWidth[]{new CPUMinVectorWidth(SSE4_ASIMD, 4 )}; case "char" -> new CPUMinVectorWidth[]{new CPUMinVectorWidth(SSE4, 4 ), new CPUMinVectorWidth(ASIMD, 8 )}; - case "short" -> new CPUMinVectorWidth[]{new CPUMinVectorWidth(SSE4, 4 ), - new CPUMinVectorWidth(ASIMD, 8 )}; + case "short" -> new CPUMinVectorWidth[]{new CPUMinVectorWidth(SSE4_ASIMD, 4 )}; case "int" -> new CPUMinVectorWidth[]{new CPUMinVectorWidth(SSE4_ASIMD, 8 )}; case "long" -> new CPUMinVectorWidth[]{new CPUMinVectorWidth(SSE4_ASIMD, 16)}; case "float" -> new CPUMinVectorWidth[]{new CPUMinVectorWidth(SSE4_ASIMD, 8 )}; diff --git a/test/hotspot/jtreg/compiler/vectorapi/reshape/utils/TestCastMethods.java b/test/hotspot/jtreg/compiler/vectorapi/reshape/utils/TestCastMethods.java index fac829c82e4..5a4271cc5b0 100644 --- a/test/hotspot/jtreg/compiler/vectorapi/reshape/utils/TestCastMethods.java +++ b/test/hotspot/jtreg/compiler/vectorapi/reshape/utils/TestCastMethods.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2024, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, 2025, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -649,18 +649,25 @@ public class TestCastMethods { makePair(SSPEC128, BSPEC64), makePair(SSPEC256, BSPEC128), makePair(SSPEC512, BSPEC256), + makePair(SSPEC64, ISPEC64), makePair(SSPEC64, ISPEC128), makePair(SSPEC128, ISPEC256), makePair(SSPEC256, ISPEC512), + makePair(SSPEC64, LSPEC128), makePair(SSPEC64, LSPEC256), + makePair(SSPEC128, LSPEC128), makePair(SSPEC128, LSPEC512), + makePair(SSPEC64, FSPEC64), makePair(SSPEC64, FSPEC128), makePair(SSPEC128, FSPEC256), makePair(SSPEC256, FSPEC512), + makePair(SSPEC64, DSPEC128), makePair(SSPEC64, DSPEC256), + makePair(SSPEC128, DSPEC128), makePair(SSPEC128, DSPEC512), makePair(ISPEC256, BSPEC64), makePair(ISPEC512, BSPEC128), + makePair(ISPEC64, SSPEC64), makePair(ISPEC128, SSPEC64), makePair(ISPEC256, SSPEC128), makePair(ISPEC512, SSPEC256), @@ -675,7 +682,9 @@ public class TestCastMethods { makePair(ISPEC128, DSPEC256), makePair(ISPEC256, DSPEC512), makePair(LSPEC512, BSPEC64), + makePair(LSPEC128, SSPEC64), makePair(LSPEC256, SSPEC64), + makePair(LSPEC128, SSPEC128), makePair(LSPEC512, SSPEC128), makePair(LSPEC128, ISPEC64), makePair(LSPEC256, ISPEC128), @@ -688,6 +697,7 @@ public class TestCastMethods { makePair(LSPEC512, DSPEC512), makePair(FSPEC256, BSPEC64), makePair(FSPEC512, BSPEC128), + makePair(FSPEC64, SSPEC64), makePair(FSPEC128, SSPEC64), makePair(FSPEC256, SSPEC128), makePair(FSPEC512, SSPEC256), @@ -702,7 +712,9 @@ public class TestCastMethods { makePair(FSPEC128, DSPEC256), makePair(FSPEC256, DSPEC512), makePair(DSPEC512, BSPEC64), + makePair(DSPEC128, SSPEC64), makePair(DSPEC256, SSPEC64), + makePair(DSPEC128, SSPEC128), makePair(DSPEC512, SSPEC128), makePair(DSPEC128, ISPEC64), makePair(DSPEC256, ISPEC128), @@ -751,14 +763,17 @@ public class TestCastMethods { makePair(BSPEC512, LSPEC256, true), makePair(BSPEC512, LSPEC512, true), + makePair(SSPEC64, ISPEC64, true), makePair(SSPEC64, ISPEC128, true), makePair(SSPEC64, ISPEC256, 
true), makePair(SSPEC64, ISPEC512, true), + makePair(SSPEC64, LSPEC128, true), makePair(SSPEC64, LSPEC256, true), makePair(SSPEC64, LSPEC512, true), makePair(SSPEC128, ISPEC128, true), makePair(SSPEC128, ISPEC256, true), makePair(SSPEC128, ISPEC512, true), + makePair(SSPEC128, LSPEC128, true), makePair(SSPEC128, LSPEC256, true), makePair(SSPEC128, LSPEC512, true), makePair(SSPEC256, ISPEC128, true), @@ -789,23 +804,35 @@ public class TestCastMethods { makePair(BSPEC64, FSPEC128), makePair(SSPEC64, BSPEC64), makePair(SSPEC128, BSPEC64), + makePair(SSPEC64, ISPEC64), makePair(SSPEC64, ISPEC128), + makePair(SSPEC64, LSPEC128), + makePair(SSPEC128, LSPEC128), + makePair(SSPEC64, FSPEC64), makePair(SSPEC64, FSPEC128), + makePair(SSPEC64, DSPEC128), + makePair(SSPEC128, DSPEC128), makePair(ISPEC128, BSPEC64), makePair(ISPEC128, SSPEC64), - makePair(ISPEC64, LSPEC128), + makePair(ISPEC64, SSPEC64), + makePair(ISPEC64, LSPEC128), makePair(ISPEC64, FSPEC64), makePair(ISPEC128, FSPEC128), makePair(ISPEC64, DSPEC128), + makePair(LSPEC128, SSPEC64), + makePair(LSPEC128, SSPEC128), makePair(LSPEC128, ISPEC64), makePair(LSPEC128, FSPEC64), makePair(LSPEC128, DSPEC128), makePair(FSPEC128, BSPEC64), + makePair(FSPEC64, SSPEC64), makePair(FSPEC128, SSPEC64), makePair(FSPEC64, ISPEC64), makePair(FSPEC128, ISPEC128), makePair(FSPEC64, LSPEC128), makePair(FSPEC64, DSPEC128), + makePair(DSPEC128, SSPEC64), + makePair(DSPEC128, SSPEC128), makePair(DSPEC128, ISPEC64), makePair(DSPEC128, LSPEC128), makePair(DSPEC128, FSPEC64), @@ -816,8 +843,11 @@ public class TestCastMethods { makePair(BSPEC128, SSPEC64, true), makePair(BSPEC128, SSPEC128, true), makePair(BSPEC128, ISPEC128, true), + makePair(SSPEC64, ISPEC64, true), makePair(SSPEC64, ISPEC128, true), + makePair(SSPEC64, LSPEC128, true), makePair(SSPEC128, ISPEC128, true), + makePair(SSPEC128, LSPEC128, true), makePair(ISPEC64, LSPEC128, true) ); } diff --git a/test/hotspot/jtreg/compiler/vectorization/TestFloatConversionsVector.java 
b/test/hotspot/jtreg/compiler/vectorization/TestFloatConversionsVector.java index f777206bab5..482dcf934c5 100644 --- a/test/hotspot/jtreg/compiler/vectorization/TestFloatConversionsVector.java +++ b/test/hotspot/jtreg/compiler/vectorization/TestFloatConversionsVector.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, 2024, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2022, 2025, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -84,10 +84,13 @@ public class TestFloatConversionsVector { } @Test + @IR(counts = {IRNode.VECTOR_CAST_F2HF, IRNode.VECTOR_SIZE_2, "> 0"}, + applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"}, + applyIfCPUFeature = {"asimd", "true"}) public void test_float_float16_short_vector(short[] sout, float[] finp) { - for (int i = 0; i < finp.length; i+= 4) { - sout[i+0] = Float.floatToFloat16(finp[i+0]); - sout[i+1] = Float.floatToFloat16(finp[i+1]); + for (int i = 0; i < finp.length; i += 4) { + sout[i] = Float.floatToFloat16(finp[i]); + sout[i + 1] = Float.floatToFloat16(finp[i + 1]); } } @@ -124,8 +127,9 @@ public class TestFloatConversionsVector { } // Verifying the result - for (int i = 0; i < ARRLEN; i++) { + for (int i = 0; i < ARRLEN; i += 4) { Asserts.assertEquals(Float.floatToFloat16(finp[i]), sout[i]); + Asserts.assertEquals(Float.floatToFloat16(finp[i + 1]), sout[i + 1]); } } @@ -152,7 +156,19 @@ public class TestFloatConversionsVector { } } - @Run(test = {"test_float16_float", "test_float16_float_strided"}, mode = RunMode.STANDALONE) + @Test + @IR(counts = {IRNode.VECTOR_CAST_HF2F, IRNode.VECTOR_SIZE_2, "> 0"}, + applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"}, + applyIfCPUFeature = {"asimd", "true"}) + public void test_float16_float_short_vector(float[] fout, short[] sinp) { + for (int i = 0; i < sinp.length; i += 4) { + fout[i] = 
Float.float16ToFloat(sinp[i]); + fout[i + 1] = Float.float16ToFloat(sinp[i + 1]); + } + } + + @Run(test = {"test_float16_float", "test_float16_float_strided", + "test_float16_float_short_vector"}, mode = RunMode.STANDALONE) public void kernel_test_float16_float() { sinp = new short[ARRLEN]; fout = new float[ARRLEN]; @@ -178,5 +194,15 @@ public class TestFloatConversionsVector { for (int i = 0; i < ARRLEN/2; i++) { Asserts.assertEquals(Float.float16ToFloat(sinp[i*2]), fout[i*2]); } + + for (int i = 0; i < ITERS; i++) { + test_float16_float_short_vector(fout, sinp); + } + + // Verifying the result + for (int i = 0; i < ARRLEN; i += 4) { + Asserts.assertEquals(Float.float16ToFloat(sinp[i]), fout[i]); + Asserts.assertEquals(Float.float16ToFloat(sinp[i + 1]), fout[i + 1]); + } } } diff --git a/test/hotspot/jtreg/compiler/vectorization/runner/ArrayTypeConvertTest.java b/test/hotspot/jtreg/compiler/vectorization/runner/ArrayTypeConvertTest.java index 11b07d57dd9..3fa636b42f7 100644 --- a/test/hotspot/jtreg/compiler/vectorization/runner/ArrayTypeConvertTest.java +++ b/test/hotspot/jtreg/compiler/vectorization/runner/ArrayTypeConvertTest.java @@ -1,6 +1,6 @@ /* * Copyright (c) 2022, 2023, Arm Limited. All rights reserved. - * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2024, 2025, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -252,9 +252,12 @@ public class ArrayTypeConvertTest extends VectorizationTestRunner { } @Test - @IR(applyIfCPUFeatureOr = {"sve", "true", "avx2", "true", "rvv", "true"}, + @IR(applyIfCPUFeature = {"rvv", "true"}, applyIf = {"MaxVectorSize", ">=32"}, counts = {IRNode.VECTOR_CAST_S2D, IRNode.VECTOR_SIZE + "min(max_short, max_double)", ">0"}) + @IR(applyIfCPUFeatureOr = {"asimd", "true", "avx", "true"}, + applyIf = {"MaxVectorSize", ">=16"}, + counts = {IRNode.VECTOR_CAST_S2D, IRNode.VECTOR_SIZE + "min(max_short, max_double)", ">0"}) public double[] convertShortToDouble() { double[] res = new double[SIZE]; for (int i = 0; i < SIZE; i++) { @@ -374,9 +377,12 @@ public class ArrayTypeConvertTest extends VectorizationTestRunner { } @Test - @IR(applyIfCPUFeatureOr = {"sve", "true", "avx", "true", "rvv", "true"}, + @IR(applyIfCPUFeature = {"rvv", "true"}, applyIf = {"MaxVectorSize", ">=32"}, counts = {IRNode.VECTOR_CAST_D2S, IRNode.VECTOR_SIZE + "min(max_double, max_short)", ">0"}) + @IR(applyIfCPUFeatureOr = {"sve", "true", "avx", "true"}, + applyIf = {"MaxVectorSize", ">=16"}, + counts = {IRNode.VECTOR_CAST_D2S, IRNode.VECTOR_SIZE + "min(max_double, max_short)", ">0"}) public short[] convertDoubleToShort() { short[] res = new short[SIZE]; for (int i = 0; i < SIZE; i++) { @@ -386,9 +392,12 @@ public class ArrayTypeConvertTest extends VectorizationTestRunner { } @Test - @IR(applyIfCPUFeatureOr = {"sve", "true", "avx", "true", "rvv", "true"}, + @IR(applyIfCPUFeature = {"rvv", "true"}, applyIf = {"MaxVectorSize", ">=32"}, counts = {IRNode.VECTOR_CAST_D2S, IRNode.VECTOR_SIZE + "min(max_double, max_char)", ">0"}) + @IR(applyIfCPUFeatureOr = {"sve", "true", "avx", "true"}, + applyIf = {"MaxVectorSize", ">=16"}, + counts = {IRNode.VECTOR_CAST_D2S, IRNode.VECTOR_SIZE + "min(max_double, max_char)", ">0"}) public char[] convertDoubleToChar() { char[] res = new char[SIZE]; for (int i = 0; i < SIZE; i++) { 
diff --git a/test/micro/org/openjdk/bench/jdk/incubator/vector/VectorFPtoIntCastOperations.java b/test/micro/org/openjdk/bench/jdk/incubator/vector/VectorFPtoIntCastOperations.java
index 6c3f004dcd9..6e4a57b79e5 100644
--- a/test/micro/org/openjdk/bench/jdk/incubator/vector/VectorFPtoIntCastOperations.java
+++ b/test/micro/org/openjdk/bench/jdk/incubator/vector/VectorFPtoIntCastOperations.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022, 2024, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2022, 2025, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -120,6 +120,18 @@ public class VectorFPtoIntCastOperations {
         }
     }
 
+    @Benchmark
+    public void microFloat64ToShort64() {
+        VectorSpecies ISPECIES = FloatVector.SPECIES_64;
+        VectorSpecies OSPECIES = ShortVector.SPECIES_64;
+        for (int i = 0, j = 0; i < ISPECIES.loopBound(SIZE / 2); i += ISPECIES.length(), j += OSPECIES.length()) {
+            FloatVector.fromArray(ISPECIES, float_arr, i)
+                .convertShape(VectorOperators.F2S, OSPECIES, 0)
+                .reinterpretAsShorts()
+                .intoArray(short_res, j);
+        }
+    }
+
     @Benchmark
     public void microFloat128ToShort128() {
         VectorSpecies ISPECIES = FloatVector.SPECIES_128;
diff --git a/test/micro/org/openjdk/bench/vm/compiler/VectorTwoShorts.java b/test/micro/org/openjdk/bench/vm/compiler/VectorTwoShorts.java
new file mode 100644
index 00000000000..445f67552ab
--- /dev/null
+++ b/test/micro/org/openjdk/bench/vm/compiler/VectorTwoShorts.java
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package org.openjdk.bench.vm.compiler;
+
+import org.openjdk.jmh.annotations.*;
+
+import java.util.concurrent.TimeUnit;
+import java.util.Random;
+
+/**
+ * Element-wise {@code short[]} loops in which every store goes to index
+ * {@code i + 3} while the loads come from index {@code i}.
+ */
+@BenchmarkMode(Mode.AverageTime)
+@OutputTimeUnit(TimeUnit.NANOSECONDS)
+@State(Scope.Thread)
+@Warmup(iterations = 4, time = 2, timeUnit = TimeUnit.SECONDS)
+@Measurement(iterations = 4, time = 2, timeUnit = TimeUnit.SECONDS)
+@Fork(value = 3)
+public class VectorTwoShorts {
+    // Length of each of the three arrays.
+    @Param({"64", "128", "512", "1024"})
+    public int LEN;
+
+    private short[] sA;
+    private short[] sB;
+    private short[] sC;
+
+    // Seed for the java.util.Random that generates the input values.
+    @Param("0")
+    private int seed;
+
+    /**
+     * Allocates the arrays and fills both inputs with pseudo-random values.
+     */
+    @Setup
+    public void init() {
+        // Create the generator here rather than in a field initializer:
+        // a field initializer runs before JMH injects the @Param value,
+        // so it would always be seeded with the field default of 0.
+        Random r = new Random(seed);
+
+        sA = new short[LEN];
+        sB = new short[LEN];
+        sC = new short[LEN];
+
+        for (int i = 0; i < LEN; i++) {
+            sA[i] = (short) r.nextInt();
+            sB[i] = (short) r.nextInt();
+        }
+    }
+
+    /** Stores sA[i] + sB[i], truncated to short, into sC[i + 3]. */
+    @Benchmark
+    public void addVec2S() {
+        for (int i = 0; i < LEN - 3; i++) {
+            sC[i + 3] = (short) (sA[i] + sB[i]);
+        }
+    }
+
+    /** Stores sA[i] * sB[i], truncated to short, into sC[i + 3]. */
+    @Benchmark
+    public void mulVec2S() {
+        for (int i = 0; i < LEN - 3; i++) {
+            sC[i + 3] = (short) (sA[i] * sB[i]);
+        }
+    }
+
+    /** Stores the byte-swapped value of sA[i] into sC[i + 3]. */
+    @Benchmark
+    public void reverseBytesVec2S() {
+        for (int i = 0; i < LEN - 3; i++) {
+            // Short.reverseBytes already returns a short; no cast needed.
+            sC[i + 3] = Short.reverseBytes(sA[i]);
+        }
+    }
+}