mirror of
https://github.com/openjdk/jdk.git
synced 2026-01-28 12:09:14 +00:00
8359419: AArch64: Relax min vector length to 32-bit for short vectors
Reviewed-by: aph, fgao, bkilambi, dlunden
This commit is contained in:
parent
ed70910b0f
commit
ac141c2fa1
@ -2362,17 +2362,34 @@ int Matcher::max_vector_size(const BasicType bt) {
|
||||
}
|
||||
|
||||
int Matcher::min_vector_size(const BasicType bt) {
|
||||
int max_size = max_vector_size(bt);
|
||||
// Limit the min vector size to 8 bytes.
|
||||
int size = 8 / type2aelembytes(bt);
|
||||
if (bt == T_BYTE) {
|
||||
// To support vector api shuffle/rearrange.
|
||||
size = 4;
|
||||
} else if (bt == T_BOOLEAN) {
|
||||
// To support vector api load/store mask.
|
||||
size = 2;
|
||||
// Usually, the shortest vector length supported by AArch64 ISA and
|
||||
// Vector API species is 64 bits. However, we allow 32-bit or 16-bit
|
||||
// vectors in a few special cases.
|
||||
int size;
|
||||
switch(bt) {
|
||||
case T_BOOLEAN:
|
||||
// Load/store a vector mask with only 2 elements for vector types
|
||||
// such as "2I/2F/2L/2D".
|
||||
size = 2;
|
||||
break;
|
||||
case T_BYTE:
|
||||
// Generate a "4B" vector, to support vector cast between "8B/16B"
|
||||
// and "4S/4I/4L/4F/4D".
|
||||
size = 4;
|
||||
break;
|
||||
case T_SHORT:
|
||||
// Generate a "2S" vector, to support vector cast between "4S/8S"
|
||||
// and "2I/2L/2F/2D".
|
||||
size = 2;
|
||||
break;
|
||||
default:
|
||||
// Limit the min vector length to 64-bit.
|
||||
size = 8 / type2aelembytes(bt);
|
||||
// The number of elements in a vector should be at least 2.
|
||||
size = MAX2(size, 2);
|
||||
}
|
||||
if (size < 2) size = 2;
|
||||
|
||||
int max_size = max_vector_size(bt);
|
||||
return MIN2(size, max_size);
|
||||
}
|
||||
|
||||
|
||||
@ -131,7 +131,7 @@ source %{
|
||||
// These operations are not profitable to be vectorized on NEON, because no direct
|
||||
// NEON instructions support them. But the match rule support for them is profitable for
|
||||
// Vector API intrinsics.
|
||||
if ((opcode == Op_VectorCastD2X && bt == T_INT) ||
|
||||
if ((opcode == Op_VectorCastD2X && (bt == T_INT || bt == T_SHORT)) ||
|
||||
(opcode == Op_VectorCastL2X && bt == T_FLOAT) ||
|
||||
(opcode == Op_CountLeadingZerosV && bt == T_LONG) ||
|
||||
(opcode == Op_CountTrailingZerosV && bt == T_LONG) ||
|
||||
@ -189,6 +189,18 @@ source %{
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
case Op_AddReductionVI:
|
||||
case Op_AndReductionV:
|
||||
case Op_OrReductionV:
|
||||
case Op_XorReductionV:
|
||||
case Op_MinReductionV:
|
||||
case Op_MaxReductionV:
|
||||
// Reductions with less than 8 bytes vector length are
|
||||
// not supported.
|
||||
if (length_in_bytes < 8) {
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
case Op_MulReductionVD:
|
||||
case Op_MulReductionVF:
|
||||
case Op_MulReductionVI:
|
||||
@ -4244,8 +4256,8 @@ instruct vzeroExtStoX(vReg dst, vReg src) %{
|
||||
assert(bt == T_INT || bt == T_LONG, "must be");
|
||||
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
|
||||
if (VM_Version::use_neon_for_vector(length_in_bytes)) {
|
||||
// 4S to 4I
|
||||
__ neon_vector_extend($dst$$FloatRegister, T_INT, length_in_bytes,
|
||||
// 2S to 2I/2L, 4S to 4I
|
||||
__ neon_vector_extend($dst$$FloatRegister, bt, length_in_bytes,
|
||||
$src$$FloatRegister, T_SHORT, /* is_unsigned */ true);
|
||||
} else {
|
||||
assert(UseSVE > 0, "must be sve");
|
||||
@ -4265,11 +4277,11 @@ instruct vzeroExtItoX(vReg dst, vReg src) %{
|
||||
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
|
||||
if (VM_Version::use_neon_for_vector(length_in_bytes)) {
|
||||
// 2I to 2L
|
||||
__ neon_vector_extend($dst$$FloatRegister, T_LONG, length_in_bytes,
|
||||
__ neon_vector_extend($dst$$FloatRegister, bt, length_in_bytes,
|
||||
$src$$FloatRegister, T_INT, /* is_unsigned */ true);
|
||||
} else {
|
||||
assert(UseSVE > 0, "must be sve");
|
||||
__ sve_vector_extend($dst$$FloatRegister, __ D,
|
||||
__ sve_vector_extend($dst$$FloatRegister, __ elemType_to_regVariant(bt),
|
||||
$src$$FloatRegister, __ S, /* is_unsigned */ true);
|
||||
}
|
||||
%}
|
||||
@ -4343,11 +4355,15 @@ instruct vcvtStoX_extend(vReg dst, vReg src) %{
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
|
||||
if (VM_Version::use_neon_for_vector(length_in_bytes)) {
|
||||
// 4S to 4I/4F
|
||||
__ neon_vector_extend($dst$$FloatRegister, T_INT, length_in_bytes,
|
||||
$src$$FloatRegister, T_SHORT);
|
||||
if (bt == T_FLOAT) {
|
||||
__ scvtfv(__ T4S, $dst$$FloatRegister, $dst$$FloatRegister);
|
||||
if (is_floating_point_type(bt)) {
|
||||
// 2S to 2F/2D, 4S to 4F
|
||||
__ neon_vector_extend($dst$$FloatRegister, bt == T_FLOAT ? T_INT : T_LONG,
|
||||
length_in_bytes, $src$$FloatRegister, T_SHORT);
|
||||
__ scvtfv(get_arrangement(this), $dst$$FloatRegister, $dst$$FloatRegister);
|
||||
} else {
|
||||
// 2S to 2I/2L, 4S to 4I
|
||||
__ neon_vector_extend($dst$$FloatRegister, bt, length_in_bytes,
|
||||
$src$$FloatRegister, T_SHORT);
|
||||
}
|
||||
} else {
|
||||
assert(UseSVE > 0, "must be sve");
|
||||
@ -4371,7 +4387,7 @@ instruct vcvtItoX_narrow_neon(vReg dst, vReg src) %{
|
||||
effect(TEMP_DEF dst);
|
||||
format %{ "vcvtItoX_narrow_neon $dst, $src" %}
|
||||
ins_encode %{
|
||||
// 4I to 4B/4S
|
||||
// 2I to 2S, 4I to 4B/4S
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
uint length_in_bytes = Matcher::vector_length_in_bytes(this, $src);
|
||||
__ neon_vector_narrow($dst$$FloatRegister, bt,
|
||||
@ -4434,28 +4450,29 @@ instruct vcvtItoX(vReg dst, vReg src) %{
|
||||
|
||||
// VectorCastL2X
|
||||
|
||||
instruct vcvtLtoI_neon(vReg dst, vReg src) %{
|
||||
predicate(Matcher::vector_element_basic_type(n) == T_INT &&
|
||||
instruct vcvtLtoX_narrow_neon(vReg dst, vReg src) %{
|
||||
predicate((Matcher::vector_element_basic_type(n) == T_INT ||
|
||||
Matcher::vector_element_basic_type(n) == T_SHORT) &&
|
||||
VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n->in(1))));
|
||||
match(Set dst (VectorCastL2X src));
|
||||
format %{ "vcvtLtoI_neon $dst, $src" %}
|
||||
format %{ "vcvtLtoX_narrow_neon $dst, $src" %}
|
||||
ins_encode %{
|
||||
// 2L to 2I
|
||||
// 2L to 2S/2I
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
uint length_in_bytes = Matcher::vector_length_in_bytes(this, $src);
|
||||
__ neon_vector_narrow($dst$$FloatRegister, T_INT,
|
||||
__ neon_vector_narrow($dst$$FloatRegister, bt,
|
||||
$src$$FloatRegister, T_LONG, length_in_bytes);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
instruct vcvtLtoI_sve(vReg dst, vReg src, vReg tmp) %{
|
||||
predicate((Matcher::vector_element_basic_type(n) == T_INT &&
|
||||
!VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n->in(1)))) ||
|
||||
Matcher::vector_element_basic_type(n) == T_BYTE ||
|
||||
Matcher::vector_element_basic_type(n) == T_SHORT);
|
||||
instruct vcvtLtoX_narrow_sve(vReg dst, vReg src, vReg tmp) %{
|
||||
predicate(!VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n->in(1))) &&
|
||||
!is_floating_point_type(Matcher::vector_element_basic_type(n)) &&
|
||||
type2aelembytes(Matcher::vector_element_basic_type(n)) <= 4);
|
||||
match(Set dst (VectorCastL2X src));
|
||||
effect(TEMP_DEF dst, TEMP tmp);
|
||||
format %{ "vcvtLtoI_sve $dst, $src\t# KILL $tmp" %}
|
||||
format %{ "vcvtLtoX_narrow_sve $dst, $src\t# KILL $tmp" %}
|
||||
ins_encode %{
|
||||
assert(UseSVE > 0, "must be sve");
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
@ -4521,10 +4538,11 @@ instruct vcvtFtoX_narrow_neon(vReg dst, vReg src) %{
|
||||
effect(TEMP_DEF dst);
|
||||
format %{ "vcvtFtoX_narrow_neon $dst, $src" %}
|
||||
ins_encode %{
|
||||
// 4F to 4B/4S
|
||||
// 2F to 2S, 4F to 4B/4S
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
uint length_in_bytes = Matcher::vector_length_in_bytes(this, $src);
|
||||
__ fcvtzs($dst$$FloatRegister, __ T4S, $src$$FloatRegister);
|
||||
__ fcvtzs($dst$$FloatRegister, length_in_bytes == 16 ? __ T4S : __ T2S,
|
||||
$src$$FloatRegister);
|
||||
__ neon_vector_narrow($dst$$FloatRegister, bt,
|
||||
$dst$$FloatRegister, T_INT, length_in_bytes);
|
||||
%}
|
||||
@ -4590,12 +4608,14 @@ instruct vcvtFtoX(vReg dst, vReg src) %{
|
||||
// VectorCastD2X
|
||||
|
||||
instruct vcvtDtoI_neon(vReg dst, vReg src) %{
|
||||
predicate(UseSVE == 0 && Matcher::vector_element_basic_type(n) == T_INT);
|
||||
predicate(UseSVE == 0 &&
|
||||
(Matcher::vector_element_basic_type(n) == T_INT ||
|
||||
Matcher::vector_element_basic_type(n) == T_SHORT));
|
||||
match(Set dst (VectorCastD2X src));
|
||||
effect(TEMP_DEF dst);
|
||||
format %{ "vcvtDtoI_neon $dst, $src\t# 2D to 2I" %}
|
||||
format %{ "vcvtDtoI_neon $dst, $src\t# 2D to 2S/2I" %}
|
||||
ins_encode %{
|
||||
// 2D to 2I
|
||||
// 2D to 2S/2I
|
||||
__ ins($dst$$FloatRegister, __ D, $src$$FloatRegister, 0, 1);
|
||||
// We can't use fcvtzs(vector, integer) instruction here because we need
|
||||
// saturation arithmetic. See JDK-8276151.
|
||||
@ -4603,6 +4623,10 @@ instruct vcvtDtoI_neon(vReg dst, vReg src) %{
|
||||
__ fcvtzdw(rscratch2, $dst$$FloatRegister);
|
||||
__ fmovs($dst$$FloatRegister, rscratch1);
|
||||
__ mov($dst$$FloatRegister, __ S, 1, rscratch2);
|
||||
if (Matcher::vector_element_basic_type(this) == T_SHORT) {
|
||||
__ neon_vector_narrow($dst$$FloatRegister, T_SHORT,
|
||||
$dst$$FloatRegister, T_INT, 8);
|
||||
}
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
@ -4676,7 +4700,7 @@ instruct vcvtHFtoF(vReg dst, vReg src) %{
|
||||
ins_encode %{
|
||||
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
|
||||
if (VM_Version::use_neon_for_vector(length_in_bytes)) {
|
||||
// 4HF to 4F
|
||||
// 2HF to 2F, 4HF to 4F
|
||||
__ fcvtl($dst$$FloatRegister, __ T4S, $src$$FloatRegister, __ T4H);
|
||||
} else {
|
||||
assert(UseSVE > 0, "must be sve");
|
||||
@ -4692,9 +4716,9 @@ instruct vcvtHFtoF(vReg dst, vReg src) %{
|
||||
instruct vcvtFtoHF_neon(vReg dst, vReg src) %{
|
||||
predicate(VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n->in(1))));
|
||||
match(Set dst (VectorCastF2HF src));
|
||||
format %{ "vcvtFtoHF_neon $dst, $src\t# 4F to 4HF" %}
|
||||
format %{ "vcvtFtoHF_neon $dst, $src\t# 2F/4F to 2HF/4HF" %}
|
||||
ins_encode %{
|
||||
// 4F to 4HF
|
||||
// 2F to 2HF, 4F to 4HF
|
||||
__ fcvtn($dst$$FloatRegister, __ T4H, $src$$FloatRegister, __ T4S);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
@ -6396,14 +6420,12 @@ instruct vpopcountI(vReg dst, vReg src) %{
|
||||
} else {
|
||||
assert(bt == T_SHORT || bt == T_INT, "unsupported");
|
||||
if (UseSVE == 0) {
|
||||
assert(length_in_bytes == 8 || length_in_bytes == 16, "unsupported");
|
||||
__ cnt($dst$$FloatRegister, length_in_bytes == 16 ? __ T16B : __ T8B,
|
||||
$src$$FloatRegister);
|
||||
__ uaddlp($dst$$FloatRegister, length_in_bytes == 16 ? __ T16B : __ T8B,
|
||||
$dst$$FloatRegister);
|
||||
assert(length_in_bytes <= 16, "unsupported");
|
||||
bool isQ = length_in_bytes == 16;
|
||||
__ cnt($dst$$FloatRegister, isQ ? __ T16B : __ T8B, $src$$FloatRegister);
|
||||
__ uaddlp($dst$$FloatRegister, isQ ? __ T16B : __ T8B, $dst$$FloatRegister);
|
||||
if (bt == T_INT) {
|
||||
__ uaddlp($dst$$FloatRegister, length_in_bytes == 16 ? __ T8H : __ T4H,
|
||||
$dst$$FloatRegister);
|
||||
__ uaddlp($dst$$FloatRegister, isQ ? __ T8H : __ T4H, $dst$$FloatRegister);
|
||||
}
|
||||
} else {
|
||||
__ sve_cnt($dst$$FloatRegister, __ elemType_to_regVariant(bt),
|
||||
@ -6465,7 +6487,7 @@ instruct vblend_neon(vReg dst, vReg src1, vReg src2) %{
|
||||
format %{ "vblend_neon $dst, $src1, $src2" %}
|
||||
ins_encode %{
|
||||
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
|
||||
assert(length_in_bytes == 8 || length_in_bytes == 16, "must be");
|
||||
assert(length_in_bytes <= 16, "must be");
|
||||
__ bsl($dst$$FloatRegister, length_in_bytes == 16 ? __ T16B : __ T8B,
|
||||
$src2$$FloatRegister, $src1$$FloatRegister);
|
||||
%}
|
||||
@ -6852,7 +6874,7 @@ instruct vcountTrailingZeros(vReg dst, vReg src) %{
|
||||
} else {
|
||||
assert(bt == T_SHORT || bt == T_INT || bt == T_LONG, "unsupported type");
|
||||
if (UseSVE == 0) {
|
||||
assert(length_in_bytes == 8 || length_in_bytes == 16, "unsupported");
|
||||
assert(length_in_bytes <= 16, "unsupported");
|
||||
__ neon_reverse_bits($dst$$FloatRegister, $src$$FloatRegister,
|
||||
bt, /* isQ */ length_in_bytes == 16);
|
||||
if (bt != T_LONG) {
|
||||
@ -6911,7 +6933,7 @@ instruct vreverse(vReg dst, vReg src) %{
|
||||
} else {
|
||||
assert(bt == T_SHORT || bt == T_INT || bt == T_LONG, "unsupported type");
|
||||
if (UseSVE == 0) {
|
||||
assert(length_in_bytes == 8 || length_in_bytes == 16, "unsupported");
|
||||
assert(length_in_bytes <= 16, "unsupported");
|
||||
__ neon_reverse_bits($dst$$FloatRegister, $src$$FloatRegister,
|
||||
bt, /* isQ */ length_in_bytes == 16);
|
||||
} else {
|
||||
@ -6947,7 +6969,7 @@ instruct vreverseBytes(vReg dst, vReg src) %{
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
|
||||
if (VM_Version::use_neon_for_vector(length_in_bytes)) {
|
||||
assert(length_in_bytes == 8 || length_in_bytes == 16, "unsupported");
|
||||
assert(length_in_bytes <= 16, "unsupported");
|
||||
if (bt == T_BYTE) {
|
||||
if ($dst$$FloatRegister != $src$$FloatRegister) {
|
||||
__ orr($dst$$FloatRegister, length_in_bytes == 16 ? __ T16B : __ T8B,
|
||||
|
||||
@ -121,7 +121,7 @@ source %{
|
||||
// These operations are not profitable to be vectorized on NEON, because no direct
|
||||
// NEON instructions support them. But the match rule support for them is profitable for
|
||||
// Vector API intrinsics.
|
||||
if ((opcode == Op_VectorCastD2X && bt == T_INT) ||
|
||||
if ((opcode == Op_VectorCastD2X && (bt == T_INT || bt == T_SHORT)) ||
|
||||
(opcode == Op_VectorCastL2X && bt == T_FLOAT) ||
|
||||
(opcode == Op_CountLeadingZerosV && bt == T_LONG) ||
|
||||
(opcode == Op_CountTrailingZerosV && bt == T_LONG) ||
|
||||
@ -179,6 +179,18 @@ source %{
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
case Op_AddReductionVI:
|
||||
case Op_AndReductionV:
|
||||
case Op_OrReductionV:
|
||||
case Op_XorReductionV:
|
||||
case Op_MinReductionV:
|
||||
case Op_MaxReductionV:
|
||||
// Reductions with less than 8 bytes vector length are
|
||||
// not supported.
|
||||
if (length_in_bytes < 8) {
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
case Op_MulReductionVD:
|
||||
case Op_MulReductionVF:
|
||||
case Op_MulReductionVI:
|
||||
@ -2502,31 +2514,31 @@ instruct reinterpret_resize_gt128b(vReg dst, vReg src, pReg ptmp, rFlagsReg cr)
|
||||
%}
|
||||
|
||||
// ---------------------------- Vector zero extend --------------------------------
|
||||
dnl VECTOR_ZERO_EXTEND($1, $2, $3, $4, $5 $6, $7, )
|
||||
dnl VECTOR_ZERO_EXTEND(op_name, dst_bt, src_bt, dst_size, src_size, assertion, neon_comment)
|
||||
dnl VECTOR_ZERO_EXTEND($1, $2, $3, $4, $5, )
|
||||
dnl VECTOR_ZERO_EXTEND(op_name, src_bt, src_size, assertion, neon_comment)
|
||||
define(`VECTOR_ZERO_EXTEND', `
|
||||
instruct vzeroExt$1toX(vReg dst, vReg src) %{
|
||||
match(Set dst (VectorUCast`$1'2X src));
|
||||
format %{ "vzeroExt$1toX $dst, $src" %}
|
||||
ins_encode %{
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
assert($6, "must be");
|
||||
assert($4, "must be");
|
||||
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
|
||||
if (VM_Version::use_neon_for_vector(length_in_bytes)) {
|
||||
// $7
|
||||
__ neon_vector_extend($dst$$FloatRegister, $2, length_in_bytes,
|
||||
$src$$FloatRegister, $3, /* is_unsigned */ true);
|
||||
// $5
|
||||
__ neon_vector_extend($dst$$FloatRegister, bt, length_in_bytes,
|
||||
$src$$FloatRegister, $2, /* is_unsigned */ true);
|
||||
} else {
|
||||
assert(UseSVE > 0, "must be sve");
|
||||
__ sve_vector_extend($dst$$FloatRegister, __ $4,
|
||||
$src$$FloatRegister, __ $5, /* is_unsigned */ true);
|
||||
__ sve_vector_extend($dst$$FloatRegister, __ elemType_to_regVariant(bt),
|
||||
$src$$FloatRegister, __ $3, /* is_unsigned */ true);
|
||||
}
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}')dnl
|
||||
VECTOR_ZERO_EXTEND(B, bt, T_BYTE, elemType_to_regVariant(bt), B, bt == T_SHORT || bt == T_INT || bt == T_LONG, `4B to 4S/4I, 8B to 8S')
|
||||
VECTOR_ZERO_EXTEND(S, T_INT, T_SHORT, elemType_to_regVariant(bt), H, bt == T_INT || bt == T_LONG, `4S to 4I')
|
||||
VECTOR_ZERO_EXTEND(I, T_LONG, T_INT, D, S, bt == T_LONG, `2I to 2L')
|
||||
VECTOR_ZERO_EXTEND(B, T_BYTE, B, bt == T_SHORT || bt == T_INT || bt == T_LONG, `4B to 4S/4I, 8B to 8S')
|
||||
VECTOR_ZERO_EXTEND(S, T_SHORT, H, bt == T_INT || bt == T_LONG, `2S to 2I/2L, 4S to 4I')
|
||||
VECTOR_ZERO_EXTEND(I, T_INT, S, bt == T_LONG, `2I to 2L')
|
||||
|
||||
// ------------------------------ Vector cast ----------------------------------
|
||||
|
||||
@ -2595,11 +2607,15 @@ instruct vcvtStoX_extend(vReg dst, vReg src) %{
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
|
||||
if (VM_Version::use_neon_for_vector(length_in_bytes)) {
|
||||
// 4S to 4I/4F
|
||||
__ neon_vector_extend($dst$$FloatRegister, T_INT, length_in_bytes,
|
||||
$src$$FloatRegister, T_SHORT);
|
||||
if (bt == T_FLOAT) {
|
||||
__ scvtfv(__ T4S, $dst$$FloatRegister, $dst$$FloatRegister);
|
||||
if (is_floating_point_type(bt)) {
|
||||
// 2S to 2F/2D, 4S to 4F
|
||||
__ neon_vector_extend($dst$$FloatRegister, bt == T_FLOAT ? T_INT : T_LONG,
|
||||
length_in_bytes, $src$$FloatRegister, T_SHORT);
|
||||
__ scvtfv(get_arrangement(this), $dst$$FloatRegister, $dst$$FloatRegister);
|
||||
} else {
|
||||
// 2S to 2I/2L, 4S to 4I
|
||||
__ neon_vector_extend($dst$$FloatRegister, bt, length_in_bytes,
|
||||
$src$$FloatRegister, T_SHORT);
|
||||
}
|
||||
} else {
|
||||
assert(UseSVE > 0, "must be sve");
|
||||
@ -2623,7 +2639,7 @@ instruct vcvtItoX_narrow_neon(vReg dst, vReg src) %{
|
||||
effect(TEMP_DEF dst);
|
||||
format %{ "vcvtItoX_narrow_neon $dst, $src" %}
|
||||
ins_encode %{
|
||||
// 4I to 4B/4S
|
||||
// 2I to 2S, 4I to 4B/4S
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
uint length_in_bytes = Matcher::vector_length_in_bytes(this, $src);
|
||||
__ neon_vector_narrow($dst$$FloatRegister, bt,
|
||||
@ -2686,28 +2702,29 @@ instruct vcvtItoX(vReg dst, vReg src) %{
|
||||
|
||||
// VectorCastL2X
|
||||
|
||||
instruct vcvtLtoI_neon(vReg dst, vReg src) %{
|
||||
predicate(Matcher::vector_element_basic_type(n) == T_INT &&
|
||||
instruct vcvtLtoX_narrow_neon(vReg dst, vReg src) %{
|
||||
predicate((Matcher::vector_element_basic_type(n) == T_INT ||
|
||||
Matcher::vector_element_basic_type(n) == T_SHORT) &&
|
||||
VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n->in(1))));
|
||||
match(Set dst (VectorCastL2X src));
|
||||
format %{ "vcvtLtoI_neon $dst, $src" %}
|
||||
format %{ "vcvtLtoX_narrow_neon $dst, $src" %}
|
||||
ins_encode %{
|
||||
// 2L to 2I
|
||||
// 2L to 2S/2I
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
uint length_in_bytes = Matcher::vector_length_in_bytes(this, $src);
|
||||
__ neon_vector_narrow($dst$$FloatRegister, T_INT,
|
||||
__ neon_vector_narrow($dst$$FloatRegister, bt,
|
||||
$src$$FloatRegister, T_LONG, length_in_bytes);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
instruct vcvtLtoI_sve(vReg dst, vReg src, vReg tmp) %{
|
||||
predicate((Matcher::vector_element_basic_type(n) == T_INT &&
|
||||
!VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n->in(1)))) ||
|
||||
Matcher::vector_element_basic_type(n) == T_BYTE ||
|
||||
Matcher::vector_element_basic_type(n) == T_SHORT);
|
||||
instruct vcvtLtoX_narrow_sve(vReg dst, vReg src, vReg tmp) %{
|
||||
predicate(!VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n->in(1))) &&
|
||||
!is_floating_point_type(Matcher::vector_element_basic_type(n)) &&
|
||||
type2aelembytes(Matcher::vector_element_basic_type(n)) <= 4);
|
||||
match(Set dst (VectorCastL2X src));
|
||||
effect(TEMP_DEF dst, TEMP tmp);
|
||||
format %{ "vcvtLtoI_sve $dst, $src\t# KILL $tmp" %}
|
||||
format %{ "vcvtLtoX_narrow_sve $dst, $src\t# KILL $tmp" %}
|
||||
ins_encode %{
|
||||
assert(UseSVE > 0, "must be sve");
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
@ -2773,10 +2790,11 @@ instruct vcvtFtoX_narrow_neon(vReg dst, vReg src) %{
|
||||
effect(TEMP_DEF dst);
|
||||
format %{ "vcvtFtoX_narrow_neon $dst, $src" %}
|
||||
ins_encode %{
|
||||
// 4F to 4B/4S
|
||||
// 2F to 2S, 4F to 4B/4S
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
uint length_in_bytes = Matcher::vector_length_in_bytes(this, $src);
|
||||
__ fcvtzs($dst$$FloatRegister, __ T4S, $src$$FloatRegister);
|
||||
__ fcvtzs($dst$$FloatRegister, length_in_bytes == 16 ? __ T4S : __ T2S,
|
||||
$src$$FloatRegister);
|
||||
__ neon_vector_narrow($dst$$FloatRegister, bt,
|
||||
$dst$$FloatRegister, T_INT, length_in_bytes);
|
||||
%}
|
||||
@ -2842,12 +2860,14 @@ instruct vcvtFtoX(vReg dst, vReg src) %{
|
||||
// VectorCastD2X
|
||||
|
||||
instruct vcvtDtoI_neon(vReg dst, vReg src) %{
|
||||
predicate(UseSVE == 0 && Matcher::vector_element_basic_type(n) == T_INT);
|
||||
predicate(UseSVE == 0 &&
|
||||
(Matcher::vector_element_basic_type(n) == T_INT ||
|
||||
Matcher::vector_element_basic_type(n) == T_SHORT));
|
||||
match(Set dst (VectorCastD2X src));
|
||||
effect(TEMP_DEF dst);
|
||||
format %{ "vcvtDtoI_neon $dst, $src\t# 2D to 2I" %}
|
||||
format %{ "vcvtDtoI_neon $dst, $src\t# 2D to 2S/2I" %}
|
||||
ins_encode %{
|
||||
// 2D to 2I
|
||||
// 2D to 2S/2I
|
||||
__ ins($dst$$FloatRegister, __ D, $src$$FloatRegister, 0, 1);
|
||||
// We can't use fcvtzs(vector, integer) instruction here because we need
|
||||
// saturation arithmetic. See JDK-8276151.
|
||||
@ -2855,6 +2875,10 @@ instruct vcvtDtoI_neon(vReg dst, vReg src) %{
|
||||
__ fcvtzdw(rscratch2, $dst$$FloatRegister);
|
||||
__ fmovs($dst$$FloatRegister, rscratch1);
|
||||
__ mov($dst$$FloatRegister, __ S, 1, rscratch2);
|
||||
if (Matcher::vector_element_basic_type(this) == T_SHORT) {
|
||||
__ neon_vector_narrow($dst$$FloatRegister, T_SHORT,
|
||||
$dst$$FloatRegister, T_INT, 8);
|
||||
}
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
@ -2928,7 +2952,7 @@ instruct vcvtHFtoF(vReg dst, vReg src) %{
|
||||
ins_encode %{
|
||||
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
|
||||
if (VM_Version::use_neon_for_vector(length_in_bytes)) {
|
||||
// 4HF to 4F
|
||||
// 2HF to 2F, 4HF to 4F
|
||||
__ fcvtl($dst$$FloatRegister, __ T4S, $src$$FloatRegister, __ T4H);
|
||||
} else {
|
||||
assert(UseSVE > 0, "must be sve");
|
||||
@ -2944,9 +2968,9 @@ instruct vcvtHFtoF(vReg dst, vReg src) %{
|
||||
instruct vcvtFtoHF_neon(vReg dst, vReg src) %{
|
||||
predicate(VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n->in(1))));
|
||||
match(Set dst (VectorCastF2HF src));
|
||||
format %{ "vcvtFtoHF_neon $dst, $src\t# 4F to 4HF" %}
|
||||
format %{ "vcvtFtoHF_neon $dst, $src\t# 2F/4F to 2HF/4HF" %}
|
||||
ins_encode %{
|
||||
// 4F to 4HF
|
||||
// 2F to 2HF, 4F to 4HF
|
||||
__ fcvtn($dst$$FloatRegister, __ T4H, $src$$FloatRegister, __ T4S);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
@ -4417,14 +4441,12 @@ instruct vpopcountI(vReg dst, vReg src) %{
|
||||
} else {
|
||||
assert(bt == T_SHORT || bt == T_INT, "unsupported");
|
||||
if (UseSVE == 0) {
|
||||
assert(length_in_bytes == 8 || length_in_bytes == 16, "unsupported");
|
||||
__ cnt($dst$$FloatRegister, length_in_bytes == 16 ? __ T16B : __ T8B,
|
||||
$src$$FloatRegister);
|
||||
__ uaddlp($dst$$FloatRegister, length_in_bytes == 16 ? __ T16B : __ T8B,
|
||||
$dst$$FloatRegister);
|
||||
assert(length_in_bytes <= 16, "unsupported");
|
||||
bool isQ = length_in_bytes == 16;
|
||||
__ cnt($dst$$FloatRegister, isQ ? __ T16B : __ T8B, $src$$FloatRegister);
|
||||
__ uaddlp($dst$$FloatRegister, isQ ? __ T16B : __ T8B, $dst$$FloatRegister);
|
||||
if (bt == T_INT) {
|
||||
__ uaddlp($dst$$FloatRegister, length_in_bytes == 16 ? __ T8H : __ T4H,
|
||||
$dst$$FloatRegister);
|
||||
__ uaddlp($dst$$FloatRegister, isQ ? __ T8H : __ T4H, $dst$$FloatRegister);
|
||||
}
|
||||
} else {
|
||||
__ sve_cnt($dst$$FloatRegister, __ elemType_to_regVariant(bt),
|
||||
@ -4475,7 +4497,7 @@ instruct vblend_neon(vReg dst, vReg src1, vReg src2) %{
|
||||
format %{ "vblend_neon $dst, $src1, $src2" %}
|
||||
ins_encode %{
|
||||
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
|
||||
assert(length_in_bytes == 8 || length_in_bytes == 16, "must be");
|
||||
assert(length_in_bytes <= 16, "must be");
|
||||
__ bsl($dst$$FloatRegister, length_in_bytes == 16 ? __ T16B : __ T8B,
|
||||
$src2$$FloatRegister, $src1$$FloatRegister);
|
||||
%}
|
||||
@ -4851,7 +4873,7 @@ instruct vcountTrailingZeros(vReg dst, vReg src) %{
|
||||
} else {
|
||||
assert(bt == T_SHORT || bt == T_INT || bt == T_LONG, "unsupported type");
|
||||
if (UseSVE == 0) {
|
||||
assert(length_in_bytes == 8 || length_in_bytes == 16, "unsupported");
|
||||
assert(length_in_bytes <= 16, "unsupported");
|
||||
__ neon_reverse_bits($dst$$FloatRegister, $src$$FloatRegister,
|
||||
bt, /* isQ */ length_in_bytes == 16);
|
||||
if (bt != T_LONG) {
|
||||
@ -4910,7 +4932,7 @@ instruct vreverse(vReg dst, vReg src) %{
|
||||
} else {
|
||||
assert(bt == T_SHORT || bt == T_INT || bt == T_LONG, "unsupported type");
|
||||
if (UseSVE == 0) {
|
||||
assert(length_in_bytes == 8 || length_in_bytes == 16, "unsupported");
|
||||
assert(length_in_bytes <= 16, "unsupported");
|
||||
__ neon_reverse_bits($dst$$FloatRegister, $src$$FloatRegister,
|
||||
bt, /* isQ */ length_in_bytes == 16);
|
||||
} else {
|
||||
@ -4935,7 +4957,7 @@ instruct vreverseBytes(vReg dst, vReg src) %{
|
||||
BasicType bt = Matcher::vector_element_basic_type(this);
|
||||
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
|
||||
if (VM_Version::use_neon_for_vector(length_in_bytes)) {
|
||||
assert(length_in_bytes == 8 || length_in_bytes == 16, "unsupported");
|
||||
assert(length_in_bytes <= 16, "unsupported");
|
||||
if (bt == T_BYTE) {
|
||||
if ($dst$$FloatRegister != $src$$FloatRegister) {
|
||||
__ orr($dst$$FloatRegister, length_in_bytes == 16 ? __ T16B : __ T8B,
|
||||
|
||||
@ -1778,19 +1778,21 @@ void C2_MacroAssembler::sve_vmask_lasttrue(Register dst, BasicType bt, PRegister
|
||||
void C2_MacroAssembler::neon_vector_extend(FloatRegister dst, BasicType dst_bt, unsigned dst_vlen_in_bytes,
|
||||
FloatRegister src, BasicType src_bt, bool is_unsigned) {
|
||||
if (src_bt == T_BYTE) {
|
||||
if (dst_bt == T_SHORT) {
|
||||
// 4B/8B to 4S/8S
|
||||
_xshll(is_unsigned, dst, T8H, src, T8B, 0);
|
||||
} else {
|
||||
// 4B to 4I
|
||||
assert(dst_vlen_in_bytes == 16 && dst_bt == T_INT, "unsupported");
|
||||
_xshll(is_unsigned, dst, T8H, src, T8B, 0);
|
||||
// 4B to 4S/4I, 8B to 8S
|
||||
assert(dst_vlen_in_bytes == 8 || dst_vlen_in_bytes == 16, "unsupported");
|
||||
assert(dst_bt == T_SHORT || dst_bt == T_INT, "unsupported");
|
||||
_xshll(is_unsigned, dst, T8H, src, T8B, 0);
|
||||
if (dst_bt == T_INT) {
|
||||
_xshll(is_unsigned, dst, T4S, dst, T4H, 0);
|
||||
}
|
||||
} else if (src_bt == T_SHORT) {
|
||||
// 4S to 4I
|
||||
assert(dst_vlen_in_bytes == 16 && dst_bt == T_INT, "unsupported");
|
||||
// 2S to 2I/2L, 4S to 4I
|
||||
assert(dst_vlen_in_bytes == 8 || dst_vlen_in_bytes == 16, "unsupported");
|
||||
assert(dst_bt == T_INT || dst_bt == T_LONG, "unsupported");
|
||||
_xshll(is_unsigned, dst, T4S, src, T4H, 0);
|
||||
if (dst_bt == T_LONG) {
|
||||
_xshll(is_unsigned, dst, T2D, dst, T2S, 0);
|
||||
}
|
||||
} else if (src_bt == T_INT) {
|
||||
// 2I to 2L
|
||||
assert(dst_vlen_in_bytes == 16 && dst_bt == T_LONG, "unsupported");
|
||||
@ -1810,18 +1812,21 @@ void C2_MacroAssembler::neon_vector_narrow(FloatRegister dst, BasicType dst_bt,
|
||||
assert(dst_bt == T_BYTE, "unsupported");
|
||||
xtn(dst, T8B, src, T8H);
|
||||
} else if (src_bt == T_INT) {
|
||||
// 4I to 4B/4S
|
||||
assert(src_vlen_in_bytes == 16, "unsupported");
|
||||
// 2I to 2S, 4I to 4B/4S
|
||||
assert(src_vlen_in_bytes == 8 || src_vlen_in_bytes == 16, "unsupported");
|
||||
assert(dst_bt == T_BYTE || dst_bt == T_SHORT, "unsupported");
|
||||
xtn(dst, T4H, src, T4S);
|
||||
if (dst_bt == T_BYTE) {
|
||||
xtn(dst, T8B, dst, T8H);
|
||||
}
|
||||
} else if (src_bt == T_LONG) {
|
||||
// 2L to 2I
|
||||
// 2L to 2S/2I
|
||||
assert(src_vlen_in_bytes == 16, "unsupported");
|
||||
assert(dst_bt == T_INT, "unsupported");
|
||||
assert(dst_bt == T_INT || dst_bt == T_SHORT, "unsupported");
|
||||
xtn(dst, T2S, src, T2D);
|
||||
if (dst_bt == T_SHORT) {
|
||||
xtn(dst, T4H, dst, T4S);
|
||||
}
|
||||
} else {
|
||||
ShouldNotReachHere();
|
||||
}
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2023, 2024, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2023, 2025, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -597,8 +597,7 @@ public class TestDependencyOffsets {
|
||||
case "byte" -> new CPUMinVectorWidth[]{new CPUMinVectorWidth(SSE4_ASIMD, 4 )};
|
||||
case "char" -> new CPUMinVectorWidth[]{new CPUMinVectorWidth(SSE4, 4 ),
|
||||
new CPUMinVectorWidth(ASIMD, 8 )};
|
||||
case "short" -> new CPUMinVectorWidth[]{new CPUMinVectorWidth(SSE4, 4 ),
|
||||
new CPUMinVectorWidth(ASIMD, 8 )};
|
||||
case "short" -> new CPUMinVectorWidth[]{new CPUMinVectorWidth(SSE4_ASIMD, 4 )};
|
||||
case "int" -> new CPUMinVectorWidth[]{new CPUMinVectorWidth(SSE4_ASIMD, 8 )};
|
||||
case "long" -> new CPUMinVectorWidth[]{new CPUMinVectorWidth(SSE4_ASIMD, 16)};
|
||||
case "float" -> new CPUMinVectorWidth[]{new CPUMinVectorWidth(SSE4_ASIMD, 8 )};
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2021, 2024, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2021, 2025, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -649,18 +649,25 @@ public class TestCastMethods {
|
||||
makePair(SSPEC128, BSPEC64),
|
||||
makePair(SSPEC256, BSPEC128),
|
||||
makePair(SSPEC512, BSPEC256),
|
||||
makePair(SSPEC64, ISPEC64),
|
||||
makePair(SSPEC64, ISPEC128),
|
||||
makePair(SSPEC128, ISPEC256),
|
||||
makePair(SSPEC256, ISPEC512),
|
||||
makePair(SSPEC64, LSPEC128),
|
||||
makePair(SSPEC64, LSPEC256),
|
||||
makePair(SSPEC128, LSPEC128),
|
||||
makePair(SSPEC128, LSPEC512),
|
||||
makePair(SSPEC64, FSPEC64),
|
||||
makePair(SSPEC64, FSPEC128),
|
||||
makePair(SSPEC128, FSPEC256),
|
||||
makePair(SSPEC256, FSPEC512),
|
||||
makePair(SSPEC64, DSPEC128),
|
||||
makePair(SSPEC64, DSPEC256),
|
||||
makePair(SSPEC128, DSPEC128),
|
||||
makePair(SSPEC128, DSPEC512),
|
||||
makePair(ISPEC256, BSPEC64),
|
||||
makePair(ISPEC512, BSPEC128),
|
||||
makePair(ISPEC64, SSPEC64),
|
||||
makePair(ISPEC128, SSPEC64),
|
||||
makePair(ISPEC256, SSPEC128),
|
||||
makePair(ISPEC512, SSPEC256),
|
||||
@ -675,7 +682,9 @@ public class TestCastMethods {
|
||||
makePair(ISPEC128, DSPEC256),
|
||||
makePair(ISPEC256, DSPEC512),
|
||||
makePair(LSPEC512, BSPEC64),
|
||||
makePair(LSPEC128, SSPEC64),
|
||||
makePair(LSPEC256, SSPEC64),
|
||||
makePair(LSPEC128, SSPEC128),
|
||||
makePair(LSPEC512, SSPEC128),
|
||||
makePair(LSPEC128, ISPEC64),
|
||||
makePair(LSPEC256, ISPEC128),
|
||||
@ -688,6 +697,7 @@ public class TestCastMethods {
|
||||
makePair(LSPEC512, DSPEC512),
|
||||
makePair(FSPEC256, BSPEC64),
|
||||
makePair(FSPEC512, BSPEC128),
|
||||
makePair(FSPEC64, SSPEC64),
|
||||
makePair(FSPEC128, SSPEC64),
|
||||
makePair(FSPEC256, SSPEC128),
|
||||
makePair(FSPEC512, SSPEC256),
|
||||
@ -702,7 +712,9 @@ public class TestCastMethods {
|
||||
makePair(FSPEC128, DSPEC256),
|
||||
makePair(FSPEC256, DSPEC512),
|
||||
makePair(DSPEC512, BSPEC64),
|
||||
makePair(DSPEC128, SSPEC64),
|
||||
makePair(DSPEC256, SSPEC64),
|
||||
makePair(DSPEC128, SSPEC128),
|
||||
makePair(DSPEC512, SSPEC128),
|
||||
makePair(DSPEC128, ISPEC64),
|
||||
makePair(DSPEC256, ISPEC128),
|
||||
@ -751,14 +763,17 @@ public class TestCastMethods {
|
||||
makePair(BSPEC512, LSPEC256, true),
|
||||
makePair(BSPEC512, LSPEC512, true),
|
||||
|
||||
makePair(SSPEC64, ISPEC64, true),
|
||||
makePair(SSPEC64, ISPEC128, true),
|
||||
makePair(SSPEC64, ISPEC256, true),
|
||||
makePair(SSPEC64, ISPEC512, true),
|
||||
makePair(SSPEC64, LSPEC128, true),
|
||||
makePair(SSPEC64, LSPEC256, true),
|
||||
makePair(SSPEC64, LSPEC512, true),
|
||||
makePair(SSPEC128, ISPEC128, true),
|
||||
makePair(SSPEC128, ISPEC256, true),
|
||||
makePair(SSPEC128, ISPEC512, true),
|
||||
makePair(SSPEC128, LSPEC128, true),
|
||||
makePair(SSPEC128, LSPEC256, true),
|
||||
makePair(SSPEC128, LSPEC512, true),
|
||||
makePair(SSPEC256, ISPEC128, true),
|
||||
@ -789,23 +804,35 @@ public class TestCastMethods {
|
||||
makePair(BSPEC64, FSPEC128),
|
||||
makePair(SSPEC64, BSPEC64),
|
||||
makePair(SSPEC128, BSPEC64),
|
||||
makePair(SSPEC64, ISPEC64),
|
||||
makePair(SSPEC64, ISPEC128),
|
||||
makePair(SSPEC64, LSPEC128),
|
||||
makePair(SSPEC128, LSPEC128),
|
||||
makePair(SSPEC64, FSPEC64),
|
||||
makePair(SSPEC64, FSPEC128),
|
||||
makePair(SSPEC64, DSPEC128),
|
||||
makePair(SSPEC128, DSPEC128),
|
||||
makePair(ISPEC128, BSPEC64),
|
||||
makePair(ISPEC128, SSPEC64),
|
||||
makePair(ISPEC64, LSPEC128),
|
||||
makePair(ISPEC64, SSPEC64),
|
||||
makePair(ISPEC64, LSPEC128),
|
||||
makePair(ISPEC64, FSPEC64),
|
||||
makePair(ISPEC128, FSPEC128),
|
||||
makePair(ISPEC64, DSPEC128),
|
||||
makePair(LSPEC128, SSPEC64),
|
||||
makePair(LSPEC128, SSPEC128),
|
||||
makePair(LSPEC128, ISPEC64),
|
||||
makePair(LSPEC128, FSPEC64),
|
||||
makePair(LSPEC128, DSPEC128),
|
||||
makePair(FSPEC128, BSPEC64),
|
||||
makePair(FSPEC64, SSPEC64),
|
||||
makePair(FSPEC128, SSPEC64),
|
||||
makePair(FSPEC64, ISPEC64),
|
||||
makePair(FSPEC128, ISPEC128),
|
||||
makePair(FSPEC64, LSPEC128),
|
||||
makePair(FSPEC64, DSPEC128),
|
||||
makePair(DSPEC128, SSPEC64),
|
||||
makePair(DSPEC128, SSPEC128),
|
||||
makePair(DSPEC128, ISPEC64),
|
||||
makePair(DSPEC128, LSPEC128),
|
||||
makePair(DSPEC128, FSPEC64),
|
||||
@ -816,8 +843,11 @@ public class TestCastMethods {
|
||||
makePair(BSPEC128, SSPEC64, true),
|
||||
makePair(BSPEC128, SSPEC128, true),
|
||||
makePair(BSPEC128, ISPEC128, true),
|
||||
makePair(SSPEC64, ISPEC64, true),
|
||||
makePair(SSPEC64, ISPEC128, true),
|
||||
makePair(SSPEC64, LSPEC128, true),
|
||||
makePair(SSPEC128, ISPEC128, true),
|
||||
makePair(SSPEC128, LSPEC128, true),
|
||||
makePair(ISPEC64, LSPEC128, true)
|
||||
);
|
||||
}
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2022, 2024, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2022, 2025, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -84,10 +84,13 @@ public class TestFloatConversionsVector {
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(counts = {IRNode.VECTOR_CAST_F2HF, IRNode.VECTOR_SIZE_2, "> 0"},
|
||||
applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
|
||||
applyIfCPUFeature = {"asimd", "true"})
|
||||
public void test_float_float16_short_vector(short[] sout, float[] finp) {
|
||||
for (int i = 0; i < finp.length; i+= 4) {
|
||||
sout[i+0] = Float.floatToFloat16(finp[i+0]);
|
||||
sout[i+1] = Float.floatToFloat16(finp[i+1]);
|
||||
for (int i = 0; i < finp.length; i += 4) {
|
||||
sout[i] = Float.floatToFloat16(finp[i]);
|
||||
sout[i + 1] = Float.floatToFloat16(finp[i + 1]);
|
||||
}
|
||||
}
|
||||
|
||||
@ -124,8 +127,9 @@ public class TestFloatConversionsVector {
|
||||
}
|
||||
|
||||
// Verifying the result
|
||||
for (int i = 0; i < ARRLEN; i++) {
|
||||
for (int i = 0; i < ARRLEN; i += 4) {
|
||||
Asserts.assertEquals(Float.floatToFloat16(finp[i]), sout[i]);
|
||||
Asserts.assertEquals(Float.floatToFloat16(finp[i + 1]), sout[i + 1]);
|
||||
}
|
||||
}
|
||||
|
||||
@ -152,7 +156,19 @@ public class TestFloatConversionsVector {
|
||||
}
|
||||
}
|
||||
|
||||
@Run(test = {"test_float16_float", "test_float16_float_strided"}, mode = RunMode.STANDALONE)
|
||||
@Test
|
||||
@IR(counts = {IRNode.VECTOR_CAST_HF2F, IRNode.VECTOR_SIZE_2, "> 0"},
|
||||
applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
|
||||
applyIfCPUFeature = {"asimd", "true"})
|
||||
public void test_float16_float_short_vector(float[] fout, short[] sinp) {
|
||||
for (int i = 0; i < sinp.length; i += 4) {
|
||||
fout[i] = Float.float16ToFloat(sinp[i]);
|
||||
fout[i + 1] = Float.float16ToFloat(sinp[i + 1]);
|
||||
}
|
||||
}
|
||||
|
||||
@Run(test = {"test_float16_float", "test_float16_float_strided",
|
||||
"test_float16_float_short_vector"}, mode = RunMode.STANDALONE)
|
||||
public void kernel_test_float16_float() {
|
||||
sinp = new short[ARRLEN];
|
||||
fout = new float[ARRLEN];
|
||||
@ -178,5 +194,15 @@ public class TestFloatConversionsVector {
|
||||
for (int i = 0; i < ARRLEN/2; i++) {
|
||||
Asserts.assertEquals(Float.float16ToFloat(sinp[i*2]), fout[i*2]);
|
||||
}
|
||||
|
||||
for (int i = 0; i < ITERS; i++) {
|
||||
test_float16_float_short_vector(fout, sinp);
|
||||
}
|
||||
|
||||
// Verifying the result
|
||||
for (int i = 0; i < ARRLEN; i += 4) {
|
||||
Asserts.assertEquals(Float.float16ToFloat(sinp[i]), fout[i]);
|
||||
Asserts.assertEquals(Float.float16ToFloat(sinp[i + 1]), fout[i + 1]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* Copyright (c) 2022, 2023, Arm Limited. All rights reserved.
|
||||
* Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2024, 2025, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -252,9 +252,12 @@ public class ArrayTypeConvertTest extends VectorizationTestRunner {
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(applyIfCPUFeatureOr = {"sve", "true", "avx2", "true", "rvv", "true"},
|
||||
@IR(applyIfCPUFeature = {"rvv", "true"},
|
||||
applyIf = {"MaxVectorSize", ">=32"},
|
||||
counts = {IRNode.VECTOR_CAST_S2D, IRNode.VECTOR_SIZE + "min(max_short, max_double)", ">0"})
|
||||
@IR(applyIfCPUFeatureOr = {"asimd", "true", "avx", "true"},
|
||||
applyIf = {"MaxVectorSize", ">=16"},
|
||||
counts = {IRNode.VECTOR_CAST_S2D, IRNode.VECTOR_SIZE + "min(max_short, max_double)", ">0"})
|
||||
public double[] convertShortToDouble() {
|
||||
double[] res = new double[SIZE];
|
||||
for (int i = 0; i < SIZE; i++) {
|
||||
@ -374,9 +377,12 @@ public class ArrayTypeConvertTest extends VectorizationTestRunner {
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(applyIfCPUFeatureOr = {"sve", "true", "avx", "true", "rvv", "true"},
|
||||
@IR(applyIfCPUFeature = {"rvv", "true"},
|
||||
applyIf = {"MaxVectorSize", ">=32"},
|
||||
counts = {IRNode.VECTOR_CAST_D2S, IRNode.VECTOR_SIZE + "min(max_double, max_short)", ">0"})
|
||||
@IR(applyIfCPUFeatureOr = {"sve", "true", "avx", "true"},
|
||||
applyIf = {"MaxVectorSize", ">=16"},
|
||||
counts = {IRNode.VECTOR_CAST_D2S, IRNode.VECTOR_SIZE + "min(max_double, max_short)", ">0"})
|
||||
public short[] convertDoubleToShort() {
|
||||
short[] res = new short[SIZE];
|
||||
for (int i = 0; i < SIZE; i++) {
|
||||
@ -386,9 +392,12 @@ public class ArrayTypeConvertTest extends VectorizationTestRunner {
|
||||
}
|
||||
|
||||
@Test
|
||||
@IR(applyIfCPUFeatureOr = {"sve", "true", "avx", "true", "rvv", "true"},
|
||||
@IR(applyIfCPUFeature = {"rvv", "true"},
|
||||
applyIf = {"MaxVectorSize", ">=32"},
|
||||
counts = {IRNode.VECTOR_CAST_D2S, IRNode.VECTOR_SIZE + "min(max_double, max_char)", ">0"})
|
||||
@IR(applyIfCPUFeatureOr = {"sve", "true", "avx", "true"},
|
||||
applyIf = {"MaxVectorSize", ">=16"},
|
||||
counts = {IRNode.VECTOR_CAST_D2S, IRNode.VECTOR_SIZE + "min(max_double, max_char)", ">0"})
|
||||
public char[] convertDoubleToChar() {
|
||||
char[] res = new char[SIZE];
|
||||
for (int i = 0; i < SIZE; i++) {
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2022, 2024, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2022, 2025, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -120,6 +120,18 @@ public class VectorFPtoIntCastOperations {
|
||||
}
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void microFloat64ToShort64() {
|
||||
VectorSpecies<Float> ISPECIES = FloatVector.SPECIES_64;
|
||||
VectorSpecies<Short> OSPECIES = ShortVector.SPECIES_64;
|
||||
for (int i = 0, j = 0; i < ISPECIES.loopBound(SIZE / 2); i += ISPECIES.length(), j += OSPECIES.length()) {
|
||||
FloatVector.fromArray(ISPECIES, float_arr, i)
|
||||
.convertShape(VectorOperators.F2S, OSPECIES, 0)
|
||||
.reinterpretAsShorts()
|
||||
.intoArray(short_res, j);
|
||||
}
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void microFloat128ToShort128() {
|
||||
VectorSpecies<Float> ISPECIES = FloatVector.SPECIES_128;
|
||||
|
||||
@ -0,0 +1,80 @@
|
||||
/*
|
||||
* Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
package org.openjdk.bench.vm.compiler;
|
||||
|
||||
import org.openjdk.jmh.annotations.*;
|
||||
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.Random;
|
||||
|
||||
@BenchmarkMode(Mode.AverageTime)
|
||||
@OutputTimeUnit(TimeUnit.NANOSECONDS)
|
||||
@State(Scope.Thread)
|
||||
@Warmup(iterations = 4, time = 2, timeUnit = TimeUnit.SECONDS)
|
||||
@Measurement(iterations = 4, time = 2, timeUnit = TimeUnit.SECONDS)
|
||||
@Fork(value = 3)
|
||||
public class VectorTwoShorts {
|
||||
@Param({"64", "128", "512", "1024"})
|
||||
public int LEN;
|
||||
|
||||
private short[] sA;
|
||||
private short[] sB;
|
||||
private short[] sC;
|
||||
|
||||
@Param("0")
|
||||
private int seed;
|
||||
private Random r = new Random(seed);
|
||||
|
||||
@Setup
|
||||
public void init() {
|
||||
sA = new short[LEN];
|
||||
sB = new short[LEN];
|
||||
sC = new short[LEN];
|
||||
|
||||
for (int i = 0; i < LEN; i++) {
|
||||
sA[i] = (short) r.nextInt();
|
||||
sB[i] = (short) r.nextInt();
|
||||
}
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void addVec2S() {
|
||||
for (int i = 0; i < LEN - 3; i++) {
|
||||
sC[i + 3] = (short) (sA[i] + sB[i]);
|
||||
}
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void mulVec2S() {
|
||||
for (int i = 0; i < LEN - 3; i++) {
|
||||
sC[i + 3] = (short) (sA[i] * sB[i]);
|
||||
}
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void reverseBytesVec2S() {
|
||||
for (int i = 0; i < LEN - 3; i++) {
|
||||
sC[i + 3] = (short) Short.reverseBytes(sA[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user