8370691: Add new Float16Vector type and enable intrinsification of vector operations supported by auto-vectorizer

Co-authored-by: Bhavana Kilambi <bkilambi@openjdk.org>
Reviewed-by: psandoz, epeter, xgong, sherman
This commit is contained in:
Jatin Bhateja 2026-06-11 03:03:04 +00:00
parent dc4bb5acbe
commit 90dc4208f8
49 changed files with 46460 additions and 254 deletions

View File

@ -8294,6 +8294,34 @@ instruct castII_checked(iRegI dst, rFlagsReg cr)
ins_pipe(pipe_slow);
%}
// The unchecked and checked variants for CastII below both use iRegINoSp for src and dst
// as some consumers of CastII node like ConvHF2F forbid the stack pointer as an input
// (please see convHF2F_reg_reg rule which requires input to be in an iRegINoSp register).
// Unchecked CastII variant operating in place on an iRegINoSp operand.
// Selected only when VerifyConstraintCasts == 0. The rule declares size(0),
// an empty encoding and ins_cost(0), so matching it emits no instructions.
instruct castII_nosp(iRegINoSp dst)
%{
predicate(VerifyConstraintCasts == 0);
match(Set dst (CastII dst));
size(0);
format %{ "# castII of $dst" %}
ins_encode(/* empty encoding */);
ins_cost(0);
ins_pipe(pipe_class_empty);
%}
// Checked CastII variant operating in place on an iRegINoSp operand.
// Selected only when VerifyConstraintCasts > 0. The encoding calls
// verify_int_in_range with this node's index, its int type
// (bottom_type()->is_int()), the $dst register and rscratch1.
// NOTE(review): rscratch1 is presumably used as a temporary by the helper --
// confirm against the macro assembler. The flags register is declared killed.
instruct castII_checked_nosp(iRegINoSp dst, rFlagsReg cr)
%{
predicate(VerifyConstraintCasts > 0);
match(Set dst (CastII dst));
effect(KILL cr);
format %{ "# castII_checked of $dst" %}
ins_encode %{
__ verify_int_in_range(_idx, bottom_type()->is_int(), $dst$$Register, rscratch1);
%}
ins_pipe(pipe_slow);
%}
instruct castLL(iRegL dst)
%{
predicate(VerifyConstraintCasts == 0);

View File

@ -241,9 +241,9 @@ source %{
return false;
}
break;
// At the time of writing this, the Vector API has no half-float (FP16) species.
// Consequently, AddReductionVHF and MulReductionVHF are only produced by the
// auto-vectorizer, which requires strictly ordered semantics for FP reductions.
// AddReductionVHF and MulReductionVHF are currently only produced by the
// auto-vectorizer (the Vector API does not yet intrinsify Float16 reductions),
// which requires strictly ordered semantics for FP reductions.
//
// There is no direct Neon instruction that performs strictly ordered floating
// point add reduction. Hence, on Neon only machines, the add reduction operation
@ -354,9 +354,9 @@ source %{
opcode = Op_StoreVectorScatterMasked;
break;
// Currently, the masked versions of the following 8 Float16 operations are disabled.
// When the support for Float16 vector classes is added in VectorAPI and the masked
// Float16 IR can be generated, these masked operations will be enabled and relevant
// backend support added.
// The Vector API does not yet emit predicated Float16 IR. When such masked IR can be
// generated, these masked operations will be enabled and the relevant backend support
// added.
case Op_AddVHF:
case Op_SubVHF:
case Op_MulVHF:

View File

@ -298,7 +298,7 @@ static bool is_klass_initialized(const TypeInstPtr* vec_klass) {
}
static bool is_primitive_lane_type(VectorSupport::LaneType laneType) {
return laneType >= VectorSupport::LT_FLOAT && laneType <= VectorSupport::LT_LONG;
return laneType >= VectorSupport::LT_FLOAT && laneType <= VectorSupport::LT_FLOAT16;
}
static BasicType get_vector_primitive_lane_type(VectorSupport::LaneType lanetype) {
@ -310,10 +310,15 @@ static BasicType get_vector_primitive_lane_type(VectorSupport::LaneType lanetype
case VectorSupport::LaneType::LT_INT: return T_INT;
case VectorSupport::LaneType::LT_SHORT: return T_SHORT;
case VectorSupport::LaneType::LT_BYTE: return T_BYTE;
case VectorSupport::LaneType::LT_FLOAT16: return T_SHORT;
}
return T_ILLEGAL;
}
// Gate used by the vector intrinsic inliners before they proceed.
// Accepts lane types in the inclusive enum range LT_FLOAT..LT_LONG.
// LT_FLOAT16 is declared after LT_LONG in VectorSupport::LaneType and is
// therefore rejected by this check.
static bool is_supported_lane_type(VectorSupport::LaneType laneType) {
  // Reject anything below the first lane type in the accepted range.
  if (laneType < VectorSupport::LT_FLOAT) {
    return false;
  }
  // Otherwise accept up to and including LT_LONG.
  return laneType <= VectorSupport::LT_LONG;
}
//
// <V extends Vector<E>,
// M extends VectorMask<E>,
@ -557,6 +562,11 @@ bool LibraryCallKit::inline_vector_call(int arity) {
return false;
}
if (!is_supported_lane_type(vltype)) {
log_if_needed(" ** unsupported lane type =%s", VectorSupport::lanetype2name(vltype));
return false;
}
if (!is_klass_initialized(vector_klass)) {
log_if_needed(" ** klass argument not initialized");
@ -651,6 +661,11 @@ bool LibraryCallKit::inline_vector_mask_operation() {
return false;
}
if (!is_supported_lane_type(vltype)) {
log_if_needed(" ** unsupported lane type =%s", VectorSupport::lanetype2name(vltype));
return false;
}
int num_elem = vlen->get_con();
BasicType elem_bt = get_vector_primitive_lane_type(vltype);
int mopc = VectorSupport::vop2ideal(oper->get_con(), vltype);
@ -721,6 +736,12 @@ bool LibraryCallKit::inline_vector_frombits_coerced() {
return false;
}
int bcast_mode = mode->get_con();
if (!is_supported_lane_type(vltype) && bcast_mode != VectorSupport::MODE_BROADCAST) {
log_if_needed(" ** unsupported lane type =%s", VectorSupport::lanetype2name(vltype));
return false; // should be primitive type
}
if (!is_klass_initialized(vector_klass)) {
log_if_needed(" ** klass argument not initialized");
return false;
@ -732,7 +753,6 @@ bool LibraryCallKit::inline_vector_frombits_coerced() {
const TypeInstPtr* vbox_type = TypeInstPtr::make_exact(TypePtr::NotNull, vbox_klass);
bool is_mask = is_vector_mask(vbox_klass);
int bcast_mode = mode->get_con();
VectorMaskUseType checkFlags = (VectorMaskUseType)(is_mask ? VecMaskUseAll : VecMaskNotUsed);
int opc = bcast_mode == VectorSupport::MODE_BITS_COERCED_LONG_TO_MASK ? Op_VectorLongToMask : Op_Replicate;
@ -1296,6 +1316,11 @@ bool LibraryCallKit::inline_vector_gather_scatter(bool is_scatter) {
return false;
}
if (!is_supported_lane_type(vltype)) {
log_if_needed(" ** unsupported lane type =%s", VectorSupport::lanetype2name(vltype));
return false;
}
BasicType elem_bt = get_vector_primitive_lane_type(vltype);
int num_elem = vlen->get_con();
int idx_num_elem = idx_vlen->get_con();
@ -1479,6 +1504,10 @@ bool LibraryCallKit::inline_vector_reduction() {
return false;
}
if (!is_supported_lane_type(vltype)) {
log_if_needed(" ** unsupported lane type =%s", VectorSupport::lanetype2name(vltype));
return false;
}
BasicType elem_bt = get_vector_primitive_lane_type(vltype);
const Type* vmask_type = gvn().type(argument(6));
bool is_masked_op = vmask_type != TypePtr::NULL_PTR;
@ -1624,6 +1653,11 @@ bool LibraryCallKit::inline_vector_test() {
return false;
}
if (!is_supported_lane_type(vltype)) {
log_if_needed(" ** unsupported lane type =%s", VectorSupport::lanetype2name(vltype));
return false;
}
if (!is_klass_initialized(vector_klass)) {
log_if_needed(" ** klass argument not initialized");
return false;
@ -1773,6 +1807,11 @@ bool LibraryCallKit::inline_vector_compare() {
return false;
}
if (!is_supported_lane_type(vltype)) {
log_if_needed(" ** unsupported lane type =%s", VectorSupport::lanetype2name(vltype));
return false;
}
if (!is_klass_initialized(vector_klass) || !is_klass_initialized(mask_klass)) {
log_if_needed(" ** klass argument not initialized");
return false;
@ -1893,6 +1932,10 @@ bool LibraryCallKit::inline_vector_rearrange() {
return false;
}
if (!is_supported_lane_type(vltype)) {
log_if_needed(" ** unsupported lane type =%s", VectorSupport::lanetype2name(vltype));
return false;
}
BasicType elem_bt = get_vector_primitive_lane_type(vltype);
BasicType shuffle_bt = elem_bt;
if (shuffle_bt == T_FLOAT) {
@ -2029,6 +2072,10 @@ bool LibraryCallKit::inline_vector_select_from() {
return false;
}
if (!is_supported_lane_type(vltype)) {
log_if_needed(" ** unsupported lane type =%s", VectorSupport::lanetype2name(vltype));
return false;
}
int num_elem = vlen->get_con();
BasicType elem_bt = get_vector_primitive_lane_type(vltype);
if (!is_power_of_2(num_elem)) {
@ -2193,6 +2240,11 @@ bool LibraryCallKit::inline_vector_broadcast_int() {
return false;
}
if (!is_supported_lane_type(vltype)) {
log_if_needed(" ** unsupported lane type =%s", VectorSupport::lanetype2name(vltype));
return false;
}
const Type* vmask_type = gvn().type(argument(7));
bool is_masked_op = vmask_type != TypePtr::NULL_PTR;
if (is_masked_op) {
@ -2369,6 +2421,16 @@ bool LibraryCallKit::inline_vector_convert() {
log_if_needed(" ** not a primitive to lt=%s", VectorSupport::lanetype2name(vltype_to));
return false; // should be primitive type
}
if (!is_supported_lane_type(vltype_from)) {
log_if_needed(" ** unsupported lane type =%s", VectorSupport::lanetype2name(vltype_from));
return false;
}
if (!is_supported_lane_type(vltype_to)) {
log_if_needed(" ** unsupported lane type =%s", VectorSupport::lanetype2name(vltype_to));
return false;
}
BasicType elem_bt_from = get_vector_primitive_lane_type(vltype_from);
BasicType elem_bt_to = get_vector_primitive_lane_type(vltype_to);
@ -2550,6 +2612,11 @@ bool LibraryCallKit::inline_vector_insert() {
return false;
}
if (!is_supported_lane_type(vltype)) {
log_if_needed(" ** unsupported lane type =%s", VectorSupport::lanetype2name(vltype));
return false;
}
if (!is_klass_initialized(vector_klass)) {
log_if_needed(" ** klass argument not initialized");
return false;
@ -2638,6 +2705,11 @@ bool LibraryCallKit::inline_vector_extract() {
return false;
}
if (!is_supported_lane_type(vltype)) {
log_if_needed(" ** unsupported lane type =%s", VectorSupport::lanetype2name(vltype));
return false;
}
if (!is_klass_initialized(vector_klass)) {
log_if_needed(" ** klass argument not initialized");
return false;
@ -2822,6 +2894,11 @@ bool LibraryCallKit::inline_vector_select_from_two_vectors() {
return false;
}
if (!is_supported_lane_type(vltype)) {
log_if_needed(" ** unsupported lane type =%s", VectorSupport::lanetype2name(vltype));
return false;
}
if (!is_klass_initialized(vector_klass)) {
log_if_needed(" ** klass argument not initialized");
return false;
@ -2960,6 +3037,11 @@ bool LibraryCallKit::inline_vector_compress_expand() {
return false;
}
if (!is_supported_lane_type(vltype)) {
log_if_needed(" ** unsupported lane type =%s", VectorSupport::lanetype2name(vltype));
return false;
}
int num_elem = vlen->get_con();
BasicType elem_bt = get_vector_primitive_lane_type(vltype);
int opc = VectorSupport::vop2ideal(opr->get_con(), vltype);
@ -3035,6 +3117,11 @@ bool LibraryCallKit::inline_index_vector() {
return false;
}
if (!is_supported_lane_type(vltype)) {
log_if_needed(" ** unsupported lane type =%s", VectorSupport::lanetype2name(vltype));
return false;
}
if (!is_klass_initialized(vector_klass)) {
log_if_needed(" ** klass argument not initialized");
return false;
@ -3170,6 +3257,11 @@ bool LibraryCallKit::inline_index_partially_in_upper_range() {
return false;
}
if (!is_supported_lane_type(vltype)) {
log_if_needed(" ** unsupported lane type =%s", VectorSupport::lanetype2name(vltype));
return false;
}
if (!is_klass_initialized(mask_klass)) {
log_if_needed(" ** klass argument not initialized");
return false;

View File

@ -206,9 +206,10 @@ const char* VectorSupport::lanetype2name(LaneType lane_type) {
"byte",
"short",
"int",
"long"
"long",
"float16",
};
if (lane_type >= LT_FLOAT && lane_type <= LT_LONG) {
if (lane_type >= LT_FLOAT && lane_type <= LT_FLOAT16) {
return lanetype2name[lane_type];
}
assert(false, "unknown lane type: %d", (int)lane_type);
@ -224,6 +225,7 @@ int VectorSupport::vop2ideal(jint id, LaneType lt) {
case LT_SHORT: // fall-through
case LT_INT: return Op_AddI;
case LT_LONG: return Op_AddL;
case LT_FLOAT16: return Op_AddHF;
case LT_FLOAT: return Op_AddF;
case LT_DOUBLE: return Op_AddD;
default: return 0;
@ -236,6 +238,7 @@ int VectorSupport::vop2ideal(jint id, LaneType lt) {
case LT_SHORT: // fall-through
case LT_INT: return Op_SubI;
case LT_LONG: return Op_SubL;
case LT_FLOAT16: return Op_SubHF;
case LT_FLOAT: return Op_SubF;
case LT_DOUBLE: return Op_SubD;
default: return 0;
@ -248,6 +251,7 @@ int VectorSupport::vop2ideal(jint id, LaneType lt) {
case LT_SHORT: // fall-through
case LT_INT: return Op_MulI;
case LT_LONG: return Op_MulL;
case LT_FLOAT16: return Op_MulHF;
case LT_FLOAT: return Op_MulF;
case LT_DOUBLE: return Op_MulD;
default: return 0;
@ -260,6 +264,7 @@ int VectorSupport::vop2ideal(jint id, LaneType lt) {
case LT_SHORT: // fall-through
case LT_INT: return Op_DivI;
case LT_LONG: return Op_DivL;
case LT_FLOAT16: return Op_DivHF;
case LT_FLOAT: return Op_DivF;
case LT_DOUBLE: return Op_DivD;
default: return 0;
@ -272,6 +277,7 @@ int VectorSupport::vop2ideal(jint id, LaneType lt) {
case LT_SHORT:
case LT_INT: return Op_MinI;
case LT_LONG: return Op_MinL;
case LT_FLOAT16: return Op_MinHF;
case LT_FLOAT: return Op_MinF;
case LT_DOUBLE: return Op_MinD;
default: return 0;
@ -284,6 +290,7 @@ int VectorSupport::vop2ideal(jint id, LaneType lt) {
case LT_SHORT:
case LT_INT: return Op_MaxI;
case LT_LONG: return Op_MaxL;
case LT_FLOAT16: return Op_MaxHF;
case LT_FLOAT: return Op_MaxF;
case LT_DOUBLE: return Op_MaxD;
default: return 0;
@ -316,6 +323,7 @@ int VectorSupport::vop2ideal(jint id, LaneType lt) {
case LT_SHORT: // fall-through
case LT_INT: return Op_AbsI;
case LT_LONG: return Op_AbsL;
case LT_FLOAT16: return 0;
case LT_FLOAT: return Op_AbsF;
case LT_DOUBLE: return Op_AbsD;
default: return 0;
@ -328,6 +336,7 @@ int VectorSupport::vop2ideal(jint id, LaneType lt) {
case LT_SHORT: // fall-through
case LT_INT: return Op_NegI;
case LT_LONG: return Op_NegL;
case LT_FLOAT16: return 0;
case LT_FLOAT: return Op_NegF;
case LT_DOUBLE: return Op_NegD;
default: return 0;
@ -366,6 +375,7 @@ int VectorSupport::vop2ideal(jint id, LaneType lt) {
}
case VECTOR_OP_SQRT: {
switch (lt) {
case LT_FLOAT16: return Op_SqrtHF;
case LT_FLOAT: return Op_SqrtF;
case LT_DOUBLE: return Op_SqrtD;
default: return 0;
@ -374,6 +384,7 @@ int VectorSupport::vop2ideal(jint id, LaneType lt) {
}
case VECTOR_OP_FMA: {
switch (lt) {
case LT_FLOAT16: return Op_FmaHF;
case LT_FLOAT: return Op_FmaF;
case LT_DOUBLE: return Op_FmaD;
default: return 0;
@ -436,6 +447,7 @@ int VectorSupport::vop2ideal(jint id, LaneType lt) {
case LT_SHORT: // fall-through
case LT_INT: // fall-through
case LT_LONG: // fall-through
case LT_FLOAT16: // fall-through
case LT_FLOAT: // fall-through
case LT_DOUBLE: return Op_VectorMaskLastTrue;
default: return 0;
@ -448,6 +460,7 @@ int VectorSupport::vop2ideal(jint id, LaneType lt) {
case LT_SHORT: // fall-through
case LT_INT: // fall-through
case LT_LONG: // fall-through
case LT_FLOAT16: // fall-through
case LT_FLOAT: // fall-through
case LT_DOUBLE: return Op_VectorMaskFirstTrue;
default: return 0;
@ -460,6 +473,7 @@ int VectorSupport::vop2ideal(jint id, LaneType lt) {
case LT_SHORT: // fall-through
case LT_INT: // fall-through
case LT_LONG: // fall-through
case LT_FLOAT16: // fall-through
case LT_FLOAT: // fall-through
case LT_DOUBLE: return Op_VectorMaskTrueCount;
default: return 0;
@ -472,6 +486,7 @@ int VectorSupport::vop2ideal(jint id, LaneType lt) {
case LT_SHORT: // fall-through
case LT_INT: // fall-through
case LT_LONG: // fall-through
case LT_FLOAT16: // fall-through
case LT_FLOAT: // fall-through
case LT_DOUBLE: return Op_VectorMaskToLong;
default: return 0;
@ -484,6 +499,7 @@ int VectorSupport::vop2ideal(jint id, LaneType lt) {
case LT_SHORT: // fall-through
case LT_INT: // fall-through
case LT_LONG: // fall-through
case LT_FLOAT16: // fall-through
case LT_FLOAT: // fall-through
case LT_DOUBLE: return Op_ExpandV;
default: return 0;
@ -496,6 +512,7 @@ int VectorSupport::vop2ideal(jint id, LaneType lt) {
case LT_SHORT: // fall-through
case LT_INT: // fall-through
case LT_LONG: // fall-through
case LT_FLOAT16: // fall-through
case LT_FLOAT: // fall-through
case LT_DOUBLE: return Op_CompressV;
default: return 0;
@ -508,6 +525,7 @@ int VectorSupport::vop2ideal(jint id, LaneType lt) {
case LT_SHORT: // fall-through
case LT_INT: // fall-through
case LT_LONG: // fall-through
case LT_FLOAT16: // fall-through
case LT_FLOAT: // fall-through
case LT_DOUBLE: return Op_CompressM;
default: return 0;

View File

@ -144,7 +144,8 @@ class VectorSupport : AllStatic {
LT_BYTE = 2,
LT_SHORT = 3,
LT_INT = 4,
LT_LONG = 5
LT_LONG = 5,
LT_FLOAT16 = 6
};
enum {

View File

@ -156,7 +156,8 @@ public class VectorSupport {
LT_BYTE = 2,
LT_SHORT = 3,
LT_INT = 4,
LT_LONG = 5;
LT_LONG = 5,
LT_FLOAT16 = 6;
/* ============================================================================ */

View File

@ -36,7 +36,9 @@ abstract sealed class AbstractMask<E> extends VectorMask<E>
FloatVector64.FloatMask64, FloatVector128.FloatMask128, FloatVector256.FloatMask256, FloatVector512.FloatMask512, FloatVectorMax.FloatMaskMax,
IntVector64.IntMask64, IntVector128.IntMask128, IntVector256.IntMask256, IntVector512.IntMask512, IntVectorMax.IntMaskMax,
LongVector64.LongMask64, LongVector128.LongMask128, LongVector256.LongMask256, LongVector512.LongMask512, LongVectorMax.LongMaskMax,
ShortVector64.ShortMask64, ShortVector128.ShortMask128, ShortVector256.ShortMask256, ShortVector512.ShortMask512, ShortVectorMax.ShortMaskMax {
ShortVector64.ShortMask64, ShortVector128.ShortMask128, ShortVector256.ShortMask256, ShortVector512.ShortMask512, ShortVectorMax.ShortMaskMax,
Float16Vector64.Float16Mask64, Float16Vector128.Float16Mask128, Float16Vector256.Float16Mask256, Float16Vector512.Float16Mask512,
Float16VectorMax.Float16MaskMax {
AbstractMask(boolean[] bits) {
super(bits);
}

View File

@ -35,7 +35,8 @@ abstract sealed class AbstractShuffle<E> extends VectorShuffle<E>
FloatVector64.FloatShuffle64, FloatVector128.FloatShuffle128, FloatVector256.FloatShuffle256, FloatVector512.FloatShuffle512, FloatVectorMax.FloatShuffleMax,
IntVector64.IntShuffle64, IntVector128.IntShuffle128, IntVector256.IntShuffle256, IntVector512.IntShuffle512, IntVectorMax.IntShuffleMax,
LongVector64.LongShuffle64, LongVector128.LongShuffle128, LongVector256.LongShuffle256, LongVector512.LongShuffle512, LongVectorMax.LongShuffleMax,
ShortVector64.ShortShuffle64, ShortVector128.ShortShuffle128, ShortVector256.ShortShuffle256, ShortVector512.ShortShuffle512, ShortVectorMax.ShortShuffleMax {
ShortVector64.ShortShuffle64, ShortVector128.ShortShuffle128, ShortVector256.ShortShuffle256, ShortVector512.ShortShuffle512, ShortVectorMax.ShortShuffleMax,
Float16Vector64.Float16Shuffle64, Float16Vector128.Float16Shuffle128, Float16Vector256.Float16Shuffle256, Float16Vector512.Float16Shuffle512, Float16VectorMax.Float16ShuffleMax {
static final IntUnaryOperator IDENTITY = i -> i;
// Internal representation allows for a maximum index of E.MAX_VALUE - 1

View File

@ -37,7 +37,7 @@ import jdk.internal.vm.annotation.TrustFinalFields;
abstract sealed class AbstractSpecies<E> extends jdk.internal.vm.vector.VectorSupport.VectorSpecies<E>
implements VectorSpecies<E>
permits ByteVector.ByteSpecies, DoubleVector.DoubleSpecies, FloatVector.FloatSpecies,
IntVector.IntSpecies, LongVector.LongSpecies, ShortVector.ShortSpecies {
IntVector.IntSpecies, LongVector.LongSpecies, ShortVector.ShortSpecies, Float16Vector.Float16Species {
final VectorShape vectorShape;
final LaneType laneType;
final int laneCount;
@ -424,14 +424,21 @@ abstract sealed class AbstractSpecies<E> extends jdk.internal.vm.vector.VectorSu
Object ia = Array.newInstance(carrierType(), laneCount);
assert(ia.getClass() == laneType.arrayType);
checkValue(laneCount-1); // worst case
for (int i = 0; i < laneCount; i++) {
if ((byte)i == i)
Array.setByte(ia, i, (byte)i);
else if ((short)i == i)
Array.setShort(ia, i, (short)i);
else
Array.setInt(ia, i, i);
assert(Array.getDouble(ia, i) == i);
if (elementType() == Float16.class) {
for (int i = 0; i < laneCount; i++) {
Array.setShort(ia, i, Float.floatToFloat16((float)i));
assert(Float16.shortBitsToFloat16(Array.getShort(ia, i)).intValue() == i);
}
} else {
for (int i = 0; i < laneCount; i++) {
if ((byte)i == i)
Array.setByte(ia, i, (byte)i);
else if ((short)i == i)
Array.setShort(ia, i, (short)i);
else
Array.setInt(ia, i, i);
assert(Array.getDouble(ia, i) == i);
}
}
return ia;
}
@ -629,6 +636,8 @@ abstract sealed class AbstractSpecies<E> extends jdk.internal.vm.vector.VectorSu
s = IntVector.species(shape); break;
case LaneType.SK_LONG:
s = LongVector.species(shape); break;
case LaneType.SK_FLOAT16:
s = Float16Vector.species(shape); break;
}
if (s == null) {
// NOTE: The result of this method is guaranteed to be

View File

@ -35,7 +35,7 @@ import static jdk.incubator.vector.VectorOperators.*;
@SuppressWarnings("cast")
abstract sealed class AbstractVector<E> extends Vector<E>
permits ByteVector, DoubleVector, FloatVector, IntVector, LongVector, ShortVector {
permits ByteVector, DoubleVector, FloatVector, IntVector, LongVector, ShortVector, Float16Vector {
/**
* The order of vector bytes when stored in natural,
* array elements of the same lane type.
@ -331,6 +331,15 @@ abstract sealed class AbstractVector<E> extends Vector<E>
return (DoubleVector) asVectorRaw(LaneType.DOUBLE);
}
/**
* {@inheritDoc} <!--workaround-->
*/
@Override
@ForceInline
public Float16Vector reinterpretAsFloat16s() {
// Same shape as the neighbouring reinterpretAs* overrides: request the
// FLOAT16 lane type from asVectorRaw and downcast the result.
return (Float16Vector) asVectorRaw(LaneType.FLOAT16);
}
/**
* {@inheritDoc} <!--workaround-->
*/
@ -682,6 +691,8 @@ abstract sealed class AbstractVector<E> extends Vector<E>
return FloatVector.fromMemorySegment(rsp.check(float.class), ms, 0, bo, m.check(float.class)).check0(rsp);
case LaneType.SK_DOUBLE:
return DoubleVector.fromMemorySegment(rsp.check(double.class), ms, 0, bo, m.check(double.class)).check0(rsp);
case LaneType.SK_FLOAT16:
return Float16Vector.fromMemorySegment(rsp.check(Float16.class), ms, 0, bo, m.check(Float16.class)).check0(rsp);
default:
throw new AssertionError(rsp.toString());
}
@ -744,6 +755,13 @@ abstract sealed class AbstractVector<E> extends Vector<E>
}
return DoubleVector.fromArray(dsp.check(double.class), a, 0).check0(dsp);
}
case LaneType.SK_FLOAT16: {
short[] a = new short[rlength];
for (int i = 0; i < limit; i++) {
a[i] = Float16.float16ToRawShortBits(Float16.valueOf((float) lanes[i]));
}
return Float16Vector.fromArray(dsp.check(Float16.class), a, 0).check0(dsp);
}
default: break;
}
} else {
@ -794,6 +812,13 @@ abstract sealed class AbstractVector<E> extends Vector<E>
}
return DoubleVector.fromArray(dsp.check(double.class), a, 0).check0(dsp);
}
case LaneType.SK_FLOAT16: {
short[] a = new short[rlength];
for (int i = 0; i < limit; i++) {
a[i] = Float16.float16ToRawShortBits(Float16.valueOf((float) lanes[i]));
}
return Float16Vector.fromArray(dsp.check(Float16.class), a, 0).check0(dsp);
}
default: break;
}
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -40,7 +40,8 @@ enum LaneType {
BYTE(byte.class, Byte.class, byte[].class, 'I', -1, Byte.SIZE, byte.class),
SHORT(short.class, Short.class, short[].class, 'I', -1, Short.SIZE, short.class),
INT(int.class, Integer.class, int[].class, 'I', -1, Integer.SIZE, int.class),
LONG(long.class, Long.class, long[].class, 'I', -1, Long.SIZE, long.class);
LONG(long.class, Long.class, long[].class, 'I', -1, Long.SIZE, long.class),
FLOAT16(Float16.class, Short.class, short[].class, 'F', 11, Float16.SIZE, short.class);
LaneType(Class<?> elementType,
Class<?> genericElementType,
@ -66,7 +67,7 @@ enum LaneType {
// printName. If we do unsigned or vector or bit lane types,
// report that condition also.
this.typeChar = genericElementType.getSimpleName().charAt(0);
assert("FDBSIL".indexOf(typeChar) == ordinal()) : this;
assert("FDBSILS".charAt(ordinal()) == typeChar) : this;
this.carrierType = carrierType;
assert(carrierType.isPrimitive());
@ -181,7 +182,8 @@ enum LaneType {
SK_SHORT = 4,
SK_INT = 5,
SK_LONG = 6,
SK_LIMIT = 7;
SK_FLOAT16 = 7,
SK_LIMIT = 8;
/*package-private*/
@ForceInline
@ -278,5 +280,6 @@ enum LaneType {
assert(ofLaneTypeOrdinal(LT_SHORT) == SHORT);
assert(ofLaneTypeOrdinal(LT_INT) == INT);
assert(ofLaneTypeOrdinal(LT_LONG) == LONG);
assert(ofLaneTypeOrdinal(LT_FLOAT16) == FLOAT16);
}
}

View File

@ -4149,22 +4149,14 @@ public abstract sealed class ShortVector extends AbstractVector<Short>
/**
* {@inheritDoc} <!--workaround-->
*
* @implNote This method always throws
* {@code UnsupportedOperationException}, because there is no floating
* point type of the same size as {@code short}. The return type
* of this method is arbitrarily designated as
* {@code Vector<?>}. Future versions of this API may change the return
* type if additional floating point types become available.
*/
@ForceInline
@Override
public final
Vector<?>
Float16Vector
viewAsFloatingLanes() {
LaneType flt = LaneType.SHORT.asFloating();
// asFloating() will throw UnsupportedOperationException for the unsupported type short
throw new AssertionError("Cannot reach here");
return (Float16Vector) asVectorRaw(flt);
}
// ================================================

View File

@ -200,11 +200,11 @@ import java.util.Arrays;
* element type (such as access to element values in lanes, logical operations
* on values of integral elements types, or transcendental operations on values
* of floating point element types).
* There are six abstract subclasses of Vector corresponding to the supported set
* There are seven abstract subclasses of Vector corresponding to the supported set
* of element types, {@link ByteVector}, {@link ShortVector},
* {@link IntVector}, {@link LongVector}, {@link FloatVector}, and
* {@link DoubleVector}. Along with type-specific operations these classes
* support creation of vector values (instances of Vector).
* {@link IntVector}, {@link LongVector}, {@link FloatVector},
* {@link DoubleVector}, and {@link Float16Vector}. Along with type-specific
* operations these classes support creation of vector values (instances of Vector).
* They expose static constants corresponding to the supported species,
* and static methods on these types generally take a species as a parameter.
* For example,
@ -3826,6 +3826,19 @@ public abstract sealed class Vector<E> extends jdk.internal.vm.vector.VectorSupp
*/
public abstract LongVector reinterpretAsLongs();
/**
* Reinterprets this vector as a vector of the same shape
* and contents but a lane type of {@code Float16},
* where the lanes are assembled from successive bytes
* according to little-endian order.
* It is a convenience method for the expression
* {@code reinterpretShape(species().withLanes(Float16.class))}.
* It may be considered an inverse to {@link Vector#reinterpretAsBytes()}.
*
* @return a {@code Float16Vector} with the same shape and information content
*/
public abstract Float16Vector reinterpretAsFloat16s();
/**
* Reinterprets this vector as a vector of the same shape
* and contents but a lane type of {@code float},

View File

@ -64,8 +64,9 @@ import static jdk.internal.vm.vector.Utils.isNonCapturingLambda;
*
* <li>{@code bits(x)} &mdash; a function call which produces the
* underlying bits of the value {@code x}. If {@code x} is a floating
* point value, this is either {@code doubleToLongBits(x)} or
* {@code floatToIntBits(x)}. Otherwise, the value is just {@code x}.
* point value, this is {@code doubleToLongBits(x)},
* {@code floatToIntBits(x)}, or {@code float16ToShortBits(x)}.
* Otherwise, the value is just {@code x}.
*
* <li>{@code ESIZE} &mdash; the size in bytes of the operand type
*
@ -73,6 +74,26 @@ import static jdk.internal.vm.vector.Utils.isNonCapturingLambda;
*
* <li>{@code intVal}, {@code byteVal}, etc. &mdash; the operand of a
* conversion, with the indicated type
*
* <li id="type_letters">Single-letter type codes used in the names of
* {@linkplain Conversion conversion} operator tokens (for example
* {@link #B2D}, {@link #F2H}, {@link #H2F}, {@link #REINTERPRET_F2I},
* {@link #ZERO_EXTEND_B2L}) abbreviate lane types as follows:
* <table class="striped">
* <caption style="display:none">Lane type letter codes</caption>
* <thead>
* <tr><th scope="col">Letter</th><th scope="col">Lane type</th></tr>
* </thead>
* <tbody>
* <tr><th scope="row">{@code B}</th><td>{@code byte}</td></tr>
* <tr><th scope="row">{@code S}</th><td>{@code short}</td></tr>
* <tr><th scope="row">{@code I}</th><td>{@code int}</td></tr>
* <tr><th scope="row">{@code L}</th><td>{@code long}</td></tr>
* <tr><th scope="row">{@code F}</th><td>{@code float}</td></tr>
* <tr><th scope="row">{@code D}</th><td>{@code double}</td></tr>
* <tr><th scope="row">{@code H}</th><td>{@link Float16} ("half")</td></tr>
* </tbody>
* </table>
* </ul>
*
* <h2>Operations on floating point vectors</h2>
@ -307,13 +328,13 @@ public final class VectorOperators {
*/
public sealed interface Conversion<E,F> extends Operator {
/**
* The domain of this conversion, a primitive type.
* The domain of this conversion, a supported lane type.
* @return the domain of this conversion
*/
Class<E> domainType();
/**
* The range of this conversion, a primitive type.
* The range of this conversion, a supported lane type.
* @return the range of this conversion
*/
@Override
@ -657,6 +678,8 @@ public final class VectorOperators {
public static final Conversion<Byte,Long> B2L = convert("B2L", 'C', byte.class, long.class, VO_KIND_CAST, VO_ALL);
/** Convert {@code byteVal} to {@code (short)byteVal}. */
public static final Conversion<Byte,Short> B2S = convert("B2S", 'C', byte.class, short.class, VO_KIND_CAST, VO_ALL);
/** Convert {@code byteVal} to {@code (Float16)byteVal}. */
public static final Conversion<Byte,Float16> B2H = convert("B2H", 'C', byte.class, Float16.class, VO_KIND_CAST, VO_ALL);
/** Convert {@code doubleVal} to {@code (byte)doubleVal}. */
public static final Conversion<Double,Byte> D2B = convert("D2B", 'C', double.class, byte.class, VO_KIND_CAST, VO_ALL);
/** Convert {@code doubleVal} to {@code (float)doubleVal}. */
@ -667,6 +690,8 @@ public final class VectorOperators {
public static final Conversion<Double,Long> D2L = convert("D2L", 'C', double.class, long.class, VO_KIND_CAST, VO_ALL);
/** Convert {@code doubleVal} to {@code (short)doubleVal}. */
public static final Conversion<Double,Short> D2S = convert("D2S", 'C', double.class, short.class, VO_KIND_CAST, VO_ALL);
/** Convert {@code doubleVal} to {@code (Float16)doubleVal}. */
public static final Conversion<Double,Float16> D2H = convert("D2H", 'C', double.class, Float16.class, VO_KIND_CAST, VO_ALL);
/** Convert {@code floatVal} to {@code (byte)floatVal}. */
public static final Conversion<Float,Byte> F2B = convert("F2B", 'C', float.class, byte.class, VO_KIND_CAST, VO_ALL);
/** Convert {@code floatVal} to {@code (double)floatVal}. */
@ -677,6 +702,8 @@ public final class VectorOperators {
public static final Conversion<Float,Long> F2L = convert("F2L", 'C', float.class, long.class, VO_KIND_CAST, VO_ALL);
/** Convert {@code floatVal} to {@code (short)floatVal}. */
public static final Conversion<Float,Short> F2S = convert("F2S", 'C', float.class, short.class, VO_KIND_CAST, VO_ALL);
/** Convert {@code floatVal} to {@code (Float16)floatVal}. */
public static final Conversion<Float,Float16> F2H = convert("F2H", 'C', float.class, Float16.class, VO_KIND_CAST, VO_ALL);
/** Convert {@code intVal} to {@code (byte)intVal}. */
public static final Conversion<Integer,Byte> I2B = convert("I2B", 'C', int.class, byte.class, VO_KIND_CAST, VO_ALL);
/** Convert {@code intVal} to {@code (double)intVal}. */
@ -687,6 +714,8 @@ public final class VectorOperators {
public static final Conversion<Integer,Long> I2L = convert("I2L", 'C', int.class, long.class, VO_KIND_CAST, VO_ALL);
/** Convert {@code intVal} to {@code (short)intVal}. */
public static final Conversion<Integer,Short> I2S = convert("I2S", 'C', int.class, short.class, VO_KIND_CAST, VO_ALL);
/** Convert {@code intVal} to {@code (Float16)intVal}. */
public static final Conversion<Integer,Float16> I2H = convert("I2H", 'C', int.class, Float16.class, VO_KIND_CAST, VO_ALL);
/** Convert {@code longVal} to {@code (byte)longVal}. */
public static final Conversion<Long,Byte> L2B = convert("L2B", 'C', long.class, byte.class, VO_KIND_CAST, VO_ALL);
/** Convert {@code longVal} to {@code (double)longVal}. */
@ -697,6 +726,8 @@ public final class VectorOperators {
public static final Conversion<Long,Integer> L2I = convert("L2I", 'C', long.class, int.class, VO_KIND_CAST, VO_ALL);
/** Convert {@code longVal} to {@code (short)longVal}. */
public static final Conversion<Long,Short> L2S = convert("L2S", 'C', long.class, short.class, VO_KIND_CAST, VO_ALL);
/** Convert {@code longVal} to {@code (Float16)longVal}. */
public static final Conversion<Long,Float16> L2H = convert("L2H", 'C', long.class, Float16.class, VO_KIND_CAST, VO_ALL);
/** Convert {@code shortVal} to {@code (byte)shortVal}. */
public static final Conversion<Short,Byte> S2B = convert("S2B", 'C', short.class, byte.class, VO_KIND_CAST, VO_ALL);
/** Convert {@code shortVal} to {@code (double)shortVal}. */
@ -707,6 +738,21 @@ public final class VectorOperators {
public static final Conversion<Short,Integer> S2I = convert("S2I", 'C', short.class, int.class, VO_KIND_CAST, VO_ALL);
/** Convert {@code shortVal} to {@code (long)shortVal}. */
public static final Conversion<Short,Long> S2L = convert("S2L", 'C', short.class, long.class, VO_KIND_CAST, VO_ALL);
/** Convert {@code shortVal} to {@code (Float16)shortVal}. */
public static final Conversion<Short,Float16> S2H = convert("S2H", 'C', short.class, Float16.class, VO_KIND_CAST, VO_ALL);
/** Convert {@code Float16Val} to {@code (byte)Float16Val}. */
public static final Conversion<Float16,Byte> H2B = convert("H2B", 'C', Float16.class, byte.class, VO_KIND_CAST, VO_ALL);
/** Convert {@code Float16Val} to {@code (short)Float16Val}. */
public static final Conversion<Float16,Short> H2S = convert("H2S", 'C', Float16.class, short.class, VO_KIND_CAST, VO_ALL);
/** Convert {@code Float16Val} to {@code (double)Float16Val}. */
public static final Conversion<Float16,Double> H2D = convert("H2D", 'C', Float16.class, double.class, VO_KIND_CAST, VO_ALL);
/** Convert {@code Float16Val} to {@code (float)Float16Val}. */
public static final Conversion<Float16,Float> H2F = convert("H2F", 'C', Float16.class, float.class, VO_KIND_CAST, VO_ALL);
/** Convert {@code Float16Val} to {@code (int)Float16Val}. */
public static final Conversion<Float16,Integer> H2I = convert("H2I", 'C', Float16.class, int.class, VO_KIND_CAST, VO_ALL);
/** Convert {@code Float16Val} to {@code (long)Float16Val}. */
public static final Conversion<Float16,Long> H2L = convert("H2L", 'C', Float16.class, long.class, VO_KIND_CAST, VO_ALL);
/** Reinterpret bits of {@code doubleVal} as {@code long}. As if by {@link Double#doubleToRawLongBits(double)} */
public static final Conversion<Double,Long> REINTERPRET_D2L = convert("REINTERPRET_D2L", 'R', double.class, long.class, VO_KIND_BITWISE, VO_ALL);
/** Reinterpret bits of {@code floatVal} as {@code int}. As if by {@link Float#floatToRawIntBits(float)} */

View File

@ -41,12 +41,65 @@ import static jdk.internal.vm.vector.VectorSupport.*;
import static jdk.incubator.vector.VectorIntrinsics.*;
import static jdk.incubator.vector.VectorOperators.*;
#if[FP16]
import jdk.incubator.vector.Float16;
import static jdk.incubator.vector.Float16.*;
import static java.lang.Float.*;
#end[FP16]
#warn This file is preprocessed before being compiled
/**
* A specialized {@link Vector} representing an ordered immutable sequence of
#if[FP16]
* 16-bit data values in the IEEE 754 binary16 format.
* <p>
* The scalar {@linkplain Float16Vector#elementType() element type} of {@code Float16Vector}
* is the class {@link Float16}, a <a href="{@docRoot}/java.base/java/lang/doc-files/ValueBased.html">value-based</a>
* class holding 16-bit data in IEEE 754 binary16 format. However, the {@code Float16}
* class is not used by vector operations that accept scalar element values, or
* arrays of scalar element values. Instead, the primitive type {@code short} is
* used to explicitly hold 16-bit data in IEEE 754 binary16 format. For such operations
* it may be necessary to explicitly convert between floating-point values of {@code Float16}
* or {@code float} and values of {@code short} using the appropriate conversion
* methods on {@code Float16} or {@code Float}.
*
* <p>
* The specifications for operations on elements of this class are written as if
* {@code Float16} is a primitive floating-point type. An operation referencing a
* Java operator is mapped to a method on {@code Float16} that specifies that
* operator's semantics. For example, the semantics of the {@code +} operator,
* as referenced by {@link Vector#add(Vector)} and {@link VectorOperators#ADD},
* is mapped to the method {@link Float16#add(Float16, Float16)}.
* An operation referencing a method on {@link Math} is mapped to a method of the
* same name on {@code Float16}, if it exists. For example, {@link Math#fma} is
* mapped to {@link Float16#fma}, as referenced by {@link Float16Vector#fma(short, short)}
* and {@link VectorOperators#FMA}.
* Otherwise, if there is no equivalent method on {@code Float16}, the expression that is
* an invocation of a method on {@code Math} is mapped to an expression that converts
* the {@code Float16} arguments to {@code double} values or {@code float} values as
* required by the method's parameter types, invokes the method on {@code Math} with
* the converted values, and converts the resulting {@code double} or {@code float} value
* to a {@code Float16} value. For example, {@link Math#sin} is mapped to the expression
* {@code Float16.valueOf(Math.sin(a.doubleValue()))}, where {@code a} is the
* {@code Float16} lane value, as referenced by {@link VectorOperators#SIN}.
*
* @apiNote
* {@code Float16} is currently a value-based class and therefore cannot be optimally
* used as the scalar element type of vector operations until it becomes a value class
* that behaves similarly to the primitive type {@code short} and to arrays of {@code short}.
* For example, accessing {@code Float16} vectors using arrays requires those arrays be
* {@code short[]} arrays. Accessing vectors using memory segments requires, naturally,
* that each consecutive 16 bits of memory hold a 16-bit data value in the IEEE 754
* binary16 format.
* @see Float16
* @see Float16#float16ToRawShortBits(Float16)
* @see Float16#shortBitsToFloat16(short)
* @see Float#floatToFloat16(float)
* @see Float#float16ToFloat(short)
#else[FP16]
* {@code $type$} values.
#end[FP16]
*/
@SuppressWarnings("cast") // warning: redundant cast
public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtype$>
@ -62,7 +115,7 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
static final int FORBID_OPCODE_KIND = VO_ONLYFP;
#end[FP]
static final ValueLayout.Of$Type$ ELEMENT_LAYOUT = ValueLayout.JAVA_$TYPE$.withByteAlignment(1);
static final ValueLayout.Of$ElemLayout$ ELEMENT_LAYOUT = ValueLayout.JAVA_$TYPE$.withByteAlignment(1);
static final int LANE_TYPE_ORDINAL = $laneType$;
@ -158,7 +211,7 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
/*package-private*/
interface FUnOp {
$type$ apply(int i, $type$ a);
$fallbacktype$ apply(int i, $fallbacktype$ a);
}
/*package-private*/
@ -170,7 +223,11 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
$type$[] vec = vec();
$type$[] res = new $type$[length()];
for (int i = 0; i < res.length; i++) {
#if[FP16]
res[i] = floatToFloat16(f.apply(i, float16ToFloat(vec[i])));
#else[FP16]
res[i] = f.apply(i, vec[i]);
#end[FP16]
}
return vectorFactory(res);
}
@ -190,16 +247,60 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
$type$[] res = new $type$[length()];
boolean[] mbits = ((AbstractMask<$Boxtype$>)m).getBits();
for (int i = 0; i < res.length; i++) {
#if[FP16]
res[i] = mbits[i] ? floatToFloat16(f.apply(i, float16ToFloat(vec[i]))) : vec[i];
#else[FP16]
res[i] = mbits[i] ? f.apply(i, vec[i]) : vec[i];
#end[FP16]
}
return vectorFactory(res);
}
#if[FP16]
/*package-private*/
// Unary lane function that works directly on the stored lane representation.
// It receives the lane index {@code i} and the lane value {@code a} exactly as
// held in the payload array, and returns the replacement lane value in the same
// representation. Unlike the FP16 path of uOpTemplate, no float16ToFloat /
// floatToFloat16 conversion is wrapped around the call.
interface FUnRawOp {
$type$ apply(int i, $type$ a);
}
/*package-private*/
// Unmasked raw unary operation: applies {@code f} to every lane without
// converting the lane value. Declared abstract here; the shared loop is
// provided by uRawOpTemplate(FUnRawOp).
abstract
$abstractvectortype$ uRawOp(FUnRawOp f);
/**
 * Shared implementation of the unmasked raw unary operation.
 * Builds a fresh payload in which lane {@code N} holds
 * {@code f.apply(N, thisLane[N])}; lane values are handed to {@code f}
 * exactly as stored, with no conversion before or after the call.
 *
 * @param f the raw lane function to apply to every lane
 * @return a new vector created by {@code vectorFactory} from the results
 */
@ForceInline
final
$abstractvectortype$ uRawOpTemplate(FUnRawOp f) {
    final $type$[] lanes = vec();
    final $type$[] out = new $type$[length()];
    int lane = 0;
    while (lane < out.length) {
        out[lane] = f.apply(lane, lanes[lane]);
        lane++;
    }
    return vectorFactory(out);
}
/*package-private*/
// Masked raw unary operation: like uRawOp(FUnRawOp), but controlled by the
// mask {@code m}. Declared abstract here; the shared loop is provided by
// uRawOpTemplate(VectorMask, FUnRawOp).
abstract
$abstractvectortype$ uRawOp(VectorMask<$Boxtype$> m,
FUnRawOp f);
/**
 * Shared implementation of the masked raw unary operation.
 * A {@code null} mask means "all lanes" and delegates to the unmasked
 * template. Otherwise lane {@code N} of the result is
 * {@code f.apply(N, thisLane[N])} where the mask bit is set, and the
 * unchanged original lane value where it is not. Lane values are passed
 * to {@code f} exactly as stored, with no conversion.
 *
 * @param m the controlling mask, or {@code null} for an unmasked operation
 * @param f the raw lane function to apply to the selected lanes
 * @return a new vector created by {@code vectorFactory} from the results
 */
@ForceInline
final
$abstractvectortype$ uRawOpTemplate(VectorMask<$Boxtype$> m,
                                   FUnRawOp f) {
    if (m != null) {
        final $type$[] lanes = vec();
        final $type$[] out = new $type$[length()];
        final boolean[] selected = ((AbstractMask<$Boxtype$>)m).getBits();
        for (int lane = 0; lane < out.length; lane++) {
            if (selected[lane]) {
                out[lane] = f.apply(lane, lanes[lane]);
            } else {
                // Unselected lanes pass through untouched.
                out[lane] = lanes[lane];
            }
        }
        return vectorFactory(out);
    }
    return uRawOpTemplate(f);
}
#end[FP16]
// Binary operator
/*package-private*/
interface FBinOp {
$type$ apply(int i, $type$ a, $type$ b);
$fallbacktype$ apply(int i, $fallbacktype$ a, $fallbacktype$ b);
}
/*package-private*/
@ -214,7 +315,11 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
$type$[] vec1 = this.vec();
$type$[] vec2 = (($abstractvectortype$)o).vec();
for (int i = 0; i < res.length; i++) {
#if[FP16]
res[i] = floatToFloat16(f.apply(i, float16ToFloat(vec1[i]), float16ToFloat(vec2[i])));
#else[FP16]
res[i] = f.apply(i, vec1[i], vec2[i]);
#end[FP16]
}
return vectorFactory(res);
}
@ -237,7 +342,11 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
$type$[] vec2 = (($abstractvectortype$)o).vec();
boolean[] mbits = ((AbstractMask<$Boxtype$>)m).getBits();
for (int i = 0; i < res.length; i++) {
#if[FP16]
res[i] = mbits[i] ? floatToFloat16(f.apply(i, float16ToFloat(vec1[i]), float16ToFloat(vec2[i]))) : vec1[i];
#else[FP16]
res[i] = mbits[i] ? f.apply(i, vec1[i], vec2[i]) : vec1[i];
#end[FP16]
}
return vectorFactory(res);
}
@ -310,7 +419,11 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
$type$[] vec = vec();
boolean[] mbits = ((AbstractMask<$Boxtype$>)m).getBits();
for (int i = 0; i < vec.length; i++) {
#if[FP16]
v = mbits[i] ? floatToFloat16(f.apply(i, float16ToFloat(v), float16ToFloat(vec[i]))) : v;
#else[FP16]
v = mbits[i] ? f.apply(i, v, vec[i]) : v;
#end[FP16]
}
return v;
}
@ -320,7 +433,11 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
$type$ rOpTemplate($type$ v, FBinOp f) {
$type$[] vec = vec();
for (int i = 0; i < vec.length; i++) {
#if[FP16]
v = floatToFloat16(f.apply(i, float16ToFloat(v), float16ToFloat(vec[i])));
#else[FP16]
v = f.apply(i, v, vec[i]);
#end[FP16]
}
return v;
}
@ -516,13 +633,21 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
/*package-private*/
// Converts a single lane value into the long "bits" carrier form.
@ForceInline
static long toBits($type$ e) {
#if[FP16]
// FP16: the lane value is returned as-is, only implicitly widened to long;
// no floating-point conversion is applied.
return e;
#else[FP16]
// Other FP lane types go through the box type's raw-bits conversion;
// non-FP lane types are returned as-is (widened to long).
return {#if[FP]? $Type$.$type$ToRaw$Bitstype$Bits(e): e};
#end[FP16]
}
/*package-private*/
// Counterpart of toBits: narrows a long "bits" carrier back to a lane value.
@ForceInline
static $type$ fromBits(long bits) {
#if[FP16]
// FP16: plain narrowing cast with no floating-point conversion.
// NOTE(review): the cast target is hard-coded as short, so this relies on
// the FP16 lane type being short - confirm against the generator settings.
return (short)bits;
#else[FP16]
// Narrow to the bits type first; FP lane types then apply the box type's
// bits-to-value conversion, non-FP lane types use the narrowed value directly.
return {#if[FP]?$Type$.$bitstype$BitsTo$Type$}(($bitstype$)bits);
#end[FP16]
}
static $abstractvectortype$ expandHelper(Vector<$Boxtype$> v, VectorMask<$Boxtype$> m) {
@ -562,7 +687,12 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
$type$[] vecPayload2 = (($abstractvectortype$)src1).vec();
$type$[] vecPayload3 = (($abstractvectortype$)src2).vec();
for (int i = 0; i < vlen; i++) {
#if[FP16]
int index = shortBitsToFloat16(vecPayload1[i]).intValue();
int wrapped_index = VectorIntrinsics.wrapToRange(index, 2 * vlen);
#else[FP16]
int wrapped_index = VectorIntrinsics.wrapToRange((int)vecPayload1[i], 2 * vlen);
#end[FP16]
res[i] = wrapped_index >= vlen ? vecPayload3[wrapped_index - vlen] : vecPayload2[wrapped_index];
}
return (($abstractvectortype$)src1).vectorFactory(res);
@ -594,7 +724,7 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
$Type$Species vsp = ($Type$Species) species;
#if[FP]
return VectorSupport.fromBitsCoerced(vsp.vectorType(), LANE_TYPE_ORDINAL, species.length(),
toBits(0.0f), MODE_BROADCAST, vsp,
toBits({#if[FP16]?(short) 0:0.0f}), MODE_BROADCAST, vsp,
((bits_, s_) -> s_.rvOp(i -> bits_)));
#else[FP]
return VectorSupport.fromBitsCoerced(vsp.vectorType(), LANE_TYPE_ORDINAL, species.length(),
@ -784,27 +914,32 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
private static UnaryOperation<$abstractvectortype$, VectorMask<$Boxtype$>> unaryOperations(int opc_) {
switch (opc_) {
#if[FP16]
case VECTOR_OP_NEG: return (v0, m) ->
v0.uOp(m, (i, a) -> ($type$) -a);
v0.uOp(m, (i, a) -> Float16.negate(Float16.valueOf(a)).floatValue());
#else[FP16]
case VECTOR_OP_NEG: return (v0, m) ->
v0.uOp(m, (i, a) -> ($fallbacktype$) -a);
#end[FP16]
case VECTOR_OP_ABS: return (v0, m) ->
v0.uOp(m, (i, a) -> ($type$) Math.abs(a));
v0.uOp(m, (i, a) -> ($fallbacktype$) Math.abs(a));
#if[!FP]
#if[intOrLong]
case VECTOR_OP_BIT_COUNT: return (v0, m) ->
v0.uOp(m, (i, a) -> ($type$) $Boxtype$.bitCount(a));
v0.uOp(m, (i, a) -> ($fallbacktype$) $Boxtype$.bitCount(a));
case VECTOR_OP_TZ_COUNT: return (v0, m) ->
v0.uOp(m, (i, a) -> ($type$) $Boxtype$.numberOfTrailingZeros(a));
v0.uOp(m, (i, a) -> ($fallbacktype$) $Boxtype$.numberOfTrailingZeros(a));
case VECTOR_OP_LZ_COUNT: return (v0, m) ->
v0.uOp(m, (i, a) -> ($type$) $Boxtype$.numberOfLeadingZeros(a));
v0.uOp(m, (i, a) -> ($fallbacktype$) $Boxtype$.numberOfLeadingZeros(a));
case VECTOR_OP_REVERSE: return (v0, m) ->
v0.uOp(m, (i, a) -> ($type$) $Boxtype$.reverse(a));
v0.uOp(m, (i, a) -> ($fallbacktype$) $Boxtype$.reverse(a));
#else[intOrLong]
case VECTOR_OP_BIT_COUNT: return (v0, m) ->
v0.uOp(m, (i, a) -> ($type$) bitCount(a));
v0.uOp(m, (i, a) -> ($fallbacktype$) bitCount(a));
case VECTOR_OP_TZ_COUNT: return (v0, m) ->
v0.uOp(m, (i, a) -> ($type$) numberOfTrailingZeros(a));
v0.uOp(m, (i, a) -> ($fallbacktype$) numberOfTrailingZeros(a));
case VECTOR_OP_LZ_COUNT: return (v0, m) ->
v0.uOp(m, (i, a) -> ($type$) numberOfLeadingZeros(a));
v0.uOp(m, (i, a) -> ($fallbacktype$) numberOfLeadingZeros(a));
case VECTOR_OP_REVERSE: return (v0, m) ->
v0.uOp(m, (i, a) -> reverse(a));
#end[intOrLong]
@ -814,43 +949,47 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
v0.uOp(m, (i, a) -> a);
#else[byte]
case VECTOR_OP_REVERSE_BYTES: return (v0, m) ->
v0.uOp(m, (i, a) -> ($type$) $Boxtype$.reverseBytes(a));
v0.uOp(m, (i, a) -> ($fallbacktype$) $Boxtype$.reverseBytes(a));
#end[byte]
#end[BITWISE]
#end[!FP]
#if[FP]
case VECTOR_OP_SIN: return (v0, m) ->
v0.uOp(m, (i, a) -> ($type$) Math.sin(a));
v0.uOp(m, (i, a) -> ($fallbacktype$) Math.sin(a));
case VECTOR_OP_COS: return (v0, m) ->
v0.uOp(m, (i, a) -> ($type$) Math.cos(a));
v0.uOp(m, (i, a) -> ($fallbacktype$) Math.cos(a));
case VECTOR_OP_TAN: return (v0, m) ->
v0.uOp(m, (i, a) -> ($type$) Math.tan(a));
v0.uOp(m, (i, a) -> ($fallbacktype$) Math.tan(a));
case VECTOR_OP_ASIN: return (v0, m) ->
v0.uOp(m, (i, a) -> ($type$) Math.asin(a));
v0.uOp(m, (i, a) -> ($fallbacktype$) Math.asin(a));
case VECTOR_OP_ACOS: return (v0, m) ->
v0.uOp(m, (i, a) -> ($type$) Math.acos(a));
v0.uOp(m, (i, a) -> ($fallbacktype$) Math.acos(a));
case VECTOR_OP_ATAN: return (v0, m) ->
v0.uOp(m, (i, a) -> ($type$) Math.atan(a));
v0.uOp(m, (i, a) -> ($fallbacktype$) Math.atan(a));
case VECTOR_OP_EXP: return (v0, m) ->
v0.uOp(m, (i, a) -> ($type$) Math.exp(a));
v0.uOp(m, (i, a) -> ($fallbacktype$) Math.exp(a));
case VECTOR_OP_LOG: return (v0, m) ->
v0.uOp(m, (i, a) -> ($type$) Math.log(a));
v0.uOp(m, (i, a) -> ($fallbacktype$) Math.log(a));
case VECTOR_OP_LOG10: return (v0, m) ->
v0.uOp(m, (i, a) -> ($type$) Math.log10(a));
v0.uOp(m, (i, a) -> ($fallbacktype$) Math.log10(a));
case VECTOR_OP_SQRT: return (v0, m) ->
v0.uOp(m, (i, a) -> ($type$) Math.sqrt(a));
#if[FP16]
v0.uOp(m, (i, a) -> Float16.sqrt(Float16.valueOf(a)).floatValue());
#else[FP16]
v0.uOp(m, (i, a) -> ($fallbacktype$) Math.sqrt(a));
#end[FP16]
case VECTOR_OP_CBRT: return (v0, m) ->
v0.uOp(m, (i, a) -> ($type$) Math.cbrt(a));
v0.uOp(m, (i, a) -> ($fallbacktype$) Math.cbrt(a));
case VECTOR_OP_SINH: return (v0, m) ->
v0.uOp(m, (i, a) -> ($type$) Math.sinh(a));
v0.uOp(m, (i, a) -> ($fallbacktype$) Math.sinh(a));
case VECTOR_OP_COSH: return (v0, m) ->
v0.uOp(m, (i, a) -> ($type$) Math.cosh(a));
v0.uOp(m, (i, a) -> ($fallbacktype$) Math.cosh(a));
case VECTOR_OP_TANH: return (v0, m) ->
v0.uOp(m, (i, a) -> ($type$) Math.tanh(a));
v0.uOp(m, (i, a) -> ($fallbacktype$) Math.tanh(a));
case VECTOR_OP_EXPM1: return (v0, m) ->
v0.uOp(m, (i, a) -> ($type$) Math.expm1(a));
v0.uOp(m, (i, a) -> ($fallbacktype$) Math.expm1(a));
case VECTOR_OP_LOG1P: return (v0, m) ->
v0.uOp(m, (i, a) -> ($type$) Math.log1p(a));
v0.uOp(m, (i, a) -> ($fallbacktype$) Math.log1p(a));
#end[FP]
default: return null;
}
@ -996,46 +1135,46 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
private static BinaryOperation<$abstractvectortype$, VectorMask<$Boxtype$>> binaryOperations(int opc_) {
switch (opc_) {
case VECTOR_OP_ADD: return (v0, v1, vm) ->
v0.bOp(v1, vm, (i, a, b) -> ($type$)(a + b));
v0.bOp(v1, vm, (i, a, b) -> ($fallbacktype$)(a + b));
case VECTOR_OP_SUB: return (v0, v1, vm) ->
v0.bOp(v1, vm, (i, a, b) -> ($type$)(a - b));
v0.bOp(v1, vm, (i, a, b) -> ($fallbacktype$)(a - b));
case VECTOR_OP_MUL: return (v0, v1, vm) ->
v0.bOp(v1, vm, (i, a, b) -> ($type$)(a * b));
v0.bOp(v1, vm, (i, a, b) -> ($fallbacktype$)(a * b));
case VECTOR_OP_DIV: return (v0, v1, vm) ->
v0.bOp(v1, vm, (i, a, b) -> ($type$)(a / b));
v0.bOp(v1, vm, (i, a, b) -> ($fallbacktype$)(a / b));
case VECTOR_OP_MAX: return (v0, v1, vm) ->
v0.bOp(v1, vm, (i, a, b) -> ($type$)Math.max(a, b));
v0.bOp(v1, vm, (i, a, b) -> ($fallbacktype$)Math.max(a, b));
case VECTOR_OP_MIN: return (v0, v1, vm) ->
v0.bOp(v1, vm, (i, a, b) -> ($type$)Math.min(a, b));
v0.bOp(v1, vm, (i, a, b) -> ($fallbacktype$)Math.min(a, b));
#if[BITWISE]
case VECTOR_OP_AND: return (v0, v1, vm) ->
v0.bOp(v1, vm, (i, a, b) -> ($type$)(a & b));
v0.bOp(v1, vm, (i, a, b) -> ($fallbacktype$)(a & b));
case VECTOR_OP_OR: return (v0, v1, vm) ->
v0.bOp(v1, vm, (i, a, b) -> ($type$)(a | b));
v0.bOp(v1, vm, (i, a, b) -> ($fallbacktype$)(a | b));
case VECTOR_OP_XOR: return (v0, v1, vm) ->
v0.bOp(v1, vm, (i, a, b) -> ($type$)(a ^ b));
v0.bOp(v1, vm, (i, a, b) -> ($fallbacktype$)(a ^ b));
case VECTOR_OP_LSHIFT: return (v0, v1, vm) ->
v0.bOp(v1, vm, (i, a, n) -> ($type$)(a << n));
v0.bOp(v1, vm, (i, a, n) -> ($fallbacktype$)(a << n));
case VECTOR_OP_RSHIFT: return (v0, v1, vm) ->
v0.bOp(v1, vm, (i, a, n) -> ($type$)(a >> n));
v0.bOp(v1, vm, (i, a, n) -> ($fallbacktype$)(a >> n));
case VECTOR_OP_URSHIFT: return (v0, v1, vm) ->
v0.bOp(v1, vm, (i, a, n) -> ($type$)((a & LSHR_SETUP_MASK) >>> n));
v0.bOp(v1, vm, (i, a, n) -> ($fallbacktype$)((a & LSHR_SETUP_MASK) >>> n));
case VECTOR_OP_LROTATE: return (v0, v1, vm) ->
v0.bOp(v1, vm, (i, a, n) -> rotateLeft(a, (int)n));
case VECTOR_OP_RROTATE: return (v0, v1, vm) ->
v0.bOp(v1, vm, (i, a, n) -> rotateRight(a, (int)n));
case VECTOR_OP_UMAX: return (v0, v1, vm) ->
v0.bOp(v1, vm, (i, a, b) -> ($type$)VectorMath.maxUnsigned(a, b));
v0.bOp(v1, vm, (i, a, b) -> ($fallbacktype$)VectorMath.maxUnsigned(a, b));
case VECTOR_OP_UMIN: return (v0, v1, vm) ->
v0.bOp(v1, vm, (i, a, b) -> ($type$)VectorMath.minUnsigned(a, b));
v0.bOp(v1, vm, (i, a, b) -> ($fallbacktype$)VectorMath.minUnsigned(a, b));
case VECTOR_OP_SADD: return (v0, v1, vm) ->
v0.bOp(v1, vm, (i, a, b) -> ($type$)(VectorMath.addSaturating(a, b)));
v0.bOp(v1, vm, (i, a, b) -> ($fallbacktype$)(VectorMath.addSaturating(a, b)));
case VECTOR_OP_SSUB: return (v0, v1, vm) ->
v0.bOp(v1, vm, (i, a, b) -> ($type$)(VectorMath.subSaturating(a, b)));
v0.bOp(v1, vm, (i, a, b) -> ($fallbacktype$)(VectorMath.subSaturating(a, b)));
case VECTOR_OP_SUADD: return (v0, v1, vm) ->
v0.bOp(v1, vm, (i, a, b) -> ($type$)(VectorMath.addSaturatingUnsigned(a, b)));
v0.bOp(v1, vm, (i, a, b) -> ($fallbacktype$)(VectorMath.addSaturatingUnsigned(a, b)));
case VECTOR_OP_SUSUB: return (v0, v1, vm) ->
v0.bOp(v1, vm, (i, a, b) -> ($type$)(VectorMath.subSaturatingUnsigned(a, b)));
v0.bOp(v1, vm, (i, a, b) -> ($fallbacktype$)(VectorMath.subSaturatingUnsigned(a, b)));
#if[intOrLong]
case VECTOR_OP_COMPRESS_BITS: return (v0, v1, vm) ->
v0.bOp(v1, vm, (i, a, n) -> $Boxtype$.compress(a, n));
@ -1045,13 +1184,17 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
#end[BITWISE]
#if[FP]
case VECTOR_OP_OR: return (v0, v1, vm) ->
#if[FP16]
v0.bOp(v1, vm, (i, a, b) -> FloatVector.fromBits(FloatVector.toBits(a) | FloatVector.toBits(b)));
#else[FP16]
v0.bOp(v1, vm, (i, a, b) -> fromBits(toBits(a) | toBits(b)));
#end[FP16]
case VECTOR_OP_ATAN2: return (v0, v1, vm) ->
v0.bOp(v1, vm, (i, a, b) -> ($type$) Math.atan2(a, b));
v0.bOp(v1, vm, (i, a, b) -> ($fallbacktype$) Math.atan2(a, b));
case VECTOR_OP_POW: return (v0, v1, vm) ->
v0.bOp(v1, vm, (i, a, b) -> ($type$) Math.pow(a, b));
v0.bOp(v1, vm, (i, a, b) -> ($fallbacktype$) Math.pow(a, b));
case VECTOR_OP_HYPOT: return (v0, v1, vm) ->
v0.bOp(v1, vm, (i, a, b) -> ($type$) Math.hypot(a, b));
v0.bOp(v1, vm, (i, a, b) -> ($fallbacktype$) Math.hypot(a, b));
#end[FP]
default: return null;
}
@ -1147,13 +1290,13 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
public final
$abstractvectortype$ lanewise(VectorOperators.Binary op,
long e) {
$type$ e1 = ($type$) e;
$type$ e1 = {#if[FP16]?float16ToRawShortBits(Float16.valueOf(e)):($type$) e};
#if[BITWISE]
if ((long)e1 != e
// allow shift ops to clip down their int parameters
&& !(opKind(op, VO_SHIFT) && (int)e1 == e)) {
#else[BITWISE]
if ((long)e1 != e) {
if ({#if[FP16]?shortBitsToFloat16(e1).longValue():(long)e1} != e) {
#end[BITWISE]
vspecies().checkValue(e); // for exception
}
@ -1174,13 +1317,13 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
public final
$abstractvectortype$ lanewise(VectorOperators.Binary op,
long e, VectorMask<$Boxtype$> m) {
$type$ e1 = ($type$) e;
$type$ e1 = {#if[FP16]?float16ToRawShortBits(Float16.valueOf(e)):($type$) e};
#if[BITWISE]
if ((long)e1 != e
// allow shift ops to clip down their int parameters
&& !(opKind(op, VO_SHIFT) && (int)e1 == e)) {
#else[BITWISE]
if ((long)e1 != e) {
if ({#if[FP16]?shortBitsToFloat16(e1).longValue():(long)e1} != e) {
#end[BITWISE]
vspecies().checkValue(e); // for exception
}
@ -1255,12 +1398,12 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
// since our lane types are first-class types, not just dressed
// up ints.
private static final int SHIFT_MASK = ($Boxtype$.SIZE - 1);
#if[byteOrShort]
#if[byteOrStrictShort]
// Also simulate >>> on sub-word variables with a mask.
private static final int LSHR_SETUP_MASK = ((1 << $Boxtype$.SIZE) - 1);
#else[byteOrShort]
#else[byteOrStrictShort]
private static final $type$ LSHR_SETUP_MASK = -1;
#end[byteOrShort]
#end[byteOrStrictShort]
#end[BITWISE]
// Ternary lanewise support
@ -1363,7 +1506,11 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
switch (opc_) {
#if[FP]
case VECTOR_OP_FMA: return (v0, v1_, v2_, m) ->
#if[FP16]
v0.tOp(v1_, v2_, m, (i, a, b, c) -> float16ToRawShortBits(Float16.fma(shortBitsToFloat16(a), shortBitsToFloat16(b), shortBitsToFloat16(c))));
#else[FP16]
v0.tOp(v1_, v2_, m, (i, a, b, c) -> Math.fma(a, b, c));
#end[FP16]
#end[FP]
default: return null;
}
@ -2392,7 +2539,7 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
// first kill the sign:
bits = bits.and($Boxbitstype$.MAX_VALUE);
// next find the bit pattern for infinity:
$bitstype$ infbits = ($bitstype$) toBits($Boxtype$.POSITIVE_INFINITY);
$bitstype$ infbits = ($bitstype$) toBits({#if[FP16]?float16ToRawShortBits($Boxtype$.POSITIVE_INFINITY):$Boxtype$.POSITIVE_INFINITY});
// now compare:
if (op == IS_FINITE) {
m = bits.compare(LT, infbits);
@ -2446,7 +2593,7 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
// first kill the sign:
bits = bits.and($Boxbitstype$.MAX_VALUE);
// next find the bit pattern for infinity:
$bitstype$ infbits = ($bitstype$) toBits($Boxtype$.POSITIVE_INFINITY);
$bitstype$ infbits = ($bitstype$) toBits({#if[FP16]?float16ToRawShortBits($Boxtype$.POSITIVE_INFINITY):$Boxtype$.POSITIVE_INFINITY});
// now compare:
if (op == IS_FINITE) {
m = bits.compare(LT, infbits, m);
@ -2517,14 +2664,23 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
}
@ForceInline
private static boolean compareWithOp(int cond, $type$ a, $type$ b) {
private static boolean compareWithOp(int cond, $carriertype$ a, $carriertype$ b) {
return switch (cond) {
#if[FP16]
case BT_eq -> Float.float16ToFloat(a) == Float.float16ToFloat(b);
case BT_ne -> Float.float16ToFloat(a) != Float.float16ToFloat(b);
case BT_lt -> Float.float16ToFloat(a) < Float.float16ToFloat(b);
case BT_le -> Float.float16ToFloat(a) <= Float.float16ToFloat(b);
case BT_gt -> Float.float16ToFloat(a) > Float.float16ToFloat(b);
case BT_ge -> Float.float16ToFloat(a) >= Float.float16ToFloat(b);
#else[FP16]
case BT_eq -> a == b;
case BT_ne -> a != b;
case BT_lt -> a < b;
case BT_le -> a <= b;
case BT_gt -> a > b;
case BT_ge -> a >= b;
#end[FP16]
#if[!FP]
case BT_ult -> $Boxtype$.compareUnsigned(a, b) < 0;
case BT_ule -> $Boxtype$.compareUnsigned(a, b) <= 0;
@ -2665,7 +2821,7 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
// and multiply.
$abstractvectortype$ iota = s.iota();
$type$ sc = ($type$) scale_;
return v.add(sc == 1 ? iota : iota.mul(sc));
return v.add(sc == 1 ? iota : iota.mul({#if[FP16]?float16ToRawShortBits(Float16.valueOf(sc)):sc}));
});
}
@ -2875,7 +3031,7 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
return VectorSupport.rearrangeOp(
getClass(), shuffletype, null, laneTypeOrdinal(), length(),
this, shuffle, null,
(v1, s_, m_) -> v1.uOp((i, a) -> {
(v1, s_, m_) -> v1.{#if[FP16]?uRawOp:uOp}((i, a) -> {
int ei = Integer.remainderUnsigned(s_.laneSource(i), v1.length());
return v1.lane(ei);
}));
@ -2902,7 +3058,7 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
return VectorSupport.rearrangeOp(
getClass(), shuffletype, masktype, laneTypeOrdinal(), length(),
this, shuffle, m,
(v1, s_, m_) -> v1.uOp((i, a) -> {
(v1, s_, m_) -> v1.{#if[FP16]?uRawOp:uOp}((i, a) -> {
int ei = Integer.remainderUnsigned(s_.laneSource(i), v1.length());
return !m_.laneIsSet(i) ? 0 : v1.lane(ei);
}));
@ -2928,7 +3084,7 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
VectorSupport.rearrangeOp(
getClass(), shuffletype, null, laneTypeOrdinal(), length(),
this, shuffle, null,
(v0, s_, m_) -> v0.uOp((i, a) -> {
(v0, s_, m_) -> v0.{#if[FP16]?uRawOp:uOp}((i, a) -> {
int ei = Integer.remainderUnsigned(s_.laneSource(i), v0.length());
return v0.lane(ei);
}));
@ -2936,7 +3092,7 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
VectorSupport.rearrangeOp(
getClass(), shuffletype, null, laneTypeOrdinal(), length(),
v, shuffle, null,
(v1, s_, m_) -> v1.uOp((i, a) -> {
(v1, s_, m_) -> v1.{#if[FP16]?uRawOp:uOp}((i, a) -> {
int ei = Integer.remainderUnsigned(s_.laneSource(i), v1.length());
return v1.lane(ei);
}));
@ -2963,6 +3119,9 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
final <F>
VectorShuffle<F> toShuffle(AbstractSpecies<F> dsp, boolean wrap) {
assert(dsp.elementSize() == vspecies().elementSize());
#if[FP16]
ShortVector idx = convert(VectorOperators.H2S, 0).reinterpretAsShorts();
#end[FP16]
#if[float]
IntVector idx = convert(VectorOperators.F2I, 0).reinterpretAsInts();
#end[float]
@ -2983,7 +3142,7 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
/**
* {@inheritDoc} <!--workaround-->
* @since 19
* @since {#if[FP16]?27:19}
*/
@Override
public abstract
@ -3002,7 +3161,7 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
/**
* {@inheritDoc} <!--workaround-->
* @since 19
* @since {#if[FP16]?27:19}
*/
@Override
public abstract
@ -3199,7 +3358,7 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
*
* This is a lane-wise ternary operation which applies an operation
* conforming to the specification of
* {@link Math#fma($type$,$type$,$type$) Math.fma(a,b,c)}
* {@link Math#fma($fallbacktype$,$fallbacktype$,$fallbacktype$) Math.fma(a,b,c)}
* to each lane.
#if[intOrFloat]
* The operation is adapted to cast the operands and the result,
@ -3240,7 +3399,7 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
*
* This is a lane-wise ternary operation which applies an operation
* conforming to the specification of
* {@link Math#fma($type$,$type$,$type$) Math.fma(a,b,c)}
* {@link Math#fma($fallbacktype$,$fallbacktype$,$fallbacktype$) Math.fma(a,b,c)}
* to each lane.
#if[intOrFloat]
* The operation is adapted to cast the operands and the result,
@ -3447,13 +3606,13 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
private static ReductionOperation<$abstractvectortype$, VectorMask<$Boxtype$>> reductionOperations(int opc_) {
switch (opc_) {
case VECTOR_OP_ADD: return (v, m) ->
toBits(v.rOp(($type$)0, m, (i, a, b) -> ($type$)(a + b)));
toBits(v.rOp(($type$)0, m, (i, a, b) -> ($fallbacktype$)(a + b)));
case VECTOR_OP_MUL: return (v, m) ->
toBits(v.rOp(($type$)1, m, (i, a, b) -> ($type$)(a * b)));
toBits(v.rOp(($type$){#if[FP16]?floatToFloat16(1.0f):1}, m, (i, a, b) -> ($fallbacktype$)(a * b)));
case VECTOR_OP_MIN: return (v, m) ->
toBits(v.rOp(MAX_OR_INF, m, (i, a, b) -> ($type$) Math.min(a, b)));
toBits(v.rOp(MAX_OR_INF, m, (i, a, b) -> ($fallbacktype$) Math.min(a, b)));
case VECTOR_OP_MAX: return (v, m) ->
toBits(v.rOp(MIN_OR_INF, m, (i, a, b) -> ($type$) Math.max(a, b)));
toBits(v.rOp(MIN_OR_INF, m, (i, a, b) -> ($fallbacktype$) Math.max(a, b)));
#if[!FP]
case VECTOR_OP_UMIN: return (v, m) ->
toBits(v.rOp(UMAX_VALUE, m, (i, a, b) -> ($type$) VectorMath.minUnsigned(a, b)));
@ -3475,8 +3634,8 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
}
#if[FP]
private static final $type$ MIN_OR_INF = $Boxtype$.NEGATIVE_INFINITY;
private static final $type$ MAX_OR_INF = $Boxtype$.POSITIVE_INFINITY;
private static final $type$ MIN_OR_INF = {#if[FP16]?float16ToRawShortBits($Boxtype$.NEGATIVE_INFINITY):$Boxtype$.NEGATIVE_INFINITY};
private static final $type$ MAX_OR_INF = {#if[FP16]?float16ToRawShortBits($Boxtype$.POSITIVE_INFINITY):$Boxtype$.POSITIVE_INFINITY};
#else[FP]
private static final $type$ MIN_OR_INF = $Boxtype$.MIN_VALUE;
private static final $type$ MAX_OR_INF = $Boxtype$.MAX_VALUE;
@ -3656,14 +3815,18 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
$type$[] a = toArray();
double[] res = new double[a.length];
for (int i = 0; i < a.length; i++) {
res[i] = (double) a[i];
res[i] = (double) {#if[FP16]?shortBitsToFloat16(a[i]).doubleValue():a[i]};
}
return res;
}
#end[double]
/**
#if[FP16]
* Loads a vector from an array of type {@code $type$[]} holding IEEE 754 binary16 values
#else[FP16]
* Loads a vector from an array of type {@code $type$[]}
#end[FP16]
* starting at an offset.
* For each vector lane, where {@code N} is the vector lane index, the
* array element at index {@code offset + N} is placed into the
@ -3687,7 +3850,11 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
}
/**
#if[FP16]
* Loads a vector from an array of type {@code $type$[]} holding IEEE 754 binary16 values
#else[FP16]
* Loads a vector from an array of type {@code $type$[]}
#end[FP16]
* starting at an offset and using a mask.
* Lanes where the mask is unset are filled with the default
* value of {@code $type$} ({#if[FP]?positive }zero).
@ -3724,7 +3891,11 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
/**
* Gathers a new vector composed of elements from an array of type
#if[FP16]
* {@code $type$[]} holding IEEE 754 binary16 values,
#else[FP16]
* {@code $type$[]},
#end[FP16]
* using indexes obtained by adding a fixed {@code offset} to a
* series of secondary offsets from an <em>index map</em>.
* The index map is a contiguous sequence of {@code VLENGTH}
@ -3869,7 +4040,11 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
/**
* Gathers a new vector composed of elements from an array of type
#if[FP16]
* {@code $type$[]} holding IEEE 754 binary16 values,
#else[FP16]
* {@code $type$[]},
#end[FP16]
* under the control of a mask, and
* using indexes obtained by adding a fixed {@code offset} to a
* series of secondary offsets from an <em>index map</em>.
@ -3918,7 +4093,7 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
}
}
#if[short]
#if[strictShort]
/**
* Loads a vector from an array of type {@code char[]}
* starting at an offset.
@ -4069,7 +4244,7 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
$Type$Species vsp = ($Type$Species) species;
return vsp.vOp(m, n -> (short) a[offset + indexMap[mapOffset + n]]);
}
#end[short]
#end[strictShort]
#if[byte]
/**
@ -4258,7 +4433,7 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
* for any lane {@code N} in the vector
* @throws IllegalStateException if the memory segment's session is not alive,
* or if access occurs from a thread other than the thread owning the session.
* @since 19
* @since {#if[FP16]?27:19}
*/
@ForceInline
public static
@ -4317,7 +4492,7 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
* where the mask is set
* @throws IllegalStateException if the memory segment's session is not alive,
* or if access occurs from a thread other than the thread owning the session.
* @since 19
* @since {#if[FP16]?27:19}
*/
@ForceInline
public static
@ -4555,7 +4730,7 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
}
#end[byteOrShort]
#if[short]
#if[strictShort]
/**
* Stores this vector into an array of type {@code char[]}
* starting at an offset.
@ -4711,7 +4886,7 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
arr[off + j] = (char) e;
});
}
#end[short]
#end[strictShort]
#if[byte]
/**
@ -4886,7 +5061,7 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
/**
* {@inheritDoc} <!--workaround-->
* @since 19
* @since {#if[FP16]?27:19}
*/
@Override
@ForceInline
@ -4903,7 +5078,7 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
/**
* {@inheritDoc} <!--workaround-->
* @since 19
* @since {#if[FP16]?27:19}
*/
@Override
@ForceInline
@ -5100,7 +5275,7 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
}
#end[byteOrShort]
#if[short]
#if[strictShort]
/*package-private*/
abstract
$abstractvectortype$ fromCharArray0(char[] a, int offset);
@ -5132,7 +5307,7 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
(arr, off, s, vm) -> s.ldOp(arr, (int) off, vm,
(arr_, off_, i) -> (short) arr_[off_ + i]));
}
#end[short]
#end[strictShort]
#if[byte]
/*package-private*/
@ -5346,7 +5521,7 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
});
}
#if[short]
#if[strictShort]
/*package-private*/
abstract
void intoCharArray0(char[] a, int offset, VectorMask<$Boxtype$> m);
@ -5364,7 +5539,7 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
-> v.stOp(arr, (int) off, vm,
(arr_, off_, i, e) -> arr_[off_ + i] = (char) e));
}
#end[short]
#end[strictShort]
// End of low-level memory operations.
@ -5423,7 +5598,7 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
return ARRAY_BASE + (((long)index) << ARRAY_SHIFT);
}
#if[short]
#if[strictShort]
static final int ARRAY_CHAR_SHIFT =
31 - Integer.numberOfLeadingZeros(Unsafe.ARRAY_CHAR_INDEX_SCALE);
static final long ARRAY_CHAR_BASE =
@ -5433,7 +5608,7 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
static long charArrayAddress(char[] a, int index) {
return ARRAY_CHAR_BASE + (((long)index) << ARRAY_CHAR_SHIFT);
}
#end[short]
#end[strictShort]
#if[byte]
static final int ARRAY_BOOLEAN_SHIFT =
@ -5490,7 +5665,7 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
/**
* {@inheritDoc} <!--workaround-->
#if[byteOrShort]
#if[byte]
*
* @implNote This method always throws
* {@code UnsupportedOperationException}, because there is no floating
@ -5498,23 +5673,27 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
* of this method is arbitrarily designated as
* {@code Vector<?>}. Future versions of this API may change the return
* type if additional floating point types become available.
#end[byteOrShort]
#end[byte]
*/
@ForceInline
@Override
public final
{#if[byteOrShort]?Vector<?>:$Fptype$Vector}
#if[FP16]
$Type$Vector
#else[FP16]
{#if[byte]?Vector<?>:$Fptype$Vector}
#end[FP16]
viewAsFloatingLanes() {
#if[FP]
return this;
#else[FP]
LaneType flt = LaneType.$TYPE$.asFloating();
#if[!byteOrShort]
#if[!byte]
return ($Fptype$Vector) asVectorRaw(flt);
#else[!byteOrShort]
#else[!byte]
// asFloating() will throw UnsupportedOperationException for the unsupported type $type$
throw new AssertionError("Cannot reach here");
#end[!byteOrShort]
#end[!byte]
#end[FP]
}
@ -5588,7 +5767,7 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
Class<? extends AbstractMask<$Boxtype$>> maskType,
Class<? extends AbstractShuffle<$Boxtype$>> shuffleType,
Function<Object, $abstractvectortype$> vectorFactory) {
super(shape, LaneType.of($type$.class),
super(shape, LaneType.of($elemtype$.class),
vectorType, maskType, shuffleType,
vectorFactory);
assert(this.elementSize() == $Boxtype$.SIZE);
@ -5599,7 +5778,7 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
@Override
@ForceInline
public final Class<$Boxtype$> elementType() {
return $type$.class;
return $elemtype$.class;
}
@Override
@ -5656,8 +5835,8 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
return value;
#else[long]
// Do the conversion, and then test it for failure.
$type$ e = ($type$) value;
if ((long) e != value) {
$type$ e = {#if[FP16]?float16ToRawShortBits(Float16.valueOf(value)):($type$) value};
if ({#if[FP16]?shortBitsToFloat16(e).longValue():(long) e} != value) {
throw badElementBits(value, e);
}
return toBits(e);
@ -5667,10 +5846,18 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
// Converts one lane value to an integral value (int when convertToInt is
// true, otherwise long, widened to long for the return) and verifies that
// converting the result back reproduces the original lane value.
// If the round trip does not match, the exception built by badArrayBits
// is thrown; otherwise the integral value is returned.
/*package-private*/
@ForceInline
static long toIntegralChecked($type$ e, boolean convertToInt) {
#if[FP16]
// FP16 lanes arrive as raw short bits: decode them to a float first,
// then do the narrowing and the round-trip comparison in float.
float ef = shortBitsToFloat16(e).floatValue();
long value = convertToInt ? (int) ef : (long) ef;
if ((float) value != ef) {
throw badArrayBits(e, convertToInt, value);
}
#else[FP16]
// Other lane types: cast directly and compare after casting back.
long value = convertToInt ? (int) e : (long) e;
if (($type$) value != e) {
throw badArrayBits(e, convertToInt, value);
}
#end[FP16]
return value;
}
@ -5682,11 +5869,19 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
$type$[] va = new $type$[laneCount()];
for (int i = 0; i < va.length; i++) {
int lv = values[i];
#if[FP16]
$type$ v = float16ToRawShortBits(Float16.valueOf(lv));
va[i] = v;
if (Float16.valueOf(lv).intValue() != lv) {
throw badElementBits(lv, v);
}
#else[FP16]
$type$ v = ($type$) lv;
va[i] = v;
if ((int)v != lv) {
throw badElementBits(lv, v);
}
#end[FP16]
}
return dummyVector().fromArray0(va, 0);
}
@ -5859,10 +6054,17 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
}
/**
#if[FP16]
* Finds a species for an element type of {@code $elemtype$} and shape.
*
* @param s the shape
* @return a species for an element type of {@code $elemtype$} and shape
#else[FP16]
* Finds a species for an element type of {@code $type$} and shape.
*
* @param s the shape
* @return a species for an element type of {@code $type$} and shape
#end[FP16]
* @throws IllegalArgumentException if no such species exists for the shape
*/
static $Type$Species species(VectorShape s) {
@ -5922,6 +6124,6 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
* A preferred species is a species of maximal bit-size for the platform.
*/
public static final VectorSpecies<$Boxtype$> SPECIES_PREFERRED
= ($Type$Species) VectorSpecies.ofPreferred($type$.class);
= ($Type$Species) VectorSpecies.ofPreferred($elemtype$.class);
}

View File

@ -59,7 +59,7 @@ final class $vectortype$ extends $abstractvectortype$ {
static final Class<$Carriertype$> CTYPE = $carriertype$.class; // carrier type used by the JVM
static final Class<$Boxtype$> ETYPE = $type$.class; // used by the JVM
static final Class<$Boxtype$> ETYPE = $elemtype$.class; // used by the JVM
$vectortype$($type$[] v) {
super(v);
@ -95,7 +95,7 @@ final class $vectortype$ extends $abstractvectortype$ {
@ForceInline
@Override
public final Class<$Boxtype$> elementType() { return $type$.class; }
public final Class<$Boxtype$> elementType() { return $elemtype$.class; }
@ForceInline
final Class<$Carriertype$> carrierType() { return CTYPE; }
@ -216,6 +216,20 @@ final class $vectortype$ extends $abstractvectortype$ {
super.uOpTemplate(($masktype$)m, f); // specialize
}
#if[FP16]
// Unmasked variant: forwards to super.uRawOpTemplate and casts the result
// to this concrete vector class.
@ForceInline
final @Override
$vectortype$ uRawOp(FUnRawOp f) {
return ($vectortype$) super.uRawOpTemplate(f); // specialize
}
// Masked variant: casts the mask to this class's mask type, forwards to
// super.uRawOpTemplate, and casts the result to this concrete vector class.
@ForceInline
final @Override
$vectortype$ uRawOp(VectorMask<$Boxtype$> m, FUnRawOp f) {
return ($vectortype$)
super.uRawOpTemplate(($masktype$)m, f); // specialize
}
#end[FP16]
// Binary operator
@ForceInline
@ -574,6 +588,24 @@ final class $vectortype$ extends $abstractvectortype$ {
case 13: bits = laneHelper(13); break;
case 14: bits = laneHelper(14); break;
case 15: bits = laneHelper(15); break;
#if[!16L]
case 16: bits = laneHelper(16); break;
case 17: bits = laneHelper(17); break;
case 18: bits = laneHelper(18); break;
case 19: bits = laneHelper(19); break;
case 20: bits = laneHelper(20); break;
case 21: bits = laneHelper(21); break;
case 22: bits = laneHelper(22); break;
case 23: bits = laneHelper(23); break;
case 24: bits = laneHelper(24); break;
case 25: bits = laneHelper(25); break;
case 26: bits = laneHelper(26); break;
case 27: bits = laneHelper(27); break;
case 28: bits = laneHelper(28); break;
case 29: bits = laneHelper(29); break;
case 30: bits = laneHelper(30); break;
case 31: bits = laneHelper(31); break;
#end[!16L]
#end[!8L]
#end[!4L]
#end[!2L]
@ -586,7 +618,7 @@ final class $vectortype$ extends $abstractvectortype$ {
}
$bitstype$ bits = laneHelper(i);
#end[!Max]
return $Type$.$bitstype$BitsTo$Fptype$(bits);
return {#if[FP16]?bits:$Type$.$bitstype$BitsTo$Fptype$(bits)};
}
@ForceInline
@ -596,7 +628,7 @@ final class $vectortype$ extends $abstractvectortype$ {
this, i,
(vec, ix) -> {
$type$[] vecarr = vec.vec();
return (long)$Type$.$type$ToRaw$Bitstype$Bits(vecarr[ix]);
return {#if[FP16]?vecarr[ix]:(long)$Type$.$type$ToRaw$Bitstype$Bits(vecarr[ix])};
});
}
@ -625,6 +657,24 @@ final class $vectortype$ extends $abstractvectortype$ {
case 13: return withLaneHelper(13, e);
case 14: return withLaneHelper(14, e);
case 15: return withLaneHelper(15, e);
#if[!16L]
case 16: return withLaneHelper(16, e);
case 17: return withLaneHelper(17, e);
case 18: return withLaneHelper(18, e);
case 19: return withLaneHelper(19, e);
case 20: return withLaneHelper(20, e);
case 21: return withLaneHelper(21, e);
case 22: return withLaneHelper(22, e);
case 23: return withLaneHelper(23, e);
case 24: return withLaneHelper(24, e);
case 25: return withLaneHelper(25, e);
case 26: return withLaneHelper(26, e);
case 27: return withLaneHelper(27, e);
case 28: return withLaneHelper(28, e);
case 29: return withLaneHelper(29, e);
case 30: return withLaneHelper(30, e);
case 31: return withLaneHelper(31, e);
#end[!16L]
#end[!8L]
#end[!4L]
#end[!2L]
@ -643,10 +693,10 @@ final class $vectortype$ extends $abstractvectortype$ {
public $vectortype$ withLaneHelper(int i, $type$ e) {
return VectorSupport.insert(
VCLASS, LANE_TYPE_ORDINAL, VLENGTH,
this, i, (long)$Type$.$type$ToRaw$Bitstype$Bits(e),
this, i, (long){#if[FP16]?e:$Type$.$type$ToRaw$Bitstype$Bits(e)},
(v, ix, bits) -> {
$type$[] res = v.vec().clone();
res[ix] = $Type$.$bitstype$BitsTo$Type$(($bitstype$)bits);
res[ix] = {#if[FP16]?($bitstype$)bits:$Type$.$bitstype$BitsTo$Type$(($bitstype$)bits)};
return v.vectorFactory(res);
});
}
@ -981,7 +1031,7 @@ final class $vectortype$ extends $abstractvectortype$ {
public $masktype$ compress() {
return ($masktype$)VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_MASK_COMPRESS,
$vectortype$.class, $masktype$.class, LANE_TYPE_ORDINAL, VLENGTH, null, this,
(v1, m1) -> VSPECIES.iota().compare(VectorOperators.LT, m1.trueCount()));
(v1, m1) -> VSPECIES.iota().compare(VectorOperators.LT, {#if[FP16]?Float16.float16ToRawShortBits(Float16.valueOf(m1.trueCount())):m1.trueCount()}));
}
@ -1389,7 +1439,7 @@ final class $vectortype$ extends $abstractvectortype$ {
return super.fromArray0Template($masktype$.class, a, offset, indexMap, mapOffset, ($masktype$) m);
}
#if[short]
#if[strictShort]
@ForceInline
@Override
final
@ -1403,7 +1453,7 @@ final class $vectortype$ extends $abstractvectortype$ {
$abstractvectortype$ fromCharArray0(char[] a, int offset, VectorMask<$Boxtype$> m, int offsetInRange) {
return super.fromCharArray0Template($masktype$.class, a, offset, ($masktype$) m, offsetInRange); // specialize
}
#end[short]
#end[strictShort]
#if[byte]
@ForceInline
@ -1474,14 +1524,14 @@ final class $vectortype$ extends $abstractvectortype$ {
super.intoMemorySegment0Template($masktype$.class, ms, offset, ($masktype$) m);
}
#if[short]
#if[strictShort]
@ForceInline
@Override
final
void intoCharArray0(char[] a, int offset, VectorMask<$Boxtype$> m) {
super.intoCharArray0Template($masktype$.class, a, offset, ($masktype$) m);
}
#end[short]
#end[strictShort]
// End of specialized low-level memory operations.

View File

@ -53,19 +53,29 @@ typeprefix=
globalArgs=""
#globalArgs="$globalArgs -KextraOverrides"
for type in byte short int long float double
for type in byte short int long float double float16
do
Type="$(tr '[:lower:]' '[:upper:]' <<< ${type:0:1})${type:1}"
TYPE="$(tr '[:lower:]' '[:upper:]' <<< ${type})"
case $type in
float16)
type=short
TYPE=SHORT
;;
esac
args=$globalArgs
args="$args -K$type -Dtype=$type -DType=$Type -DTYPE=$TYPE"
Boxtype=$Type
Wideboxtype=$Boxtype
ElemLayout=$Type
kind=BITWISE
bitstype=$type
maskbitstype=$type
Bitstype=$Type
Boxbitstype=$Boxtype
@ -74,23 +84,28 @@ do
Boxfptype=$Boxtype
carriertype=$type
Carriertype=$Type
elemtype=$type
fallbacktype=$type
case $type in
byte)
case $Type in
Byte)
Wideboxtype=Integer
sizeInBytes=1
laneType=LT_BYTE
lanebitsType=LT_BYTE
args="$args -KbyteOrShort"
args="$args -KbyteOrShort -KbyteOrStrictShort"
;;
short)
Short)
fptype=Float16
Fptype=Float16
Boxfptype=Float16
Wideboxtype=Integer
sizeInBytes=2
laneType=LT_SHORT
lanebitsType=LT_SHORT
args="$args -KbyteOrShort"
args="$args -KbyteOrShort -KbyteOrStrictShort -KstrictShort"
;;
int)
Int)
Boxtype=Integer
Carriertype=Integer
Wideboxtype=Integer
@ -103,7 +118,7 @@ do
lanebitsType=LT_INT
args="$args -KintOrLong -KintOrFP -KintOrFloat"
;;
long)
Long)
fptype=double
Fptype=Double
Boxfptype=Double
@ -112,33 +127,53 @@ do
lanebitsType=LT_LONG
args="$args -KintOrLong -KlongOrDouble"
;;
float)
Float)
kind=FP
bitstype=int
maskbitstype=int
Bitstype=Int
Boxbitstype=Integer
sizeInBytes=4
laneType=LT_FLOAT
lanebitsType=LT_INT
args="$args -KintOrFP -KintOrFloat"
args="$args -KFP32 -KintOrFP -KintOrFloat"
;;
double)
Double)
kind=FP
bitstype=long
maskbitstype=long
Bitstype=Long
Boxbitstype=Long
sizeInBytes=8
laneType=LT_DOUBLE
lanebitsType=LT_LONG
args="$args -KintOrFP -KlongOrDouble"
args="$args -KFP64 -KintOrFP -KlongOrDouble"
;;
Float16)
kind=FP
bitstype=short
maskbitstype=short
Bitstype=Short
Boxbitstype=Short
sizeInBytes=2
carriertype=short
Carriertype=Short
Boxtype=Float16
elemtype=Float16
ElemLayout=Short
laneType=LT_FLOAT16
lanebitsType=LT_SHORT
fallbacktype=float
args="$args -KFP16 -KbyteOrShort"
;;
esac
args="$args -K$kind -DlaneType=$laneType -DlanebitsType=$lanebitsType -DBoxtype=$Boxtype -DWideboxtype=$Wideboxtype"
args="$args -Dbitstype=$bitstype -DBitstype=$Bitstype -DBoxbitstype=$Boxbitstype"
args="$args -K$kind -DlaneType=$laneType -DlanebitsType=$lanebitsType -Dfallbacktype=$fallbacktype -DBoxtype=$Boxtype -DWideboxtype=$Wideboxtype"
args="$args -DElemLayout=$ElemLayout -Dbitstype=$bitstype -Dmaskbitstype=$maskbitstype -DBitstype=$Bitstype -DBoxbitstype=$Boxbitstype"
args="$args -Dfptype=$fptype -DFptype=$Fptype -DBoxfptype=$Boxfptype"
args="$args -DsizeInBytes=$sizeInBytes"
args="$args -Dcarriertype=$carriertype -DCarriertype=$Carriertype"
args="$args -Dcarriertype=$carriertype -Delemtype=$elemtype -DCarriertype=$Carriertype"
abstractvectortype=${typeprefix}${Type}Vector
abstractbitsvectortype=${typeprefix}Vector${Bitstype}

View File

@ -0,0 +1,113 @@
/*
* Copyright 2025 Arm Limited and/or its affiliates.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
/**
* @test
* @bug 8370691 8373574
* @summary Verify correct execution of CastII -> ConvHF2F IR sequence on AArch64
* @modules jdk.incubator.vector
* @library /test/lib /
* @compile TestCastIIToConvHF2FNoSp.java
* @run driver/timeout=480 compiler.vectorapi.TestCastIIToConvHF2FNoSp
*/
package compiler.vectorapi;
import compiler.lib.ir_framework.*;
import jdk.incubator.vector.*;
import static jdk.incubator.vector.Float16.*;
import static java.lang.Float.*;
import java.util.Arrays;
import jdk.test.lib.*;
import compiler.lib.generators.Generator;
import static compiler.lib.generators.Generators.G;
/**
 * IR-framework test that takes the lane-wise Float16 minimum of an input
 * array and a scalar constant, then checks every output element against a
 * scalar reference computed with {@code Math.min}.
 */
public class TestCastIIToConvHF2FNoSp {
    // Raw binary16 bit patterns fed to the kernel; filled in the constructor.
    short[] input1;
    // Written by the @Test kernel, read by the @Check method.
    short[] output;
    // Odd length, so the masked tail path runs for any even lane count.
    static final int LEN = 527;
    // Scalar operand of the MIN operation.
    static final Float16 FP16_CONST = Float16.valueOf(1023.0f);
    static final VectorSpecies<Float16> SPECIES = Float16Vector.SPECIES_PREFERRED;

    /**
     * Runs the framework once with the default MaxVectorSize and then once for
     * each explicitly requested MaxVectorSize value.
     */
    public static void main(String args[]) {
        // Test with default MaxVectorSize
        TestFramework.runWithFlags("--add-modules=jdk.incubator.vector");

        // Test with different values of MaxVectorSize
        TestFramework.runWithFlags("--add-modules=jdk.incubator.vector", "-XX:MaxVectorSize=8");
        TestFramework.runWithFlags("--add-modules=jdk.incubator.vector", "-XX:MaxVectorSize=16");
        TestFramework.runWithFlags("--add-modules=jdk.incubator.vector", "-XX:MaxVectorSize=32");
        TestFramework.runWithFlags("--add-modules=jdk.incubator.vector", "-XX:MaxVectorSize=64");
    }

    /**
     * Compares an expected and an actual Float16 result and throws an
     * {@code AssertionError} naming the inputs when they differ.
     *
     * @param arity  number of input operands at the front of {@code values}
     * @param values the {@code arity} inputs, followed by the expected result
     *               and then the actual result, all as raw binary16 bits
     */
    static void assertResults(int arity, short ... values) {
        assert values.length == (arity + 2);
        Float16 expected_fp16 = shortBitsToFloat16(values[arity]);
        Float16 actual_fp16 = shortBitsToFloat16(values[arity + 1]);
        if(!expected_fp16.equals(actual_fp16)) {
            // copyOfRange's upper bound is exclusive: [0, arity) covers all inputs.
            String inputs = Arrays.toString(Arrays.copyOfRange(values, 0, arity));
            throw new AssertionError("Result Mismatch!, input = " + inputs + " actual = " + actual_fp16 + " expected = " + expected_fp16);
        }
    }

    /** Allocates the arrays and fills the input from the float16 generator. */
    public TestCastIIToConvHF2FNoSp() {
        input1 = new short[LEN];
        output = new short[LEN];

        Generator<Short> gen = G.float16s();
        for (int i = 0; i < LEN; ++i) {
            input1[i] = gen.next();
        }
    }

    /**
     * Kernel under test: full-width iterations up to {@code loopBound(LEN)},
     * then one masked iteration for the remaining elements. The IR rules
     * require at least one MinVHF node on the listed CPU feature sets.
     */
    @Test
    @IR(counts = {IRNode.MIN_VHF, " >0 "},
        applyIfCPUFeature = {"sve", "true"})
    @IR(counts = {IRNode.MIN_VHF, " >0 "},
        applyIfCPUFeatureAnd = {"fphp", "true", "asimdhp", "true", "sve", "false"})
    void vectorMinConstantInputFloat16() {
        int i = 0;
        for (; i < SPECIES.loopBound(LEN); i += SPECIES.length()) {
            Float16Vector.fromArray(SPECIES, input1, i)
                         .lanewise(VectorOperators.MIN,
                                   float16ToRawShortBits(FP16_CONST))
                         .intoArray(output, i);
        }
        if (i < LEN) {
            VectorMask<Float16> mask = SPECIES.indexInRange(i, LEN);
            Float16Vector.fromArray(SPECIES, input1, i, mask)
                         .lanewise(VectorOperators.MIN,
                                   float16ToRawShortBits(FP16_CONST))
                         .intoArray(output, i, mask);
        }
    }

    /** Verifies each output element against the scalar minimum. */
    @Check(test="vectorMinConstantInputFloat16")
    void checkResultMinConstantInputFloat16() {
        for (int i = 0; i < LEN; ++i) {
            short expected = floatToFloat16(Math.min(FP16_CONST.floatValue(), float16ToFloat(input1[i])));
            assertResults(2, float16ToRawShortBits(FP16_CONST), input1[i], expected, output[i]);
        }
    }
}

View File

@ -0,0 +1,620 @@
/*
* Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
/**
* @test
* @bug 8370691
* @summary Test intrinsification of Float16Vector operations
* @modules jdk.incubator.vector
* @library /test/lib /
* @compile TestFloat16VectorOperations.java
* @run driver/timeout=480 compiler.vectorapi.TestFloat16VectorOperations
*/
package compiler.vectorapi;
import compiler.lib.ir_framework.*;
import jdk.incubator.vector.*;
import static jdk.incubator.vector.Float16.*;
import static java.lang.Float.*;
import java.util.Arrays;
import jdk.test.lib.*;
import compiler.lib.generators.Generator;
import static compiler.lib.generators.Generators.G;
public class TestFloat16VectorOperations {
// Operand arrays holding raw binary16 bit patterns; filled in the constructor.
short[] input1;
short[] input2;
short[] input3;
// Result array: written by each @Test kernel, read by the matching @Check method.
short[] output;
// Odd element count, so it is never a multiple of an even lane count and the
// masked tail of each kernel runs (presumably the reason for this value).
static final int LEN = 527;
// Raw binary16 bits used as a scalar operand.
// NOTE(review): deliberately non-final, presumably so it is not treated as a
// compile-time constant, in contrast to the literal operands - confirm.
static short FP16_SCALAR = (short)0x7777;
static final Float16 FP16_CONST = Float16.valueOf(1023.0f);
static final VectorSpecies<Float16> SPECIES = Float16Vector.SPECIES_PREFERRED;
/**
 * Launches the IR framework once with the VM's default MaxVectorSize and
 * then once per explicit MaxVectorSize setting (8, 16, 32, 64).
 */
public static void main(String args[]) {
    final String moduleFlag = "--add-modules=jdk.incubator.vector";

    // Baseline run: MaxVectorSize left at its default.
    TestFramework.runWithFlags(moduleFlag);

    // One run per doubled vector size, in ascending order.
    for (int vectorSize = 8; vectorSize <= 64; vectorSize <<= 1) {
        TestFramework.runWithFlags(moduleFlag, "-XX:MaxVectorSize=" + vectorSize);
    }
}
/**
 * Compares an expected and an actual Float16 result and throws an
 * {@code AssertionError} naming the inputs when they differ.
 *
 * @param arity  number of input operands at the front of {@code values}
 * @param values the {@code arity} inputs, followed by the expected result
 *               and then the actual result, all as raw binary16 bits
 * @throws AssertionError if the expected and actual values are not equal
 */
static void assertResults(int arity, short ... values) {
    assert values.length == (arity + 2);
    Float16 expected_fp16 = shortBitsToFloat16(values[arity]);
    Float16 actual_fp16 = shortBitsToFloat16(values[arity + 1]);
    if(!expected_fp16.equals(actual_fp16)) {
        // copyOfRange's upper bound is exclusive: [0, arity) covers every
        // input operand (the previous bound of arity - 1 dropped the last one).
        String inputs = Arrays.toString(Arrays.copyOfRange(values, 0, arity));
        throw new AssertionError("Result Mismatch!, input = " + inputs + " actual = " + actual_fp16 + " expected = " + expected_fp16);
    }
}
/**
 * Allocates the three operand arrays and the result array, then fills the
 * operands from the float16 generator.
 */
public TestFloat16VectorOperations() {
    input1 = new short[LEN];
    input2 = new short[LEN];
    input3 = new short[LEN];
    output = new short[LEN];

    final Generator<Short> float16Bits = G.float16s();
    // Values are drawn in the order input1, input2, input3 for each index.
    int idx = 0;
    while (idx < LEN) {
        input1[idx] = float16Bits.next();
        input2[idx] = float16Bits.next();
        input3[idx] = float16Bits.next();
        idx++;
    }
}
/**
 * Kernel: output = input1 + input2, lane-wise on Float16 vectors.
 * Full-width iterations run up to SPECIES.loopBound(LEN); the remaining
 * elements are handled by one iteration whose loads and store are masked.
 * The IR rules require at least one AddVHF node when any of
 * avx512_fp16/zvfh/sve is present, or when both fphp and asimdhp are.
 */
@Test
@IR(counts = {IRNode.ADD_VHF, " >0 "},
applyIfCPUFeatureOr = {"avx512_fp16", "true", "zvfh", "true", "sve", "true"})
@IR(counts = {IRNode.ADD_VHF, " >0 "},
applyIfCPUFeatureAnd = {"fphp", "true", "asimdhp", "true"})
void vectorAddFloat16() {
int i = 0;
for (; i < SPECIES.loopBound(LEN); i += SPECIES.length()) {
Float16Vector.fromArray(SPECIES, input1, i)
.lanewise(VectorOperators.ADD,
Float16Vector.fromArray(SPECIES, input2, i))
.intoArray(output, i);
}
// Tail: fewer than SPECIES.length() elements remain.
if (i < LEN) {
VectorMask<Float16> mask = SPECIES.indexInRange(i, LEN);
Float16Vector.fromArray(SPECIES, input1, i, mask)
.lanewise(VectorOperators.ADD,
Float16Vector.fromArray(SPECIES, input2, i, mask))
.intoArray(output, i, mask);
}
}
/**
 * Verifies vectorAddFloat16: each output element must equal the float sum
 * of the two operands, converted back to binary16.
 */
@Check(test="vectorAddFloat16")
void checkResultAdd() {
    for (int lane = 0; lane < LEN; lane++) {
        final float lhs = float16ToFloat(input1[lane]);
        final float rhs = float16ToFloat(input2[lane]);
        final short reference = floatToFloat16(lhs + rhs);
        assertResults(2, input1[lane], input2[lane], reference, output[lane]);
    }
}
/**
 * Kernel: output = input1 - input2, lane-wise on Float16 vectors.
 * Full-width iterations run up to SPECIES.loopBound(LEN); the remaining
 * elements are handled by one iteration whose loads and store are masked.
 * The IR rules require at least one SubVHF node when any of
 * avx512_fp16/zvfh/sve is present, or when both fphp and asimdhp are.
 */
@Test
@IR(counts = {IRNode.SUB_VHF, " >0 "},
applyIfCPUFeatureOr = {"avx512_fp16", "true", "zvfh", "true", "sve", "true"})
@IR(counts = {IRNode.SUB_VHF, " >0 "},
applyIfCPUFeatureAnd = {"fphp", "true", "asimdhp", "true"})
void vectorSubFloat16() {
int i = 0;
for (; i < SPECIES.loopBound(LEN); i += SPECIES.length()) {
Float16Vector.fromArray(SPECIES, input1, i)
.lanewise(VectorOperators.SUB,
Float16Vector.fromArray(SPECIES, input2, i))
.intoArray(output, i);
}
// Tail: fewer than SPECIES.length() elements remain.
if (i < LEN) {
VectorMask<Float16> mask = SPECIES.indexInRange(i, LEN);
Float16Vector.fromArray(SPECIES, input1, i, mask)
.lanewise(VectorOperators.SUB,
Float16Vector.fromArray(SPECIES, input2, i, mask))
.intoArray(output, i, mask);
}
}
/**
 * Verifies vectorSubFloat16: each output element must equal the float
 * difference of the two operands, converted back to binary16.
 */
@Check(test="vectorSubFloat16")
void checkResultSub() {
    for (int lane = 0; lane < LEN; lane++) {
        final float minuend = float16ToFloat(input1[lane]);
        final float subtrahend = float16ToFloat(input2[lane]);
        final short reference = floatToFloat16(minuend - subtrahend);
        assertResults(2, input1[lane], input2[lane], reference, output[lane]);
    }
}
/**
 * Kernel: output = input1 * input2, lane-wise on Float16 vectors.
 * Full-width iterations run up to SPECIES.loopBound(LEN); the remaining
 * elements are handled by one iteration whose loads and store are masked.
 * The IR rules require at least one MulVHF node when any of
 * avx512_fp16/zvfh/sve is present, or when both fphp and asimdhp are.
 */
@Test
@IR(counts = {IRNode.MUL_VHF, " >0 "},
applyIfCPUFeatureOr = {"avx512_fp16", "true", "zvfh", "true", "sve", "true"})
@IR(counts = {IRNode.MUL_VHF, " >0 "},
applyIfCPUFeatureAnd = {"fphp", "true", "asimdhp", "true"})
void vectorMulFloat16() {
int i = 0;
for (; i < SPECIES.loopBound(LEN); i += SPECIES.length()) {
Float16Vector.fromArray(SPECIES, input1, i)
.lanewise(VectorOperators.MUL,
Float16Vector.fromArray(SPECIES, input2, i))
.intoArray(output, i);
}
// Tail: fewer than SPECIES.length() elements remain.
if (i < LEN) {
VectorMask<Float16> mask = SPECIES.indexInRange(i, LEN);
Float16Vector.fromArray(SPECIES, input1, i, mask)
.lanewise(VectorOperators.MUL,
Float16Vector.fromArray(SPECIES, input2, i, mask))
.intoArray(output, i, mask);
}
}
/**
 * Verifies vectorMulFloat16: each output element must equal the float
 * product of the two operands, converted back to binary16.
 */
@Check(test="vectorMulFloat16")
void checkResultMul() {
    for (int lane = 0; lane < LEN; lane++) {
        final float lhs = float16ToFloat(input1[lane]);
        final float rhs = float16ToFloat(input2[lane]);
        final short reference = floatToFloat16(lhs * rhs);
        assertResults(2, input1[lane], input2[lane], reference, output[lane]);
    }
}
/**
 * Kernel: output = input1 / input2, lane-wise on Float16 vectors.
 * Full-width iterations run up to SPECIES.loopBound(LEN); the remaining
 * elements are handled by one iteration whose loads and store are masked.
 * The IR rules require at least one DivVHF node when any of
 * avx512_fp16/zvfh/sve is present, or when both fphp and asimdhp are.
 */
@Test
@IR(counts = {IRNode.DIV_VHF, " >0 "},
applyIfCPUFeatureOr = {"avx512_fp16", "true", "zvfh", "true", "sve", "true"})
@IR(counts = {IRNode.DIV_VHF, " >0 "},
applyIfCPUFeatureAnd = {"fphp", "true", "asimdhp", "true"})
void vectorDivFloat16() {
int i = 0;
for (; i < SPECIES.loopBound(LEN); i += SPECIES.length()) {
Float16Vector.fromArray(SPECIES, input1, i)
.lanewise(VectorOperators.DIV,
Float16Vector.fromArray(SPECIES, input2, i))
.intoArray(output, i);
}
// Tail: fewer than SPECIES.length() elements remain.
if (i < LEN) {
VectorMask<Float16> mask = SPECIES.indexInRange(i, LEN);
Float16Vector.fromArray(SPECIES, input1, i, mask)
.lanewise(VectorOperators.DIV,
Float16Vector.fromArray(SPECIES, input2, i, mask))
.intoArray(output, i, mask);
}
}
/**
 * Verifies vectorDivFloat16: each output element must equal the float
 * quotient of the two operands, converted back to binary16.
 */
@Check(test="vectorDivFloat16")
void checkResultDiv() {
    for (int lane = 0; lane < LEN; lane++) {
        final float dividend = float16ToFloat(input1[lane]);
        final float divisor = float16ToFloat(input2[lane]);
        final short reference = floatToFloat16(dividend / divisor);
        assertResults(2, input1[lane], input2[lane], reference, output[lane]);
    }
}
/**
 * Kernel: output = min(input1, input2), lane-wise on Float16 vectors.
 * Full-width iterations run up to SPECIES.loopBound(LEN); the remaining
 * elements are handled by one iteration whose loads and store are masked.
 * The IR rules require at least one MinVHF node when any of
 * avx512_fp16/zvfh/sve is present, or when both fphp and asimdhp are.
 */
@Test
@IR(counts = {IRNode.MIN_VHF, " >0 "},
applyIfCPUFeatureOr = {"avx512_fp16", "true", "zvfh", "true", "sve", "true"})
@IR(counts = {IRNode.MIN_VHF, " >0 "},
applyIfCPUFeatureAnd = {"fphp", "true", "asimdhp", "true"})
void vectorMinFloat16() {
int i = 0;
for (; i < SPECIES.loopBound(LEN); i += SPECIES.length()) {
Float16Vector.fromArray(SPECIES, input1, i)
.lanewise(VectorOperators.MIN,
Float16Vector.fromArray(SPECIES, input2, i))
.intoArray(output, i);
}
// Tail: fewer than SPECIES.length() elements remain.
if (i < LEN) {
VectorMask<Float16> mask = SPECIES.indexInRange(i, LEN);
Float16Vector.fromArray(SPECIES, input1, i, mask)
.lanewise(VectorOperators.MIN,
Float16Vector.fromArray(SPECIES, input2, i, mask))
.intoArray(output, i, mask);
}
}
/**
 * Verifies vectorMinFloat16: each output element must equal Math.min of
 * the two operands (as floats), converted back to binary16.
 */
@Check(test="vectorMinFloat16")
void checkResultMin() {
    for (int lane = 0; lane < LEN; lane++) {
        final float lhs = float16ToFloat(input1[lane]);
        final float rhs = float16ToFloat(input2[lane]);
        final short reference = floatToFloat16(Math.min(lhs, rhs));
        assertResults(2, input1[lane], input2[lane], reference, output[lane]);
    }
}
/**
 * Kernel: output = max(input1, input2), lane-wise on Float16 vectors.
 * Full-width iterations run up to SPECIES.loopBound(LEN); the remaining
 * elements are handled by one iteration whose loads and store are masked.
 * The IR rules require at least one MaxVHF node when any of
 * avx512_fp16/zvfh/sve is present, or when both fphp and asimdhp are.
 */
@Test
@IR(counts = {IRNode.MAX_VHF, " >0 "},
applyIfCPUFeatureOr = {"avx512_fp16", "true", "zvfh", "true", "sve", "true"})
@IR(counts = {IRNode.MAX_VHF, " >0 "},
applyIfCPUFeatureAnd = {"fphp", "true", "asimdhp", "true"})
void vectorMaxFloat16() {
int i = 0;
for (; i < SPECIES.loopBound(LEN); i += SPECIES.length()) {
Float16Vector.fromArray(SPECIES, input1, i)
.lanewise(VectorOperators.MAX,
Float16Vector.fromArray(SPECIES, input2, i))
.intoArray(output, i);
}
// Tail: fewer than SPECIES.length() elements remain.
if (i < LEN) {
VectorMask<Float16> mask = SPECIES.indexInRange(i, LEN);
Float16Vector.fromArray(SPECIES, input1, i, mask)
.lanewise(VectorOperators.MAX,
Float16Vector.fromArray(SPECIES, input2, i, mask))
.intoArray(output, i, mask);
}
}
/**
 * Verifies vectorMaxFloat16: each output element must equal Math.max of
 * the two operands (as floats), converted back to binary16.
 */
@Check(test="vectorMaxFloat16")
void checkResultMax() {
    for (int lane = 0; lane < LEN; lane++) {
        final float lhs = float16ToFloat(input1[lane]);
        final float rhs = float16ToFloat(input2[lane]);
        final short reference = floatToFloat16(Math.max(lhs, rhs));
        assertResults(2, input1[lane], input2[lane], reference, output[lane]);
    }
}
/**
 * Kernel: output = sqrt(input1), lane-wise on Float16 vectors.
 * Full-width iterations run up to SPECIES.loopBound(LEN); the remaining
 * elements are handled by one iteration whose load and store are masked
 * (the SQRT itself is applied without a mask).
 * The IR rules require at least one SqrtVHF node when any of
 * avx512_fp16/zvfh/sve is present, or when both fphp and asimdhp are.
 */
@Test
@IR(counts = {IRNode.SQRT_VHF, " >0 "},
applyIfCPUFeatureOr = {"avx512_fp16", "true", "zvfh", "true", "sve", "true"})
@IR(counts = {IRNode.SQRT_VHF, " >0 "},
applyIfCPUFeatureAnd = {"fphp", "true", "asimdhp", "true"})
void vectorSqrtFloat16() {
int i = 0;
for (; i < SPECIES.loopBound(LEN); i += SPECIES.length()) {
Float16Vector.fromArray(SPECIES, input1, i)
.lanewise(VectorOperators.SQRT)
.intoArray(output, i);
}
// Tail: fewer than SPECIES.length() elements remain.
if (i < LEN) {
VectorMask<Float16> mask = SPECIES.indexInRange(i, LEN);
Float16Vector.fromArray(SPECIES, input1, i, mask)
.lanewise(VectorOperators.SQRT)
.intoArray(output, i, mask);
}
}
/**
 * Verifies vectorSqrtFloat16: each output element must equal the scalar
 * Float16 sqrt of the corresponding input.
 */
@Check(test="vectorSqrtFloat16")
void checkResultSqrt() {
    for (int lane = 0; lane < LEN; lane++) {
        final Float16 operand = shortBitsToFloat16(input1[lane]);
        final short reference = float16ToRawShortBits(sqrt(operand));
        assertResults(1, input1[lane], reference, output[lane]);
    }
}
/**
 * Kernel: lane-wise FMA over Float16 vectors with input1 as the receiver
 * and input2, input3 as the two further vector operands.
 * Full-width iterations run up to SPECIES.loopBound(LEN); the remaining
 * elements are handled by one iteration whose loads and store are masked.
 * The IR rules require at least one FmaVHF node when any of
 * avx512_fp16/zvfh/sve is present, or when both fphp and asimdhp are.
 */
@Test
@IR(counts = {IRNode.FMA_VHF, " >0 "},
applyIfCPUFeatureOr = {"avx512_fp16", "true", "zvfh", "true", "sve", "true"})
@IR(counts = {IRNode.FMA_VHF, " >0 "},
applyIfCPUFeatureAnd = {"fphp", "true", "asimdhp", "true"})
void vectorFmaFloat16() {
int i = 0;
for (; i < SPECIES.loopBound(LEN); i += SPECIES.length()) {
Float16Vector.fromArray(SPECIES, input1, i)
.lanewise(VectorOperators.FMA,
Float16Vector.fromArray(SPECIES, input2, i),
Float16Vector.fromArray(SPECIES, input3, i))
.intoArray(output, i);
}
// Tail: fewer than SPECIES.length() elements remain.
if (i < LEN) {
VectorMask<Float16> mask = SPECIES.indexInRange(i, LEN);
Float16Vector.fromArray(SPECIES, input1, i, mask)
.lanewise(VectorOperators.FMA,
Float16Vector.fromArray(SPECIES, input2, i, mask),
Float16Vector.fromArray(SPECIES, input3, i, mask))
.intoArray(output, i, mask);
}
}
/**
 * Verifies vectorFmaFloat16: each output element must equal the scalar
 * Float16 fma of the three corresponding inputs.
 */
@Check(test="vectorFmaFloat16")
void checkResultFma() {
    for (int lane = 0; lane < LEN; lane++) {
        final Float16 first = shortBitsToFloat16(input1[lane]);
        final Float16 second = shortBitsToFloat16(input2[lane]);
        final Float16 third = shortBitsToFloat16(input3[lane]);
        final short reference = float16ToRawShortBits(fma(first, second, third));
        assertResults(3, input1[lane], input2[lane], input3[lane], reference, output[lane]);
    }
}
/**
 * Kernel: lane-wise FMA with input1 as the receiver and two scalar
 * operands: the static field FP16_SCALAR and the literal
 * floatToFloat16(3.0f).
 * Full-width iterations run up to SPECIES.loopBound(LEN); the remaining
 * elements are handled by one iteration whose load and store are masked.
 * The IR rules require at least one FmaVHF node when any of
 * avx512_fp16/zvfh/sve is present, or when both fphp and asimdhp are.
 */
@Test
@IR(counts = {IRNode.FMA_VHF, " >0 "},
applyIfCPUFeatureOr = {"avx512_fp16", "true", "zvfh", "true", "sve", "true"})
@IR(counts = {IRNode.FMA_VHF, " >0 "},
applyIfCPUFeatureAnd = {"fphp", "true", "asimdhp", "true"})
void vectorFmaFloat16ScalarMixedConstants() {
int i = 0;
for (; i < SPECIES.loopBound(LEN); i += SPECIES.length()) {
Float16Vector.fromArray(SPECIES, input1, i)
.lanewise(VectorOperators.FMA,
FP16_SCALAR,
floatToFloat16(3.0f))
.intoArray(output, i);
}
// Tail: fewer than SPECIES.length() elements remain.
if (i < LEN) {
VectorMask<Float16> mask = SPECIES.indexInRange(i, LEN);
Float16Vector.fromArray(SPECIES, input1, i, mask)
.lanewise(VectorOperators.FMA,
FP16_SCALAR,
floatToFloat16(3.0f))
.intoArray(output, i, mask);
}
}
/**
 * Scalar reference check for {@code vectorFmaFloat16ScalarMixedConstants}: each output
 * lane must equal fma(input1[i], FP16_SCALAR, 3.0) computed in Float16.
 *
 * All three FMA operands are handed to assertResults (arity 3), matching the other
 * FMA checkers in this file, so a mismatch report is not missing the addend.
 */
@Check(test="vectorFmaFloat16ScalarMixedConstants")
void checkResultFmaScalarMixedConstants() {
    // Raw binary16 bits of the constant addend used by the kernel.
    short addendBits = floatToFloat16(3.0f);
    for (int i = 0; i < LEN; ++i) {
        short expected = float16ToRawShortBits(fma(shortBitsToFloat16(input1[i]), shortBitsToFloat16(FP16_SCALAR),
                                                   shortBitsToFloat16(addendBits)));
        assertResults(3, input1[i], FP16_SCALAR, addendBits, expected, output[i]);
    }
}
// Vector API kernel: lane-wise FMA of input1 and input2 vectors with a scalar third
// operand (raw bits of 3.0f).
// The two @IR rules require the FMA_VHF node count to be > 0; the first is keyed
// on any of avx512_fp16 / zvfh / sve, the second on both fphp and asimdhp.
@Test
@IR(counts = {IRNode.FMA_VHF, " >0 "},
applyIfCPUFeatureOr = {"avx512_fp16", "true", "zvfh", "true", "sve", "true"})
@IR(counts = {IRNode.FMA_VHF, " >0 "},
applyIfCPUFeatureAnd = {"fphp", "true", "asimdhp", "true"})
void vectorFmaFloat16MixedConstants() {
// Local scalar operand; the name hides the input3 array used by other tests in this file.
short input3 = floatToFloat16(3.0f);
int i = 0;
// Main loop: whole vectors up to SPECIES.loopBound(LEN).
for (; i < SPECIES.loopBound(LEN); i += SPECIES.length()) {
Float16Vector.fromArray(SPECIES, input1, i)
.lanewise(VectorOperators.FMA,
Float16Vector.fromArray(SPECIES, input2, i),
input3)
.intoArray(output, i);
}
// Tail: the remaining lanes [i, LEN) are loaded and stored under a mask.
if (i < LEN) {
VectorMask<Float16> mask = SPECIES.indexInRange(i, LEN);
Float16Vector.fromArray(SPECIES, input1, i, mask)
.lanewise(VectorOperators.FMA,
Float16Vector.fromArray(SPECIES, input2, i, mask),
input3)
.intoArray(output, i, mask);
}
}
/**
 * Scalar reference check for {@code vectorFmaFloat16MixedConstants}: each output lane
 * must equal fma(input1[i], input2[i], 3.0) computed in Float16.
 */
@Check(test="vectorFmaFloat16MixedConstants")
void checkResultFmaMixedConstants() {
    final short addendBits = floatToFloat16(3.0f);
    int lane = 0;
    while (lane < LEN) {
        Float16 product = fma(shortBitsToFloat16(input1[lane]),
                              shortBitsToFloat16(input2[lane]),
                              shortBitsToFloat16(addendBits));
        assertResults(3, input1[lane], input2[lane], addendBits,
                      float16ToRawShortBits(product), output[lane]);
        lane++;
    }
}
// Vector API kernel: FMA where all three operands are constants; the first is
// broadcast into a vector and the other two are passed as scalars. Only the store
// into output depends on the loop index.
// The two @IR rules require the FMA_VHF node count to be > 0; the first is keyed
// on any of avx512_fp16 / zvfh / sve, the second on both fphp and asimdhp.
@Test
@IR(counts = {IRNode.FMA_VHF, " >0 "},
applyIfCPUFeatureOr = {"avx512_fp16", "true", "zvfh", "true", "sve", "true"})
@IR(counts = {IRNode.FMA_VHF, " >0 "},
applyIfCPUFeatureAnd = {"fphp", "true", "asimdhp", "true"})
void vectorFmaFloat16AllConstants() {
// Raw binary16 bits of 1.0, 2.0 and 3.0; these locals hide the same-named arrays
// used by other tests in this file.
short input1 = floatToFloat16(1.0f);
short input2 = floatToFloat16(2.0f);
short input3 = floatToFloat16(3.0f);
int i = 0;
// Main loop: whole vectors up to SPECIES.loopBound(LEN).
for (; i < SPECIES.loopBound(LEN); i += SPECIES.length()) {
Float16Vector.broadcast(SPECIES, input1)
.lanewise(VectorOperators.FMA,
input2,
input3)
.intoArray(output, i);
}
// Tail: only the store is masked, since there is no array load in this kernel.
if (i < LEN) {
VectorMask<Float16> mask = SPECIES.indexInRange(i, LEN);
Float16Vector.broadcast(SPECIES, input1)
.lanewise(VectorOperators.FMA,
input2,
input3)
.intoArray(output, i, mask);
}
}
/**
 * Scalar reference check for {@code vectorFmaFloat16AllConstants}: every output lane
 * must equal fma(1.0, 2.0, 3.0) computed in Float16.
 */
@Check(test="vectorFmaFloat16AllConstants")
void checkResultFmaAllConstants() {
    final short aBits = floatToFloat16(1.0f);
    final short bBits = floatToFloat16(2.0f);
    final short cBits = floatToFloat16(3.0f);
    // The reference value does not depend on the lane, so compute it once.
    final short expected = float16ToRawShortBits(
            fma(shortBitsToFloat16(aBits), shortBitsToFloat16(bBits), shortBitsToFloat16(cBits)));
    for (int lane = 0; lane < LEN; lane++) {
        assertResults(3, aBits, bBits, cBits, expected, output[lane]);
    }
}
// Vector API kernel: adds the scalar FP16_CONST (passed as raw short bits) to every
// lane of input1 and stores the result into output.
// The two @IR rules require the ADD_VHF node count to be > 0; the first is keyed
// on any of avx512_fp16 / zvfh / sve, the second on both fphp and asimdhp.
@Test
@IR(counts = {IRNode.ADD_VHF, " >0 "},
applyIfCPUFeatureOr = {"avx512_fp16", "true", "zvfh", "true", "sve", "true"})
@IR(counts = {IRNode.ADD_VHF, " >0 "},
applyIfCPUFeatureAnd = {"fphp", "true", "asimdhp", "true"})
void vectorAddConstInputFloat16() {
int i = 0;
// Main loop: whole vectors up to SPECIES.loopBound(LEN).
for (; i < SPECIES.loopBound(LEN); i += SPECIES.length()) {
Float16Vector.fromArray(SPECIES, input1, i)
.lanewise(VectorOperators.ADD,
float16ToRawShortBits(FP16_CONST))
.intoArray(output, i);
}
// Tail: the remaining lanes [i, LEN) are loaded and stored under a mask.
if (i < LEN) {
VectorMask<Float16> mask = SPECIES.indexInRange(i, LEN);
Float16Vector.fromArray(SPECIES, input1, i, mask)
.lanewise(VectorOperators.ADD,
float16ToRawShortBits(FP16_CONST))
.intoArray(output, i, mask);
}
}
/**
 * Scalar reference check for {@code vectorAddConstInputFloat16}: the sum is formed in
 * float from the widened lane and FP16_CONST, then narrowed back to binary16 bits.
 */
@Check(test="vectorAddConstInputFloat16")
void checkResultAddConstantInputFloat16() {
    final float addend = FP16_CONST.floatValue();
    final short addendBits = float16ToRawShortBits(FP16_CONST);
    int lane = 0;
    while (lane < LEN) {
        float sum = float16ToFloat(input1[lane]) + addend;
        assertResults(2, input1[lane], addendBits, floatToFloat16(sum), output[lane]);
        lane++;
    }
}
// Vector API kernel: subtracts the scalar FP16_CONST (passed as raw short bits) from
// every lane of input1 and stores the result into output.
// The two @IR rules require the SUB_VHF node count to be > 0; the first is keyed
// on either avx512_fp16 or sve, the second on both fphp and asimdhp.
@Test
@IR(counts = {IRNode.SUB_VHF, " >0 "},
applyIfCPUFeatureOr = {"avx512_fp16", "true", "sve", "true"})
@IR(counts = {IRNode.SUB_VHF, " >0 "},
applyIfCPUFeatureAnd = {"fphp", "true", "asimdhp", "true"})
void vectorSubConstInputFloat16() {
int i = 0;
// Main loop: whole vectors up to SPECIES.loopBound(LEN).
for (; i < SPECIES.loopBound(LEN); i += SPECIES.length()) {
Float16Vector.fromArray(SPECIES, input1, i)
.lanewise(VectorOperators.SUB,
float16ToRawShortBits(FP16_CONST))
.intoArray(output, i);
}
// Tail: the remaining lanes [i, LEN) are loaded and stored under a mask.
if (i < LEN) {
VectorMask<Float16> mask = SPECIES.indexInRange(i, LEN);
Float16Vector.fromArray(SPECIES, input1, i, mask)
.lanewise(VectorOperators.SUB,
float16ToRawShortBits(FP16_CONST))
.intoArray(output, i, mask);
}
}
/**
 * Scalar reference check for {@code vectorSubConstInputFloat16}: the difference is
 * formed in float (lane minus FP16_CONST) and narrowed back to binary16 bits.
 */
@Check(test="vectorSubConstInputFloat16")
void checkResultSubConstantInputFloat16() {
    final float subtrahend = FP16_CONST.floatValue();
    final short subtrahendBits = float16ToRawShortBits(FP16_CONST);
    int lane = 0;
    while (lane < LEN) {
        float difference = float16ToFloat(input1[lane]) - subtrahend;
        assertResults(2, input1[lane], subtrahendBits, floatToFloat16(difference), output[lane]);
        lane++;
    }
}
// Vector API kernel: multiplies every lane of input2 by the scalar FP16_CONST (passed
// as raw short bits) and stores the result into output.
// The two @IR rules require the MUL_VHF node count to be > 0; the first is keyed
// on either avx512_fp16 or sve, the second on both fphp and asimdhp.
@Test
@IR(counts = {IRNode.MUL_VHF, " >0 "},
applyIfCPUFeatureOr = {"avx512_fp16", "true", "sve", "true"})
@IR(counts = {IRNode.MUL_VHF, " >0 "},
applyIfCPUFeatureAnd = {"fphp", "true", "asimdhp", "true"})
void vectorMulConstantInputFloat16() {
int i = 0;
// Main loop: whole vectors up to SPECIES.loopBound(LEN).
for (; i < SPECIES.loopBound(LEN); i += SPECIES.length()) {
Float16Vector.fromArray(SPECIES, input2, i)
.lanewise(VectorOperators.MUL,
float16ToRawShortBits(FP16_CONST))
.intoArray(output, i);
}
// Tail: the remaining lanes [i, LEN) are loaded and stored under a mask.
if (i < LEN) {
VectorMask<Float16> mask = SPECIES.indexInRange(i, LEN);
Float16Vector.fromArray(SPECIES, input2, i, mask)
.lanewise(VectorOperators.MUL,
float16ToRawShortBits(FP16_CONST))
.intoArray(output, i, mask);
}
}
/**
 * Scalar reference check for {@code vectorMulConstantInputFloat16}: the product of
 * FP16_CONST and the widened input2 lane is formed in float and narrowed to binary16.
 */
@Check(test="vectorMulConstantInputFloat16")
void checkResultMulConstantInputFloat16() {
    final float factor = FP16_CONST.floatValue();
    final short factorBits = float16ToRawShortBits(FP16_CONST);
    int lane = 0;
    while (lane < LEN) {
        float product = factor * float16ToFloat(input2[lane]);
        assertResults(2, factorBits, input2[lane], floatToFloat16(product), output[lane]);
        lane++;
    }
}
// Vector API kernel: divides every lane of input2 by the scalar FP16_CONST (passed as
// raw short bits) and stores the result into output.
// The two @IR rules require the DIV_VHF node count to be > 0; the first is keyed
// on either avx512_fp16 or sve, the second on both fphp and asimdhp.
@Test
@IR(counts = {IRNode.DIV_VHF, " >0 "},
applyIfCPUFeatureOr = {"avx512_fp16", "true", "sve", "true"})
@IR(counts = {IRNode.DIV_VHF, " >0 "},
applyIfCPUFeatureAnd = {"fphp", "true", "asimdhp", "true"})
void vectorDivConstantInputFloat16() {
int i = 0;
// Main loop: whole vectors up to SPECIES.loopBound(LEN).
for (; i < SPECIES.loopBound(LEN); i += SPECIES.length()) {
Float16Vector.fromArray(SPECIES, input2, i)
.lanewise(VectorOperators.DIV,
float16ToRawShortBits(FP16_CONST))
.intoArray(output, i);
}
// Tail: the remaining lanes [i, LEN) are loaded and stored under a mask.
if (i < LEN) {
VectorMask<Float16> mask = SPECIES.indexInRange(i, LEN);
Float16Vector.fromArray(SPECIES, input2, i, mask)
.lanewise(VectorOperators.DIV,
float16ToRawShortBits(FP16_CONST))
.intoArray(output, i, mask);
}
}
/**
 * Scalar reference check for {@code vectorDivConstantInputFloat16}: the quotient
 * (lane divided by FP16_CONST) is formed in float and narrowed to binary16 bits.
 */
@Check(test="vectorDivConstantInputFloat16")
void checkResultDivConstantInputFloat16() {
    final float divisor = FP16_CONST.floatValue();
    final short divisorBits = float16ToRawShortBits(FP16_CONST);
    int lane = 0;
    while (lane < LEN) {
        float quotient = float16ToFloat(input2[lane]) / divisor;
        assertResults(2, input2[lane], divisorBits, floatToFloat16(quotient), output[lane]);
        lane++;
    }
}
// Vector API kernel: lane-wise MAX of input2 against the scalar FP16_CONST (passed as
// raw short bits), stored into output.
// The two @IR rules require the MAX_VHF node count to be > 0; the first is keyed
// on either avx512_fp16 or sve, the second on both fphp and asimdhp.
@Test
@IR(counts = {IRNode.MAX_VHF, " >0 "},
applyIfCPUFeatureOr = {"avx512_fp16", "true", "sve", "true"})
@IR(counts = {IRNode.MAX_VHF, " >0 "},
applyIfCPUFeatureAnd = {"fphp", "true", "asimdhp", "true"})
void vectorMaxConstantInputFloat16() {
int i = 0;
// Main loop: whole vectors up to SPECIES.loopBound(LEN).
for (; i < SPECIES.loopBound(LEN); i += SPECIES.length()) {
Float16Vector.fromArray(SPECIES, input2, i)
.lanewise(VectorOperators.MAX,
float16ToRawShortBits(FP16_CONST))
.intoArray(output, i);
}
// Tail: the remaining lanes [i, LEN) are loaded and stored under a mask.
if (i < LEN) {
VectorMask<Float16> mask = SPECIES.indexInRange(i, LEN);
Float16Vector.fromArray(SPECIES, input2, i, mask)
.lanewise(VectorOperators.MAX,
float16ToRawShortBits(FP16_CONST))
.intoArray(output, i, mask);
}
}
/**
 * Scalar reference check for {@code vectorMaxConstantInputFloat16}: Math.max of
 * FP16_CONST and the widened input2 lane, narrowed back to binary16 bits.
 */
@Check(test="vectorMaxConstantInputFloat16")
void checkResultMaxConstantInputFloat16() {
    final float bound = FP16_CONST.floatValue();
    final short boundBits = float16ToRawShortBits(FP16_CONST);
    int lane = 0;
    while (lane < LEN) {
        float larger = Math.max(bound, float16ToFloat(input2[lane]));
        assertResults(2, boundBits, input2[lane], floatToFloat16(larger), output[lane]);
        lane++;
    }
}
// Vector API kernel: lane-wise MIN of input2 against the scalar FP16_CONST (passed as
// raw short bits), stored into output.
// The two @IR rules require the MIN_VHF node count to be > 0; the first is keyed
// on either avx512_fp16 or sve, the second on both fphp and asimdhp.
@Test
@IR(counts = {IRNode.MIN_VHF, " >0 "},
applyIfCPUFeatureOr = {"avx512_fp16", "true", "sve", "true"})
@IR(counts = {IRNode.MIN_VHF, " >0 "},
applyIfCPUFeatureAnd = {"fphp", "true", "asimdhp", "true"})
void vectorMinConstantInputFloat16() {
int i = 0;
// Main loop: whole vectors up to SPECIES.loopBound(LEN).
for (; i < SPECIES.loopBound(LEN); i += SPECIES.length()) {
Float16Vector.fromArray(SPECIES, input2, i)
.lanewise(VectorOperators.MIN,
float16ToRawShortBits(FP16_CONST))
.intoArray(output, i);
}
// Tail: the remaining lanes [i, LEN) are loaded and stored under a mask.
if (i < LEN) {
VectorMask<Float16> mask = SPECIES.indexInRange(i, LEN);
Float16Vector.fromArray(SPECIES, input2, i, mask)
.lanewise(VectorOperators.MIN,
float16ToRawShortBits(FP16_CONST))
.intoArray(output, i, mask);
}
}
/**
 * Scalar reference check for {@code vectorMinConstantInputFloat16}: Math.min of
 * FP16_CONST and the widened input2 lane, narrowed back to binary16 bits.
 */
@Check(test="vectorMinConstantInputFloat16")
void checkResultMinConstantInputFloat16() {
    final float bound = FP16_CONST.floatValue();
    final short boundBits = float16ToRawShortBits(FP16_CONST);
    int lane = 0;
    while (lane < LEN) {
        float smaller = Math.min(bound, float16ToFloat(input2[lane]));
        assertResults(2, boundBits, input2[lane], floatToFloat16(smaller), output[lane]);
        lane++;
    }
}
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, 2021, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2020, 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -41,6 +41,7 @@ import java.util.List;
import java.util.function.BiConsumer;
import java.util.function.Function;
import java.util.function.IntFunction;
import jdk.incubator.vector.Float16;
abstract class AbstractVectorConversionTest {
@ -156,6 +157,31 @@ abstract class AbstractVectorConversionTest {
return a;
}
// Element generator for Float16 source arrays: returns the 16-bit value that
// fill_float16 stores directly at index i of a short[].
interface ToFloat16F {
short apply(int i);
}
/**
 * Allocates a fresh short array of length {@code s} and delegates to the
 * array-filling overload.
 */
static short[] fill_float16(int s, ToFloat16F f) {
    short[] fresh = new short[s];
    return fill_float16(fresh, f);
}
/**
 * Fills {@code a} in place with {@code f.apply(i)} for each index and returns it.
 * When the array has more than seven elements, the first seven are then overwritten
 * with the raw bits of special Float16 values so that corner cases are always covered.
 */
static short[] fill_float16(short[] a, ToFloat16F f) {
    int idx = 0;
    while (idx < a.length) {
        a[idx] = f.apply(idx);
        idx++;
    }
    if (a.length <= 7) {
        return a;
    }
    short[] specials = {
        Float16.float16ToRawShortBits(Float16.MAX_VALUE),
        Float16.float16ToRawShortBits(Float16.MIN_VALUE),
        Float16.float16ToRawShortBits(Float16.NEGATIVE_INFINITY),
        Float16.float16ToRawShortBits(Float16.POSITIVE_INFINITY),
        Float16.float16ToRawShortBits(Float16.NaN),
        (short)0.0,      // all bits clear
        Short.MIN_VALUE  // only the top (sign) bit set
    };
    System.arraycopy(specials, 0, a, 0, specials.length);
    return a;
}
// Source-array generators for byte lanes: given a size s, the array is produced by
// fill_byte with (byte)(i + 1) as the per-index seed. The withToString label
// "byte(i)" names the generator.
static final List<IntFunction<byte[]>> BYTE_GENERATORS = List.of(
withToString("byte(i)", (int s) -> fill_byte(s, i -> (byte) (i + 1)))
);
@ -180,6 +206,10 @@ abstract class AbstractVectorConversionTest {
withToString("double(i)", (int s) -> fill_double(s, i -> (double) (i * 10 + 0.1)))
);
// Source-array generators for Float16 lanes. The arrays are short[] and the seed
// (short)(i * 100 + 1) is stored by fill_float16 as-is, i.e. it is used as a raw
// 16-bit pattern rather than converted from a numeric value.
static final List<IntFunction<short[]>> FLOAT16_GENERATORS = List.of(
withToString("Float16(i)", (int s) -> fill_float16(s, i -> (short) (i * 100 + 1)))
);
static List<?> sourceGenerators(Class<?> src) {
if (src == byte.class) {
return BYTE_GENERATORS;
@ -199,6 +229,9 @@ abstract class AbstractVectorConversionTest {
else if (src == double.class) {
return DOUBLE_GENERATORS;
}
else if (src == Float16.class) {
return FLOAT16_GENERATORS;
}
else
throw new IllegalStateException();
}
@ -206,11 +239,11 @@ abstract class AbstractVectorConversionTest {
static Object[][] fixedShapeXFixedShapeSpeciesArgs(VectorShape shape) {
List<Object[]> args = new ArrayList<>();
for (Class<?> srcE : List.of(byte.class, short.class, int.class, long.class, float.class, double.class)) {
for (Class<?> srcE : List.of(byte.class, short.class, int.class, long.class, float.class, double.class, Float16.class)) {
VectorSpecies<?> src = VectorSpecies.of(srcE, shape);
List<?> srcGens = sourceGenerators(srcE);
for (Class<?> dstE : List.of(byte.class, short.class, int.class, long.class, float.class, double.class)) {
for (Class<?> dstE : List.of(byte.class, short.class, int.class, long.class, float.class, double.class, Float16.class)) {
VectorSpecies<?> dst = VectorSpecies.of(dstE, shape);
for (Object srcGen : srcGens) {
@ -225,12 +258,12 @@ abstract class AbstractVectorConversionTest {
static Object[][] fixedShapeXShapeSpeciesArgs(VectorShape srcShape) {
List<Object[]> args = new ArrayList<>();
for (Class<?> srcE : List.of(byte.class, short.class, int.class, long.class, float.class, double.class)) {
for (Class<?> srcE : List.of(byte.class, short.class, int.class, long.class, float.class, double.class, Float16.class)) {
VectorSpecies<?> src = VectorSpecies.of(srcE, srcShape);
List<?> srcGens = sourceGenerators(srcE);
for (VectorShape dstShape : VectorShape.values()) {
for (Class<?> dstE : List.of(byte.class, short.class, int.class, long.class, float.class, double.class)) {
for (Class<?> dstE : List.of(byte.class, short.class, int.class, long.class, float.class, double.class, Float16.class)) {
VectorSpecies<?> dst = VectorSpecies.of(dstE, dstShape);
for (Object srcGen : srcGens) {
@ -245,10 +278,10 @@ abstract class AbstractVectorConversionTest {
static Object[][] fixedShapeXSegmentedCastSpeciesArgs(VectorShape srcShape, boolean legal) {
List<Object[]> args = new ArrayList<>();
for (Class<?> srcE : List.of(byte.class, short.class, int.class, long.class, float.class, double.class)) {
for (Class<?> srcE : List.of(byte.class, short.class, int.class, long.class, float.class, double.class, Float16.class)) {
VectorSpecies<?> src = VectorSpecies.of(srcE, srcShape);
for (VectorShape dstShape : VectorShape.values()) {
for (Class<?> dstE : List.of(byte.class, short.class, int.class, long.class, float.class, double.class)) {
for (Class<?> dstE : List.of(byte.class, short.class, int.class, long.class, float.class, double.class, Float16.class)) {
VectorSpecies<?> dst = VectorSpecies.of(dstE, dstShape);
if (legal == (dst.length() == src.length())) {
args.add(new Object[]{src, dst});
@ -261,6 +294,22 @@ abstract class AbstractVectorConversionTest {
public enum ConvAPI {CONVERT, CONVERTSHAPE, CASTSHAPE, REINTERPRETSHAPE}
/**
 * Converts a boxed primitive value to Float16 and returns the result's raw bits.
 * The conversion is chosen from the runtime class of {@code in}, so each boxed type
 * goes through the Float16.valueOf overload selected for its own primitive value.
 *
 * @throws IllegalStateException if {@code in} is not a Byte, Short, Integer, Long,
 *         Float or Double
 */
static Short float16_conversion_adapter(Number in) {
    final Class<?> boxedKind = in.getClass();
    final Float16 converted;
    if (boxedKind == Short.class) {
        converted = Float16.valueOf(in.shortValue());
    } else if (boxedKind == Integer.class) {
        converted = Float16.valueOf(in.intValue());
    } else if (boxedKind == Long.class) {
        converted = Float16.valueOf(in.longValue());
    } else if (boxedKind == Float.class) {
        converted = Float16.valueOf(in.floatValue());
    } else if (boxedKind == Double.class) {
        converted = Float16.valueOf(in.doubleValue());
    } else if (boxedKind == Byte.class) {
        converted = Float16.valueOf(in.byteValue());
    } else {
        throw new IllegalStateException();
    }
    return Float16.float16ToRawShortBits(converted);
}
static Function<Number, Object> convertValueFunction(Class<?> to) {
if (to == byte.class)
@ -273,6 +322,8 @@ abstract class AbstractVectorConversionTest {
return Number::longValue;
else if (to == float.class)
return Number::floatValue;
else if (to == Float16.class)
return (N) -> float16_conversion_adapter(N);
else if (to == double.class)
return Number::doubleValue;
else
@ -282,7 +333,7 @@ abstract class AbstractVectorConversionTest {
static BiConsumer<ByteBuffer, Object> putBufferValueFunction(Class<?> from) {
if (from == byte.class)
return (bb, o) -> bb.put((byte) o);
else if (from == short.class)
else if (from == short.class || from == Float16.class)
return (bb, o) -> bb.putShort((short) o);
else if (from == int.class)
return (bb, o) -> bb.putInt((int) o);
@ -299,7 +350,7 @@ abstract class AbstractVectorConversionTest {
static Function<ByteBuffer, Number> getBufferValueFunction(Class<?> to) {
if (to == byte.class)
return ByteBuffer::get;
else if (to == short.class)
else if (to == short.class || to == Float16.class)
return ByteBuffer::getShort;
else if (to == int.class)
return ByteBuffer::getInt;
@ -335,10 +386,23 @@ abstract class AbstractVectorConversionTest {
static void copyConversionArray(Object src, int srcPos,
Object dest, int destPos,
int length,
VectorSpecies srcSpecies,
VectorSpecies dstSpecies,
Function<Number, Object> c) {
if (srcSpecies.elementType() == dstSpecies.elementType()) {
System.arraycopy(src, srcPos, dest, destPos, length);
return;
}
for (int i = 0; i < length; i++) {
Number v = (Number) Array.get(src, srcPos + i);
Array.set(dest, destPos + i, c.apply(v));
if (srcSpecies.elementType() == Float16.class) {
v = (Number) Float16.shortBitsToFloat16(v.shortValue());
}
v = (Number) c.apply(v);
if (dstSpecies.elementType() == Float16.class) {
v = (Number) v.shortValue();
}
Array.set(dest, destPos + i, v);
}
}
@ -420,8 +484,14 @@ abstract class AbstractVectorConversionTest {
int[] parts = getPartsArray(m, is_contracting_conv);
Object expected = Array.newInstance(destSpecies.elementType(), out_len);
Object actual = Array.newInstance(destSpecies.elementType(), out_len);
Object expected = null, actual = null;
if (destSpecies.elementType() == Float16.class) {
expected = Array.newInstance(short.class, out_len);
actual = Array.newInstance(short.class, out_len);
} else {
expected = Array.newInstance(destSpecies.elementType(), out_len);
actual = Array.newInstance(destSpecies.elementType(), out_len);
}
Function<Number, Object> convertValue = convertValueFunction(destSpecies.elementType());
@ -432,11 +502,12 @@ abstract class AbstractVectorConversionTest {
if (is_contracting_conv) {
int start_idx = -part * src_species_len;
zeroArray(expected, j, dst_species_len);
copyConversionArray(in, i, expected, start_idx + j, src_species_len, convertValue);
copyConversionArray(in, i, expected, start_idx + j, src_species_len, srcSpecies, destSpecies, convertValue);
} else {
int start_idx = part * dst_species_len;
copyConversionArray(in, start_idx + i, expected, j, dst_species_len, convertValue);
copyConversionArray(in, start_idx + i, expected, j, dst_species_len, srcSpecies, destSpecies, convertValue);
}
}
for (int ic = 0; ic < INVOC_COUNT; ic++) {
@ -452,7 +523,6 @@ abstract class AbstractVectorConversionTest {
System.arraycopy(rv.toArray(), 0, actual, j, dst_species_len);
}
}
Assert.assertEquals(actual, expected);
}
@ -469,8 +539,14 @@ abstract class AbstractVectorConversionTest {
int[] parts = getPartsArray(m, is_contracting_conv);
Object expected = Array.newInstance(dstSpecies.elementType(), out_len);
Object actual = Array.newInstance(dstSpecies.elementType(), out_len);
Object expected = null, actual = null;
if (dstSpecies.elementType() == Float16.class) {
expected = Array.newInstance(short.class, out_len);
actual = Array.newInstance(short.class, out_len);
} else {
expected = Array.newInstance(dstSpecies.elementType(), out_len);
actual = Array.newInstance(dstSpecies.elementType(), out_len);
}
BiConsumer<ByteBuffer, Object> putValue = putBufferValueFunction(srcSpecies.elementType());
Function<ByteBuffer, Number> getValue = getBufferValueFunction(dstSpecies.elementType());

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1719,9 +1719,14 @@ public class ShortVector128Tests extends AbstractVectorTest {
assertEquals(asIntegral.species(), SPECIES);
}
@Test(expectedExceptions = UnsupportedOperationException.class)
@Test
void viewAsFloatingLanesTest() {
SPECIES.zero().viewAsFloatingLanes();
Vector<?> asFloating = SPECIES.zero().viewAsFloatingLanes();
VectorSpecies<?> asFloatingSpecies = asFloating.species();
Assert.assertNotEquals(asFloatingSpecies.elementType(), SPECIES.elementType());
assertEquals(asFloatingSpecies.vectorShape(), SPECIES.vectorShape());
assertEquals(asFloatingSpecies.length(), SPECIES.length());
assertEquals(asFloating.viewAsIntegralLanes().species(), SPECIES);
}
@Test

View File

@ -1719,9 +1719,14 @@ public class ShortVector256Tests extends AbstractVectorTest {
assertEquals(asIntegral.species(), SPECIES);
}
@Test(expectedExceptions = UnsupportedOperationException.class)
@Test
void viewAsFloatingLanesTest() {
SPECIES.zero().viewAsFloatingLanes();
Vector<?> asFloating = SPECIES.zero().viewAsFloatingLanes();
VectorSpecies<?> asFloatingSpecies = asFloating.species();
Assert.assertNotEquals(asFloatingSpecies.elementType(), SPECIES.elementType());
assertEquals(asFloatingSpecies.vectorShape(), SPECIES.vectorShape());
assertEquals(asFloatingSpecies.length(), SPECIES.length());
assertEquals(asFloating.viewAsIntegralLanes().species(), SPECIES);
}
@Test

View File

@ -1719,9 +1719,14 @@ public class ShortVector512Tests extends AbstractVectorTest {
assertEquals(asIntegral.species(), SPECIES);
}
@Test(expectedExceptions = UnsupportedOperationException.class)
@Test
void viewAsFloatingLanesTest() {
SPECIES.zero().viewAsFloatingLanes();
Vector<?> asFloating = SPECIES.zero().viewAsFloatingLanes();
VectorSpecies<?> asFloatingSpecies = asFloating.species();
Assert.assertNotEquals(asFloatingSpecies.elementType(), SPECIES.elementType());
assertEquals(asFloatingSpecies.vectorShape(), SPECIES.vectorShape());
assertEquals(asFloatingSpecies.length(), SPECIES.length());
assertEquals(asFloating.viewAsIntegralLanes().species(), SPECIES);
}
@Test

View File

@ -1719,9 +1719,14 @@ public class ShortVector64Tests extends AbstractVectorTest {
assertEquals(asIntegral.species(), SPECIES);
}
@Test(expectedExceptions = UnsupportedOperationException.class)
@Test
void viewAsFloatingLanesTest() {
SPECIES.zero().viewAsFloatingLanes();
Vector<?> asFloating = SPECIES.zero().viewAsFloatingLanes();
VectorSpecies<?> asFloatingSpecies = asFloating.species();
Assert.assertNotEquals(asFloatingSpecies.elementType(), SPECIES.elementType());
assertEquals(asFloatingSpecies.vectorShape(), SPECIES.vectorShape());
assertEquals(asFloatingSpecies.length(), SPECIES.length());
assertEquals(asFloating.viewAsIntegralLanes().species(), SPECIES);
}
@Test

View File

@ -1725,9 +1725,14 @@ public class ShortVectorMaxTests extends AbstractVectorTest {
assertEquals(asIntegral.species(), SPECIES);
}
@Test(expectedExceptions = UnsupportedOperationException.class)
@Test
void viewAsFloatingLanesTest() {
SPECIES.zero().viewAsFloatingLanes();
Vector<?> asFloating = SPECIES.zero().viewAsFloatingLanes();
VectorSpecies<?> asFloatingSpecies = asFloating.species();
Assert.assertNotEquals(asFloatingSpecies.elementType(), SPECIES.elementType());
assertEquals(asFloatingSpecies.vectorShape(), SPECIES.vectorShape());
assertEquals(asFloatingSpecies.length(), SPECIES.length());
assertEquals(asFloating.viewAsIntegralLanes().species(), SPECIES);
}
@Test

View File

@ -52,11 +52,11 @@ Log false "$compilation\n"
Log true "done\n"
# For each type
for type in byte short int long float double
for type in byte short int long float double float16
do
Type="$(tr '[:lower:]' '[:upper:]' <<< ${type:0:1})${type:1}"
TYPE="$(tr '[:lower:]' '[:upper:]' <<< ${type})"
args="-K$type -Dtype=$type -DType=$Type -DTYPE=$TYPE"
args="-K$type -DType=$Type -DTYPE=$TYPE"
Boxtype=$Type
Wideboxtype=$Boxtype
@ -69,6 +69,7 @@ do
bitstype=$type
Bitstype=$Type
Boxbitstype=$Boxtype
testtype=$type
fptype=$type
Fptype=$Type
@ -118,11 +119,23 @@ do
MaxValue=POSITIVE_INFINITY
MinValue=NEGATIVE_INFINITY
;;
float16)
# Float16 is generated as a floating-point kind (kind=FP) with its own FP16 key.
kind=FP
fpkind=FP16
# The primitive-facing substitutions are short/Short, while Type and Wideboxtype
# name Float16. Overwriting $type here does not affect testtype, which was
# assigned from $type earlier in the loop body.
bitstype=short
type=short
Bitstype=Short
Boxbitstype=Short
Wideboxtype=Float16
MaxValue=POSITIVE_INFINITY
MinValue=NEGATIVE_INFINITY
Type=Float16
;;
esac
args="$args -K$kind -K$fpkind -K$Type -DBoxtype=$Boxtype -DWideboxtype=$Wideboxtype -DMaxValue=$MaxValue -DMinValue=$MinValue"
args="$args -Dtype=$type -K$kind -K$Type -DBoxtype=$Boxtype -DWideboxtype=$Wideboxtype -DMaxValue=$MaxValue -DMinValue=$MinValue"
args="$args -Dbitstype=$bitstype -DBitstype=$Bitstype -DBoxbitstype=$Boxbitstype"
args="$args -Dfptype=$fptype -DFptype=$Fptype -DBoxfptype=$Boxfptype"
args="$args -Dtesttype=$testtype -K$fpkind -Dfptype=$fptype -DFptype=$Fptype -DBoxfptype=$Boxfptype"
abstractvectortype=${typeprefix}${Type}Vector
abstractvectorteststype=${typeprefix}${Type}VectorTests

View File

@ -4,5 +4,5 @@
for (int i = 0; i < a.length; i += SPECIES.length()) {
$abstractvectortype$ av = $abstractvectortype$.fromArray(SPECIES, a, i);
av.lanewise(VectorOperators.[[TEST]], (long)b[i]).intoArray(r, i);
av.lanewise(VectorOperators.[[TEST]], {#if[FP16]?shortBitsToFloat16(b[i]).longValue():(long)b[i]}).intoArray(r, i);
}

View File

@ -6,5 +6,5 @@
for (int i = 0; i < a.length; i += SPECIES.length()) {
$abstractvectortype$ av = $abstractvectortype$.fromArray(SPECIES, a, i);
av.lanewise(VectorOperators.[[TEST]], (long)b[i], vmask).intoArray(r, i);
av.lanewise(VectorOperators.[[TEST]], {#if[FP16]?shortBitsToFloat16(b[i]).longValue():(long)b[i]}, vmask).intoArray(r, i);
}

View File

@ -43,11 +43,11 @@
for (int i = 0; i < a.length; i += SPECIES.length()) {
$abstractvectortype$ av = $abstractvectortype$.fromArray(SPECIES, a, i);
VectorMask<$Wideboxtype$> mv = av.compare(VectorOperators.[[TEST]], (long)b[i]);
VectorMask<$Wideboxtype$> mv = av.compare(VectorOperators.[[TEST]], {#if[FP16]?shortBitsToFloat16(b[i]).longValue():(long)b[i]});
// Check results as part of computation.
for (int j = 0; j < SPECIES.length(); j++) {
assertEquals(mv.laneIsSet(j), [[TEST_OP]](a[i + j], ($type$)((long)b[i])));
assertEquals(mv.laneIsSet(j), [[TEST_OP]](a[i + j], {#if[FP16]?float16ToRawShortBits(Float16.valueOf(shortBitsToFloat16(b[i]).longValue())):($type$)((long)b[i])}));
}
}
}
@ -63,11 +63,11 @@
for (int i = 0; i < a.length; i += SPECIES.length()) {
$abstractvectortype$ av = $abstractvectortype$.fromArray(SPECIES, a, i);
VectorMask<$Wideboxtype$> mv = av.compare(VectorOperators.[[TEST]], (long)b[i], vmask);
VectorMask<$Wideboxtype$> mv = av.compare(VectorOperators.[[TEST]], {#if[FP16]?shortBitsToFloat16(b[i]).longValue():(long)b[i]}, vmask);
// Check results as part of computation.
for (int j = 0; j < SPECIES.length(); j++) {
assertEquals(mv.laneIsSet(j), mask[j] && ([[TEST_OP]](a[i + j], ($type$)((long)b[i]))));
assertEquals(mv.laneIsSet(j), mask[j] && ([[TEST_OP]](a[i + j], {#if[FP16]?float16ToRawShortBits(Float16.valueOf(shortBitsToFloat16(b[i]).longValue())):($type$)((long)b[i])})));
}
}
}

View File

@ -210,7 +210,7 @@
$type$[] r = new $type$[a.length];
for (int i = 0; i < a.length; i += SPECIES.length()) {
$abstractvectortype$.broadcast(SPECIES, (long)a[i]).intoArray(r, i);
$abstractvectortype$.broadcast(SPECIES, {#if[FP16]?shortBitsToFloat16(a[i]).longValue():(long)a[i]}).intoArray(r, i);
}
assertBroadcastArraysEquals(r, a);
}
@ -227,7 +227,7 @@
for (int ic = 0; ic < INVOC_COUNT; ic++) {
for (int i = 0; i < a.length; i += SPECIES.length()) {
$abstractvectortype$ av = $abstractvectortype$.fromArray(SPECIES, a, i);
av.blend((long)b[i], vmask).intoArray(r, i);
av.blend({#if[FP16]?shortBitsToFloat16(b[i]).longValue():(long)b[i]}, vmask).intoArray(r, i);
}
}
assertBroadcastLongArraysEquals(r, a, b, mask, $vectorteststype$::blend);
@ -588,14 +588,14 @@
static void ShapeWithLanes$vectorteststype$SmokeTest() {
$abstractvectortype$ av = $abstractvectortype$.zero(SPECIES);
VectorShape vsh = av.shape();
VectorSpecies species = vsh.withLanes($type$.class);
VectorSpecies species = vsh.withLanes({#if[FP16]?Float16.class:$type$.class});
assert(species.equals(SPECIES));
}
@Test
static void ElementType$vectorteststype$SmokeTest() {
$abstractvectortype$ av = $abstractvectortype$.zero(SPECIES);
assert(av.species().elementType() == $type$.class);
assert(av.species().elementType() == {#if[FP16]?Float16.class:$type$.class});
}
@Test
@ -613,7 +613,7 @@
@Test
static void WithLanes$vectorteststype$SmokeTest() {
$abstractvectortype$ av = $abstractvectortype$.zero(SPECIES);
VectorSpecies species = av.species().withLanes($type$.class);
VectorSpecies species = av.species().withLanes({#if[FP16]?Float16.class:$type$.class});
assert(species.equals(SPECIES));
}

View File

@ -48,6 +48,11 @@ import jdk.incubator.vector.ByteVector;
#if[Float]
import jdk.incubator.vector.FloatVector;
#end[Float]
#if[FP16]
import jdk.incubator.vector.Float16;
import static jdk.incubator.vector.Float16.*;
import jdk.incubator.vector.Float16Vector;
#end[FP16]
#if[Int]
import jdk.incubator.vector.IntVector;
#end[Int]
@ -86,6 +91,38 @@ public class $vectorteststype$ extends AbstractVectorTest {
#end[MaxBit]
static final int INVOC_COUNT = Integer.getInteger("jdk.incubator.vector.test.loop-iterations", 100);
#if[FP16]
// Compares two raw binary16 bit patterns by value: both are widened to float first.
static void assertEquals(short actual, short expected) {
    float actualValue = Float.float16ToFloat(actual);
    float expectedValue = Float.float16ToFloat(expected);
    Assert.assertEquals(actualValue, expectedValue);
}
// Compares two raw binary16 bit patterns by value (widened to float), reporting msg on failure.
static void assertEquals(short actual, short expected, String msg) {
    float actualValue = Float.float16ToFloat(actual);
    float expectedValue = Float.float16ToFloat(expected);
    Assert.assertEquals(actualValue, expectedValue, msg);
}
// Tolerance comparison of raw binary16 bit patterns: actual, expected and delta are
// all widened to float before the check.
static void assertEquals(short actual, short expected, short delta) {
    float actualValue = Float.float16ToFloat(actual);
    float expectedValue = Float.float16ToFloat(expected);
    float tolerance = Float.float16ToFloat(delta);
    Assert.assertEquals(actualValue, expectedValue, tolerance);
}
// Tolerance comparison of raw binary16 bit patterns (all widened to float), reporting
// msg on failure.
static void assertEquals(short actual, short expected, short delta, String msg) {
    float actualValue = Float.float16ToFloat(actual);
    float expectedValue = Float.float16ToFloat(expected);
    float tolerance = Float.float16ToFloat(delta);
    Assert.assertEquals(actualValue, expectedValue, tolerance, msg);
}
// Element-wise comparison of two arrays of raw binary16 bit patterns; each pair is
// widened to float before being compared.
// The length check goes through Assert rather than the Java `assert` statement so
// that it is enforced even when the JVM runs without -ea; otherwise extra trailing
// elements in `actual` would go unchecked.
static void assertEquals(short [] actual, short [] expected) {
    Assert.assertEquals(actual.length, expected.length);
    for (int i = 0; i < actual.length; i++) {
        Assert.assertEquals(Float.float16ToFloat(actual[i]), Float.float16ToFloat(expected[i]));
    }
}
// Element-wise comparison of two arrays of raw binary16 bit patterns, reporting msg
// on failure; each pair is widened to float before being compared.
// The length check goes through Assert rather than the Java `assert` statement so
// that it is enforced even when the JVM runs without -ea; otherwise extra trailing
// elements in `actual` would go unchecked.
static void assertEquals(short [] actual, short [] expected, String msg) {
    Assert.assertEquals(actual.length, expected.length, msg);
    for (int i = 0; i < actual.length; i++) {
        Assert.assertEquals(Float.float16ToFloat(actual[i]), Float.float16ToFloat(expected[i]), msg);
    }
}
// long overload for the FP16 tests: both arguments are narrowed to short, treated as
// raw binary16 bits and compared after widening to float.
static void assertEquals(long actual, long expected) {
    short actualBits = (short)actual;
    short expectedBits = (short)expected;
    Assert.assertEquals(Float.float16ToFloat(actualBits), Float.float16ToFloat(expectedBits));
}
// long overload for the FP16 tests with a failure message: both arguments are narrowed
// to short, treated as raw binary16 bits and compared after widening to float.
static void assertEquals(long actual, long expected, String msg) {
    short actualBits = (short)actual;
    short expectedBits = (short)expected;
    Assert.assertEquals(Float.float16ToFloat(actualBits), Float.float16ToFloat(expectedBits), msg);
}
#else[FP16]
static void assertEquals($type$ actual, $type$ expected) {
Assert.assertEquals(actual, expected);
}
@ -112,6 +149,7 @@ public class $vectorteststype$ extends AbstractVectorTest {
Assert.assertEquals(actual, expected, msg);
}
#end[!long]
#end[FP16]
static void assertEquals(String actual, String expected) {
Assert.assertEquals(actual, expected);
}
@ -152,9 +190,9 @@ public class $vectorteststype$ extends AbstractVectorTest {
private static final $type$ AND_IDENTITY = ($type$)-1;
#end[BITWISE]
private static final $type$ FIRST_NONZERO_IDENTITY = ($type$)0;
private static final $type$ MAX_IDENTITY = $Wideboxtype$.$MinValue$;
private static final $type$ MIN_IDENTITY = $Wideboxtype$.$MaxValue$;
private static final $type$ MUL_IDENTITY = ($type$)1;
private static final $type$ MAX_IDENTITY = {#if[FP16]?float16ToRawShortBits($Wideboxtype$.$MinValue$):$Wideboxtype$.$MinValue$};
private static final $type$ MIN_IDENTITY = {#if[FP16]?float16ToRawShortBits($Wideboxtype$.$MaxValue$):$Wideboxtype$.$MaxValue$};
private static final $type$ MUL_IDENTITY = {#if[FP16]?float16ToRawShortBits(Float16.valueOf(1.0f)):($type$)1};
#if[BITWISE]
private static final $type$ OR_IDENTITY = ($type$)0;
private static final $type$ SUADD_IDENTITY = ($type$)0;
@ -165,10 +203,10 @@ public class $vectorteststype$ extends AbstractVectorTest {
#if[FP]
// for floating point addition reduction ops that may introduce rounding errors
private static final $type$ RELATIVE_ROUNDING_ERROR_FACTOR_ADD = ($type$)10.0;
private static final $type$ RELATIVE_ROUNDING_ERROR_FACTOR_ADD = {#if[FP16]?float16ToRawShortBits(Float16.valueOf(10.0f)):($type$)10.0};
// for floating point multiplication reduction ops that may introduce rounding errors
private static final $type$ RELATIVE_ROUNDING_ERROR_FACTOR_MUL = ($type$)50.0;
private static final $type$ RELATIVE_ROUNDING_ERROR_FACTOR_MUL = {#if[FP16]?float16ToRawShortBits(Float16.valueOf(50.0f)):($type$)50.0};
#end[FP]
static final int BUFFER_REPS = Integer.getInteger("jdk.incubator.vector.test.buffer-vectors", 25000 / $bits$);
@ -176,8 +214,8 @@ public class $vectorteststype$ extends AbstractVectorTest {
static void assertArraysStrictlyEquals($type$[] r, $type$[] a) {
for (int i = 0; i < a.length; i++) {
#if[FP]
$bitstype$ ir = $Wideboxtype$.$type$ToRaw$Bitstype$Bits(r[i]);
$bitstype$ ia = $Wideboxtype$.$type$ToRaw$Bitstype$Bits(a[i]);
$bitstype$ ir = {#if[FP16]?r[i]:$Wideboxtype$.$type$ToRaw$Bitstype$Bits(r[i])};
$bitstype$ ia = {#if[FP16]?a[i]:$Wideboxtype$.$type$ToRaw$Bitstype$Bits(a[i])};
if (ir != ia) {
#if[Float]
Assert.fail(String.format("at index #%d, expected = %08X, actual = %08X", i, ia, ir));
@ -265,6 +303,23 @@ public class $vectorteststype$ extends AbstractVectorTest {
#end[FP]
}
#if[FP]
#if[FP16]
/**
 * FP16 variant of the reduction check. Every $type$ value is a raw Float16 bit pattern.
 * The overall result {@code rc} and each per-vector result {@code r[i]} are compared with
 * the scalar references using a tolerance of ulp(value) * relativeErrorFactor.
 */
static void assertReductionArraysEquals($type$[] r, $type$ rc, $type$[] a,
                                        FReductionOp f, FReductionAllOp fa,
                                        $type$ relativeErrorFactor) {
    // The factor is the same for every comparison, so decode it a single time.
    final Float16 factor = shortBitsToFloat16(relativeErrorFactor);
    int pos = 0;
    try {
        assertEquals(rc, fa.apply(a),
                     float16ToRawShortBits(Float16.multiply(Float16.ulp(shortBitsToFloat16(rc)), factor)));
        while (pos < a.length) {
            assertEquals(r[pos], f.apply(a, pos),
                         float16ToRawShortBits(Float16.multiply(Float16.ulp(shortBitsToFloat16(r[pos])), factor)));
            pos += SPECIES.length();
        }
    } catch (AssertionError failure) {
        // Run the checks again with messages so the report says which comparison mismatched.
        assertEquals(rc, fa.apply(a),
                     float16ToRawShortBits(Float16.multiply(Float16.ulp(shortBitsToFloat16(rc)), factor)),
                     "Final result is incorrect!");
        assertEquals(r[pos], f.apply(a, pos),
                     float16ToRawShortBits(Float16.multiply(Float16.ulp(shortBitsToFloat16(r[pos])), factor)),
                     "at index #" + pos);
    }
}
#else[FP16]
static void assertReductionArraysEquals($type$[] r, $type$ rc, $type$[] a,
FReductionOp f, FReductionAllOp fa,
@ -280,6 +335,7 @@ public class $vectorteststype$ extends AbstractVectorTest {
assertEquals(r[i], f.apply(a, i), Math.ulp(r[i]) * relativeErrorFactor, "at index #" + i);
}
}
#end[FP16]
#end[FP]
interface FReductionMaskedOp {
@ -308,6 +364,23 @@ public class $vectorteststype$ extends AbstractVectorTest {
#end[FP]
}
#if[FP]
#if[FP16]
/**
 * FP16 variant of the masked reduction check. Every $type$ value is a raw Float16 bit
 * pattern. The tolerance for each comparison is |value * relativeError|.
 */
static void assertReductionArraysEqualsMasked($type$[] r, $type$ rc, $type$[] a, boolean[] mask,
                                              FReductionMaskedOp f, FReductionAllMaskedOp fa,
                                              $type$ relativeError) {
    // Decode the relative error once; it is shared by all comparisons.
    final Float16 relErr = shortBitsToFloat16(relativeError);
    int pos = 0;
    try {
        assertEquals(rc, fa.apply(a, mask),
                     float16ToRawShortBits(Float16.abs(Float16.multiply(shortBitsToFloat16(rc), relErr))));
        while (pos < a.length) {
            assertEquals(r[pos], f.apply(a, pos, mask),
                         float16ToRawShortBits(Float16.abs(Float16.multiply(shortBitsToFloat16(r[pos]), relErr))));
            pos += SPECIES.length();
        }
    } catch (AssertionError failure) {
        // Run the checks again with messages so the report says which comparison mismatched.
        assertEquals(rc, fa.apply(a, mask),
                     float16ToRawShortBits(Float16.abs(Float16.multiply(shortBitsToFloat16(rc), relErr))),
                     "Final result is incorrect!");
        assertEquals(r[pos], f.apply(a, pos, mask),
                     float16ToRawShortBits(Float16.abs(Float16.multiply(shortBitsToFloat16(r[pos]), relErr))),
                     "at index #" + pos);
    }
}
#else[FP16]
static void assertReductionArraysEqualsMasked($type$[] r, $type$ rc, $type$[] a, boolean[] mask,
FReductionMaskedOp f, FReductionAllMaskedOp fa,
@ -324,6 +397,7 @@ relativeError));
assertEquals(r[i], f.apply(a, i, mask), Math.abs(r[i] * relativeError), "at index #" + i);
}
}
#end[FP16]
#end[FP]
#if[!Long]
@ -473,7 +547,7 @@ relativeError));
for (; i < a.length; i += vector_len) {
for (j = 0; j < vector_len; j++) {
idx = i + j;
wrapped_index = Math.floorMod((int)order[idx], 2 * vector_len);
wrapped_index = Math.floorMod({#if[FP16]?shortBitsToFloat16(order[idx]).intValue():(int)order[idx]}, 2 * vector_len);
is_exceptional_idx = wrapped_index >= vector_len;
oidx = is_exceptional_idx ? (wrapped_index - vector_len) : wrapped_index;
assertEquals(r[idx], (is_exceptional_idx ? b[i + oidx] : a[i + oidx]));
@ -489,12 +563,12 @@ relativeError));
try {
for (; i < a.length; i += vector_len) {
for (j = 0; j < vector_len; j++) {
assertEquals(r[i+j], a[i+(int)order[i+j]]);
assertEquals(r[i+j], a[i+{#if[FP16]?shortBitsToFloat16(order[i+j]).intValue():(int)order[i+j]}]);
}
}
} catch (AssertionError e) {
int idx = i + j;
assertEquals(r[i+j], a[i+(int)order[i+j]], "at index #" + idx + ", input = " + a[i+(int)order[i+j]]);
assertEquals(r[i+j], a[i+{#if[FP16]?shortBitsToFloat16(order[i+j]).intValue():(int)order[i+j]}], "at index #" + idx + ", input = " + a[i+{#if[FP16]?shortBitsToFloat16(order[i+j]).intValue():(int)order[i+j]}]);
}
}
@ -524,7 +598,7 @@ relativeError));
for (; i < a.length; i += vector_len) {
for (j = 0; j < vector_len; j++) {
if (mask[j % SPECIES.length()])
assertEquals(r[i+j], a[i+(int)order[i+j]]);
assertEquals(r[i+j], a[i+{#if[FP16]?shortBitsToFloat16(order[i+j]).intValue():(int)order[i+j]}]);
else
assertEquals(r[i+j], ($type$)0);
}
@ -532,7 +606,7 @@ relativeError));
} catch (AssertionError e) {
int idx = i + j;
if (mask[j % SPECIES.length()])
assertEquals(r[i+j], a[i+(int)order[i+j]], "at index #" + idx + ", input = " + a[i+(int)order[i+j]] + ", mask = " + mask[j % SPECIES.length()]);
assertEquals(r[i+j], a[i+{#if[FP16]?shortBitsToFloat16(order[i+j]).intValue():(int)order[i+j]}], "at index #" + idx + ", input = " + a[i+{#if[FP16]?shortBitsToFloat16(order[i+j]).intValue():(int)order[i+j]}] + ", mask = " + mask[j % SPECIES.length()]);
else
assertEquals(r[i+j], ($type$)0, "at index #" + idx + ", input = " + a[i+(int)order[i+j]] + ", mask = " + mask[j % SPECIES.length()]);
}
@ -681,10 +755,10 @@ relativeError));
int i = 0;
try {
for (; i < a.length; i++) {
assertEquals(r[i], f.apply(a[i], ($type$)((long)b[(i / SPECIES.length()) * SPECIES.length()])));
assertEquals(r[i], f.apply(a[i], {#if[FP16]?float16ToRawShortBits(Float16.valueOf(shortBitsToFloat16(b[(i / SPECIES.length()) * SPECIES.length()]).longValue())):($type$)((long)b[(i / SPECIES.length()) * SPECIES.length()])}));
}
} catch (AssertionError e) {
assertEquals(r[i], f.apply(a[i], ($type$)((long)b[(i / SPECIES.length()) * SPECIES.length()])),
assertEquals(r[i], f.apply(a[i], {#if[FP16]?float16ToRawShortBits(Float16.valueOf(shortBitsToFloat16(b[(i / SPECIES.length()) * SPECIES.length()]).longValue())):($type$)((long)b[(i / SPECIES.length()) * SPECIES.length()])}),
"(" + a[i] + ", " + b[(i / SPECIES.length()) * SPECIES.length()] + ") at index #" + i);
}
}
@ -745,10 +819,10 @@ relativeError));
int i = 0;
try {
for (; i < a.length; i++) {
assertEquals(r[i], f.apply(a[i], ($type$)((long)b[(i / SPECIES.length()) * SPECIES.length()]), mask[i % SPECIES.length()]));
assertEquals(r[i], f.apply(a[i], {#if[FP16]?float16ToRawShortBits(Float16.valueOf(shortBitsToFloat16(b[(i / SPECIES.length()) * SPECIES.length()]).longValue())):($type$)((long)b[(i / SPECIES.length()) * SPECIES.length()])}, mask[i % SPECIES.length()]));
}
} catch (AssertionError err) {
assertEquals(r[i], f.apply(a[i], ($type$)((long)b[(i / SPECIES.length()) * SPECIES.length()]),
assertEquals(r[i], f.apply(a[i], {#if[FP16]?float16ToRawShortBits(Float16.valueOf(shortBitsToFloat16(b[(i / SPECIES.length()) * SPECIES.length()]).longValue())):($type$)((long)b[(i / SPECIES.length()) * SPECIES.length()])},
mask[i % SPECIES.length()]), "at index #" + i + ", input1 = " + a[i] +
", input2 = " + b[(i / SPECIES.length()) * SPECIES.length()] + ", mask = " +
mask[i % SPECIES.length()]);
@ -978,6 +1052,26 @@ relativeError));
#if[FP]
static boolean isWithin1Ulp($type$ actual, $type$ expected) {
#if[FP16]
// FP16 path: both arguments are raw Float16 bit patterns, so decode them first.
Float16 act = shortBitsToFloat16(actual);
Float16 exp = shortBitsToFloat16(expected);
// One NaN and one non-NaN can never be within 1 ulp of each other.
if (Float16.isNaN(exp) != Float16.isNaN(act)) {
    return false;
}

// The acceptable window is [nextDown(expected), nextUp(expected)].
Float16 low = Float16.nextDown(exp);
Float16 high = Float16.nextUp(exp);

// The bounds must be checked against the ACTUAL value. Comparing them with
// `exp` (as before) is always satisfied and let any non-NaN result pass.
if (Float16.compare(low, act) > 0) {
    return false;
}

if (Float16.compare(high, act) < 0) {
    return false;
}
#else[FP16]
if ($Type$.isNaN(expected) && !$Type$.isNaN(actual)) {
return false;
} else if (!$Type$.isNaN(expected) && $Type$.isNaN(actual)) {
@ -994,6 +1088,7 @@ relativeError));
if ($Type$.compare(high, expected) < 0) {
return false;
}
#end[FP16]
return true;
}
@ -1003,11 +1098,11 @@ relativeError));
try {
// Check that result is within 1 ulp of strict math or equivalent to math implementation.
for (; i < a.length; i++) {
Assert.assertTrue($Type$.compare(r[i], mathf.apply(a[i])) == 0 ||
Assert.assertTrue($Wideboxtype$.compare({#if[FP16]?shortBitsToFloat16(r[i]):r[i]}, {#if[FP16]?shortBitsToFloat16(mathf.apply(a[i])):mathf.apply(a[i])}) == 0 ||
isWithin1Ulp(r[i], strictmathf.apply(a[i])));
}
} catch (AssertionError e) {
Assert.assertTrue($Type$.compare(r[i], mathf.apply(a[i])) == 0, "at index #" + i + ", input = " + a[i] + ", actual = " + r[i] + ", expected = " + mathf.apply(a[i]));
Assert.assertTrue($Wideboxtype$.compare({#if[FP16]?shortBitsToFloat16(r[i]):r[i]}, {#if[FP16]?shortBitsToFloat16(mathf.apply(a[i])):mathf.apply(a[i])}) == 0, "at index #" + i + ", input = " + a[i] + ", actual = " + r[i] + ", expected = " + mathf.apply(a[i]));
Assert.assertTrue(isWithin1Ulp(r[i], strictmathf.apply(a[i])), "at index #" + i + ", input = " + a[i] + ", actual = " + r[i] + ", expected (within 1 ulp) = " + strictmathf.apply(a[i]));
}
}
@ -1017,11 +1112,11 @@ relativeError));
try {
// Check that result is within 1 ulp of strict math or equivalent to math implementation.
for (; i < a.length; i++) {
Assert.assertTrue($Type$.compare(r[i], mathf.apply(a[i], b[i])) == 0 ||
Assert.assertTrue($Wideboxtype$.compare({#if[FP16]?shortBitsToFloat16(r[i]):r[i]}, {#if[FP16]?shortBitsToFloat16(mathf.apply(a[i], b[i])):mathf.apply(a[i], b[i])}) == 0 ||
isWithin1Ulp(r[i], strictmathf.apply(a[i], b[i])));
}
} catch (AssertionError e) {
Assert.assertTrue($Type$.compare(r[i], mathf.apply(a[i], b[i])) == 0, "at index #" + i + ", input1 = " + a[i] + ", input2 = " + b[i] + ", actual = " + r[i] + ", expected = " + mathf.apply(a[i], b[i]));
Assert.assertTrue($Wideboxtype$.compare({#if[FP16]?shortBitsToFloat16(r[i]):r[i]}, {#if[FP16]?shortBitsToFloat16(mathf.apply(a[i], b[i])):mathf.apply(a[i], b[i])}) == 0, "at index #" + i + ", input1 = " + a[i] + ", input2 = " + b[i] + ", actual = " + r[i] + ", expected = " + mathf.apply(a[i], b[i]));
Assert.assertTrue(isWithin1Ulp(r[i], strictmathf.apply(a[i], b[i])), "at index #" + i + ", input1 = " + a[i] + ", input2 = " + b[i] + ", actual = " + r[i] + ", expected (within 1 ulp) = " + strictmathf.apply(a[i], b[i]));
}
}
@ -1032,14 +1127,14 @@ relativeError));
try {
// Check that result is within 1 ulp of strict math or equivalent to math implementation.
for (; i < a.length; i++) {
Assert.assertTrue($Type$.compare(r[i],
mathf.apply(a[i], b[(i / SPECIES.length()) * SPECIES.length()])) == 0 ||
Assert.assertTrue($Wideboxtype$.compare({#if[FP16]?shortBitsToFloat16(r[i]):r[i]},
{#if[FP16]?shortBitsToFloat16(mathf.apply(a[i], b[(i / SPECIES.length()) * SPECIES.length()])):mathf.apply(a[i], b[(i / SPECIES.length()) * SPECIES.length()])}) == 0 ||
isWithin1Ulp(r[i],
strictmathf.apply(a[i], b[(i / SPECIES.length()) * SPECIES.length()])));
}
} catch (AssertionError e) {
Assert.assertTrue($Type$.compare(r[i],
mathf.apply(a[i], b[(i / SPECIES.length()) * SPECIES.length()])) == 0,
Assert.assertTrue($Wideboxtype$.compare({#if[FP16]?shortBitsToFloat16(r[i]):r[i]},
{#if[FP16]?shortBitsToFloat16(mathf.apply(a[i], b[(i / SPECIES.length()) * SPECIES.length()])):mathf.apply(a[i], b[(i / SPECIES.length()) * SPECIES.length()])}) == 0,
"at index #" + i + ", input1 = " + a[i] + ", input2 = " +
b[(i / SPECIES.length()) * SPECIES.length()] + ", actual = " + r[i] +
", expected = " + mathf.apply(a[i], b[(i / SPECIES.length()) * SPECIES.length()]));
@ -1236,6 +1331,7 @@ relativeError));
#if[!Int]
#if[!byteOrShort]
#if[!FP16]
static int intCornerCaseValue(int i) {
switch(i % 5) {
case 0:
@ -1250,7 +1346,45 @@ relativeError));
return (int)0;
}
}
#end[!FP16]
#if[FP16]
/** Converts an int to Float16 and returns the raw bit pattern of the result. */
static $type$ convToFloat16(int i) {
    Float16 converted = Float16.valueOf(i);
    return float16ToRawShortBits(converted);
}
/**
 * Cycles through four corner-case inputs selected by {@code i % 4}:
 * 65504, -65504, -0.0 and (for every other remainder) 0.0, each as raw Float16 bits.
 */
static $type$ convIntToFloat16CornerCases(int i) {
    return switch (i % 4) {
        case 0 -> convToFloat16(65504);
        case 1 -> convToFloat16(-65504);
        case 2 -> float16ToRawShortBits(Float16.valueOf(-0.0f));
        default -> float16ToRawShortBits(Float16.valueOf(0.0f));
    };
}
// FP16 input generators for the int-conversion tests. Each entry fills
// s * BUFFER_REPS elements with raw Float16 bits derived from the element index.
static final List<IntFunction<$type$[]>> INT_$TYPE$_GENERATORS = List.of(
        withToString("float16[-i * 5]",
                (int s) -> fill(s * BUFFER_REPS, idx -> convToFloat16(-idx * 5))),
        withToString("float16[i * 5]",
                (int s) -> fill(s * BUFFER_REPS, idx -> convToFloat16(idx * 5))),
        withToString("float16[i + 1]",
                (int s) -> fill(s * BUFFER_REPS,
                        // Substitute 1 when (i + 1) narrows to zero in $type$.
                        idx -> ((($type$)(idx + 1) != 0) ? convToFloat16(idx + 1) : convToFloat16(1)))),
        withToString("float16[intCornerCaseValue(i)]",
                (int s) -> fill(s * BUFFER_REPS, idx -> convIntToFloat16CornerCases(idx)))
);
#else[FP16]
static final List<IntFunction<$type$[]>> INT_$TYPE$_GENERATORS = List.of(
withToString("$type$[-i * 5]", (int s) -> {
return fill(s * BUFFER_REPS,
@ -1269,6 +1403,7 @@ relativeError));
i -> ($type$)intCornerCaseValue(i));
})
);
#end[FP16]
#end[!byteOrShort]
#end[!Int]
@ -1276,7 +1411,11 @@ relativeError));
int i = 0;
try {
for (; i < r.length; i++) {
#if[FP16]
assertEquals(r[i], (int)Float.float16ToFloat(a[i+offs]));
#else[FP16]
assertEquals(r[i], (int)(a[i+offs]));
#end[FP16]
}
} catch (AssertionError e) {
assertEquals(r[i], (int)(a[i+offs]), "at index #" + i + ", input = " + a[i+offs]);
@ -1285,6 +1424,7 @@ relativeError));
#if[!Long]
#if[FP]
#if[!FP16]
static long longCornerCaseValue(int i) {
switch(i % 5) {
case 0:
@ -1299,7 +1439,45 @@ relativeError));
return (long)0;
}
}
#end[!FP16]
#if[FP16]
/** Converts a long to Float16 and returns the raw bit pattern of the result. */
static $type$ convToFloat16(long i) {
    Float16 converted = Float16.valueOf(i);
    return float16ToRawShortBits(converted);
}
/**
 * Cycles through four corner-case inputs selected by {@code i % 4}:
 * 65504L, -65504L, -0.0 and (for every other remainder) 0.0, each as raw Float16 bits.
 */
static $type$ convLongToFloat16CornerCases(int i) {
    return switch (i % 4) {
        case 0 -> convToFloat16(65504L);
        case 1 -> convToFloat16(-65504L);
        case 2 -> float16ToRawShortBits(Float16.valueOf(-0.0f));
        default -> float16ToRawShortBits(Float16.valueOf(0.0f));
    };
}
// FP16 input generators for the long-conversion tests. Each entry fills
// s * BUFFER_REPS elements with raw Float16 bits derived from the element index.
static final List<IntFunction<$type$[]>> LONG_$TYPE$_GENERATORS = List.of(
        withToString("float16[-i * 5]",
                (int s) -> fill(s * BUFFER_REPS, idx -> convToFloat16(-idx * 5))),
        withToString("float16[i * 5]",
                (int s) -> fill(s * BUFFER_REPS, idx -> convToFloat16(idx * 5))),
        withToString("float16[i + 1]",
                (int s) -> fill(s * BUFFER_REPS,
                        // Substitute 1 when (i + 1) narrows to zero in $type$.
                        idx -> ((($type$)(idx + 1) != 0) ? convToFloat16(idx + 1) : convToFloat16(1)))),
        withToString("float16[cornerCaseValue(i)]",
                (int s) -> fill(s * BUFFER_REPS, idx -> convLongToFloat16CornerCases(idx)))
);
#else[FP16]
static final List<IntFunction<$type$[]>> LONG_$TYPE$_GENERATORS = List.of(
withToString("$type$[-i * 5]", (int s) -> {
return fill(s * BUFFER_REPS,
@ -1318,6 +1496,7 @@ relativeError));
i -> ($type$)longCornerCaseValue(i));
})
);
#end[FP16]
#end[FP]
#end[!Long]
@ -1338,7 +1517,11 @@ relativeError));
int i = 0;
try {
for (; i < r.length; i++) {
#if[FP16]
assertEquals(r[i], (long)Float.float16ToFloat(a[i+offs]));
#else[FP16]
assertEquals(r[i], (long)(a[i+offs]));
#end[FP16]
}
} catch (AssertionError e) {
assertEquals(r[i], (long)(a[i+offs]), "at index #" + i + ", input = " + a[i+offs]);
@ -1350,7 +1533,11 @@ relativeError));
int i = 0;
try {
for (; i < r.length; i++) {
#if[FP16]
assertEquals(r[i], (double)Float.float16ToFloat(a[i+offs]));
#else[FP16]
assertEquals(r[i], (double)(a[i+offs]));
#end[FP16]
}
} catch (AssertionError e) {
assertEquals(r[i], (double)(a[i+offs]), "at index #" + i + ", input = " + a[i+offs]);
@ -1358,8 +1545,40 @@ relativeError));
}
#end[!Double]
#if[FP16]
static $bitstype$ bits($type$ e) {
return {#if[FP]? $Type$.$type$To$Bitstype$Bits(e): e};
return e;
}
// General-purpose FP16 input generators. Each entry fills s * BUFFER_REPS elements
// with raw Float16 bits; the label passed to withToString names the value pattern.
static final List<IntFunction<$type$[]>> $TYPE$_GENERATORS = List.of(
        withToString("float16[-i * 5]",
                (int s) -> fill(s * BUFFER_REPS, idx -> convToFloat16(-idx * 5))),
        withToString("float16[i * 5]",
                (int s) -> fill(s * BUFFER_REPS, idx -> convToFloat16(idx * 5))),
        withToString("float16[i + 1]",
                (int s) -> fill(s * BUFFER_REPS,
                        // Substitute 1 when (i + 1) narrows to zero in $type$.
                        idx -> ((($type$)(idx + 1) != 0) ? convToFloat16(idx + 1) : convToFloat16(1)))),
        withToString("float16[0.01 + (i / (i + 1))]",
                (int s) -> fill(s * BUFFER_REPS,
                        idx -> Float.floatToFloat16((0.01f + ((float)idx / (idx + 1)))))),
        withToString("float16[i -> i % 17 == 0 ? cornerCaseValue(i) : 0.01f + (i / (i + 1))]",
                (int s) -> fill(s * BUFFER_REPS,
                        // Every 17th element is a corner case; the rest are ordinary fractions.
                        idx -> (idx % 17 == 0)
                                ? cornerCaseValue(idx)
                                : Float.floatToFloat16((0.01f + ((float)idx / (idx + 1)))))),
        withToString("float16[cornerCaseValue(i)]",
                (int s) -> fill(s * BUFFER_REPS, idx -> cornerCaseValue(idx)))
);
#else[FP16]
static $bitstype$ bits($type$ e) {
return {#if[FP]?$Wideboxtype$.$type$To$Bitstype$Bits(e):e};
}
static final List<IntFunction<$type$[]>> $TYPE$_GENERATORS = List.of(
@ -1394,6 +1613,7 @@ relativeError));
i -> cornerCaseValue(i));
})
);
#end[FP16]
#if[!FP]
static final List<IntFunction<$type$[]>> $TYPE$_SATURATING_GENERATORS = List.of(
@ -1472,7 +1692,7 @@ relativeError));
collect(Collectors.toList());
static final List<IntFunction<$type$[]>> SELECT_FROM_INDEX_GENERATORS = List.of(
withToString("$type$[0..VECLEN*2)", (int s) -> {
withToString("$testtype$[0..VECLEN*2)", (int s) -> {
return fill(s * BUFFER_REPS,
i -> ($type$)(RAND.nextInt()));
})
@ -1709,27 +1929,27 @@ relativeError));
#end[!Int]
static final List<IntFunction<$type$[]>> $TYPE$_COMPARE_GENERATORS = List.of(
withToString("$type$[i]", (int s) -> {
withToString("$testtype$[i]", (int s) -> {
return fill(s * BUFFER_REPS,
i -> ($type$)i);
}),
withToString("$type$[i - length / 2]", (int s) -> {
withToString("$testtype$[i - length / 2]", (int s) -> {
return fill(s * BUFFER_REPS,
i -> ($type$)(i - (s * BUFFER_REPS / 2)));
}),
withToString("$type$[i + 1]", (int s) -> {
withToString("$testtype$[i + 1]", (int s) -> {
return fill(s * BUFFER_REPS,
i -> ($type$)(i + 1));
}),
withToString("$type$[i - 2]", (int s) -> {
withToString("$testtype$[i - 2]", (int s) -> {
return fill(s * BUFFER_REPS,
i -> ($type$)(i - 2));
}),
withToString("$type$[zigZag(i)]", (int s) -> {
withToString("$testtype$[zigZag(i)]", (int s) -> {
return fill(s * BUFFER_REPS,
i -> i%3 == 0 ? ($type$)i : (i%3 == 1 ? ($type$)(i + 1) : ($type$)(i - 2)));
}),
withToString("$type$[cornerCaseValue(i)]", (int s) -> {
withToString("$testtype$[cornerCaseValue(i)]", (int s) -> {
return fill(s * BUFFER_REPS,
i -> cornerCaseValue(i));
})
@ -1792,6 +2012,18 @@ relativeError));
static $type$ cornerCaseValue(int i) {
#if[FP]
#if[FP16]
// FP16 corner cases, returned as raw Float16 bit patterns. Every case uses the
// raw (non-canonicalizing) conversion so the table is uniform; for +0.0 and -0.0
// this yields the same bits as float16ToShortBits did.
return switch(i % 8) {
    case 0 -> float16ToRawShortBits($Wideboxtype$.MAX_VALUE);
    case 1 -> float16ToRawShortBits($Wideboxtype$.MIN_VALUE);
    case 2 -> float16ToRawShortBits($Wideboxtype$.NEGATIVE_INFINITY);
    case 3 -> float16ToRawShortBits($Wideboxtype$.POSITIVE_INFINITY);
    case 4 -> float16ToRawShortBits($Wideboxtype$.NaN);
    // 0x7FFA: all exponent bits set with a non-zero significand, i.e. a NaN
    // whose bit pattern differs from the $Wideboxtype$.NaN case above (presumably).
    case 5 -> float16ToRawShortBits(shortBitsToFloat16((short)0x7FFA));
    case 6 -> float16ToRawShortBits(Float16.valueOf(0.0f));
    default -> float16ToRawShortBits(Float16.valueOf(-0.0f));
};
#else[FP16]
return switch(i % 8) {
case 0 -> $Wideboxtype$.MAX_VALUE;
case 1 -> $Wideboxtype$.MIN_VALUE;
@ -1806,6 +2038,7 @@ relativeError));
case 6 -> ($type$)0.0;
default -> ($type$)-0.0;
};
#end[FP16]
#else[FP]
switch(i % 5) {
case 0:
@ -1923,6 +2156,262 @@ relativeError));
#end[intOrLong]
}
#end[BITWISE]
#if[FP16]
// Scalar comparison references for FP16. Both operands are raw Float16 bits; they are
// decoded and compared as Java floats, so the usual float comparison rules apply.

/** Returns whether a == b as floats. */
static boolean eq(short a, short b) {
    return shortBitsToFloat16(a).floatValue() == shortBitsToFloat16(b).floatValue();
}

/** Returns whether a != b as floats. */
static boolean neq(short a, short b) {
    return shortBitsToFloat16(a).floatValue() != shortBitsToFloat16(b).floatValue();
}

/** Returns whether a < b as floats. */
static boolean lt(short a, short b) {
    return shortBitsToFloat16(a).floatValue() < shortBitsToFloat16(b).floatValue();
}

/** Returns whether a <= b as floats. */
static boolean le(short a, short b) {
    return shortBitsToFloat16(a).floatValue() <= shortBitsToFloat16(b).floatValue();
}

/** Returns whether a > b as floats. */
static boolean gt(short a, short b) {
    return shortBitsToFloat16(a).floatValue() > shortBitsToFloat16(b).floatValue();
}

/** Returns whether a >= b as floats. */
static boolean ge(short a, short b) {
    return shortBitsToFloat16(a).floatValue() >= shortBitsToFloat16(b).floatValue();
}
/**
 * Returns {@code a} unless it decodes to a Float16 that Float16.compare considers
 * equal to the value with bit pattern 0; in that case returns {@code b}.
 * Only {@code a} needs decoding, so {@code b} is passed through untouched.
 */
static $type$ firstNonZero($type$ a, $type$ b) {
    Float16 at = shortBitsToFloat16(a);
    Float16 zero = shortBitsToFloat16((short)0);
    return Float16.compare(at, zero) != 0 ? a : b;
}
// Scalar arithmetic references for FP16. Operands and results are raw Float16 bits;
// each helper decodes, delegates to the matching Float16 operation, and re-encodes.

/** Float16.add on the decoded operands. */
static $type$ scalar_add($type$ a, $type$ b) {
    return float16ToRawShortBits(Float16.add(shortBitsToFloat16(a), shortBitsToFloat16(b)));
}

/** Float16.subtract on the decoded operands. */
static $type$ scalar_sub($type$ a, $type$ b) {
    return float16ToRawShortBits(Float16.subtract(shortBitsToFloat16(a), shortBitsToFloat16(b)));
}

/** Float16.multiply on the decoded operands. */
static $type$ scalar_mul($type$ a, $type$ b) {
    return float16ToRawShortBits(Float16.multiply(shortBitsToFloat16(a), shortBitsToFloat16(b)));
}

/** Float16.max on the decoded operands. */
static $type$ scalar_max($type$ a, $type$ b) {
    return float16ToRawShortBits(Float16.max(shortBitsToFloat16(a), shortBitsToFloat16(b)));
}

/** Float16.min on the decoded operands. */
static $type$ scalar_min($type$ a, $type$ b) {
    return float16ToRawShortBits(Float16.min(shortBitsToFloat16(a), shortBitsToFloat16(b)));
}

/** Float16.divide on the decoded operands. */
static $type$ scalar_div($type$ a, $type$ b) {
    return float16ToRawShortBits(Float16.divide(shortBitsToFloat16(a), shortBitsToFloat16(b)));
}

/** Float16.fma on the decoded operands. */
static $type$ scalar_fma($type$ a, $type$ b, $type$ c) {
    return float16ToRawShortBits(
            Float16.fma(shortBitsToFloat16(a), shortBitsToFloat16(b), shortBitsToFloat16(c)));
}

/** Float16.abs on the decoded operand. */
static $type$ scalar_abs($type$ a) {
    return float16ToRawShortBits(Float16.abs(shortBitsToFloat16(a)));
}

/** Float16.negate on the decoded operand. */
static $type$ scalar_neg($type$ a) {
    return float16ToRawShortBits(Float16.negate(shortBitsToFloat16(a)));
}
// Unary java.lang.Math references for FP16. The raw Float16 operand is widened to
// double, passed to the Math function, and the double result is converted back to
// raw Float16 bits.

static $type$ scalar_sin($type$ a) {
    double x = shortBitsToFloat16(a).doubleValue();
    return float16ToRawShortBits(Float16.valueOf(Math.sin(x)));
}

static $type$ scalar_exp($type$ a) {
    double x = shortBitsToFloat16(a).doubleValue();
    return float16ToRawShortBits(Float16.valueOf(Math.exp(x)));
}

static $type$ scalar_log1p($type$ a) {
    double x = shortBitsToFloat16(a).doubleValue();
    return float16ToRawShortBits(Float16.valueOf(Math.log1p(x)));
}

static $type$ scalar_log($type$ a) {
    double x = shortBitsToFloat16(a).doubleValue();
    return float16ToRawShortBits(Float16.valueOf(Math.log(x)));
}

static $type$ scalar_log10($type$ a) {
    double x = shortBitsToFloat16(a).doubleValue();
    return float16ToRawShortBits(Float16.valueOf(Math.log10(x)));
}

static $type$ scalar_expm1($type$ a) {
    double x = shortBitsToFloat16(a).doubleValue();
    return float16ToRawShortBits(Float16.valueOf(Math.expm1(x)));
}

static $type$ scalar_cos($type$ a) {
    double x = shortBitsToFloat16(a).doubleValue();
    return float16ToRawShortBits(Float16.valueOf(Math.cos(x)));
}

static $type$ scalar_tan($type$ a) {
    double x = shortBitsToFloat16(a).doubleValue();
    return float16ToRawShortBits(Float16.valueOf(Math.tan(x)));
}

static $type$ scalar_sinh($type$ a) {
    double x = shortBitsToFloat16(a).doubleValue();
    return float16ToRawShortBits(Float16.valueOf(Math.sinh(x)));
}

static $type$ scalar_cosh($type$ a) {
    double x = shortBitsToFloat16(a).doubleValue();
    return float16ToRawShortBits(Float16.valueOf(Math.cosh(x)));
}

static $type$ scalar_tanh($type$ a) {
    double x = shortBitsToFloat16(a).doubleValue();
    return float16ToRawShortBits(Float16.valueOf(Math.tanh(x)));
}

static $type$ scalar_asin($type$ a) {
    double x = shortBitsToFloat16(a).doubleValue();
    return float16ToRawShortBits(Float16.valueOf(Math.asin(x)));
}

static $type$ scalar_acos($type$ a) {
    double x = shortBitsToFloat16(a).doubleValue();
    return float16ToRawShortBits(Float16.valueOf(Math.acos(x)));
}

static $type$ scalar_atan($type$ a) {
    double x = shortBitsToFloat16(a).doubleValue();
    return float16ToRawShortBits(Float16.valueOf(Math.atan(x)));
}

static $type$ scalar_cbrt($type$ a) {
    double x = shortBitsToFloat16(a).doubleValue();
    return float16ToRawShortBits(Float16.valueOf(Math.cbrt(x)));
}

static $type$ scalar_sqrt($type$ a) {
    double x = shortBitsToFloat16(a).doubleValue();
    return float16ToRawShortBits(Float16.valueOf(Math.sqrt(x)));
}
// Binary java.lang.Math references for FP16: both raw Float16 operands are widened
// to double, and the double result is converted back to raw Float16 bits.

static $type$ scalar_hypot($type$ a, $type$ b) {
    double x = shortBitsToFloat16(a).doubleValue();
    double y = shortBitsToFloat16(b).doubleValue();
    return float16ToRawShortBits(Float16.valueOf(Math.hypot(x, y)));
}

static $type$ scalar_pow($type$ a, $type$ b) {
    double x = shortBitsToFloat16(a).doubleValue();
    double y = shortBitsToFloat16(b).doubleValue();
    return float16ToRawShortBits(Float16.valueOf(Math.pow(x, y)));
}

static $type$ scalar_atan2($type$ a, $type$ b) {
    double x = shortBitsToFloat16(a).doubleValue();
    double y = shortBitsToFloat16(b).doubleValue();
    return float16ToRawShortBits(Float16.valueOf(Math.atan2(x, y)));
}
// Unary java.lang.StrictMath references for FP16. The raw Float16 operand is widened
// to double, passed to the StrictMath function, and the double result is converted
// back to raw Float16 bits.

static $type$ strict_scalar_sin($type$ a) {
    double x = shortBitsToFloat16(a).doubleValue();
    return float16ToRawShortBits(Float16.valueOf(StrictMath.sin(x)));
}

static $type$ strict_scalar_exp($type$ a) {
    double x = shortBitsToFloat16(a).doubleValue();
    return float16ToRawShortBits(Float16.valueOf(StrictMath.exp(x)));
}

static $type$ strict_scalar_log1p($type$ a) {
    double x = shortBitsToFloat16(a).doubleValue();
    return float16ToRawShortBits(Float16.valueOf(StrictMath.log1p(x)));
}

static $type$ strict_scalar_log($type$ a) {
    double x = shortBitsToFloat16(a).doubleValue();
    return float16ToRawShortBits(Float16.valueOf(StrictMath.log(x)));
}

static $type$ strict_scalar_log10($type$ a) {
    double x = shortBitsToFloat16(a).doubleValue();
    return float16ToRawShortBits(Float16.valueOf(StrictMath.log10(x)));
}

static $type$ strict_scalar_expm1($type$ a) {
    double x = shortBitsToFloat16(a).doubleValue();
    return float16ToRawShortBits(Float16.valueOf(StrictMath.expm1(x)));
}

static $type$ strict_scalar_cos($type$ a) {
    double x = shortBitsToFloat16(a).doubleValue();
    return float16ToRawShortBits(Float16.valueOf(StrictMath.cos(x)));
}

static $type$ strict_scalar_tan($type$ a) {
    double x = shortBitsToFloat16(a).doubleValue();
    return float16ToRawShortBits(Float16.valueOf(StrictMath.tan(x)));
}

static $type$ strict_scalar_sinh($type$ a) {
    double x = shortBitsToFloat16(a).doubleValue();
    return float16ToRawShortBits(Float16.valueOf(StrictMath.sinh(x)));
}

static $type$ strict_scalar_cosh($type$ a) {
    double x = shortBitsToFloat16(a).doubleValue();
    return float16ToRawShortBits(Float16.valueOf(StrictMath.cosh(x)));
}

static $type$ strict_scalar_tanh($type$ a) {
    double x = shortBitsToFloat16(a).doubleValue();
    return float16ToRawShortBits(Float16.valueOf(StrictMath.tanh(x)));
}

static $type$ strict_scalar_asin($type$ a) {
    double x = shortBitsToFloat16(a).doubleValue();
    return float16ToRawShortBits(Float16.valueOf(StrictMath.asin(x)));
}

static $type$ strict_scalar_acos($type$ a) {
    double x = shortBitsToFloat16(a).doubleValue();
    return float16ToRawShortBits(Float16.valueOf(StrictMath.acos(x)));
}

static $type$ strict_scalar_atan($type$ a) {
    double x = shortBitsToFloat16(a).doubleValue();
    return float16ToRawShortBits(Float16.valueOf(StrictMath.atan(x)));
}

static $type$ strict_scalar_cbrt($type$ a) {
    double x = shortBitsToFloat16(a).doubleValue();
    return float16ToRawShortBits(Float16.valueOf(StrictMath.cbrt(x)));
}

static $type$ strict_scalar_sqrt($type$ a) {
    double x = shortBitsToFloat16(a).doubleValue();
    return float16ToRawShortBits(Float16.valueOf(StrictMath.sqrt(x)));
}
// Binary java.lang.StrictMath references for FP16: both raw Float16 operands are
// widened to double, and the double result is converted back to raw Float16 bits.

static $type$ strict_scalar_hypot($type$ a, $type$ b) {
    double x = shortBitsToFloat16(a).doubleValue();
    double y = shortBitsToFloat16(b).doubleValue();
    return float16ToRawShortBits(Float16.valueOf(StrictMath.hypot(x, y)));
}

static $type$ strict_scalar_pow($type$ a, $type$ b) {
    double x = shortBitsToFloat16(a).doubleValue();
    double y = shortBitsToFloat16(b).doubleValue();
    return float16ToRawShortBits(Float16.valueOf(StrictMath.pow(x, y)));
}

static $type$ strict_scalar_atan2($type$ a, $type$ b) {
    double x = shortBitsToFloat16(a).doubleValue();
    double y = shortBitsToFloat16(b).doubleValue();
    return float16ToRawShortBits(Float16.valueOf(StrictMath.atan2(x, y)));
}
#else[FP16]
static boolean eq($type$ a, $type$ b) {
return a == b;
@ -2155,6 +2644,7 @@ relativeError));
return ($type$)StrictMath.atan2((double)a, (double)b);
}
#end[!BITWISE]
#end[FP16]
#if[!FP]
static boolean ult($type$ a, $type$ b) {
@ -2175,6 +2665,17 @@ relativeError));
#end[!FP]
#if[FP]
#if[FP16]
// FP16 classification helpers: decode the raw bits, then defer to $Wideboxtype$.

/** Returns $Wideboxtype$.isNaN for the decoded value. */
static boolean isNaN($type$ a) {
    Float16 decoded = shortBitsToFloat16(a);
    return $Wideboxtype$.isNaN(decoded);
}

/** Returns $Wideboxtype$.isFinite for the decoded value. */
static boolean isFinite($type$ a) {
    Float16 decoded = shortBitsToFloat16(a);
    return $Wideboxtype$.isFinite(decoded);
}

/** Returns $Wideboxtype$.isInfinite for the decoded value. */
static boolean isInfinite($type$ a) {
    Float16 decoded = shortBitsToFloat16(a);
    return $Wideboxtype$.isInfinite(decoded);
}
#else[FP16]
static boolean isNaN($type$ a) {
return $Wideboxtype$.isNaN(a);
}
@ -2184,14 +2685,15 @@ relativeError));
static boolean isInfinite($type$ a) {
return $Wideboxtype$.isInfinite(a);
}
#end[FP16]
#end[FP]
@Test
static void smokeTest1() {
$abstractvectortype$ three = $abstractvectortype$.broadcast(SPECIES, (byte)-3);
$abstractvectortype$ three2 = ($abstractvectortype$) SPECIES.broadcast(-3);
$abstractvectortype$ three = $abstractvectortype$.broadcast(SPECIES, {#if[FP16]?float16ToRawShortBits(Float16.valueOf(-3)):(byte)-3});
$abstractvectortype$ three2 = ($abstractvectortype$) SPECIES.broadcast({#if[FP16]?Float16.valueOf(-3).longValue():-3});
assert(three.eq(three2).allTrue());
$abstractvectortype$ three3 = three2.broadcast(1).broadcast(-3);
$abstractvectortype$ three3 = three2.broadcast({#if[FP16]?float16ToRawShortBits(Float16.valueOf(1)):1}).broadcast({#if[FP16]?Float16.valueOf(-3).longValue():-3});
assert(three.eq(three3).allTrue());
int scale = 2;
Class<?> ETYPE = $type$.class;
@ -2202,14 +2704,18 @@ relativeError));
$abstractvectortype$ higher = three.addIndex(scale);
VectorMask<$Boxtype$> m = three.compare(VectorOperators.LE, higher);
assert(m.allTrue());
m = higher.min(($type$)-1).test(VectorOperators.IS_NEGATIVE);
m = higher.min({#if[FP16]?float16ToRawShortBits(Float16.valueOf(-1)):($type$)-1}).test(VectorOperators.IS_NEGATIVE);
assert(m.allTrue());
#if[FP]
m = higher.test(VectorOperators.IS_FINITE);
assert(m.allTrue());
#end[FP]
$type$ max = higher.reduceLanes(VectorOperators.MAX);
#if[FP16]
assert(max == float16ToRawShortBits(Float16.add(Float16.valueOf(-3), Float16.multiply(Float16.valueOf(scale), Float16.valueOf((SPECIES.length()-1))))));
#else[FP16]
assert(max == -3 + scale * (SPECIES.length()-1));
#end[FP16]
}
private static $type$[]
@ -2286,13 +2792,13 @@ relativeError));
assertEquals(asFloating.species(), SPECIES);
}
#else[FP]
#if[byteOrShort]
#if[byte]
@Test(expectedExceptions = UnsupportedOperationException.class)
void viewAsFloatingLanesTest() {
SPECIES.zero().viewAsFloatingLanes();
}
#else[byteOrShort]
#else[byte]
@Test
void viewAsFloatingLanesTest() {
@ -2303,7 +2809,7 @@ relativeError));
assertEquals(asFloatingSpecies.length(), SPECIES.length());
assertEquals(asFloating.viewAsIntegralLanes().species(), SPECIES);
}
#end[byteOrShort]
#end[byte]
#end[FP]
#if[BITWISE]

View File

@ -41,6 +41,10 @@
import java.lang.foreign.MemorySegment;
import java.lang.foreign.Arena;
import java.lang.foreign.ValueLayout;
#if[FP16]
import jdk.incubator.vector.Float16;
import jdk.incubator.vector.Float16Vector;
#end[FP16]
import jdk.incubator.vector.$Type$Vector;
import jdk.incubator.vector.VectorMask;
#if[MaxBit]
@ -69,7 +73,11 @@ public class $vectorteststype$ extends AbstractVectorLoadStoreTest {
static final int INVOC_COUNT = Integer.getInteger("jdk.incubator.vector.test.loop-iterations", 100);
#if[FP16]
static final ValueLayout.OfShort ELEMENT_LAYOUT = ValueLayout.JAVA_SHORT.withByteAlignment(1);
#else[FP16]
static final ValueLayout.Of$Type$ ELEMENT_LAYOUT = ValueLayout.JAVA_$TYPE$.withByteAlignment(1);
#end[FP16]
#if[MaxBit]
static VectorShape getMaxBit() {
@ -81,6 +89,29 @@ public class $vectorteststype$ extends AbstractVectorLoadStoreTest {
static final int BUFFER_REPS = Integer.getInteger("jdk.incubator.vector.test.buffer-vectors", 25000 / $bits$);
#if[FP16]
/** Compares two raw float16 bit patterns by their float values. */
static void assertEquals(short actual, short expected) {
    float actualValue = Float.float16ToFloat(actual);
    float expectedValue = Float.float16ToFloat(expected);
    Assert.assertEquals(actualValue, expectedValue);
}

/** Same comparison as above, reporting {@code msg} on failure. */
static void assertEquals(short actual, short expected, String msg) {
    float actualValue = Float.float16ToFloat(actual);
    float expectedValue = Float.float16ToFloat(expected);
    Assert.assertEquals(actualValue, expectedValue, msg);
}
/**
 * Compares two arrays of raw float16 bit patterns element by element, by float value.
 * The length check uses the test framework's assertion rather than the Java
 * {@code assert} keyword, so it is enforced even when JVM assertions are disabled.
 */
static void assertEquals(short [] actual, short [] expected) {
    Assert.assertEquals(actual.length, expected.length);
    for (int i = 0; i < actual.length; i++) {
        Assert.assertEquals(Float.float16ToFloat(actual[i]), Float.float16ToFloat(expected[i]));
    }
}
/**
 * Compares two arrays of raw float16 bit patterns element by element, by float value,
 * reporting {@code msg} on failure. The length check uses the test framework's
 * assertion rather than the Java {@code assert} keyword, so it is enforced even when
 * JVM assertions are disabled.
 */
static void assertEquals(short [] actual, short [] expected, String msg) {
    Assert.assertEquals(actual.length, expected.length, msg);
    for (int i = 0; i < actual.length; i++) {
        Assert.assertEquals(Float.float16ToFloat(actual[i]), Float.float16ToFloat(expected[i]), msg);
    }
}
#else[FP16]
static void assertEquals($type$ actual, $type$ expected) {
Assert.assertEquals(actual, expected);
}
@ -95,6 +126,7 @@ public class $vectorteststype$ extends AbstractVectorLoadStoreTest {
static void assertEquals($type$ [] actual, $type$ [] expected, String msg) {
Assert.assertEquals(actual, expected, msg);
}
#end[FP16]
static void assertArraysEquals($type$[] r, $type$[] a, boolean[] mask) {
int i = 0;

View File

@ -0,0 +1,354 @@
/*
* Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package org.openjdk.bench.java.lang;
import java.util.stream.IntStream;
import java.util.concurrent.TimeUnit;
import jdk.incubator.vector.*;
import org.openjdk.jmh.annotations.*;
import static jdk.incubator.vector.Float16.*;
import static java.lang.Float.*;
@OutputTimeUnit(TimeUnit.MILLISECONDS)
@State(Scope.Thread)
@Fork(jvmArgs = {"--add-modules=jdk.incubator.vector", "-Xbatch", "-XX:-TieredCompilation"})
public class Float16VectorOperationsBenchmark {
// Length of every benchmark array. 2057 is odd, so it always leaves a
// remainder for the masked tail path when the lane count is greater than one.
@Param({"1024", "2057"})
int vectorDim;
// Allocated in BmSetup(); not read by any benchmark method in this class.
int [] rexp;
// Destination array written by the element-wise benchmarks.
short [] vectorRes;
// Input arrays of FP16 bit patterns, filled in BmSetup().
short [] vector1;
short [] vector2;
short [] vector3;
// Filled in BmSetup() with infinity (vector4) or NaN (vector5) at even
// indices; not read by any benchmark method in this class.
short [] vector4;
short [] vector5;
// Allocated in BmSetup(); not read by any benchmark method in this class.
boolean [] vectorPredicate;
// FP16 bit patterns of 1.0 and 2.0; not referenced elsewhere in this class.
static final short f16_one = Float.floatToFloat16(1.0f);
static final short f16_two = Float.floatToFloat16(2.0f);
/**
 * Allocates and fills all benchmark arrays once per trial.
 * <p>
 * vector1, vector2 and vector3 receive the FP16 encoding of their own index.
 * vector4 and vector5 receive the same encoding at odd indices and
 * +infinity (vector4) or NaN (vector5) at even indices. Finally, every 64th
 * element of vector1..vector3 is overwritten with a value taken from a small
 * table of special FP16 values.
 */
@Setup(Level.Trial)
public void BmSetup() {
    rexp = new int[vectorDim];
    vectorRes = new short[vectorDim];
    vector1 = new short[vectorDim];
    vector2 = new short[vectorDim];
    vector3 = new short[vectorDim];
    vector4 = new short[vectorDim];
    vector5 = new short[vectorDim];
    vectorPredicate = new boolean[vectorDim];

    final short posInfBits = float16ToRawShortBits(Float16.POSITIVE_INFINITY);
    final short nanBits = float16ToRawShortBits(Float16.NaN);
    for (int idx = 0; idx < vectorDim; idx++) {
        final short indexAsHalf = Float.floatToFloat16((float)idx);
        final boolean evenIndex = (idx & 0x1) == 0;
        vector1[idx] = indexAsHalf;
        vector2[idx] = indexAsHalf;
        vector3[idx] = indexAsHalf;
        vector4[idx] = evenIndex ? posInfBits : indexAsHalf;
        vector5[idx] = evenIndex ? nanBits : indexAsHalf;
    }

    // Special Values: sprinkle them over every 64th slot, rotating through the
    // table with a different phase for each of the three input arrays.
    Float16 [] specialValues = {Float16.NaN, Float16.NEGATIVE_INFINITY, Float16.valueOf(0.0), Float16.valueOf(-0.0), Float16.POSITIVE_INFINITY};
    final int tableSize = specialValues.length;
    for (int idx = 0; idx < vectorDim; idx += 64) {
        vector1[idx] = float16ToRawShortBits(specialValues[idx % tableSize]);
        vector2[idx] = float16ToRawShortBits(specialValues[(idx + 1) % tableSize]);
        vector3[idx] = float16ToRawShortBits(specialValues[(idx + 2) % tableSize]);
    }
}
// Species used by every benchmark method in this class (Float16Vector.SPECIES_PREFERRED).
static final VectorSpecies<Float16> HSPECIES = Float16Vector.SPECIES_PREFERRED;
// Preferred float species; not referenced by any benchmark method in this class.
static final VectorSpecies<Float> FSPECIES = FloatVector.SPECIES_PREFERRED;
/**
 * Benchmarks lane-wise FP16 addition of vector1 and vector2, storing the sums
 * into vectorRes.
 * <p>
 * Whole vectors are processed up to HSPECIES.loopBound(vectorDim); any
 * remaining elements are handled by a single masked iteration.
 */
@Benchmark
public void addBenchmark() {
    int i = 0;
    for (; i < HSPECIES.loopBound(vectorDim); i += HSPECIES.length()) {
        Float16Vector.fromArray(HSPECIES, vector1, i)
                     .lanewise(VectorOperators.ADD,
                               Float16Vector.fromArray(HSPECIES, vector2, i))
                     .intoArray(vectorRes, i);
    }
    if (i < vectorDim) {
        VectorMask<Float16> mask = HSPECIES.indexInRange(i, vectorDim);
        // Both operands have to be loaded under the mask: an unmasked load at
        // this offset would extend past the end of the array.
        Float16Vector.fromArray(HSPECIES, vector1, i, mask)
                     .lanewise(VectorOperators.ADD,
                               Float16Vector.fromArray(HSPECIES, vector2, i, mask))
                     .intoArray(vectorRes, i, mask);
    }
}
/**
 * Benchmarks lane-wise FP16 subtraction (vector1 minus vector2), storing the
 * differences into vectorRes.
 * <p>
 * Whole vectors are processed up to HSPECIES.loopBound(vectorDim); any
 * remaining elements are handled by a single masked iteration.
 */
@Benchmark
public void subBenchmark() {
    int i = 0;
    for (; i < HSPECIES.loopBound(vectorDim); i += HSPECIES.length()) {
        Float16Vector.fromArray(HSPECIES, vector1, i)
                     .lanewise(VectorOperators.SUB,
                               Float16Vector.fromArray(HSPECIES, vector2, i))
                     .intoArray(vectorRes, i);
    }
    if (i < vectorDim) {
        VectorMask<Float16> mask = HSPECIES.indexInRange(i, vectorDim);
        // The tail must use the mask for both loads and for the store: the
        // unmasked forms at this offset would extend past the end of the arrays.
        Float16Vector.fromArray(HSPECIES, vector1, i, mask)
                     .lanewise(VectorOperators.SUB,
                               Float16Vector.fromArray(HSPECIES, vector2, i, mask))
                     .intoArray(vectorRes, i, mask);
    }
}
/**
 * Benchmarks lane-wise FP16 multiplication of vector1 by vector2, storing the
 * products into vectorRes. Elements beyond the last whole vector are handled
 * by one masked iteration.
 */
@Benchmark
public void mulBenchmark() {
    int offset = 0;
    for (; offset < HSPECIES.loopBound(vectorDim); offset += HSPECIES.length()) {
        Float16Vector lhs = Float16Vector.fromArray(HSPECIES, vector1, offset);
        Float16Vector rhs = Float16Vector.fromArray(HSPECIES, vector2, offset);
        lhs.lanewise(VectorOperators.MUL, rhs).intoArray(vectorRes, offset);
    }
    if (offset < vectorDim) {
        // Masked tail: only the lanes that map to valid indices are touched.
        VectorMask<Float16> tailMask = HSPECIES.indexInRange(offset, vectorDim);
        Float16Vector lhs = Float16Vector.fromArray(HSPECIES, vector1, offset, tailMask);
        Float16Vector rhs = Float16Vector.fromArray(HSPECIES, vector2, offset, tailMask);
        lhs.lanewise(VectorOperators.MUL, rhs).intoArray(vectorRes, offset, tailMask);
    }
}
/**
 * Benchmarks lane-wise FP16 division (vector1 divided by vector2), storing the
 * quotients into vectorRes. Elements beyond the last whole vector are handled
 * by one masked iteration.
 */
@Benchmark
public void divBenchmark() {
    int offset = 0;
    for (; offset < HSPECIES.loopBound(vectorDim); offset += HSPECIES.length()) {
        Float16Vector dividend = Float16Vector.fromArray(HSPECIES, vector1, offset);
        Float16Vector divisor = Float16Vector.fromArray(HSPECIES, vector2, offset);
        dividend.lanewise(VectorOperators.DIV, divisor).intoArray(vectorRes, offset);
    }
    if (offset < vectorDim) {
        // Masked tail: only the lanes that map to valid indices are touched.
        VectorMask<Float16> tailMask = HSPECIES.indexInRange(offset, vectorDim);
        Float16Vector dividend = Float16Vector.fromArray(HSPECIES, vector1, offset, tailMask);
        Float16Vector divisor = Float16Vector.fromArray(HSPECIES, vector2, offset, tailMask);
        dividend.lanewise(VectorOperators.DIV, divisor).intoArray(vectorRes, offset, tailMask);
    }
}
/**
 * Benchmarks the lane-wise FP16 FMA operator applied to vector1, vector2 and
 * vector3 (in that operand order), storing the results into vectorRes.
 * Elements beyond the last whole vector are handled by one masked iteration.
 */
@Benchmark
public void fmaBenchmark() {
    int offset = 0;
    for (; offset < HSPECIES.loopBound(vectorDim); offset += HSPECIES.length()) {
        Float16Vector first = Float16Vector.fromArray(HSPECIES, vector1, offset);
        Float16Vector second = Float16Vector.fromArray(HSPECIES, vector2, offset);
        Float16Vector third = Float16Vector.fromArray(HSPECIES, vector3, offset);
        first.lanewise(VectorOperators.FMA, second, third).intoArray(vectorRes, offset);
    }
    if (offset < vectorDim) {
        // Masked tail: only the lanes that map to valid indices are touched.
        VectorMask<Float16> tailMask = HSPECIES.indexInRange(offset, vectorDim);
        Float16Vector first = Float16Vector.fromArray(HSPECIES, vector1, offset, tailMask);
        Float16Vector second = Float16Vector.fromArray(HSPECIES, vector2, offset, tailMask);
        Float16Vector third = Float16Vector.fromArray(HSPECIES, vector3, offset, tailMask);
        first.lanewise(VectorOperators.FMA, second, third).intoArray(vectorRes, offset, tailMask);
    }
}
/**
 * Benchmarks the lane-wise FP16 MAX operator on vector1 and vector2, storing
 * the results into vectorRes. Elements beyond the last whole vector are
 * handled by one masked iteration.
 */
@Benchmark
public void maxBenchmark() {
    int offset = 0;
    for (; offset < HSPECIES.loopBound(vectorDim); offset += HSPECIES.length()) {
        Float16Vector lhs = Float16Vector.fromArray(HSPECIES, vector1, offset);
        Float16Vector rhs = Float16Vector.fromArray(HSPECIES, vector2, offset);
        lhs.lanewise(VectorOperators.MAX, rhs).intoArray(vectorRes, offset);
    }
    if (offset < vectorDim) {
        // Masked tail: only the lanes that map to valid indices are touched.
        VectorMask<Float16> tailMask = HSPECIES.indexInRange(offset, vectorDim);
        Float16Vector lhs = Float16Vector.fromArray(HSPECIES, vector1, offset, tailMask);
        Float16Vector rhs = Float16Vector.fromArray(HSPECIES, vector2, offset, tailMask);
        lhs.lanewise(VectorOperators.MAX, rhs).intoArray(vectorRes, offset, tailMask);
    }
}
/**
 * Benchmarks the lane-wise FP16 MIN operator on vector1 and vector2, storing
 * the results into vectorRes. Elements beyond the last whole vector are
 * handled by one masked iteration.
 */
@Benchmark
public void minBenchmark() {
    int offset = 0;
    for (; offset < HSPECIES.loopBound(vectorDim); offset += HSPECIES.length()) {
        Float16Vector lhs = Float16Vector.fromArray(HSPECIES, vector1, offset);
        Float16Vector rhs = Float16Vector.fromArray(HSPECIES, vector2, offset);
        lhs.lanewise(VectorOperators.MIN, rhs).intoArray(vectorRes, offset);
    }
    if (offset < vectorDim) {
        // Masked tail: only the lanes that map to valid indices are touched.
        VectorMask<Float16> tailMask = HSPECIES.indexInRange(offset, vectorDim);
        Float16Vector lhs = Float16Vector.fromArray(HSPECIES, vector1, offset, tailMask);
        Float16Vector rhs = Float16Vector.fromArray(HSPECIES, vector2, offset, tailMask);
        lhs.lanewise(VectorOperators.MIN, rhs).intoArray(vectorRes, offset, tailMask);
    }
}
/**
 * Benchmarks the lane-wise FP16 SQRT operator on vector1, storing the results
 * into vectorRes. Elements beyond the last whole vector are handled by one
 * masked iteration.
 */
@Benchmark
public void sqrtBenchmark() {
    int offset = 0;
    for (; offset < HSPECIES.loopBound(vectorDim); offset += HSPECIES.length()) {
        Float16Vector input = Float16Vector.fromArray(HSPECIES, vector1, offset);
        input.lanewise(VectorOperators.SQRT).intoArray(vectorRes, offset);
    }
    if (offset < vectorDim) {
        // Masked tail: only the lanes that map to valid indices are touched.
        VectorMask<Float16> tailMask = HSPECIES.indexInRange(offset, vectorDim);
        Float16Vector input = Float16Vector.fromArray(HSPECIES, vector1, offset, tailMask);
        input.lanewise(VectorOperators.SQRT).intoArray(vectorRes, offset, tailMask);
    }
}
/**
 * Cosine-similarity style kernel over vector1 and vector2 in which every
 * accumulation step is an explicit multiply followed by an explicit add.
 * <p>
 * Three per-lane accumulators are kept: products of vector1 and vector2,
 * squares of vector1, and squares of vector2. The returned value is the lane
 * sum of (product accumulator) / (vector1 squares * vector2 squares).
 * <p>
 * NOTE(review): the code divides by the product of the squared sums without
 * taking a square root, and when a tail exists the final reduction only
 * covers the lanes selected by the tail mask - confirm both are intended for
 * this benchmark.
 *
 * @return the reduced value as FP16 bits
 */
@Benchmark
public short cosineSimilarityDoubleRoundingFP16() {
    Float16Vector dotAcc = Float16Vector.broadcast(HSPECIES, (short)0);
    Float16Vector sumSq1 = Float16Vector.broadcast(HSPECIES, (short)0);
    Float16Vector sumSq2 = Float16Vector.broadcast(HSPECIES, (short)0);
    int offset = 0;
    // cosine distance = (VEC1 . VEC2) / ||VEC1||.||VEC2||
    for (; offset < HSPECIES.loopBound(vectorDim); offset += HSPECIES.length()) {
        // Separate multiply and add operations, so each step rounds twice.
        Float16Vector a = Float16Vector.fromArray(HSPECIES, vector1, offset);
        Float16Vector b = Float16Vector.fromArray(HSPECIES, vector2, offset);
        dotAcc = a.lanewise(VectorOperators.MUL, b).lanewise(VectorOperators.ADD, dotAcc);
        sumSq1 = a.lanewise(VectorOperators.MUL, a).lanewise(VectorOperators.ADD, sumSq1);
        sumSq2 = b.lanewise(VectorOperators.MUL, b).lanewise(VectorOperators.ADD, sumSq2);
    }
    if (offset >= vectorDim) {
        // No tail: reduce across all lanes.
        Float16Vector denominator = sumSq1.lanewise(VectorOperators.MUL, sumSq2);
        return dotAcc.lanewise(VectorOperators.DIV, denominator)
                     .reduceLanes(VectorOperators.ADD);
    }
    // Tail: one more accumulation step with masked loads, then a masked reduction.
    VectorMask<Float16> tailMask = HSPECIES.indexInRange(offset, vectorDim);
    Float16Vector a = Float16Vector.fromArray(HSPECIES, vector1, offset, tailMask);
    Float16Vector b = Float16Vector.fromArray(HSPECIES, vector2, offset, tailMask);
    dotAcc = a.lanewise(VectorOperators.MUL, b).lanewise(VectorOperators.ADD, dotAcc);
    sumSq1 = a.lanewise(VectorOperators.MUL, a).lanewise(VectorOperators.ADD, sumSq1);
    sumSq2 = b.lanewise(VectorOperators.MUL, b).lanewise(VectorOperators.ADD, sumSq2);
    Float16Vector denominator = sumSq1.lanewise(VectorOperators.MUL, sumSq2);
    return dotAcc.lanewise(VectorOperators.DIV, denominator)
                 .reduceLanes(VectorOperators.ADD, tailMask);
}
/**
 * Cosine-similarity style kernel over vector1 and vector2 in which every
 * accumulation step is a single FMA operation.
 * <p>
 * Three per-lane accumulators are kept: products of vector1 and vector2,
 * squares of vector1, and squares of vector2. The returned value is the lane
 * sum of (product accumulator) / (vector1 squares * vector2 squares).
 * <p>
 * NOTE(review): the code divides by the product of the squared sums without
 * taking a square root, and when a tail exists the final reduction only
 * covers the lanes selected by the tail mask - confirm both are intended for
 * this benchmark.
 *
 * @return the reduced value as FP16 bits
 */
@Benchmark
public short cosineSimilaritySingleRoundingFP16() {
    int i = 0;
    Float16Vector macResVec = Float16Vector.broadcast(HSPECIES, (short)0);
    Float16Vector vector1SquareVec = Float16Vector.broadcast(HSPECIES, (short)0);
    Float16Vector vector2SquareVec = Float16Vector.broadcast(HSPECIES, (short)0);
    // cosine distance = (VEC1 . VEC2) / ||VEC1||.||VEC2||
    for (; i < HSPECIES.loopBound(vectorDim); i += HSPECIES.length()) {
        // Fused multiply-add: each accumulation step is a single operation
        // with a single rounding.
        Float16Vector vec1 = Float16Vector.fromArray(HSPECIES, vector1, i);
        Float16Vector vec2 = Float16Vector.fromArray(HSPECIES, vector2, i);
        macResVec = vec1.lanewise(VectorOperators.FMA, vec2, macResVec);
        vector1SquareVec = vec1.lanewise(VectorOperators.FMA, vec1, vector1SquareVec);
        vector2SquareVec = vec2.lanewise(VectorOperators.FMA, vec2, vector2SquareVec);
    }
    if (i < vectorDim) {
        // Tail: one more accumulation step with masked loads, then a masked reduction.
        VectorMask<Float16> mask = HSPECIES.indexInRange(i, vectorDim);
        Float16Vector vec1 = Float16Vector.fromArray(HSPECIES, vector1, i, mask);
        Float16Vector vec2 = Float16Vector.fromArray(HSPECIES, vector2, i, mask);
        macResVec = vec1.lanewise(VectorOperators.FMA, vec2, macResVec);
        vector1SquareVec = vec1.lanewise(VectorOperators.FMA, vec1, vector1SquareVec);
        vector2SquareVec = vec2.lanewise(VectorOperators.FMA, vec2, vector2SquareVec);
        return macResVec.lanewise(VectorOperators.DIV,
                                  vector1SquareVec.lanewise(VectorOperators.MUL,
                                                            vector2SquareVec))
                        .reduceLanes(VectorOperators.ADD, mask);
    } else {
        return macResVec.lanewise(VectorOperators.DIV,
                                  vector1SquareVec.lanewise(VectorOperators.MUL,
                                                            vector2SquareVec))
                        .reduceLanes(VectorOperators.ADD);
    }
}
/**
 * Euclidean-distance style kernel: accumulates, per lane, the squared
 * differences of vector1 and vector2 with FMA, then applies SQRT to each lane
 * of the accumulator and sums the lanes.
 * <p>
 * NOTE(review): the square root is taken per lane before the lane sum, and
 * when a tail exists the final reduction only covers the lanes selected by
 * the tail mask - confirm both are intended for this benchmark.
 *
 * @return the reduced value as FP16 bits
 */
@Benchmark
public short euclideanDistanceFP16() {
    Float16Vector squaredDiffAcc = Float16Vector.broadcast(HSPECIES, (short)0);
    int offset = 0;
    for (; offset < HSPECIES.loopBound(vectorDim); offset += HSPECIES.length()) {
        Float16Vector minuend = Float16Vector.fromArray(HSPECIES, vector1, offset);
        Float16Vector subtrahend = Float16Vector.fromArray(HSPECIES, vector2, offset);
        Float16Vector delta = minuend.lanewise(VectorOperators.SUB, subtrahend);
        squaredDiffAcc = delta.lanewise(VectorOperators.FMA, delta, squaredDiffAcc);
    }
    if (offset >= vectorDim) {
        // No tail: reduce across all lanes.
        return squaredDiffAcc.lanewise(VectorOperators.SQRT)
                             .reduceLanes(VectorOperators.ADD);
    }
    // Tail: one more accumulation step with masked loads, then a masked reduction.
    VectorMask<Float16> tailMask = HSPECIES.indexInRange(offset, vectorDim);
    Float16Vector minuend = Float16Vector.fromArray(HSPECIES, vector1, offset, tailMask);
    Float16Vector subtrahend = Float16Vector.fromArray(HSPECIES, vector2, offset, tailMask);
    Float16Vector delta = minuend.lanewise(VectorOperators.SUB, subtrahend);
    squaredDiffAcc = delta.lanewise(VectorOperators.FMA, delta, squaredDiffAcc);
    return squaredDiffAcc.lanewise(VectorOperators.SQRT)
                         .reduceLanes(VectorOperators.ADD, tailMask);
}
/**
 * Dot-product kernel: accumulates, per lane, the products of vector1 and
 * vector2 with FMA and finally sums the lanes of the accumulator.
 * <p>
 * NOTE(review): when a tail exists the final reduction only covers the lanes
 * selected by the tail mask - confirm this is intended for this benchmark.
 *
 * @return the reduced value as FP16 bits
 */
@Benchmark
public short dotProductFP16() {
    Float16Vector productAcc = Float16Vector.broadcast(HSPECIES, (short)0);
    int offset = 0;
    for (; offset < HSPECIES.loopBound(vectorDim); offset += HSPECIES.length()) {
        Float16Vector lhs = Float16Vector.fromArray(HSPECIES, vector1, offset);
        Float16Vector rhs = Float16Vector.fromArray(HSPECIES, vector2, offset);
        productAcc = lhs.lanewise(VectorOperators.FMA, rhs, productAcc);
    }
    if (offset >= vectorDim) {
        // No tail: reduce across all lanes.
        return productAcc.reduceLanes(VectorOperators.ADD);
    }
    // Tail: one more accumulation step with masked loads, then a masked reduction.
    VectorMask<Float16> tailMask = HSPECIES.indexInRange(offset, vectorDim);
    Float16Vector lhs = Float16Vector.fromArray(HSPECIES, vector1, offset, tailMask);
    Float16Vector rhs = Float16Vector.fromArray(HSPECIES, vector2, offset, tailMask);
    productAcc = lhs.lanewise(VectorOperators.FMA, rhs, productAcc);
    return productAcc.reduceLanes(VectorOperators.ADD, tailMask);
}
}