8370691: Add new Float16Vector type and enable intrinsification of vector operations supported by auto-vectorizer

Co-authored-by: Bhavana Kilambi <bkilambi@openjdk.org> Reviewed-by: psandoz, epeter, xgong, sherman
2026-07-28 03:43:21 +00:00 · 2026-06-11 03:03:04 +00:00 · 2026-06-11 03:03:04 +00:00 · 90dc4208f8
commit 90dc4208f8
parent dc4bb5acbe
49 changed files with 46460 additions and 254 deletions
--- a/src/hotspot/cpu/aarch64/aarch64.ad
+++ b/src/hotspot/cpu/aarch64/aarch64.ad
@ -8294,6 +8294,34 @@ instruct castII_checked(iRegI dst, rFlagsReg cr)
  ins_pipe(pipe_slow);
 %}

+// The unchecked and checked variants for CastII below both use iRegINoSp for src and dst
+// as some consumers of CastII node like ConvHF2F forbid the stack pointer as an input
+// (please see convHF2F_reg_reg rule which requires input to be in an iRegINoSp register).
+instruct castII_nosp(iRegINoSp dst)
+%{
+  predicate(VerifyConstraintCasts == 0);  // unchecked variant: applies only when VerifyConstraintCasts is 0
+  match(Set dst (CastII dst));  // input and result are the same operand, so no move is needed
+
+  size(0);  // the rule contributes no code bytes (its encoding below is empty)
+  format %{ "# castII of $dst" %}
+  ins_encode(/* empty encoding */);
+  ins_cost(0);
+  ins_pipe(pipe_class_empty);
+%}
+
+instruct castII_checked_nosp(iRegINoSp dst, rFlagsReg cr)
+%{
+  predicate(VerifyConstraintCasts > 0);  // checked counterpart of castII_nosp: applies when VerifyConstraintCasts is positive
+  match(Set dst (CastII dst));  // input and result are the same operand
+  effect(KILL cr);  // the flags register is declared clobbered by this rule
+
+  format %{ "# castII_checked of $dst" %}
+  ins_encode %{
+    __ verify_int_in_range(_idx, bottom_type()->is_int(), $dst$$Register, rscratch1);
+  %}
+  ins_pipe(pipe_slow);  // unlike the unchecked rule, this one emits code and uses the slow pipeline class
+%}
+
 instruct castLL(iRegL dst)
 %{
  predicate(VerifyConstraintCasts == 0);
--- a/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4
+++ b/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4
@ -241,9 +241,9 @@ source %{
          return false;
        }
        break;
-      // At the time of writing this, the Vector API has no half-float (FP16) species.
-      // Consequently, AddReductionVHF and MulReductionVHF are only produced by the
-      // auto-vectorizer, which requires strictly ordered semantics for FP reductions.
+      // AddReductionVHF and MulReductionVHF are currently only produced by the
+      // auto-vectorizer, which requires strictly ordered semantics for FP reductions
+      // (the Vector API does not yet intrinsify Float16 reductions).
      //
      // There is no direct Neon instruction that performs strictly ordered floating
      // point add reduction. Hence, on Neon only machines, the add reduction operation
@ -354,9 +354,9 @@ source %{
        opcode = Op_StoreVectorScatterMasked;
        break;
      // Currently, the masked versions of the following 8 Float16 operations are disabled.
-      // When the support for Float16 vector classes is added in VectorAPI and the masked
-      // Float16 IR can be generated, these masked operations will be enabled and relevant
-      // backend support added.
+      // The Vector API does not yet emit predicated Float16 IR. When such masked IR can be
+      // generated, these masked operations will be enabled and the relevant backend support
+      // added.
      case Op_AddVHF:
      case Op_SubVHF:
      case Op_MulVHF:
--- a/src/hotspot/share/opto/vectorIntrinsics.cpp
+++ b/src/hotspot/share/opto/vectorIntrinsics.cpp
@ -298,7 +298,7 @@ static bool is_klass_initialized(const TypeInstPtr* vec_klass) {
 }

 static bool is_primitive_lane_type(VectorSupport::LaneType laneType) {
-  return laneType >= VectorSupport::LT_FLOAT && laneType <= VectorSupport::LT_LONG;
+  return laneType >= VectorSupport::LT_FLOAT && laneType <= VectorSupport::LT_FLOAT16;  // upper bound widened: LT_FLOAT16 now passes this check
 }

 static BasicType get_vector_primitive_lane_type(VectorSupport::LaneType lanetype) {
@ -310,10 +310,15 @@ static BasicType get_vector_primitive_lane_type(VectorSupport::LaneType lanetype
    case VectorSupport::LaneType::LT_INT: return T_INT;
    case VectorSupport::LaneType::LT_SHORT: return T_SHORT;
    case VectorSupport::LaneType::LT_BYTE: return T_BYTE;
+    case VectorSupport::LaneType::LT_FLOAT16: return T_SHORT;
  }
  return T_ILLEGAL;
 }

+static bool is_supported_lane_type(VectorSupport::LaneType laneType) {
+  return laneType >= VectorSupport::LT_FLOAT && laneType <= VectorSupport::LT_LONG;  // range stops at LT_LONG, so LT_FLOAT16 is rejected here
+}
+
 //
 //  <V extends Vector<E>,
 //   M extends VectorMask<E>,
@ -557,6 +562,11 @@ bool LibraryCallKit::inline_vector_call(int arity) {
    return false;
  }

+  if (!is_supported_lane_type(vltype)) {
+    log_if_needed("  ** unsupported lane type =%s", VectorSupport::lanetype2name(vltype));
+    return false;
+  }
+

  if (!is_klass_initialized(vector_klass)) {
    log_if_needed("  ** klass argument not initialized");
@ -651,6 +661,11 @@ bool LibraryCallKit::inline_vector_mask_operation() {
    return false;
  }

+  if (!is_supported_lane_type(vltype)) {
+    log_if_needed("  ** unsupported lane type =%s", VectorSupport::lanetype2name(vltype));
+    return false;
+  }
+
  int num_elem = vlen->get_con();
  BasicType elem_bt = get_vector_primitive_lane_type(vltype);
  int mopc = VectorSupport::vop2ideal(oper->get_con(), vltype);
@ -721,6 +736,12 @@ bool LibraryCallKit::inline_vector_frombits_coerced() {
    return false;
  }

+  int  bcast_mode = mode->get_con();  // read before the lane-type check so MODE_BROADCAST can be exempted
+  if (!is_supported_lane_type(vltype) && bcast_mode != VectorSupport::MODE_BROADCAST) {
+    log_if_needed("  ** unsupported lane type =%s", VectorSupport::lanetype2name(vltype));
+    return false; // an unsupported lane type is only accepted in broadcast mode
+  }
+
  if (!is_klass_initialized(vector_klass)) {
    log_if_needed("  ** klass argument not initialized");
    return false;
@ -732,7 +753,6 @@ bool LibraryCallKit::inline_vector_frombits_coerced() {
  const TypeInstPtr* vbox_type = TypeInstPtr::make_exact(TypePtr::NotNull, vbox_klass);

  bool is_mask = is_vector_mask(vbox_klass);
-  int  bcast_mode = mode->get_con();
  VectorMaskUseType checkFlags = (VectorMaskUseType)(is_mask ? VecMaskUseAll : VecMaskNotUsed);
  int opc = bcast_mode == VectorSupport::MODE_BITS_COERCED_LONG_TO_MASK ? Op_VectorLongToMask : Op_Replicate;

@ -1296,6 +1316,11 @@ bool LibraryCallKit::inline_vector_gather_scatter(bool is_scatter) {
    return false;
  }

+  if (!is_supported_lane_type(vltype)) {
+    log_if_needed("  ** unsupported lane type =%s", VectorSupport::lanetype2name(vltype));
+    return false;
+  }
+
  BasicType elem_bt = get_vector_primitive_lane_type(vltype);
  int num_elem = vlen->get_con();
  int idx_num_elem = idx_vlen->get_con();
@ -1479,6 +1504,10 @@ bool LibraryCallKit::inline_vector_reduction() {
    return false;
  }

+  if (!is_supported_lane_type(vltype)) {
+    log_if_needed("  ** unsupported lane type =%s", VectorSupport::lanetype2name(vltype));
+    return false;
+  }
  BasicType elem_bt = get_vector_primitive_lane_type(vltype);
  const Type* vmask_type = gvn().type(argument(6));
  bool is_masked_op = vmask_type != TypePtr::NULL_PTR;
@ -1624,6 +1653,11 @@ bool LibraryCallKit::inline_vector_test() {
    return false;
  }

+  if (!is_supported_lane_type(vltype)) {
+    log_if_needed("  ** unsupported lane type =%s", VectorSupport::lanetype2name(vltype));
+    return false;
+  }
+
  if (!is_klass_initialized(vector_klass)) {
    log_if_needed("  ** klass argument not initialized");
    return false;
@ -1773,6 +1807,11 @@ bool LibraryCallKit::inline_vector_compare() {
    return false;
  }

+  if (!is_supported_lane_type(vltype)) {
+    log_if_needed("  ** unsupported lane type =%s", VectorSupport::lanetype2name(vltype));
+    return false;
+  }
+
  if (!is_klass_initialized(vector_klass) || !is_klass_initialized(mask_klass)) {
    log_if_needed("  ** klass argument not initialized");
    return false;
@ -1893,6 +1932,10 @@ bool LibraryCallKit::inline_vector_rearrange() {
    return false;
  }

+  if (!is_supported_lane_type(vltype)) {
+    log_if_needed("  ** unsupported lane type =%s", VectorSupport::lanetype2name(vltype));
+    return false;
+  }
  BasicType elem_bt = get_vector_primitive_lane_type(vltype);
  BasicType shuffle_bt = elem_bt;
  if (shuffle_bt == T_FLOAT) {
@ -2029,6 +2072,10 @@ bool LibraryCallKit::inline_vector_select_from() {
    return false;
  }

+  if (!is_supported_lane_type(vltype)) {
+    log_if_needed("  ** unsupported lane type =%s", VectorSupport::lanetype2name(vltype));
+    return false;
+  }
  int num_elem = vlen->get_con();
  BasicType elem_bt = get_vector_primitive_lane_type(vltype);
  if (!is_power_of_2(num_elem)) {
@ -2193,6 +2240,11 @@ bool LibraryCallKit::inline_vector_broadcast_int() {
    return false;
  }

+  if (!is_supported_lane_type(vltype)) {
+    log_if_needed("  ** unsupported lane type =%s", VectorSupport::lanetype2name(vltype));
+    return false;
+  }
+
  const Type* vmask_type = gvn().type(argument(7));
  bool is_masked_op = vmask_type != TypePtr::NULL_PTR;
  if (is_masked_op) {
@ -2369,6 +2421,16 @@ bool LibraryCallKit::inline_vector_convert() {
    log_if_needed("  ** not a primitive to lt=%s", VectorSupport::lanetype2name(vltype_to));
    return false; // should be primitive type
  }
+
+  if (!is_supported_lane_type(vltype_from)) {
+    log_if_needed("  ** unsupported lane type =%s", VectorSupport::lanetype2name(vltype_from));
+    return false;
+  }
+
+  if (!is_supported_lane_type(vltype_to)) {
+    log_if_needed("  ** unsupported lane type =%s", VectorSupport::lanetype2name(vltype_to));
+    return false;
+  }
  BasicType elem_bt_from = get_vector_primitive_lane_type(vltype_from);
  BasicType elem_bt_to = get_vector_primitive_lane_type(vltype_to);

@ -2550,6 +2612,11 @@ bool LibraryCallKit::inline_vector_insert() {
    return false;
  }

+  if (!is_supported_lane_type(vltype)) {
+    log_if_needed("  ** unsupported lane type =%s", VectorSupport::lanetype2name(vltype));
+    return false;
+  }
+
  if (!is_klass_initialized(vector_klass)) {
    log_if_needed("  ** klass argument not initialized");
    return false;
@ -2638,6 +2705,11 @@ bool LibraryCallKit::inline_vector_extract() {
    return false;
  }

+  if (!is_supported_lane_type(vltype)) {
+    log_if_needed("  ** unsupported lane type =%s", VectorSupport::lanetype2name(vltype));
+    return false;
+  }
+
  if (!is_klass_initialized(vector_klass)) {
    log_if_needed("  ** klass argument not initialized");
    return false;
@ -2822,6 +2894,11 @@ bool LibraryCallKit::inline_vector_select_from_two_vectors() {
    return false;
  }

+  if (!is_supported_lane_type(vltype)) {
+    log_if_needed("  ** unsupported lane type =%s", VectorSupport::lanetype2name(vltype));
+    return false;
+  }
+
  if (!is_klass_initialized(vector_klass)) {
    log_if_needed("  ** klass argument not initialized");
    return false;
@ -2960,6 +3037,11 @@ bool LibraryCallKit::inline_vector_compress_expand() {
    return false;
  }

+  if (!is_supported_lane_type(vltype)) {
+    log_if_needed("  ** unsupported lane type =%s", VectorSupport::lanetype2name(vltype));
+    return false;
+  }
+
  int num_elem = vlen->get_con();
  BasicType elem_bt = get_vector_primitive_lane_type(vltype);
  int opc = VectorSupport::vop2ideal(opr->get_con(), vltype);
@ -3035,6 +3117,11 @@ bool LibraryCallKit::inline_index_vector() {
    return false;
  }

+  if (!is_supported_lane_type(vltype)) {
+    log_if_needed("  ** unsupported lane type =%s", VectorSupport::lanetype2name(vltype));
+    return false;
+  }
+
  if (!is_klass_initialized(vector_klass)) {
    log_if_needed("  ** klass argument not initialized");
    return false;
@ -3170,6 +3257,11 @@ bool LibraryCallKit::inline_index_partially_in_upper_range() {
    return false;
  }

+  if (!is_supported_lane_type(vltype)) {
+    log_if_needed("  ** unsupported lane type =%s", VectorSupport::lanetype2name(vltype));
+    return false;
+  }
+
  if (!is_klass_initialized(mask_klass)) {
    log_if_needed("  ** klass argument not initialized");
    return false;
--- a/src/hotspot/share/prims/vectorSupport.cpp
+++ b/src/hotspot/share/prims/vectorSupport.cpp
@ -206,9 +206,10 @@ const char* VectorSupport::lanetype2name(LaneType lane_type) {
    "byte",
    "short",
    "int",
-    "long"
+    "long",
+    "float16",
  };
-  if (lane_type >= LT_FLOAT && lane_type <= LT_LONG) {
+  if (lane_type >= LT_FLOAT && lane_type <= LT_FLOAT16) {
    return lanetype2name[lane_type];
  }
  assert(false, "unknown lane type: %d", (int)lane_type);
@ -224,6 +225,7 @@ int VectorSupport::vop2ideal(jint id, LaneType lt) {
        case LT_SHORT:  // fall-through
        case LT_INT:    return Op_AddI;
        case LT_LONG:   return Op_AddL;
+        case LT_FLOAT16:  return Op_AddHF;
        case LT_FLOAT:  return Op_AddF;
        case LT_DOUBLE: return Op_AddD;
        default: return 0;
@ -236,6 +238,7 @@ int VectorSupport::vop2ideal(jint id, LaneType lt) {
        case LT_SHORT:  // fall-through
        case LT_INT:    return Op_SubI;
        case LT_LONG:   return Op_SubL;
+        case LT_FLOAT16: return Op_SubHF;
        case LT_FLOAT:  return Op_SubF;
        case LT_DOUBLE: return Op_SubD;
        default: return 0;
@ -248,6 +251,7 @@ int VectorSupport::vop2ideal(jint id, LaneType lt) {
        case LT_SHORT:  // fall-through
        case LT_INT:    return Op_MulI;
        case LT_LONG:   return Op_MulL;
+        case LT_FLOAT16: return Op_MulHF;
        case LT_FLOAT:  return Op_MulF;
        case LT_DOUBLE: return Op_MulD;
        default: return 0;
@ -260,6 +264,7 @@ int VectorSupport::vop2ideal(jint id, LaneType lt) {
        case LT_SHORT:  // fall-through
        case LT_INT:    return Op_DivI;
        case LT_LONG:   return Op_DivL;
+        case LT_FLOAT16: return Op_DivHF;
        case LT_FLOAT:  return Op_DivF;
        case LT_DOUBLE: return Op_DivD;
        default: return 0;
@ -272,6 +277,7 @@ int VectorSupport::vop2ideal(jint id, LaneType lt) {
        case LT_SHORT:
        case LT_INT:    return Op_MinI;
        case LT_LONG:   return Op_MinL;
+        case LT_FLOAT16: return Op_MinHF;
        case LT_FLOAT:  return Op_MinF;
        case LT_DOUBLE: return Op_MinD;
        default: return 0;
@ -284,6 +290,7 @@ int VectorSupport::vop2ideal(jint id, LaneType lt) {
        case LT_SHORT:
        case LT_INT:    return Op_MaxI;
        case LT_LONG:   return Op_MaxL;
+        case LT_FLOAT16: return Op_MaxHF;
        case LT_FLOAT:  return Op_MaxF;
        case LT_DOUBLE: return Op_MaxD;
        default: return 0;
@ -316,6 +323,7 @@ int VectorSupport::vop2ideal(jint id, LaneType lt) {
        case LT_SHORT:  // fall-through
        case LT_INT:    return Op_AbsI;
        case LT_LONG:   return Op_AbsL;
+        case LT_FLOAT16: return 0;
        case LT_FLOAT:  return Op_AbsF;
        case LT_DOUBLE: return Op_AbsD;
        default: return 0;
@ -328,6 +336,7 @@ int VectorSupport::vop2ideal(jint id, LaneType lt) {
        case LT_SHORT:  // fall-through
        case LT_INT:    return Op_NegI;
        case LT_LONG:   return Op_NegL;
+        case LT_FLOAT16: return 0;
        case LT_FLOAT:  return Op_NegF;
        case LT_DOUBLE: return Op_NegD;
        default: return 0;
@ -366,6 +375,7 @@ int VectorSupport::vop2ideal(jint id, LaneType lt) {
    }
    case VECTOR_OP_SQRT: {
      switch (lt) {
+        case LT_FLOAT16:  return Op_SqrtHF;
        case LT_FLOAT:  return Op_SqrtF;
        case LT_DOUBLE: return Op_SqrtD;
        default: return 0;
@ -374,6 +384,7 @@ int VectorSupport::vop2ideal(jint id, LaneType lt) {
    }
    case VECTOR_OP_FMA: {
      switch (lt) {
+        case LT_FLOAT16:  return Op_FmaHF;
        case LT_FLOAT:  return Op_FmaF;
        case LT_DOUBLE: return Op_FmaD;
        default: return 0;
@ -436,6 +447,7 @@ int VectorSupport::vop2ideal(jint id, LaneType lt) {
        case LT_SHORT:  // fall-through
        case LT_INT:    // fall-through
        case LT_LONG:   // fall-through
+        case LT_FLOAT16: // fall-through
        case LT_FLOAT:  // fall-through
        case LT_DOUBLE: return Op_VectorMaskLastTrue;
        default: return 0;
@ -448,6 +460,7 @@ int VectorSupport::vop2ideal(jint id, LaneType lt) {
        case LT_SHORT:  // fall-through
        case LT_INT:    // fall-through
        case LT_LONG:   // fall-through
+        case LT_FLOAT16: // fall-through
        case LT_FLOAT:  // fall-through
        case LT_DOUBLE: return Op_VectorMaskFirstTrue;
        default: return 0;
@ -460,6 +473,7 @@ int VectorSupport::vop2ideal(jint id, LaneType lt) {
        case LT_SHORT:  // fall-through
        case LT_INT:    // fall-through
        case LT_LONG:   // fall-through
+        case LT_FLOAT16: // fall-through
        case LT_FLOAT:  // fall-through
        case LT_DOUBLE: return Op_VectorMaskTrueCount;
        default: return 0;
@ -472,6 +486,7 @@ int VectorSupport::vop2ideal(jint id, LaneType lt) {
        case LT_SHORT:  // fall-through
        case LT_INT:    // fall-through
        case LT_LONG:   // fall-through
+        case LT_FLOAT16: // fall-through
        case LT_FLOAT:  // fall-through
        case LT_DOUBLE: return Op_VectorMaskToLong;
        default: return 0;
@ -484,6 +499,7 @@ int VectorSupport::vop2ideal(jint id, LaneType lt) {
        case LT_SHORT:  // fall-through
        case LT_INT:    // fall-through
        case LT_LONG:   // fall-through
+        case LT_FLOAT16: // fall-through
        case LT_FLOAT:  // fall-through
        case LT_DOUBLE: return Op_ExpandV;
        default: return 0;
@ -496,6 +512,7 @@ int VectorSupport::vop2ideal(jint id, LaneType lt) {
        case LT_SHORT:  // fall-through
        case LT_INT:    // fall-through
        case LT_LONG:   // fall-through
+        case LT_FLOAT16: // fall-through
        case LT_FLOAT:  // fall-through
        case LT_DOUBLE: return Op_CompressV;
        default: return 0;
@ -508,6 +525,7 @@ int VectorSupport::vop2ideal(jint id, LaneType lt) {
        case LT_SHORT:  // fall-through
        case LT_INT:    // fall-through
        case LT_LONG:   // fall-through
+        case LT_FLOAT16: // fall-through
        case LT_FLOAT:  // fall-through
        case LT_DOUBLE: return Op_CompressM;
        default: return 0;
--- a/src/hotspot/share/prims/vectorSupport.hpp
+++ b/src/hotspot/share/prims/vectorSupport.hpp
@ -144,7 +144,8 @@ class VectorSupport : AllStatic {
    LT_BYTE      = 2,
    LT_SHORT     = 3,
    LT_INT       = 4,
-    LT_LONG      = 5
+    LT_LONG      = 5,
+    LT_FLOAT16   = 6
  };

  enum {
--- a/src/java.base/share/classes/jdk/internal/vm/vector/VectorSupport.java
+++ b/src/java.base/share/classes/jdk/internal/vm/vector/VectorSupport.java
@ -156,7 +156,8 @@ public class VectorSupport {
        LT_BYTE      = 2,
        LT_SHORT     = 3,
        LT_INT       = 4,
-        LT_LONG      = 5;
+        LT_LONG      = 5,
+        LT_FLOAT16   = 6;

    /* ============================================================================ */

--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/AbstractMask.java
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/AbstractMask.java
@ -36,7 +36,9 @@ abstract sealed class AbstractMask<E> extends VectorMask<E>
        FloatVector64.FloatMask64, FloatVector128.FloatMask128, FloatVector256.FloatMask256, FloatVector512.FloatMask512, FloatVectorMax.FloatMaskMax,
        IntVector64.IntMask64, IntVector128.IntMask128, IntVector256.IntMask256, IntVector512.IntMask512, IntVectorMax.IntMaskMax,
        LongVector64.LongMask64, LongVector128.LongMask128, LongVector256.LongMask256, LongVector512.LongMask512, LongVectorMax.LongMaskMax,
-        ShortVector64.ShortMask64, ShortVector128.ShortMask128, ShortVector256.ShortMask256, ShortVector512.ShortMask512, ShortVectorMax.ShortMaskMax {
+        ShortVector64.ShortMask64, ShortVector128.ShortMask128, ShortVector256.ShortMask256, ShortVector512.ShortMask512, ShortVectorMax.ShortMaskMax,
+        Float16Vector64.Float16Mask64, Float16Vector128.Float16Mask128, Float16Vector256.Float16Mask256, Float16Vector512.Float16Mask512,
+        Float16VectorMax.Float16MaskMax {
    AbstractMask(boolean[] bits) {
        super(bits);
    }
--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/AbstractShuffle.java
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/AbstractShuffle.java
@ -35,7 +35,8 @@ abstract sealed class AbstractShuffle<E> extends VectorShuffle<E>
        FloatVector64.FloatShuffle64, FloatVector128.FloatShuffle128, FloatVector256.FloatShuffle256, FloatVector512.FloatShuffle512, FloatVectorMax.FloatShuffleMax,
        IntVector64.IntShuffle64, IntVector128.IntShuffle128, IntVector256.IntShuffle256, IntVector512.IntShuffle512, IntVectorMax.IntShuffleMax,
        LongVector64.LongShuffle64, LongVector128.LongShuffle128, LongVector256.LongShuffle256, LongVector512.LongShuffle512, LongVectorMax.LongShuffleMax,
-        ShortVector64.ShortShuffle64, ShortVector128.ShortShuffle128, ShortVector256.ShortShuffle256, ShortVector512.ShortShuffle512, ShortVectorMax.ShortShuffleMax {
+        ShortVector64.ShortShuffle64, ShortVector128.ShortShuffle128, ShortVector256.ShortShuffle256, ShortVector512.ShortShuffle512, ShortVectorMax.ShortShuffleMax,
+        Float16Vector64.Float16Shuffle64, Float16Vector128.Float16Shuffle128, Float16Vector256.Float16Shuffle256, Float16Vector512.Float16Shuffle512, Float16VectorMax.Float16ShuffleMax {
    static final IntUnaryOperator IDENTITY = i -> i;

    // Internal representation allows for a maximum index of E.MAX_VALUE - 1
--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/AbstractSpecies.java
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/AbstractSpecies.java
@ -37,7 +37,7 @@ import jdk.internal.vm.annotation.TrustFinalFields;
 abstract sealed class AbstractSpecies<E> extends jdk.internal.vm.vector.VectorSupport.VectorSpecies<E>
        implements VectorSpecies<E>
        permits ByteVector.ByteSpecies, DoubleVector.DoubleSpecies, FloatVector.FloatSpecies,
-        IntVector.IntSpecies, LongVector.LongSpecies, ShortVector.ShortSpecies {
+        IntVector.IntSpecies, LongVector.LongSpecies, ShortVector.ShortSpecies, Float16Vector.Float16Species {
    final VectorShape vectorShape;
    final LaneType laneType;
    final int laneCount;
@ -424,14 +424,21 @@ abstract sealed class AbstractSpecies<E> extends jdk.internal.vm.vector.VectorSu
        Object ia = Array.newInstance(carrierType(), laneCount);
        assert(ia.getClass() == laneType.arrayType);
        checkValue(laneCount-1);  // worst case
-        for (int i = 0; i < laneCount; i++) {
-            if ((byte)i == i)
-                Array.setByte(ia, i, (byte)i);
-            else if ((short)i == i)
-                Array.setShort(ia, i, (short)i);
-            else
-                Array.setInt(ia, i, i);
-            assert(Array.getDouble(ia, i) == i);
+        if (elementType() == Float16.class) {
+            for (int i = 0; i < laneCount; i++) {
+                Array.setShort(ia, i, Float.floatToFloat16((float)i));
+                assert(Float16.shortBitsToFloat16(Array.getShort(ia, i)).intValue() == i);
+            }
+        } else {
+            for (int i = 0; i < laneCount; i++) {
+                if ((byte)i == i)
+                    Array.setByte(ia, i, (byte)i);
+                else if ((short)i == i)
+                    Array.setShort(ia, i, (short)i);
+                else
+                    Array.setInt(ia, i, i);
+                assert(Array.getDouble(ia, i) == i);
+            }
        }
        return ia;
    }
@ -629,6 +636,8 @@ abstract sealed class AbstractSpecies<E> extends jdk.internal.vm.vector.VectorSu
            s = IntVector.species(shape); break;
        case LaneType.SK_LONG:
            s = LongVector.species(shape); break;
+        case LaneType.SK_FLOAT16:
+            s = Float16Vector.species(shape); break;
        }
        if (s == null) {
            // NOTE: The result of this method is guaranteed to be
--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/AbstractVector.java
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/AbstractVector.java
@ -35,7 +35,7 @@ import static jdk.incubator.vector.VectorOperators.*;

@SuppressWarnings("cast")
 abstract sealed class AbstractVector<E> extends Vector<E>
-        permits ByteVector, DoubleVector, FloatVector, IntVector, LongVector, ShortVector {
+        permits ByteVector, DoubleVector, FloatVector, IntVector, LongVector, ShortVector, Float16Vector {
    /**
     * The order of vector bytes when stored in natural,
     * array elements of the same lane type.
@ -331,6 +331,15 @@ abstract sealed class AbstractVector<E> extends Vector<E>
        return (DoubleVector) asVectorRaw(LaneType.DOUBLE);
    }

+    /**
+     * {@inheritDoc} <!--workaround-->
+     */
+    @Override
+    @ForceInline
+    public Float16Vector reinterpretAsFloat16s() {
+        return (Float16Vector) asVectorRaw(LaneType.FLOAT16);  // same asVectorRaw pattern as reinterpretAsDoubles() above
+    }
+
    /**
     * {@inheritDoc} <!--workaround-->
     */
@ -682,6 +691,8 @@ abstract sealed class AbstractVector<E> extends Vector<E>
            return FloatVector.fromMemorySegment(rsp.check(float.class), ms, 0, bo, m.check(float.class)).check0(rsp);
        case LaneType.SK_DOUBLE:
            return DoubleVector.fromMemorySegment(rsp.check(double.class), ms, 0, bo, m.check(double.class)).check0(rsp);
+        case LaneType.SK_FLOAT16:
+            return Float16Vector.fromMemorySegment(rsp.check(Float16.class), ms, 0, bo, m.check(Float16.class)).check0(rsp);
        default:
            throw new AssertionError(rsp.toString());
        }
@ -744,6 +755,13 @@ abstract sealed class AbstractVector<E> extends Vector<E>
                }
                return DoubleVector.fromArray(dsp.check(double.class), a, 0).check0(dsp);
            }
+            case LaneType.SK_FLOAT16: {
+                short[] a = new short[rlength];
+                for (int i = 0; i < limit; i++) {
+                    a[i] = Float16.float16ToRawShortBits(Float16.valueOf((float) lanes[i]));
+                }
+                return Float16Vector.fromArray(dsp.check(Float16.class), a, 0).check0(dsp);
+            }
            default: break;
            }
        } else {
@ -794,6 +812,13 @@ abstract sealed class AbstractVector<E> extends Vector<E>
                }
                return DoubleVector.fromArray(dsp.check(double.class), a, 0).check0(dsp);
            }
+            case LaneType.SK_FLOAT16: {
+                short[] a = new short[rlength];
+                for (int i = 0; i < limit; i++) {
+                    a[i] = Float16.float16ToRawShortBits(Float16.valueOf((float) lanes[i]));
+                }
+                return Float16Vector.fromArray(dsp.check(Float16.class), a, 0).check0(dsp);
+            }
            default: break;
            }
        }
--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float16Vector.java
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float16Vector.java
--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float16Vector128.java
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float16Vector128.java
--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float16Vector256.java
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float16Vector256.java
--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float16Vector512.java
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float16Vector512.java
--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float16Vector64.java
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float16Vector64.java
--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float16VectorMax.java
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float16VectorMax.java
--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/LaneType.java
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/LaneType.java
@ -40,7 +40,8 @@ enum LaneType {
    BYTE(byte.class, Byte.class, byte[].class, 'I', -1, Byte.SIZE, byte.class),
    SHORT(short.class, Short.class, short[].class, 'I', -1, Short.SIZE, short.class),
    INT(int.class, Integer.class, int[].class, 'I', -1, Integer.SIZE, int.class),
-    LONG(long.class, Long.class, long[].class, 'I', -1, Long.SIZE, long.class);
+    LONG(long.class, Long.class, long[].class, 'I', -1, Long.SIZE, long.class),
+    FLOAT16(Float16.class, Short.class, short[].class, 'F', 11, Float16.SIZE, short.class);

    LaneType(Class<?> elementType,
             Class<?> genericElementType,
@ -66,7 +67,7 @@ enum LaneType {
        // printName.  If we do unsigned or vector or bit lane types,
        // report that condition also.
        this.typeChar = genericElementType.getSimpleName().charAt(0);
-        assert("FDBSIL".indexOf(typeChar) == ordinal()) : this;
+        assert("FDBSILS".charAt(ordinal()) == typeChar) : this;
        this.carrierType = carrierType;
        assert(carrierType.isPrimitive());

@ -181,7 +182,8 @@ enum LaneType {
        SK_SHORT    = 4,
        SK_INT      = 5,
        SK_LONG     = 6,
-        SK_LIMIT    = 7;
+        SK_FLOAT16  = 7,
+        SK_LIMIT    = 8;

    /*package-private*/
    @ForceInline
@ -278,5 +280,6 @@ enum LaneType {
        assert(ofLaneTypeOrdinal(LT_SHORT) == SHORT);
        assert(ofLaneTypeOrdinal(LT_INT) == INT);
        assert(ofLaneTypeOrdinal(LT_LONG) == LONG);
+        assert(ofLaneTypeOrdinal(LT_FLOAT16) == FLOAT16);
    }
 }
--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ShortVector.java
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ShortVector.java
@ -4149,22 +4149,14 @@ public abstract sealed class ShortVector extends AbstractVector<Short>

    /**
     * {@inheritDoc} <!--workaround-->
-     *
-     * @implNote This method always throws
-     * {@code UnsupportedOperationException}, because there is no floating
-     * point type of the same size as {@code short}.  The return type
-     * of this method is arbitrarily designated as
-     * {@code Vector<?>}.  Future versions of this API may change the return
-     * type if additional floating point types become available.
     */
    @ForceInline
    @Override
    public final
-    Vector<?>
+    Float16Vector
    viewAsFloatingLanes() {
        LaneType flt = LaneType.SHORT.asFloating();
-        // asFloating() will throw UnsupportedOperationException for the unsupported type short
-        throw new AssertionError("Cannot reach here");
+        return (Float16Vector) asVectorRaw(flt);  // NOTE(review): the cast assumes asFloating() now maps SHORT to FLOAT16 -- confirm in LaneType
    }

    // ================================================
--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Vector.java
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Vector.java
@ -200,11 +200,11 @@ import java.util.Arrays;
 * element type (such as access to element values in lanes, logical operations
 * on values of integral elements types, or transcendental operations on values
 * of floating point element types).
- * There are six abstract subclasses of Vector corresponding to the supported set
+ * There are seven abstract subclasses of Vector corresponding to the supported set
 * of element types, {@link ByteVector}, {@link ShortVector},
- * {@link IntVector}, {@link LongVector}, {@link FloatVector}, and
- * {@link DoubleVector}. Along with type-specific operations these classes
- * support creation of vector values (instances of Vector).
+ * {@link IntVector}, {@link LongVector}, {@link FloatVector},
+ * {@link DoubleVector}, and {@link Float16Vector}. Along with type-specific
+ * operations these classes support creation of vector values (instances of Vector).
 * They expose static constants corresponding to the supported species,
 * and static methods on these types generally take a species as a parameter.
 * For example,
@ -3826,6 +3826,19 @@ public abstract sealed class Vector<E> extends jdk.internal.vm.vector.VectorSupp
     */
    public abstract LongVector reinterpretAsLongs();

+    /**
+     * Reinterprets this vector as a vector of the same shape
+     * and contents but a lane type of {@code Float16},
+     * where the lanes are assembled from successive bytes
+     * according to little-endian order.
+     * It is a convenience method for the expression
+     * {@code reinterpretShape(species().withLanes(Float16.class))}.
+     * It may be considered an inverse to {@link Vector#reinterpretAsBytes()}.
+     *
+     * @return a {@code Float16Vector} with the same shape and information content
+     */
+    public abstract Float16Vector reinterpretAsFloat16s();
+
    /**
     * Reinterprets this vector as a vector of the same shape
     * and contents but a lane type of {@code float},
--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/VectorOperators.java
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/VectorOperators.java
@ -64,8 +64,9 @@ import static jdk.internal.vm.vector.Utils.isNonCapturingLambda;
 *
 * <li>{@code bits(x)} &mdash; a function call which produces the
 * underlying bits of the value {@code x}.  If {@code x} is a floating
- * point value, this is either {@code doubleToLongBits(x)} or
- * {@code floatToIntBits(x)}.  Otherwise, the value is just {@code x}.
+ * point value, this is {@code doubleToLongBits(x)},
+ * {@code floatToIntBits(x)}, or {@code float16ToShortBits(x)}.
+ * Otherwise, the value is just {@code x}.
 *
 * <li>{@code ESIZE} &mdash; the size in bytes of the operand type
 *
@ -73,6 +74,26 @@ import static jdk.internal.vm.vector.Utils.isNonCapturingLambda;
 *
 * <li>{@code intVal}, {@code byteVal}, etc. &mdash; the operand of a
 * conversion, with the indicated type
+ *
+ * <li id="type_letters">Single-letter type codes used in the names of
+ * {@linkplain Conversion conversion} operator tokens (for example
+ * {@link #B2D}, {@link #F2H}, {@link #H2F}, {@link #REINTERPRET_F2I},
+ * {@link #ZERO_EXTEND_B2L}) abbreviate lane types as follows:
+ * <table class="striped">
+ * <caption style="display:none">Lane type letter codes</caption>
+ * <thead>
+ * <tr><th scope="col">Letter</th><th scope="col">Lane type</th></tr>
+ * </thead>
+ * <tbody>
+ * <tr><th scope="row">{@code B}</th><td>{@code byte}</td></tr>
+ * <tr><th scope="row">{@code S}</th><td>{@code short}</td></tr>
+ * <tr><th scope="row">{@code I}</th><td>{@code int}</td></tr>
+ * <tr><th scope="row">{@code L}</th><td>{@code long}</td></tr>
+ * <tr><th scope="row">{@code F}</th><td>{@code float}</td></tr>
+ * <tr><th scope="row">{@code D}</th><td>{@code double}</td></tr>
+ * <tr><th scope="row">{@code H}</th><td>{@link Float16} ("half")</td></tr>
+ * </tbody>
+ * </table>
 * </ul>
 *
 * <h2>Operations on floating point vectors</h2>
@ -307,13 +328,13 @@ public final class VectorOperators {
     */
    public sealed interface Conversion<E,F> extends Operator {
        /**
-         * The domain of this conversion, a primitive type.
+         * The domain of this conversion, a supported lane type.
         * @return the domain of this conversion
         */
        Class<E> domainType();

        /**
-         * The range of this conversion, a primitive type.
+         * The range of this conversion, a supported lane type.
         * @return the range of this conversion
         */
        @Override
@ -657,6 +678,8 @@ public final class VectorOperators {
    public static final Conversion<Byte,Long> B2L = convert("B2L", 'C', byte.class, long.class, VO_KIND_CAST, VO_ALL);
    /** Convert {@code byteVal} to {@code (short)byteVal}. */
    public static final Conversion<Byte,Short> B2S = convert("B2S", 'C', byte.class, short.class, VO_KIND_CAST, VO_ALL);
+    /** Convert {@code byteVal} to {@code (Float16)byteVal}. */
+    public static final Conversion<Byte,Float16> B2H = convert("B2H", 'C', byte.class, Float16.class, VO_KIND_CAST, VO_ALL);
    /** Convert {@code doubleVal} to {@code (byte)doubleVal}. */
    public static final Conversion<Double,Byte> D2B = convert("D2B", 'C', double.class, byte.class, VO_KIND_CAST, VO_ALL);
    /** Convert {@code doubleVal} to {@code (float)doubleVal}. */
@ -667,6 +690,8 @@ public final class VectorOperators {
    public static final Conversion<Double,Long> D2L = convert("D2L", 'C', double.class, long.class, VO_KIND_CAST, VO_ALL);
    /** Convert {@code doubleVal} to {@code (short)doubleVal}. */
    public static final Conversion<Double,Short> D2S = convert("D2S", 'C', double.class, short.class, VO_KIND_CAST, VO_ALL);
+    /** Convert {@code doubleVal} to {@code (Float16)doubleVal}. */
+    public static final Conversion<Double,Float16> D2H = convert("D2H", 'C', double.class, Float16.class, VO_KIND_CAST, VO_ALL);
    /** Convert {@code floatVal} to {@code (byte)floatVal}. */
    public static final Conversion<Float,Byte> F2B = convert("F2B", 'C', float.class, byte.class, VO_KIND_CAST, VO_ALL);
    /** Convert {@code floatVal} to {@code (double)floatVal}. */
@ -677,6 +702,8 @@ public final class VectorOperators {
    public static final Conversion<Float,Long> F2L = convert("F2L", 'C', float.class, long.class, VO_KIND_CAST, VO_ALL);
    /** Convert {@code floatVal} to {@code (short)floatVal}. */
    public static final Conversion<Float,Short> F2S = convert("F2S", 'C', float.class, short.class, VO_KIND_CAST, VO_ALL);
+    /** Convert {@code floatVal} to {@code (Float16)floatVal}. */
+    public static final Conversion<Float,Float16> F2H = convert("F2H", 'C', float.class, Float16.class, VO_KIND_CAST, VO_ALL);
    /** Convert {@code intVal} to {@code (byte)intVal}. */
    public static final Conversion<Integer,Byte> I2B = convert("I2B", 'C', int.class, byte.class, VO_KIND_CAST, VO_ALL);
    /** Convert {@code intVal} to {@code (double)intVal}. */
@ -687,6 +714,8 @@ public final class VectorOperators {
    public static final Conversion<Integer,Long> I2L = convert("I2L", 'C', int.class, long.class, VO_KIND_CAST, VO_ALL);
    /** Convert {@code intVal} to {@code (short)intVal}. */
    public static final Conversion<Integer,Short> I2S = convert("I2S", 'C', int.class, short.class, VO_KIND_CAST, VO_ALL);
+    /** Convert {@code intVal} to {@code (Float16)intVal}. */
+    public static final Conversion<Integer,Float16> I2H = convert("I2H", 'C', int.class, Float16.class, VO_KIND_CAST, VO_ALL);
    /** Convert {@code longVal} to {@code (byte)longVal}. */
    public static final Conversion<Long,Byte> L2B = convert("L2B", 'C', long.class, byte.class, VO_KIND_CAST, VO_ALL);
    /** Convert {@code longVal} to {@code (double)longVal}. */
@ -697,6 +726,8 @@ public final class VectorOperators {
    public static final Conversion<Long,Integer> L2I = convert("L2I", 'C', long.class, int.class, VO_KIND_CAST, VO_ALL);
    /** Convert {@code longVal} to {@code (short)longVal}. */
    public static final Conversion<Long,Short> L2S = convert("L2S", 'C', long.class, short.class, VO_KIND_CAST, VO_ALL);
+    /** Convert {@code longVal} to {@code (Float16)longVal}. */
+    public static final Conversion<Long,Float16> L2H = convert("L2H", 'C', long.class, Float16.class, VO_KIND_CAST, VO_ALL);
    /** Convert {@code shortVal} to {@code (byte)shortVal}. */
    public static final Conversion<Short,Byte> S2B = convert("S2B", 'C', short.class, byte.class, VO_KIND_CAST, VO_ALL);
    /** Convert {@code shortVal} to {@code (double)shortVal}. */
@ -707,6 +738,21 @@ public final class VectorOperators {
    public static final Conversion<Short,Integer> S2I = convert("S2I", 'C', short.class, int.class, VO_KIND_CAST, VO_ALL);
    /** Convert {@code shortVal} to {@code (long)shortVal}. */
    public static final Conversion<Short,Long> S2L = convert("S2L", 'C', short.class, long.class, VO_KIND_CAST, VO_ALL);
+    /** Convert {@code shortVal} to {@code (Float16)shortVal}. */
+    public static final Conversion<Short,Float16> S2H = convert("S2H", 'C', short.class, Float16.class, VO_KIND_CAST, VO_ALL);
+    /** Convert {@code Float16Val} to {@code (byte)Float16Val}. */
+    public static final Conversion<Float16,Byte> H2B = convert("H2B", 'C', Float16.class, byte.class, VO_KIND_CAST, VO_ALL);
+    /** Convert {@code Float16Val} to {@code (short)Float16Val}. */
+    public static final Conversion<Float16,Short> H2S = convert("H2S", 'C', Float16.class, short.class, VO_KIND_CAST, VO_ALL);
+    /** Convert {@code Float16Val} to {@code (double)Float16Val}. */
+    public static final Conversion<Float16,Double> H2D = convert("H2D", 'C', Float16.class, double.class, VO_KIND_CAST, VO_ALL);
+    /** Convert {@code Float16Val} to {@code (float)Float16Val}. */
+    public static final Conversion<Float16,Float> H2F = convert("H2F", 'C', Float16.class, float.class, VO_KIND_CAST, VO_ALL);
+    /** Convert {@code Float16Val} to {@code (int)Float16Val}. */
+    public static final Conversion<Float16,Integer> H2I = convert("H2I", 'C', Float16.class, int.class, VO_KIND_CAST, VO_ALL);
+    /** Convert {@code Float16Val} to {@code (long)Float16Val}. */
+    public static final Conversion<Float16,Long> H2L = convert("H2L", 'C', Float16.class, long.class, VO_KIND_CAST, VO_ALL);
+
    /** Reinterpret bits of {@code doubleVal} as {@code long}. As if by {@link Double#doubleToRawLongBits(double)} */
    public static final Conversion<Double,Long> REINTERPRET_D2L = convert("REINTERPRET_D2L", 'R', double.class, long.class, VO_KIND_BITWISE, VO_ALL);
    /** Reinterpret bits of {@code floatVal} as {@code int}. As if by {@link Float#floatToRawIntBits(float)} */
--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/X-Vector.java.template
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/X-Vector.java.template
@ -41,12 +41,65 @@ import static jdk.internal.vm.vector.VectorSupport.*;
 import static jdk.incubator.vector.VectorIntrinsics.*;

 import static jdk.incubator.vector.VectorOperators.*;
+#if[FP16]
+import jdk.incubator.vector.Float16;
+import static jdk.incubator.vector.Float16.*;
+import static java.lang.Float.*;
+#end[FP16]

 #warn This file is preprocessed before being compiled

 /**
 * A specialized {@link Vector} representing an ordered immutable sequence of
+#if[FP16]
+ * 16-bit data values in the IEEE 754 binary16 format.
+ * <p>
+ * The scalar {@linkplain Float16Vector#elementType() element type} of {@code Float16Vector}
+ * is the class {@link Float16}, a <a href="{@docRoot}/java.base/java/lang/doc-files/ValueBased.html">value-based</a>
+ * class holding 16-bit data in IEEE 754 binary16 format. However, the {@code Float16}
+ * class is not used by vector operations that accept scalar element values, or
+ * arrays of scalar element values. Instead, the primitive type {@code short} is
+ * used to explicitly hold 16-bit data in IEEE 754 binary16 format. For such operations
+ * it may be necessary to explicitly convert between floating-point values of {@code Float16}
+ * or {@code float} and values of {@code short} using the appropriate conversion
+ * methods on {@code Float16} or {@code Float}.
+ *
+ * <p>
+ * The specifications for operations on elements of this class are written as if
+ * {@code Float16} is a primitive floating-point type. An operation referencing a
+ * Java operator is mapped to a method on {@code Float16} that specifies that
+ * operator's semantics. For example, the semantics of the {@code +} operator,
+ * as referenced by {@link Vector#add(Vector)} and {@link VectorOperators#ADD},
+ * is mapped to the method {@link Float16#add(Float16, Float16)}.
+ * An operation referencing a method on {@link Math} is mapped to a method of the
+ * same name on {@code Float16}, if it exists. For example, {@link Math#fma} is
+ * mapped to {@link Float16#fma}, as referenced by {@link Float16Vector#fma(short, short)}
+ * and {@link VectorOperators#FMA}.
+ * Otherwise, if there is no equivalent method on {@code Float16}, the expression that is
+ * an invocation of a method on {@code Math} is mapped to an expression that converts
+ * the {@code Float16} arguments to {@code double} values or {@code float} values as
+ * required by the method's parameter types, invokes the method on {@code Math} with
+ * the converted values, and converts the resulting {@code double} or {@code float} value
+ * to a {@code Float16} value. For example, {@link Math#sin} is mapped to the expression
+ * {@code Float16.valueOf(Math.sin(a.doubleValue()))}, where {@code a} is the
+ * {@code Float16} lane value, as referenced by {@link VectorOperators#SIN}.
+ *
+ * @apiNote
+ * {@code Float16} is currently a value-based class and therefore cannot be optimally
+ * used as the scalar element type of vector operations until it becomes a value class
+ * that behaves similarly to the primitive type {@code short} and to arrays of {@code short}.
+ * For example, accessing {@code Float16} vectors using arrays requires those arrays be
+ * {@code short[]} arrays. Accessing vectors using memory segments requires, naturally,
+ * that each consecutive 16 bits of memory hold a 16-bit data value in the IEEE 754 binary16
+ * format.
+ * @see Float16
+ * @see Float16#float16ToRawShortBits(Float16)
+ * @see Float16#shortBitsToFloat16(short)
+ * @see Float#floatToFloat16(float)
+ * @see Float#float16ToFloat(short)
+#else[FP16]
 * {@code $type$} values.
+#end[FP16]
 */
@SuppressWarnings("cast")  // warning: redundant cast
 public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtype$>
@ -62,7 +115,7 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
    static final int FORBID_OPCODE_KIND = VO_ONLYFP;
 #end[FP]

-    static final ValueLayout.Of$Type$ ELEMENT_LAYOUT = ValueLayout.JAVA_$TYPE$.withByteAlignment(1);
+    static final ValueLayout.Of$ElemLayout$ ELEMENT_LAYOUT = ValueLayout.JAVA_$TYPE$.withByteAlignment(1);

    static final int LANE_TYPE_ORDINAL = $laneType$;

@ -158,7 +211,7 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp

    /*package-private*/
    interface FUnOp {
-        $type$ apply(int i, $type$ a);
+        $fallbacktype$ apply(int i, $fallbacktype$ a);  // i: lane index; a: lane value (the FP16 templates pass float16ToFloat(lane) and narrow the result with floatToFloat16)
    }

    /*package-private*/
@ -170,7 +223,11 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
        $type$[] vec = vec();
        $type$[] res = new $type$[length()];
        for (int i = 0; i < res.length; i++) {
+#if[FP16]
+            res[i] = floatToFloat16(f.apply(i, float16ToFloat(vec[i])));
+#else[FP16]
            res[i] = f.apply(i, vec[i]);
+#end[FP16]
        }
        return vectorFactory(res);
    }
@ -190,16 +247,60 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
        $type$[] res = new $type$[length()];
        boolean[] mbits = ((AbstractMask<$Boxtype$>)m).getBits();
        for (int i = 0; i < res.length; i++) {
+#if[FP16]
+            res[i] = mbits[i] ? floatToFloat16(f.apply(i, float16ToFloat(vec[i]))) : vec[i];
+#else[FP16]
            res[i] = mbits[i] ? f.apply(i, vec[i]) : vec[i];
+#end[FP16]
        }
        return vectorFactory(res);
    }

+#if[FP16]
+    /*package-private*/
+    interface FUnRawOp {  // FP16-only unary lane function; unlike FUnOp it is typed on the stored lane type
+        $type$ apply(int i, $type$ a);  // i: lane index; a: lane value exactly as stored in vec(), with no float16ToFloat/floatToFloat16 conversion
+    }
+
+    /*package-private*/
+    abstract
+    $abstractvectortype$ uRawOp(FUnRawOp f);  // NOTE(review): presumably implemented by delegating to uRawOpTemplate - confirm in the concrete classes
+    @ForceInline
+    final
+    $abstractvectortype$ uRawOpTemplate(FUnRawOp f) {  // applies f to every lane and builds a new vector from the results
+        $type$[] vec = vec();
+        $type$[] res = new $type$[length()];
+        for (int i = 0; i < res.length; i++) {
+            res[i] = f.apply(i, vec[i]);  // lane value is handed to f as stored; the result is stored as returned
+        }
+        return vectorFactory(res);
+    }
+
+    /*package-private*/
+    abstract
+    $abstractvectortype$ uRawOp(VectorMask<$Boxtype$> m,
+                             FUnRawOp f);  // masked form; NOTE(review): presumably implemented via the masked uRawOpTemplate - confirm in the concrete classes
+    @ForceInline
+    final
+    $abstractvectortype$ uRawOpTemplate(VectorMask<$Boxtype$> m,
+                                     FUnRawOp f) {  // applies f only to lanes whose mask bit is set
+        if (m == null) {  // a null mask means "no mask": fall back to the unmasked template
+            return uRawOpTemplate(f);
+        }
+        $type$[] vec = vec();
+        $type$[] res = new $type$[length()];
+        boolean[] mbits = ((AbstractMask<$Boxtype$>)m).getBits();
+        for (int i = 0; i < res.length; i++) {
+            res[i] = mbits[i] ? f.apply(i, vec[i]) : vec[i];  // unset lanes keep their original value
+        }
+        return vectorFactory(res);
+    }
+#end[FP16]
    // Binary operator

    /*package-private*/
    interface FBinOp {
-        $type$ apply(int i, $type$ a, $type$ b);
+        $fallbacktype$ apply(int i, $fallbacktype$ a, $fallbacktype$ b);  // i: lane index; a, b: lane values (the FP16 templates pass float16ToFloat(lane) for each and narrow the result with floatToFloat16)
    }

    /*package-private*/
@ -214,7 +315,11 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
        $type$[] vec1 = this.vec();
        $type$[] vec2 = (($abstractvectortype$)o).vec();
        for (int i = 0; i < res.length; i++) {
+#if[FP16]
+            res[i] = floatToFloat16(f.apply(i, float16ToFloat(vec1[i]), float16ToFloat(vec2[i])));
+#else[FP16]
            res[i] = f.apply(i, vec1[i], vec2[i]);
+#end[FP16]
        }
        return vectorFactory(res);
    }
@ -237,7 +342,11 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
        $type$[] vec2 = (($abstractvectortype$)o).vec();
        boolean[] mbits = ((AbstractMask<$Boxtype$>)m).getBits();
        for (int i = 0; i < res.length; i++) {
+#if[FP16]
+            res[i] = mbits[i] ? floatToFloat16(f.apply(i, float16ToFloat(vec1[i]), float16ToFloat(vec2[i]))) : vec1[i];
+#else[FP16]
            res[i] = mbits[i] ? f.apply(i, vec1[i], vec2[i]) : vec1[i];
+#end[FP16]
        }
        return vectorFactory(res);
    }
@ -310,7 +419,11 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
        $type$[] vec = vec();
        boolean[] mbits = ((AbstractMask<$Boxtype$>)m).getBits();
        for (int i = 0; i < vec.length; i++) {
+#if[FP16]
+            v = mbits[i] ? floatToFloat16(f.apply(i, float16ToFloat(v), float16ToFloat(vec[i]))) : v;
+#else[FP16]
            v = mbits[i] ? f.apply(i, v, vec[i]) : v;
+#end[FP16]
        }
        return v;
    }
@ -320,7 +433,11 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
    $type$ rOpTemplate($type$ v, FBinOp f) {
        $type$[] vec = vec();
        for (int i = 0; i < vec.length; i++) {
+#if[FP16]
+            v = floatToFloat16(f.apply(i, float16ToFloat(v), float16ToFloat(vec[i])));
+#else[FP16]
            v = f.apply(i, v, vec[i]);
+#end[FP16]
        }
        return v;
    }
@ -516,13 +633,21 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
    /*package-private*/
    @ForceInline
    static long toBits($type$ e) {
+#if[FP16]
+        return e;  // FP16: the lane value is returned unchanged (implicitly widened to long), with no raw-bits conversion call
+#else[FP16]
        return {#if[FP]? $Type$.$type$ToRaw$Bitstype$Bits(e): e};
+#end[FP16]
    }

    /*package-private*/
    @ForceInline
    static $type$ fromBits(long bits) {
+#if[FP16]
+        return (short)bits;  // FP16: narrow to short and use that directly as the lane value, with no bits-to-float conversion call
+#else[FP16]
        return {#if[FP]?$Type$.$bitstype$BitsTo$Type$}(($bitstype$)bits);
+#end[FP16]
    }

    static $abstractvectortype$ expandHelper(Vector<$Boxtype$> v, VectorMask<$Boxtype$> m) {
@ -562,7 +687,12 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
        $type$[] vecPayload2 = (($abstractvectortype$)src1).vec();
        $type$[] vecPayload3 = (($abstractvectortype$)src2).vec();
        for (int i = 0; i < vlen; i++) {
+#if[FP16]
+            int index = shortBitsToFloat16(vecPayload1[i]).intValue();
+            int wrapped_index = VectorIntrinsics.wrapToRange(index, 2 * vlen);
+#else[FP16]
            int wrapped_index = VectorIntrinsics.wrapToRange((int)vecPayload1[i], 2 * vlen);
+#end[FP16]
            res[i] = wrapped_index >= vlen ? vecPayload3[wrapped_index - vlen] : vecPayload2[wrapped_index];
        }
        return (($abstractvectortype$)src1).vectorFactory(res);
@ -594,7 +724,7 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
        $Type$Species vsp = ($Type$Species) species;
 #if[FP]
        return VectorSupport.fromBitsCoerced(vsp.vectorType(), LANE_TYPE_ORDINAL, species.length(),
-                        toBits(0.0f), MODE_BROADCAST, vsp,
+                        toBits({#if[FP16]?(short) 0:0.0f}), MODE_BROADCAST, vsp,
                        ((bits_, s_) -> s_.rvOp(i -> bits_)));
 #else[FP]
        return VectorSupport.fromBitsCoerced(vsp.vectorType(), LANE_TYPE_ORDINAL, species.length(),
@ -784,27 +914,32 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp

    private static UnaryOperation<$abstractvectortype$, VectorMask<$Boxtype$>> unaryOperations(int opc_) {
        switch (opc_) {
+#if[FP16]
            case VECTOR_OP_NEG: return (v0, m) ->
-                    v0.uOp(m, (i, a) -> ($type$) -a);
+                    v0.uOp(m, (i, a) -> Float16.negate(Float16.valueOf(a)).floatValue());
+#else[FP16]
+            case VECTOR_OP_NEG: return (v0, m) ->
+                    v0.uOp(m, (i, a) -> ($fallbacktype$) -a);
+#end[FP16]
            case VECTOR_OP_ABS: return (v0, m) ->
-                    v0.uOp(m, (i, a) -> ($type$) Math.abs(a));
+                    v0.uOp(m, (i, a) -> ($fallbacktype$) Math.abs(a));
 #if[!FP]
 #if[intOrLong]
            case VECTOR_OP_BIT_COUNT: return (v0, m) ->
-                    v0.uOp(m, (i, a) -> ($type$) $Boxtype$.bitCount(a));
+                    v0.uOp(m, (i, a) -> ($fallbacktype$) $Boxtype$.bitCount(a));
            case VECTOR_OP_TZ_COUNT: return (v0, m) ->
-                    v0.uOp(m, (i, a) -> ($type$) $Boxtype$.numberOfTrailingZeros(a));
+                    v0.uOp(m, (i, a) -> ($fallbacktype$) $Boxtype$.numberOfTrailingZeros(a));
            case VECTOR_OP_LZ_COUNT: return (v0, m) ->
-                    v0.uOp(m, (i, a) -> ($type$) $Boxtype$.numberOfLeadingZeros(a));
+                    v0.uOp(m, (i, a) -> ($fallbacktype$) $Boxtype$.numberOfLeadingZeros(a));
            case VECTOR_OP_REVERSE: return (v0, m) ->
-                    v0.uOp(m, (i, a) -> ($type$) $Boxtype$.reverse(a));
+                    v0.uOp(m, (i, a) -> ($fallbacktype$) $Boxtype$.reverse(a));
 #else[intOrLong]
            case VECTOR_OP_BIT_COUNT: return (v0, m) ->
-                    v0.uOp(m, (i, a) -> ($type$) bitCount(a));
+                    v0.uOp(m, (i, a) -> ($fallbacktype$) bitCount(a));
            case VECTOR_OP_TZ_COUNT: return (v0, m) ->
-                    v0.uOp(m, (i, a) -> ($type$) numberOfTrailingZeros(a));
+                    v0.uOp(m, (i, a) -> ($fallbacktype$) numberOfTrailingZeros(a));
            case VECTOR_OP_LZ_COUNT: return (v0, m) ->
-                    v0.uOp(m, (i, a) -> ($type$) numberOfLeadingZeros(a));
+                    v0.uOp(m, (i, a) -> ($fallbacktype$) numberOfLeadingZeros(a));
            case VECTOR_OP_REVERSE: return (v0, m) ->
                    v0.uOp(m, (i, a) -> reverse(a));
 #end[intOrLong]
@ -814,43 +949,47 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
                    v0.uOp(m, (i, a) -> a);
 #else[byte]
            case VECTOR_OP_REVERSE_BYTES: return (v0, m) ->
-                    v0.uOp(m, (i, a) -> ($type$) $Boxtype$.reverseBytes(a));
+                    v0.uOp(m, (i, a) -> ($fallbacktype$) $Boxtype$.reverseBytes(a));
 #end[byte]
 #end[BITWISE]
 #end[!FP]
 #if[FP]
            case VECTOR_OP_SIN: return (v0, m) ->
-                    v0.uOp(m, (i, a) -> ($type$) Math.sin(a));
+                    v0.uOp(m, (i, a) -> ($fallbacktype$) Math.sin(a));
            case VECTOR_OP_COS: return (v0, m) ->
-                    v0.uOp(m, (i, a) -> ($type$) Math.cos(a));
+                    v0.uOp(m, (i, a) -> ($fallbacktype$) Math.cos(a));
            case VECTOR_OP_TAN: return (v0, m) ->
-                    v0.uOp(m, (i, a) -> ($type$) Math.tan(a));
+                    v0.uOp(m, (i, a) -> ($fallbacktype$) Math.tan(a));
            case VECTOR_OP_ASIN: return (v0, m) ->
-                    v0.uOp(m, (i, a) -> ($type$) Math.asin(a));
+                    v0.uOp(m, (i, a) -> ($fallbacktype$) Math.asin(a));
            case VECTOR_OP_ACOS: return (v0, m) ->
-                    v0.uOp(m, (i, a) -> ($type$) Math.acos(a));
+                    v0.uOp(m, (i, a) -> ($fallbacktype$) Math.acos(a));
            case VECTOR_OP_ATAN: return (v0, m) ->
-                    v0.uOp(m, (i, a) -> ($type$) Math.atan(a));
+                    v0.uOp(m, (i, a) -> ($fallbacktype$) Math.atan(a));
            case VECTOR_OP_EXP: return (v0, m) ->
-                    v0.uOp(m, (i, a) -> ($type$) Math.exp(a));
+                    v0.uOp(m, (i, a) -> ($fallbacktype$) Math.exp(a));
            case VECTOR_OP_LOG: return (v0, m) ->
-                    v0.uOp(m, (i, a) -> ($type$) Math.log(a));
+                    v0.uOp(m, (i, a) -> ($fallbacktype$) Math.log(a));
            case VECTOR_OP_LOG10: return (v0, m) ->
-                    v0.uOp(m, (i, a) -> ($type$) Math.log10(a));
+                    v0.uOp(m, (i, a) -> ($fallbacktype$) Math.log10(a));
            case VECTOR_OP_SQRT: return (v0, m) ->
-                    v0.uOp(m, (i, a) -> ($type$) Math.sqrt(a));
+#if[FP16]
+                    v0.uOp(m, (i, a) -> Float16.sqrt(Float16.valueOf(a)).floatValue());
+#else[FP16]
+                    v0.uOp(m, (i, a) -> ($fallbacktype$) Math.sqrt(a));
+#end[FP16]
            case VECTOR_OP_CBRT: return (v0, m) ->
-                    v0.uOp(m, (i, a) -> ($type$) Math.cbrt(a));
+                    v0.uOp(m, (i, a) -> ($fallbacktype$) Math.cbrt(a));
            case VECTOR_OP_SINH: return (v0, m) ->
-                    v0.uOp(m, (i, a) -> ($type$) Math.sinh(a));
+                    v0.uOp(m, (i, a) -> ($fallbacktype$) Math.sinh(a));
            case VECTOR_OP_COSH: return (v0, m) ->
-                    v0.uOp(m, (i, a) -> ($type$) Math.cosh(a));
+                    v0.uOp(m, (i, a) -> ($fallbacktype$) Math.cosh(a));
            case VECTOR_OP_TANH: return (v0, m) ->
-                    v0.uOp(m, (i, a) -> ($type$) Math.tanh(a));
+                    v0.uOp(m, (i, a) -> ($fallbacktype$) Math.tanh(a));
            case VECTOR_OP_EXPM1: return (v0, m) ->
-                    v0.uOp(m, (i, a) -> ($type$) Math.expm1(a));
+                    v0.uOp(m, (i, a) -> ($fallbacktype$) Math.expm1(a));
            case VECTOR_OP_LOG1P: return (v0, m) ->
-                    v0.uOp(m, (i, a) -> ($type$) Math.log1p(a));
+                    v0.uOp(m, (i, a) -> ($fallbacktype$) Math.log1p(a));
 #end[FP]
            default: return null;
        }
@ -996,46 +1135,46 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
    private static BinaryOperation<$abstractvectortype$, VectorMask<$Boxtype$>> binaryOperations(int opc_) {
        switch (opc_) {
            case VECTOR_OP_ADD: return (v0, v1, vm) ->
-                    v0.bOp(v1, vm, (i, a, b) -> ($type$)(a + b));
+                    v0.bOp(v1, vm, (i, a, b) -> ($fallbacktype$)(a + b));
            case VECTOR_OP_SUB: return (v0, v1, vm) ->
-                    v0.bOp(v1, vm, (i, a, b) -> ($type$)(a - b));
+                    v0.bOp(v1, vm, (i, a, b) -> ($fallbacktype$)(a - b));
            case VECTOR_OP_MUL: return (v0, v1, vm) ->
-                    v0.bOp(v1, vm, (i, a, b) -> ($type$)(a * b));
+                    v0.bOp(v1, vm, (i, a, b) -> ($fallbacktype$)(a * b));
            case VECTOR_OP_DIV: return (v0, v1, vm) ->
-                    v0.bOp(v1, vm, (i, a, b) -> ($type$)(a / b));
+                    v0.bOp(v1, vm, (i, a, b) -> ($fallbacktype$)(a / b));
            case VECTOR_OP_MAX: return (v0, v1, vm) ->
-                    v0.bOp(v1, vm, (i, a, b) -> ($type$)Math.max(a, b));
+                    v0.bOp(v1, vm, (i, a, b) -> ($fallbacktype$)Math.max(a, b));
            case VECTOR_OP_MIN: return (v0, v1, vm) ->
-                    v0.bOp(v1, vm, (i, a, b) -> ($type$)Math.min(a, b));
+                    v0.bOp(v1, vm, (i, a, b) -> ($fallbacktype$)Math.min(a, b));
 #if[BITWISE]
            case VECTOR_OP_AND: return (v0, v1, vm) ->
-                    v0.bOp(v1, vm, (i, a, b) -> ($type$)(a & b));
+                    v0.bOp(v1, vm, (i, a, b) -> ($fallbacktype$)(a & b));
            case VECTOR_OP_OR: return (v0, v1, vm) ->
-                    v0.bOp(v1, vm, (i, a, b) -> ($type$)(a | b));
+                    v0.bOp(v1, vm, (i, a, b) -> ($fallbacktype$)(a | b));
            case VECTOR_OP_XOR: return (v0, v1, vm) ->
-                    v0.bOp(v1, vm, (i, a, b) -> ($type$)(a ^ b));
+                    v0.bOp(v1, vm, (i, a, b) -> ($fallbacktype$)(a ^ b));
            case VECTOR_OP_LSHIFT: return (v0, v1, vm) ->
-                    v0.bOp(v1, vm, (i, a, n) -> ($type$)(a << n));
+                    v0.bOp(v1, vm, (i, a, n) -> ($fallbacktype$)(a << n));
            case VECTOR_OP_RSHIFT: return (v0, v1, vm) ->
-                    v0.bOp(v1, vm, (i, a, n) -> ($type$)(a >> n));
+                    v0.bOp(v1, vm, (i, a, n) -> ($fallbacktype$)(a >> n));
            case VECTOR_OP_URSHIFT: return (v0, v1, vm) ->
-                    v0.bOp(v1, vm, (i, a, n) -> ($type$)((a & LSHR_SETUP_MASK) >>> n));
+                    v0.bOp(v1, vm, (i, a, n) -> ($fallbacktype$)((a & LSHR_SETUP_MASK) >>> n));
            case VECTOR_OP_LROTATE: return (v0, v1, vm) ->
                    v0.bOp(v1, vm, (i, a, n) -> rotateLeft(a, (int)n));
            case VECTOR_OP_RROTATE: return (v0, v1, vm) ->
                    v0.bOp(v1, vm, (i, a, n) -> rotateRight(a, (int)n));
            case VECTOR_OP_UMAX: return (v0, v1, vm) ->
-                    v0.bOp(v1, vm, (i, a, b) -> ($type$)VectorMath.maxUnsigned(a, b));
+                    v0.bOp(v1, vm, (i, a, b) -> ($fallbacktype$)VectorMath.maxUnsigned(a, b));
            case VECTOR_OP_UMIN: return (v0, v1, vm) ->
-                    v0.bOp(v1, vm, (i, a, b) -> ($type$)VectorMath.minUnsigned(a, b));
+                    v0.bOp(v1, vm, (i, a, b) -> ($fallbacktype$)VectorMath.minUnsigned(a, b));
            case VECTOR_OP_SADD: return (v0, v1, vm) ->
-                    v0.bOp(v1, vm, (i, a, b) -> ($type$)(VectorMath.addSaturating(a, b)));
+                    v0.bOp(v1, vm, (i, a, b) -> ($fallbacktype$)(VectorMath.addSaturating(a, b)));
            case VECTOR_OP_SSUB: return (v0, v1, vm) ->
-                    v0.bOp(v1, vm, (i, a, b) -> ($type$)(VectorMath.subSaturating(a, b)));
+                    v0.bOp(v1, vm, (i, a, b) -> ($fallbacktype$)(VectorMath.subSaturating(a, b)));
            case VECTOR_OP_SUADD: return (v0, v1, vm) ->
-                    v0.bOp(v1, vm, (i, a, b) -> ($type$)(VectorMath.addSaturatingUnsigned(a, b)));
+                    v0.bOp(v1, vm, (i, a, b) -> ($fallbacktype$)(VectorMath.addSaturatingUnsigned(a, b)));
            case VECTOR_OP_SUSUB: return (v0, v1, vm) ->
-                    v0.bOp(v1, vm, (i, a, b) -> ($type$)(VectorMath.subSaturatingUnsigned(a, b)));
+                    v0.bOp(v1, vm, (i, a, b) -> ($fallbacktype$)(VectorMath.subSaturatingUnsigned(a, b)));
 #if[intOrLong]
            case VECTOR_OP_COMPRESS_BITS: return (v0, v1, vm) ->
                    v0.bOp(v1, vm, (i, a, n) -> $Boxtype$.compress(a, n));
@ -1045,13 +1184,17 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
 #end[BITWISE]
 #if[FP]
            case VECTOR_OP_OR: return (v0, v1, vm) ->
+#if[FP16]
+                    v0.bOp(v1, vm, (i, a, b) -> FloatVector.fromBits(FloatVector.toBits(a) | FloatVector.toBits(b)));
+#else[FP16]
                    v0.bOp(v1, vm, (i, a, b) -> fromBits(toBits(a) | toBits(b)));
+#end[FP16]
            case VECTOR_OP_ATAN2: return (v0, v1, vm) ->
-                    v0.bOp(v1, vm, (i, a, b) -> ($type$) Math.atan2(a, b));
+                    v0.bOp(v1, vm, (i, a, b) -> ($fallbacktype$) Math.atan2(a, b));
            case VECTOR_OP_POW: return (v0, v1, vm) ->
-                    v0.bOp(v1, vm, (i, a, b) -> ($type$) Math.pow(a, b));
+                    v0.bOp(v1, vm, (i, a, b) -> ($fallbacktype$) Math.pow(a, b));
            case VECTOR_OP_HYPOT: return (v0, v1, vm) ->
-                    v0.bOp(v1, vm, (i, a, b) -> ($type$) Math.hypot(a, b));
+                    v0.bOp(v1, vm, (i, a, b) -> ($fallbacktype$) Math.hypot(a, b));
 #end[FP]
            default: return null;
        }
@ -1147,13 +1290,13 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
    public final
    $abstractvectortype$ lanewise(VectorOperators.Binary op,
                                  long e) {
-        $type$ e1 = ($type$) e;
+        $type$ e1 = {#if[FP16]?float16ToRawShortBits(Float16.valueOf(e)):($type$) e};
 #if[BITWISE]
        if ((long)e1 != e
            // allow shift ops to clip down their int parameters
            && !(opKind(op, VO_SHIFT) && (int)e1 == e)) {
 #else[BITWISE]
-        if ((long)e1 != e) {
+        if ({#if[FP16]?shortBitsToFloat16(e1).longValue():(long)e1} != e) {
 #end[BITWISE]
            vspecies().checkValue(e);  // for exception
        }
@ -1174,13 +1317,13 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
    public final
    $abstractvectortype$ lanewise(VectorOperators.Binary op,
                                  long e, VectorMask<$Boxtype$> m) {
-        $type$ e1 = ($type$) e;
+        $type$ e1 = {#if[FP16]?float16ToRawShortBits(Float16.valueOf(e)):($type$) e};
 #if[BITWISE]
        if ((long)e1 != e
            // allow shift ops to clip down their int parameters
            && !(opKind(op, VO_SHIFT) && (int)e1 == e)) {
 #else[BITWISE]
-        if ((long)e1 != e) {
+        if ({#if[FP16]?shortBitsToFloat16(e1).longValue():(long)e1} != e) {
 #end[BITWISE]
            vspecies().checkValue(e);  // for exception
        }
@ -1255,12 +1398,12 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
    // since our lane types are first-class types, not just dressed
    // up ints.
    private static final int SHIFT_MASK = ($Boxtype$.SIZE - 1);
-#if[byteOrShort]
+#if[byteOrStrictShort]
    // Also simulate >>> on sub-word variables with a mask.
    private static final int LSHR_SETUP_MASK = ((1 << $Boxtype$.SIZE) - 1);
-#else[byteOrShort]
+#else[byteOrStrictShort]
    private static final $type$ LSHR_SETUP_MASK = -1;
-#end[byteOrShort]
+#end[byteOrStrictShort]
 #end[BITWISE]

    // Ternary lanewise support
@ -1363,7 +1506,11 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
        switch (opc_) {
 #if[FP]
            case VECTOR_OP_FMA: return (v0, v1_, v2_, m) ->
+#if[FP16]
+                    v0.tOp(v1_, v2_, m, (i, a, b, c) -> float16ToRawShortBits(Float16.fma(shortBitsToFloat16(a), shortBitsToFloat16(b), shortBitsToFloat16(c))));
+#else[FP16]
                    v0.tOp(v1_, v2_, m, (i, a, b, c) -> Math.fma(a, b, c));
+#end[FP16]
 #end[FP]
            default: return null;
        }
@ -2392,7 +2539,7 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
                // first kill the sign:
                bits = bits.and($Boxbitstype$.MAX_VALUE);
                // next find the bit pattern for infinity:
-                $bitstype$ infbits = ($bitstype$) toBits($Boxtype$.POSITIVE_INFINITY);
+                $bitstype$ infbits = ($bitstype$) toBits({#if[FP16]?float16ToRawShortBits($Boxtype$.POSITIVE_INFINITY):$Boxtype$.POSITIVE_INFINITY});
                // now compare:
                if (op == IS_FINITE) {
                    m = bits.compare(LT, infbits);
@ -2446,7 +2593,7 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
                // first kill the sign:
                bits = bits.and($Boxbitstype$.MAX_VALUE);
                // next find the bit pattern for infinity:
-                $bitstype$ infbits = ($bitstype$) toBits($Boxtype$.POSITIVE_INFINITY);
+                $bitstype$ infbits = ($bitstype$) toBits({#if[FP16]?float16ToRawShortBits($Boxtype$.POSITIVE_INFINITY):$Boxtype$.POSITIVE_INFINITY});
                // now compare:
                if (op == IS_FINITE) {
                    m = bits.compare(LT, infbits, m);
@ -2517,14 +2664,23 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
    }

    @ForceInline
-    private static boolean compareWithOp(int cond, $type$ a, $type$ b) {
+    private static boolean compareWithOp(int cond, $carriertype$ a, $carriertype$ b) {
        return switch (cond) {
+#if[FP16]
+            case BT_eq -> Float.float16ToFloat(a) == Float.float16ToFloat(b);
+            case BT_ne -> Float.float16ToFloat(a) != Float.float16ToFloat(b);
+            case BT_lt -> Float.float16ToFloat(a) < Float.float16ToFloat(b);
+            case BT_le -> Float.float16ToFloat(a) <= Float.float16ToFloat(b);
+            case BT_gt -> Float.float16ToFloat(a) > Float.float16ToFloat(b);
+            case BT_ge -> Float.float16ToFloat(a) >= Float.float16ToFloat(b);
+#else[FP16]
            case BT_eq -> a == b;
            case BT_ne -> a != b;
            case BT_lt -> a < b;
            case BT_le -> a <= b;
            case BT_gt -> a > b;
            case BT_ge -> a >= b;
+#end[FP16]
 #if[!FP]
            case BT_ult -> $Boxtype$.compareUnsigned(a, b) < 0;
            case BT_ule -> $Boxtype$.compareUnsigned(a, b) <= 0;
@ -2665,7 +2821,7 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
                // and multiply.
                $abstractvectortype$ iota = s.iota();
                $type$ sc = ($type$) scale_;
-                return v.add(sc == 1 ? iota : iota.mul(sc));
+                return v.add(sc == 1 ? iota : iota.mul({#if[FP16]?float16ToRawShortBits(Float16.valueOf(sc)):sc}));
            });
    }

@ -2875,7 +3031,7 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
        return VectorSupport.rearrangeOp(
            getClass(), shuffletype, null, laneTypeOrdinal(), length(),
            this, shuffle, null,
-            (v1, s_, m_) -> v1.uOp((i, a) -> {
+            (v1, s_, m_) -> v1.{#if[FP16]?uRawOp:uOp}((i, a) -> {
                int ei = Integer.remainderUnsigned(s_.laneSource(i), v1.length());
                return v1.lane(ei);
            }));
@ -2902,7 +3058,7 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
        return VectorSupport.rearrangeOp(
                   getClass(), shuffletype, masktype, laneTypeOrdinal(), length(),
                   this, shuffle, m,
-                   (v1, s_, m_) -> v1.uOp((i, a) -> {
+                   (v1, s_, m_) -> v1.{#if[FP16]?uRawOp:uOp}((i, a) -> {
                        int ei = Integer.remainderUnsigned(s_.laneSource(i), v1.length());
                        return !m_.laneIsSet(i) ? 0 : v1.lane(ei);
                   }));
@ -2928,7 +3084,7 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
            VectorSupport.rearrangeOp(
                getClass(), shuffletype, null, laneTypeOrdinal(), length(),
                this, shuffle, null,
-                (v0, s_, m_) -> v0.uOp((i, a) -> {
+                (v0, s_, m_) -> v0.{#if[FP16]?uRawOp:uOp}((i, a) -> {
                    int ei = Integer.remainderUnsigned(s_.laneSource(i), v0.length());
                    return v0.lane(ei);
                }));
@ -2936,7 +3092,7 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
            VectorSupport.rearrangeOp(
                getClass(), shuffletype, null, laneTypeOrdinal(), length(),
                v, shuffle, null,
-                (v1, s_, m_) -> v1.uOp((i, a) -> {
+                (v1, s_, m_) -> v1.{#if[FP16]?uRawOp:uOp}((i, a) -> {
                    int ei = Integer.remainderUnsigned(s_.laneSource(i), v1.length());
                    return v1.lane(ei);
                }));
@ -2963,6 +3119,9 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
    final <F>
    VectorShuffle<F> toShuffle(AbstractSpecies<F> dsp, boolean wrap) {
        assert(dsp.elementSize() == vspecies().elementSize());
+#if[FP16]
+        ShortVector idx = convert(VectorOperators.H2S, 0).reinterpretAsShorts();
+#end[FP16]
 #if[float]
        IntVector idx = convert(VectorOperators.F2I, 0).reinterpretAsInts();
 #end[float]
@ -2983,7 +3142,7 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp

    /**
     * {@inheritDoc} <!--workaround-->
-     * @since 19
+     * @since {#if[FP16]?27:19}
     */
    @Override
    public abstract
@ -3002,7 +3161,7 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp

    /**
     * {@inheritDoc} <!--workaround-->
-     * @since 19
+     * @since {#if[FP16]?27:19}
     */
    @Override
    public abstract
@ -3199,7 +3358,7 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
     *
     * This is a lane-wise ternary operation which applies an operation
     * conforming to the specification of
-     * {@link Math#fma($type$,$type$,$type$) Math.fma(a,b,c)}
+     * {@link Math#fma($fallbacktype$,$fallbacktype$,$fallbacktype$) Math.fma(a,b,c)}
     * to each lane.
 #if[intOrFloat]
     * The operation is adapted to cast the operands and the result,
@ -3240,7 +3399,7 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
     *
     * This is a lane-wise ternary operation which applies an operation
     * conforming to the specification of
-     * {@link Math#fma($type$,$type$,$type$) Math.fma(a,b,c)}
+     * {@link Math#fma($fallbacktype$,$fallbacktype$,$fallbacktype$) Math.fma(a,b,c)}
     * to each lane.
 #if[intOrFloat]
     * The operation is adapted to cast the operands and the result,
@ -3447,13 +3606,13 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
    private static ReductionOperation<$abstractvectortype$, VectorMask<$Boxtype$>> reductionOperations(int opc_) {
        switch (opc_) {
            case VECTOR_OP_ADD: return (v, m) ->
-                    toBits(v.rOp(($type$)0, m, (i, a, b) -> ($type$)(a + b)));
+                    toBits(v.rOp(($type$)0, m, (i, a, b) -> ($fallbacktype$)(a + b)));
            case VECTOR_OP_MUL: return (v, m) ->
-                    toBits(v.rOp(($type$)1, m, (i, a, b) -> ($type$)(a * b)));
+                    toBits(v.rOp(($type$){#if[FP16]?floatToFloat16(1.0f):1}, m, (i, a, b) -> ($fallbacktype$)(a * b)));
            case VECTOR_OP_MIN: return (v, m) ->
-                    toBits(v.rOp(MAX_OR_INF, m, (i, a, b) -> ($type$) Math.min(a, b)));
+                    toBits(v.rOp(MAX_OR_INF, m, (i, a, b) -> ($fallbacktype$) Math.min(a, b)));
            case VECTOR_OP_MAX: return (v, m) ->
-                    toBits(v.rOp(MIN_OR_INF, m, (i, a, b) -> ($type$) Math.max(a, b)));
+                    toBits(v.rOp(MIN_OR_INF, m, (i, a, b) -> ($fallbacktype$) Math.max(a, b)));
 #if[!FP]
            case VECTOR_OP_UMIN: return (v, m) ->
                    toBits(v.rOp(UMAX_VALUE, m, (i, a, b) -> ($type$) VectorMath.minUnsigned(a, b)));
@ -3475,8 +3634,8 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
    }

 #if[FP]
-    private static final $type$ MIN_OR_INF = $Boxtype$.NEGATIVE_INFINITY;
-    private static final $type$ MAX_OR_INF = $Boxtype$.POSITIVE_INFINITY;
+    private static final $type$ MIN_OR_INF = {#if[FP16]?float16ToRawShortBits($Boxtype$.NEGATIVE_INFINITY):$Boxtype$.NEGATIVE_INFINITY};
+    private static final $type$ MAX_OR_INF = {#if[FP16]?float16ToRawShortBits($Boxtype$.POSITIVE_INFINITY):$Boxtype$.POSITIVE_INFINITY};
 #else[FP]
    private static final $type$ MIN_OR_INF = $Boxtype$.MIN_VALUE;
    private static final $type$ MAX_OR_INF = $Boxtype$.MAX_VALUE;
@ -3656,14 +3815,18 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
        $type$[] a = toArray();
        double[] res = new double[a.length];
        for (int i = 0; i < a.length; i++) {
-            res[i] = (double) a[i];
+            res[i] = (double) {#if[FP16]?shortBitsToFloat16(a[i]).doubleValue():a[i]};
        }
        return res;
    }
 #end[double]

    /**
+#if[FP16]
+     * Loads a vector from an array of type {@code $type$[]} holding IEEE 754 binary16 values
+#else[FP16]
     * Loads a vector from an array of type {@code $type$[]}
+#end[FP16]
     * starting at an offset.
     * For each vector lane, where {@code N} is the vector lane index, the
     * array element at index {@code offset + N} is placed into the
@ -3687,7 +3850,11 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
    }

    /**
+#if[FP16]
+     * Loads a vector from an array of type {@code $type$[]} holding IEEE 754 binary16 values
+#else[FP16]
     * Loads a vector from an array of type {@code $type$[]}
+#end[FP16]
     * starting at an offset and using a mask.
     * Lanes where the mask is unset are filled with the default
     * value of {@code $type$} ({#if[FP]?positive }zero).
@ -3724,7 +3891,11 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp

    /**
     * Gathers a new vector composed of elements from an array of type
+#if[FP16]
+     * {@code $type$[]} holding IEEE 754 binary16 values,
+#else[FP16]
     * {@code $type$[]},
+#end[FP16]
     * using indexes obtained by adding a fixed {@code offset} to a
     * series of secondary offsets from an <em>index map</em>.
     * The index map is a contiguous sequence of {@code VLENGTH}
@ -3869,7 +4040,11 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp

    /**
     * Gathers a new vector composed of elements from an array of type
+#if[FP16]
+     * {@code $type$[]} holding IEEE 754 binary16 values,
+#else[FP16]
     * {@code $type$[]},
+#end[FP16]
     * under the control of a mask, and
     * using indexes obtained by adding a fixed {@code offset} to a
     * series of secondary offsets from an <em>index map</em>.
@ -3918,7 +4093,7 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
        }
    }

-#if[short]
+#if[strictShort]
    /**
     * Loads a vector from an array of type {@code char[]}
     * starting at an offset.
@ -4069,7 +4244,7 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
        $Type$Species vsp = ($Type$Species) species;
        return vsp.vOp(m, n -> (short) a[offset + indexMap[mapOffset + n]]);
    }
-#end[short]
+#end[strictShort]

 #if[byte]
    /**
@ -4258,7 +4433,7 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
     *         for any lane {@code N} in the vector
     * @throws IllegalStateException if the memory segment's session is not alive,
     *         or if access occurs from a thread other than the thread owning the session.
-     * @since 19
+     * @since {#if[FP16]?27:19}
     */
    @ForceInline
    public static
@ -4317,7 +4492,7 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
     *         where the mask is set
     * @throws IllegalStateException if the memory segment's session is not alive,
     *         or if access occurs from a thread other than the thread owning the session.
-     * @since 19
+     * @since {#if[FP16]?27:19}
     */
    @ForceInline
    public static
@ -4555,7 +4730,7 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
    }
 #end[byteOrShort]

-#if[short]
+#if[strictShort]
    /**
     * Stores this vector into an array of type {@code char[]}
     * starting at an offset.
@ -4711,7 +4886,7 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
                 arr[off + j] = (char) e;
             });
    }
-#end[short]
+#end[strictShort]

 #if[byte]
    /**
@ -4886,7 +5061,7 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp

    /**
     * {@inheritDoc} <!--workaround-->
-     * @since 19
+     * @since {#if[FP16]?27:19}
     */
    @Override
    @ForceInline
@ -4903,7 +5078,7 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp

    /**
     * {@inheritDoc} <!--workaround-->
-     * @since 19
+     * @since {#if[FP16]?27:19}
     */
    @Override
    @ForceInline
@ -5100,7 +5275,7 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
    }
 #end[byteOrShort]

-#if[short]
+#if[strictShort]
    /*package-private*/
    abstract
    $abstractvectortype$ fromCharArray0(char[] a, int offset);
@ -5132,7 +5307,7 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
                (arr, off, s, vm) -> s.ldOp(arr, (int) off, vm,
                                            (arr_, off_, i) -> (short) arr_[off_ + i]));
    }
-#end[short]
+#end[strictShort]

 #if[byte]
    /*package-private*/
@ -5346,7 +5521,7 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
                });
    }

-#if[short]
+#if[strictShort]
    /*package-private*/
    abstract
    void intoCharArray0(char[] a, int offset, VectorMask<$Boxtype$> m);
@ -5364,7 +5539,7 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
            -> v.stOp(arr, (int) off, vm,
                      (arr_, off_, i, e) -> arr_[off_ + i] = (char) e));
    }
-#end[short]
+#end[strictShort]

    // End of low-level memory operations.

@ -5423,7 +5598,7 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
        return ARRAY_BASE + (((long)index) << ARRAY_SHIFT);
    }

-#if[short]
+#if[strictShort]
    static final int ARRAY_CHAR_SHIFT =
            31 - Integer.numberOfLeadingZeros(Unsafe.ARRAY_CHAR_INDEX_SCALE);
    static final long ARRAY_CHAR_BASE =
@ -5433,7 +5608,7 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
    static long charArrayAddress(char[] a, int index) {
        return ARRAY_CHAR_BASE + (((long)index) << ARRAY_CHAR_SHIFT);
    }
-#end[short]
+#end[strictShort]

 #if[byte]
    static final int ARRAY_BOOLEAN_SHIFT =
@ -5490,7 +5665,7 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp

    /**
     * {@inheritDoc} <!--workaround-->
-#if[byteOrShort]
+#if[byte]
     *
     * @implNote This method always throws
     * {@code UnsupportedOperationException}, because there is no floating
@ -5498,23 +5673,27 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
     * of this method is arbitrarily designated as
     * {@code Vector<?>}.  Future versions of this API may change the return
     * type if additional floating point types become available.
-#end[byteOrShort]
+#end[byte]
     */
    @ForceInline
    @Override
    public final
-    {#if[byteOrShort]?Vector<?>:$Fptype$Vector}
+#if[FP16]
+    $Type$Vector
+#else[FP16]
+    {#if[byte]?Vector<?>:$Fptype$Vector}
+#end[FP16]
    viewAsFloatingLanes() {
 #if[FP]
        return this;
 #else[FP]
        LaneType flt = LaneType.$TYPE$.asFloating();
-#if[!byteOrShort]
+#if[!byte]
        return ($Fptype$Vector) asVectorRaw(flt);
-#else[!byteOrShort]
+#else[!byte]
        // asFloating() will throw UnsupportedOperationException for the unsupported type $type$
        throw new AssertionError("Cannot reach here");
-#end[!byteOrShort]
+#end[!byte]
 #end[FP]
    }

@ -5588,7 +5767,7 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
                Class<? extends AbstractMask<$Boxtype$>> maskType,
                Class<? extends AbstractShuffle<$Boxtype$>> shuffleType,
                Function<Object, $abstractvectortype$> vectorFactory) {
-            super(shape, LaneType.of($type$.class),
+            super(shape, LaneType.of($elemtype$.class),
                  vectorType, maskType, shuffleType,
                  vectorFactory);
            assert(this.elementSize() == $Boxtype$.SIZE);
@ -5599,7 +5778,7 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
        @Override
        @ForceInline
        public final Class<$Boxtype$> elementType() {
-            return $type$.class;
+            return $elemtype$.class;
        }

        @Override
@ -5656,8 +5835,8 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
            return value;
 #else[long]
            // Do the conversion, and then test it for failure.
-            $type$ e = ($type$) value;
-            if ((long) e != value) {
+            $type$ e = {#if[FP16]?float16ToRawShortBits(Float16.valueOf(value)):($type$) value};
+            if ({#if[FP16]?shortBitsToFloat16(e).longValue():(long) e} != value) {
                throw badElementBits(value, e);
            }
            return toBits(e);
@ -5667,10 +5846,18 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
        /*package-private*/
        @ForceInline
        static long toIntegralChecked($type$ e, boolean convertToInt) {
+#if[FP16]
+            float ef = shortBitsToFloat16(e).floatValue();  // decode the short lane bits to a float before the integral cast
+            long value = convertToInt ? (int) ef : (long) ef;
+            if ((float) value != ef) {  // casting back must reproduce ef, otherwise the lane value is not an exact int/long
+                throw badArrayBits(e, convertToInt, value);
+            }
+#else[FP16]
            long value = convertToInt ? (int) e : (long) e;
            if (($type$) value != e) {
                throw badArrayBits(e, convertToInt, value);
            }
+#end[FP16]
            return value;
        }

@ -5682,11 +5869,19 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
            $type$[] va = new $type$[laneCount()];
            for (int i = 0; i < va.length; i++) {
                int lv = values[i];
+#if[FP16]
+                $type$ v = float16ToRawShortBits(Float16.valueOf(lv));
+                va[i] = v;
+                if (Float16.valueOf(lv).intValue() != lv) {
+                    throw badElementBits(lv, v);
+                }
+#else[FP16]
                $type$ v = ($type$) lv;
                va[i] = v;
                if ((int)v != lv) {
                    throw badElementBits(lv, v);
                }
+#end[FP16]
            }
            return dummyVector().fromArray0(va, 0);
        }
@ -5859,10 +6054,17 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
    }

    /**
+#if[FP16]
+     * Finds a species for an element type of {@code $elemtype$} and shape.
+     *
+     * @param s the shape
+     * @return a species for an element type of {@code $elemtype$} and shape
+#else[FP16]
     * Finds a species for an element type of {@code $type$} and shape.
     *
     * @param s the shape
     * @return a species for an element type of {@code $type$} and shape
+#end[FP16]
     * @throws IllegalArgumentException if no such species exists for the shape
     */
    static $Type$Species species(VectorShape s) {
@ -5922,6 +6124,6 @@ public abstract sealed class $abstractvectortype$ extends AbstractVector<$Boxtyp
     * A preferred species is a species of maximal bit-size for the platform.
     */
    public static final VectorSpecies<$Boxtype$> SPECIES_PREFERRED
-        = ($Type$Species) VectorSpecies.ofPreferred($type$.class);
+        = ($Type$Species) VectorSpecies.ofPreferred($elemtype$.class);
 }

--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/X-VectorBits.java.template
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/X-VectorBits.java.template
@ -59,7 +59,7 @@ final class $vectortype$ extends $abstractvectortype$ {

    static final Class<$Carriertype$> CTYPE = $carriertype$.class; // carrier type used by the JVM

-    static final Class<$Boxtype$> ETYPE = $type$.class; // used by the JVM
+    static final Class<$Boxtype$> ETYPE = $elemtype$.class; // used by the JVM

    $vectortype$($type$[] v) {
        super(v);
@ -95,7 +95,7 @@ final class $vectortype$ extends $abstractvectortype$ {

    @ForceInline
    @Override
-    public final Class<$Boxtype$> elementType() { return $type$.class; }
+    public final Class<$Boxtype$> elementType() { return $elemtype$.class; }

    @ForceInline
    final Class<$Carriertype$> carrierType() { return CTYPE; }
@ -216,6 +216,20 @@ final class $vectortype$ extends $abstractvectortype$ {
            super.uOpTemplate(($masktype$)m, f);  // specialize
    }

+#if[FP16]
+    @ForceInline
+    final @Override
+    $vectortype$ uRawOp(FUnRawOp f) {  // unmasked overload, emitted only when the FP16 key is set
+        return ($vectortype$) super.uRawOpTemplate(f);  // specialize: cast the template result to this concrete vector class
+    }
+
+    @ForceInline
+    final @Override
+    $vectortype$ uRawOp(VectorMask<$Boxtype$> m, FUnRawOp f) {  // masked overload, emitted only when the FP16 key is set
+        return ($vectortype$)
+            super.uRawOpTemplate(($masktype$)m, f);  // specialize: mask is narrowed to the concrete mask class, result to the concrete vector class
+    }
+#end[FP16]
    // Binary operator

    @ForceInline
@ -574,6 +588,24 @@ final class $vectortype$ extends $abstractvectortype$ {
            case 13: bits = laneHelper(13); break;
            case 14: bits = laneHelper(14); break;
            case 15: bits = laneHelper(15); break;
+#if[!16L]
+            case 16: bits = laneHelper(16); break;
+            case 17: bits = laneHelper(17); break;
+            case 18: bits = laneHelper(18); break;
+            case 19: bits = laneHelper(19); break;
+            case 20: bits = laneHelper(20); break;
+            case 21: bits = laneHelper(21); break;
+            case 22: bits = laneHelper(22); break;
+            case 23: bits = laneHelper(23); break;
+            case 24: bits = laneHelper(24); break;
+            case 25: bits = laneHelper(25); break;
+            case 26: bits = laneHelper(26); break;
+            case 27: bits = laneHelper(27); break;
+            case 28: bits = laneHelper(28); break;
+            case 29: bits = laneHelper(29); break;
+            case 30: bits = laneHelper(30); break;
+            case 31: bits = laneHelper(31); break;
+#end[!16L]
 #end[!8L]
 #end[!4L]
 #end[!2L]
@ -586,7 +618,7 @@ final class $vectortype$ extends $abstractvectortype$ {
        }
        $bitstype$ bits = laneHelper(i);
 #end[!Max]
-        return $Type$.$bitstype$BitsTo$Fptype$(bits);
+        return {#if[FP16]?bits:$Type$.$bitstype$BitsTo$Fptype$(bits)};
    }

    @ForceInline
@ -596,7 +628,7 @@ final class $vectortype$ extends $abstractvectortype$ {
                     this, i,
                     (vec, ix) -> {
                     $type$[] vecarr = vec.vec();
-                     return (long)$Type$.$type$ToRaw$Bitstype$Bits(vecarr[ix]);
+                     return {#if[FP16]?vecarr[ix]:(long)$Type$.$type$ToRaw$Bitstype$Bits(vecarr[ix])};
                     });
    }

@ -625,6 +657,24 @@ final class $vectortype$ extends $abstractvectortype$ {
            case 13: return withLaneHelper(13, e);
            case 14: return withLaneHelper(14, e);
            case 15: return withLaneHelper(15, e);
+#if[!16L]
+            case 16: return withLaneHelper(16, e);
+            case 17: return withLaneHelper(17, e);
+            case 18: return withLaneHelper(18, e);
+            case 19: return withLaneHelper(19, e);
+            case 20: return withLaneHelper(20, e);
+            case 21: return withLaneHelper(21, e);
+            case 22: return withLaneHelper(22, e);
+            case 23: return withLaneHelper(23, e);
+            case 24: return withLaneHelper(24, e);
+            case 25: return withLaneHelper(25, e);
+            case 26: return withLaneHelper(26, e);
+            case 27: return withLaneHelper(27, e);
+            case 28: return withLaneHelper(28, e);
+            case 29: return withLaneHelper(29, e);
+            case 30: return withLaneHelper(30, e);
+            case 31: return withLaneHelper(31, e);
+#end[!16L]
 #end[!8L]
 #end[!4L]
 #end[!2L]
@ -643,10 +693,10 @@ final class $vectortype$ extends $abstractvectortype$ {
    public $vectortype$ withLaneHelper(int i, $type$ e) {
        return VectorSupport.insert(
                                VCLASS, LANE_TYPE_ORDINAL, VLENGTH,
-                                this, i, (long)$Type$.$type$ToRaw$Bitstype$Bits(e),
+                                this, i, (long){#if[FP16]?e:$Type$.$type$ToRaw$Bitstype$Bits(e)},  // FP16: e is widened to long as-is, with no to-raw-bits call
                                (v, ix, bits) -> {
                                    $type$[] res = v.vec().clone();
-                                    res[ix] = $Type$.$bitstype$BitsTo$Type$(($bitstype$)bits);
+                                    res[ix] = {#if[FP16]?($bitstype$)bits:$Type$.$bitstype$BitsTo$Type$(($bitstype$)bits)};  // FP16: the narrowed bits are stored directly, with no from-bits call
                                    return v.vectorFactory(res);
                                });
    }
@ -981,7 +1031,7 @@ final class $vectortype$ extends $abstractvectortype$ {
        public $masktype$ compress() {
            return ($masktype$)VectorSupport.compressExpandOp(VectorSupport.VECTOR_OP_MASK_COMPRESS,
                $vectortype$.class, $masktype$.class, LANE_TYPE_ORDINAL, VLENGTH, null, this,
-                (v1, m1) -> VSPECIES.iota().compare(VectorOperators.LT, m1.trueCount()));
+                (v1, m1) -> VSPECIES.iota().compare(VectorOperators.LT, {#if[FP16]?Float16.float16ToRawShortBits(Float16.valueOf(m1.trueCount())):m1.trueCount()}));  // FP16: the true-lane count is converted to Float16 and passed as its raw short bits
        }


@ -1389,7 +1439,7 @@ final class $vectortype$ extends $abstractvectortype$ {
        return super.fromArray0Template($masktype$.class, a, offset, indexMap, mapOffset, ($masktype$) m);
    }

-#if[short]
+#if[strictShort]
    @ForceInline
    @Override
    final
@ -1403,7 +1453,7 @@ final class $vectortype$ extends $abstractvectortype$ {
    $abstractvectortype$ fromCharArray0(char[] a, int offset, VectorMask<$Boxtype$> m, int offsetInRange) {
        return super.fromCharArray0Template($masktype$.class, a, offset, ($masktype$) m, offsetInRange);  // specialize
    }
-#end[short]
+#end[strictShort]

 #if[byte]
    @ForceInline
@ -1474,14 +1524,14 @@ final class $vectortype$ extends $abstractvectortype$ {
        super.intoMemorySegment0Template($masktype$.class, ms, offset, ($masktype$) m);
    }

-#if[short]
+#if[strictShort]
    @ForceInline
    @Override
    final
    void intoCharArray0(char[] a, int offset, VectorMask<$Boxtype$> m) {
        super.intoCharArray0Template($masktype$.class, a, offset, ($masktype$) m);
    }
-#end[short]
+#end[strictShort]

    // End of specialized low-level memory operations.

--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/gen-src.sh
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/gen-src.sh
@ -53,19 +53,29 @@ typeprefix=
 globalArgs=""
 #globalArgs="$globalArgs -KextraOverrides"

-for type in byte short int long float double
+for type in byte short int long float double float16
 do
  Type="$(tr '[:lower:]' '[:upper:]' <<< ${type:0:1})${type:1}"
  TYPE="$(tr '[:lower:]' '[:upper:]' <<< ${type})"
+
+  case $type in
+    float16)  # Type was derived from the loop value just above and is not reassigned here; only type and TYPE are remapped
+       type=short  # this is the value later passed to the templates via -K$type and -Dtype
+       TYPE=SHORT
+       ;;
+  esac
+
  args=$globalArgs
  args="$args -K$type -Dtype=$type -DType=$Type -DTYPE=$TYPE"

  Boxtype=$Type
  Wideboxtype=$Boxtype
+  ElemLayout=$Type

  kind=BITWISE

  bitstype=$type
+  maskbitstype=$type
  Bitstype=$Type
  Boxbitstype=$Boxtype

@ -74,23 +84,28 @@ do
  Boxfptype=$Boxtype
  carriertype=$type
  Carriertype=$Type
+  elemtype=$type
+  fallbacktype=$type

-  case $type in
-    byte)
+  case $Type in
+    Byte)
      Wideboxtype=Integer
      sizeInBytes=1
      laneType=LT_BYTE
      lanebitsType=LT_BYTE
-      args="$args -KbyteOrShort"
+      args="$args -KbyteOrShort -KbyteOrStrictShort"
      ;;
-    short)
+    Short)
+      fptype=Float16
+      Fptype=Float16
+      Boxfptype=Float16
      Wideboxtype=Integer
      sizeInBytes=2
      laneType=LT_SHORT
      lanebitsType=LT_SHORT
-      args="$args -KbyteOrShort"
+      args="$args -KbyteOrShort -KbyteOrStrictShort -KstrictShort"
      ;;
-    int)
+    Int)
      Boxtype=Integer
      Carriertype=Integer
      Wideboxtype=Integer
@ -103,7 +118,7 @@ do
      lanebitsType=LT_INT
      args="$args -KintOrLong -KintOrFP -KintOrFloat"
      ;;
-    long)
+    Long)
      fptype=double
      Fptype=Double
      Boxfptype=Double
@ -112,33 +127,53 @@ do
      lanebitsType=LT_LONG
      args="$args -KintOrLong -KlongOrDouble"
      ;;
-    float)
+    Float)
      kind=FP
      bitstype=int
+      maskbitstype=int
      Bitstype=Int
      Boxbitstype=Integer
      sizeInBytes=4
      laneType=LT_FLOAT
      lanebitsType=LT_INT
-      args="$args -KintOrFP -KintOrFloat"
+      args="$args -KFP32 -KintOrFP -KintOrFloat"
      ;;
-    double)
+    Double)
      kind=FP
      bitstype=long
+      maskbitstype=long
      Bitstype=Long
      Boxbitstype=Long
      sizeInBytes=8
      laneType=LT_DOUBLE
      lanebitsType=LT_LONG
-      args="$args -KintOrFP -KlongOrDouble"
+      args="$args -KFP64 -KintOrFP -KlongOrDouble"
+      ;;
+    Float16)
+      kind=FP
+      bitstype=short
+      maskbitstype=short
+      Bitstype=Short
+      Boxbitstype=Short
+      sizeInBytes=2
+      carriertype=short
+      Carriertype=Short
+      Boxtype=Float16
+      elemtype=Float16
+      ElemLayout=Short
+      laneType=LT_FLOAT16
+      lanebitsType=LT_SHORT
+      fallbacktype=float
+      args="$args -KFP16 -KbyteOrShort"
      ;;
  esac

-  args="$args -K$kind -DlaneType=$laneType -DlanebitsType=$lanebitsType -DBoxtype=$Boxtype -DWideboxtype=$Wideboxtype"
-  args="$args -Dbitstype=$bitstype -DBitstype=$Bitstype -DBoxbitstype=$Boxbitstype"
+
+  args="$args -K$kind -DlaneType=$laneType -DlanebitsType=$lanebitsType -Dfallbacktype=$fallbacktype -DBoxtype=$Boxtype -DWideboxtype=$Wideboxtype"
+  args="$args -DElemLayout=$ElemLayout -Dbitstype=$bitstype -Dmaskbitstype=$maskbitstype -DBitstype=$Bitstype -DBoxbitstype=$Boxbitstype"
  args="$args -Dfptype=$fptype -DFptype=$Fptype -DBoxfptype=$Boxfptype"
  args="$args -DsizeInBytes=$sizeInBytes"
-  args="$args -Dcarriertype=$carriertype -DCarriertype=$Carriertype"
+  args="$args -Dcarriertype=$carriertype -Delemtype=$elemtype -DCarriertype=$Carriertype"

  abstractvectortype=${typeprefix}${Type}Vector
  abstractbitsvectortype=${typeprefix}Vector${Bitstype}
--- a/test/hotspot/jtreg/compiler/vectorapi/TestCastIIToConvHF2FNoSp.java
+++ b/test/hotspot/jtreg/compiler/vectorapi/TestCastIIToConvHF2FNoSp.java
@ -0,0 +1,113 @@
+/*
+ * Copyright 2025 Arm Limited and/or its affiliates.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+* @test
+* @bug 8370691 8373574
+* @summary Verify correct execution of CastII -> ConvHF2F IR sequence on AArch64
+* @modules jdk.incubator.vector
+* @library /test/lib /
+* @compile TestCastIIToConvHF2FNoSp.java
+* @run driver/timeout=480 compiler.vectorapi.TestCastIIToConvHF2FNoSp
+*/
+
+package compiler.vectorapi;
+import compiler.lib.ir_framework.*;
+import jdk.incubator.vector.*;
+import static jdk.incubator.vector.Float16.*;
+import static java.lang.Float.*;
+import java.util.Arrays;
+import jdk.test.lib.*;
+import compiler.lib.generators.Generator;
+import static compiler.lib.generators.Generators.G;
+
+public class TestCastIIToConvHF2FNoSp {
+    short[] input1;
+    short[] output;
+    static final int LEN = 527;
+
+    static final Float16 FP16_CONST = Float16.valueOf(1023.0f);
+    static final VectorSpecies<Float16> SPECIES = Float16Vector.SPECIES_PREFERRED;
+
+    public static void main(String args[]) { // entry point: one TestFramework run per flag set below
+        // Baseline run: only the incubator module is added, MaxVectorSize is left at its default
+        TestFramework.runWithFlags("--add-modules=jdk.incubator.vector");
+
+        // Repeat the same tests with MaxVectorSize pinned to 8, 16, 32 and 64
+        TestFramework.runWithFlags("--add-modules=jdk.incubator.vector", "-XX:MaxVectorSize=8");
+        TestFramework.runWithFlags("--add-modules=jdk.incubator.vector", "-XX:MaxVectorSize=16");
+        TestFramework.runWithFlags("--add-modules=jdk.incubator.vector", "-XX:MaxVectorSize=32");
+        TestFramework.runWithFlags("--add-modules=jdk.incubator.vector", "-XX:MaxVectorSize=64");
+    }
+
+    static void assertResults(int arity, short ... values) { // values = arity inputs, then expected bits, then actual bits
+        assert values.length == (arity + 2);
+        Float16 expected_fp16 = shortBitsToFloat16(values[arity]);
+        Float16 actual_fp16 = shortBitsToFloat16(values[arity + 1]);
+        if(!expected_fp16.equals(actual_fp16)) { // compared via Float16.equals, not as raw shorts
+            String inputs = Arrays.toString(Arrays.copyOfRange(values, 0, arity)); // upper bound is exclusive: this is all arity inputs
+            throw new AssertionError("Result Mismatch!, input = " + inputs + " actual = " + actual_fp16 +  " expected = " + expected_fp16);
+        }
+    }
+
+    public TestCastIIToConvHF2FNoSp() { // allocates the arrays and fills input1; output starts zeroed
+        input1 = new short[LEN];
+        output = new short[LEN];
+
+        Generator<Short> gen = G.float16s(); // presumably yields Float16 values as raw short bits - confirm in compiler.lib.generators
+        for (int i = 0; i < LEN; ++i) {
+            input1[i] = gen.next();
+        }
+    }
+
+    @Test
+    @IR(counts = {IRNode.MIN_VHF, " >0 "}, // rule 1: expect at least one MIN_VHF match when sve is present
+        applyIfCPUFeature = {"sve", "true"})
+    @IR(counts = {IRNode.MIN_VHF, " >0 "}, // rule 2: same expectation with fphp and asimdhp present but sve absent
+        applyIfCPUFeatureAnd = {"fphp", "true", "asimdhp", "true", "sve", "false"})
+    void vectorMinConstantInputFloat16() {
+        int i = 0;
+        for (; i < SPECIES.loopBound(LEN); i += SPECIES.length()) { // full-width vectors
+            Float16Vector.fromArray(SPECIES, input1, i)
+                         .lanewise(VectorOperators.MIN,
+                                   float16ToRawShortBits(FP16_CONST)) // scalar operand passed as raw short bits
+                         .intoArray(output, i);
+        }
+        if (i < LEN) { // masked tail for the remaining elements i..LEN-1
+            VectorMask<Float16> mask = SPECIES.indexInRange(i, LEN);
+            Float16Vector.fromArray(SPECIES, input1, i, mask)
+                         .lanewise(VectorOperators.MIN,
+                                   float16ToRawShortBits(FP16_CONST))
+                         .intoArray(output, i, mask);
+        }
+    }
+
+    @Check(test="vectorMinConstantInputFloat16")
+    void checkResultMinConstantInputFloat16() { // verifies output[] element by element against a scalar reference
+        for (int i = 0; i < LEN; ++i) {
+            short expected = floatToFloat16(Math.min(FP16_CONST.floatValue(), float16ToFloat(input1[i]))); // min taken in float, converted back to Float16 bits
+            assertResults(2, float16ToRawShortBits(FP16_CONST), input1[i], expected, output[i]);
+        }
+    }
+}
--- a/test/hotspot/jtreg/compiler/vectorapi/TestFloat16VectorOperations.java
+++ b/test/hotspot/jtreg/compiler/vectorapi/TestFloat16VectorOperations.java
@ -0,0 +1,620 @@
+/*
+ * Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/**
+* @test
+* @bug 8370691
+* @summary Test intrinsification of Float16Vector operations
+* @modules jdk.incubator.vector
+* @library /test/lib /
+* @compile TestFloat16VectorOperations.java
+* @run driver/timeout=480 compiler.vectorapi.TestFloat16VectorOperations
+*/
+
+package compiler.vectorapi;
+import compiler.lib.ir_framework.*;
+import jdk.incubator.vector.*;
+import static jdk.incubator.vector.Float16.*;
+import static java.lang.Float.*;
+import java.util.Arrays;
+import jdk.test.lib.*;
+import compiler.lib.generators.Generator;
+import static compiler.lib.generators.Generators.G;
+
+public class TestFloat16VectorOperations {
+    short[] input1;
+    short[] input2;
+    short[] input3;
+    short[] output;
+    static final int LEN = 527;
+    static short FP16_SCALAR = (short)0x7777;
+
+    static final Float16 FP16_CONST = Float16.valueOf(1023.0f);
+    static final VectorSpecies<Float16> SPECIES = Float16Vector.SPECIES_PREFERRED;
+
+    public static void main(String args[]) { // entry point: one TestFramework run per flag set below
+        // Baseline run: only the incubator module is added, MaxVectorSize is left at its default
+        TestFramework.runWithFlags("--add-modules=jdk.incubator.vector");
+
+        // Repeat the same tests with MaxVectorSize pinned to 8, 16, 32 and 64
+        TestFramework.runWithFlags("--add-modules=jdk.incubator.vector", "-XX:MaxVectorSize=8");
+        TestFramework.runWithFlags("--add-modules=jdk.incubator.vector", "-XX:MaxVectorSize=16");
+        TestFramework.runWithFlags("--add-modules=jdk.incubator.vector", "-XX:MaxVectorSize=32");
+        TestFramework.runWithFlags("--add-modules=jdk.incubator.vector", "-XX:MaxVectorSize=64");
+    }
+
+    static void assertResults(int arity, short ... values) { // values = arity inputs, then expected bits, then actual bits
+        assert values.length == (arity + 2);
+        Float16 expected_fp16 = shortBitsToFloat16(values[arity]);
+        Float16 actual_fp16 = shortBitsToFloat16(values[arity + 1]);
+        if(!expected_fp16.equals(actual_fp16)) { // compared via Float16.equals, not as raw shorts
+            String inputs = Arrays.toString(Arrays.copyOfRange(values, 0, arity)); // upper bound is exclusive: this is all arity inputs
+            throw new AssertionError("Result Mismatch!, input = " + inputs + " actual = " + actual_fp16 +  " expected = " + expected_fp16);
+        }
+    }
+
+    public TestFloat16VectorOperations() { // allocates all arrays and fills the three inputs; output starts zeroed
+        input1 = new short[LEN];
+        input2 = new short[LEN];
+        input3 = new short[LEN];
+        output = new short[LEN];
+
+        Generator<Short> gen = G.float16s(); // presumably yields Float16 values as raw short bits - confirm in compiler.lib.generators
+        for (int i = 0; i < LEN; ++i) { // one shared generator, values interleaved across the three inputs
+            input1[i] = gen.next();
+            input2[i] = gen.next();
+            input3[i] = gen.next();
+        }
+    }
+
+    @Test
+    @IR(counts = {IRNode.ADD_VHF, " >0 "}, // rule 1: expect at least one ADD_VHF match if any listed feature is present
+        applyIfCPUFeatureOr = {"avx512_fp16", "true", "zvfh", "true", "sve", "true"})
+    @IR(counts = {IRNode.ADD_VHF, " >0 "}, // rule 2: same expectation when both fphp and asimdhp are present
+        applyIfCPUFeatureAnd = {"fphp", "true", "asimdhp", "true"})
+    void vectorAddFloat16() {
+        int i = 0;
+        for (; i < SPECIES.loopBound(LEN); i += SPECIES.length()) { // full-width vectors
+            Float16Vector.fromArray(SPECIES, input1, i)
+                         .lanewise(VectorOperators.ADD,
+                                   Float16Vector.fromArray(SPECIES, input2, i))
+                         .intoArray(output, i);
+        }
+        if (i < LEN) { // masked tail for the remaining elements i..LEN-1
+            VectorMask<Float16> mask = SPECIES.indexInRange(i, LEN);
+            Float16Vector.fromArray(SPECIES, input1, i, mask)
+                         .lanewise(VectorOperators.ADD,
+                                   Float16Vector.fromArray(SPECIES, input2, i, mask))
+                         .intoArray(output, i, mask);
+        }
+    }
+
+    @Check(test="vectorAddFloat16")
+    void checkResultAdd() { // verifies output[] element by element against a scalar reference
+        for (int i = 0; i < LEN; ++i) {
+            short expected = floatToFloat16(float16ToFloat(input1[i]) + float16ToFloat(input2[i])); // add in float, convert back to Float16 bits
+            assertResults(2, input1[i], input2[i], expected, output[i]);
+        }
+    }
+
+
+    @Test
+    @IR(counts = {IRNode.SUB_VHF, " >0 "}, // rule 1: expect at least one SUB_VHF match if any listed feature is present
+        applyIfCPUFeatureOr = {"avx512_fp16", "true", "zvfh", "true", "sve", "true"})
+    @IR(counts = {IRNode.SUB_VHF, " >0 "}, // rule 2: same expectation when both fphp and asimdhp are present
+        applyIfCPUFeatureAnd = {"fphp", "true", "asimdhp", "true"})
+    void vectorSubFloat16() {
+        int i = 0;
+        for (; i < SPECIES.loopBound(LEN); i += SPECIES.length()) { // full-width vectors
+            Float16Vector.fromArray(SPECIES, input1, i)
+                         .lanewise(VectorOperators.SUB,
+                                   Float16Vector.fromArray(SPECIES, input2, i))
+                         .intoArray(output, i);
+        }
+        if (i < LEN) { // masked tail for the remaining elements i..LEN-1
+            VectorMask<Float16> mask = SPECIES.indexInRange(i, LEN);
+            Float16Vector.fromArray(SPECIES, input1, i, mask)
+                         .lanewise(VectorOperators.SUB,
+                                   Float16Vector.fromArray(SPECIES, input2, i, mask))
+                         .intoArray(output, i, mask);
+        }
+    }
+
+    @Check(test="vectorSubFloat16")
+    void checkResultSub() { // verifies output[] element by element against a scalar reference
+        for (int i = 0; i < LEN; ++i) {
+            short expected = floatToFloat16(float16ToFloat(input1[i]) - float16ToFloat(input2[i])); // subtract in float, convert back to Float16 bits
+            assertResults(2, input1[i], input2[i], expected, output[i]);
+        }
+    }
+
+
+    @Test
+    @IR(counts = {IRNode.MUL_VHF, " >0 "}, // rule 1: expect at least one MUL_VHF match if any listed feature is present
+        applyIfCPUFeatureOr = {"avx512_fp16", "true", "zvfh", "true", "sve", "true"})
+    @IR(counts = {IRNode.MUL_VHF, " >0 "}, // rule 2: same expectation when both fphp and asimdhp are present
+        applyIfCPUFeatureAnd = {"fphp", "true", "asimdhp", "true"})
+    void vectorMulFloat16() {
+        int i = 0;
+        for (; i < SPECIES.loopBound(LEN); i += SPECIES.length()) { // full-width vectors
+            Float16Vector.fromArray(SPECIES, input1, i)
+                         .lanewise(VectorOperators.MUL,
+                                   Float16Vector.fromArray(SPECIES, input2, i))
+                         .intoArray(output, i);
+        }
+        if (i < LEN) { // masked tail for the remaining elements i..LEN-1
+            VectorMask<Float16> mask = SPECIES.indexInRange(i, LEN);
+            Float16Vector.fromArray(SPECIES, input1, i, mask)
+                         .lanewise(VectorOperators.MUL,
+                                   Float16Vector.fromArray(SPECIES, input2, i, mask))
+                         .intoArray(output, i, mask);
+        }
+    }
+
+    @Check(test="vectorMulFloat16")
+    void checkResultMul() { // verifies output[] element by element against a scalar reference
+        for (int i = 0; i < LEN; ++i) {
+            short expected = floatToFloat16(float16ToFloat(input1[i]) * float16ToFloat(input2[i])); // multiply in float, convert back to Float16 bits
+            assertResults(2, input1[i], input2[i], expected, output[i]);
+        }
+    }
+
+    @Test
+    @IR(counts = {IRNode.DIV_VHF, " >0 "}, // rule 1: expect at least one DIV_VHF match if any listed feature is present
+        applyIfCPUFeatureOr = {"avx512_fp16", "true", "zvfh", "true", "sve", "true"})
+    @IR(counts = {IRNode.DIV_VHF, " >0 "}, // rule 2: same expectation when both fphp and asimdhp are present
+        applyIfCPUFeatureAnd = {"fphp", "true", "asimdhp", "true"})
+    void vectorDivFloat16() {
+        int i = 0;
+        for (; i < SPECIES.loopBound(LEN); i += SPECIES.length()) { // full-width vectors
+            Float16Vector.fromArray(SPECIES, input1, i)
+                         .lanewise(VectorOperators.DIV,
+                                   Float16Vector.fromArray(SPECIES, input2, i))
+                         .intoArray(output, i);
+        }
+        if (i < LEN) { // masked tail for the remaining elements i..LEN-1
+            VectorMask<Float16> mask = SPECIES.indexInRange(i, LEN);
+            Float16Vector.fromArray(SPECIES, input1, i, mask)
+                         .lanewise(VectorOperators.DIV,
+                                   Float16Vector.fromArray(SPECIES, input2, i, mask))
+                         .intoArray(output, i, mask);
+        }
+    }
+
+    @Check(test="vectorDivFloat16")
+    void checkResultDiv() { // verifies output[] element by element against a scalar reference
+        for (int i = 0; i < LEN; ++i) {
+            short expected = floatToFloat16(float16ToFloat(input1[i]) / float16ToFloat(input2[i])); // divide in float, convert back to Float16 bits
+            assertResults(2, input1[i], input2[i], expected, output[i]);
+        }
+    }
+
+    @Test
+    @IR(counts = {IRNode.MIN_VHF, " >0 "}, // rule 1: expect at least one MIN_VHF match if any listed feature is present
+        applyIfCPUFeatureOr = {"avx512_fp16", "true", "zvfh", "true", "sve", "true"})
+    @IR(counts = {IRNode.MIN_VHF, " >0 "}, // rule 2: same expectation when both fphp and asimdhp are present
+        applyIfCPUFeatureAnd = {"fphp", "true", "asimdhp", "true"})
+    void vectorMinFloat16() {
+        int i = 0;
+        for (; i < SPECIES.loopBound(LEN); i += SPECIES.length()) { // full-width vectors
+            Float16Vector.fromArray(SPECIES, input1, i)
+                         .lanewise(VectorOperators.MIN,
+                                   Float16Vector.fromArray(SPECIES, input2, i))
+                         .intoArray(output, i);
+        }
+        if (i < LEN) { // masked tail for the remaining elements i..LEN-1
+            VectorMask<Float16> mask = SPECIES.indexInRange(i, LEN);
+            Float16Vector.fromArray(SPECIES, input1, i, mask)
+                         .lanewise(VectorOperators.MIN,
+                                   Float16Vector.fromArray(SPECIES, input2, i, mask))
+                         .intoArray(output, i, mask);
+        }
+    }
+
+    @Check(test="vectorMinFloat16")
+    void checkResultMin() { // verifies output[] element by element against a scalar reference
+        for (int i = 0; i < LEN; ++i) {
+            short expected = floatToFloat16(Math.min(float16ToFloat(input1[i]), float16ToFloat(input2[i]))); // Math.min on floats, converted back to Float16 bits
+            assertResults(2, input1[i], input2[i], expected, output[i]);
+        }
+    }
+
+    @Test
+    @IR(counts = {IRNode.MAX_VHF, " >0 "}, // rule 1: expect at least one MAX_VHF match if any listed feature is present
+        applyIfCPUFeatureOr = {"avx512_fp16", "true", "zvfh", "true", "sve", "true"})
+    @IR(counts = {IRNode.MAX_VHF, " >0 "}, // rule 2: same expectation when both fphp and asimdhp are present
+        applyIfCPUFeatureAnd = {"fphp", "true", "asimdhp", "true"})
+    void vectorMaxFloat16() {
+        int i = 0;
+        for (; i < SPECIES.loopBound(LEN); i += SPECIES.length()) { // full-width vectors
+            Float16Vector.fromArray(SPECIES, input1, i)
+                         .lanewise(VectorOperators.MAX,
+                                   Float16Vector.fromArray(SPECIES, input2, i))
+                         .intoArray(output, i);
+        }
+        if (i < LEN) { // masked tail for the remaining elements i..LEN-1
+            VectorMask<Float16> mask = SPECIES.indexInRange(i, LEN);
+            Float16Vector.fromArray(SPECIES, input1, i, mask)
+                         .lanewise(VectorOperators.MAX,
+                                   Float16Vector.fromArray(SPECIES, input2, i, mask))
+                         .intoArray(output, i, mask);
+        }
+    }
+
+    @Check(test="vectorMaxFloat16")
+    void checkResultMax() { // verifies output[] element by element against a scalar reference
+        for (int i = 0; i < LEN; ++i) {
+            short expected = floatToFloat16(Math.max(float16ToFloat(input1[i]), float16ToFloat(input2[i]))); // Math.max on floats, converted back to Float16 bits
+            assertResults(2, input1[i], input2[i], expected, output[i]);
+        }
+    }
+
+    @Test
+    @IR(counts = {IRNode.SQRT_VHF, " >0 "}, // rule 1: expect at least one SQRT_VHF match if any listed feature is present
+        applyIfCPUFeatureOr = {"avx512_fp16", "true", "zvfh", "true", "sve", "true"})
+    @IR(counts = {IRNode.SQRT_VHF, " >0 "}, // rule 2: same expectation when both fphp and asimdhp are present
+        applyIfCPUFeatureAnd = {"fphp", "true", "asimdhp", "true"})
+    void vectorSqrtFloat16() {
+        int i = 0;
+        for (; i < SPECIES.loopBound(LEN); i += SPECIES.length()) { // full-width vectors
+            Float16Vector.fromArray(SPECIES, input1, i)
+                         .lanewise(VectorOperators.SQRT) // unary: input1 is the only operand
+                         .intoArray(output, i);
+        }
+        if (i < LEN) { // masked tail: load and store are masked, the SQRT call itself takes no mask
+            VectorMask<Float16> mask = SPECIES.indexInRange(i, LEN);
+            Float16Vector.fromArray(SPECIES, input1, i, mask)
+                         .lanewise(VectorOperators.SQRT)
+                         .intoArray(output, i, mask);
+        }
+    }
+
+    @Check(test="vectorSqrtFloat16")
+    void checkResultSqrt() { // verifies output[] element by element against a scalar reference
+        for (int i = 0; i < LEN; ++i) {
+            short expected = float16ToRawShortBits(sqrt(shortBitsToFloat16(input1[i]))); // reference stays in Float16: no float round trip here
+            assertResults(1, input1[i], expected, output[i]);
+        }
+    }
+
+    @Test
+    @IR(counts = {IRNode.FMA_VHF, " >0 "}, // rule 1: expect at least one FMA_VHF match if any listed feature is present
+        applyIfCPUFeatureOr = {"avx512_fp16", "true", "zvfh", "true", "sve", "true"})
+    @IR(counts = {IRNode.FMA_VHF, " >0 "}, // rule 2: same expectation when both fphp and asimdhp are present
+        applyIfCPUFeatureAnd = {"fphp", "true", "asimdhp", "true"})
+    void vectorFmaFloat16() {
+        int i = 0;
+        for (; i < SPECIES.loopBound(LEN); i += SPECIES.length()) { // full-width vectors
+            Float16Vector.fromArray(SPECIES, input1, i)
+                         .lanewise(VectorOperators.FMA, // ternary: all three operands are vectors loaded from arrays
+                                   Float16Vector.fromArray(SPECIES, input2, i),
+                                   Float16Vector.fromArray(SPECIES, input3, i))
+                         .intoArray(output, i);
+        }
+        if (i < LEN) { // masked tail for the remaining elements i..LEN-1
+            VectorMask<Float16> mask = SPECIES.indexInRange(i, LEN);
+            Float16Vector.fromArray(SPECIES, input1, i, mask)
+                         .lanewise(VectorOperators.FMA,
+                                   Float16Vector.fromArray(SPECIES, input2, i, mask),
+                                   Float16Vector.fromArray(SPECIES, input3, i, mask))
+                         .intoArray(output, i, mask);
+        }
+    }
+
+    @Check(test="vectorFmaFloat16")
+    void checkResultFma() { // verifies output[] element by element against the scalar Float16 fma
+        for (int i = 0; i < LEN; ++i) {
+            short expected = float16ToRawShortBits(fma(shortBitsToFloat16(input1[i]), shortBitsToFloat16(input2[i]), // same operand order as the vector call
+                                                       shortBitsToFloat16(input3[i])));
+            assertResults(3, input1[i], input2[i], input3[i], expected, output[i]);
+        }
+    }
+
+    @Test
+    @IR(counts = {IRNode.FMA_VHF, " >0 "}, // rule 1: expect at least one FMA_VHF match if any listed feature is present
+        applyIfCPUFeatureOr = {"avx512_fp16", "true", "zvfh", "true", "sve", "true"})
+    @IR(counts = {IRNode.FMA_VHF, " >0 "}, // rule 2: same expectation when both fphp and asimdhp are present
+        applyIfCPUFeatureAnd = {"fphp", "true", "asimdhp", "true"})
+    void vectorFmaFloat16ScalarMixedConstants() {
+        int i = 0;
+        for (; i < SPECIES.loopBound(LEN); i += SPECIES.length()) { // full-width vectors
+            Float16Vector.fromArray(SPECIES, input1, i)
+                         .lanewise(VectorOperators.FMA,
+                                   FP16_SCALAR, // scalar operand read from a non-final static field
+                                   floatToFloat16(3.0f)) // scalar operand built from a float literal
+                         .intoArray(output, i);
+        }
+        if (i < LEN) { // masked tail for the remaining elements i..LEN-1
+            VectorMask<Float16> mask = SPECIES.indexInRange(i, LEN);
+            Float16Vector.fromArray(SPECIES, input1, i, mask)
+                         .lanewise(VectorOperators.FMA,
+                                   FP16_SCALAR,
+                                   floatToFloat16(3.0f))
+                         .intoArray(output, i, mask);
+        }
+    }
+
+    @Check(test="vectorFmaFloat16ScalarMixedConstants")
+    void checkResultFmaScalarMixedConstants() { // verifies output[] element by element against the scalar Float16 fma
+        for (int i = 0; i < LEN; ++i) {
+            short expected = float16ToRawShortBits(fma(shortBitsToFloat16(input1[i]), shortBitsToFloat16(FP16_SCALAR),
+                                                       shortBitsToFloat16(floatToFloat16(3.0f))));
+            assertResults(3, input1[i], FP16_SCALAR, floatToFloat16(3.0f), expected, output[i]); // report all three FMA operands on mismatch
+        }
+    }
+
+
+    @Test
+    @IR(counts = {IRNode.FMA_VHF, " >0 "}, // rule 1: expect at least one FMA_VHF match if any listed feature is present
+        applyIfCPUFeatureOr = {"avx512_fp16", "true", "zvfh", "true", "sve", "true"})
+    @IR(counts = {IRNode.FMA_VHF, " >0 "}, // rule 2: same expectation when both fphp and asimdhp are present
+        applyIfCPUFeatureAnd = {"fphp", "true", "asimdhp", "true"})
+    void vectorFmaFloat16MixedConstants() {
+        short input3 = floatToFloat16(3.0f); // local scalar; shadows the input3 array field inside this method
+        int i = 0;
+        for (; i < SPECIES.loopBound(LEN); i += SPECIES.length()) { // full-width vectors
+            Float16Vector.fromArray(SPECIES, input1, i)
+                         .lanewise(VectorOperators.FMA, // two vector operands plus one scalar operand
+                                   Float16Vector.fromArray(SPECIES, input2, i),
+                                   input3)
+                         .intoArray(output, i);
+        }
+        if (i < LEN) { // masked tail for the remaining elements i..LEN-1
+            VectorMask<Float16> mask = SPECIES.indexInRange(i, LEN);
+            Float16Vector.fromArray(SPECIES, input1, i, mask)
+                         .lanewise(VectorOperators.FMA,
+                                   Float16Vector.fromArray(SPECIES, input2, i, mask),
+                                   input3)
+                         .intoArray(output, i, mask);
+        }
+    }
+
+
+    @Check(test="vectorFmaFloat16MixedConstants")
+    void checkResultFmaMixedConstants() { // verifies output[] element by element against the scalar Float16 fma
+        short input3 = floatToFloat16(3.0f); // same scalar as the test method; shadows the input3 array field
+        for (int i = 0; i < LEN; ++i) {
+            short expected = float16ToRawShortBits(fma(shortBitsToFloat16(input1[i]), shortBitsToFloat16(input2[i]), shortBitsToFloat16(input3)));
+            assertResults(3, input1[i], input2[i], input3, expected, output[i]);
+        }
+    }
+
+    @Test
+    @IR(counts = {IRNode.FMA_VHF, " >0 "}, // rule 1: expect at least one FMA_VHF match if any listed feature is present
+        applyIfCPUFeatureOr = {"avx512_fp16", "true", "zvfh", "true", "sve", "true"})
+    @IR(counts = {IRNode.FMA_VHF, " >0 "}, // rule 2: same expectation when both fphp and asimdhp are present
+        applyIfCPUFeatureAnd = {"fphp", "true", "asimdhp", "true"})
+    void vectorFmaFloat16AllConstants() {
+        short input1 = floatToFloat16(1.0f); // these three locals shadow the array fields of the same names
+        short input2 = floatToFloat16(2.0f);
+        short input3 = floatToFloat16(3.0f);
+        int i = 0;
+        for (; i < SPECIES.loopBound(LEN); i += SPECIES.length()) { // no array is read here; only output is written
+            Float16Vector.broadcast(SPECIES, input1)
+                         .lanewise(VectorOperators.FMA,
+                                   input2,
+                                   input3)
+                         .intoArray(output, i);
+        }
+        if (i < LEN) { // tail: only the store is masked
+            VectorMask<Float16> mask = SPECIES.indexInRange(i, LEN);
+            Float16Vector.broadcast(SPECIES, input1)
+                         .lanewise(VectorOperators.FMA,
+                                   input2,
+                                   input3)
+                         .intoArray(output, i, mask);
+        }
+    }
+
+    @Check(test="vectorFmaFloat16AllConstants")
+    void checkResultFmaAllConstants() { // every output element is checked against the same scalar fma result
+        short input1 = floatToFloat16(1.0f); // same constants as the test method; locals shadow the array fields
+        short input2 = floatToFloat16(2.0f);
+        short input3 = floatToFloat16(3.0f);
+        for (int i = 0; i < LEN; ++i) {
+            short expected = float16ToRawShortBits(fma(shortBitsToFloat16(input1), shortBitsToFloat16(input2), shortBitsToFloat16(input3))); // does not depend on i
+            assertResults(3, input1, input2, input3, expected, output[i]);
+        }
+    }
+
+
+    @Test
+    @IR(counts = {IRNode.ADD_VHF, " >0 "}, // rule 1: expect at least one ADD_VHF match if any listed feature is present
+        applyIfCPUFeatureOr = {"avx512_fp16", "true", "zvfh", "true", "sve", "true"})
+    @IR(counts = {IRNode.ADD_VHF, " >0 "}, // rule 2: same expectation when both fphp and asimdhp are present
+        applyIfCPUFeatureAnd = {"fphp", "true", "asimdhp", "true"})
+    void vectorAddConstInputFloat16() {
+        int i = 0;
+        for (; i < SPECIES.loopBound(LEN); i += SPECIES.length()) { // full-width vectors
+            Float16Vector.fromArray(SPECIES, input1, i)
+                         .lanewise(VectorOperators.ADD,
+                                   float16ToRawShortBits(FP16_CONST)) // scalar operand: static final constant as raw short bits
+                         .intoArray(output, i);
+        }
+        if (i < LEN) { // masked tail for the remaining elements i..LEN-1
+            VectorMask<Float16> mask = SPECIES.indexInRange(i, LEN);
+            Float16Vector.fromArray(SPECIES, input1, i, mask)
+                         .lanewise(VectorOperators.ADD,
+                                   float16ToRawShortBits(FP16_CONST))
+                         .intoArray(output, i, mask);
+        }
+    }
+
+    @Check(test="vectorAddConstInputFloat16")
+    void checkResultAddConstantInputFloat16() { // verifies output[] element by element against a scalar reference
+        for (int i = 0; i < LEN; ++i) {
+            short expected = floatToFloat16(float16ToFloat(input1[i]) + FP16_CONST.floatValue()); // add in float, convert back to Float16 bits
+            assertResults(2, input1[i], float16ToRawShortBits(FP16_CONST), expected, output[i]);
+        }
+    }
+
+    @Test
+    @IR(counts = {IRNode.SUB_VHF, " >0 "}, // rule 1: expect at least one SUB_VHF match if either listed feature is present
+        applyIfCPUFeatureOr = {"avx512_fp16", "true", "sve", "true"})
+    @IR(counts = {IRNode.SUB_VHF, " >0 "}, // rule 2: same expectation when both fphp and asimdhp are present
+        applyIfCPUFeatureAnd = {"fphp", "true", "asimdhp", "true"})
+    void vectorSubConstInputFloat16() {
+        int i = 0;
+        for (; i < SPECIES.loopBound(LEN); i += SPECIES.length()) { // full-width vectors
+            Float16Vector.fromArray(SPECIES, input1, i)
+                         .lanewise(VectorOperators.SUB,
+                                   float16ToRawShortBits(FP16_CONST)) // scalar operand: static final constant as raw short bits
+                         .intoArray(output, i);
+        }
+        if (i < LEN) { // masked tail for the remaining elements i..LEN-1
+            VectorMask<Float16> mask = SPECIES.indexInRange(i, LEN);
+            Float16Vector.fromArray(SPECIES, input1, i, mask)
+                         .lanewise(VectorOperators.SUB,
+                                   float16ToRawShortBits(FP16_CONST))
+                         .intoArray(output, i, mask);
+        }
+    }
+
+    @Check(test="vectorSubConstInputFloat16")
+    void checkResultSubConstantInputFloat16() { // verifies output[] element by element against a scalar reference
+        for (int i = 0; i < LEN; ++i) {
+            short expected = floatToFloat16(float16ToFloat(input1[i]) - FP16_CONST.floatValue()); // input minus constant, same order as the vector call
+            assertResults(2, input1[i], float16ToRawShortBits(FP16_CONST), expected, output[i]);
+        }
+    }
+
+    @Test
+    @IR(counts = {IRNode.MUL_VHF, " >0 "}, // rule 1: expect at least one MUL_VHF match if either listed feature is present
+        applyIfCPUFeatureOr = {"avx512_fp16", "true", "sve", "true"})
+    @IR(counts = {IRNode.MUL_VHF, " >0 "}, // rule 2: same expectation when both fphp and asimdhp are present
+        applyIfCPUFeatureAnd = {"fphp", "true", "asimdhp", "true"})
+    void vectorMulConstantInputFloat16() {
+        int i = 0;
+        for (; i < SPECIES.loopBound(LEN); i += SPECIES.length()) { // full-width vectors
+            Float16Vector.fromArray(SPECIES, input2, i)
+                         .lanewise(VectorOperators.MUL,
+                                   float16ToRawShortBits(FP16_CONST)) // scalar operand: static final constant as raw short bits
+                         .intoArray(output, i);
+        }
+        if (i < LEN) { // masked tail for the remaining elements i..LEN-1
+            VectorMask<Float16> mask = SPECIES.indexInRange(i, LEN);
+            Float16Vector.fromArray(SPECIES, input2, i, mask)
+                         .lanewise(VectorOperators.MUL,
+                                   float16ToRawShortBits(FP16_CONST))
+                         .intoArray(output, i, mask);
+        }
+    }
+
+    @Check(test="vectorMulConstantInputFloat16")
+    void checkResultMulConstantInputFloat16() { // verifies output[] element by element against a scalar reference
+        for (int i = 0; i < LEN; ++i) {
+            short expected = floatToFloat16(FP16_CONST.floatValue() * float16ToFloat(input2[i])); // multiply in float, convert back to Float16 bits
+            assertResults(2, float16ToRawShortBits(FP16_CONST), input2[i], expected, output[i]);
+        }
+    }
+
+    @Test
+    @IR(counts = {IRNode.DIV_VHF, " >0 "}, // rule 1: expect at least one DIV_VHF match if either listed feature is present
+        applyIfCPUFeatureOr = {"avx512_fp16", "true", "sve", "true"})
+    @IR(counts = {IRNode.DIV_VHF, " >0 "}, // rule 2: same expectation when both fphp and asimdhp are present
+        applyIfCPUFeatureAnd = {"fphp", "true", "asimdhp", "true"})
+    void vectorDivConstantInputFloat16() {
+        int i = 0;
+        for (; i < SPECIES.loopBound(LEN); i += SPECIES.length()) { // full-width vectors
+            Float16Vector.fromArray(SPECIES, input2, i)
+                         .lanewise(VectorOperators.DIV,
+                                   float16ToRawShortBits(FP16_CONST)) // constant is the divisor, as raw short bits
+                         .intoArray(output, i);
+        }
+        if (i < LEN) { // masked tail for the remaining elements i..LEN-1
+            VectorMask<Float16> mask = SPECIES.indexInRange(i, LEN);
+            Float16Vector.fromArray(SPECIES, input2, i, mask)
+                         .lanewise(VectorOperators.DIV,
+                                   float16ToRawShortBits(FP16_CONST))
+                         .intoArray(output, i, mask);
+        }
+    }
+
+    @Check(test="vectorDivConstantInputFloat16")
+    void checkResultDivConstantInputFloat16() { // verifies output[] element by element against a scalar reference
+        for (int i = 0; i < LEN; ++i) {
+            short expected = floatToFloat16(float16ToFloat(input2[i]) / FP16_CONST.floatValue()); // input divided by constant, same order as the vector call
+            assertResults(2, input2[i], float16ToRawShortBits(FP16_CONST), expected, output[i]);
+        }
+    }
+
+    @Test
+    @IR(counts = {IRNode.MAX_VHF, " >0 "}, // rule 1: expect at least one MAX_VHF match if either listed feature is present
+        applyIfCPUFeatureOr = {"avx512_fp16", "true", "sve", "true"})
+    @IR(counts = {IRNode.MAX_VHF, " >0 "}, // rule 2: same expectation when both fphp and asimdhp are present
+        applyIfCPUFeatureAnd = {"fphp", "true", "asimdhp", "true"})
+    void vectorMaxConstantInputFloat16() {
+        int i = 0;
+        for (; i < SPECIES.loopBound(LEN); i += SPECIES.length()) { // full-width vectors
+            Float16Vector.fromArray(SPECIES, input2, i)
+                         .lanewise(VectorOperators.MAX,
+                                   float16ToRawShortBits(FP16_CONST)) // scalar operand: static final constant as raw short bits
+                         .intoArray(output, i);
+        }
+        if (i < LEN) { // masked tail for the remaining elements i..LEN-1
+            VectorMask<Float16> mask = SPECIES.indexInRange(i, LEN);
+            Float16Vector.fromArray(SPECIES, input2, i, mask)
+                         .lanewise(VectorOperators.MAX,
+                                   float16ToRawShortBits(FP16_CONST))
+                         .intoArray(output, i, mask);
+        }
+    }
+
+    @Check(test="vectorMaxConstantInputFloat16")
+    void checkResultMaxConstantInputFloat16() { // verifies output[] element by element against a scalar reference
+        for (int i = 0; i < LEN; ++i) {
+            short expected = floatToFloat16(Math.max(FP16_CONST.floatValue(), float16ToFloat(input2[i]))); // Math.max on floats, converted back to Float16 bits
+            assertResults(2, float16ToRawShortBits(FP16_CONST), input2[i], expected, output[i]);
+        }
+    }
+
+    @Test
+    @IR(counts = {IRNode.MIN_VHF, " >0 "}, // rule 1: expect at least one MIN_VHF match if either listed feature is present
+        applyIfCPUFeatureOr = {"avx512_fp16", "true", "sve", "true"})
+    @IR(counts = {IRNode.MIN_VHF, " >0 "}, // rule 2: same expectation when both fphp and asimdhp are present
+        applyIfCPUFeatureAnd = {"fphp", "true", "asimdhp", "true"})
+    void vectorMinConstantInputFloat16() {
+        int i = 0;
+        for (; i < SPECIES.loopBound(LEN); i += SPECIES.length()) { // full-width vectors
+            Float16Vector.fromArray(SPECIES, input2, i)
+                         .lanewise(VectorOperators.MIN,
+                                   float16ToRawShortBits(FP16_CONST)) // scalar operand: static final constant as raw short bits
+                         .intoArray(output, i);
+        }
+        if (i < LEN) { // masked tail for the remaining elements i..LEN-1
+            VectorMask<Float16> mask = SPECIES.indexInRange(i, LEN);
+            Float16Vector.fromArray(SPECIES, input2, i, mask)
+                         .lanewise(VectorOperators.MIN,
+                                   float16ToRawShortBits(FP16_CONST))
+                         .intoArray(output, i, mask);
+        }
+    }
+
+    @Check(test="vectorMinConstantInputFloat16")
+    void checkResultMinConstantInputFloat16() { // verifies output[] element by element against a scalar reference
+        for (int i = 0; i < LEN; ++i) {
+            short expected = floatToFloat16(Math.min(FP16_CONST.floatValue(), float16ToFloat(input2[i]))); // Math.min on floats, converted back to Float16 bits
+            assertResults(2, float16ToRawShortBits(FP16_CONST), input2[i], expected, output[i]);
+        }
+    }
+}
--- a/test/jdk/jdk/incubator/vector/AbstractVectorConversionTest.java
+++ b/test/jdk/jdk/incubator/vector/AbstractVectorConversionTest.java
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, 2021, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2026, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -41,6 +41,7 @@ import java.util.List;
 import java.util.function.BiConsumer;
 import java.util.function.Function;
 import java.util.function.IntFunction;
+import jdk.incubator.vector.Float16;

 abstract class AbstractVectorConversionTest {

@ -156,6 +157,31 @@ abstract class AbstractVectorConversionTest {
        return a;
    }

+    /** Maps an array index to the short value stored at that index by fill_float16. */
+    @FunctionalInterface
+    interface ToFloat16F {
+        short apply(int index);
+    }
+
+    /** Allocates a new short array of length {@code s} and fills it via the array overload. */
+    static short[] fill_float16(int s, ToFloat16F f) {
+        short[] fresh = new short[s];
+        return fill_float16(fresh, f);
+    }
+
+    static short[] fill_float16(short[] a, ToFloat16F f) {
+        for (int i = 0; i < a.length; i++) {
+            a[i] = f.apply(i);
+        }
+        if (a.length > 7) {
+            a[0] = Float16.float16ToRawShortBits(Float16.MAX_VALUE);
+            a[1] = Float16.float16ToRawShortBits(Float16.MIN_VALUE);
+            a[2] = Float16.float16ToRawShortBits(Float16.NEGATIVE_INFINITY);
+            a[3] = Float16.float16ToRawShortBits(Float16.POSITIVE_INFINITY);
+            a[4] = Float16.float16ToRawShortBits(Float16.NaN);
+            a[5] = (short)0.0;
+            a[6] = Short.MIN_VALUE;
+        }
+        return a;
+    }
+
+
    static final List<IntFunction<byte[]>> BYTE_GENERATORS = List.of(
            withToString("byte(i)", (int s) -> fill_byte(s, i -> (byte) (i + 1)))
    );
@ -180,6 +206,10 @@ abstract class AbstractVectorConversionTest {
            withToString("double(i)", (int s) -> fill_double(s, i -> (double) (i * 10 + 0.1)))
    );

+    static final List<IntFunction<short[]>> FLOAT16_GENERATORS = List.of( // source-array generators returned by sourceGenerators(Float16.class)
+            withToString("Float16(i)", (int s) -> fill_float16(s, i -> (short) (i * 100 + 1))) // each element is the short value i * 100 + 1, stored as-is
+    );
+
    static List<?> sourceGenerators(Class<?> src) {
        if (src == byte.class) {
            return BYTE_GENERATORS;
@ -199,6 +229,9 @@ abstract class AbstractVectorConversionTest {
        else if (src == double.class) {
            return DOUBLE_GENERATORS;
        }
+        else if (src == Float16.class) {
+            return FLOAT16_GENERATORS;
+        }
        else
            throw new IllegalStateException();
    }
@ -206,11 +239,11 @@ abstract class AbstractVectorConversionTest {
    static Object[][] fixedShapeXFixedShapeSpeciesArgs(VectorShape shape) {
        List<Object[]> args = new ArrayList<>();

-        for (Class<?> srcE : List.of(byte.class, short.class, int.class, long.class, float.class, double.class)) {
+        for (Class<?> srcE : List.of(byte.class, short.class, int.class, long.class, float.class, double.class, Float16.class)) {
            VectorSpecies<?> src = VectorSpecies.of(srcE, shape);
            List<?> srcGens = sourceGenerators(srcE);

-            for (Class<?> dstE : List.of(byte.class, short.class, int.class, long.class, float.class, double.class)) {
+            for (Class<?> dstE : List.of(byte.class, short.class, int.class, long.class, float.class, double.class, Float16.class)) {
                VectorSpecies<?> dst = VectorSpecies.of(dstE, shape);

                for (Object srcGen : srcGens) {
@ -225,12 +258,12 @@ abstract class AbstractVectorConversionTest {
    static Object[][] fixedShapeXShapeSpeciesArgs(VectorShape srcShape) {
        List<Object[]> args = new ArrayList<>();

-        for (Class<?> srcE : List.of(byte.class, short.class, int.class, long.class, float.class, double.class)) {
+        for (Class<?> srcE : List.of(byte.class, short.class, int.class, long.class, float.class, double.class, Float16.class)) {
            VectorSpecies<?> src = VectorSpecies.of(srcE, srcShape);
            List<?> srcGens = sourceGenerators(srcE);

            for (VectorShape dstShape : VectorShape.values()) {
-                for (Class<?> dstE : List.of(byte.class, short.class, int.class, long.class, float.class, double.class)) {
+                for (Class<?> dstE : List.of(byte.class, short.class, int.class, long.class, float.class, double.class, Float16.class)) {
                    VectorSpecies<?> dst = VectorSpecies.of(dstE, dstShape);

                    for (Object srcGen : srcGens) {
@ -245,10 +278,10 @@ abstract class AbstractVectorConversionTest {

    static Object[][] fixedShapeXSegmentedCastSpeciesArgs(VectorShape srcShape, boolean legal) {
        List<Object[]> args = new ArrayList<>();
-        for (Class<?> srcE : List.of(byte.class, short.class, int.class, long.class, float.class, double.class)) {
+        for (Class<?> srcE : List.of(byte.class, short.class, int.class, long.class, float.class, double.class, Float16.class)) {
            VectorSpecies<?> src = VectorSpecies.of(srcE, srcShape);
            for (VectorShape dstShape : VectorShape.values()) {
-                for (Class<?> dstE : List.of(byte.class, short.class, int.class, long.class, float.class, double.class)) {
+                for (Class<?> dstE : List.of(byte.class, short.class, int.class, long.class, float.class, double.class, Float16.class)) {
                    VectorSpecies<?> dst = VectorSpecies.of(dstE, dstShape);
                    if (legal == (dst.length() == src.length())) {
                        args.add(new Object[]{src, dst});
@ -261,6 +294,22 @@ abstract class AbstractVectorConversionTest {

    public enum ConvAPI {CONVERT, CONVERTSHAPE, CASTSHAPE, REINTERPRETSHAPE}

+    static Short float16_conversion_adapter(Number in) {
+        if (in.getClass() == Short.class)
+            return Float16.float16ToRawShortBits(Float16.valueOf(in.shortValue()));
+        else if (in.getClass() == Integer.class)
+            return Float16.float16ToRawShortBits(Float16.valueOf(in.intValue()));
+        else if (in.getClass() == Long.class)
+            return Float16.float16ToRawShortBits(Float16.valueOf(in.longValue()));
+        else if (in.getClass() == Float.class)
+            return Float16.float16ToRawShortBits(Float16.valueOf(in.floatValue()));
+        else if (in.getClass() == Double.class)
+            return Float16.float16ToRawShortBits(Float16.valueOf(in.doubleValue()));
+        else if (in.getClass() == Byte.class)
+            return Float16.float16ToRawShortBits(Float16.valueOf(in.byteValue()));
+        else
+            throw new IllegalStateException();
+    }

    static Function<Number, Object> convertValueFunction(Class<?> to) {
        if (to == byte.class)
@ -273,6 +322,8 @@ abstract class AbstractVectorConversionTest {
            return Number::longValue;
        else if (to == float.class)
            return Number::floatValue;
+        else if (to == Float16.class)
+            return (N) -> float16_conversion_adapter(N);
        else if (to == double.class)
            return Number::doubleValue;
        else
@ -282,7 +333,7 @@ abstract class AbstractVectorConversionTest {
    static BiConsumer<ByteBuffer, Object> putBufferValueFunction(Class<?> from) {
        if (from == byte.class)
            return (bb, o) -> bb.put((byte) o);
-        else if (from == short.class)
+        else if (from == short.class || from == Float16.class)
            return (bb, o) -> bb.putShort((short) o);
        else if (from == int.class)
            return (bb, o) -> bb.putInt((int) o);
@ -299,7 +350,7 @@ abstract class AbstractVectorConversionTest {
    static Function<ByteBuffer, Number> getBufferValueFunction(Class<?> to) {
        if (to == byte.class)
            return ByteBuffer::get;
-        else if (to == short.class)
+        else if (to == short.class || to == Float16.class)
            return ByteBuffer::getShort;
        else if (to == int.class)
            return ByteBuffer::getInt;
@ -335,10 +386,23 @@ abstract class AbstractVectorConversionTest {
    static void copyConversionArray(Object src, int srcPos,
                                    Object dest, int destPos,
                                    int length,
+                                    VectorSpecies srcSpecies, // species of the source lanes; only elementType() is consulted
+                                    VectorSpecies dstSpecies, // species of the destination lanes; only elementType() is consulted
                                    Function<Number, Object> c) {
+        if (srcSpecies.elementType() == dstSpecies.elementType()) { // same element type: plain copy, the converter c is not applied
+            System.arraycopy(src, srcPos, dest, destPos, length);
+            return;
+        }
        for (int i = 0; i < length; i++) {
            Number v = (Number) Array.get(src, srcPos + i);
-            Array.set(dest, destPos + i, c.apply(v));
+            if (srcSpecies.elementType() == Float16.class) { // Float16 sources hold raw bits: decode to a Float16 before converting
+                v = (Number) Float16.shortBitsToFloat16(v.shortValue());
+            }
+            v = (Number) c.apply(v);
+            if (dstSpecies.elementType() == Float16.class) { // narrow to short before Array.set stores it into dest
+                v = (Number) v.shortValue();
+            }
+            Array.set(dest, destPos + i, v);
        }
    }

@ -420,8 +484,14 @@ abstract class AbstractVectorConversionTest {

        int[] parts = getPartsArray(m, is_contracting_conv);

-        Object expected = Array.newInstance(destSpecies.elementType(), out_len);
-        Object actual = Array.newInstance(destSpecies.elementType(), out_len);
+        Object expected = null, actual = null;
+        if (destSpecies.elementType() == Float16.class) {
+            expected = Array.newInstance(short.class, out_len);
+            actual = Array.newInstance(short.class, out_len);
+        } else {
+            expected = Array.newInstance(destSpecies.elementType(), out_len);
+            actual = Array.newInstance(destSpecies.elementType(), out_len);
+        }

        Function<Number, Object> convertValue = convertValueFunction(destSpecies.elementType());

@ -432,11 +502,12 @@ abstract class AbstractVectorConversionTest {
            if (is_contracting_conv) {
                int start_idx = -part * src_species_len;
                zeroArray(expected, j, dst_species_len);
-                copyConversionArray(in, i, expected, start_idx + j, src_species_len, convertValue);
+                copyConversionArray(in, i, expected, start_idx + j, src_species_len, srcSpecies, destSpecies, convertValue);
            } else {
                int start_idx = part * dst_species_len;
-                copyConversionArray(in, start_idx + i, expected, j, dst_species_len, convertValue);
+                copyConversionArray(in, start_idx + i, expected, j, dst_species_len, srcSpecies, destSpecies, convertValue);
            }
+
        }

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
@ -452,7 +523,6 @@ abstract class AbstractVectorConversionTest {
                System.arraycopy(rv.toArray(), 0, actual, j, dst_species_len);
            }
        }
-
        Assert.assertEquals(actual, expected);
    }

@ -469,8 +539,14 @@ abstract class AbstractVectorConversionTest {

        int[] parts = getPartsArray(m, is_contracting_conv);

-        Object expected = Array.newInstance(dstSpecies.elementType(), out_len);
-        Object actual = Array.newInstance(dstSpecies.elementType(), out_len);
+        Object expected = null, actual = null;
+        if (dstSpecies.elementType() == Float16.class) {
+            expected = Array.newInstance(short.class, out_len);
+            actual = Array.newInstance(short.class, out_len);
+        } else {
+            expected = Array.newInstance(dstSpecies.elementType(), out_len);
+            actual = Array.newInstance(dstSpecies.elementType(), out_len);
+        }

        BiConsumer<ByteBuffer, Object> putValue = putBufferValueFunction(srcSpecies.elementType());
        Function<ByteBuffer, Number> getValue = getBufferValueFunction(dstSpecies.elementType());
--- a/test/jdk/jdk/incubator/vector/Float16Vector128LoadStoreTests.java
+++ b/test/jdk/jdk/incubator/vector/Float16Vector128LoadStoreTests.java
--- a/test/jdk/jdk/incubator/vector/Float16Vector128Tests.java
+++ b/test/jdk/jdk/incubator/vector/Float16Vector128Tests.java
--- a/test/jdk/jdk/incubator/vector/Float16Vector256LoadStoreTests.java
+++ b/test/jdk/jdk/incubator/vector/Float16Vector256LoadStoreTests.java
--- a/test/jdk/jdk/incubator/vector/Float16Vector256Tests.java
+++ b/test/jdk/jdk/incubator/vector/Float16Vector256Tests.java
--- a/test/jdk/jdk/incubator/vector/Float16Vector512LoadStoreTests.java
+++ b/test/jdk/jdk/incubator/vector/Float16Vector512LoadStoreTests.java
--- a/test/jdk/jdk/incubator/vector/Float16Vector512Tests.java
+++ b/test/jdk/jdk/incubator/vector/Float16Vector512Tests.java
--- a/test/jdk/jdk/incubator/vector/Float16Vector64LoadStoreTests.java
+++ b/test/jdk/jdk/incubator/vector/Float16Vector64LoadStoreTests.java
--- a/test/jdk/jdk/incubator/vector/Float16Vector64Tests.java
+++ b/test/jdk/jdk/incubator/vector/Float16Vector64Tests.java
--- a/test/jdk/jdk/incubator/vector/Float16VectorMaxLoadStoreTests.java
+++ b/test/jdk/jdk/incubator/vector/Float16VectorMaxLoadStoreTests.java
--- a/test/jdk/jdk/incubator/vector/Float16VectorMaxTests.java
+++ b/test/jdk/jdk/incubator/vector/Float16VectorMaxTests.java
--- a/test/jdk/jdk/incubator/vector/ShortVector128Tests.java
+++ b/test/jdk/jdk/incubator/vector/ShortVector128Tests.java
@ -1719,9 +1719,14 @@ public class ShortVector128Tests extends AbstractVectorTest {
        assertEquals(asIntegral.species(), SPECIES);
    }

-    @Test(expectedExceptions = UnsupportedOperationException.class)
+    @Test
    void viewAsFloatingLanesTest() {
-        SPECIES.zero().viewAsFloatingLanes();
+        Vector<?> asFloating = SPECIES.zero().viewAsFloatingLanes(); // the test no longer expects UnsupportedOperationException here
+        VectorSpecies<?> asFloatingSpecies = asFloating.species();
+        Assert.assertNotEquals(asFloatingSpecies.elementType(), SPECIES.elementType()); // the view must report a different element type
+        assertEquals(asFloatingSpecies.vectorShape(), SPECIES.vectorShape()); // same shape
+        assertEquals(asFloatingSpecies.length(), SPECIES.length()); // same lane count
+        assertEquals(asFloating.viewAsIntegralLanes().species(), SPECIES); // round trip returns to the original species
    }

    @Test
--- a/test/jdk/jdk/incubator/vector/ShortVector256Tests.java
+++ b/test/jdk/jdk/incubator/vector/ShortVector256Tests.java
@ -1719,9 +1719,14 @@ public class ShortVector256Tests extends AbstractVectorTest {
        assertEquals(asIntegral.species(), SPECIES);
    }

-    @Test(expectedExceptions = UnsupportedOperationException.class)
+    @Test
    void viewAsFloatingLanesTest() {
-        SPECIES.zero().viewAsFloatingLanes();
+        Vector<?> asFloating = SPECIES.zero().viewAsFloatingLanes(); // the test no longer expects UnsupportedOperationException here
+        VectorSpecies<?> asFloatingSpecies = asFloating.species();
+        Assert.assertNotEquals(asFloatingSpecies.elementType(), SPECIES.elementType()); // the view must report a different element type
+        assertEquals(asFloatingSpecies.vectorShape(), SPECIES.vectorShape()); // same shape
+        assertEquals(asFloatingSpecies.length(), SPECIES.length()); // same lane count
+        assertEquals(asFloating.viewAsIntegralLanes().species(), SPECIES); // round trip returns to the original species
    }

    @Test
--- a/test/jdk/jdk/incubator/vector/ShortVector512Tests.java
+++ b/test/jdk/jdk/incubator/vector/ShortVector512Tests.java
@ -1719,9 +1719,14 @@ public class ShortVector512Tests extends AbstractVectorTest {
        assertEquals(asIntegral.species(), SPECIES);
    }

-    @Test(expectedExceptions = UnsupportedOperationException.class)
+    @Test
    void viewAsFloatingLanesTest() {
-        SPECIES.zero().viewAsFloatingLanes();
+        Vector<?> asFloating = SPECIES.zero().viewAsFloatingLanes(); // the test no longer expects UnsupportedOperationException here
+        VectorSpecies<?> asFloatingSpecies = asFloating.species();
+        Assert.assertNotEquals(asFloatingSpecies.elementType(), SPECIES.elementType()); // the view must report a different element type
+        assertEquals(asFloatingSpecies.vectorShape(), SPECIES.vectorShape()); // same shape
+        assertEquals(asFloatingSpecies.length(), SPECIES.length()); // same lane count
+        assertEquals(asFloating.viewAsIntegralLanes().species(), SPECIES); // round trip returns to the original species
    }

    @Test
--- a/test/jdk/jdk/incubator/vector/ShortVector64Tests.java
+++ b/test/jdk/jdk/incubator/vector/ShortVector64Tests.java
@ -1719,9 +1719,14 @@ public class ShortVector64Tests extends AbstractVectorTest {
        assertEquals(asIntegral.species(), SPECIES);
    }

-    @Test(expectedExceptions = UnsupportedOperationException.class)
+    @Test
    void viewAsFloatingLanesTest() {
-        SPECIES.zero().viewAsFloatingLanes();
+        Vector<?> asFloating = SPECIES.zero().viewAsFloatingLanes(); // the test no longer expects UnsupportedOperationException here
+        VectorSpecies<?> asFloatingSpecies = asFloating.species();
+        Assert.assertNotEquals(asFloatingSpecies.elementType(), SPECIES.elementType()); // the view must report a different element type
+        assertEquals(asFloatingSpecies.vectorShape(), SPECIES.vectorShape()); // same shape
+        assertEquals(asFloatingSpecies.length(), SPECIES.length()); // same lane count
+        assertEquals(asFloating.viewAsIntegralLanes().species(), SPECIES); // round trip returns to the original species
    }

    @Test
--- a/test/jdk/jdk/incubator/vector/ShortVectorMaxTests.java
+++ b/test/jdk/jdk/incubator/vector/ShortVectorMaxTests.java
@ -1725,9 +1725,14 @@ public class ShortVectorMaxTests extends AbstractVectorTest {
        assertEquals(asIntegral.species(), SPECIES);
    }

-    @Test(expectedExceptions = UnsupportedOperationException.class)
+    @Test
    void viewAsFloatingLanesTest() {
-        SPECIES.zero().viewAsFloatingLanes();
+        Vector<?> asFloating = SPECIES.zero().viewAsFloatingLanes(); // the test no longer expects UnsupportedOperationException here
+        VectorSpecies<?> asFloatingSpecies = asFloating.species();
+        Assert.assertNotEquals(asFloatingSpecies.elementType(), SPECIES.elementType()); // the view must report a different element type
+        assertEquals(asFloatingSpecies.vectorShape(), SPECIES.vectorShape()); // same shape
+        assertEquals(asFloatingSpecies.length(), SPECIES.length()); // same lane count
+        assertEquals(asFloating.viewAsIntegralLanes().species(), SPECIES); // round trip returns to the original species
    }

    @Test
--- a/test/jdk/jdk/incubator/vector/gen-tests.sh
+++ b/test/jdk/jdk/incubator/vector/gen-tests.sh
@ -52,11 +52,11 @@ Log false "$compilation\n"
 Log true "done\n"

 # For each type
-for type in byte short int long float double
+for type in byte short int long float double float16
 do
  Type="$(tr '[:lower:]' '[:upper:]' <<< ${type:0:1})${type:1}"
  TYPE="$(tr '[:lower:]' '[:upper:]' <<< ${type})"
-  args="-K$type -Dtype=$type -DType=$Type -DTYPE=$TYPE"
+  args="-K$type -DType=$Type -DTYPE=$TYPE"

  Boxtype=$Type
  Wideboxtype=$Boxtype
@ -69,6 +69,7 @@ do
  bitstype=$type
  Bitstype=$Type
  Boxbitstype=$Boxtype
+  testtype=$type

  fptype=$type
  Fptype=$Type
@ -118,11 +119,23 @@ do
      MaxValue=POSITIVE_INFINITY
      MinValue=NEGATIVE_INFINITY
      ;;
+    float16)  # settings for the float16 iteration of the type loop
+      kind=FP
+      fpkind=FP16  # forwarded as -K$fpkind, which turns on the #if[FP16] template sections
+      bitstype=short
+      type=short  # overrides the loop variable; -Dtype is built after this case, so templates see type=short
+      Bitstype=Short
+      Boxbitstype=Short
+      Wideboxtype=Float16  # forwarded as -DWideboxtype, so constants resolve against Float16
+      MaxValue=POSITIVE_INFINITY
+      MinValue=NEGATIVE_INFINITY
+      Type=Float16  # NOTE(review): the capitalization at the top of the loop already yields Float16 - confirm this is only for explicitness
+      ;;
  esac

-  args="$args -K$kind -K$fpkind -K$Type -DBoxtype=$Boxtype -DWideboxtype=$Wideboxtype -DMaxValue=$MaxValue -DMinValue=$MinValue"
+  args="$args -Dtype=$type -K$kind -K$Type -DBoxtype=$Boxtype -DWideboxtype=$Wideboxtype -DMaxValue=$MaxValue -DMinValue=$MinValue"
  args="$args -Dbitstype=$bitstype -DBitstype=$Bitstype -DBoxbitstype=$Boxbitstype"
-  args="$args -Dfptype=$fptype -DFptype=$Fptype -DBoxfptype=$Boxfptype"
+  args="$args -Dtesttype=$testtype -K$fpkind -Dfptype=$fptype -DFptype=$Fptype -DBoxfptype=$Boxfptype"

  abstractvectortype=${typeprefix}${Type}Vector
  abstractvectorteststype=${typeprefix}${Type}VectorTests
--- a/test/jdk/jdk/incubator/vector/templates/Kernel-Binary-Broadcast-Long-op.template
+++ b/test/jdk/jdk/incubator/vector/templates/Kernel-Binary-Broadcast-Long-op.template
@ -4,5 +4,5 @@

        for (int i = 0; i < a.length; i += SPECIES.length()) {
            $abstractvectortype$ av = $abstractvectortype$.fromArray(SPECIES, a, i);
-            av.lanewise(VectorOperators.[[TEST]], (long)b[i]).intoArray(r, i);
+            av.lanewise(VectorOperators.[[TEST]], {#if[FP16]?shortBitsToFloat16(b[i]).longValue():(long)b[i]}).intoArray(r, i);
        }
--- a/test/jdk/jdk/incubator/vector/templates/Kernel-Binary-Broadcast-Masked-Long-op.template
+++ b/test/jdk/jdk/incubator/vector/templates/Kernel-Binary-Broadcast-Masked-Long-op.template
@ -6,5 +6,5 @@

        for (int i = 0; i < a.length; i += SPECIES.length()) {
            $abstractvectortype$ av = $abstractvectortype$.fromArray(SPECIES, a, i);
-            av.lanewise(VectorOperators.[[TEST]], (long)b[i], vmask).intoArray(r, i);
+            av.lanewise(VectorOperators.[[TEST]], {#if[FP16]?shortBitsToFloat16(b[i]).longValue():(long)b[i]}, vmask).intoArray(r, i);
        }
--- a/test/jdk/jdk/incubator/vector/templates/Unit-Compare-Broadcast.template
+++ b/test/jdk/jdk/incubator/vector/templates/Unit-Compare-Broadcast.template
@ -43,11 +43,11 @@

        for (int i = 0; i < a.length; i += SPECIES.length()) {
            $abstractvectortype$ av = $abstractvectortype$.fromArray(SPECIES, a, i);
-            VectorMask<$Wideboxtype$> mv = av.compare(VectorOperators.[[TEST]], (long)b[i]);
+            VectorMask<$Wideboxtype$> mv = av.compare(VectorOperators.[[TEST]], {#if[FP16]?shortBitsToFloat16(b[i]).longValue():(long)b[i]});

            // Check results as part of computation.
            for (int j = 0; j < SPECIES.length(); j++) {
-                assertEquals(mv.laneIsSet(j), [[TEST_OP]](a[i + j], ($type$)((long)b[i])));
+                assertEquals(mv.laneIsSet(j), [[TEST_OP]](a[i + j], {#if[FP16]?float16ToRawShortBits(Float16.valueOf(shortBitsToFloat16(b[i]).longValue())):($type$)((long)b[i])}));
            }
        }
    }
@ -63,11 +63,11 @@

        for (int i = 0; i < a.length; i += SPECIES.length()) {
            $abstractvectortype$ av = $abstractvectortype$.fromArray(SPECIES, a, i);
-            VectorMask<$Wideboxtype$> mv = av.compare(VectorOperators.[[TEST]], (long)b[i], vmask);
+            VectorMask<$Wideboxtype$> mv = av.compare(VectorOperators.[[TEST]], {#if[FP16]?shortBitsToFloat16(b[i]).longValue():(long)b[i]}, vmask);

            // Check results as part of computation.
            for (int j = 0; j < SPECIES.length(); j++) {
-                assertEquals(mv.laneIsSet(j), mask[j] && ([[TEST_OP]](a[i + j], ($type$)((long)b[i]))));
+                assertEquals(mv.laneIsSet(j), mask[j] && ([[TEST_OP]](a[i + j], {#if[FP16]?float16ToRawShortBits(Float16.valueOf(shortBitsToFloat16(b[i]).longValue())):($type$)((long)b[i])})));
            }
        }
    }
--- a/test/jdk/jdk/incubator/vector/templates/Unit-Miscellaneous.template
+++ b/test/jdk/jdk/incubator/vector/templates/Unit-Miscellaneous.template
@ -210,7 +210,7 @@
        $type$[] r = new $type$[a.length];

        for (int i = 0; i < a.length; i += SPECIES.length()) {
-            $abstractvectortype$.broadcast(SPECIES, (long)a[i]).intoArray(r, i);
+            $abstractvectortype$.broadcast(SPECIES, {#if[FP16]?shortBitsToFloat16(a[i]).longValue():(long)a[i]}).intoArray(r, i);
        }
        assertBroadcastArraysEquals(r, a);
    }
@ -227,7 +227,7 @@
        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                $abstractvectortype$ av = $abstractvectortype$.fromArray(SPECIES, a, i);
-                av.blend((long)b[i], vmask).intoArray(r, i);
+                av.blend({#if[FP16]?shortBitsToFloat16(b[i]).longValue():(long)b[i]}, vmask).intoArray(r, i);
            }
        }
        assertBroadcastLongArraysEquals(r, a, b, mask, $vectorteststype$::blend);
@ -588,14 +588,14 @@
    static void ShapeWithLanes$vectorteststype$SmokeTest() {
        $abstractvectortype$ av = $abstractvectortype$.zero(SPECIES);
        VectorShape vsh = av.shape();
-        VectorSpecies species = vsh.withLanes($type$.class);
+        VectorSpecies species = vsh.withLanes({#if[FP16]?Float16.class:$type$.class});
        assert(species.equals(SPECIES));
    }

    @Test
    static void ElementType$vectorteststype$SmokeTest() {
        $abstractvectortype$ av = $abstractvectortype$.zero(SPECIES);
-        assert(av.species().elementType() == $type$.class);
+        assert(av.species().elementType() == {#if[FP16]?Float16.class:$type$.class}); // FP16 variant expects Float16.class; other variants keep the primitive class
    }

    @Test
@ -613,7 +613,7 @@
    @Test
    static void WithLanes$vectorteststype$SmokeTest() {
        $abstractvectortype$ av = $abstractvectortype$.zero(SPECIES);
-        VectorSpecies species = av.species().withLanes($type$.class);
+        VectorSpecies species = av.species().withLanes({#if[FP16]?Float16.class:$type$.class}); // FP16 variant selects lanes by Float16.class; other variants keep the primitive class
        assert(species.equals(SPECIES));
    }

--- a/test/jdk/jdk/incubator/vector/templates/Unit-header.template
+++ b/test/jdk/jdk/incubator/vector/templates/Unit-header.template
@ -48,6 +48,11 @@ import jdk.incubator.vector.ByteVector;
 #if[Float]
 import jdk.incubator.vector.FloatVector;
 #end[Float]
+#if[FP16]
+import jdk.incubator.vector.Float16;
+import static jdk.incubator.vector.Float16.*;
+import jdk.incubator.vector.Float16Vector;
+#end[FP16]
 #if[Int]
 import jdk.incubator.vector.IntVector;
 #end[Int]
@ -86,6 +91,38 @@ public class $vectorteststype$ extends AbstractVectorTest {
 #end[MaxBit]

    static final int INVOC_COUNT = Integer.getInteger("jdk.incubator.vector.test.loop-iterations", 100);
+#if[FP16]
+    // FP16 values are carried as raw short bit patterns. Every overload below decodes both
+    // sides with Float.float16ToFloat and lets TestNG compare the resulting float values.
+    static void assertEquals(short actual, short expected) {
+        Assert.assertEquals(Float.float16ToFloat(actual), Float.float16ToFloat(expected));
+    }
+    static void assertEquals(short actual, short expected, String msg) {
+        Assert.assertEquals(Float.float16ToFloat(actual), Float.float16ToFloat(expected), msg);
+    }
+    // The tolerance is itself an FP16 raw bit pattern and is decoded the same way.
+    static void assertEquals(short actual, short expected, short delta) {
+        Assert.assertEquals(Float.float16ToFloat(actual), Float.float16ToFloat(expected), Float.float16ToFloat(delta));
+    }
+    static void assertEquals(short actual, short expected, short delta, String msg) {
+        Assert.assertEquals(Float.float16ToFloat(actual), Float.float16ToFloat(expected), Float.float16ToFloat(delta), msg);
+    }
+    // Element-wise comparison of two arrays of FP16 raw bits.
+    static void assertEquals(short [] actual, short [] expected) {
+        // Use a TestNG assertion rather than the Java assert statement, so a length
+        // mismatch is still reported when the JVM runs with assertions disabled.
+        Assert.assertEquals(actual.length, expected.length);
+        for (int i = 0; i < actual.length; i++) {
+            Assert.assertEquals(Float.float16ToFloat(actual[i]), Float.float16ToFloat(expected[i]));
+        }
+    }
+    static void assertEquals(short [] actual, short [] expected, String msg) {
+        // Same length guard as the overload without a message; msg is attached to the failure.
+        Assert.assertEquals(actual.length, expected.length, msg);
+        for (int i = 0; i < actual.length; i++) {
+            Assert.assertEquals(Float.float16ToFloat(actual[i]), Float.float16ToFloat(expected[i]), msg);
+        }
+    }
+    // long arguments are narrowed to short and then decoded as FP16 raw bits.
+    static void assertEquals(long actual, long expected) {
+        Assert.assertEquals(Float.float16ToFloat((short)actual), Float.float16ToFloat((short)expected));
+    }
+    static void assertEquals(long actual, long expected, String msg) {
+        Assert.assertEquals(Float.float16ToFloat((short)actual), Float.float16ToFloat((short)expected), msg);
+    }
+#else[FP16]
    static void assertEquals($type$ actual, $type$ expected) {
        Assert.assertEquals(actual, expected);
    }
@ -112,6 +149,7 @@ public class $vectorteststype$ extends AbstractVectorTest {
        Assert.assertEquals(actual, expected, msg);
    }
 #end[!long]
+#end[FP16]
    static void assertEquals(String actual, String expected) {
        Assert.assertEquals(actual, expected);
    }
@ -152,9 +190,9 @@ public class $vectorteststype$ extends AbstractVectorTest {
    private static final $type$ AND_IDENTITY = ($type$)-1;
 #end[BITWISE]
    private static final $type$ FIRST_NONZERO_IDENTITY = ($type$)0;
-    private static final $type$ MAX_IDENTITY = $Wideboxtype$.$MinValue$;
-    private static final $type$ MIN_IDENTITY = $Wideboxtype$.$MaxValue$;
-    private static final $type$ MUL_IDENTITY = ($type$)1;
+    private static final $type$ MAX_IDENTITY = {#if[FP16]?float16ToRawShortBits($Wideboxtype$.$MinValue$):$Wideboxtype$.$MinValue$};
+    private static final $type$ MIN_IDENTITY = {#if[FP16]?float16ToRawShortBits($Wideboxtype$.$MaxValue$):$Wideboxtype$.$MaxValue$};
+    private static final $type$ MUL_IDENTITY = {#if[FP16]?float16ToRawShortBits(Float16.valueOf(1.0f)):($type$)1};
 #if[BITWISE]
    private static final $type$ OR_IDENTITY = ($type$)0;
    private static final $type$ SUADD_IDENTITY = ($type$)0;
@ -165,10 +203,10 @@ public class $vectorteststype$ extends AbstractVectorTest {
 #if[FP]

    // for floating point addition reduction ops that may introduce rounding errors
-    private static final $type$ RELATIVE_ROUNDING_ERROR_FACTOR_ADD = ($type$)10.0;
+    private static final $type$ RELATIVE_ROUNDING_ERROR_FACTOR_ADD = {#if[FP16]?float16ToRawShortBits(Float16.valueOf(10.0f)):($type$)10.0};

    // for floating point multiplication reduction ops that may introduce rounding errors
-    private static final $type$ RELATIVE_ROUNDING_ERROR_FACTOR_MUL = ($type$)50.0;
+    private static final $type$ RELATIVE_ROUNDING_ERROR_FACTOR_MUL = {#if[FP16]?float16ToRawShortBits(Float16.valueOf(50.0f)):($type$)50.0};
 #end[FP]

    static final int BUFFER_REPS = Integer.getInteger("jdk.incubator.vector.test.buffer-vectors", 25000 / $bits$);
@ -176,8 +214,8 @@ public class $vectorteststype$ extends AbstractVectorTest {
    static void assertArraysStrictlyEquals($type$[] r, $type$[] a) {
        for (int i = 0; i < a.length; i++) {
 #if[FP]
-            $bitstype$ ir = $Wideboxtype$.$type$ToRaw$Bitstype$Bits(r[i]);
-            $bitstype$ ia = $Wideboxtype$.$type$ToRaw$Bitstype$Bits(a[i]);
+            $bitstype$ ir = {#if[FP16]?r[i]:$Wideboxtype$.$type$ToRaw$Bitstype$Bits(r[i])};
+            $bitstype$ ia = {#if[FP16]?a[i]:$Wideboxtype$.$type$ToRaw$Bitstype$Bits(a[i])};
            if (ir != ia) {
 #if[Float]
                Assert.fail(String.format("at index #%d, expected = %08X, actual = %08X", i, ia, ir));
@ -265,6 +303,23 @@ public class $vectorteststype$ extends AbstractVectorTest {
 #end[FP]
    }
 #if[FP]
+#if[FP16]
+
+    static void assertReductionArraysEquals($type$[] r, $type$ rc, $type$[] a,
+                                            FReductionOp f, FReductionAllOp fa,
+                                            $type$ relativeErrorFactor) {
+        int i = 0;
+        try {
+            assertEquals(rc, fa.apply(a), float16ToRawShortBits(Float16.multiply(Float16.ulp(shortBitsToFloat16(rc)), shortBitsToFloat16(relativeErrorFactor))));
+            for (; i < a.length; i += SPECIES.length()) {
+                assertEquals(r[i], f.apply(a, i), float16ToRawShortBits(Float16.multiply(Float16.ulp(shortBitsToFloat16(r[i])), shortBitsToFloat16(relativeErrorFactor))));
+            }
+        } catch (AssertionError e) {
+            assertEquals(rc, fa.apply(a), float16ToRawShortBits(Float16.multiply(Float16.ulp(shortBitsToFloat16(rc)), shortBitsToFloat16(relativeErrorFactor))), "Final result is incorrect!");
+            assertEquals(r[i], f.apply(a, i), float16ToRawShortBits(Float16.multiply(Float16.ulp(shortBitsToFloat16(r[i])), shortBitsToFloat16(relativeErrorFactor))), "at index #" + i);
+        }
+    }
+#else[FP16]

    static void assertReductionArraysEquals($type$[] r, $type$ rc, $type$[] a,
                                            FReductionOp f, FReductionAllOp fa,
@ -280,6 +335,7 @@ public class $vectorteststype$ extends AbstractVectorTest {
            assertEquals(r[i], f.apply(a, i), Math.ulp(r[i]) * relativeErrorFactor, "at index #" + i);
        }
    }
+#end[FP16]
 #end[FP]

    interface FReductionMaskedOp {
@ -308,6 +364,23 @@ public class $vectorteststype$ extends AbstractVectorTest {
 #end[FP]
    }
 #if[FP]
+#if[FP16]
+
+    static void assertReductionArraysEqualsMasked($type$[] r, $type$ rc, $type$[] a, boolean[] mask,
+                                            FReductionMaskedOp f, FReductionAllMaskedOp fa,
+                                            $type$ relativeError) {
+        int i = 0;
+        try {
+            assertEquals(rc, fa.apply(a, mask), float16ToRawShortBits(Float16.abs(Float16.multiply(shortBitsToFloat16(rc), shortBitsToFloat16(relativeError)))));
+            for (; i < a.length; i += SPECIES.length()) {
+                assertEquals(r[i], f.apply(a, i, mask), float16ToRawShortBits(Float16.abs(Float16.multiply(shortBitsToFloat16(r[i]), shortBitsToFloat16(relativeError)))));
+            }
+        } catch (AssertionError e) {
+            assertEquals(rc, fa.apply(a, mask), float16ToRawShortBits(Float16.abs(Float16.multiply(shortBitsToFloat16(rc), shortBitsToFloat16(relativeError)))), "Final result is incorrect!");
+            assertEquals(r[i], f.apply(a, i, mask), float16ToRawShortBits(Float16.abs(Float16.multiply(shortBitsToFloat16(r[i]), shortBitsToFloat16(relativeError)))), "at index #" + i);
+        }
+    }
+#else[FP16]

    static void assertReductionArraysEqualsMasked($type$[] r, $type$ rc, $type$[] a, boolean[] mask,
                                            FReductionMaskedOp f, FReductionAllMaskedOp fa,
@ -324,6 +397,7 @@ relativeError));
            assertEquals(r[i], f.apply(a, i, mask), Math.abs(r[i] * relativeError), "at index #" + i);
        }
    }
+#end[FP16]
 #end[FP]

 #if[!Long]
@ -473,7 +547,7 @@ relativeError));
            for (; i < a.length; i += vector_len) {
                for (j = 0; j < vector_len; j++) {
                    idx = i + j;
-                    wrapped_index = Math.floorMod((int)order[idx], 2 * vector_len);
+                    wrapped_index = Math.floorMod({#if[FP16]?shortBitsToFloat16(order[idx]).intValue():(int)order[idx]}, 2 * vector_len);
                    is_exceptional_idx = wrapped_index >= vector_len;
                    oidx = is_exceptional_idx ? (wrapped_index - vector_len) : wrapped_index;
                    assertEquals(r[idx], (is_exceptional_idx ? b[i + oidx] : a[i + oidx]));
@ -489,12 +563,12 @@ relativeError));
        try {
            for (; i < a.length; i += vector_len) {
                for (j = 0; j < vector_len; j++) {
-                    assertEquals(r[i+j], a[i+(int)order[i+j]]);
+                    assertEquals(r[i+j], a[i+{#if[FP16]?shortBitsToFloat16(order[i+j]).intValue():(int)order[i+j]}]);
                }
            }
        } catch (AssertionError e) {
            int idx = i + j;
-            assertEquals(r[i+j], a[i+(int)order[i+j]], "at index #" + idx + ", input = " + a[i+(int)order[i+j]]);
+            assertEquals(r[i+j], a[i+{#if[FP16]?shortBitsToFloat16(order[i+j]).intValue():(int)order[i+j]}], "at index #" + idx + ", input = " + a[i+{#if[FP16]?shortBitsToFloat16(order[i+j]).intValue():(int)order[i+j]}]);
        }
    }

@ -524,7 +598,7 @@ relativeError));
            for (; i < a.length; i += vector_len) {
                for (j = 0; j < vector_len; j++) {
                    if (mask[j % SPECIES.length()])
-                         assertEquals(r[i+j], a[i+(int)order[i+j]]);
+                         assertEquals(r[i+j], a[i+{#if[FP16]?shortBitsToFloat16(order[i+j]).intValue():(int)order[i+j]}]);
                    else
                         assertEquals(r[i+j], ($type$)0);
                }
@ -532,7 +606,7 @@ relativeError));
        } catch (AssertionError e) {
            int idx = i + j;
            if (mask[j % SPECIES.length()])
-                assertEquals(r[i+j], a[i+(int)order[i+j]], "at index #" + idx + ", input = " + a[i+(int)order[i+j]] + ", mask = " + mask[j % SPECIES.length()]);
+                assertEquals(r[i+j], a[i+{#if[FP16]?shortBitsToFloat16(order[i+j]).intValue():(int)order[i+j]}], "at index #" + idx + ", input = " + a[i+{#if[FP16]?shortBitsToFloat16(order[i+j]).intValue():(int)order[i+j]}] + ", mask = " + mask[j % SPECIES.length()]);
            else
                assertEquals(r[i+j], ($type$)0, "at index #" + idx + ", input = " + a[i+(int)order[i+j]] + ", mask = " + mask[j % SPECIES.length()]);
        }
@ -681,10 +755,10 @@ relativeError));
        int i = 0;
        try {
            for (; i < a.length; i++) {
-                assertEquals(r[i], f.apply(a[i], ($type$)((long)b[(i / SPECIES.length()) * SPECIES.length()])));
+                assertEquals(r[i], f.apply(a[i], {#if[FP16]?float16ToRawShortBits(Float16.valueOf(shortBitsToFloat16(b[(i / SPECIES.length()) * SPECIES.length()]).longValue())):($type$)((long)b[(i / SPECIES.length()) * SPECIES.length()])}));
            }
        } catch (AssertionError e) {
-            assertEquals(r[i], f.apply(a[i], ($type$)((long)b[(i / SPECIES.length()) * SPECIES.length()])),
+            assertEquals(r[i], f.apply(a[i], {#if[FP16]?float16ToRawShortBits(Float16.valueOf(shortBitsToFloat16(b[(i / SPECIES.length()) * SPECIES.length()]).longValue())):($type$)((long)b[(i / SPECIES.length()) * SPECIES.length()])}),
                                "(" + a[i] + ", " + b[(i / SPECIES.length()) * SPECIES.length()] + ") at index #" + i);
        }
    }
@ -745,10 +819,10 @@ relativeError));
        int i = 0;
        try {
            for (; i < a.length; i++) {
-                assertEquals(r[i], f.apply(a[i], ($type$)((long)b[(i / SPECIES.length()) * SPECIES.length()]), mask[i % SPECIES.length()]));
+                assertEquals(r[i], f.apply(a[i], {#if[FP16]?float16ToRawShortBits(Float16.valueOf(shortBitsToFloat16(b[(i / SPECIES.length()) * SPECIES.length()]).longValue())):($type$)((long)b[(i / SPECIES.length()) * SPECIES.length()])}, mask[i % SPECIES.length()]));
            }
        } catch (AssertionError err) {
-            assertEquals(r[i], f.apply(a[i], ($type$)((long)b[(i / SPECIES.length()) * SPECIES.length()]),
+            assertEquals(r[i], f.apply(a[i], {#if[FP16]?float16ToRawShortBits(Float16.valueOf(shortBitsToFloat16(b[(i / SPECIES.length()) * SPECIES.length()]).longValue())):($type$)((long)b[(i / SPECIES.length()) * SPECIES.length()])},
                                mask[i % SPECIES.length()]), "at index #" + i + ", input1 = " + a[i] +
                                ", input2 = " + b[(i / SPECIES.length()) * SPECIES.length()] + ", mask = " +
                                mask[i % SPECIES.length()]);
@ -978,6 +1052,26 @@ relativeError));

 #if[FP]
    static boolean isWithin1Ulp($type$ actual, $type$ expected) {
+#if[FP16]
+        Float16 act = shortBitsToFloat16(actual);
+        Float16 exp = shortBitsToFloat16(expected);
+        if (Float16.isNaN(exp) && !Float16.isNaN(act)) {
+            return false;
+        } else if (!Float16.isNaN(exp) && Float16.isNaN(act)) {
+             return false;
+        }
+
+        Float16 low = Float16.nextDown(exp);
+        Float16 high = Float16.nextUp(exp);
+
+        if (Float16.compare(low, exp) > 0) {
+            return false;
+        }
+
+        if (Float16.compare(high, exp) < 0) {
+            return false;
+        }
+#else[FP16]
        if ($Type$.isNaN(expected) && !$Type$.isNaN(actual)) {
            return false;
        } else if (!$Type$.isNaN(expected) && $Type$.isNaN(actual)) {
@ -994,6 +1088,7 @@ relativeError));
        if ($Type$.compare(high, expected) < 0) {
            return false;
        }
+#end[FP16]

        return true;
    }
@ -1003,11 +1098,11 @@ relativeError));
        try {
            // Check that result is within 1 ulp of strict math or equivalent to math implementation.
            for (; i < a.length; i++) {
-                Assert.assertTrue($Type$.compare(r[i], mathf.apply(a[i])) == 0 ||
+                Assert.assertTrue($Wideboxtype$.compare({#if[FP16]?shortBitsToFloat16(r[i]):r[i]}, {#if[FP16]?shortBitsToFloat16(mathf.apply(a[i])):mathf.apply(a[i])}) == 0 ||
                                    isWithin1Ulp(r[i], strictmathf.apply(a[i])));
            }
        } catch (AssertionError e) {
-            Assert.assertTrue($Type$.compare(r[i], mathf.apply(a[i])) == 0, "at index #" + i + ", input = " + a[i] + ", actual = " + r[i] + ", expected = " + mathf.apply(a[i]));
+            Assert.assertTrue($Wideboxtype$.compare({#if[FP16]?shortBitsToFloat16(r[i]):r[i]}, {#if[FP16]?shortBitsToFloat16(mathf.apply(a[i])):mathf.apply(a[i])}) == 0, "at index #" + i + ", input = " + a[i] + ", actual = " + r[i] + ", expected = " + mathf.apply(a[i]));
            Assert.assertTrue(isWithin1Ulp(r[i], strictmathf.apply(a[i])), "at index #" + i + ", input = " + a[i] + ", actual = " + r[i] + ", expected (within 1 ulp) = " + strictmathf.apply(a[i]));
        }
    }
@ -1017,11 +1112,11 @@ relativeError));
        try {
            // Check that result is within 1 ulp of strict math or equivalent to math implementation.
            for (; i < a.length; i++) {
-                Assert.assertTrue($Type$.compare(r[i], mathf.apply(a[i], b[i])) == 0 ||
+                Assert.assertTrue($Wideboxtype$.compare({#if[FP16]?shortBitsToFloat16(r[i]):r[i]}, {#if[FP16]?shortBitsToFloat16(mathf.apply(a[i], b[i])):mathf.apply(a[i], b[i])}) == 0 ||
                                    isWithin1Ulp(r[i], strictmathf.apply(a[i], b[i])));
            }
        } catch (AssertionError e) {
-            Assert.assertTrue($Type$.compare(r[i], mathf.apply(a[i], b[i])) == 0, "at index #" + i + ", input1 = " + a[i] + ", input2 = " + b[i] + ", actual = " + r[i] + ", expected = " + mathf.apply(a[i], b[i]));
+            Assert.assertTrue($Wideboxtype$.compare({#if[FP16]?shortBitsToFloat16(r[i]):r[i]}, {#if[FP16]?shortBitsToFloat16(mathf.apply(a[i], b[i])):mathf.apply(a[i], b[i])}) == 0, "at index #" + i + ", input1 = " + a[i] + ", input2 = " + b[i] + ", actual = " + r[i] + ", expected = " + mathf.apply(a[i], b[i]));
            Assert.assertTrue(isWithin1Ulp(r[i], strictmathf.apply(a[i], b[i])), "at index #" + i + ", input1 = " + a[i] + ", input2 = " + b[i] + ", actual = " + r[i] + ", expected (within 1 ulp) = " + strictmathf.apply(a[i], b[i]));
        }
    }
@ -1032,14 +1127,14 @@ relativeError));
        try {
            // Check that result is within 1 ulp of strict math or equivalent to math implementation.
            for (; i < a.length; i++) {
-                Assert.assertTrue($Type$.compare(r[i],
-                                  mathf.apply(a[i], b[(i / SPECIES.length()) * SPECIES.length()])) == 0 ||
+                Assert.assertTrue($Wideboxtype$.compare({#if[FP16]?shortBitsToFloat16(r[i]):r[i]},
+                                  {#if[FP16]?shortBitsToFloat16(mathf.apply(a[i], b[(i / SPECIES.length()) * SPECIES.length()])):mathf.apply(a[i], b[(i / SPECIES.length()) * SPECIES.length()])}) == 0 ||
                                  isWithin1Ulp(r[i],
                                  strictmathf.apply(a[i], b[(i / SPECIES.length()) * SPECIES.length()])));
            }
        } catch (AssertionError e) {
-            Assert.assertTrue($Type$.compare(r[i],
-                              mathf.apply(a[i], b[(i / SPECIES.length()) * SPECIES.length()])) == 0,
+            Assert.assertTrue($Wideboxtype$.compare({#if[FP16]?shortBitsToFloat16(r[i]):r[i]},
+                              {#if[FP16]?shortBitsToFloat16(mathf.apply(a[i], b[(i / SPECIES.length()) * SPECIES.length()])):mathf.apply(a[i], b[(i / SPECIES.length()) * SPECIES.length()])}) == 0,
                              "at index #" + i + ", input1 = " + a[i] + ", input2 = " +
                              b[(i / SPECIES.length()) * SPECIES.length()] + ", actual = " + r[i] +
                              ", expected = " + mathf.apply(a[i], b[(i / SPECIES.length()) * SPECIES.length()]));
@ -1236,6 +1331,7 @@ relativeError));

 #if[!Int]
 #if[!byteOrShort]
+#if[!FP16]
    static int intCornerCaseValue(int i) {
        switch(i % 5) {
            case 0:
@ -1250,7 +1346,45 @@ relativeError));
                return (int)0;
        }
    }
+#end[!FP16]

+#if[FP16]
+    // Converts an int to Float16 and returns the raw bit pattern of the result.
+    static $type$ convToFloat16(int i) {
+        Float16 converted = Float16.valueOf(i);
+        return float16ToRawShortBits(converted);
+    }
+
+    // Cycles through four corner-case inputs for int <-> Float16 conversion tests,
+    // selected by i % 4: +65504, -65504, -0.0 and (for every other remainder) +0.0.
+    // All values are returned as raw Float16 bits.
+    static $type$ convIntToFloat16CornerCases(int i) {
+        return switch (i % 4) {
+            case 0  -> convToFloat16(65504);
+            case 1  -> convToFloat16(-65504);
+            case 2  -> float16ToRawShortBits(Float16.valueOf(-0.0f));
+            default -> float16ToRawShortBits(Float16.valueOf(0.0f));
+        };
+    }
+
+    // FP16 input generators for int-conversion tests. Each generator fills an array of
+    // s * BUFFER_REPS raw Float16 bit patterns produced from int expressions of the index.
+    static final List<IntFunction<$type$[]>> INT_$TYPE$_GENERATORS = List.of(
+            withToString("float16[-i * 5]",
+                    (int s) -> fill(s * BUFFER_REPS, i -> convToFloat16(-i * 5))),
+            withToString("float16[i * 5]",
+                    (int s) -> fill(s * BUFFER_REPS, i -> convToFloat16(i * 5))),
+            // Substitutes 1 whenever (i + 1) truncated to $type$ would be zero.
+            withToString("float16[i + 1]",
+                    (int s) -> fill(s * BUFFER_REPS,
+                                    i -> ((($type$)(i + 1) == 0) ? convToFloat16(1) : convToFloat16(i + 1)))),
+            withToString("float16[intCornerCaseValue(i)]",
+                    (int s) -> fill(s * BUFFER_REPS, i -> convIntToFloat16CornerCases(i)))
+    );
+#else[FP16]
    static final List<IntFunction<$type$[]>> INT_$TYPE$_GENERATORS = List.of(
            withToString("$type$[-i * 5]", (int s) -> {
                return fill(s * BUFFER_REPS,
@ -1269,6 +1403,7 @@ relativeError));
                            i -> ($type$)intCornerCaseValue(i));
            })
    );
+#end[FP16]
 #end[!byteOrShort]
 #end[!Int]

@ -1276,7 +1411,11 @@ relativeError));
        int i = 0;
        try {
            for (; i < r.length; i++) {
+#if[FP16]
+                assertEquals(r[i], (int)Float.float16ToFloat(a[i+offs]));
+#else[FP16]
                assertEquals(r[i], (int)(a[i+offs]));
+#end[FP16]
            }
        } catch (AssertionError e) {
            assertEquals(r[i], (int)(a[i+offs]), "at index #" + i + ", input = " + a[i+offs]);
@ -1285,6 +1424,7 @@ relativeError));

 #if[!Long]
 #if[FP]
+#if[!FP16]
    static long longCornerCaseValue(int i) {
        switch(i % 5) {
            case 0:
@ -1299,7 +1439,45 @@ relativeError));
                return (long)0;
        }
    }
+#end[!FP16]

+#if[FP16]
+    // Converts a long to Float16 and returns the raw bit pattern of the result.
+    static $type$ convToFloat16(long i) {
+        Float16 converted = Float16.valueOf(i);
+        return float16ToRawShortBits(converted);
+    }
+
+    // Cycles through four corner-case inputs for long <-> Float16 conversion tests,
+    // selected by i % 4: +65504, -65504, -0.0 and (for every other remainder) +0.0.
+    // All values are returned as raw Float16 bits.
+    static $type$ convLongToFloat16CornerCases(int i) {
+        return switch (i % 4) {
+            case 0  -> convToFloat16(65504L);
+            case 1  -> convToFloat16(-65504L);
+            case 2  -> float16ToRawShortBits(Float16.valueOf(-0.0f));
+            default -> float16ToRawShortBits(Float16.valueOf(0.0f));
+        };
+    }
+
+    // FP16 input generators for long-conversion tests. Each generator fills an array of
+    // s * BUFFER_REPS raw Float16 bit patterns produced from expressions of the index.
+    static final List<IntFunction<$type$[]>> LONG_$TYPE$_GENERATORS = List.of(
+            withToString("float16[-i * 5]",
+                    (int s) -> fill(s * BUFFER_REPS, i -> convToFloat16(-i * 5))),
+            withToString("float16[i * 5]",
+                    (int s) -> fill(s * BUFFER_REPS, i -> convToFloat16(i * 5))),
+            // Substitutes 1 whenever (i + 1) truncated to $type$ would be zero.
+            withToString("float16[i + 1]",
+                    (int s) -> fill(s * BUFFER_REPS,
+                                    i -> ((($type$)(i + 1) == 0) ? convToFloat16(1) : convToFloat16(i + 1)))),
+            withToString("float16[cornerCaseValue(i)]",
+                    (int s) -> fill(s * BUFFER_REPS, i -> convLongToFloat16CornerCases(i)))
+    );
+#else[FP16]
    static final List<IntFunction<$type$[]>> LONG_$TYPE$_GENERATORS = List.of(
            withToString("$type$[-i * 5]", (int s) -> {
                return fill(s * BUFFER_REPS,
@ -1318,6 +1496,7 @@ relativeError));
                            i -> ($type$)longCornerCaseValue(i));
            })
    );
+#end[FP16]
 #end[FP]
 #end[!Long]

@ -1338,7 +1517,11 @@ relativeError));
        int i = 0;
        try {
            for (; i < r.length; i++) {
+#if[FP16]
+                assertEquals(r[i], (long)Float.float16ToFloat(a[i+offs]));
+#else[FP16]
                assertEquals(r[i], (long)(a[i+offs]));
+#end[FP16]
            }
        } catch (AssertionError e) {
            assertEquals(r[i], (long)(a[i+offs]), "at index #" + i + ", input = " + a[i+offs]);
@ -1350,7 +1533,11 @@ relativeError));
        int i = 0;
        try {
            for (; i < r.length; i++) {
+#if[FP16]
+                assertEquals(r[i], (double)Float.float16ToFloat(a[i+offs]));
+#else[FP16]
                assertEquals(r[i], (double)(a[i+offs]));
+#end[FP16]
            }
        } catch (AssertionError e) {
            assertEquals(r[i], (double)(a[i+offs]), "at index #" + i + ", input = " + a[i+offs]);
@ -1358,8 +1545,40 @@ relativeError));
    }
 #end[!Double]

+#if[FP16]
+    // FP16 variant: returns the element unchanged as its bit representation. No conversion
+    // is applied here, unlike the other FP types, which go through a *Bits conversion call.
+    static $bitstype$ bits($type$ e) {
-        return {#if[FP]? $Type$.$type$To$Bitstype$Bits(e): e};
+        return e;
+    }
+
+    // General-purpose FP16 input generators. Each generator fills an array of
+    // s * BUFFER_REPS raw Float16 bit patterns: integer-derived values, fractional values
+    // of the form 0.01 + i / (i + 1), and corner cases (alone or mixed in every 17th slot).
+    static final List<IntFunction<$type$[]>> $TYPE$_GENERATORS = List.of(
+            withToString("float16[-i * 5]",
+                    (int s) -> fill(s * BUFFER_REPS, i -> convToFloat16(-i * 5))),
+            withToString("float16[i * 5]",
+                    (int s) -> fill(s * BUFFER_REPS, i -> convToFloat16(i * 5))),
+            // Substitutes 1 whenever (i + 1) truncated to $type$ would be zero.
+            withToString("float16[i + 1]",
+                    (int s) -> fill(s * BUFFER_REPS,
+                                    i -> ((($type$)(i + 1) == 0) ? convToFloat16(1) : convToFloat16(i + 1)))),
+            withToString("float16[0.01 + (i / (i + 1))]",
+                    (int s) -> fill(s * BUFFER_REPS,
+                                    i -> Float.floatToFloat16(0.01f + ((float)i / (i + 1))))),
+            withToString("float16[i -> i % 17 == 0 ? cornerCaseValue(i) : 0.01f + (i / (i + 1))]",
+                    (int s) -> fill(s * BUFFER_REPS,
+                                    i -> (i % 17 == 0)
+                                            ? cornerCaseValue(i)
+                                            : Float.floatToFloat16(0.01f + ((float)i / (i + 1))))),
+            withToString("float16[cornerCaseValue(i)]",
+                    (int s) -> fill(s * BUFFER_REPS, i -> cornerCaseValue(i)))
+    );
+#else[FP16]
+    static $bitstype$ bits($type$ e) {
+        return  {#if[FP]?$Wideboxtype$.$type$To$Bitstype$Bits(e):e};
    }

    static final List<IntFunction<$type$[]>> $TYPE$_GENERATORS = List.of(
@ -1394,6 +1613,7 @@ relativeError));
                            i -> cornerCaseValue(i));
            })
    );
+#end[FP16]

 #if[!FP]
    static final List<IntFunction<$type$[]>> $TYPE$_SATURATING_GENERATORS = List.of(
@ -1472,7 +1692,7 @@ relativeError));
                collect(Collectors.toList());

    static final List<IntFunction<$type$[]>> SELECT_FROM_INDEX_GENERATORS = List.of(
-            withToString("$type$[0..VECLEN*2)", (int s) -> {
+            withToString("$testtype$[0..VECLEN*2)", (int s) -> {
                return fill(s * BUFFER_REPS,
                            i -> ($type$)(RAND.nextInt()));
            })
@ -1709,27 +1929,27 @@ relativeError));
 #end[!Int]

    static final List<IntFunction<$type$[]>> $TYPE$_COMPARE_GENERATORS = List.of(
-            withToString("$type$[i]", (int s) -> {
+            withToString("$testtype$[i]", (int s) -> {
                return fill(s * BUFFER_REPS,
                            i -> ($type$)i);
            }),
-            withToString("$type$[i - length / 2]", (int s) -> {
+            withToString("$testtype$[i - length / 2]", (int s) -> {
                return fill(s * BUFFER_REPS,
                            i -> ($type$)(i - (s * BUFFER_REPS / 2)));
            }),
-            withToString("$type$[i + 1]", (int s) -> {
+            withToString("$testtype$[i + 1]", (int s) -> {
                return fill(s * BUFFER_REPS,
                            i -> ($type$)(i + 1));
            }),
-            withToString("$type$[i - 2]", (int s) -> {
+            withToString("$testtype$[i - 2]", (int s) -> {
                return fill(s * BUFFER_REPS,
                            i -> ($type$)(i - 2));
            }),
-            withToString("$type$[zigZag(i)]", (int s) -> {
+            withToString("$testtype$[zigZag(i)]", (int s) -> {
                return fill(s * BUFFER_REPS,
                            i -> i%3 == 0 ? ($type$)i : (i%3 == 1 ? ($type$)(i + 1) : ($type$)(i - 2)));
            }),
-            withToString("$type$[cornerCaseValue(i)]", (int s) -> {
+            withToString("$testtype$[cornerCaseValue(i)]", (int s) -> {
                return fill(s * BUFFER_REPS,
                            i -> cornerCaseValue(i));
            })
@ -1792,6 +2012,18 @@ relativeError));

    static $type$ cornerCaseValue(int i) {
 #if[FP]
+#if[FP16]
+        return switch(i % 8) {
+            case 0  -> float16ToRawShortBits($Wideboxtype$.MAX_VALUE);
+            case 1  -> float16ToRawShortBits($Wideboxtype$.MIN_VALUE);
+            case 2  -> float16ToRawShortBits($Wideboxtype$.NEGATIVE_INFINITY);
+            case 3  -> float16ToRawShortBits($Wideboxtype$.POSITIVE_INFINITY);
+            case 4  -> float16ToRawShortBits($Wideboxtype$.NaN);
+            case 5  -> float16ToRawShortBits(shortBitsToFloat16((short)0x7FFA));
+            case 6  -> float16ToShortBits(Float16.valueOf(0.0f));
+            default -> float16ToShortBits(Float16.valueOf(-0.0f));
+        };
+#else[FP16]
        return switch(i % 8) {
            case 0  -> $Wideboxtype$.MAX_VALUE;
            case 1  -> $Wideboxtype$.MIN_VALUE;
@ -1806,6 +2038,7 @@ relativeError));
            case 6  -> ($type$)0.0;
            default -> ($type$)-0.0;
        };
+#end[FP16]
 #else[FP]
        switch(i % 5) {
            case 0:
@ -1923,6 +2156,262 @@ relativeError));
 #end[intOrLong]
    }
 #end[BITWISE]
+#if[FP16]
+
+    // Scalar FP16 comparison helpers. Both operands arrive as raw Float16 bits, are decoded
+    // and widened to float, and are then compared with the primitive float operators.
+
+    // a == b on the decoded values.
+    static boolean eq(short a, short b) {
+        return shortBitsToFloat16(a).floatValue() == shortBitsToFloat16(b).floatValue();
+    }
+
+    // a != b on the decoded values.
+    static boolean neq(short a, short b) {
+        return shortBitsToFloat16(a).floatValue() != shortBitsToFloat16(b).floatValue();
+    }
+
+    // a < b on the decoded values.
+    static boolean lt(short a, short b) {
+        return shortBitsToFloat16(a).floatValue() < shortBitsToFloat16(b).floatValue();
+    }
+
+    // a <= b on the decoded values.
+    static boolean le(short a, short b) {
+        return shortBitsToFloat16(a).floatValue() <= shortBitsToFloat16(b).floatValue();
+    }
+
+    // a > b on the decoded values.
+    static boolean gt(short a, short b) {
+        return shortBitsToFloat16(a).floatValue() > shortBitsToFloat16(b).floatValue();
+    }
+
+    // a >= b on the decoded values.
+    static boolean ge(short a, short b) {
+        return shortBitsToFloat16(a).floatValue() >= shortBitsToFloat16(b).floatValue();
+    }
+
+    // Returns a unless its decoded Float16 value compares equal (via Float16.compare) to the
+    // value decoded from the all-zero bit pattern; in that case returns b. Only a is decoded:
+    // b is passed through untouched, so no Float16 view of it is needed.
+    // NOTE(review): presumably Float16.compare, like Float.compare, treats -0.0 as different
+    // from 0.0, so a negative zero in a would be returned as "non-zero" -- confirm.
+    static $type$ firstNonZero($type$ a, $type$ b) {
+        Float16 zero = shortBitsToFloat16((short)0);
+        return Float16.compare(shortBitsToFloat16(a), zero) != 0 ? a : b;
+    }
+
+    // Scalar FP16 reference arithmetic. Operands and results are raw Float16 bits; each helper
+    // decodes its inputs, applies the matching Float16 operation and re-encodes the result.
+
+    // a + b
+    static $type$ scalar_add($type$ a, $type$ b) {
+        return float16ToRawShortBits(Float16.add(shortBitsToFloat16(a), shortBitsToFloat16(b)));
+    }
+
+    // a - b
+    static $type$ scalar_sub($type$ a, $type$ b) {
+        return float16ToRawShortBits(Float16.subtract(shortBitsToFloat16(a), shortBitsToFloat16(b)));
+    }
+
+    // a * b
+    static $type$ scalar_mul($type$ a, $type$ b) {
+        return float16ToRawShortBits(Float16.multiply(shortBitsToFloat16(a), shortBitsToFloat16(b)));
+    }
+
+    // max(a, b)
+    static $type$ scalar_max($type$ a, $type$ b) {
+        return float16ToRawShortBits(Float16.max(shortBitsToFloat16(a), shortBitsToFloat16(b)));
+    }
+
+    // min(a, b)
+    static $type$ scalar_min($type$ a, $type$ b) {
+        return float16ToRawShortBits(Float16.min(shortBitsToFloat16(a), shortBitsToFloat16(b)));
+    }
+
+    // a / b
+    static $type$ scalar_div($type$ a, $type$ b) {
+        return float16ToRawShortBits(Float16.divide(shortBitsToFloat16(a), shortBitsToFloat16(b)));
+    }
+
+    // Float16.fma applied to (a, b, c)
+    static $type$ scalar_fma($type$ a, $type$ b, $type$ c) {
+        Float16 fused = Float16.fma(shortBitsToFloat16(a), shortBitsToFloat16(b), shortBitsToFloat16(c));
+        return float16ToRawShortBits(fused);
+    }
+
+    // |a|
+    static $type$ scalar_abs($type$ a) {
+        return float16ToRawShortBits(Float16.abs(shortBitsToFloat16(a)));
+    }
+
+    // -a
+    static $type$ scalar_neg($type$ a) {
+        return float16ToRawShortBits(Float16.negate(shortBitsToFloat16(a)));
+    }
+
+    // Scalar FP16 reference math backed by java.lang.Math. Each helper decodes its raw-bit
+    // operand(s) to double, calls the Math routine, converts the double result to Float16
+    // and returns the raw bits.
+
+    static $type$ scalar_sin($type$ a) {
+        double x = shortBitsToFloat16(a).doubleValue();
+        return float16ToRawShortBits(Float16.valueOf(Math.sin(x)));
+    }
+
+    static $type$ scalar_exp($type$ a) {
+        double x = shortBitsToFloat16(a).doubleValue();
+        return float16ToRawShortBits(Float16.valueOf(Math.exp(x)));
+    }
+
+    static $type$ scalar_log1p($type$ a) {
+        double x = shortBitsToFloat16(a).doubleValue();
+        return float16ToRawShortBits(Float16.valueOf(Math.log1p(x)));
+    }
+
+    static $type$ scalar_log($type$ a) {
+        double x = shortBitsToFloat16(a).doubleValue();
+        return float16ToRawShortBits(Float16.valueOf(Math.log(x)));
+    }
+
+    static $type$ scalar_log10($type$ a) {
+        double x = shortBitsToFloat16(a).doubleValue();
+        return float16ToRawShortBits(Float16.valueOf(Math.log10(x)));
+    }
+
+    static $type$ scalar_expm1($type$ a) {
+        double x = shortBitsToFloat16(a).doubleValue();
+        return float16ToRawShortBits(Float16.valueOf(Math.expm1(x)));
+    }
+
+    static $type$ scalar_cos($type$ a) {
+        double x = shortBitsToFloat16(a).doubleValue();
+        return float16ToRawShortBits(Float16.valueOf(Math.cos(x)));
+    }
+
+    static $type$ scalar_tan($type$ a) {
+        double x = shortBitsToFloat16(a).doubleValue();
+        return float16ToRawShortBits(Float16.valueOf(Math.tan(x)));
+    }
+
+    static $type$ scalar_sinh($type$ a) {
+        double x = shortBitsToFloat16(a).doubleValue();
+        return float16ToRawShortBits(Float16.valueOf(Math.sinh(x)));
+    }
+
+    static $type$ scalar_cosh($type$ a) {
+        double x = shortBitsToFloat16(a).doubleValue();
+        return float16ToRawShortBits(Float16.valueOf(Math.cosh(x)));
+    }
+
+    static $type$ scalar_tanh($type$ a) {
+        double x = shortBitsToFloat16(a).doubleValue();
+        return float16ToRawShortBits(Float16.valueOf(Math.tanh(x)));
+    }
+
+    static $type$ scalar_asin($type$ a) {
+        double x = shortBitsToFloat16(a).doubleValue();
+        return float16ToRawShortBits(Float16.valueOf(Math.asin(x)));
+    }
+
+    static $type$ scalar_acos($type$ a) {
+        double x = shortBitsToFloat16(a).doubleValue();
+        return float16ToRawShortBits(Float16.valueOf(Math.acos(x)));
+    }
+
+    static $type$ scalar_atan($type$ a) {
+        double x = shortBitsToFloat16(a).doubleValue();
+        return float16ToRawShortBits(Float16.valueOf(Math.atan(x)));
+    }
+
+    static $type$ scalar_cbrt($type$ a) {
+        double x = shortBitsToFloat16(a).doubleValue();
+        return float16ToRawShortBits(Float16.valueOf(Math.cbrt(x)));
+    }
+
+    static $type$ scalar_sqrt($type$ a) {
+        double x = shortBitsToFloat16(a).doubleValue();
+        return float16ToRawShortBits(Float16.valueOf(Math.sqrt(x)));
+    }
+
+    static $type$ scalar_hypot($type$ a, $type$ b) {
+        double x = shortBitsToFloat16(a).doubleValue();
+        double y = shortBitsToFloat16(b).doubleValue();
+        return float16ToRawShortBits(Float16.valueOf(Math.hypot(x, y)));
+    }
+
+    static $type$ scalar_pow($type$ a, $type$ b) {
+        double base = shortBitsToFloat16(a).doubleValue();
+        double exponent = shortBitsToFloat16(b).doubleValue();
+        return float16ToRawShortBits(Float16.valueOf(Math.pow(base, exponent)));
+    }
+
+    static $type$ scalar_atan2($type$ a, $type$ b) {
+        double y = shortBitsToFloat16(a).doubleValue();
+        double x = shortBitsToFloat16(b).doubleValue();
+        return float16ToRawShortBits(Float16.valueOf(Math.atan2(y, x)));
+    }
+
+    // Scalar FP16 reference math backed by java.lang.StrictMath. Each helper decodes its
+    // raw-bit operand(s) to double, calls the StrictMath routine, converts the double result
+    // to Float16 and returns the raw bits.
+
+    static $type$ strict_scalar_sin($type$ a) {
+        double x = shortBitsToFloat16(a).doubleValue();
+        return float16ToRawShortBits(Float16.valueOf(StrictMath.sin(x)));
+    }
+
+    static $type$ strict_scalar_exp($type$ a) {
+        double x = shortBitsToFloat16(a).doubleValue();
+        return float16ToRawShortBits(Float16.valueOf(StrictMath.exp(x)));
+    }
+
+    static $type$ strict_scalar_log1p($type$ a) {
+        double x = shortBitsToFloat16(a).doubleValue();
+        return float16ToRawShortBits(Float16.valueOf(StrictMath.log1p(x)));
+    }
+
+    static $type$ strict_scalar_log($type$ a) {
+        double x = shortBitsToFloat16(a).doubleValue();
+        return float16ToRawShortBits(Float16.valueOf(StrictMath.log(x)));
+    }
+
+    static $type$ strict_scalar_log10($type$ a) {
+        double x = shortBitsToFloat16(a).doubleValue();
+        return float16ToRawShortBits(Float16.valueOf(StrictMath.log10(x)));
+    }
+
+    static $type$ strict_scalar_expm1($type$ a) {
+        double x = shortBitsToFloat16(a).doubleValue();
+        return float16ToRawShortBits(Float16.valueOf(StrictMath.expm1(x)));
+    }
+
+    static $type$ strict_scalar_cos($type$ a) {
+        double x = shortBitsToFloat16(a).doubleValue();
+        return float16ToRawShortBits(Float16.valueOf(StrictMath.cos(x)));
+    }
+
+    static $type$ strict_scalar_tan($type$ a) {
+        double x = shortBitsToFloat16(a).doubleValue();
+        return float16ToRawShortBits(Float16.valueOf(StrictMath.tan(x)));
+    }
+
+    static $type$ strict_scalar_sinh($type$ a) {
+        double x = shortBitsToFloat16(a).doubleValue();
+        return float16ToRawShortBits(Float16.valueOf(StrictMath.sinh(x)));
+    }
+
+    static $type$ strict_scalar_cosh($type$ a) {
+        double x = shortBitsToFloat16(a).doubleValue();
+        return float16ToRawShortBits(Float16.valueOf(StrictMath.cosh(x)));
+    }
+
+    static $type$ strict_scalar_tanh($type$ a) {
+        double x = shortBitsToFloat16(a).doubleValue();
+        return float16ToRawShortBits(Float16.valueOf(StrictMath.tanh(x)));
+    }
+
+    static $type$ strict_scalar_asin($type$ a) {
+        double x = shortBitsToFloat16(a).doubleValue();
+        return float16ToRawShortBits(Float16.valueOf(StrictMath.asin(x)));
+    }
+
+    static $type$ strict_scalar_acos($type$ a) {
+        double x = shortBitsToFloat16(a).doubleValue();
+        return float16ToRawShortBits(Float16.valueOf(StrictMath.acos(x)));
+    }
+
+    static $type$ strict_scalar_atan($type$ a) {
+        double x = shortBitsToFloat16(a).doubleValue();
+        return float16ToRawShortBits(Float16.valueOf(StrictMath.atan(x)));
+    }
+
+    static $type$ strict_scalar_cbrt($type$ a) {
+        double x = shortBitsToFloat16(a).doubleValue();
+        return float16ToRawShortBits(Float16.valueOf(StrictMath.cbrt(x)));
+    }
+
+    static $type$ strict_scalar_sqrt($type$ a) {
+        double x = shortBitsToFloat16(a).doubleValue();
+        return float16ToRawShortBits(Float16.valueOf(StrictMath.sqrt(x)));
+    }
+
+    static $type$ strict_scalar_hypot($type$ a, $type$ b) {
+        double x = shortBitsToFloat16(a).doubleValue();
+        double y = shortBitsToFloat16(b).doubleValue();
+        return float16ToRawShortBits(Float16.valueOf(StrictMath.hypot(x, y)));
+    }
+
+    static $type$ strict_scalar_pow($type$ a, $type$ b) {
+        double base = shortBitsToFloat16(a).doubleValue();
+        double exponent = shortBitsToFloat16(b).doubleValue();
+        return float16ToRawShortBits(Float16.valueOf(StrictMath.pow(base, exponent)));
+    }
+
+    static $type$ strict_scalar_atan2($type$ a, $type$ b) {
+        double y = shortBitsToFloat16(a).doubleValue();
+        double x = shortBitsToFloat16(b).doubleValue();
+        return float16ToRawShortBits(Float16.valueOf(StrictMath.atan2(y, x)));
+    }
+#else[FP16]

    static boolean eq($type$ a, $type$ b) {
        return a == b;
@ -2155,6 +2644,7 @@ relativeError));
        return ($type$)StrictMath.atan2((double)a, (double)b);
    }
 #end[!BITWISE]
+#end[FP16]
 #if[!FP]

    static boolean ult($type$ a, $type$ b) {
@ -2175,6 +2665,17 @@ relativeError));
 #end[!FP]

 #if[FP]
+#if[FP16]
+    // FP16 classification helpers: decode the raw bits to Float16, then delegate to the
+    // corresponding $Wideboxtype$ predicate.
+
+    // True when the decoded value is NaN.
+    static boolean isNaN($type$ a) {
+        Float16 value = shortBitsToFloat16(a);
+        return $Wideboxtype$.isNaN(value);
+    }
+
+    // True when the decoded value is finite.
+    static boolean isFinite($type$ a) {
+        Float16 value = shortBitsToFloat16(a);
+        return $Wideboxtype$.isFinite(value);
+    }
+
+    // True when the decoded value is an infinity.
+    static boolean isInfinite($type$ a) {
+        Float16 value = shortBitsToFloat16(a);
+        return $Wideboxtype$.isInfinite(value);
+    }
+#else[FP16]
    static boolean isNaN($type$ a) {
        return $Wideboxtype$.isNaN(a);
    }
@ -2184,14 +2685,15 @@ relativeError));
    // Non-FP16 floating-point variant: the lane value is passed to $Wideboxtype$.isInfinite as is.
    static boolean isInfinite($type$ a) {
        return $Wideboxtype$.isInfinite(a);
    }
+#end[FP16]
 #end[FP]

    @Test
    static void smokeTest1() {
-        $abstractvectortype$ three = $abstractvectortype$.broadcast(SPECIES, (byte)-3);
-        $abstractvectortype$ three2 = ($abstractvectortype$) SPECIES.broadcast(-3);
+        $abstractvectortype$ three = $abstractvectortype$.broadcast(SPECIES, {#if[FP16]?float16ToRawShortBits(Float16.valueOf(-3)):(byte)-3});
+        $abstractvectortype$ three2 = ($abstractvectortype$) SPECIES.broadcast({#if[FP16]?Float16.valueOf(-3).longValue():-3});
        assert(three.eq(three2).allTrue());
-        $abstractvectortype$ three3 = three2.broadcast(1).broadcast(-3);
+        $abstractvectortype$ three3 = three2.broadcast({#if[FP16]?float16ToRawShortBits(Float16.valueOf(1)):1}).broadcast({#if[FP16]?Float16.valueOf(-3).longValue():-3});
        assert(three.eq(three3).allTrue());
        int scale = 2;
        Class<?> ETYPE = $type$.class;
@ -2202,14 +2704,18 @@ relativeError));
        $abstractvectortype$ higher = three.addIndex(scale);
        VectorMask<$Boxtype$> m = three.compare(VectorOperators.LE, higher);
        assert(m.allTrue());
-        m = higher.min(($type$)-1).test(VectorOperators.IS_NEGATIVE);
+        m = higher.min({#if[FP16]?float16ToRawShortBits(Float16.valueOf(-1)):($type$)-1}).test(VectorOperators.IS_NEGATIVE);
        assert(m.allTrue());
 #if[FP]
        m = higher.test(VectorOperators.IS_FINITE);
        assert(m.allTrue());
 #end[FP]
        $type$ max = higher.reduceLanes(VectorOperators.MAX);
+#if[FP16]
+        assert(max == float16ToRawShortBits(Float16.add(Float16.valueOf(-3), Float16.multiply(Float16.valueOf(scale), Float16.valueOf((SPECIES.length()-1))))));
+#else[FP16]
        assert(max == -3 + scale * (SPECIES.length()-1));
+#end[FP16]
    }

    private static $type$[]
@ -2286,13 +2792,13 @@ relativeError));
        assertEquals(asFloating.species(), SPECIES);
    }
 #else[FP]
-#if[byteOrShort]
+#if[byte]

    // For this lane type the test passes only if viewAsFloatingLanes() on a zero
    // vector throws UnsupportedOperationException (see expectedExceptions).
    @Test(expectedExceptions = UnsupportedOperationException.class)
    void viewAsFloatingLanesTest() {
        SPECIES.zero().viewAsFloatingLanes();
    }
-#else[byteOrShort]
+#else[byte]

    @Test
    void viewAsFloatingLanesTest() {
@ -2303,7 +2809,7 @@ relativeError));
        assertEquals(asFloatingSpecies.length(), SPECIES.length());
        assertEquals(asFloating.viewAsIntegralLanes().species(), SPECIES);
    }
-#end[byteOrShort]
+#end[byte]
 #end[FP]
 #if[BITWISE]

--- a/test/jdk/jdk/incubator/vector/templates/X-LoadStoreTest.java.template
+++ b/test/jdk/jdk/incubator/vector/templates/X-LoadStoreTest.java.template
@ -41,6 +41,10 @@
 import java.lang.foreign.MemorySegment;
 import java.lang.foreign.Arena;
 import java.lang.foreign.ValueLayout;
+#if[FP16]
+import jdk.incubator.vector.Float16;
+import jdk.incubator.vector.Float16Vector;
+#end[FP16]
 import jdk.incubator.vector.$Type$Vector;
 import jdk.incubator.vector.VectorMask;
 #if[MaxBit]
@ -69,7 +73,11 @@ public class $vectorteststype$ extends AbstractVectorLoadStoreTest {

    static final int INVOC_COUNT = Integer.getInteger("jdk.incubator.vector.test.loop-iterations", 100);

+#if[FP16]
+    static final ValueLayout.OfShort ELEMENT_LAYOUT = ValueLayout.JAVA_SHORT.withByteAlignment(1);
+#else[FP16]
    static final ValueLayout.Of$Type$ ELEMENT_LAYOUT = ValueLayout.JAVA_$TYPE$.withByteAlignment(1);
+#end[FP16]

 #if[MaxBit]
    static VectorShape getMaxBit() {
@ -81,6 +89,29 @@ public class $vectorteststype$ extends AbstractVectorLoadStoreTest {

    static final int BUFFER_REPS = Integer.getInteger("jdk.incubator.vector.test.buffer-vectors", 25000 / $bits$);

+#if[FP16]
+    // Compares two raw float16 bit patterns through their float values
+    // (Float.float16ToFloat) instead of comparing the shorts directly.
+    static void assertEquals(short actual, short expected) {
+        float actualValue = Float.float16ToFloat(actual);
+        float expectedValue = Float.float16ToFloat(expected);
+        Assert.assertEquals(actualValue, expectedValue);
+    }
+
+    // Same as the two-argument form, with msg forwarded to Assert.assertEquals.
+    static void assertEquals(short actual, short expected, String msg) {
+        float actualValue = Float.float16ToFloat(actual);
+        float expectedValue = Float.float16ToFloat(expected);
+        Assert.assertEquals(actualValue, expectedValue, msg);
+    }
+
+    // Element-wise comparison of two arrays of raw float16 bit patterns; each pair is
+    // widened with Float.float16ToFloat before being compared.
+    static void assertEquals(short [] actual, short [] expected) {
+        // Checked through Assert rather than the `assert` statement, which is a no-op
+        // when the JVM runs without -ea and would let a length mismatch slip through.
+        Assert.assertEquals(actual.length, expected.length, "array lengths differ");
+        for (int i = 0; i < actual.length; i++) {
+            Assert.assertEquals(Float.float16ToFloat(actual[i]), Float.float16ToFloat(expected[i]));
+        }
+    }
+
+    // Element-wise comparison of two arrays of raw float16 bit patterns; each pair is
+    // widened with Float.float16ToFloat and msg is attached to every element check.
+    static void assertEquals(short [] actual, short [] expected, String msg) {
+        // Checked through Assert rather than the `assert` statement, which is a no-op
+        // when the JVM runs without -ea and would let a length mismatch slip through.
+        Assert.assertEquals(actual.length, expected.length, msg);
+        for (int i = 0; i < actual.length; i++) {
+            Assert.assertEquals(Float.float16ToFloat(actual[i]), Float.float16ToFloat(expected[i]), msg);
+        }
+    }
+#else[FP16]
    // Non-FP16 variant: forwards the two lane values straight to Assert.assertEquals.
    static void assertEquals($type$ actual, $type$ expected) {
        Assert.assertEquals(actual, expected);
    }
@ -95,6 +126,7 @@ public class $vectorteststype$ extends AbstractVectorLoadStoreTest {
    // Non-FP16 variant: forwards both arrays and msg straight to Assert.assertEquals.
    static void assertEquals($type$ [] actual, $type$ [] expected, String msg) {
        Assert.assertEquals(actual, expected, msg);
    }
+#end[FP16]

    static void assertArraysEquals($type$[] r, $type$[] a, boolean[] mask) {
        int i = 0;
--- a/test/micro/org/openjdk/bench/jdk/incubator/vector/Float16VectorOperationsBenchmark.java
+++ b/test/micro/org/openjdk/bench/jdk/incubator/vector/Float16VectorOperationsBenchmark.java
@ -0,0 +1,354 @@
+/*
+ * Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package org.openjdk.bench.java.lang;
+
+import java.util.stream.IntStream;
+import java.util.concurrent.TimeUnit;
+import jdk.incubator.vector.*;
+import org.openjdk.jmh.annotations.*;
+import static jdk.incubator.vector.Float16.*;
+import static java.lang.Float.*;
+
+@OutputTimeUnit(TimeUnit.MILLISECONDS)
+@State(Scope.Thread)
+@Fork(jvmArgs = {"--add-modules=jdk.incubator.vector", "-Xbatch", "-XX:-TieredCompilation"})
+public class Float16VectorOperationsBenchmark {
+    // Element count of every array below. 2057 is odd, so it is not a multiple of
+    // any even lane count and the benchmarks' tail branches run for that value.
+    @Param({"1024", "2057"})
+    int vectorDim;
+
+    // Allocated in BmSetup; not read by any benchmark in this class.
+    int[] rexp;
+    // Destination of the lane-wise benchmarks.
+    short[] vectorRes;
+    // Inputs holding raw float16 bit patterns (filled in BmSetup).
+    short[] vector1;
+    short[] vector2;
+    short[] vector3;
+    short[] vector4;
+    short[] vector5;
+    // Allocated in BmSetup; not read by any benchmark in this class.
+    boolean[] vectorPredicate;
+
+    // Raw float16 bit patterns of 1.0 and 2.0.
+    static final short f16_one = Float.floatToFloat16(1.0f);
+    static final short f16_two = Float.floatToFloat16(2.0f);
+
+    /**
+     * Allocates all arrays with vectorDim elements and fills the inputs.
+     *
+     * vector1..vector3 hold the float16 encoding of the element index; vector4 and
+     * vector5 hold +Infinity respectively NaN at even indices and the index value at
+     * odd ones. Every 64th element of vector1..vector3 is then overwritten with an
+     * entry of the special-values table, each array using a different rotation.
+     */
+    @Setup(Level.Trial)
+    public void BmSetup() {
+        rexp      = new int[vectorDim];
+        vectorRes = new short[vectorDim];
+        vector1   = new short[vectorDim];
+        vector2   = new short[vectorDim];
+        vector3   = new short[vectorDim];
+        vector4   = new short[vectorDim];
+        vector5   = new short[vectorDim];
+        vectorPredicate = new boolean[vectorDim];
+
+        final short positiveInfinityBits = float16ToRawShortBits(Float16.POSITIVE_INFINITY);
+        final short nanBits = float16ToRawShortBits(Float16.NaN);
+        // Special Values
+        final Float16[] specialValues = {Float16.NaN, Float16.NEGATIVE_INFINITY, Float16.valueOf(0.0), Float16.valueOf(-0.0), Float16.POSITIVE_INFINITY};
+
+        for (int i = 0; i < vectorDim; i++) {
+            final short indexBits = Float.floatToFloat16((float)i);
+            final boolean evenIndex = (i & 0x1) == 0;
+
+            vector1[i] = indexBits;
+            vector2[i] = indexBits;
+            vector3[i] = indexBits;
+            vector4[i] = evenIndex ? positiveInfinityBits : indexBits;
+            vector5[i] = evenIndex ? nanBits : indexBits;
+
+            if ((i % 64) == 0) {
+                vector1[i] = float16ToRawShortBits(specialValues[i % specialValues.length]);
+                vector2[i] = float16ToRawShortBits(specialValues[(i + 1) % specialValues.length]);
+                vector3[i] = float16ToRawShortBits(specialValues[(i + 2) % specialValues.length]);
+            }
+        }
+    }
+
+    static final VectorSpecies<Float16> HSPECIES = Float16Vector.SPECIES_PREFERRED; // species used by every benchmark loop in this class
+    static final VectorSpecies<Float> FSPECIES = FloatVector.SPECIES_PREFERRED; // not referenced by any benchmark in this class
+
+    /**
+     * Lane-wise Float16 ADD of vector1 and vector2, stored into vectorRes.
+     * Full vectors are processed up to loopBound(vectorDim); the remaining
+     * elements, if any, are handled with a single masked operation.
+     */
+    @Benchmark
+    public void addBenchmark() {
+        int i = 0;
+        for (; i < HSPECIES.loopBound(vectorDim); i +=  HSPECIES.length()) {
+            Float16Vector.fromArray(HSPECIES, vector1, i)
+                         .lanewise(VectorOperators.ADD,
+                                   Float16Vector.fromArray(HSPECIES, vector2, i))
+                         .intoArray(vectorRes, i);
+        }
+        if (i < vectorDim) {
+            // Fewer than HSPECIES.length() elements remain, so both loads and the store
+            // must be masked; an unmasked load here reads past the end of the array.
+            VectorMask<Float16> mask = HSPECIES.indexInRange(i, vectorDim);
+            Float16Vector.fromArray(HSPECIES, vector1, i, mask)
+                         .lanewise(VectorOperators.ADD,
+                                   Float16Vector.fromArray(HSPECIES, vector2, i, mask))
+                         .intoArray(vectorRes, i, mask);
+        }
+    }
+
+    /**
+     * Lane-wise Float16 SUB of vector2 from vector1, stored into vectorRes.
+     * Full vectors are processed up to loopBound(vectorDim); the remaining
+     * elements, if any, are handled with a single masked operation.
+     */
+    @Benchmark
+    public void subBenchmark() {
+        int i = 0;
+        for (; i < HSPECIES.loopBound(vectorDim); i +=  HSPECIES.length()) {
+            Float16Vector.fromArray(HSPECIES, vector1, i)
+                         .lanewise(VectorOperators.SUB,
+                                   Float16Vector.fromArray(HSPECIES, vector2, i))
+                         .intoArray(vectorRes, i);
+        }
+        if (i < vectorDim) {
+            // Fewer than HSPECIES.length() elements remain, so both loads and the store
+            // must be masked; unmasked accesses here run past the end of the arrays.
+            VectorMask<Float16> mask = HSPECIES.indexInRange(i, vectorDim);
+            Float16Vector.fromArray(HSPECIES, vector1, i, mask)
+                         .lanewise(VectorOperators.SUB,
+                                   Float16Vector.fromArray(HSPECIES, vector2, i, mask))
+                         .intoArray(vectorRes, i, mask);
+        }
+    }
+
+    @Benchmark
+    public void mulBenchmark() {
+        int i = 0;
+        for (; i < HSPECIES.loopBound(vectorDim); i +=  HSPECIES.length()) {
+            Float16Vector.fromArray(HSPECIES, vector1, i)
+                         .lanewise(VectorOperators.MUL,
+                                   Float16Vector.fromArray(HSPECIES, vector2, i))
+                         .intoArray(vectorRes, i);
+        }
+        if (i < vectorDim) {
+            VectorMask<Float16> mask = HSPECIES.indexInRange(i, vectorDim);
+            Float16Vector.fromArray(HSPECIES, vector1, i, mask)
+                         .lanewise(VectorOperators.MUL,
+                                   Float16Vector.fromArray(HSPECIES, vector2, i, mask))
+                         .intoArray(vectorRes, i, mask);
+        }
+    }
+
+    @Benchmark
+    public void divBenchmark() {
+        int i = 0;
+        for (; i < HSPECIES.loopBound(vectorDim); i +=  HSPECIES.length()) {
+            Float16Vector.fromArray(HSPECIES, vector1, i)
+                         .lanewise(VectorOperators.DIV,
+                                   Float16Vector.fromArray(HSPECIES, vector2, i))
+                         .intoArray(vectorRes, i);
+        }
+        if (i < vectorDim) {
+            VectorMask<Float16> mask = HSPECIES.indexInRange(i, vectorDim);
+            Float16Vector.fromArray(HSPECIES, vector1, i, mask)
+                         .lanewise(VectorOperators.DIV,
+                                   Float16Vector.fromArray(HSPECIES, vector2, i, mask))
+                         .intoArray(vectorRes, i, mask);
+        }
+    }
+
+    @Benchmark
+    public void fmaBenchmark() {
+        int i = 0;
+        for (; i < HSPECIES.loopBound(vectorDim); i +=  HSPECIES.length()) {
+            Float16Vector.fromArray(HSPECIES, vector1, i)
+                         .lanewise(VectorOperators.FMA,
+                                   Float16Vector.fromArray(HSPECIES, vector2, i),
+                                   Float16Vector.fromArray(HSPECIES, vector3, i))
+                         .intoArray(vectorRes, i);
+        }
+        if (i < vectorDim) {
+            VectorMask<Float16> mask = HSPECIES.indexInRange(i, vectorDim);
+            Float16Vector.fromArray(HSPECIES, vector1, i, mask)
+                         .lanewise(VectorOperators.FMA,
+                                   Float16Vector.fromArray(HSPECIES, vector2, i, mask),
+                                   Float16Vector.fromArray(HSPECIES, vector3, i, mask))
+                         .intoArray(vectorRes, i, mask);
+        }
+    }
+
+    @Benchmark
+    public void maxBenchmark() {
+        int i = 0;
+        for (; i < HSPECIES.loopBound(vectorDim); i +=  HSPECIES.length()) {
+            Float16Vector.fromArray(HSPECIES, vector1, i)
+                         .lanewise(VectorOperators.MAX,
+                                   Float16Vector.fromArray(HSPECIES, vector2, i))
+                         .intoArray(vectorRes, i);
+        }
+        if (i < vectorDim) {
+            VectorMask<Float16> mask = HSPECIES.indexInRange(i, vectorDim);
+            Float16Vector.fromArray(HSPECIES, vector1, i, mask)
+                         .lanewise(VectorOperators.MAX,
+                                   Float16Vector.fromArray(HSPECIES, vector2, i, mask))
+                         .intoArray(vectorRes, i, mask);
+        }
+    }
+
+    @Benchmark
+    public void minBenchmark() {
+        int i = 0;
+        for (; i < HSPECIES.loopBound(vectorDim); i +=  HSPECIES.length()) {
+            Float16Vector.fromArray(HSPECIES, vector1, i)
+                          .lanewise(VectorOperators.MIN,
+                                    Float16Vector.fromArray(HSPECIES, vector2, i))
+                          .intoArray(vectorRes, i);
+        }
+        if (i < vectorDim) {
+            VectorMask<Float16> mask = HSPECIES.indexInRange(i, vectorDim);
+            Float16Vector.fromArray(HSPECIES, vector1, i, mask)
+                         .lanewise(VectorOperators.MIN,
+                                   Float16Vector.fromArray(HSPECIES, vector2, i, mask))
+                         .intoArray(vectorRes, i, mask);
+        }
+    }
+
+    @Benchmark
+    public void sqrtBenchmark() {
+        int i = 0;
+        for (; i < HSPECIES.loopBound(vectorDim); i +=  HSPECIES.length()) {
+            Float16Vector.fromArray(HSPECIES, vector1, i)
+                         .lanewise(VectorOperators.SQRT)
+                         .intoArray(vectorRes, i);
+        }
+        if (i < vectorDim) {
+            VectorMask<Float16> mask = HSPECIES.indexInRange(i, vectorDim);
+            Float16Vector.fromArray(HSPECIES, vector1, i, mask)
+                         .lanewise(VectorOperators.SQRT)
+                         .intoArray(vectorRes, i, mask);
+        }
+    }
+
+    /**
+     * Accumulates, per lane, vector1*vector2, vector1*vector1 and vector2*vector2
+     * using a separate MUL followed by ADD, then divides the first accumulator by the
+     * product of the other two and reduces the quotient lanes with ADD.
+     *
+     * NOTE(review): when a tail exists, the final reduction is restricted to the tail
+     * mask, so only those lanes of the quotient contribute - confirm this is intended.
+     */
+    @Benchmark
+    public short cosineSimilarityDoubleRoundingFP16() {
+        Float16Vector dotAcc = Float16Vector.broadcast(HSPECIES, (short)0);
+        Float16Vector squares1Acc = Float16Vector.broadcast(HSPECIES, (short)0);
+        Float16Vector squares2Acc = Float16Vector.broadcast(HSPECIES, (short)0);
+        final int bound = HSPECIES.loopBound(vectorDim);
+        int pos = 0;
+        // cosine distance = (VEC1 . VEC2) / ||VEC1||.||VEC2||
+        while (pos < bound) {
+            Float16Vector lhs = Float16Vector.fromArray(HSPECIES, vector1, pos);
+            Float16Vector rhs = Float16Vector.fromArray(HSPECIES, vector2, pos);
+            // Explicit add and multiply operation ensures double rounding.
+            dotAcc = lhs.lanewise(VectorOperators.MUL, rhs)
+                        .lanewise(VectorOperators.ADD, dotAcc);
+            squares1Acc = lhs.lanewise(VectorOperators.MUL, lhs)
+                             .lanewise(VectorOperators.ADD, squares1Acc);
+            squares2Acc = rhs.lanewise(VectorOperators.MUL, rhs)
+                             .lanewise(VectorOperators.ADD, squares2Acc);
+            pos += HSPECIES.length();
+        }
+
+        // Stays null when vectorDim is covered entirely by full vectors.
+        VectorMask<Float16> tail = null;
+        if (pos < vectorDim) {
+            tail = HSPECIES.indexInRange(pos, vectorDim);
+            Float16Vector lhs = Float16Vector.fromArray(HSPECIES, vector1, pos, tail);
+            Float16Vector rhs = Float16Vector.fromArray(HSPECIES, vector2, pos, tail);
+            dotAcc = lhs.lanewise(VectorOperators.MUL, rhs)
+                        .lanewise(VectorOperators.ADD, dotAcc);
+            squares1Acc = lhs.lanewise(VectorOperators.MUL, lhs)
+                             .lanewise(VectorOperators.ADD, squares1Acc);
+            squares2Acc = rhs.lanewise(VectorOperators.MUL, rhs)
+                             .lanewise(VectorOperators.ADD, squares2Acc);
+        }
+
+        Float16Vector denominator = squares1Acc.lanewise(VectorOperators.MUL, squares2Acc);
+        Float16Vector quotient = dotAcc.lanewise(VectorOperators.DIV, denominator);
+        if (tail != null) {
+            return quotient.reduceLanes(VectorOperators.ADD, tail);
+        }
+        return quotient.reduceLanes(VectorOperators.ADD);
+    }
+
+    /**
+     * Same computation as cosineSimilarityDoubleRoundingFP16, except that every
+     * multiply-accumulate step is a single FMA instead of a MUL followed by an ADD.
+     * The accumulators are combined with DIV/MUL and the quotient lanes reduced with ADD.
+     *
+     * NOTE(review): when a tail exists, the final reduction is restricted to the tail
+     * mask, so only those lanes of the quotient contribute - confirm this is intended.
+     */
+    @Benchmark
+    public short cosineSimilaritySingleRoundingFP16() {
+        int i = 0;
+        Float16Vector macResVec = Float16Vector.broadcast(HSPECIES, (short)0);
+        Float16Vector vector1SquareVec = Float16Vector.broadcast(HSPECIES, (short)0);
+        Float16Vector vector2SquareVec = Float16Vector.broadcast(HSPECIES, (short)0);
+        // cosine distance = (VEC1 . VEC2) / ||VEC1||.||VEC2||
+        for (; i < HSPECIES.loopBound(vectorDim); i += HSPECIES.length()) {
+            // Fused multiply-add rounds only once per accumulation step (single rounding).
+            Float16Vector vec1 = Float16Vector.fromArray(HSPECIES, vector1, i);
+            Float16Vector vec2 = Float16Vector.fromArray(HSPECIES, vector2, i);
+            macResVec = vec1.lanewise(VectorOperators.FMA, vec2, macResVec);
+            vector1SquareVec = vec1.lanewise(VectorOperators.FMA, vec1, vector1SquareVec);
+            vector2SquareVec = vec2.lanewise(VectorOperators.FMA, vec2, vector2SquareVec);
+        }
+        if (i < vectorDim) {
+            VectorMask<Float16> mask = HSPECIES.indexInRange(i, vectorDim);
+            Float16Vector vec1 = Float16Vector.fromArray(HSPECIES, vector1, i, mask);
+            Float16Vector vec2 = Float16Vector.fromArray(HSPECIES, vector2, i, mask);
+            macResVec = vec1.lanewise(VectorOperators.FMA, vec2, macResVec);
+            vector1SquareVec = vec1.lanewise(VectorOperators.FMA, vec1, vector1SquareVec);
+            vector2SquareVec = vec2.lanewise(VectorOperators.FMA, vec2, vector2SquareVec);
+            return macResVec.lanewise(VectorOperators.DIV,
+                                      vector1SquareVec.lanewise(VectorOperators.MUL,
+                                                                vector2SquareVec))
+                            .reduceLanes(VectorOperators.ADD, mask);
+        } else {
+            return macResVec.lanewise(VectorOperators.DIV,
+                                      vector1SquareVec.lanewise(VectorOperators.MUL,
+                                                                vector2SquareVec))
+                            .reduceLanes(VectorOperators.ADD);
+        }
+    }
+
+    /**
+     * Accumulates, per lane, the squared difference of vector1 and vector2 with FMA,
+     * then applies SQRT lane-wise and reduces the lanes with ADD.
+     *
+     * NOTE(review): when a tail exists, the final reduction is restricted to the tail
+     * mask, so only those lanes contribute - confirm this is intended.
+     */
+    @Benchmark
+    public short euclideanDistanceFP16() {
+        Float16Vector squaredDiffAcc = Float16Vector.broadcast(HSPECIES, (short)0);
+        final int bound = HSPECIES.loopBound(vectorDim);
+        int pos = 0;
+        while (pos < bound) {
+            Float16Vector lhs = Float16Vector.fromArray(HSPECIES, vector1, pos);
+            Float16Vector rhs = Float16Vector.fromArray(HSPECIES, vector2, pos);
+            Float16Vector delta = lhs.lanewise(VectorOperators.SUB, rhs);
+            squaredDiffAcc = delta.lanewise(VectorOperators.FMA, delta, squaredDiffAcc);
+            pos += HSPECIES.length();
+        }
+
+        // Stays null when vectorDim is covered entirely by full vectors.
+        VectorMask<Float16> tail = null;
+        if (pos < vectorDim) {
+            tail = HSPECIES.indexInRange(pos, vectorDim);
+            Float16Vector lhs = Float16Vector.fromArray(HSPECIES, vector1, pos, tail);
+            Float16Vector rhs = Float16Vector.fromArray(HSPECIES, vector2, pos, tail);
+            Float16Vector delta = lhs.lanewise(VectorOperators.SUB, rhs);
+            squaredDiffAcc = delta.lanewise(VectorOperators.FMA, delta, squaredDiffAcc);
+        }
+
+        Float16Vector roots = squaredDiffAcc.lanewise(VectorOperators.SQRT);
+        if (tail != null) {
+            return roots.reduceLanes(VectorOperators.ADD, tail);
+        }
+        return roots.reduceLanes(VectorOperators.ADD);
+    }
+
+    /**
+     * Accumulates vector1*vector2 per lane with FMA and reduces the accumulator
+     * lanes with ADD.
+     *
+     * NOTE(review): when a tail exists, the final reduction is restricted to the tail
+     * mask, so only those accumulator lanes contribute - confirm this is intended.
+     */
+    @Benchmark
+    public short dotProductFP16() {
+        Float16Vector productAcc = Float16Vector.broadcast(HSPECIES, (short)0);
+        final int bound = HSPECIES.loopBound(vectorDim);
+        int pos = 0;
+        while (pos < bound) {
+            Float16Vector lhs = Float16Vector.fromArray(HSPECIES, vector1, pos);
+            Float16Vector rhs = Float16Vector.fromArray(HSPECIES, vector2, pos);
+            productAcc = lhs.lanewise(VectorOperators.FMA, rhs, productAcc);
+            pos += HSPECIES.length();
+        }
+        if (pos >= vectorDim) {
+            return productAcc.reduceLanes(VectorOperators.ADD);
+        }
+        VectorMask<Float16> tail = HSPECIES.indexInRange(pos, vectorDim);
+        Float16Vector lhs = Float16Vector.fromArray(HSPECIES, vector1, pos, tail);
+        Float16Vector rhs = Float16Vector.fromArray(HSPECIES, vector2, pos, tail);
+        productAcc = lhs.lanewise(VectorOperators.FMA, rhs, productAcc);
+        return productAcc.reduceLanes(VectorOperators.ADD, tail);
+    }
+}