8368822: Refactor Float16.valueOf(double)

Reviewed-by: rgiulietti
2026-04-29 16:14:58 +00:00 · 2025-09-29 14:48:04 +00:00 · 2025-09-29 14:48:04 +00:00 · 63688d894e
commit 63688d894e
parent 9d9c0e0670
1 changed files with 35 additions and 23 deletions
--- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float16.java
+++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float16.java
@ -222,6 +222,16 @@ public final class Float16
     */
    public static final int BYTES = SIZE / Byte.SIZE;

+    /**
+     * The overflow threshold (for round to nearest) is MAX_VALUE + 1/2 ulp.
+     */
+    private static final double OVERFLOW_THRESH = 0x1.ffcp15 + 0x0.002p15;
+
+    /**
+     * The underflow threshold (for round to nearest) is MIN_VALUE * 0.5.
+     */
+    private static final double UNDERFLOW_THRESH = 0x1.0p-24d * 0.5d;
+
    /**
     * Returns a string representation of the {@code Float16}
     * argument.
@ -340,51 +350,51 @@ public final class Float16
    * @param  d a {@code double}
    */
    public static Float16 valueOf(double d) {
-        long doppel = Double.doubleToRawLongBits(d);
-
-        short sign_bit = (short)((doppel & 0x8000_0000_0000_0000L) >> 48);
-
        if (Double.isNaN(d)) {
            // Have existing float code handle any attempts to
            // preserve NaN bits.
            return valueOf((float)d);
        }

+        long doppel = Double.doubleToRawLongBits(d);
+        short sign_bit = (short)((doppel & 0x8000_0000_0000_0000L) >> (64 - 16));
        double abs_d = Math.abs(d);

-        // The overflow threshold is binary16 MAX_VALUE + 1/2 ulp
-        if (abs_d >= (0x1.ffcp15 + 0x0.002p15) ) {
+        if (abs_d >= OVERFLOW_THRESH) {
             // correctly signed infinity
            return new Float16((short)(sign_bit | 0x7c00));
        }

-        // Smallest magnitude nonzero representable binary16 value
-        // is equal to 0x1.0p-24; half-way and smaller rounds to zero.
-        if (abs_d <= 0x1.0p-24d * 0.5d) { // Covers double zeros and subnormals.
-            return new Float16(sign_bit); // Positive or negative zero
+        if (abs_d <= UNDERFLOW_THRESH) { // Covers double zeros and subnormals.
+            // positive or negative zero
+            return new Float16(sign_bit);
        }

        // Dealing with finite values in exponent range of binary16
        // (when rounding is done, could still round up)
        int exp = Math.getExponent(d);
-        assert -25 <= exp && exp <= 15;
+        assert
+            (MIN_EXPONENT - PRECISION) <= exp &&
+            exp <= MAX_EXPONENT;

-        // For binary16 subnormals, beside forcing exp to -15, retain
-        // the difference expdelta = E_min - exp.  This is the excess
-        // shift value, in addition to 42, to be used in the
+        // For target format subnormals, besides forcing exp to
+        // MIN_EXPONENT - 1, retain the difference expdelta =
+        // MIN_EXPONENT - exp.  This is the excess shift value, in
+        // addition to the difference in precision bits, to be used in the
        // computations below.  Further the (hidden) msb with value 1
        // in d must be involved as well.
        int expdelta = 0;
        long msb = 0x0000_0000_0000_0000L;
-        if (exp < -14) {
-            expdelta = -14 - exp; // FIXME?
-            exp = -15;
-            msb = 0x0010_0000_0000_0000L; // should be 0x0020_... ?
+        if (exp < MIN_EXPONENT) {
+            expdelta = MIN_EXPONENT - exp;
+            exp = MIN_EXPONENT - 1;
+            msb = 0x0010_0000_0000_0000L;
        }
        long f_signif_bits = doppel & 0x000f_ffff_ffff_ffffL | msb;

+        int PRECISION_DIFF = Double.PRECISION - PRECISION; // 42
        // Significand bits as if using rounding to zero (truncation).
-        short signif_bits = (short)(f_signif_bits >> (42 + expdelta));
+        short signif_bits = (short)(f_signif_bits >> (PRECISION_DIFF + expdelta));

        // For round to nearest even, determining whether or not to
        // round up (in magnitude) is a function of the least
@ -399,9 +409,9 @@ public final class Float16
        // 1    1     1
        // See "Computer Arithmetic Algorithms," Koren, Table 4.9

-        long lsb    = f_signif_bits & (1L << 42 + expdelta);
-        long round  = f_signif_bits & (1L << 41 + expdelta);
-        long sticky = f_signif_bits & ((1L << 41 + expdelta) - 1);
+        long lsb    = f_signif_bits &  (1L << (PRECISION_DIFF      + expdelta));
+        long round  = f_signif_bits &  (1L << (PRECISION_DIFF - 1) + expdelta);
+        long sticky = f_signif_bits & ((1L << (PRECISION_DIFF - 1) + expdelta) - 1);

        if (round != 0 && ((lsb | sticky) != 0 )) {
            signif_bits++;
@ -412,7 +422,9 @@ public final class Float16
        // to implement a carry out from rounding the significand.
        assert (0xf800 & signif_bits) == 0x0;

-        return new Float16((short)(sign_bit | ( ((exp + 15) << 10) + signif_bits ) ));
+        // The exponent bias adjustment in the representation is equal to MAX_EXPONENT.
+        return new Float16((short)(sign_bit |
+                                   ( ((exp + MAX_EXPONENT) << (PRECISION - 1)) + signif_bits ) ));
    }

    /**