From 8cfd74f76afc9e5d50c52104fef9974784718dd4 Mon Sep 17 00:00:00 2001 From: Vladimir Kozlov Date: Thu, 9 Mar 2023 03:26:38 +0000 Subject: [PATCH] 8302976: C2 intrinsification of Float.floatToFloat16 and Float.float16ToFloat yields different result than the interpreter Reviewed-by: sviswanathan, jbhateja, vlivanov --- src/hotspot/cpu/aarch64/aarch64.ad | 6 +- .../cpu/aarch64/c1_LIRAssembler_aarch64.cpp | 6 +- .../cpu/aarch64/c1_LIRGenerator_aarch64.cpp | 23 +- .../cpu/aarch64/macroAssembler_aarch64.hpp | 9 +- .../templateInterpreterGenerator_aarch64.cpp | 45 ++ .../cpu/aarch64/vm_version_aarch64.hpp | 2 + .../arm/templateInterpreterGenerator_arm.cpp | 2 + .../ppc/templateInterpreterGenerator_ppc.cpp | 4 + src/hotspot/cpu/riscv/assembler_riscv.hpp | 4 - src/hotspot/cpu/riscv/globals_riscv.hpp | 1 - src/hotspot/cpu/riscv/riscv.ad | 42 -- .../templateInterpreterGenerator_riscv.cpp | 4 + src/hotspot/cpu/riscv/vm_version_riscv.cpp | 3 - .../templateInterpreterGenerator_s390.cpp | 4 + src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp | 6 +- src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp | 12 +- src/hotspot/cpu/x86/macroAssembler_x86.hpp | 19 +- src/hotspot/cpu/x86/stubGenerator_x86_64.cpp | 59 +++ src/hotspot/cpu/x86/stubGenerator_x86_64.hpp | 4 +- .../templateInterpreterGenerator_x86_32.cpp | 54 ++- .../templateInterpreterGenerator_x86_64.cpp | 56 ++- src/hotspot/cpu/x86/vm_version_x86.hpp | 7 +- src/hotspot/cpu/x86/x86.ad | 11 +- src/hotspot/share/c1/c1_Compiler.cpp | 6 +- src/hotspot/share/c1/c1_LIR.cpp | 6 +- src/hotspot/share/c1/c1_LIR.hpp | 6 +- src/hotspot/share/c1/c1_LIRAssembler.cpp | 4 +- src/hotspot/share/c1/c1_LIRGenerator.cpp | 4 + src/hotspot/share/c1/c1_LinearScan.cpp | 4 +- src/hotspot/share/classfile/vmIntrinsics.cpp | 4 + .../share/compiler/abstractCompiler.hpp | 4 +- .../share/interpreter/abstractInterpreter.cpp | 2 + .../share/interpreter/abstractInterpreter.hpp | 4 + .../templateInterpreterGenerator.cpp | 8 +- .../templateInterpreterGenerator.hpp | 3 + 
src/hotspot/share/opto/convertnode.cpp | 28 +- src/hotspot/share/opto/convertnode.hpp | 1 - .../share/runtime/abstract_vm_version.hpp | 2 + src/hotspot/share/runtime/sharedRuntime.cpp | 91 ---- src/hotspot/share/runtime/sharedRuntime.hpp | 2 - src/hotspot/share/runtime/stubRoutines.cpp | 3 + src/hotspot/share/runtime/stubRoutines.hpp | 21 + .../float16/Binary16Conversion.java | 436 ++++++++++++++++++ .../float16/Binary16ConversionNaN.java | 161 +++++++ .../float16/TestAllFloat16ToFloat.java | 97 ++++ .../float16/TestConstFloat16ToFloat.java | 166 +++++++ .../lang/Float/Binary16ConversionNaN.java | 3 +- 47 files changed, 1254 insertions(+), 195 deletions(-) create mode 100644 test/hotspot/jtreg/compiler/intrinsics/float16/Binary16Conversion.java create mode 100644 test/hotspot/jtreg/compiler/intrinsics/float16/Binary16ConversionNaN.java create mode 100644 test/hotspot/jtreg/compiler/intrinsics/float16/TestAllFloat16ToFloat.java create mode 100644 test/hotspot/jtreg/compiler/intrinsics/float16/TestConstFloat16ToFloat.java diff --git a/src/hotspot/cpu/aarch64/aarch64.ad b/src/hotspot/cpu/aarch64/aarch64.ad index 63a2b8cb64d..b604b6af733 100644 --- a/src/hotspot/cpu/aarch64/aarch64.ad +++ b/src/hotspot/cpu/aarch64/aarch64.ad @@ -15043,8 +15043,7 @@ instruct convF2HF_reg_reg(iRegINoSp dst, vRegF src, vRegF tmp) %{ %} effect(TEMP tmp); ins_encode %{ - __ fcvtsh($tmp$$FloatRegister, $src$$FloatRegister); - __ smov($dst$$Register, $tmp$$FloatRegister, __ H, 0); + __ flt_to_flt16($dst$$Register, $src$$FloatRegister, $tmp$$FloatRegister); %} ins_pipe(pipe_slow); %} @@ -15056,8 +15055,7 @@ instruct convHF2F_reg_reg(vRegF dst, iRegINoSp src, vRegF tmp) %{ %} effect(TEMP tmp); ins_encode %{ - __ mov($tmp$$FloatRegister, __ H, 0, $src$$Register); - __ fcvths($dst$$FloatRegister, $tmp$$FloatRegister); + __ flt16_to_flt($dst$$FloatRegister, $src$$Register, $tmp$$FloatRegister); %} ins_pipe(pipe_slow); %} diff --git a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp 
b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp index 1fe41080d1c..ef21b759360 100644 --- a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2000, 2023, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -1814,10 +1814,12 @@ void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr void LIR_Assembler::arith_fpu_implementation(LIR_Code code, int left_index, int right_index, int dest_index, bool pop_fpu_stack) { Unimplemented(); } -void LIR_Assembler::intrinsic_op(LIR_Code code, LIR_Opr value, LIR_Opr unused, LIR_Opr dest, LIR_Op* op) { +void LIR_Assembler::intrinsic_op(LIR_Code code, LIR_Opr value, LIR_Opr tmp, LIR_Opr dest, LIR_Op* op) { switch(code) { case lir_abs : __ fabsd(dest->as_double_reg(), value->as_double_reg()); break; case lir_sqrt: __ fsqrtd(dest->as_double_reg(), value->as_double_reg()); break; + case lir_f2hf: __ flt_to_flt16(dest->as_register(), value->as_float_reg(), tmp->as_float_reg()); break; + case lir_hf2f: __ flt16_to_flt(dest->as_float_reg(), value->as_register(), tmp->as_float_reg()); break; default : ShouldNotReachHere(); } } diff --git a/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp index a2815016a5b..2d12590028a 100644 --- a/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2005, 2022, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2005, 2023, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, Red Hat Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* @@ -752,20 +752,35 @@ void LIRGenerator::do_MathIntrinsic(Intrinsic* x) { switch (x->id()) { case vmIntrinsics::_dabs: case vmIntrinsics::_dsqrt: - case vmIntrinsics::_dsqrt_strict: { + case vmIntrinsics::_dsqrt_strict: + case vmIntrinsics::_floatToFloat16: + case vmIntrinsics::_float16ToFloat: { assert(x->number_of_arguments() == 1, "wrong type"); LIRItem value(x->argument_at(0), this); value.load_item(); + LIR_Opr src = value.result(); LIR_Opr dst = rlock_result(x); switch (x->id()) { case vmIntrinsics::_dsqrt: case vmIntrinsics::_dsqrt_strict: { - __ sqrt(value.result(), dst, LIR_OprFact::illegalOpr); + __ sqrt(src, dst, LIR_OprFact::illegalOpr); break; } case vmIntrinsics::_dabs: { - __ abs(value.result(), dst, LIR_OprFact::illegalOpr); + __ abs(src, dst, LIR_OprFact::illegalOpr); + break; + } + case vmIntrinsics::_floatToFloat16: { + LIR_Opr tmp = new_register(T_FLOAT); + __ move(LIR_OprFact::floatConst(-0.0), tmp); + __ f2hf(src, dst, tmp); + break; + } + case vmIntrinsics::_float16ToFloat: { + LIR_Opr tmp = new_register(T_FLOAT); + __ move(LIR_OprFact::floatConst(-0.0), tmp); + __ hf2f(src, dst, tmp); break; } default: diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp index 5e6ec8f4cdd..c33efa338f0 100644 --- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp @@ -513,8 +513,15 @@ public: orr(Vd, T, Vn, Vn); } + void flt_to_flt16(Register dst, FloatRegister src, FloatRegister tmp) { + fcvtsh(tmp, src); + smov(dst, tmp, H, 0); + } -public: + void flt16_to_flt(FloatRegister dst, Register src, FloatRegister tmp) { + mov(tmp, H, 0, src); + fcvths(dst, tmp); + } // Generalized Test Bit And Branch, including a "far" variety which // spans more than 32KiB. 
diff --git a/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp b/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp index 8b1081ad922..9c9a9d17ac4 100644 --- a/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp @@ -300,6 +300,50 @@ void TemplateInterpreterGenerator::generate_transcendental_entry(AbstractInterpr __ blr(rscratch1); } +address TemplateInterpreterGenerator::generate_Float_float16ToFloat_entry() { + // vmIntrinsics checks InlineIntrinsics flag, no need to check it here. + if (!VM_Version::supports_float16() || + vmIntrinsics::is_disabled_by_flags(vmIntrinsics::_float16ToFloat) || + vmIntrinsics::is_disabled_by_flags(vmIntrinsics::_floatToFloat16)) { + return nullptr; + } + // r19_sender_sp: sender sp + // stack: + // [ arg ] <-- esp + // [ arg ] + // retaddr in lr + // result in v0 + + address entry_point = __ pc(); + __ ldrw(c_rarg0, Address(esp)); + __ flt16_to_flt(v0, c_rarg0, v1); + __ mov(sp, r19_sender_sp); // Restore caller's SP + __ br(lr); + return entry_point; +} + +address TemplateInterpreterGenerator::generate_Float_floatToFloat16_entry() { + // vmIntrinsics checks InlineIntrinsics flag, no need to check it here. + if (!VM_Version::supports_float16() || + vmIntrinsics::is_disabled_by_flags(vmIntrinsics::_float16ToFloat) || + vmIntrinsics::is_disabled_by_flags(vmIntrinsics::_floatToFloat16)) { + return nullptr; + } + // r19_sender_sp: sender sp + // stack: + // [ arg ] <-- esp + // [ arg ] + // retaddr in lr + // result in c_rarg0 + + address entry_point = __ pc(); + __ ldrs(v0, Address(esp)); + __ flt_to_flt16(c_rarg0, v0, v1); + __ mov(sp, r19_sender_sp); // Restore caller's SP + __ br(lr); + return entry_point; +} + // Abstract method entry // Attempt to execute abstract method. 
Throw exception address TemplateInterpreterGenerator::generate_abstract_entry(void) { @@ -1698,6 +1742,7 @@ address TemplateInterpreterGenerator::generate_currentThread() { return entry_point; } + //----------------------------------------------------------------------------- // Exceptions diff --git a/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp b/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp index db26893ff8d..a141127387e 100644 --- a/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp @@ -170,6 +170,8 @@ enum Ampere_CPU_Model { static bool supports_on_spin_wait() { return _spin_wait.inst() != SpinWait::NONE; } + static bool supports_float16() { return true; } + #ifdef __APPLE__ // Is the CPU running emulated (for example macOS Rosetta running x86_64 code on M1 ARM (aarch64) static bool is_cpu_emulated(); diff --git a/src/hotspot/cpu/arm/templateInterpreterGenerator_arm.cpp b/src/hotspot/cpu/arm/templateInterpreterGenerator_arm.cpp index 11531f4cb30..5542cc96532 100644 --- a/src/hotspot/cpu/arm/templateInterpreterGenerator_arm.cpp +++ b/src/hotspot/cpu/arm/templateInterpreterGenerator_arm.cpp @@ -783,6 +783,8 @@ address TemplateInterpreterGenerator::generate_Reference_get_entry(void) { address TemplateInterpreterGenerator::generate_CRC32_update_entry() { return nullptr; } address TemplateInterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) { return nullptr; } address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) { return nullptr; } +address TemplateInterpreterGenerator::generate_Float_float16ToFloat_entry() { return nullptr; } +address TemplateInterpreterGenerator::generate_Float_floatToFloat16_entry() { return nullptr; } // // Interpreter stub for calling a native method. 
(asm interpreter) diff --git a/src/hotspot/cpu/ppc/templateInterpreterGenerator_ppc.cpp b/src/hotspot/cpu/ppc/templateInterpreterGenerator_ppc.cpp index 31f74b114f3..22488a3d23f 100644 --- a/src/hotspot/cpu/ppc/templateInterpreterGenerator_ppc.cpp +++ b/src/hotspot/cpu/ppc/templateInterpreterGenerator_ppc.cpp @@ -1933,6 +1933,10 @@ address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(Abstract return NULL; } +// Not supported +address TemplateInterpreterGenerator::generate_Float_float16ToFloat_entry() { return nullptr; } +address TemplateInterpreterGenerator::generate_Float_floatToFloat16_entry() { return nullptr; } + // ============================================================================= // Exceptions diff --git a/src/hotspot/cpu/riscv/assembler_riscv.hpp b/src/hotspot/cpu/riscv/assembler_riscv.hpp index 076a3831d9b..e39e6737685 100644 --- a/src/hotspot/cpu/riscv/assembler_riscv.hpp +++ b/src/hotspot/cpu/riscv/assembler_riscv.hpp @@ -798,8 +798,6 @@ enum operand_size { int8, int16, int32, uint32, int64 }; INSN(fsqrt_d, 0b1010011, 0b00000, 0b0101101); INSN(fcvt_s_d, 0b1010011, 0b00001, 0b0100000); INSN(fcvt_d_s, 0b1010011, 0b00000, 0b0100001); - INSN(fcvt_s_h, 0b1010011, 0b00010, 0b0100000); - INSN(fcvt_h_s, 0b1010011, 0b00000, 0b0100010); #undef INSN // Immediate Instruction @@ -1056,7 +1054,6 @@ enum operand_size { int8, int16, int32, uint32, int64 }; INSN(fmv_w_x, 0b1010011, 0b000, 0b00000, 0b1111000); INSN(fmv_d_x, 0b1010011, 0b000, 0b00000, 0b1111001); - INSN(fmv_h_x, 0b1010011, 0b000, 0b00000, 0b1111010); #undef INSN @@ -1077,7 +1074,6 @@ enum operand_size { int8, int16, int32, uint32, int64 }; INSN(fclass_d, 0b1010011, 0b001, 0b00000, 0b1110001); INSN(fmv_x_w, 0b1010011, 0b000, 0b00000, 0b1110000); INSN(fmv_x_d, 0b1010011, 0b000, 0b00000, 0b1110001); - INSN(fmv_x_h, 0b1010011, 0b000, 0b00000, 0b1110010); #undef INSN diff --git a/src/hotspot/cpu/riscv/globals_riscv.hpp b/src/hotspot/cpu/riscv/globals_riscv.hpp index 
654db3b2d26..ab9e6a94ba1 100644 --- a/src/hotspot/cpu/riscv/globals_riscv.hpp +++ b/src/hotspot/cpu/riscv/globals_riscv.hpp @@ -103,7 +103,6 @@ define_pd_global(intx, InlineSmallCode, 1000); product(bool, UseZba, false, EXPERIMENTAL, "Use Zba instructions") \ product(bool, UseZbb, false, EXPERIMENTAL, "Use Zbb instructions") \ product(bool, UseZbs, false, EXPERIMENTAL, "Use Zbs instructions") \ - product(bool, UseZfhmin, false, EXPERIMENTAL, "Use Zfhmin instructions") \ product(bool, UseZic64b, false, EXPERIMENTAL, "Use Zic64b instructions") \ product(bool, UseZicbom, false, EXPERIMENTAL, "Use Zicbom instructions") \ product(bool, UseZicbop, false, EXPERIMENTAL, "Use Zicbop instructions") \ diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad index 301dc01f689..73560437164 100644 --- a/src/hotspot/cpu/riscv/riscv.ad +++ b/src/hotspot/cpu/riscv/riscv.ad @@ -1845,10 +1845,6 @@ const bool Matcher::match_rule_supported(int opcode) { case Op_CountTrailingZerosI: case Op_CountTrailingZerosL: return UseZbb; - - case Op_ConvF2HF: - case Op_ConvHF2F: - return UseZfhmin; } return true; // Per default match rules are supported. 
@@ -8180,44 +8176,6 @@ instruct convL2F_reg_reg(fRegF dst, iRegL src) %{ ins_pipe(fp_l2f); %} -// float <-> half float - -instruct convHF2F_reg_reg(fRegF dst, iRegINoSp src, fRegF tmp) %{ - predicate(UseZfhmin); - match(Set dst (ConvHF2F src)); - effect(TEMP tmp); - - ins_cost(XFER_COST); - format %{ "fmv.h.x $tmp, $src\t#@convHF2F_reg_reg\n\t" - "fcvt.s.h $dst, $tmp\t#@convHF2F_reg_reg" - %} - - ins_encode %{ - __ fmv_h_x($tmp$$FloatRegister, $src$$Register); - __ fcvt_s_h($dst$$FloatRegister, $tmp$$FloatRegister); - %} - - ins_pipe(fp_i2f); -%} - -instruct convF2HF_reg_reg(iRegINoSp dst, fRegF src, fRegF tmp) %{ - predicate(UseZfhmin); - match(Set dst (ConvF2HF src)); - effect(TEMP tmp); - - ins_cost(XFER_COST); - format %{ "fcvt.h.s $tmp, $src\t#@convF2HF_reg_reg\n\t" - "fmv.x.h $dst, $tmp\t#@convF2HF_reg_reg" - %} - - ins_encode %{ - __ fcvt_h_s($tmp$$FloatRegister, $src$$FloatRegister); - __ fmv_x_h($dst$$Register, $tmp$$FloatRegister); - %} - - ins_pipe(fp_f2i); -%} - // double <-> int instruct convD2I_reg_reg(iRegINoSp dst, fRegD src) %{ diff --git a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp index 0ce70328e35..4ed383fdf5a 100644 --- a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp +++ b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp @@ -301,6 +301,10 @@ address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::M return entry_point; } +// Not supported +address TemplateInterpreterGenerator::generate_Float_float16ToFloat_entry() { return nullptr; } +address TemplateInterpreterGenerator::generate_Float_floatToFloat16_entry() { return nullptr; } + // Abstract method entry // Attempt to execute abstract method. 
Throw exception address TemplateInterpreterGenerator::generate_abstract_entry(void) { diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.cpp b/src/hotspot/cpu/riscv/vm_version_riscv.cpp index 26a13913e70..fc8f35872de 100644 --- a/src/hotspot/cpu/riscv/vm_version_riscv.cpp +++ b/src/hotspot/cpu/riscv/vm_version_riscv.cpp @@ -76,9 +76,6 @@ void VM_Version::initialize() { if (FLAG_IS_DEFAULT(UseZicboz)) { FLAG_SET_DEFAULT(UseZicboz, true); } - if (FLAG_IS_DEFAULT(UseZfhmin)) { - FLAG_SET_DEFAULT(UseZfhmin, true); - } if (FLAG_IS_DEFAULT(UseZihintpause)) { FLAG_SET_DEFAULT(UseZihintpause, true); } diff --git a/src/hotspot/cpu/s390/templateInterpreterGenerator_s390.cpp b/src/hotspot/cpu/s390/templateInterpreterGenerator_s390.cpp index 707814b1da1..bd6145941a2 100644 --- a/src/hotspot/cpu/s390/templateInterpreterGenerator_s390.cpp +++ b/src/hotspot/cpu/s390/templateInterpreterGenerator_s390.cpp @@ -2003,6 +2003,10 @@ address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(Abstract return NULL; } +// Not supported +address TemplateInterpreterGenerator::generate_Float_float16ToFloat_entry() { return nullptr; } +address TemplateInterpreterGenerator::generate_Float_floatToFloat16_entry() { return nullptr; } + void TemplateInterpreterGenerator::bang_stack_shadow_pages(bool native_call) { // Quick & dirty stack overflow checking: bang the stack & handle trap. // Note that we do the banging after the frame is setup, since the exception diff --git a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp index 6e130e0d29d..24b0256b1e4 100644 --- a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2000, 2023, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -2454,6 +2454,10 @@ void LIR_Assembler::intrinsic_op(LIR_Code code, LIR_Opr value, LIR_Opr tmp, LIR_ default : ShouldNotReachHere(); } #endif // !_LP64 + } else if (code == lir_f2hf) { + __ flt_to_flt16(dest->as_register(), value->as_xmm_float_reg(), tmp->as_xmm_float_reg()); + } else if (code == lir_hf2f) { + __ flt16_to_flt(dest->as_xmm_float_reg(), value->as_register()); } else { Unimplemented(); } diff --git a/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp b/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp index 48461054ac6..fd85427afd5 100644 --- a/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp +++ b/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2005, 2022, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2005, 2023, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -832,6 +832,10 @@ void LIRGenerator::do_MathIntrinsic(Intrinsic* x) { __ move(LIR_OprFact::doubleConst(-0.0), tmp); } #endif + if (x->id() == vmIntrinsics::_floatToFloat16) { + tmp = new_register(T_FLOAT); + __ move(LIR_OprFact::floatConst(-0.0), tmp); + } switch(x->id()) { case vmIntrinsics::_dabs: @@ -841,6 +845,12 @@ void LIRGenerator::do_MathIntrinsic(Intrinsic* x) { case vmIntrinsics::_dsqrt_strict: __ sqrt(calc_input, calc_result, LIR_OprFact::illegalOpr); break; + case vmIntrinsics::_floatToFloat16: + __ f2hf(calc_input, calc_result, tmp); + break; + case vmIntrinsics::_float16ToFloat: + __ hf2f(calc_input, calc_result, LIR_OprFact::illegalOpr); + break; default: ShouldNotReachHere(); } diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.hpp b/src/hotspot/cpu/x86/macroAssembler_x86.hpp index 12a2aaf1959..62eb254e212 100644 --- a/src/hotspot/cpu/x86/macroAssembler_x86.hpp +++ b/src/hotspot/cpu/x86/macroAssembler_x86.hpp @@ -162,6 
+162,11 @@ class MacroAssembler: public Assembler { void incrementq(Register reg, int value = 1); void incrementq(Address dst, int value = 1); + void incrementl(AddressLiteral dst, Register rscratch = noreg); + void incrementl(ArrayAddress dst, Register rscratch); + + void incrementq(AddressLiteral dst, Register rscratch = noreg); + // Support optimal SSE move instructions. void movflt(XMMRegister dst, XMMRegister src) { if (dst-> encoding() == src->encoding()) return; @@ -189,10 +194,18 @@ class MacroAssembler: public Assembler { } void movdbl(Address dst, XMMRegister src) { movsd(dst, src); } - void incrementl(AddressLiteral dst, Register rscratch = noreg); - void incrementl(ArrayAddress dst, Register rscratch); + void flt_to_flt16(Register dst, XMMRegister src, XMMRegister tmp) { + // Use separate tmp XMM register because caller may + // require src XMM register to be unchanged (as in x86.ad). + vcvtps2ph(tmp, src, 0x04, Assembler::AVX_128bit); + movdl(dst, tmp); + movswl(dst, dst); + } - void incrementq(AddressLiteral dst, Register rscratch = noreg); + void flt16_to_flt(XMMRegister dst, Register src) { + movdl(dst, src); + vcvtph2ps(dst, dst, Assembler::AVX_128bit); + } // Alignment void align32(); diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp index 6f4868f1592..4ada2d43810 100644 --- a/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp +++ b/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp @@ -3518,6 +3518,55 @@ void StubGenerator::generate_libm_stubs() { } } +/** +* Arguments: +* +* Input: +* c_rarg0 - float16 jshort +* +* Output: +* xmm0 - float +*/ +address StubGenerator::generate_float16ToFloat() { + StubCodeMark mark(this, "StubRoutines", "float16ToFloat"); + + address start = __ pc(); + + BLOCK_COMMENT("Entry:"); + // No need for RuntimeStub frame since it is called only during JIT compilation + + // Load value into xmm0 and convert + __ flt16_to_flt(xmm0, c_rarg0); + + __ ret(0); + + return start; +} +
+/** +* Arguments: +* +* Input: +* xmm0 - float +* +* Output: +* rax - float16 jshort +*/ +address StubGenerator::generate_floatToFloat16() { + StubCodeMark mark(this, "StubRoutines", "floatToFloat16"); + + address start = __ pc(); + + BLOCK_COMMENT("Entry:"); + // No need for RuntimeStub frame since it is called only during JIT compilation + + // Convert and put result into rax + __ flt_to_flt16(rax, xmm0, xmm1); + + __ ret(0); + + return start; +} address StubGenerator::generate_cont_thaw(const char* label, Continuation::thaw_kind kind) { if (!Continuations::enabled()) return nullptr; @@ -3883,6 +3932,16 @@ void StubGenerator::generate_initial() { StubRoutines::_updateBytesAdler32 = generate_updateBytesAdler32(); } + if (VM_Version::supports_float16()) { + // For results consistency both intrinsics should be enabled. + // vmIntrinsics checks InlineIntrinsics flag, no need to check it here. + if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_float16ToFloat) && + vmIntrinsics::is_intrinsic_available(vmIntrinsics::_floatToFloat16)) { + StubRoutines::_hf2f = generate_float16ToFloat(); + StubRoutines::_f2hf = generate_floatToFloat16(); + } + } + generate_libm_stubs(); } diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64.hpp b/src/hotspot/cpu/x86/stubGenerator_x86_64.hpp index 71fec0a65d9..f91e26e8997 100644 --- a/src/hotspot/cpu/x86/stubGenerator_x86_64.hpp +++ b/src/hotspot/cpu/x86/stubGenerator_x86_64.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003, 2022, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2003, 2023, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -474,6 +474,8 @@ class StubGenerator: public StubCodeGenerator { address generate_bigIntegerRightShift(); address generate_bigIntegerLeftShift(); + address generate_float16ToFloat(); + address generate_floatToFloat16(); // Libm trigonometric stubs diff --git a/src/hotspot/cpu/x86/templateInterpreterGenerator_x86_32.cpp b/src/hotspot/cpu/x86/templateInterpreterGenerator_x86_32.cpp index b8b4efda768..50b3d1f707e 100644 --- a/src/hotspot/cpu/x86/templateInterpreterGenerator_x86_32.cpp +++ b/src/hotspot/cpu/x86/templateInterpreterGenerator_x86_32.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2023, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -314,6 +314,58 @@ address TemplateInterpreterGenerator::generate_Double_doubleToRawLongBits_entry( return NULL; } +/** + * Method entry for static method: + * java.lang.Float.float16ToFloat(short floatBinary16) + */ +address TemplateInterpreterGenerator::generate_Float_float16ToFloat_entry() { + // vmIntrinsics checks InlineIntrinsics flag, no need to check it here. 
+ if (!VM_Version::supports_float16() || + vmIntrinsics::is_disabled_by_flags(vmIntrinsics::_float16ToFloat) || + vmIntrinsics::is_disabled_by_flags(vmIntrinsics::_floatToFloat16)) { + return nullptr; // Generate a vanilla entry + } + address entry = __ pc(); + + // rsi: the sender's SP + + // Load value into xmm0 and convert + __ movswl(rax, Address(rsp, wordSize)); + __ flt16_to_flt(xmm0, rax); + + // Return + __ pop(rdi); // get return address + __ mov(rsp, rsi); // set rsp to the sender's SP + __ jmp(rdi); + return entry; +} + +/** + * Method entry for static method: + * java.lang.Float.floatToFloat16(float value) + */ +address TemplateInterpreterGenerator::generate_Float_floatToFloat16_entry() { + // vmIntrinsics checks InlineIntrinsics flag, no need to check it here. + if (!VM_Version::supports_float16() || + vmIntrinsics::is_disabled_by_flags(vmIntrinsics::_floatToFloat16) || + vmIntrinsics::is_disabled_by_flags(vmIntrinsics::_float16ToFloat)) { + return nullptr; // Generate a vanilla entry + } + address entry = __ pc(); + + // rsi: the sender's SP + + // Load value into xmm0, convert and put result into rax + __ movflt(xmm0, Address(rsp, wordSize)); + __ flt_to_flt16(rax, xmm0, xmm1); + + // Return + __ pop(rdi); // get return address + __ mov(rsp, rsi); // set rsp to the sender's SP + __ jmp(rdi); + return entry; +} + address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKind kind) { // rbx,: Method* diff --git a/src/hotspot/cpu/x86/templateInterpreterGenerator_x86_64.cpp b/src/hotspot/cpu/x86/templateInterpreterGenerator_x86_64.cpp index e5355e19be8..d514bf54de8 100644 --- a/src/hotspot/cpu/x86/templateInterpreterGenerator_x86_64.cpp +++ b/src/hotspot/cpu/x86/templateInterpreterGenerator_x86_64.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003, 2022, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2003, 2023, Oracle and/or its affiliates. All rights reserved. 
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -332,6 +332,60 @@ address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(Abstract return NULL; } +/** + * Method entry for static method: + * java.lang.Float.float16ToFloat(short floatBinary16) + */ +address TemplateInterpreterGenerator::generate_Float_float16ToFloat_entry() { + // vmIntrinsics checks InlineIntrinsics flag, no need to check it here. + if (!VM_Version::supports_float16() || + vmIntrinsics::is_disabled_by_flags(vmIntrinsics::_float16ToFloat) || + vmIntrinsics::is_disabled_by_flags(vmIntrinsics::_floatToFloat16)) { + return nullptr; // Generate a vanilla entry + } + address entry = __ pc(); + + // r13: the sender's SP + + // Load value into xmm0 and convert + __ movswl(rax, Address(rsp, wordSize)); + __ flt16_to_flt(xmm0, rax); + + // Return result in xmm0 + __ pop(rdi); // get return address + __ mov(rsp, r13); // set rsp to sender's SP + __ jmp(rdi); + + return entry; +} + +/** + * Method entry for static method: + * java.lang.Float.floatToFloat16(float value) + */ +address TemplateInterpreterGenerator::generate_Float_floatToFloat16_entry() { + // vmIntrinsics checks InlineIntrinsics flag, no need to check it here. 
+ if (!VM_Version::supports_float16() || + vmIntrinsics::is_disabled_by_flags(vmIntrinsics::_floatToFloat16) || + vmIntrinsics::is_disabled_by_flags(vmIntrinsics::_float16ToFloat)) { + return nullptr; // Generate a vanilla entry + } + address entry = __ pc(); + + // r13: the sender's SP + + // Load value into xmm0, convert and put result into rax + __ movflt(xmm0, Address(rsp, wordSize)); + __ flt_to_flt16(rax, xmm0, xmm1); + + // Return result in rax + __ pop(rdi); // get return address + __ mov(rsp, r13); // set rsp to sender's SP + __ jmp(rdi); + + return entry; +} + // // Various method entries // diff --git a/src/hotspot/cpu/x86/vm_version_x86.hpp b/src/hotspot/cpu/x86/vm_version_x86.hpp index 9213d42bc57..226c0848e73 100644 --- a/src/hotspot/cpu/x86/vm_version_x86.hpp +++ b/src/hotspot/cpu/x86/vm_version_x86.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2023, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -759,6 +759,11 @@ public: return true; } + // For AVX CPUs only. f16c support is disabled if UseAVX == 0. + static bool supports_float16() { + return supports_f16c() || supports_avx512vl(); + } + // there are several insns to force cache line sync to memory which // we can use to ensure mapped non-volatile memory is up to date with // pending in-cache changes. diff --git a/src/hotspot/cpu/x86/x86.ad b/src/hotspot/cpu/x86/x86.ad index 80f0c0f4b49..999cd15f0cf 100644 --- a/src/hotspot/cpu/x86/x86.ad +++ b/src/hotspot/cpu/x86/x86.ad @@ -1,5 +1,5 @@ // -// Copyright (c) 2011, 2022, Oracle and/or its affiliates. All rights reserved. +// Copyright (c) 2011, 2023, Oracle and/or its affiliates. All rights reserved. // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
// // This code is free software; you can redistribute it and/or modify it @@ -1683,7 +1683,7 @@ const bool Matcher::match_rule_supported(int opcode) { break; case Op_ConvF2HF: case Op_ConvHF2F: - if (!VM_Version::supports_f16c() && !VM_Version::supports_avx512vl()) { + if (!VM_Version::supports_float16()) { return false; } break; @@ -3665,9 +3665,7 @@ instruct convF2HF_reg_reg(rRegI dst, regF src, regF tmp) %{ ins_cost(125); format %{ "vcvtps2ph $dst,$src \t using $tmp as TEMP"%} ins_encode %{ - __ vcvtps2ph($tmp$$XMMRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit); - __ movdl($dst$$Register, $tmp$$XMMRegister); - __ movswl($dst$$Register, $dst$$Register); + __ flt_to_flt16($dst$$Register, $src$$XMMRegister, $tmp$$XMMRegister); %} ins_pipe( pipe_slow ); %} @@ -3709,8 +3707,7 @@ instruct convHF2F_reg_reg(regF dst, rRegI src) %{ match(Set dst (ConvHF2F src)); format %{ "vcvtph2ps $dst,$src" %} ins_encode %{ - __ movdl($dst$$XMMRegister, $src$$Register); - __ vcvtph2ps($dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); + __ flt16_to_flt($dst$$XMMRegister, $src$$Register); %} ins_pipe( pipe_slow ); %} diff --git a/src/hotspot/share/c1/c1_Compiler.cpp b/src/hotspot/share/c1/c1_Compiler.cpp index dd5ab9c3cc0..ba20e83c330 100644 --- a/src/hotspot/share/c1/c1_Compiler.cpp +++ b/src/hotspot/share/c1/c1_Compiler.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999, 2022, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1999, 2023, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -135,6 +135,10 @@ bool Compiler::is_intrinsic_supported(const methodHandle& method) { case vmIntrinsics::_onSpinWait: if (!VM_Version::supports_on_spin_wait()) return false; break; + case vmIntrinsics::_floatToFloat16: + case vmIntrinsics::_float16ToFloat: + if (!VM_Version::supports_float16()) return false; + break; case vmIntrinsics::_arraycopy: case vmIntrinsics::_currentTimeMillis: case vmIntrinsics::_nanoTime: diff --git a/src/hotspot/share/c1/c1_LIR.cpp b/src/hotspot/share/c1/c1_LIR.cpp index 5b903192c9e..a553af52a80 100644 --- a/src/hotspot/share/c1/c1_LIR.cpp +++ b/src/hotspot/share/c1/c1_LIR.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2000, 2023, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -563,6 +563,8 @@ void LIR_OpVisitState::visit(LIR_Op* op) { case lir_sqrt: case lir_abs: case lir_neg: + case lir_f2hf: + case lir_hf2f: case lir_logic_and: case lir_logic_or: case lir_logic_xor: @@ -1731,6 +1733,8 @@ const char * LIR_Op::name() const { case lir_abs: s = "abs"; break; case lir_neg: s = "neg"; break; case lir_sqrt: s = "sqrt"; break; + case lir_f2hf: s = "f2hf"; break; + case lir_hf2f: s = "hf2f"; break; case lir_logic_and: s = "logic_and"; break; case lir_logic_or: s = "logic_or"; break; case lir_logic_xor: s = "logic_xor"; break; diff --git a/src/hotspot/share/c1/c1_LIR.hpp b/src/hotspot/share/c1/c1_LIR.hpp index c50ca261da3..a0987a23feb 100644 --- a/src/hotspot/share/c1/c1_LIR.hpp +++ b/src/hotspot/share/c1/c1_LIR.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2000, 2023, Oracle and/or its affiliates. All rights reserved. 
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -958,6 +958,8 @@ enum LIR_Code { , lir_abs , lir_neg , lir_tan + , lir_f2hf + , lir_hf2f , lir_log10 , lir_logic_and , lir_logic_or @@ -2272,6 +2274,8 @@ class LIR_List: public CompilationResourceObj { void fmaf(LIR_Opr from, LIR_Opr from1, LIR_Opr from2, LIR_Opr to) { append(new LIR_Op3(lir_fmaf, from, from1, from2, to)); } void log10 (LIR_Opr from, LIR_Opr to, LIR_Opr tmp) { append(new LIR_Op2(lir_log10, from, LIR_OprFact::illegalOpr, to, tmp)); } void tan (LIR_Opr from, LIR_Opr to, LIR_Opr tmp1, LIR_Opr tmp2) { append(new LIR_Op2(lir_tan , from, tmp1, to, tmp2)); } + void f2hf(LIR_Opr from, LIR_Opr to, LIR_Opr tmp) { append(new LIR_Op2(lir_f2hf, from, tmp, to)); } + void hf2f(LIR_Opr from, LIR_Opr to, LIR_Opr tmp) { append(new LIR_Op2(lir_hf2f, from, tmp, to)); } void add (LIR_Opr left, LIR_Opr right, LIR_Opr res) { append(new LIR_Op2(lir_add, left, right, res)); } void sub (LIR_Opr left, LIR_Opr right, LIR_Opr res, CodeEmitInfo* info = NULL) { append(new LIR_Op2(lir_sub, left, right, res, info)); } diff --git a/src/hotspot/share/c1/c1_LIRAssembler.cpp b/src/hotspot/share/c1/c1_LIRAssembler.cpp index 7b71c8ddb09..56c80aca3da 100644 --- a/src/hotspot/share/c1/c1_LIRAssembler.cpp +++ b/src/hotspot/share/c1/c1_LIRAssembler.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2000, 2023, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -726,6 +726,8 @@ void LIR_Assembler::emit_op2(LIR_Op2* op) { case lir_sqrt: case lir_tan: case lir_log10: + case lir_f2hf: + case lir_hf2f: intrinsic_op(op->code(), op->in_opr1(), op->in_opr2(), op->result_opr(), op); break; diff --git a/src/hotspot/share/c1/c1_LIRGenerator.cpp b/src/hotspot/share/c1/c1_LIRGenerator.cpp index 23ef4178de7..41f93645581 100644 --- a/src/hotspot/share/c1/c1_LIRGenerator.cpp +++ b/src/hotspot/share/c1/c1_LIRGenerator.cpp @@ -2965,6 +2965,10 @@ void LIRGenerator::do_Intrinsic(Intrinsic* x) { case vmIntrinsics::_fmaD: do_FmaIntrinsic(x); break; case vmIntrinsics::_fmaF: do_FmaIntrinsic(x); break; + // Use java.lang.Math intrinsics code since it works for these intrinsics too. + case vmIntrinsics::_floatToFloat16: // fall through + case vmIntrinsics::_float16ToFloat: do_MathIntrinsic(x); break; + case vmIntrinsics::_Preconditions_checkIndex: do_PreconditionsCheckIndex(x, T_INT); break; diff --git a/src/hotspot/share/c1/c1_LinearScan.cpp b/src/hotspot/share/c1/c1_LinearScan.cpp index c2dc5789c9a..5a37e5243e2 100644 --- a/src/hotspot/share/c1/c1_LinearScan.cpp +++ b/src/hotspot/share/c1/c1_LinearScan.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2005, 2022, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2005, 2023, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -6731,6 +6731,8 @@ void LinearScanStatistic::collect(LinearScan* allocator) { case lir_rem: case lir_sqrt: case lir_abs: + case lir_f2hf: + case lir_hf2f: case lir_log10: case lir_logic_and: case lir_logic_or: diff --git a/src/hotspot/share/classfile/vmIntrinsics.cpp b/src/hotspot/share/classfile/vmIntrinsics.cpp index 82bdcacd8e4..336c2f95aa0 100644 --- a/src/hotspot/share/classfile/vmIntrinsics.cpp +++ b/src/hotspot/share/classfile/vmIntrinsics.cpp @@ -309,6 +309,10 @@ bool vmIntrinsics::disabled_by_jvm_flags(vmIntrinsics::ID id) { case vmIntrinsics::_fmaF: if (!InlineMathNatives || !UseFMA) return true; break; + case vmIntrinsics::_floatToFloat16: + case vmIntrinsics::_float16ToFloat: + if (!InlineIntrinsics) return true; + break; case vmIntrinsics::_arraycopy: if (!InlineArrayCopy) return true; break; diff --git a/src/hotspot/share/compiler/abstractCompiler.hpp b/src/hotspot/share/compiler/abstractCompiler.hpp index c2f9c6cb84b..29151b063de 100644 --- a/src/hotspot/share/compiler/abstractCompiler.hpp +++ b/src/hotspot/share/compiler/abstractCompiler.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999, 2022, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1999, 2023, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -125,7 +125,7 @@ class AbstractCompiler : public CHeapObj { // GraphBuilder::GraphBuilder() in src/share/vm/c1/c1_GraphBuilder.cpp // for more details. 
- virtual bool is_intrinsic_available(const methodHandle& method, DirectiveSet* directive) { + bool is_intrinsic_available(const methodHandle& method, DirectiveSet* directive) { return is_intrinsic_supported(method) && !directive->is_intrinsic_disabled(method) && !vmIntrinsics::is_disabled_by_flags(method); diff --git a/src/hotspot/share/interpreter/abstractInterpreter.cpp b/src/hotspot/share/interpreter/abstractInterpreter.cpp index 8d6ef06754c..de2b3156e87 100644 --- a/src/hotspot/share/interpreter/abstractInterpreter.cpp +++ b/src/hotspot/share/interpreter/abstractInterpreter.cpp @@ -133,6 +133,8 @@ AbstractInterpreter::MethodKind AbstractInterpreter::method_kind(const methodHan case vmIntrinsics::_floatToRawIntBits: return java_lang_Float_floatToRawIntBits; case vmIntrinsics::_longBitsToDouble: return java_lang_Double_longBitsToDouble; case vmIntrinsics::_doubleToRawLongBits: return java_lang_Double_doubleToRawLongBits; + case vmIntrinsics::_float16ToFloat: return java_lang_Float_float16ToFloat; + case vmIntrinsics::_floatToFloat16: return java_lang_Float_floatToFloat16; #if defined(AMD64) || defined(AARCH64) || defined(RISCV64) case vmIntrinsics::_currentThread: return java_lang_Thread_currentThread; #endif diff --git a/src/hotspot/share/interpreter/abstractInterpreter.hpp b/src/hotspot/share/interpreter/abstractInterpreter.hpp index 7b839a38ee9..16878695d04 100644 --- a/src/hotspot/share/interpreter/abstractInterpreter.hpp +++ b/src/hotspot/share/interpreter/abstractInterpreter.hpp @@ -88,6 +88,8 @@ class AbstractInterpreter: AllStatic { java_util_zip_CRC32C_updateDirectByteBuffer, // implementation of java.util.zip.CRC32C.updateDirectByteBuffer(crc, address, off, end) java_lang_Float_intBitsToFloat, // implementation of java.lang.Float.intBitsToFloat() java_lang_Float_floatToRawIntBits, // implementation of java.lang.Float.floatToRawIntBits() + java_lang_Float_float16ToFloat, // implementation of java.lang.Float.float16ToFloat() + 
java_lang_Float_floatToFloat16, // implementation of java.lang.Float.floatToFloat16() java_lang_Double_longBitsToDouble, // implementation of java.lang.Double.longBitsToDouble() java_lang_Double_doubleToRawLongBits, // implementation of java.lang.Double.doubleToRawLongBits() java_lang_Thread_currentThread, // implementation of java.lang.Thread.currentThread() @@ -157,6 +159,8 @@ class AbstractInterpreter: AllStatic { case vmIntrinsics::_dexp : // fall thru case vmIntrinsics::_fmaD : // fall thru case vmIntrinsics::_fmaF : // fall thru + case vmIntrinsics::_floatToFloat16 : // fall thru + case vmIntrinsics::_float16ToFloat : // fall thru case vmIntrinsics::_Continuation_doYield : // fall thru return false; diff --git a/src/hotspot/share/interpreter/templateInterpreterGenerator.cpp b/src/hotspot/share/interpreter/templateInterpreterGenerator.cpp index 499cff7237d..088c0086220 100644 --- a/src/hotspot/share/interpreter/templateInterpreterGenerator.cpp +++ b/src/hotspot/share/interpreter/templateInterpreterGenerator.cpp @@ -223,6 +223,9 @@ void TemplateInterpreterGenerator::generate_all() { method_entry(java_lang_Double_longBitsToDouble); method_entry(java_lang_Double_doubleToRawLongBits); + method_entry(java_lang_Float_float16ToFloat); + method_entry(java_lang_Float_floatToFloat16); + #undef method_entry // Bytecodes @@ -437,7 +440,10 @@ address TemplateInterpreterGenerator::generate_method_entry( case Interpreter::java_lang_Thread_currentThread : entry_point = generate_currentThread(); break; #endif - + case Interpreter::java_lang_Float_float16ToFloat + : entry_point = generate_Float_float16ToFloat_entry(); break; + case Interpreter::java_lang_Float_floatToFloat16 + : entry_point = generate_Float_floatToFloat16_entry(); break; #ifdef IA32 // On x86_32 platforms, a special entry is generated for the following four methods. // On other platforms the normal entry is used to enter these methods. 
diff --git a/src/hotspot/share/interpreter/templateInterpreterGenerator.hpp b/src/hotspot/share/interpreter/templateInterpreterGenerator.hpp index 850745f9f4f..726e0495650 100644 --- a/src/hotspot/share/interpreter/templateInterpreterGenerator.hpp +++ b/src/hotspot/share/interpreter/templateInterpreterGenerator.hpp @@ -103,6 +103,9 @@ class TemplateInterpreterGenerator: public AbstractInterpreterGenerator { address generate_Double_longBitsToDouble_entry(); address generate_Double_doubleToRawLongBits_entry(); #endif // IA32 + address generate_Float_float16ToFloat_entry(); + address generate_Float_floatToFloat16_entry(); + // Some platforms don't need registers, other need two. Unused function is // left unimplemented. void generate_stack_overflow_check(void); diff --git a/src/hotspot/share/opto/convertnode.cpp b/src/hotspot/share/opto/convertnode.cpp index cc30b60ebf4..38c539ee3ab 100644 --- a/src/hotspot/share/opto/convertnode.cpp +++ b/src/hotspot/share/opto/convertnode.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2023, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -29,7 +29,7 @@ #include "opto/matcher.hpp" #include "opto/phaseX.hpp" #include "opto/subnode.hpp" -#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" //============================================================================= //------------------------------Identity--------------------------------------- @@ -165,15 +165,12 @@ const Type* ConvF2DNode::Value(PhaseGVN* phase) const { //------------------------------Value------------------------------------------ const Type* ConvF2HFNode::Value(PhaseGVN* phase) const { const Type *t = phase->type( in(1) ); - if( t == Type::TOP ) return Type::TOP; - if( t == Type::FLOAT ) return TypeInt::SHORT; - const TypeF *tf = t->is_float_constant(); - return TypeInt::make( SharedRuntime::f2hf( tf->getf() ) ); -} + if (t == Type::TOP) return Type::TOP; + if (t == Type::FLOAT) return TypeInt::SHORT; + if (StubRoutines::f2hf_adr() == nullptr) return bottom_type(); -//------------------------------Identity--------------------------------------- -Node* ConvF2HFNode::Identity(PhaseGVN* phase) { - return (in(1)->Opcode() == Op_ConvHF2F) ? 
in(1)->in(1) : this; + const TypeF *tf = t->is_float_constant(); + return TypeInt::make( StubRoutines::f2hf(tf->getf()) ); } //============================================================================= @@ -238,11 +235,14 @@ Node *ConvF2LNode::Ideal(PhaseGVN *phase, bool can_reshape) { //------------------------------Value------------------------------------------ const Type* ConvHF2FNode::Value(PhaseGVN* phase) const { const Type *t = phase->type( in(1) ); - if( t == Type::TOP ) return Type::TOP; - if( t == TypeInt::SHORT ) return Type::FLOAT; - const TypeInt *ti = t->is_int(); - if ( ti->is_con() ) return TypeF::make( SharedRuntime::hf2f( ti->get_con() ) ); + if (t == Type::TOP) return Type::TOP; + if (t == TypeInt::SHORT) return Type::FLOAT; + if (StubRoutines::hf2f_adr() == nullptr) return bottom_type(); + const TypeInt *ti = t->is_int(); + if (ti->is_con()) { + return TypeF::make( StubRoutines::hf2f(ti->get_con()) ); + } return bottom_type(); } diff --git a/src/hotspot/share/opto/convertnode.hpp b/src/hotspot/share/opto/convertnode.hpp index 35254df6e18..e58213fbc09 100644 --- a/src/hotspot/share/opto/convertnode.hpp +++ b/src/hotspot/share/opto/convertnode.hpp @@ -108,7 +108,6 @@ class ConvF2HFNode : public Node { virtual int Opcode() const; virtual const Type *bottom_type() const { return TypeInt::SHORT; } virtual const Type* Value(PhaseGVN* phase) const; - virtual Node* Identity(PhaseGVN* phase); virtual uint ideal_reg() const { return Op_RegI; } }; diff --git a/src/hotspot/share/runtime/abstract_vm_version.hpp b/src/hotspot/share/runtime/abstract_vm_version.hpp index e34eb9c0446..a192d41b739 100644 --- a/src/hotspot/share/runtime/abstract_vm_version.hpp +++ b/src/hotspot/share/runtime/abstract_vm_version.hpp @@ -179,6 +179,8 @@ class Abstract_VM_Version: AllStatic { // Does platform support stack watermark barriers for concurrent stack processing? 
constexpr static bool supports_stack_watermark_barrier() { return false; } + // Does platform support float16 instructions? + static bool supports_float16() { return false; } static bool print_matching_lines_from_file(const char* filename, outputStream* st, const char* keywords_to_match[]); protected: diff --git a/src/hotspot/share/runtime/sharedRuntime.cpp b/src/hotspot/share/runtime/sharedRuntime.cpp index c563376084b..3c6e5795102 100644 --- a/src/hotspot/share/runtime/sharedRuntime.cpp +++ b/src/hotspot/share/runtime/sharedRuntime.cpp @@ -445,97 +445,6 @@ JRT_LEAF(jdouble, SharedRuntime::l2d(jlong x)) return (jdouble)x; JRT_END -// Reference implementation at src/java.base/share/classes/java/lang/Float.java:floatToFloat16 -JRT_LEAF(jshort, SharedRuntime::f2hf(jfloat x)) - union {jfloat f; jint i;} bits; - bits.f = x; - jint doppel = bits.i; - jshort sign_bit = (jshort) ((doppel & 0x80000000) >> 16); - if (g_isnan(x)) - return (jshort)(sign_bit | 0x7c00 | (doppel & 0x007fe000) >> 13 | (doppel & 0x00001ff0) >> 4 | (doppel & 0x0000000f)); - - jfloat abs_f = (x >= 0.0f) ? x : (x * -1.0f); - - // Overflow threshold is halffloat max value + 1/2 ulp - if (abs_f >= (65504.0f + 16.0f)) { - return (jshort)(sign_bit | 0x7c00); // Positive or negative infinity - } - - // Smallest magnitude of Halffloat is 0x1.0p-24, half-way or smaller rounds to zero - if (abs_f <= (pow(2, -24) * 0.5f)) { // Covers float zeros and subnormals. - return sign_bit; // Positive or negative zero - } - - jint exp = ((0x7f800000 & doppel) >> (24 - 1)) - 127; - - // For binary16 subnormals, beside forcing exp to -15, retain - // the difference exp_delta = E_min - exp. This is the excess - // shift value, in addition to 13, to be used in the - // computations below. 
Further the (hidden) msb with value 1 - // in f must be involved as well - jint exp_delta = 0; - jint msb = 0x00000000; - if (exp < -14) { - exp_delta = -14 - exp; - exp = -15; - msb = 0x00800000; - } - jint f_signif_bits = ((doppel & 0x007fffff) | msb); - - // Significand bits as if using rounding to zero - jshort signif_bits = (jshort)(f_signif_bits >> (13 + exp_delta)); - - jint lsb = f_signif_bits & (1 << (13 + exp_delta)); - jint round = f_signif_bits & (1 << (12 + exp_delta)); - jint sticky = f_signif_bits & ((1 << (12 + exp_delta)) - 1); - - if (round != 0 && ((lsb | sticky) != 0 )) { - signif_bits++; - } - - return (jshort)(sign_bit | ( ((exp + 15) << 10) + signif_bits ) ); -JRT_END - -// Reference implementation at src/java.base/share/classes/java/lang/Float.java:float16ToFloat -JRT_LEAF(jfloat, SharedRuntime::hf2f(jshort x)) - // Halffloat format has 1 signbit, 5 exponent bits and - // 10 significand bits - union {jfloat f; jint i;} bits; - jint hf_arg = (jint)x; - jint hf_sign_bit = 0x8000 & hf_arg; - jint hf_exp_bits = 0x7c00 & hf_arg; - jint hf_significand_bits = 0x03ff & hf_arg; - - jint significand_shift = 13; //difference between float and halffloat precision - - jfloat sign = (hf_sign_bit != 0) ? 
-1.0f : 1.0f; - - // Extract halffloat exponent, remove its bias - jint hf_exp = (hf_exp_bits >> 10) - 15; - - if (hf_exp == -15) { - // For subnormal values, return 2^-24 * significand bits - return (sign * (pow(2,-24)) * hf_significand_bits); - } else if (hf_exp == 16) { - if (hf_significand_bits == 0) { - bits.i = 0x7f800000; - return sign * bits.f; - } else { - bits.i = (hf_sign_bit << 16) | 0x7f800000 | - (hf_significand_bits << significand_shift); - return bits.f; - } - } - - // Add the bias of float exponent and shift - jint float_exp_bits = (hf_exp + 127) << (24 - 1); - - // Combine sign, exponent and significand bits - bits.i = (hf_sign_bit << 16) | float_exp_bits | - (hf_significand_bits << significand_shift); - - return bits.f; -JRT_END // Exception handling across interpreter/compiler boundaries // diff --git a/src/hotspot/share/runtime/sharedRuntime.hpp b/src/hotspot/share/runtime/sharedRuntime.hpp index 83685abefa0..31871b9f087 100644 --- a/src/hotspot/share/runtime/sharedRuntime.hpp +++ b/src/hotspot/share/runtime/sharedRuntime.hpp @@ -128,8 +128,6 @@ class SharedRuntime: AllStatic { static jfloat d2f (jdouble x); static jfloat l2f (jlong x); static jdouble l2d (jlong x); - static jfloat hf2f(jshort x); - static jshort f2hf(jfloat x); static jfloat i2f (jint x); #ifdef __SOFTFP__ diff --git a/src/hotspot/share/runtime/stubRoutines.cpp b/src/hotspot/share/runtime/stubRoutines.cpp index b582260a844..19f5b37f9da 100644 --- a/src/hotspot/share/runtime/stubRoutines.cpp +++ b/src/hotspot/share/runtime/stubRoutines.cpp @@ -168,6 +168,9 @@ address StubRoutines::_dlibm_reduce_pi04l = nullptr; address StubRoutines::_dlibm_tan_cot_huge = nullptr; address StubRoutines::_dtan = nullptr; +address StubRoutines::_f2hf = nullptr; +address StubRoutines::_hf2f = nullptr; + address StubRoutines::_vector_f_math[VectorSupport::NUM_VEC_SIZES][VectorSupport::NUM_SVML_OP] = {{nullptr}, {nullptr}}; address 
StubRoutines::_vector_d_math[VectorSupport::NUM_VEC_SIZES][VectorSupport::NUM_SVML_OP] = {{nullptr}, {nullptr}}; diff --git a/src/hotspot/share/runtime/stubRoutines.hpp b/src/hotspot/share/runtime/stubRoutines.hpp index 428f6641e9e..6bc4543a249 100644 --- a/src/hotspot/share/runtime/stubRoutines.hpp +++ b/src/hotspot/share/runtime/stubRoutines.hpp @@ -249,6 +249,9 @@ class StubRoutines: AllStatic { static address _dlibm_tan_cot_huge; static address _dtan; + static address _f2hf; + static address _hf2f; + static address _cont_thaw; static address _cont_returnBarrier; static address _cont_returnBarrierExc; @@ -424,6 +427,24 @@ class StubRoutines: AllStatic { static address dlibm_tan_cot_huge() { return _dlibm_tan_cot_huge; } static address dtan() { return _dtan; } + // These are versions of the java.lang.Float::floatToFloat16() and float16ToFloat() + // methods which perform the same operations as the intrinsic version. + // They are used for constant folding in JIT compiler to ensure equivalence. 
+ // + static address f2hf_adr() { return _f2hf; } + static address hf2f_adr() { return _hf2f; } + + static jshort f2hf(jfloat x) { + assert(_f2hf != nullptr, "stub is not implemented on this platform"); + typedef jshort (*f2hf_stub_t)(jfloat x); + return ((f2hf_stub_t)_f2hf)(x); + } + static jfloat hf2f(jshort x) { + assert(_hf2f != nullptr, "stub is not implemented on this platform"); + typedef jfloat (*hf2f_stub_t)(jshort x); + return ((hf2f_stub_t)_hf2f)(x); + } + static address cont_thaw() { return _cont_thaw; } static address cont_returnBarrier() { return _cont_returnBarrier; } static address cont_returnBarrierExc(){return _cont_returnBarrierExc; } diff --git a/test/hotspot/jtreg/compiler/intrinsics/float16/Binary16Conversion.java b/test/hotspot/jtreg/compiler/intrinsics/float16/Binary16Conversion.java new file mode 100644 index 00000000000..eeba614de4f --- /dev/null +++ b/test/hotspot/jtreg/compiler/intrinsics/float16/Binary16Conversion.java @@ -0,0 +1,436 @@ +/* + * Copyright (c) 2022, 2023, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/* + * @test + * @bug 8289551 8302976 + * @summary Verify conversion between float and the binary16 format + * @requires (vm.cpu.features ~= ".*avx512vl.*" | vm.cpu.features ~= ".*f16c.*") | os.arch=="aarch64" + * @requires vm.compiler1.enabled & vm.compiler2.enabled + * @requires vm.compMode != "Xcomp" + * @comment default run + * @run main Binary16Conversion + * @comment C1 JIT compilation only: + * @run main/othervm -Xcomp -XX:TieredStopAtLevel=1 -XX:CompileCommand=compileonly,Binary16Conversion::test* Binary16Conversion + * @comment C2 JIT compilation only: + * @run main/othervm -Xcomp -XX:-TieredCompilation -XX:CompileCommand=compileonly,Binary16Conversion::test* Binary16Conversion + */ + +public class Binary16Conversion { + + public static final int FLOAT_SIGNIFICAND_WIDTH = 24; + + public static void main(String... argv) { + System.out.println("Start ..."); + short s = Float.floatToFloat16(0.0f); // Load Float class + + int errors = 0; + errors += testBinary16RoundTrip(); + // Note that helper methods do sign-symmetric testing + errors += testBinary16CardinalValues(); + errors += testRoundFloatToBinary16(); + errors += testRoundFloatToBinary16HalfWayCases(); + errors += testRoundFloatToBinary16FullBinade(); + errors += testAlternativeImplementation(); + + if (errors > 0) + throw new RuntimeException(errors + " errors"); + } + + /* + * Put all 16-bit values through a conversion loop and make sure + * the values are preserved (NaN bit patterns notwithstanding). 
+ */ + private static int testBinary16RoundTrip() { + int errors = 0; + for (int i = Short.MIN_VALUE; i < Short.MAX_VALUE; i++) { + short s = (short)i; + float f = Float.float16ToFloat(s); + short s2 = Float.floatToFloat16(f); + + if (!Binary16.equivalent(s, s2)) { + errors++; + System.out.println("Roundtrip failure on " + + Integer.toHexString(0xFFFF & (int)s) + + "\t got back " + Integer.toHexString(0xFFFF & (int)s2)); + } + } + return errors; + } + + private static int testBinary16CardinalValues() { + int errors = 0; + // Encode short value for different binary16 cardinal values as an + // integer-valued float. + float[][] testCases = { + {Binary16.POSITIVE_ZERO, +0.0f}, + {Binary16.MIN_VALUE, 0x1.0p-24f}, + {Binary16.MAX_SUBNORMAL, 0x1.ff8p-15f}, + {Binary16.MIN_NORMAL, 0x1.0p-14f}, + {Binary16.ONE, 1.0f}, + {Binary16.MAX_VALUE, 65504.0f}, + {Binary16.POSITIVE_INFINITY, Float.POSITIVE_INFINITY}, + }; + + // Check conversions in both directions + + // short -> float + for (var testCase : testCases) { + errors += compareAndReportError((short)testCase[0], + testCase[1]); + } + + // float -> short + for (var testCase : testCases) { + errors += compareAndReportError(testCase[1], + (short)testCase[0]); + } + + return errors; + } + + private static int testRoundFloatToBinary16() { + int errors = 0; + + float[][] testCases = { + // Test all combinations of LSB, round, and sticky bit + + // LSB = 0, test combination of round and sticky + {0x1.ff8000p-1f, (short)0x3bfe}, // round = 0, sticky = 0 + {0x1.ff8010p-1f, (short)0x3bfe}, // round = 0, sticky = 1 + {0x1.ffa000p-1f, (short)0x3bfe}, // round = 1, sticky = 0 + {0x1.ffa010p-1f, (short)0x3bff}, // round = 1, sticky = 1 => ++ + + // LSB = 1, test combination of round and sticky + {0x1.ffc000p-1f, Binary16.ONE-1}, // round = 0, sticky = 0 + {0x1.ffc010p-1f, Binary16.ONE-1}, // round = 0, sticky = 1 + {0x1.ffe000p-1f, Binary16.ONE}, // round = 1, sticky = 0 => ++ + {0x1.ffe010p-1f, Binary16.ONE}, // round = 1, sticky = 1 
=> ++ + + // Test subnormal rounding + // Largest subnormal binary16 0x03ff => 0x1.ff8p-15f; LSB = 1 + {0x1.ff8000p-15f, Binary16.MAX_SUBNORMAL}, // round = 0, sticky = 0 + {0x1.ff8010p-15f, Binary16.MAX_SUBNORMAL}, // round = 0, sticky = 1 + {0x1.ffc000p-15f, Binary16.MIN_NORMAL}, // round = 1, sticky = 0 => ++ + {0x1.ffc010p-15f, Binary16.MIN_NORMAL}, // round = 1, sticky = 1 => ++ + + // Test rounding near binary16 MIN_VALUE + // Smallest in magnitude subnormal binary16 value 0x0001 => 0x1.0p-24f + // Half-way case,0x1.0p-25f, and smaller should round down to zero + {0x1.fffffep-26f, Binary16.POSITIVE_ZERO}, // nextDown in float + {0x1.000000p-25f, Binary16.POSITIVE_ZERO}, + {0x1.000002p-25f, Binary16.MIN_VALUE}, // nextUp in float + {0x1.100000p-25f, Binary16.MIN_VALUE}, + + // Test rounding near overflow threshold + // Largest normal binary16 number 0x7bff => 0x1.ffcp15f; LSB = 1 + {0x1.ffc000p15f, Binary16.MAX_VALUE}, // round = 0, sticky = 0 + {0x1.ffc010p15f, Binary16.MAX_VALUE}, // round = 0, sticky = 1 + {0x1.ffe000p15f, Binary16.POSITIVE_INFINITY}, // round = 1, sticky = 0 => ++ + {0x1.ffe010p15f, Binary16.POSITIVE_INFINITY}, // round = 1, sticky = 1 => ++ + }; + + for (var testCase : testCases) { + errors += compareAndReportError(testCase[0], + (short)testCase[1]); + } + return errors; + } + + private static int testRoundFloatToBinary16HalfWayCases() { + int errors = 0; + + // Test rounding of exact half-way cases between each pair of + // finite exactly-representable binary16 numbers. Also test + // rounding of half-way +/- ulp of the *float* value. + // Additionally, test +/- float ulp of the endpoints. (Other + // tests in this file make sure all short values round-trip so + // that doesn't need to be tested here.) 
+ + for (int i = Binary16.POSITIVE_ZERO; // 0x0000 + i <= Binary16.MAX_VALUE; // 0x7bff + i += 2) { // Check every even/odd pair once + short lower = (short) i; + short upper = (short)(i+1); + + float lowerFloat = Float.float16ToFloat(lower); + float upperFloat = Float.float16ToFloat(upper); + assert lowerFloat < upperFloat; + + float midway = (lowerFloat + upperFloat) * 0.5f; // Exact midpoint + + errors += compareAndReportError(Math.nextUp(lowerFloat), lower); + errors += compareAndReportError(Math.nextDown(midway), lower); + + // Under round to nearest even, the midway point will + // round *down* to the (even) lower endpoint. + errors += compareAndReportError( midway, lower); + + errors += compareAndReportError(Math.nextUp( midway), upper); + errors += compareAndReportError(Math.nextDown(upperFloat), upper); + } + + // More testing around the overflow threshold + // Binary16.ulp(Binary16.MAX_VALUE) == 32.0f; test around Binary16.MAX_VALUE + 1/2 ulp + float binary16_MAX_VALUE = Float.float16ToFloat(Binary16.MAX_VALUE); + float binary16_MAX_VALUE_halfUlp = binary16_MAX_VALUE + 16.0f; + + errors += compareAndReportError(Math.nextDown(binary16_MAX_VALUE), Binary16.MAX_VALUE); + errors += compareAndReportError( binary16_MAX_VALUE, Binary16.MAX_VALUE); + errors += compareAndReportError(Math.nextUp( binary16_MAX_VALUE), Binary16.MAX_VALUE); + + // Binary16.MAX_VALUE is an "odd" value since its LSB = 1 so + // the half-way value greater than Binary16.MAX_VALUE should + // round up to the next even value, in this case Binary16.POSITIVE_INFINITY. 
+ errors += compareAndReportError(Math.nextDown(binary16_MAX_VALUE_halfUlp), Binary16.MAX_VALUE); + errors += compareAndReportError( binary16_MAX_VALUE_halfUlp, Binary16.POSITIVE_INFINITY); + errors += compareAndReportError(Math.nextUp( binary16_MAX_VALUE_halfUlp), Binary16.POSITIVE_INFINITY); + + return errors; + } + + private static int compareAndReportError(float input, + short expected) { + // Round to nearest even is sign symmetric + return compareAndReportError0( input, expected) + + compareAndReportError0(-input, Binary16.negate(expected)); + } + + private static int compareAndReportError0(float input, + short expected) { + short actual = Float.floatToFloat16(input); + if (!Binary16.equivalent(actual, expected)) { + System.out.println("Unexpected result of converting " + + Float.toHexString(input) + + " to short. Expected 0x" + Integer.toHexString(0xFFFF & expected) + + " got 0x" + Integer.toHexString(0xFFFF & actual)); + return 1; + } + return 0; + } + + private static int compareAndReportError0(short input, + float expected) { + float actual = Float.float16ToFloat(input); + if (Float.compare(actual, expected) != 0) { + System.out.println("Unexpected result of converting " + + Integer.toHexString(input & 0xFFFF) + + " to float. Expected " + Float.toHexString(expected) + + " got " + Float.toHexString(actual)); + return 1; + } + return 0; + } + + private static int compareAndReportError(short input, + float expected) { + // Round to nearest even is sign symmetric + return compareAndReportError0( input, expected) + + compareAndReportError0(Binary16.negate(input), -expected); + } + + private static int testRoundFloatToBinary16FullBinade() { + int errors = 0; + + // For each float value between 1.0 and less than 2.0 + // (i.e. set of float values with an exponent of 0), convert + // each value to binary16 and then convert that binary16 value + // back to float. 
+ // + // Any exponent could be used; the maximum exponent for normal + // values would not exercise the full set of code paths since + // there is an up-front check on values that would overflow, + // which correspond to a ripple-carry of the significand that + // bumps the exponent. + short previous = (short)0; + for (int i = Float.floatToIntBits(1.0f); + i <= Float.floatToIntBits(Math.nextDown(2.0f)); + i++) { + // (Could also express the loop control directly in terms + // of floating-point operations, incrementing by ulp(1.0), + // etc.) + + float f = Float.intBitsToFloat(i); + short f_as_bin16 = Float.floatToFloat16(f); + short f_as_bin16_down = (short)(f_as_bin16 - 1); + short f_as_bin16_up = (short)(f_as_bin16 + 1); + + // Across successive float values to convert to binary16, + // the binary16 results should be semi-monotonic, + // non-decreasing in this case. + + // Only positive binary16 values so can compare using integer operations + if (f_as_bin16 < previous) { + errors++; + System.out.println("Semi-monotonicity violation observed on float: " + Float.toHexString(f) + "/" + Integer.toHexString(i) + " " + + Integer.toHexString(0xffff & f_as_bin16) + " previous: " + Integer.toHexString(0xffff & previous) + " f_as_bin16: " + Integer.toHexString(0xffff & f_as_bin16)); + } + // previous = f_as_bin16; + + // If round-to-nearest was correctly done, when exactly + // mapped back to float, f_as_bin16 should be at least as + // close as either of its neighbors to the original value + // of f.
+ + float f_prime_down = Float.float16ToFloat(f_as_bin16_down); + float f_prime = Float.float16ToFloat(f_as_bin16); + float f_prime_up = Float.float16ToFloat(f_as_bin16_up); + + previous = f_as_bin16; + + float f_prime_diff = Math.abs(f - f_prime); + if (f_prime_diff == 0.0) { + continue; + } + float f_prime_down_diff = Math.abs(f - f_prime_down); + float f_prime_up_diff = Math.abs(f - f_prime_up); + + if (f_prime_diff > f_prime_down_diff || + f_prime_diff > f_prime_up_diff) { + errors++; + System.out.println("Round-to-nearest violation on converting " + + Float.toHexString(f) + "/" + Integer.toHexString(i) + " to binary16 and back: " + Integer.toHexString(0xffff & f_as_bin16) + " f_prime: " + Float.toHexString(f_prime)); + } + } + return errors; + } + + private static int testAlternativeImplementation() { + int errors = 0; + + // For exhaustive test of all float values use + // for (long ell = Integer.MIN_VALUE; ell <= Integer.MAX_VALUE; ell++) { + + for (long ell = Float.floatToIntBits(2.0f); + ell <= Float.floatToIntBits(4.0f); + ell++) { + float f = Float.intBitsToFloat((int)ell); + short s1 = Float.floatToFloat16(f); + short s2 = testAltFloatToFloat16(f); + + if (s1 != s2) { + errors++; + System.out.println("Different conversion of float value " + Float.toHexString(f)); + } + } + + return errors; + } + + /* + * Rely on float operations to do rounding in both normal and + * subnormal binary16 cases. + */ + public static short testAltFloatToFloat16(float f) { + int doppel = Float.floatToRawIntBits(f); + short sign_bit = (short)((doppel & 0x8000_0000) >> 16); + + if (Float.isNaN(f)) { + // Preserve sign and attempt to preserve significand bits + return (short)(sign_bit + | 0x7c00 // max exponent + 1 + // Preserve high order bit of float NaN in the + // binary16 result NaN (tenth bit); OR in remaining + // bits into lower 9 bits of binary 16 significand. 
+ | (doppel & 0x007f_e000) >> 13 // 10 bits + | (doppel & 0x0000_1ff0) >> 4 // 9 bits + | (doppel & 0x0000_000f)); // 4 bits + } + + float abs_f = Math.abs(f); + + // The overflow threshold is binary16 MAX_VALUE + 1/2 ulp + if (abs_f >= (65504.0f + 16.0f) ) { + return (short)(sign_bit | 0x7c00); // Positive or negative infinity + } else { + // Smallest magnitude nonzero representable binary16 value + // is equal to 0x1.0p-24; half-way and smaller rounds to zero. + if (abs_f <= 0x1.0p-25f) { // Covers float zeros and subnormals. + return sign_bit; // Positive or negative zero + } + + // Dealing with finite values in exponent range of + // binary16 (when rounding is done, could still round up) + int exp = Math.getExponent(f); + assert -25 <= exp && exp <= 15; + short signif_bits; + + if (exp <= -15) { // scale down to float subnormal range to do rounding + // Use a float multiply to compute the correct + // trailing significand bits for a binary16 subnormal. + // + // The exponent range of normalized binary16 subnormal + // values is [-24, -15]. The exponent range of float + // subnormals is [-149, -140]. Multiply abs_f down by + // 2^(-125) -- since (-125 = -149 - (-24)) -- so that + // the trailing bits of a subnormal float represent + // the correct trailing bits of a binary16 subnormal. + exp = -15; // Subnormal encoding using -E_max. + float f_adjust = abs_f * 0x1.0p-125f; + + // In case the significand rounds up and has a carry + // propagate all the way up, take the bottom 11 bits + // rather than bottom 10 bits. Adding this value, + // rather than OR'ing this value, will cause the right + // exponent adjustment.
+ signif_bits = (short)(Float.floatToRawIntBits(f_adjust) & 0x07ff); + return (short)(sign_bit | ( ((exp + 15) << 10) + signif_bits ) ); + } else { + // Scale down to subnormal range to round off excess bits + int scalingExp = -139 - exp; + float scaled = Math.scalb(Math.scalb(f, scalingExp), + -scalingExp); + exp = Math.getExponent(scaled); + doppel = Float.floatToRawIntBits(scaled); + + signif_bits = (short)((doppel & 0x007f_e000) >> + (FLOAT_SIGNIFICAND_WIDTH - 11)); + return (short)(sign_bit | ( ((exp + 15) << 10) | signif_bits ) ); + } + } + } + + public static class Binary16 { + public static final short POSITIVE_INFINITY = (short)0x7c00; + public static final short MAX_VALUE = 0x7bff; + public static final short ONE = 0x3c00; + public static final short MIN_NORMAL = 0x0400; + public static final short MAX_SUBNORMAL = 0x03ff; + public static final short MIN_VALUE = 0x0001; + public static final short POSITIVE_ZERO = 0x0000; + + public static boolean isNaN(short binary16) { + return ((binary16 & 0x7c00) == 0x7c00) // Max exponent and... + && ((binary16 & 0x03ff) != 0 ); // significand nonzero. + } + + public static short negate(short binary16) { + return (short)(binary16 ^ 0x8000 ); // Flip only sign bit. + } + + public static boolean equivalent(short bin16_1, short bin16_2) { + return (bin16_1 == bin16_2) || + isNaN(bin16_1) && isNaN(bin16_2); + } + } +} diff --git a/test/hotspot/jtreg/compiler/intrinsics/float16/Binary16ConversionNaN.java b/test/hotspot/jtreg/compiler/intrinsics/float16/Binary16ConversionNaN.java new file mode 100644 index 00000000000..38060dfb504 --- /dev/null +++ b/test/hotspot/jtreg/compiler/intrinsics/float16/Binary16ConversionNaN.java @@ -0,0 +1,161 @@ +/* + * Copyright (c) 2022, 2023, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/* + * @test + * @bug 8289551 8302976 + * @summary Verify NaN sign and significand bits are preserved across conversions + * @requires (vm.cpu.features ~= ".*avx512vl.*" | vm.cpu.features ~= ".*f16c.*") | os.arch=="aarch64" + * @requires vm.compiler1.enabled & vm.compiler2.enabled + * @requires vm.compMode != "Xcomp" + * @library /test/lib / + * + * @build jdk.test.whitebox.WhiteBox + * @run driver jdk.test.lib.helpers.ClassFileInstaller jdk.test.whitebox.WhiteBox + * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI + * -Xmixed -XX:-BackgroundCompilation -XX:-UseOnStackReplacement + * -XX:CompileThresholdScaling=1000.0 Binary16ConversionNaN + */ + +/* + * The behavior tested below is an implementation property not + * required by the specification. It would be acceptable for this + * information to not be preserved (as long as a NaN is returned) if, + * say, an intrinsified version using native hardware instructions + * behaved differently.
+ * + * If that is the case, this test should be modified to disable + * intrinsics or to otherwise not run on platforms with a differently + * behaving intrinsic. + */ + +import compiler.whitebox.CompilerWhiteBoxTest; +import jdk.test.whitebox.WhiteBox; +import java.lang.reflect.Method; + +public class Binary16ConversionNaN { + + private static final WhiteBox WHITE_BOX = WhiteBox.getWhiteBox(); + + /* + * Put all 16-bit NaN values through a conversion loop and make + * sure the significand, sign, and exponent are all preserved. + */ + public static void main(String... argv) throws NoSuchMethodException { + int errors = 0; + final int NAN_EXPONENT = 0x7c00; + final int SIGN_BIT = 0x8000; + + // First, run with Interpreter only to collect "gold" data. + // Flags -Xmixed -XX:CompileThresholdScaling=1000.0 are used + // to prevent compilation during this phase. + short[] pVal = new short[1024]; + short[] pRes = new short[1024]; + short[] nVal = new short[1024]; + short[] nRes = new short[1024]; + + // A NaN has a nonzero significand + for (int i = 1; i <= 0x3ff; i++) { + short binary16NaN = (short)(NAN_EXPONENT | i); + assert isNaN(binary16NaN); + short s1 = testRoundTrip(binary16NaN); + errors += verify(binary16NaN, s1); + pVal[i] = binary16NaN; + pRes[i] = s1; + + short binary16NegNaN = (short)(SIGN_BIT | binary16NaN); + short s2 = testRoundTrip(binary16NegNaN); + errors += verify(binary16NegNaN, s2); + nVal[i] = binary16NegNaN; + nRes[i] = s2; + } + if (errors > 0) { // Exit if Interpreter failed + throw new RuntimeException(errors + " errors"); + } + + Method test_method = Binary16ConversionNaN.class.getDeclaredMethod("testRoundTrip", short.class); + + // Compile with C1 and compare results + WHITE_BOX.enqueueMethodForCompilation(test_method, CompilerWhiteBoxTest.COMP_LEVEL_SIMPLE); + if (!WHITE_BOX.isMethodCompiled(test_method)) { + throw new RuntimeException("test is not compiled by C1"); + } + for (int i = 1; i <= 0x3ff; i++) { + short s1 =
testRoundTrip(pVal[i]); + errors += verifyCompiler(pRes[i], s1, "C1"); + short s2 = testRoundTrip(nVal[i]); + errors += verifyCompiler(nRes[i], s2, "C1"); + } + + WHITE_BOX.deoptimizeMethod(test_method); + + // Compile with C2 and compare results + WHITE_BOX.enqueueMethodForCompilation(test_method, CompilerWhiteBoxTest.COMP_LEVEL_FULL_OPTIMIZATION); + if (!WHITE_BOX.isMethodCompiled(test_method)) { + throw new RuntimeException("test is not compiled by C2"); + } + for (int i = 1; i <= 0x3ff; i++) { + short s1 = testRoundTrip(pVal[i]); + errors += verifyCompiler(pRes[i], s1, "C2"); + short s2 = testRoundTrip(nVal[i]); + errors += verifyCompiler(nRes[i], s2, "C2"); + } + + if (errors > 0) { + throw new RuntimeException(errors + " errors"); + } + } + + private static boolean isNaN(short binary16) { + return ((binary16 & 0x7c00) == 0x7c00) // Max exponent and... + && ((binary16 & 0x03ff) != 0 ); // significand nonzero. + } + + private static short testRoundTrip(short i) { + float f = Float.float16ToFloat(i); + return Float.floatToFloat16(f); + } + + private static int verify(short s, short s2) { + int errors = 0; + if ((s & ~0x0200) != (s2 & ~0x0200)) { // ignore QNaN bit + errors++; + System.out.println("Roundtrip failure on NaN value " + + Integer.toHexString(0xFFFF & (int)s) + + "\t got back " + Integer.toHexString(0xFFFF & (int)s2)); + } + return errors; + } + + private static int verifyCompiler(short s, short s2, String name) { + int errors = 0; + if (s != s2) { + errors++; + System.out.println("Roundtrip failure on NaN value " + + Integer.toHexString(0xFFFF & (int)s) + + "\t got back " + Integer.toHexString(0xFFFF & (int)s2) + + "\t from " + name + " code"); + } + return errors; + } +} diff --git a/test/hotspot/jtreg/compiler/intrinsics/float16/TestAllFloat16ToFloat.java b/test/hotspot/jtreg/compiler/intrinsics/float16/TestAllFloat16ToFloat.java new file mode 100644 index 00000000000..4afda120709 --- /dev/null +++ 
b/test/hotspot/jtreg/compiler/intrinsics/float16/TestAllFloat16ToFloat.java @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ */ + +/* + * @test + * @bug 8302976 + * @summary Verify conversion between float and the binary16 format + * @requires (vm.cpu.features ~= ".*avx512vl.*" | vm.cpu.features ~= ".*f16c.*") | os.arch == "aarch64" + * @requires vm.compiler1.enabled & vm.compiler2.enabled + * @requires vm.compMode != "Xcomp" + * @comment default run: + * @run main TestAllFloat16ToFloat + * @comment disable intrinsics: + * @run main/othervm -XX:+UnlockDiagnosticVMOptions -XX:DisableIntrinsic=_float16ToFloat,_floatToFloat16 TestAllFloat16ToFloat + * @comment eager JIT compilation: + * @run main/othervm -XX:CompileCommand=compileonly,TestAllFloat16ToFloat::test* -Xbatch TestAllFloat16ToFloat + * @comment C2 JIT compilation only: + * @run main/othervm -XX:CompileCommand=compileonly,TestAllFloat16ToFloat::test* -Xbatch -XX:-TieredCompilation TestAllFloat16ToFloat + * @comment C1 JIT compilation only: + * @run main/othervm -XX:CompileCommand=compileonly,TestAllFloat16ToFloat::test* -Xbatch -XX:TieredStopAtLevel=1 TestAllFloat16ToFloat + */ + +public class TestAllFloat16ToFloat { + public static short testFloatToFloat16(float f) { + return Float.floatToFloat16(f); + } + + public static float testFloat16ToFloat(short s) { + return Float.float16ToFloat(s); + } + + public static short testRoundTrip(short s) { + return Float.floatToFloat16(Float.float16ToFloat(s)); + } + + public static void verify(short sVal, float fVal, short sRes) { + if (sRes != sVal) { + if (!Float.isNaN(fVal) || ((sRes & ~0x0200) != (sVal & ~0x0200)) ) { + String fVal_hex = Integer.toHexString(Float.floatToRawIntBits(fVal)); + String sRes_hex = Integer.toHexString(sRes & 0xffff); + String sVal_hex = Integer.toHexString(sVal & 0xffff); + throw new RuntimeException("Inconsistent result for Float.floatToFloat16(" + fVal + "/" + fVal_hex + "): " + sRes_hex + " != " + sVal_hex); + } + } + } + + public static void run() { + // Testing all float16 values. 
+ for (int bits = Short.MIN_VALUE; bits <= Short.MAX_VALUE; ++bits) { short sVal = (short)bits; // int counter so that 0x7fff is visited too + float fVal = Float.float16ToFloat(sVal); + short sRes = testFloatToFloat16(fVal); + verify(sVal, fVal, sRes); + float fRes = testFloat16ToFloat(sVal); + if (!Float.isNaN(fRes) && fRes != fVal) { + String sVal_hex = Integer.toHexString(sVal & 0xffff); + String fRes_hex = Integer.toHexString(Float.floatToRawIntBits(fRes)); + String fVal_hex = Integer.toHexString(Float.floatToRawIntBits(fVal)); + throw new RuntimeException("Inconsistent result for Float.float16ToFloat(" + sVal_hex + "): " + fRes + "/" + fRes_hex + " != " + fVal + "/" + fVal_hex); + } + sRes = testRoundTrip(sVal); + verify(sVal, fVal, sRes); + if (Float.floatToFloat16(fRes) != Float.floatToFloat16(fVal)) { + String sVal_hex = Integer.toHexString(sVal & 0xffff); + String sfRes_hex = Integer.toHexString(Float.floatToFloat16(fRes) & 0xffff); + String sfVal_hex = Integer.toHexString(Float.floatToFloat16(fVal)& 0xffff); + throw new RuntimeException("Inconsistent result for Float.float16ToFloat(" + sVal_hex + "): " + sfRes_hex + " != " + sfVal_hex); + } + } + } + + public static void main(String[] args) { + // Run twice to trigger compilation + for (int i = 0; i < 2; i++) { + run(); + } + } +} diff --git a/test/hotspot/jtreg/compiler/intrinsics/float16/TestConstFloat16ToFloat.java b/test/hotspot/jtreg/compiler/intrinsics/float16/TestConstFloat16ToFloat.java new file mode 100644 index 00000000000..062e6804897 --- /dev/null +++ b/test/hotspot/jtreg/compiler/intrinsics/float16/TestConstFloat16ToFloat.java @@ -0,0 +1,166 @@ +/* + * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation.
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/* + * @test + * @bug 8302976 + * @summary Verify conversion cons between float and the binary16 format + * @requires (vm.cpu.features ~= ".*avx512vl.*" | vm.cpu.features ~= ".*f16c.*") | os.arch=="aarch64" + * @requires vm.compiler1.enabled & vm.compiler2.enabled + * @requires vm.compMode != "Xcomp" + * @comment default run: + * @run main TestConstFloat16ToFloat + * @comment C1 JIT compilation only: + * @run main/othervm -Xcomp -XX:CompileCommand=compileonly,TestConstFloat16ToFloat::test* -XX:TieredStopAtLevel=1 TestConstFloat16ToFloat + * @comment C2 JIT compilation only: + * @run main/othervm -Xcomp -XX:CompileCommand=compileonly,TestConstFloat16ToFloat::test* -XX:-TieredCompilation TestConstFloat16ToFloat + */ + +public class TestConstFloat16ToFloat { + + public static class Binary16 { + public static final short POSITIVE_INFINITY = (short)0x7c00; + public static final short MAX_VALUE = 0x7bff; + public static final short ONE = 0x3c00; + public static final short MIN_NORMAL = 0x0400; + public static final short MAX_SUBNORMAL = 0x03ff; + public static final short MIN_VALUE = 0x0001; + public static final short POSITIVE_ZERO = 0x0000; + } + + static final short[] sCon = { + Short.MIN_VALUE, + Short.MIN_VALUE + 1, + -1, + 0, + +1, + 
Short.MAX_VALUE - 1, + Short.MAX_VALUE, + Binary16.MIN_VALUE, + Binary16.MIN_NORMAL, + Binary16.POSITIVE_ZERO, + Binary16.ONE, + Binary16.MAX_VALUE, + Binary16.MAX_SUBNORMAL, + Binary16.POSITIVE_INFINITY + }; + + public final static class BinaryF16 { + public static final float POSITIVE_INFINITY = Float.POSITIVE_INFINITY; + public static final float MAX_VALUE = 65504.0f; + public static final float ONE = 1.0f; + public static final float MIN_NORMAL = 0x1.0p-14f; + public static final float MAX_SUBNORMAL = 0x1.ff8p-15f; + public static final float MIN_VALUE = 0x1.0p-24f; + public static final float POSITIVE_ZERO = +0.0f; // decimal literal on purpose: 0x0f is the hex int 15, not a float zero + } + + static float[] fCon = { + 0.0f - BinaryF16.POSITIVE_INFINITY, + 0.0f - BinaryF16.MAX_VALUE, + 0.0f - BinaryF16.MAX_SUBNORMAL, + 0.0f - BinaryF16.MIN_VALUE, + 0.0f - BinaryF16.MIN_NORMAL, + -1.0f, + -0.0f, + BinaryF16.MIN_VALUE, + BinaryF16.MIN_NORMAL, + BinaryF16.POSITIVE_ZERO, + BinaryF16.ONE, + BinaryF16.MAX_VALUE, + BinaryF16.MAX_SUBNORMAL, + BinaryF16.POSITIVE_INFINITY + }; + + // Testing some constant values (optimized by C2).
+ public static void testFloat16Const(float[] fRes) { + fRes[ 0] = Float.float16ToFloat(Short.MIN_VALUE); + fRes[ 1] = Float.float16ToFloat((short)(Short.MIN_VALUE + 1)); + fRes[ 2] = Float.float16ToFloat((short)-1); + fRes[ 3] = Float.float16ToFloat((short)0); + fRes[ 4] = Float.float16ToFloat((short)+1); + fRes[ 5] = Float.float16ToFloat((short)(Short.MAX_VALUE - 1)); + fRes[ 6] = Float.float16ToFloat(Short.MAX_VALUE); + fRes[ 7] = Float.float16ToFloat(Binary16.MIN_VALUE); + fRes[ 8] = Float.float16ToFloat(Binary16.MIN_NORMAL); + fRes[ 9] = Float.float16ToFloat(Binary16.POSITIVE_ZERO); + fRes[10] = Float.float16ToFloat(Binary16.ONE); + fRes[11] = Float.float16ToFloat(Binary16.MAX_VALUE); + fRes[12] = Float.float16ToFloat(Binary16.MAX_SUBNORMAL); + fRes[13] = Float.float16ToFloat(Binary16.POSITIVE_INFINITY); + } + + public static void testFloatConst(short[] sRes) { + sRes[ 0] = Float.floatToFloat16(0.0f - BinaryF16.POSITIVE_INFINITY); + sRes[ 1] = Float.floatToFloat16(0.0f - BinaryF16.MAX_VALUE); + sRes[ 2] = Float.floatToFloat16(0.0f - BinaryF16.MAX_SUBNORMAL); + sRes[ 3] = Float.floatToFloat16(0.0f - BinaryF16.MIN_VALUE); + sRes[ 4] = Float.floatToFloat16(0.0f - BinaryF16.MIN_NORMAL); + sRes[ 5] = Float.floatToFloat16(-1.0f); + sRes[ 6] = Float.floatToFloat16(-0.0f); + sRes[ 7] = Float.floatToFloat16(BinaryF16.MIN_VALUE); + sRes[ 8] = Float.floatToFloat16(BinaryF16.MIN_NORMAL); + sRes[ 9] = Float.floatToFloat16(BinaryF16.POSITIVE_ZERO); + sRes[10] = Float.floatToFloat16(BinaryF16.ONE); + sRes[11] = Float.floatToFloat16(BinaryF16.MAX_VALUE); + sRes[12] = Float.floatToFloat16(BinaryF16.MAX_SUBNORMAL); + sRes[13] = Float.floatToFloat16(BinaryF16.POSITIVE_INFINITY); + } + + public static void run() { + short s = Float.floatToFloat16(0.0f); // Load Float class + // Testing constant float16 values. 
+ float[] fRes = new float[sCon.length]; + testFloat16Const(fRes); + for (int i = 0; i < sCon.length; i++) { + float fVal = Float.float16ToFloat(sCon[i]); + if (Float.floatToRawIntBits(fRes[i]) != Float.floatToRawIntBits(fVal)) { + String cVal_hex = Integer.toHexString(sCon[i] & 0xffff); + String fRes_hex = Integer.toHexString(Float.floatToRawIntBits(fRes[i])); + String fVal_hex = Integer.toHexString(Float.floatToRawIntBits(fVal)); + throw new RuntimeException("Inconsistent result for Float.float16ToFloat(" + cVal_hex + "): " + fRes[i] + "/" + fRes_hex + " != " + fVal + "/" + fVal_hex); + } + } + + // Testing constant float values. + short[] sRes = new short[fCon.length]; + testFloatConst(sRes); + for (int i = 0; i < fCon.length; i++) { + short sVal = Float.floatToFloat16(fCon[i]); + if (sRes[i] != sVal) { + String cVal_hex = Integer.toHexString(Float.floatToRawIntBits(fCon[i])); + String sRes_hex = Integer.toHexString(sRes[i] & 0xffff); + String sVal_hex = Integer.toHexString(sVal & 0xffff); + throw new RuntimeException("Inconsistent result for Float.floatToFloat16(" + fCon[i] + "/" + cVal_hex + "): " + sRes_hex + " != " + sVal_hex); + } + } + + } + + public static void main(String[] args) { + // Run twice to trigger compilation + for (int i = 0; i < 2; i++) { + run(); + } + } +} diff --git a/test/jdk/java/lang/Float/Binary16ConversionNaN.java b/test/jdk/java/lang/Float/Binary16ConversionNaN.java index 99820c5e79e..7be3f70971e 100644 --- a/test/jdk/java/lang/Float/Binary16ConversionNaN.java +++ b/test/jdk/java/lang/Float/Binary16ConversionNaN.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2022, 2023, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -26,7 +26,6 @@ * @bug 8289551 * @requires (os.arch != "x86" & os.arch != "i386") | vm.opt.UseSSE == "null" | vm.opt.UseSSE > 0 * @summary Verify NaN sign and significand bits are preserved across conversions - * @run main/othervm -XX:-TieredCompilation -XX:CompileThresholdScaling=0.1 Binary16ConversionNaN * @run main/othervm -XX:+UnlockDiagnosticVMOptions * -XX:DisableIntrinsic=_float16ToFloat,_floatToFloat16 Binary16ConversionNaN */