From ce2a7ea40a22c652e5f8559c91d5eea197e2d708 Mon Sep 17 00:00:00 2001 From: Scott Gibbons Date: Wed, 30 Aug 2023 01:28:27 +0000 Subject: [PATCH] 8314056: Remove runtime platform check from frem/drem Reviewed-by: sviswanathan, jbhateja --- src/hotspot/cpu/x86/assembler_x86.cpp | 72 ++++++++-------- src/hotspot/cpu/x86/assembler_x86.hpp | 17 ++-- src/hotspot/cpu/x86/sharedRuntime_x86.cpp | 50 ++--------- src/hotspot/cpu/x86/stubGenerator_x86_64.cpp | 6 +- .../cpu/x86/stubGenerator_x86_64_fmod.cpp | 83 ++++++++++--------- src/hotspot/cpu/x86/stubRoutines_x86.hpp | 6 ++ src/hotspot/cpu/x86/stubRoutines_x86_64.cpp | 1 + src/hotspot/share/runtime/sharedRuntime.cpp | 4 +- 8 files changed, 110 insertions(+), 129 deletions(-) diff --git a/src/hotspot/cpu/x86/assembler_x86.cpp b/src/hotspot/cpu/x86/assembler_x86.cpp index 6c1440f1ca0..43e08269504 100644 --- a/src/hotspot/cpu/x86/assembler_x86.cpp +++ b/src/hotspot/cpu/x86/assembler_x86.cpp @@ -6232,11 +6232,17 @@ void Assembler::subss(XMMRegister dst, Address src) { emit_operand(dst, src, 0); } -void Assembler::testb(Register dst, int imm8) { +void Assembler::testb(Register dst, int imm8, bool use_ral) { NOT_LP64(assert(dst->has_byte_register(), "must have byte register")); if (dst == rax) { - emit_int8((unsigned char)0xA8); - emit_int8(imm8); + if (use_ral) { + emit_int8((unsigned char)0xA8); + emit_int8(imm8); + } else { + emit_int8((unsigned char)0xF6); + emit_int8((unsigned char)0xC4); + emit_int8(imm8); + } } else { (void) prefix_and_encode(dst->encoding(), true); emit_arith_b(0xF6, 0xC0, dst, imm8); @@ -10967,6 +10973,36 @@ void Assembler::emit_operand32(Register reg, Address adr, int post_addr_length) emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec, post_addr_length); } +void Assembler::fld_d(Address adr) { + InstructionMark im(this); + emit_int8((unsigned char)0xDD); + emit_operand32(rax, adr, 0); +} + +void Assembler::fprem() { + emit_int16((unsigned char)0xD9, (unsigned char)0xF8); +} + +void Assembler::fnstsw_ax() { + emit_int16((unsigned char)0xDF, (unsigned char)0xE0); +} + +void Assembler::fstp_d(Address adr) { + InstructionMark im(this); + emit_int8((unsigned char)0xDD); + emit_operand32(rbx, adr, 0); +} + +void Assembler::fstp_d(int index) { + emit_farith(0xDD, 0xD8, index); +} + +void Assembler::emit_farith(int b1, int b2, int i) { + assert(isByte(b1) && isByte(b2), "wrong opcode"); + assert(0 <= i && i < 8, "illegal stack offset"); + emit_int16(b1, b2 + i); +} + #ifndef _LP64 // 32bit only pieces of the assembler @@ -11010,12 +11046,6 @@ void Assembler::decl(Register dst) { // 64bit doesn't use the x87 -void Assembler::emit_farith(int b1, int b2, int i) { - assert(isByte(b1) && isByte(b2), "wrong opcode"); - assert(0 <= i && i < 8, "illegal stack offset"); - emit_int16(b1, b2 + i); -} - void Assembler::fabs() { emit_int16((unsigned char)0xD9, (unsigned char)0xE1); } @@ -11177,12 +11207,6 @@ void Assembler::fld1() { emit_int16((unsigned char)0xD9, (unsigned char)0xE8); } -void Assembler::fld_d(Address adr) { - InstructionMark im(this); - emit_int8((unsigned char)0xDD); - emit_operand32(rax, adr, 0); -} - void Assembler::fld_s(Address adr) { InstructionMark im(this); emit_int8((unsigned char)0xD9); @@ -11266,14 +11290,6 @@ void Assembler::fnstcw(Address src) { emit_operand32(rdi, src, 0); } -void Assembler::fnstsw_ax() { - emit_int16((unsigned char)0xDF, (unsigned char)0xE0); -} - -void Assembler::fprem() { - emit_int16((unsigned char)0xD9, (unsigned char)0xF8); -} - void Assembler::fprem1() { emit_int16((unsigned char)0xD9, (unsigned char)0xF5); } @@ -11304,16 +11320,6 @@ void Assembler::fst_s(Address adr) { emit_operand32(rdx, adr, 0); } -void Assembler::fstp_d(Address adr) { - InstructionMark im(this); - emit_int8((unsigned char)0xDD); - emit_operand32(rbx, adr, 0); -} - -void Assembler::fstp_d(int index) { - emit_farith(0xDD, 0xD8, index); -} - void Assembler::fstp_s(Address adr) { InstructionMark im(this); emit_int8((unsigned char)0xD9); diff --git a/src/hotspot/cpu/x86/assembler_x86.hpp b/src/hotspot/cpu/x86/assembler_x86.hpp index c12045c6c5f..abd6aaf9b1e 100644 --- a/src/hotspot/cpu/x86/assembler_x86.hpp +++ b/src/hotspot/cpu/x86/assembler_x86.hpp @@ -1245,12 +1245,18 @@ private: void divss(XMMRegister dst, XMMRegister src); -#ifndef _LP64 + void fnstsw_ax(); + void fprem(); + void fld_d(Address adr); + void fstp_d(Address adr); + void fstp_d(int index); + private: void emit_farith(int b1, int b2, int i); public: +#ifndef _LP64 void emms(); void fabs(); @@ -1309,7 +1315,6 @@ private: void fld1(); - void fld_d(Address adr); void fld_s(Address adr); void fld_s(int index); @@ -1338,10 +1343,6 @@ private: void fnsave(Address dst); void fnstcw(Address src); - - void fnstsw_ax(); - - void fprem(); void fprem1(); void frstor(Address src); @@ -1353,8 +1354,6 @@ private: void fst_d(Address adr); void fst_s(Address adr); - void fstp_d(Address adr); - void fstp_d(int index); void fstp_s(Address adr); void fsub(int i); @@ -2184,7 +2183,7 @@ private: void subss(XMMRegister dst, XMMRegister src); void testb(Address dst, int imm8); - void testb(Register dst, int imm8); + void testb(Register dst, int imm8, bool use_ral = true); void testl(Address dst, int32_t imm32); void testl(Register dst, int32_t imm32); diff --git a/src/hotspot/cpu/x86/sharedRuntime_x86.cpp b/src/hotspot/cpu/x86/sharedRuntime_x86.cpp index d39cab092f8..de759d2ff57 100644 --- a/src/hotspot/cpu/x86/sharedRuntime_x86.cpp +++ b/src/hotspot/cpu/x86/sharedRuntime_x86.cpp @@ -84,52 +84,18 @@ void SharedRuntime::inline_check_hashcode_from_object_header(MacroAssembler* mas } #endif //COMPILER1 -#if defined(TARGET_COMPILER_gcc) && !defined(_WIN64) JRT_LEAF(jfloat, SharedRuntime::frem(jfloat x, jfloat y)) - jfloat retval; - const bool is_LP64 = LP64_ONLY(true) NOT_LP64(false); - if (!is_LP64 || UseAVX < 1 || !UseFMA) { - asm ("\ -1: \n\ -fprem \n\ -fnstsw %%ax \n\ -test $0x4,%%ah \n\ -jne 1b \n\ -" - :"=t"(retval) - :"0"(x), "u"(y) - :"cc", "ax"); - } else { - assert(StubRoutines::fmod() != nullptr, ""); - jdouble (*addr)(jdouble, jdouble) = (double (*)(double, double))StubRoutines::fmod(); - jdouble dx = (jdouble) x; - jdouble dy = (jdouble) y; + assert(StubRoutines::fmod() != nullptr, ""); + jdouble (*addr)(jdouble, jdouble) = (double (*)(double, double))StubRoutines::fmod(); + jdouble dx = (jdouble) x; + jdouble dy = (jdouble) y; - retval = (jfloat) (*addr)(dx, dy); - } - return retval; + return (jfloat) (*addr)(dx, dy); JRT_END JRT_LEAF(jdouble, SharedRuntime::drem(jdouble x, jdouble y)) - jdouble retval; - const bool is_LP64 = LP64_ONLY(true) NOT_LP64(false); - if (!is_LP64 || UseAVX < 1 || !UseFMA) { - asm ("\ -1: \n\ -fprem \n\ -fnstsw %%ax \n\ -test $0x4,%%ah \n\ -jne 1b \n\ -" - :"=t"(retval) - :"0"(x), "u"(y) - :"cc", "ax"); - } else { - assert(StubRoutines::fmod() != nullptr, ""); - jdouble (*addr)(jdouble, jdouble) = (double (*)(double, double))StubRoutines::fmod(); + assert(StubRoutines::fmod() != nullptr, ""); + jdouble (*addr)(jdouble, jdouble) = (double (*)(double, double))StubRoutines::fmod(); - retval = (*addr)(x, y); - } - return retval; + return (*addr)(x, y); JRT_END -#endif // TARGET_COMPILER_gcc && !_WIN64 diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp index e70723730cd..cf45aad2fc7 100644 --- a/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp +++ b/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp @@ -3897,6 +3897,8 @@ address StubGenerator::generate_throw_exception(const char* name, void StubGenerator::create_control_words() { // Round to nearest, 64-bit mode, exceptions masked StubRoutines::x86::_mxcsr_std = 0x1F80; + // Round to zero, 64-bit mode, exceptions masked + StubRoutines::x86::_mxcsr_rz = 0x7F80; } // Initialization @@ -3979,9 +3981,7 @@ void StubGenerator::generate_initial_stubs() { generate_libm_stubs(); - if ((UseAVX >= 1) && (VM_Version::supports_avx512vlbwdq() || VM_Version::supports_fma())) { - StubRoutines::_fmod = generate_libmFmod(); // from stubGenerator_x86_64_fmod.cpp - } + StubRoutines::_fmod = generate_libmFmod(); // from stubGenerator_x86_64_fmod.cpp } void StubGenerator::generate_continuation_stubs() { diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64_fmod.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_64_fmod.cpp index 04ad300ddcd..26b5b594424 100644 --- a/src/hotspot/cpu/x86/stubGenerator_x86_64_fmod.cpp +++ b/src/hotspot/cpu/x86/stubGenerator_x86_64_fmod.cpp @@ -27,6 +27,7 @@ #include "precompiled.hpp" #include "macroAssembler_x86.hpp" #include "stubGenerator_x86_64.hpp" +#include "runtime/stubRoutines.hpp" /******************************************************************************/ // ALGORITHM DESCRIPTION - FMOD() @@ -72,6 +73,7 @@ ATTRIBUTE_ALIGNED(32) static const uint64_t CONST_e307[] = { }; address StubGenerator::generate_libmFmod() { + __ align(CodeEntryAlignment); StubCodeMark mark(this, "StubRoutines", "libmFmod"); address start = __ pc(); __ enter(); // required for proper stackwalking of RuntimeStub frame @@ -287,21 +289,11 @@ address StubGenerator::generate_libmFmod() { // { // double a, b, sgn_a, q, bs, bs2, corr, res; // unsigned eq; - // unsigned mxcsr, mxcsr_rz; - - // __asm { stmxcsr DWORD PTR[mxcsr] } - // mxcsr_rz = 0x7f80 | mxcsr; - __ push(rax); - __ stmxcsr(Address(rsp, 0)); - __ movl(rax, Address(rsp, 0)); - __ movl(rcx, rax); - __ orl(rcx, 0x7f80); - __ movl(Address(rsp, 0x04), rcx); // // |x|, |y| // a = DP_AND(x, DP_CONST(7fffffffffffffff)); __ movq(xmm2, xmm0); - __ vmovdqu(xmm3, ExternalAddress((address)CONST_NaN), rcx); + __ movdqu(xmm3, ExternalAddress((address)CONST_NaN), rcx); __ vpand(xmm4, xmm2, xmm3, Assembler::AVX_128bit); // b = DP_AND(y, DP_CONST(7fffffffffffffff)); __ vpand(xmm3, xmm1, xmm3, Assembler::AVX_128bit); @@ -313,18 +305,16 @@ address StubGenerator::generate_libmFmod() { // if (a < b) return x + sgn_a; __ ucomisd(xmm3, xmm4); - __ jcc(Assembler::belowEqual, L_104a); + __ jccb(Assembler::belowEqual, L_104a); __ vaddsd(xmm0, xmm2, xmm0); __ jmp(L_11bd); - // if (((mxcsr & 0x6000)!=0x2000) && (a < b * 0x1p+260)) + // if (a < b * 0x1p+260) __ bind(L_104a); - __ andl(rax, 0x6000); - __ cmpl(rax, 0x2000); - __ jcc(Assembler::equal, L_10c1); + __ vmulsd(xmm0, xmm3, ExternalAddress((address)CONST_1p260), rax); __ ucomisd(xmm0, xmm4); - __ jcc(Assembler::belowEqual, L_10c1); + __ jccb(Assembler::belowEqual, L_10c1); // { // q = DP_DIV(a, b); __ vdivpd(xmm0, xmm4, xmm3, Assembler::AVX_128bit); @@ -340,7 +330,7 @@ address StubGenerator::generate_libmFmod() { __ vroundsd(xmm0, xmm0, xmm0, 3); // a = DP_FNMA(b, q, a); __ vfnmadd213sd(xmm0, xmm3, xmm4); - __ align32(); + __ align(16); // while (b <= a) __ bind(L_1090); __ ucomisd(xmm0, xmm3); @@ -359,14 +349,14 @@ address StubGenerator::generate_libmFmod() { __ vroundsd(xmm4, xmm4, xmm4, 3); // a = DP_FNMA(b, q, a); __ vfnmadd231sd(xmm0, xmm3, xmm4); - __ jmp(L_1090); + __ jmpb(L_1090); // } // return DP_XOR(a, sgn_a); // } // __asm { ldmxcsr DWORD PTR [mxcsr_rz] } __ bind(L_10c1); - __ ldmxcsr(Address(rsp, 0x04)); + __ ldmxcsr(ExternalAddress(StubRoutines::x86::addr_mxcsr_rz()), rax /*rscratch*/); // q = DP_DIV(a, b); __ vdivpd(xmm0, xmm4, xmm3, Assembler::AVX_128bit); @@ -378,7 +368,7 @@ address StubGenerator::generate_libmFmod() { // if (__builtin_expect((eq >= 0x7fefffffu), (0==1))) goto SPECIAL_FMOD; __ cmpl(rax, 0x7feffffe); - __ jcc(Assembler::above, L_10e7); + __ jccb(Assembler::above, L_10e7); // a = DP_FNMA(b, q, a); __ vfnmadd213sd(xmm0, xmm3, xmm4); @@ -391,31 +381,31 @@ address StubGenerator::generate_libmFmod() { __ bind(L_10e7); __ vpxor(xmm5, xmm5, xmm5, Assembler::AVX_128bit); __ ucomisd(xmm3, xmm5); - __ jcc(Assembler::notEqual, L_10f3); - __ jcc(Assembler::noParity, L_111c); + __ jccb(Assembler::notEqual, L_10f3); + __ jccb(Assembler::noParity, L_111c); __ bind(L_10f3); __ movsd(xmm5, ExternalAddress((address)CONST_MAX), rax); __ ucomisd(xmm5, xmm4); - __ jcc(Assembler::below, L_111c); + __ jccb(Assembler::below, L_111c); // return res; // } // // y is NaN? // if (!(b <= DP_CONST(7ff0000000000000))) { __ movsd(xmm0, ExternalAddress((address)CONST_INF), rax); __ ucomisd(xmm0, xmm3); - __ jcc(Assembler::aboveEqual, L_112a); + __ jccb(Assembler::aboveEqual, L_112a); // res = y + y; __ vaddsd(xmm0, xmm1, xmm1); // __asm { ldmxcsr DWORD PTR[mxcsr] } - __ ldmxcsr(Address(rsp, 0)); + __ ldmxcsr(ExternalAddress(StubRoutines::x86::addr_mxcsr_std()), rax /*rscratch*/); __ jmp(L_11bd); // { // res = DP_FNMA(b, q, a); // NaN __ bind(L_111c); __ vfnmadd213sd(xmm0, xmm3, xmm4); // __asm { ldmxcsr DWORD PTR[mxcsr] } - __ ldmxcsr(Address(rsp, 0)); + __ ldmxcsr(ExternalAddress(StubRoutines::x86::addr_mxcsr_std()), rax /*rscratch*/); __ jmp(L_11bd); // return res; // } @@ -435,14 +425,14 @@ address StubGenerator::generate_libmFmod() { // if (eq >= 0x7fefffffu) __ cmpl(rax, 0x7fefffff); - __ jcc(Assembler::below, L_116e); + __ jccb(Assembler::below, L_116e); // { // // b* 2*1023 * 2^1023 // bs2 = bs * DP_CONST(7fe0000000000000); __ vmulsd(xmm0, xmm1, ExternalAddress((address)CONST_e307), rax); // while (bs2 <= a) __ ucomisd(xmm4, xmm0); - __ jcc(Assembler::below, L_1173); + __ jccb(Assembler::below, L_1173); // { // q = DP_DIV(a, bs2); __ bind(L_1157); @@ -453,8 +443,8 @@ address StubGenerator::generate_libmFmod() { __ vfnmadd231sd(xmm4, xmm0, xmm5); // while (bs2 <= a) __ ucomisd(xmm4, xmm0); - __ jcc(Assembler::aboveEqual, L_1157); - __ jmp(L_1173); + __ jccb(Assembler::aboveEqual, L_1157); + __ jmpb(L_1173); // } // } // else @@ -465,9 +455,9 @@ address StubGenerator::generate_libmFmod() { // while (bs <= a) __ bind(L_1173); __ ucomisd(xmm4, xmm1); - __ jcc(Assembler::aboveEqual, L_117f); + __ jccb(Assembler::aboveEqual, L_117f); __ movapd(xmm0, xmm4); - __ jmp(L_11af); + __ jmpb(L_11af); // { // q = DP_DIV(a, bs); __ bind(L_117f); @@ -480,9 +470,9 @@ address StubGenerator::generate_libmFmod() { // while (bs <= a) __ ucomisd(xmm0, xmm1); __ movapd(xmm4, xmm0); - __ jcc(Assembler::aboveEqual, L_117f); - __ jmp(L_11af); - __ align32(); + __ jccb(Assembler::aboveEqual, L_117f); + __ jmpb(L_11af); + __ align(16); // { // q = DP_DIV(a, b); __ bind(L_11a0); @@ -496,11 +486,11 @@ address StubGenerator::generate_libmFmod() { // while (b <= a) __ bind(L_11af); __ ucomisd(xmm0, xmm3); - __ jcc(Assembler::aboveEqual, L_11a0); + __ jccb(Assembler::aboveEqual, L_11a0); // } // __asm { ldmxcsr DWORD PTR[mxcsr] } - __ ldmxcsr(Address(rsp, 0)); + __ ldmxcsr(ExternalAddress(StubRoutines::x86::addr_mxcsr_std()), rax /*rscratch*/); __ bind(L_11b9); __ vpxor(xmm0, xmm2, xmm0, Assembler::AVX_128bit); // } @@ -509,10 +499,23 @@ address StubGenerator::generate_libmFmod() { // } __ bind(L_11bd); - __ pop(rax); } else { // SSE version - assert(false, "SSE not implemented"); + Label x87_loop; + __ movsd(Address(rbp, -8), xmm1); + __ movsd(Address(rbp, -16), xmm0); + __ fld_d(Address(rbp, -8)); + __ fld_d(Address(rbp, -16)); + + __ bind(x87_loop); + __ fprem(); + __ fnstsw_ax(); + __ testb(rax, 0x4, false); + __ jcc(Assembler::notZero, x87_loop); + + __ fstp_d(1); + __ fstp_d(Address(rbp, -8)); + __ movsd(xmm0, Address(rbp, -8)); } __ leave(); // required for proper stackwalking of RuntimeStub frame diff --git a/src/hotspot/cpu/x86/stubRoutines_x86.hpp b/src/hotspot/cpu/x86/stubRoutines_x86.hpp index 2038fdff5ae..a5246860c0f 100644 --- a/src/hotspot/cpu/x86/stubRoutines_x86.hpp +++ b/src/hotspot/cpu/x86/stubRoutines_x86.hpp @@ -126,6 +126,9 @@ class x86 { private: static jint _mxcsr_std; +#ifdef _LP64 + static jint _mxcsr_rz; +#endif // _LP64 static address _verify_mxcsr_entry; @@ -207,6 +210,9 @@ class x86 { public: static address addr_mxcsr_std() { return (address)&_mxcsr_std; } +#ifdef _LP64 + static address addr_mxcsr_rz() { return (address)&_mxcsr_rz; } +#endif // _LP64 static address verify_mxcsr_entry() { return _verify_mxcsr_entry; } static address crc_by128_masks_addr() { return (address)_crc_by128_masks; } #ifdef _LP64 diff --git a/src/hotspot/cpu/x86/stubRoutines_x86_64.cpp b/src/hotspot/cpu/x86/stubRoutines_x86_64.cpp index b4d96473c37..4287580e7f9 100644 --- a/src/hotspot/cpu/x86/stubRoutines_x86_64.cpp +++ b/src/hotspot/cpu/x86/stubRoutines_x86_64.cpp @@ -32,6 +32,7 @@ // a description of how to extend it, see the stubRoutines.hpp file. jint StubRoutines::x86::_mxcsr_std = 0; +jint StubRoutines::x86::_mxcsr_rz = 0; address StubRoutines::x86::_get_previous_sp_entry = nullptr; diff --git a/src/hotspot/share/runtime/sharedRuntime.cpp b/src/hotspot/share/runtime/sharedRuntime.cpp index 9ab2b8cedd0..0920190d679 100644 --- a/src/hotspot/share/runtime/sharedRuntime.cpp +++ b/src/hotspot/share/runtime/sharedRuntime.cpp @@ -238,7 +238,7 @@ const julong double_sign_mask = CONST64(0x7FFFFFFFFFFFFFFF); const julong double_infinity = CONST64(0x7FF0000000000000); #endif -#if !defined(X86) || !defined(TARGET_COMPILER_gcc) || defined(_WIN64) +#if !defined(X86) JRT_LEAF(jfloat, SharedRuntime::frem(jfloat x, jfloat y)) #ifdef _WIN64 // 64-bit Windows on amd64 returns the wrong values for @@ -270,7 +270,7 @@ JRT_LEAF(jdouble, SharedRuntime::drem(jdouble x, jdouble y)) return ((jdouble)fmod((double)x,(double)y)); #endif JRT_END -#endif // !X86 || !TARGET_COMPILER_gcc || _WIN64 +#endif // !X86 JRT_LEAF(jfloat, SharedRuntime::i2f(jint x)) return (jfloat)x;