8345169: Implement JEP 503: Remove the 32-bit x86 Port

Reviewed-by: ihse, mdoerr, vlivanov, kvn, coleenp, dholmes
This commit is contained in:
Aleksey Shipilev 2025-03-26 09:23:45 +00:00
parent eb6e8288c6
commit ee710fec21
25 changed files with 5 additions and 29729 deletions

View File

@ -75,10 +75,11 @@ AC_DEFUN_ONCE([BASIC_SETUP_PATHS],
AC_MSG_NOTICE([Rewriting ORIGINAL_PATH to $REWRITTEN_PATH])
fi
if test "x$OPENJDK_TARGET_CPU" = xx86 && test "x$with_jvm_variants" != xzero; then
AC_MSG_ERROR([32-bit x86 builds are not supported])
fi
if test "x$OPENJDK_TARGET_OS" = "xwindows"; then
if test "x$OPENJDK_TARGET_CPU_BITS" = "x32"; then
AC_MSG_ERROR([32-bit Windows builds are not supported])
fi
BASIC_SETUP_PATHS_WINDOWS
fi

View File

@ -666,17 +666,7 @@ AC_DEFUN([PLATFORM_CHECK_DEPRECATION],
[
AC_ARG_ENABLE(deprecated-ports, [AS_HELP_STRING([--enable-deprecated-ports@<:@=yes/no@:>@],
[Suppress the error when configuring for a deprecated port @<:@no@:>@])])
# Unfortunately, variants have not been parsed yet, so we have to check the configure option
# directly. Allow only the directly specified Zero variant, treat any other mix as containing
# something non-Zero.
if test "x$OPENJDK_TARGET_CPU" = xx86 && test "x$with_jvm_variants" != xzero; then
if test "x$enable_deprecated_ports" = "xyes"; then
AC_MSG_WARN([The 32-bit x86 port is deprecated and may be removed in a future release.])
else
AC_MSG_ERROR(m4_normalize([The 32-bit x86 port is deprecated and may be removed in a future release.
Use --enable-deprecated-ports=yes to suppress this error.]))
fi
fi
# There are no deprecated ports. Implement the deprecation warnings here.
])
AC_DEFUN_ONCE([PLATFORM_SETUP_OPENJDK_BUILD_OS_VERSION],

View File

@ -1,42 +0,0 @@
/*
* Copyright (c) 2020, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
#include "prims/downcallLinker.hpp"
// Platform hook that would build a downcall stub. This port has no
// implementation: the body only reports Unimplemented().
// NOTE(review): the trailing return presumably exists only to satisfy the
// return type, assuming Unimplemented() does not return normally - confirm.
RuntimeStub* DowncallLinker::make_downcall_stub(BasicType* signature,
                                                int num_args,
                                                BasicType ret_bt,
                                                const ABIDescriptor& abi,
                                                const GrowableArray<VMStorage>& input_registers,
                                                const GrowableArray<VMStorage>& output_registers,
                                                bool needs_return_buffer,
                                                int captured_state_mask,
                                                bool needs_transition) {
  Unimplemented();
  return nullptr;
}
// Platform hook taking an oop register, an offset register and two
// temporaries. Not provided by this port: reports Unimplemented().
void DowncallLinker::StubGenerator::pd_add_offset_to_oop(VMStorage reg_oop,
                                                         VMStorage reg_offset,
                                                         VMStorage tmp1,
                                                         VMStorage tmp2) const {
  Unimplemented();
}

View File

@ -1,54 +0,0 @@
/*
* Copyright (c) 2022, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
#include "code/vmreg.hpp"
#include "prims/foreignGlobals.hpp"
#include "utilities/debug.hpp"
class MacroAssembler;
// Always answers false: this port reports the foreign linker as unsupported.
bool ForeignGlobals::is_foreign_linker_supported() {
  return false;
}
// Platform hook for turning a Java-side ABI description into an
// ABIDescriptor. Not provided by this port: reports Unimplemented().
// The value-initialized descriptor is returned only to satisfy the signature.
const ABIDescriptor ForeignGlobals::parse_abi_descriptor(jobject jabi) {
  Unimplemented();
  return {};
}
// Platform hook that would report the spill size of `reg`.
// Not provided by this port: reports Unimplemented(); the -1 return value
// only satisfies the signature.
int RegSpiller::pd_reg_size(VMStorage reg) {
  Unimplemented();
  return -1;
}
// Platform hook that would emit code storing `reg` at `offset`.
// Not provided by this port: reports Unimplemented().
void RegSpiller::pd_store_reg(MacroAssembler* masm, int offset, VMStorage reg) {
  Unimplemented();
}
// Platform hook that would emit code reloading `reg` from `offset`.
// Not provided by this port: reports Unimplemented().
void RegSpiller::pd_load_reg(MacroAssembler* masm, int offset, VMStorage reg) {
  Unimplemented();
}
// Platform hook that would emit the argument shuffle code.
// Not provided by this port: reports Unimplemented().
void ArgumentShuffle::pd_generate(MacroAssembler* masm,
                                  VMStorage tmp,
                                  int in_stk_bias,
                                  int out_stk_bias) const {
  Unimplemented();
}

View File

@ -1,71 +0,0 @@
//
// Copyright (c) 2018, Red Hat, Inc. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//
source_hpp %{
#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp"
#include "gc/shenandoah/c2/shenandoahSupport.hpp"
%}
// Shenandoah pointer compare-and-swap.
// Matches both the strong (ShenandoahCompareAndSwapP) and the weak
// (ShenandoahWeakCompareAndSwapP) node and emits the same code for each:
// a call to the Shenandoah barrier-set assembler's cmpxchg_oop() with the
// "exchange" flag set to false. The outcome is produced in $res.
instruct compareAndSwapP_shenandoah(rRegI res,
memory mem_ptr,
eRegP tmp1, eRegP tmp2,
eAXRegP oldval, eRegP newval,
eFlagsReg cr)
%{
match(Set res (ShenandoahCompareAndSwapP mem_ptr (Binary oldval newval)));
match(Set res (ShenandoahWeakCompareAndSwapP mem_ptr (Binary oldval newval)));
// tmp1/tmp2 are scratch registers; the flags and oldval are clobbered.
effect(TEMP tmp1, TEMP tmp2, KILL cr, KILL oldval);
format %{ "shenandoah_cas_oop $mem_ptr,$newval" %}
ins_encode %{
// Argument order: masm, result, address, expected value, new value,
// exchange flag, two temporaries.
ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm,
$res$$Register, $mem_ptr$$Address, $oldval$$Register, $newval$$Register,
false, // swap
$tmp1$$Register, $tmp2$$Register
);
%}
ins_pipe( pipe_cmpxchg );
%}
// Shenandoah pointer compare-and-exchange.
// Matches ShenandoahCompareAndExchangeP; the node's result is defined in
// $oldval (the match rule is "Set oldval ..."). Code is emitted through the
// Shenandoah barrier-set assembler's cmpxchg_oop() with the "exchange" flag
// set to true and no separate result register (noreg).
instruct compareAndExchangeP_shenandoah(memory mem_ptr,
eAXRegP oldval, eRegP newval,
eRegP tmp1, eRegP tmp2,
eFlagsReg cr)
%{
match(Set oldval (ShenandoahCompareAndExchangeP mem_ptr (Binary oldval newval)));
// The flags are clobbered; tmp1/tmp2 are scratch registers.
effect(KILL cr, TEMP tmp1, TEMP tmp2);
ins_cost(1000);
format %{ "shenandoah_cas_oop $mem_ptr,$newval" %}
ins_encode %{
// Argument order: masm, result (unused here), address, expected value,
// new value, exchange flag, two temporaries.
ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm,
noreg, $mem_ptr$$Address, $oldval$$Register, $newval$$Register,
true, // exchange
$tmp1$$Register, $tmp2$$Register
);
%}
ins_pipe( pipe_cmpxchg );
%}

View File

@ -1,145 +0,0 @@
/*
* Copyright (c) 1998, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "interpreter/interp_masm.hpp"
#include "interpreter/interpreter.hpp"
#include "interpreter/interpreterRuntime.hpp"
#include "memory/allocation.inline.hpp"
#include "oops/method.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/icache.hpp"
#include "runtime/interfaceSupport.inline.hpp"
#include "runtime/signature.hpp"
#define __ _masm->
// Implementation of SignatureHandlerGenerator
// Sets up the signature iterator for `method` and creates the MacroAssembler
// that emits the handler code into `buffer`.
InterpreterRuntime::SignatureHandlerGenerator::SignatureHandlerGenerator(
    const methodHandle& method, CodeBuffer* buffer)
  : NativeSignatureIterator(method) {
  _masm = new MacroAssembler(buffer);
}
// int argument: copy one 32-bit slot from local offset() to outgoing slot
// jni_offset() + 1.
void InterpreterRuntime::SignatureHandlerGenerator::pass_int() {
  const int src_slot = offset();
  const int dst_slot = jni_offset() + 1;
  move(src_slot, dst_slot);
}
// float argument: handled exactly like an int - one 32-bit slot is copied
// from local offset() to outgoing slot jni_offset() + 1.
void InterpreterRuntime::SignatureHandlerGenerator::pass_float() {
  const int src_slot = offset();
  const int dst_slot = jni_offset() + 1;
  move(src_slot, dst_slot);
}
// long argument: two 32-bit slots are copied, and their order is swapped on
// the way out (local offset() lands in the higher outgoing slot).
void InterpreterRuntime::SignatureHandlerGenerator::pass_long() {
  // move() only emits code, so both values are stable across the two calls.
  const int local_slot = offset();
  const int jni_slot   = jni_offset();
  move(local_slot,     jni_slot + 2);
  move(local_slot + 1, jni_slot + 1);
}
// Object argument: passed via box(), which hands over the address of the
// local slot, or null when the slot itself holds null.
void InterpreterRuntime::SignatureHandlerGenerator::pass_object() {
  const int src_slot = offset();
  const int dst_slot = jni_offset() + 1;
  box(src_slot, dst_slot);
}
void InterpreterRuntime::SignatureHandlerGenerator::move(int from_offset, int to_offset) {
__ movl(temp(), Address(from(), Interpreter::local_offset_in_bytes(from_offset)));
__ movl(Address(to(), to_offset * wordSize), temp());
}
void InterpreterRuntime::SignatureHandlerGenerator::box(int from_offset, int to_offset) {
__ lea(temp(), Address(from(), Interpreter::local_offset_in_bytes(from_offset)));
__ cmpptr(Address(from(), Interpreter::local_offset_in_bytes(from_offset)), NULL_WORD); // do not use temp() to avoid AGI
Label L;
__ jcc(Assembler::notZero, L);
__ movptr(temp(), NULL_WORD);
__ bind(L);
__ movptr(Address(to(), to_offset * wordSize), temp());
}
// Emits the complete signature handler: the per-argument copy code selected
// by `fingerprint`, followed by code that loads the method's result handler
// address into rax and returns.
void InterpreterRuntime::SignatureHandlerGenerator::generate(uint64_t fingerprint) {
  // Argument handling code, one pass_*() callback per argument
  iterate(fingerprint);

  // Load the address of the result handler for this method's result type
  const address result_handler =
      (address)Interpreter::result_handler(method()->result_type());
  __ lea(rax, ExternalAddress(result_handler));

  // Return, then flush the assembler
  __ ret(0);
  __ flush();
}
// Base register used by move()/box() to address the interpreter locals.
Register InterpreterRuntime::SignatureHandlerGenerator::from() {
  return rdi;
}
// Base register used by move()/box() to address the outgoing argument words.
Register InterpreterRuntime::SignatureHandlerGenerator::to() {
  return rsp;
}
// Scratch register used by move()/box() to carry the value being copied.
Register InterpreterRuntime::SignatureHandlerGenerator::temp() {
  return rcx;
}
// Implementation of SignatureHandlerLibrary
// Platform hook invoked with a handler address; intentionally does nothing
// on this port.
void SignatureHandlerLibrary::pd_set_handler(address handler) {
}
class SlowSignatureHandler: public NativeSignatureIterator {
private:
address _from;
intptr_t* _to;
virtual void pass_int() {
*_to++ = *(jint *)(_from+Interpreter::local_offset_in_bytes(0));
_from -= Interpreter::stackElementSize;
}
virtual void pass_float() {
*_to++ = *(jint *)(_from+Interpreter::local_offset_in_bytes(0));
_from -= Interpreter::stackElementSize;
}
virtual void pass_long() {
_to[0] = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(1));
_to[1] = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(0));
_to += 2;
_from -= 2*Interpreter::stackElementSize;
}
virtual void pass_object() {
// pass address of from
intptr_t from_addr = (intptr_t)(_from + Interpreter::local_offset_in_bytes(0));
*_to++ = (*(intptr_t*)from_addr == 0) ? NULL_WORD : from_addr;
_from -= Interpreter::stackElementSize;
}
public:
SlowSignatureHandler(const methodHandle& method, address from, intptr_t* to) :
NativeSignatureIterator(method) {
_from = from;
_to = to + (is_static() ? 2 : 1);
}
};
JRT_ENTRY(address, InterpreterRuntime::slow_signature_handler(JavaThread* current, Method* method, intptr_t* from, intptr_t* to))
methodHandle m(current, (Method*)method);
assert(m->is_native(), "sanity check");
// handle arguments
SlowSignatureHandler(m, (address)from, to + 1).iterate((uint64_t)CONST64(-1));
// return result handler
return Interpreter::result_handler(m->result_type());
JRT_END

View File

@ -1,323 +0,0 @@
/*
* Copyright (c) 2004, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "asm/macroAssembler.hpp"
#include "memory/resourceArea.hpp"
#include "prims/jniFastGetField.hpp"
#include "prims/jvm_misc.hpp"
#include "prims/jvmtiExport.hpp"
#include "runtime/os.inline.hpp"
#include "runtime/safepoint.hpp"
#include "runtime/stubRoutines.hpp"
#define __ masm->
#define BUFFER_SIZE 30
// Instead of issuing lfence for LoadLoad barrier, we create data dependency
// between loads, which is much more efficient than lfence.
// Generates the fast-path stub for the JNI Get<Type>Field accessors of the
// integral types T_BOOLEAN, T_BYTE, T_CHAR, T_SHORT and T_INT, and returns
// the stub's entry point.
//
// The emitted code:
//   1. reads the safepoint counter into rcx and takes the slow path if its
//      low bit is set (or, when field-access events can be posted, if the
//      field access count is non-zero);
//   2. loads the field speculatively, with the object load made data
//      dependent on the counter value;
//   3. re-reads the counter through an address that is data dependent on the
//      loaded value and takes the slow path if it differs from rcx.
// The slow path tail-calls the regular jni_Get<Type>Field implementation.
//
// The pc of the speculative load and of the slow-case entry are recorded in
// speculative_load_pclist / slowcase_entry_pclist at index `count`.
address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) {
  const char *name = nullptr;
  switch (type) {
    case T_BOOLEAN: name = "jni_fast_GetBooleanField"; break;
    case T_BYTE:    name = "jni_fast_GetByteField";    break;
    case T_CHAR:    name = "jni_fast_GetCharField";    break;
    case T_SHORT:   name = "jni_fast_GetShortField";   break;
    case T_INT:     name = "jni_fast_GetIntField";     break;
    default:        ShouldNotReachHere();
  }
  ResourceMark rm;
  BufferBlob* blob = BufferBlob::create(name, BUFFER_SIZE*wordSize);
  CodeBuffer cbuf(blob);
  MacroAssembler* masm = new MacroAssembler(&cbuf);
  address fast_entry = __ pc();

  Label slow;

  // stack layout:    offset from rsp (in words):
  //  return pc        0
  //  jni env          1
  //  obj              2
  //  jfieldID         3

  ExternalAddress counter(SafepointSynchronize::safepoint_counter_addr());
  __ mov32 (rcx, counter);
  __ testb (rcx, 1);
  __ jcc (Assembler::notZero, slow);

  if (JvmtiExport::can_post_field_access()) {
    // Check to see if a field access watch has been set before we
    // take the fast path.
    __ cmp32(ExternalAddress((address) JvmtiExport::get_field_access_count_addr()), 0);
    __ jcc(Assembler::notZero, slow);
  }

  __ mov(rax, rcx);
  __ andptr(rax, 1);                         // rax, must end up 0
  __ movptr(rdx, Address(rsp, rax, Address::times_1, 2*wordSize));
                                             // obj, notice rax, is 0.
                                             // rdx is data dependent on rcx.
  __ movptr(rax, Address(rsp, 3*wordSize));  // jfieldID

  __ clear_jobject_tag(rdx);

  __ movptr(rdx, Address(rdx, 0));           // *obj
  __ shrptr (rax, 2);                        // offset

  assert(count < LIST_CAPACITY, "LIST_CAPACITY too small");
  speculative_load_pclist[count] = __ pc();
  switch (type) {
    case T_BOOLEAN: __ movzbl (rax, Address(rdx, rax, Address::times_1)); break;
    case T_BYTE:    __ movsbl (rax, Address(rdx, rax, Address::times_1)); break;
    case T_CHAR:    __ movzwl (rax, Address(rdx, rax, Address::times_1)); break;
    case T_SHORT:   __ movswl (rax, Address(rdx, rax, Address::times_1)); break;
    case T_INT:     __ movl   (rax, Address(rdx, rax, Address::times_1)); break;
    default:        ShouldNotReachHere();
  }

  // Rebuild the counter address in rdx so that it is data dependent on the
  // loaded value: counter_addr ^ rax ^ rax == counter_addr.
  __ lea(rdx, counter);
  __ xorptr(rdx, rax);
  __ xorptr(rdx, rax);
  // Slow path if the counter changed since it was read into rcx.
  __ cmp32(rcx, Address(rdx, 0));
  __ jcc (Assembler::notEqual, slow);

  __ ret (0);

  slowcase_entry_pclist[count++] = __ pc();
  __ bind (slow);
  address slow_case_addr = nullptr;
  switch (type) {
    case T_BOOLEAN: slow_case_addr = jni_GetBooleanField_addr(); break;
    case T_BYTE:    slow_case_addr = jni_GetByteField_addr();    break;
    case T_CHAR:    slow_case_addr = jni_GetCharField_addr();    break;
    case T_SHORT:   slow_case_addr = jni_GetShortField_addr();   break;
    case T_INT:     slow_case_addr = jni_GetIntField_addr();     break;
    default:        ShouldNotReachHere();
  }
  // tail call
  __ jump (RuntimeAddress(slow_case_addr));

  __ flush ();

  return fast_entry;
}
// Fast GetBooleanField stub: the shared integral generator with T_BOOLEAN.
address JNI_FastGetField::generate_fast_get_boolean_field() {
  const BasicType field_type = T_BOOLEAN;
  return generate_fast_get_int_field0(field_type);
}
// Fast GetByteField stub: the shared integral generator with T_BYTE.
address JNI_FastGetField::generate_fast_get_byte_field() {
  const BasicType field_type = T_BYTE;
  return generate_fast_get_int_field0(field_type);
}
// Fast GetCharField stub: the shared integral generator with T_CHAR.
address JNI_FastGetField::generate_fast_get_char_field() {
  const BasicType field_type = T_CHAR;
  return generate_fast_get_int_field0(field_type);
}
// Fast GetShortField stub: the shared integral generator with T_SHORT.
address JNI_FastGetField::generate_fast_get_short_field() {
  const BasicType field_type = T_SHORT;
  return generate_fast_get_int_field0(field_type);
}
// Fast GetIntField stub: the shared integral generator with T_INT.
address JNI_FastGetField::generate_fast_get_int_field() {
  const BasicType field_type = T_INT;
  return generate_fast_get_int_field0(field_type);
}
// Generates the fast-path stub for JNI GetLongField and returns its entry
// point.
//
// The field is read as two loads (into rax, and into rdx from the address
// 4 bytes higher), so two entries are recorded in speculative_load_pclist
// and two in slowcase_entry_pclist; both slow-case entries hold the same pc.
// rsi is saved on entry and restored on both the fast and the slow exit.
//
// As in the other fast accessors, the safepoint counter is read before the
// loads and re-read afterwards through an address that is data dependent on
// both loaded halves; any mismatch, an odd counter, or a non-zero field
// access count (when such events can be posted) diverts to the regular
// jni_GetLongField implementation via a tail call.
address JNI_FastGetField::generate_fast_get_long_field() {
  const char *name = "jni_fast_GetLongField";
  ResourceMark rm;
  BufferBlob* blob = BufferBlob::create(name, BUFFER_SIZE*wordSize);
  CodeBuffer cbuf(blob);
  MacroAssembler* masm = new MacroAssembler(&cbuf);
  address fast_entry = __ pc();

  Label slow;

  // stack layout:    offset from rsp (in words):
  //  old rsi          0
  //  return pc        1
  //  jni env          2
  //  obj              3
  //  jfieldID         4

  ExternalAddress counter(SafepointSynchronize::safepoint_counter_addr());

  __ push (rsi);
  __ mov32 (rcx, counter);
  __ testb (rcx, 1);
  __ jcc (Assembler::notZero, slow);

  if (JvmtiExport::can_post_field_access()) {
    // Check to see if a field access watch has been set before we
    // take the fast path.
    __ cmp32(ExternalAddress((address) JvmtiExport::get_field_access_count_addr()), 0);
    __ jcc(Assembler::notZero, slow);
  }

  __ mov(rax, rcx);
  __ andptr(rax, 1);                         // rax, must end up 0
  __ movptr(rdx, Address(rsp, rax, Address::times_1, 3*wordSize));
                                             // obj, notice rax, is 0.
                                             // rdx is data dependent on rcx.
  __ movptr(rsi, Address(rsp, 4*wordSize));  // jfieldID

  __ clear_jobject_tag(rdx);

  __ movptr(rdx, Address(rdx, 0));           // *obj
  __ shrptr(rsi, 2);                         // offset

  // Two pc-list entries are consumed below, hence the "-1".
  assert(count < LIST_CAPACITY-1, "LIST_CAPACITY too small");
  speculative_load_pclist[count++] = __ pc();
  __ movptr(rax, Address(rdx, rsi, Address::times_1));
  speculative_load_pclist[count] = __ pc();
  __ movl(rdx, Address(rdx, rsi, Address::times_1, 4));

  // Rebuild the counter address in rsi so that it is data dependent on both
  // loaded halves: counter_addr ^ rdx ^ rax ^ rdx ^ rax == counter_addr.
  __ lea(rsi, counter);
  __ xorptr(rsi, rdx);
  __ xorptr(rsi, rax);
  __ xorptr(rsi, rdx);
  __ xorptr(rsi, rax);
  // Slow path if the counter changed since it was read into rcx.
  __ cmp32(rcx, Address(rsi, 0));
  __ jcc (Assembler::notEqual, slow);

  __ pop (rsi);
  __ ret (0);

  // Both speculative loads share the same slow-case entry.
  slowcase_entry_pclist[count-1] = __ pc();
  slowcase_entry_pclist[count++] = __ pc();
  __ bind (slow);
  __ pop (rsi);
  address slow_case_addr = jni_GetLongField_addr();
  // tail call
  __ jump (RuntimeAddress(slow_case_addr));

  __ flush ();

  return fast_entry;
}
// Generates the fast-path stub for JNI GetFloatField / GetDoubleField
// (type is T_FLOAT or T_DOUBLE) and returns its entry point.
//
// The field is loaded onto the x87 FPU stack (fld_s / fld_d). To make the
// counter re-check data dependent on that load, the value is stored as a
// single-precision float just below rsp and read back into rax, which is
// then folded into the counter address. If the re-check fails, the loaded
// value is popped from the FPU stack before entering the slow path; the two
// early slow exits happen before anything has been loaded and skip the pop.
// The slow path tail-calls the regular jni_Get<Type>Field implementation.
address JNI_FastGetField::generate_fast_get_float_field0(BasicType type) {
  const char *name = nullptr;
  switch (type) {
    case T_FLOAT:  name = "jni_fast_GetFloatField";  break;
    case T_DOUBLE: name = "jni_fast_GetDoubleField"; break;
    default:       ShouldNotReachHere();
  }
  ResourceMark rm;
  BufferBlob* blob = BufferBlob::create(name, BUFFER_SIZE*wordSize);
  CodeBuffer cbuf(blob);
  MacroAssembler* masm = new MacroAssembler(&cbuf);
  address fast_entry = __ pc();

  Label slow_with_pop, slow;

  // stack layout:    offset from rsp (in words):
  //  return pc        0
  //  jni env          1
  //  obj              2
  //  jfieldID         3

  ExternalAddress counter(SafepointSynchronize::safepoint_counter_addr());

  __ mov32 (rcx, counter);
  __ testb (rcx, 1);
  __ jcc (Assembler::notZero, slow);

  if (JvmtiExport::can_post_field_access()) {
    // Check to see if a field access watch has been set before we
    // take the fast path.
    __ cmp32(ExternalAddress((address) JvmtiExport::get_field_access_count_addr()), 0);
    __ jcc(Assembler::notZero, slow);
  }

  __ mov(rax, rcx);
  __ andptr(rax, 1);                         // rax, must end up 0
  __ movptr(rdx, Address(rsp, rax, Address::times_1, 2*wordSize));
                                             // obj, notice rax, is 0.
                                             // rdx is data dependent on rcx.
  __ movptr(rax, Address(rsp, 3*wordSize));  // jfieldID

  __ clear_jobject_tag(rdx);

  __ movptr(rdx, Address(rdx, 0));           // *obj
  __ shrptr(rax, 2);                         // offset

  assert(count < LIST_CAPACITY, "LIST_CAPACITY too small");
  speculative_load_pclist[count] = __ pc();
  switch (type) {
    case T_FLOAT:  __ fld_s (Address(rdx, rax, Address::times_1)); break;
    case T_DOUBLE: __ fld_d (Address(rdx, rax, Address::times_1)); break;
    default:       ShouldNotReachHere();
  }

  // Spill the loaded value as a float and read it back into rax so that the
  // counter address below depends on the field access.
  __ fst_s (Address(rsp, -4));
  __ lea(rdx, counter);
  __ movl (rax, Address(rsp, -4));
  // garbage hi-order bits on 64bit are harmless.
  // counter_addr ^ rax ^ rax == counter_addr
  __ xorptr(rdx, rax);
  __ xorptr(rdx, rax);
  // Slow path if the counter changed since it was read into rcx.
  __ cmp32(rcx, Address(rdx, 0));
  __ jcc (Assembler::notEqual, slow_with_pop);

  __ ret (0);

  __ bind (slow_with_pop);
  // invalid load. pop FPU stack.
  __ fstp_d (0);

  slowcase_entry_pclist[count++] = __ pc();
  __ bind (slow);
  address slow_case_addr = nullptr;
  switch (type) {
    case T_FLOAT:  slow_case_addr = jni_GetFloatField_addr();  break;
    case T_DOUBLE: slow_case_addr = jni_GetDoubleField_addr(); break;
    default:       ShouldNotReachHere();
  }
  // tail call
  __ jump (RuntimeAddress(slow_case_addr));

  __ flush ();

  return fast_entry;
}
// Fast GetFloatField stub: the shared floating-point generator with T_FLOAT.
address JNI_FastGetField::generate_fast_get_float_field() {
  const BasicType field_type = T_FLOAT;
  return generate_fast_get_float_field0(field_type);
}
// Fast GetDoubleField stub: the shared floating-point generator with T_DOUBLE.
address JNI_FastGetField::generate_fast_get_double_field() {
  const BasicType field_type = T_DOUBLE;
  return generate_fast_get_float_field0(field_type);
}

View File

@ -1,52 +0,0 @@
/*
* Copyright (c) 2022, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "macroAssembler_x86.hpp"
// 16-byte aligned table of four 32-bit words, published as
// MacroAssembler::ONES.
// NOTE(review): read as two little-endian IEEE-754 doubles (low word first)
// the entries would be +1.0 and -1.0 - confirm against the users.
ATTRIBUTE_ALIGNED(16) static const juint _ONES[] = {
  0x00000000UL, 0x3ff00000UL,
  0x00000000UL, 0xbff00000UL
};
address MacroAssembler::ONES = (address)_ONES;
// 16-byte aligned table of two 32-bit words, published as
// MacroAssembler::PI4_INV.
// NOTE(review): presumably one little-endian double holding 4/pi, going by
// the name - confirm against the users.
ATTRIBUTE_ALIGNED(16) static const juint _PI4_INV[] = {
  0x6dc9c883UL, 0x3ff45f30UL
};
address MacroAssembler::PI4_INV = (address)_PI4_INV;
// 16-byte aligned table of six 32-bit words, published as
// MacroAssembler::PI4X3.
// NOTE(review): presumably three little-endian doubles forming a multi-part
// representation of pi/4 - confirm against the users.
ATTRIBUTE_ALIGNED(16) static const juint _PI4X3[] = {
  0x54443000UL, 0xbfe921fbUL,
  0x3b39a000UL, 0x3d373dcbUL,
  0xe0e68948UL, 0xba845c06UL
};
address MacroAssembler::PI4X3 = (address)_PI4X3;
// 16-byte aligned table of eight 32-bit words, published as
// MacroAssembler::PI4X4.
// NOTE(review): presumably four little-endian doubles forming a multi-part
// representation of pi/4 - confirm against the users.
ATTRIBUTE_ALIGNED(16) static const juint _PI4X4[] = {
  0x54400000UL, 0xbfe921fbUL,
  0x1a600000UL, 0xbdc0b461UL,
  0x2e000000UL, 0xbb93198aUL,
  0x252049c1UL, 0xb96b839aUL
};
address MacroAssembler::PI4X4 = (address)_PI4X4;
// 16-byte aligned table of four 32-bit words, published as
// MacroAssembler::L_2IL0FLOATPACKET_0.
// NOTE(review): the low 64 bits have every bit set except the top one, which
// looks like a mask clearing a double's sign bit - confirm against the users.
ATTRIBUTE_ALIGNED(16) static const juint _L_2IL0FLOATPACKET_0[] = {
  0xffffffffUL, 0x7fffffffUL,
  0x00000000UL, 0x00000000UL
};
address MacroAssembler::L_2IL0FLOATPACKET_0 = (address)_L_2IL0FLOATPACKET_0;

View File

@ -1,427 +0,0 @@
/*
* Copyright (c) 2016, 2021, Intel Corporation. All rights reserved.
* Intel Math Library (LIBM) Source Code
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "asm/assembler.hpp"
#include "asm/assembler.inline.hpp"
#include "macroAssembler_x86.hpp"
#include "runtime/stubRoutines.hpp"
#include "utilities/globalDefinitions.hpp"
/******************************************************************************/
// ALGORITHM DESCRIPTION - COS()
// ---------------------
//
// 1. RANGE REDUCTION
//
// We perform an initial range reduction from X to r with
//
// X =~= N * pi/32 + r
//
// so that |r| <= pi/64 + epsilon. We restrict inputs to those
// where |N| <= 932560. Beyond this, the range reduction is
// insufficiently accurate. For extremely small inputs,
// denormalization can occur internally, impacting performance.
// This means that the main path is actually only taken for
// 2^-252 <= |X| < 90112.
//
// To avoid branches, we perform the range reduction to full
// accuracy each time.
//
// X - N * (P_1 + P_2 + P_3)
//
// where P_1 and P_2 are 32-bit numbers (so multiplication by N
// is exact) and P_3 is a 53-bit number. Together, these
// approximate pi well enough for all cases in the restricted
// range.
//
// The main reduction sequence is:
//
// y = 32/pi * x
// N = integer(y)
// (computed by adding and subtracting off SHIFTER)
//
// m_1 = N * P_1
// m_2 = N * P_2
// r_1 = x - m_1
// r = r_1 - m_2
// (this r can be used for most of the calculation)
//
// c_1 = r_1 - r
// m_3 = N * P_3
// c_2 = c_1 - m_2
// c = c_2 - m_3
//
// 2. MAIN ALGORITHM
//
// The algorithm uses a table lookup based on B = M * pi / 32
// where M = N mod 64. The stored values are:
// sigma closest power of 2 to cos(B)
// C_hl 53-bit cos(B) - sigma
// S_hi + S_lo 2 * 53-bit sin(B)
//
// The computation is organized as follows:
//
// sin(B + r + c) = [sin(B) + sigma * r] +
// r * (cos(B) - sigma) +
// sin(B) * [cos(r + c) - 1] +
// cos(B) * [sin(r + c) - r]
//
// which is approximately:
//
// [S_hi + sigma * r] +
// C_hl * r +
// S_lo + S_hi * [(cos(r) - 1) - r * c] +
// (C_hl + sigma) * [(sin(r) - r) + c]
//
// and this is what is actually computed. We separate this sum
// into four parts:
//
// hi + med + pols + corr
//
// where
//
// hi = S_hi + sigma r
// med = C_hl * r
// pols = S_hi * (cos(r) - 1) + (C_hl + sigma) * (sin(r) - r)
// corr = S_lo + c * ((C_hl + sigma) - S_hi * r)
//
// 3. POLYNOMIAL
//
// The polynomial S_hi * (cos(r) - 1) + (C_hl + sigma) *
// (sin(r) - r) can be rearranged freely, since it is quite
// small, so we exploit parallelism to the fullest.
//
// psc4 = SC_4 * r_1
// msc4 = psc4 * r
// r2 = r * r
// msc2 = SC_2 * r2
// r4 = r2 * r2
// psc3 = SC_3 + msc4
// psc1 = SC_1 + msc2
// msc3 = r4 * psc3
// sincospols = psc1 + msc3
// pols = sincospols *
// <S_hi * r^2 | (C_hl + sigma) * r^3>
//
// 4. CORRECTION TERM
//
// This is where the "c" component of the range reduction is
// taken into account; recall that just "r" is used for most of
// the calculation.
//
// -c = m_3 - c_2
// -d = S_hi * r - (C_hl + sigma)
// corr = -c * -d + S_lo
//
// 5. COMPENSATED SUMMATIONS
//
// The two successive compensated summations add up the high
// and medium parts, leaving just the low parts to add up at
// the end.
//
// rs = sigma * r
// res_int = S_hi + rs
// k_0 = S_hi - res_int
// k_2 = k_0 + rs
// med = C_hl * r
// res_hi = res_int + med
// k_1 = res_int - res_hi
// k_3 = k_1 + med
//
// 6. FINAL SUMMATION
//
// We now add up all the small parts:
//
// res_lo = pols(hi) + pols(lo) + corr + k_1 + k_3
//
// Now the overall result is just:
//
// res_hi + res_lo
//
// 7. SMALL ARGUMENTS
//
// Inputs with |X| < 2^-252 are treated specially as
// 1 - |x|.
//
// Special cases:
// cos(NaN) = quiet NaN, and raise invalid exception
// cos(INF) = NaN and raise invalid exception
// cos(0) = 1
//
/******************************************************************************/
// The 32 bit code is at most SSE2 compliant
//
// Constant pool for fast_cos(): 564 32-bit words (2256 bytes), 16-byte aligned.
// Each double occupies two consecutive words, low word first. fast_cos()
// addresses the pool by byte offset from its base:
//
// [0, 2048) 64 entries of 32 bytes each, selected by
// ((m + 1865232) & 63) << 5, where m is the integer obtained from the
// argument. Within one entry fast_cos() reads, in the terms of the
// algorithm description above:
// +0 C_hl (multiplied by r to form "med")
// +8 S_hi
// +16 S_lo (added into the correction term)
// +24 sigma (multiplied by r to form "rs")
//
// 2048 0xbfc5555555555555, 0xbfe0000000000000 packed pair added to the
// 2064 0x3f81111111111111, 0x3fa5555555555555 polynomial accumulators
// 2080 0xbf2a01a01a01a01a, 0xbf56c16c16c16c17 (offsets 2048..2111 are
// 2096 0x3ec71de3a556c734, 0x3efa01a01a01a01a presumably SC_1..SC_4)
// 2112 0x3d90b4611a600000 (twice) multiplied by m (packed)
// 2128 0x3fb921fb54400000 multiplied by m, subtracted from x
// 2144 0x3b63198a2e037073 multiplied by m
// 2160 0x40245f306dc9c883 multiplied by x before the
// conversion to integer
// 2176 0x4338000000000000 not referenced by fast_cos()
// 2192 0x3ff0000000000000 (1.0) minuend of the tiny-argument
// result 1 - |x|
// 2208 0x8000000000000000 (-0.0) x87 multiplier on the Inf/NaN path
// 2224 0x8000000000000000 sign-bit mask applied to x
// 2240 0x3fe0000000000000 (0.5, twice) OR-ed with the sign of x and
// added before truncation
//
// Offsets 2136, 2152, 2168, 2184, 2200, 2216 and 2232 hold zero padding.
ATTRIBUTE_ALIGNED(16) static const juint _static_const_table_cos[] =
{
0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
0x00000000UL, 0x00000000UL, 0x3ff00000UL, 0x176d6d31UL, 0xbf73b92eUL,
0xbc29b42cUL, 0x3fb917a6UL, 0xe0000000UL, 0xbc3e2718UL, 0x00000000UL,
0x3ff00000UL, 0x011469fbUL, 0xbf93ad06UL, 0x3c69a60bUL, 0x3fc8f8b8UL,
0xc0000000UL, 0xbc626d19UL, 0x00000000UL, 0x3ff00000UL, 0x939d225aUL,
0xbfa60beaUL, 0x2ed59f06UL, 0x3fd29406UL, 0xa0000000UL, 0xbc75d28dUL,
0x00000000UL, 0x3ff00000UL, 0x866b95cfUL, 0xbfb37ca1UL, 0xa6aea963UL,
0x3fd87de2UL, 0xe0000000UL, 0xbc672cedUL, 0x00000000UL, 0x3ff00000UL,
0x73fa1279UL, 0xbfbe3a68UL, 0x3806f63bUL, 0x3fde2b5dUL, 0x20000000UL,
0x3c5e0d89UL, 0x00000000UL, 0x3ff00000UL, 0x5bc57974UL, 0xbfc59267UL,
0x39ae68c8UL, 0x3fe1c73bUL, 0x20000000UL, 0x3c8b25ddUL, 0x00000000UL,
0x3ff00000UL, 0x53aba2fdUL, 0xbfcd0dfeUL, 0x25091dd6UL, 0x3fe44cf3UL,
0x20000000UL, 0x3c68076aUL, 0x00000000UL, 0x3ff00000UL, 0x99fcef32UL,
0x3fca8279UL, 0x667f3bcdUL, 0x3fe6a09eUL, 0x20000000UL, 0xbc8bdd34UL,
0x00000000UL, 0x3fe00000UL, 0x94247758UL, 0x3fc133ccUL, 0x6b151741UL,
0x3fe8bc80UL, 0x20000000UL, 0xbc82c5e1UL, 0x00000000UL, 0x3fe00000UL,
0x9ae68c87UL, 0x3fac73b3UL, 0x290ea1a3UL, 0x3fea9b66UL, 0xe0000000UL,
0x3c39f630UL, 0x00000000UL, 0x3fe00000UL, 0x7f909c4eUL, 0xbf9d4a2cUL,
0xf180bdb1UL, 0x3fec38b2UL, 0x80000000UL, 0xbc76e0b1UL, 0x00000000UL,
0x3fe00000UL, 0x65455a75UL, 0xbfbe0875UL, 0xcf328d46UL, 0x3fed906bUL,
0x20000000UL, 0x3c7457e6UL, 0x00000000UL, 0x3fe00000UL, 0x76acf82dUL,
0x3fa4a031UL, 0x56c62ddaUL, 0x3fee9f41UL, 0xe0000000UL, 0x3c8760b1UL,
0x00000000UL, 0x3fd00000UL, 0x0e5967d5UL, 0xbfac1d1fUL, 0xcff75cb0UL,
0x3fef6297UL, 0x20000000UL, 0x3c756217UL, 0x00000000UL, 0x3fd00000UL,
0x0f592f50UL, 0xbf9ba165UL, 0xa3d12526UL, 0x3fefd88dUL, 0x40000000UL,
0xbc887df6UL, 0x00000000UL, 0x3fc00000UL, 0x00000000UL, 0x00000000UL,
0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
0x00000000UL, 0x0f592f50UL, 0x3f9ba165UL, 0xa3d12526UL, 0x3fefd88dUL,
0x40000000UL, 0xbc887df6UL, 0x00000000UL, 0xbfc00000UL, 0x0e5967d5UL,
0x3fac1d1fUL, 0xcff75cb0UL, 0x3fef6297UL, 0x20000000UL, 0x3c756217UL,
0x00000000UL, 0xbfd00000UL, 0x76acf82dUL, 0xbfa4a031UL, 0x56c62ddaUL,
0x3fee9f41UL, 0xe0000000UL, 0x3c8760b1UL, 0x00000000UL, 0xbfd00000UL,
0x65455a75UL, 0x3fbe0875UL, 0xcf328d46UL, 0x3fed906bUL, 0x20000000UL,
0x3c7457e6UL, 0x00000000UL, 0xbfe00000UL, 0x7f909c4eUL, 0x3f9d4a2cUL,
0xf180bdb1UL, 0x3fec38b2UL, 0x80000000UL, 0xbc76e0b1UL, 0x00000000UL,
0xbfe00000UL, 0x9ae68c87UL, 0xbfac73b3UL, 0x290ea1a3UL, 0x3fea9b66UL,
0xe0000000UL, 0x3c39f630UL, 0x00000000UL, 0xbfe00000UL, 0x94247758UL,
0xbfc133ccUL, 0x6b151741UL, 0x3fe8bc80UL, 0x20000000UL, 0xbc82c5e1UL,
0x00000000UL, 0xbfe00000UL, 0x99fcef32UL, 0xbfca8279UL, 0x667f3bcdUL,
0x3fe6a09eUL, 0x20000000UL, 0xbc8bdd34UL, 0x00000000UL, 0xbfe00000UL,
0x53aba2fdUL, 0x3fcd0dfeUL, 0x25091dd6UL, 0x3fe44cf3UL, 0x20000000UL,
0x3c68076aUL, 0x00000000UL, 0xbff00000UL, 0x5bc57974UL, 0x3fc59267UL,
0x39ae68c8UL, 0x3fe1c73bUL, 0x20000000UL, 0x3c8b25ddUL, 0x00000000UL,
0xbff00000UL, 0x73fa1279UL, 0x3fbe3a68UL, 0x3806f63bUL, 0x3fde2b5dUL,
0x20000000UL, 0x3c5e0d89UL, 0x00000000UL, 0xbff00000UL, 0x866b95cfUL,
0x3fb37ca1UL, 0xa6aea963UL, 0x3fd87de2UL, 0xe0000000UL, 0xbc672cedUL,
0x00000000UL, 0xbff00000UL, 0x939d225aUL, 0x3fa60beaUL, 0x2ed59f06UL,
0x3fd29406UL, 0xa0000000UL, 0xbc75d28dUL, 0x00000000UL, 0xbff00000UL,
0x011469fbUL, 0x3f93ad06UL, 0x3c69a60bUL, 0x3fc8f8b8UL, 0xc0000000UL,
0xbc626d19UL, 0x00000000UL, 0xbff00000UL, 0x176d6d31UL, 0x3f73b92eUL,
0xbc29b42cUL, 0x3fb917a6UL, 0xe0000000UL, 0xbc3e2718UL, 0x00000000UL,
0xbff00000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
0x00000000UL, 0x00000000UL, 0x00000000UL, 0xbff00000UL, 0x176d6d31UL,
0x3f73b92eUL, 0xbc29b42cUL, 0xbfb917a6UL, 0xe0000000UL, 0x3c3e2718UL,
0x00000000UL, 0xbff00000UL, 0x011469fbUL, 0x3f93ad06UL, 0x3c69a60bUL,
0xbfc8f8b8UL, 0xc0000000UL, 0x3c626d19UL, 0x00000000UL, 0xbff00000UL,
0x939d225aUL, 0x3fa60beaUL, 0x2ed59f06UL, 0xbfd29406UL, 0xa0000000UL,
0x3c75d28dUL, 0x00000000UL, 0xbff00000UL, 0x866b95cfUL, 0x3fb37ca1UL,
0xa6aea963UL, 0xbfd87de2UL, 0xe0000000UL, 0x3c672cedUL, 0x00000000UL,
0xbff00000UL, 0x73fa1279UL, 0x3fbe3a68UL, 0x3806f63bUL, 0xbfde2b5dUL,
0x20000000UL, 0xbc5e0d89UL, 0x00000000UL, 0xbff00000UL, 0x5bc57974UL,
0x3fc59267UL, 0x39ae68c8UL, 0xbfe1c73bUL, 0x20000000UL, 0xbc8b25ddUL,
0x00000000UL, 0xbff00000UL, 0x53aba2fdUL, 0x3fcd0dfeUL, 0x25091dd6UL,
0xbfe44cf3UL, 0x20000000UL, 0xbc68076aUL, 0x00000000UL, 0xbff00000UL,
0x99fcef32UL, 0xbfca8279UL, 0x667f3bcdUL, 0xbfe6a09eUL, 0x20000000UL,
0x3c8bdd34UL, 0x00000000UL, 0xbfe00000UL, 0x94247758UL, 0xbfc133ccUL,
0x6b151741UL, 0xbfe8bc80UL, 0x20000000UL, 0x3c82c5e1UL, 0x00000000UL,
0xbfe00000UL, 0x9ae68c87UL, 0xbfac73b3UL, 0x290ea1a3UL, 0xbfea9b66UL,
0xe0000000UL, 0xbc39f630UL, 0x00000000UL, 0xbfe00000UL, 0x7f909c4eUL,
0x3f9d4a2cUL, 0xf180bdb1UL, 0xbfec38b2UL, 0x80000000UL, 0x3c76e0b1UL,
0x00000000UL, 0xbfe00000UL, 0x65455a75UL, 0x3fbe0875UL, 0xcf328d46UL,
0xbfed906bUL, 0x20000000UL, 0xbc7457e6UL, 0x00000000UL, 0xbfe00000UL,
0x76acf82dUL, 0xbfa4a031UL, 0x56c62ddaUL, 0xbfee9f41UL, 0xe0000000UL,
0xbc8760b1UL, 0x00000000UL, 0xbfd00000UL, 0x0e5967d5UL, 0x3fac1d1fUL,
0xcff75cb0UL, 0xbfef6297UL, 0x20000000UL, 0xbc756217UL, 0x00000000UL,
0xbfd00000UL, 0x0f592f50UL, 0x3f9ba165UL, 0xa3d12526UL, 0xbfefd88dUL,
0x40000000UL, 0x3c887df6UL, 0x00000000UL, 0xbfc00000UL, 0x00000000UL,
0x00000000UL, 0x00000000UL, 0xbff00000UL, 0x00000000UL, 0x00000000UL,
0x00000000UL, 0x00000000UL, 0x0f592f50UL, 0xbf9ba165UL, 0xa3d12526UL,
0xbfefd88dUL, 0x40000000UL, 0x3c887df6UL, 0x00000000UL, 0x3fc00000UL,
0x0e5967d5UL, 0xbfac1d1fUL, 0xcff75cb0UL, 0xbfef6297UL, 0x20000000UL,
0xbc756217UL, 0x00000000UL, 0x3fd00000UL, 0x76acf82dUL, 0x3fa4a031UL,
0x56c62ddaUL, 0xbfee9f41UL, 0xe0000000UL, 0xbc8760b1UL, 0x00000000UL,
0x3fd00000UL, 0x65455a75UL, 0xbfbe0875UL, 0xcf328d46UL, 0xbfed906bUL,
0x20000000UL, 0xbc7457e6UL, 0x00000000UL, 0x3fe00000UL, 0x7f909c4eUL,
0xbf9d4a2cUL, 0xf180bdb1UL, 0xbfec38b2UL, 0x80000000UL, 0x3c76e0b1UL,
0x00000000UL, 0x3fe00000UL, 0x9ae68c87UL, 0x3fac73b3UL, 0x290ea1a3UL,
0xbfea9b66UL, 0xe0000000UL, 0xbc39f630UL, 0x00000000UL, 0x3fe00000UL,
0x94247758UL, 0x3fc133ccUL, 0x6b151741UL, 0xbfe8bc80UL, 0x20000000UL,
0x3c82c5e1UL, 0x00000000UL, 0x3fe00000UL, 0x99fcef32UL, 0x3fca8279UL,
0x667f3bcdUL, 0xbfe6a09eUL, 0x20000000UL, 0x3c8bdd34UL, 0x00000000UL,
0x3fe00000UL, 0x53aba2fdUL, 0xbfcd0dfeUL, 0x25091dd6UL, 0xbfe44cf3UL,
0x20000000UL, 0xbc68076aUL, 0x00000000UL, 0x3ff00000UL, 0x5bc57974UL,
0xbfc59267UL, 0x39ae68c8UL, 0xbfe1c73bUL, 0x20000000UL, 0xbc8b25ddUL,
0x00000000UL, 0x3ff00000UL, 0x73fa1279UL, 0xbfbe3a68UL, 0x3806f63bUL,
0xbfde2b5dUL, 0x20000000UL, 0xbc5e0d89UL, 0x00000000UL, 0x3ff00000UL,
0x866b95cfUL, 0xbfb37ca1UL, 0xa6aea963UL, 0xbfd87de2UL, 0xe0000000UL,
0x3c672cedUL, 0x00000000UL, 0x3ff00000UL, 0x939d225aUL, 0xbfa60beaUL,
0x2ed59f06UL, 0xbfd29406UL, 0xa0000000UL, 0x3c75d28dUL, 0x00000000UL,
0x3ff00000UL, 0x011469fbUL, 0xbf93ad06UL, 0x3c69a60bUL, 0xbfc8f8b8UL,
0xc0000000UL, 0x3c626d19UL, 0x00000000UL, 0x3ff00000UL, 0x176d6d31UL,
0xbf73b92eUL, 0xbc29b42cUL, 0xbfb917a6UL, 0xe0000000UL, 0x3c3e2718UL,
0x00000000UL, 0x3ff00000UL, 0x55555555UL, 0xbfc55555UL, 0x00000000UL,
0xbfe00000UL, 0x11111111UL, 0x3f811111UL, 0x55555555UL, 0x3fa55555UL,
0x1a01a01aUL, 0xbf2a01a0UL, 0x16c16c17UL, 0xbf56c16cUL, 0xa556c734UL,
0x3ec71de3UL, 0x1a01a01aUL, 0x3efa01a0UL, 0x1a600000UL, 0x3d90b461UL,
0x1a600000UL, 0x3d90b461UL, 0x54400000UL, 0x3fb921fbUL, 0x00000000UL,
0x00000000UL, 0x2e037073UL, 0x3b63198aUL, 0x00000000UL, 0x00000000UL,
0x6dc9c883UL, 0x40245f30UL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
0x43380000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x3ff00000UL,
0x00000000UL, 0x00000000UL, 0x00000000UL, 0x80000000UL, 0x00000000UL,
0x00000000UL, 0x00000000UL, 0x80000000UL, 0x00000000UL, 0x00000000UL,
0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL
};
//registers,
// input: (rbp + 8)
// scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
// eax, ecx, edx, ebx (tmp)
// Code generated by Intel C compiler for LIBM library
//
// Emits the 32-bit x86 (SSE2 + x87) instruction sequence that evaluates
// cos(x) following the algorithm description above.
//
// Contract visible in the emitted code:
// - The sequence first lowers rsp by 120 and then loads the double argument
// from rsp + 128, i.e. from 8 bytes above the stack pointer on entry.
// - Every path ends with fld_d, so the result is left on the x87 stack.
// - tmp is saved at rsp + 56 on entry and reloaded at the end; in between it
// holds the base address of _static_const_table_cos.
// - eax and edx are clobbered. ecx is only checked by
// assert_different_registers() and is not otherwise used here.
// - The sequence never adds the 120 bytes back to rsp.
// NOTE(review): presumably the caller's epilogue restores the stack
// pointer - confirm against the stub that invokes fast_cos().
void MacroAssembler::fast_cos(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
Register eax, Register ecx, Register edx, Register tmp) {
Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
Label start;
assert_different_registers(tmp, eax, ecx, edx);
address static_const_table_cos = (address)_static_const_table_cos;
// "start" is bound here but never used as a jump target in this function.
bind(start);
// Reserve 120 bytes of scratch stack and preserve tmp before reusing it as
// the constant-table base pointer.
subl(rsp, 120);
movl(Address(rsp, 56), tmp);
lea(tmp, ExternalAddress(static_const_table_cos));
movsd(xmm0, Address(rsp, 128));
// Range dispatch on the top 16 bits of x with the sign masked off (exponent
// plus the 4 leading mantissa bits). The unsigned compare sends both
// too-small values (the subtraction wraps around) and too-large values to
// L_2TAG_PACKET_0_0_2; the flags are re-tested there with a signed condition.
pextrw(eax, xmm0, 3);
andl(eax, 32767);
subl(eax, 12336);
cmpl(eax, 4293);
jcc(Assembler::above, L_2TAG_PACKET_0_0_2);
// --- Main path: argument reduction ---
// xmm1 = x * table[2160]; xmm5 = table[2240] (0.5) carrying the sign of x
// (sign mask at table[2224]). Adding xmm5 and truncating yields the integer
// m in edx, which is then converted back to double in xmm1.
movsd(xmm1, Address(tmp, 2160));
mulsd(xmm1, xmm0);
movdqu(xmm5, Address(tmp, 2240));
movsd(xmm4, Address(tmp, 2224));
pand(xmm4, xmm0);
por(xmm5, xmm4);
movsd(xmm3, Address(tmp, 2128));
movdqu(xmm2, Address(tmp, 2112));
addpd(xmm1, xmm5);
cvttsd2sil(edx, xmm1);
cvtsi2sdl(xmm1, edx);
mulsd(xmm3, xmm1);
unpcklpd(xmm1, xmm1);
// Table lookup: eax = table base + ((m + 1865232) & 63) * 32.
// 1865232 & 63 == 16, so the selected entry lies 16 entries past m (mod 64).
addl(edx, 1865232);
movdqu(xmm4, xmm0);
andl(edx, 63);
movdqu(xmm5, Address(tmp, 2096));
lea(eax, Address(tmp, 0));
shll(edx, 5);
addl(eax, edx);
// Subtract m times the constants at table[2128] and table[2112] from x in
// steps to obtain the reduced argument r; xmm1 accumulates the small
// remainder that the description calls "-c".
mulpd(xmm2, xmm1);
subsd(xmm0, xmm3);
mulsd(xmm1, Address(tmp, 2144));
subsd(xmm4, xmm3);
// xmm7 = S_hi (entry + 8).
movsd(xmm7, Address(eax, 8));
unpcklpd(xmm0, xmm0);
movapd(xmm3, xmm4);
subsd(xmm4, xmm2);
// --- Polynomial (both lanes in parallel), interleaved with the rest ---
mulpd(xmm5, xmm0);
subpd(xmm0, xmm2);
movdqu(xmm6, Address(tmp, 2064));
// xmm7 = S_hi * r.
mulsd(xmm7, xmm4);
subsd(xmm3, xmm4);
mulpd(xmm5, xmm0);
// xmm0 = r^2 in both lanes.
mulpd(xmm0, xmm0);
subsd(xmm3, xmm2);
// xmm2 = entry words +0 and +8 (C_hl low lane, S_hi high lane);
// xmm3 = sigma (entry + 24).
movdqu(xmm2, Address(eax, 0));
subsd(xmm1, xmm3);
movsd(xmm3, Address(eax, 24));
// Low lane of xmm2 = C_hl + sigma.
addsd(xmm2, xmm3);
// xmm7 = S_hi * r - (C_hl + sigma), the "-d" of the correction term.
subsd(xmm7, xmm2);
mulsd(xmm2, xmm4);
mulpd(xmm6, xmm0);
// xmm3 = rs = sigma * r.
mulsd(xmm3, xmm4);
// xmm2 = < S_hi * r^2 | (C_hl + sigma) * r^3 >.
mulpd(xmm2, xmm0);
// xmm0 = r^4.
mulpd(xmm0, xmm0);
addpd(xmm5, Address(tmp, 2080));
// xmm4 = med = C_hl * r.
mulsd(xmm4, Address(eax, 0));
addpd(xmm6, Address(tmp, 2048));
mulpd(xmm5, xmm0);
// --- Compensated summations ---
// xmm0 keeps rs; xmm3 = res_int = S_hi + rs.
movapd(xmm0, xmm3);
addsd(xmm3, Address(eax, 8));
// xmm1 = (-c) * (-d).
mulpd(xmm1, xmm7);
// xmm7 keeps med; xmm4 = res_hi = res_int + med.
movapd(xmm7, xmm4);
addsd(xmm4, xmm3);
addpd(xmm6, xmm5);
// xmm5 = k_0 = S_hi - res_int; xmm3 = k_1 = res_int - res_hi.
movsd(xmm5, Address(eax, 8));
subsd(xmm5, xmm3);
subsd(xmm3, xmm4);
// xmm1 = corr = (-c) * (-d) + S_lo (entry + 16).
addsd(xmm1, Address(eax, 16));
// xmm6 = pols (two partial products, one per lane).
mulpd(xmm6, xmm2);
// xmm5 = k_2 = k_0 + rs; xmm3 = k_3 = k_1 + med.
addsd(xmm5, xmm0);
addsd(xmm3, xmm7);
// --- Final summation: res_lo = corr + k_2 + k_3 + both lanes of pols ---
addsd(xmm1, xmm5);
addsd(xmm1, xmm3);
addsd(xmm1, xmm6);
unpckhpd(xmm6, xmm6);
addsd(xmm1, xmm6);
// result = res_hi + res_lo; move it to the x87 stack through memory.
addsd(xmm4, xmm1);
movsd(Address(rsp, 0), xmm4);
fld_d(Address(rsp, 0));
jmp(L_2TAG_PACKET_1_0_2);
bind(L_2TAG_PACKET_0_0_2);
// Out of the main range. The flags still come from cmpl(eax, 4293) above:
// signed "greater" means the argument is too large; otherwise it is tiny.
jcc(Assembler::greater, L_2TAG_PACKET_2_0_2);
// Tiny argument: clear the sign bit and return table[2192] - |x| = 1 - |x|.
pextrw(eax, xmm0, 3);
andl(eax, 32767);
pinsrw(xmm0, eax, 3);
movsd(xmm1, Address(tmp, 2192));
subsd(xmm1, xmm0);
movsd(Address(rsp, 0), xmm1);
fld_d(Address(rsp, 0));
jmp(L_2TAG_PACKET_1_0_2);
bind(L_2TAG_PACKET_2_0_2);
// Large argument: inspect the high word of x. 2146435072 == 0x7ff00000, so
// an all-ones exponent field (Inf or NaN) goes to L_2TAG_PACKET_3_0_2.
movl(eax, Address(rsp, 132));
andl(eax, 2146435072);
cmpl(eax, 2146435072);
jcc(Assembler::equal, L_2TAG_PACKET_3_0_2);
// Finite huge argument: call the dlibm_sin_cos_huge stub with an outgoing
// stack area of [rsp+0] = x, [rsp+8] = address of the result slot (rsp + 8
// once the 32 bytes are released again), [rsp+12] = 1.
// NOTE(review): the constant 1 presumably selects cosine - confirm against
// the stub.
subl(rsp, 32);
movsd(Address(rsp, 0), xmm0);
lea(eax, Address(rsp, 40));
movl(Address(rsp, 8), eax);
movl(eax, 1);
movl(Address(rsp, 12), eax);
call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlibm_sin_cos_huge())));
addl(rsp, 32);
// Load the value the stub stored in the result slot.
fld_d(Address(rsp, 8));
jmp(L_2TAG_PACKET_1_0_2);
bind(L_2TAG_PACKET_3_0_2);
// Inf or NaN: multiply x by table[2208] (-0.0) on the x87 unit, which yields
// NaN for an infinite input and propagates a NaN input.
fld_d(Address(rsp, 128));
fmul_d(Address(tmp, 2208));
bind(L_2TAG_PACKET_1_0_2);
// Common exit: restore the caller's tmp register.
movl(tmp, Address(rsp, 56));
}

View File

@ -1,329 +0,0 @@
/*
* Copyright (c) 2016, 2021, Intel Corporation. All rights reserved.
* Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved.
* Intel Math Library (LIBM) Source Code
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "asm/assembler.hpp"
#include "asm/assembler.inline.hpp"
#include "macroAssembler_x86.hpp"
#include "runtime/stubRoutines.hpp"
#include "utilities/globalDefinitions.hpp"
/******************************************************************************/
// ALGORITHM DESCRIPTION - EXP()
// ---------------------
//
// Description:
// Let K = 64 (table size).
// e^x = 2^(x/log(2)) = 2^n * T[j] * (1 + P(y))
// where
// x = m*log(2)/K + y, y in [-log(2)/K..log(2)/K]
// m = n*K + j, m,n,j - signed integer, j in [-K/2..K/2]
// values of 2^(j/K) are tabulated as T[j] = T_hi[j] * (1 + T_lo[j]).
//
// P(y) is a minimax polynomial approximation of exp(y)-1
// on the small interval [-log(2)/K..log(2)/K] (coefficients were calculated by Maple V).
//
// To avoid problems with arithmetic overflow and underflow,
// the value of 2^n is safely computed as 2^n1 * 2^n2 where n1 in [-BIAS/2..BIAS/2]
// where BIAS is a value of exponent bias.
//
// Special cases:
// exp(NaN) = NaN
// exp(+INF) = +INF
// exp(-INF) = 0
// exp(x) = 1 for subnormals
// for finite argument, only exp(0)=1 is exact
// For IEEE double
// if x > 709.782712893383973096 then exp(x) overflow
// if x < -745.133219101941108420 then exp(x) underflow
//
/******************************************************************************/
// The 32 bit code is at most SSE2 compliant
//
// Constant pool for fast_exp(): 306 32-bit words (1224 bytes), 16-byte aligned.
// Each 64-bit value occupies two consecutive words, low word first.
// fast_exp() addresses the pool by byte offset from its base:
//
// 0 0xfff0000000000000 (twice) sign/exponent mask (used with pandn)
// 16 0x00000000ffffffc0 (twice) mask applied to the shifted integer m
// 32 0x000000000000ffc0 (twice) added to the masked m before the
// shift into the exponent field
// 48 0x4338000000000000 (twice) added to, then subtracted from,
// x * table[64] to round it
// 64 0x40571547652b82fe (twice) multiplier applied to x
// 80 0x3f862e42fefa0000 (twice) multiplied by m, subtracted from x
// 96 0x3d1cf79abc9e3b3a (twice) multiplied by m, subtracted from x
// 112 0x3fdffffffffffffe (twice) multiplier of y^2
// 128 packed polynomial coefficients (see inline comments in fast_exp)
// 144 packed polynomial coefficients (see inline comments in fast_exp)
// 160 64 entries of 16 bytes, indexed by (m & 63) << 4. fast_exp() adds
// the low double to the sum and ORs the high quadword, whose exponent
// field is zero in every entry, with the exponent bits built from m.
// 1184 0x3ff0000000000000 (1.0) added to x for tiny |x|
// 1192 0x7ff0000000000000 (+Inf) result for exp(+Inf)
// 1200 0x0000000000000000 (0.0) result for exp(-Inf)
// 1208 0x7fefffffffffffff squared for large positive x
// 1216 0x0010000000000000 squared for large negative x
ATTRIBUTE_ALIGNED(16) static const juint _static_const_table[] =
{
0x00000000UL, 0xfff00000UL, 0x00000000UL, 0xfff00000UL, 0xffffffc0UL,
0x00000000UL, 0xffffffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL,
0x0000ffc0UL, 0x00000000UL, 0x00000000UL, 0x43380000UL, 0x00000000UL,
0x43380000UL, 0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL,
0xfefa0000UL, 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL, 0xbc9e3b3aUL,
0x3d1cf79aUL, 0xbc9e3b3aUL, 0x3d1cf79aUL, 0xfffffffeUL, 0x3fdfffffUL,
0xfffffffeUL, 0x3fdfffffUL, 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL,
0x3fa55555UL, 0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL,
0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x0e03754dUL,
0x3cad7bbfUL, 0x3e778060UL, 0x00002c9aUL, 0x3567f613UL, 0x3c8cd252UL,
0xd3158574UL, 0x000059b0UL, 0x61e6c861UL, 0x3c60f74eUL, 0x18759bc8UL,
0x00008745UL, 0x5d837b6cUL, 0x3c979aa6UL, 0x6cf9890fUL, 0x0000b558UL,
0x702f9cd1UL, 0x3c3ebe3dUL, 0x32d3d1a2UL, 0x0000e3ecUL, 0x1e63bcd8UL,
0x3ca3516eUL, 0xd0125b50UL, 0x00011301UL, 0x26f0387bUL, 0x3ca4c554UL,
0xaea92ddfUL, 0x0001429aUL, 0x62523fb6UL, 0x3ca95153UL, 0x3c7d517aUL,
0x000172b8UL, 0x3f1353bfUL, 0x3c8b898cUL, 0xeb6fcb75UL, 0x0001a35bUL,
0x3e3a2f5fUL, 0x3c9aecf7UL, 0x3168b9aaUL, 0x0001d487UL, 0x44a6c38dUL,
0x3c8a6f41UL, 0x88628cd6UL, 0x0002063bUL, 0xe3a8a894UL, 0x3c968efdUL,
0x6e756238UL, 0x0002387aUL, 0x981fe7f2UL, 0x3c80472bUL, 0x65e27cddUL,
0x00026b45UL, 0x6d09ab31UL, 0x3c82f7e1UL, 0xf51fdee1UL, 0x00029e9dUL,
0x720c0ab3UL, 0x3c8b3782UL, 0xa6e4030bUL, 0x0002d285UL, 0x4db0abb6UL,
0x3c834d75UL, 0x0a31b715UL, 0x000306feUL, 0x5dd3f84aUL, 0x3c8fdd39UL,
0xb26416ffUL, 0x00033c08UL, 0xcc187d29UL, 0x3ca12f8cUL, 0x373aa9caUL,
0x000371a7UL, 0x738b5e8bUL, 0x3ca7d229UL, 0x34e59ff6UL, 0x0003a7dbUL,
0xa72a4c6dUL, 0x3c859f48UL, 0x4c123422UL, 0x0003dea6UL, 0x259d9205UL,
0x3ca8b846UL, 0x21f72e29UL, 0x0004160aUL, 0x60c2ac12UL, 0x3c4363edUL,
0x6061892dUL, 0x00044e08UL, 0xdaa10379UL, 0x3c6ecce1UL, 0xb5c13cd0UL,
0x000486a2UL, 0xbb7aafb0UL, 0x3c7690ceUL, 0xd5362a27UL, 0x0004bfdaUL,
0x9b282a09UL, 0x3ca083ccUL, 0x769d2ca6UL, 0x0004f9b2UL, 0xc1aae707UL,
0x3ca509b0UL, 0x569d4f81UL, 0x0005342bUL, 0x18fdd78eUL, 0x3c933505UL,
0x36b527daUL, 0x00056f47UL, 0xe21c5409UL, 0x3c9063e1UL, 0xdd485429UL,
0x0005ab07UL, 0x2b64c035UL, 0x3c9432e6UL, 0x15ad2148UL, 0x0005e76fUL,
0x99f08c0aUL, 0x3ca01284UL, 0xb03a5584UL, 0x0006247eUL, 0x0073dc06UL,
0x3c99f087UL, 0x82552224UL, 0x00066238UL, 0x0da05571UL, 0x3c998d4dUL,
0x667f3bccUL, 0x0006a09eUL, 0x86ce4786UL, 0x3ca52bb9UL, 0x3c651a2eUL,
0x0006dfb2UL, 0x206f0dabUL, 0x3ca32092UL, 0xe8ec5f73UL, 0x00071f75UL,
0x8e17a7a6UL, 0x3ca06122UL, 0x564267c8UL, 0x00075febUL, 0x461e9f86UL,
0x3ca244acUL, 0x73eb0186UL, 0x0007a114UL, 0xabd66c55UL, 0x3c65ebe1UL,
0x36cf4e62UL, 0x0007e2f3UL, 0xbbff67d0UL, 0x3c96fe9fUL, 0x994cce12UL,
0x00082589UL, 0x14c801dfUL, 0x3c951f14UL, 0x9b4492ecUL, 0x000868d9UL,
0xc1f0eab4UL, 0x3c8db72fUL, 0x422aa0dbUL, 0x0008ace5UL, 0x59f35f44UL,
0x3c7bf683UL, 0x99157736UL, 0x0008f1aeUL, 0x9c06283cUL, 0x3ca360baUL,
0xb0cdc5e4UL, 0x00093737UL, 0x20f962aaUL, 0x3c95e8d1UL, 0x9fde4e4fUL,
0x00097d82UL, 0x2b91ce27UL, 0x3c71affcUL, 0x82a3f090UL, 0x0009c491UL,
0x589a2ebdUL, 0x3c9b6d34UL, 0x7b5de564UL, 0x000a0c66UL, 0x9ab89880UL,
0x3c95277cUL, 0xb23e255cUL, 0x000a5503UL, 0x6e735ab3UL, 0x3c846984UL,
0x5579fdbfUL, 0x000a9e6bUL, 0x92cb3387UL, 0x3c8c1a77UL, 0x995ad3adUL,
0x000ae89fUL, 0xdc2d1d96UL, 0x3ca22466UL, 0xb84f15faUL, 0x000b33a2UL,
0xb19505aeUL, 0x3ca1112eUL, 0xf2fb5e46UL, 0x000b7f76UL, 0x0a5fddcdUL,
0x3c74ffd7UL, 0x904bc1d2UL, 0x000bcc1eUL, 0x30af0cb3UL, 0x3c736eaeUL,
0xdd85529cUL, 0x000c199bUL, 0xd10959acUL, 0x3c84e08fUL, 0x2e57d14bUL,
0x000c67f1UL, 0x6c921968UL, 0x3c676b2cUL, 0xdcef9069UL, 0x000cb720UL,
0x36df99b3UL, 0x3c937009UL, 0x4a07897bUL, 0x000d072dUL, 0xa63d07a7UL,
0x3c74a385UL, 0xdcfba487UL, 0x000d5818UL, 0xd5c192acUL, 0x3c8e5a50UL,
0x03db3285UL, 0x000da9e6UL, 0x1c4a9792UL, 0x3c98bb73UL, 0x337b9b5eUL,
0x000dfc97UL, 0x603a88d3UL, 0x3c74b604UL, 0xe78b3ff6UL, 0x000e502eUL,
0x92094926UL, 0x3c916f27UL, 0xa2a490d9UL, 0x000ea4afUL, 0x41aa2008UL,
0x3c8ec3bcUL, 0xee615a27UL, 0x000efa1bUL, 0x31d185eeUL, 0x3c8a64a9UL,
0x5b6e4540UL, 0x000f5076UL, 0x4d91cd9dUL, 0x3c77893bUL, 0x819e90d8UL,
0x000fa7c1UL, 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x7ff00000UL,
0x00000000UL, 0x00000000UL, 0xffffffffUL, 0x7fefffffUL, 0x00000000UL,
0x00100000UL
};
//registers,
// input: (rbp + 8)
// scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
// rax, rdx, rcx, rbx (tmp)
// Code generated by Intel C compiler for LIBM library
//
// Emits the 32-bit x86 (SSE2 + x87) instruction sequence that evaluates
// exp(x) following the algorithm description above.
//
// Contract visible in the emitted code:
// - The sequence first lowers rsp by 120 and then loads the double argument
// from rsp + 128, i.e. from 8 bytes above the stack pointer on entry.
// - Every path ends with fld_d, so the result is left on the x87 stack.
// - tmp is saved at rsp + 64 on entry and reloaded at the end; in between it
// holds the base address of _static_const_table.
// - eax, ecx and edx are clobbered.
// - The sequence never adds the 120 bytes back to rsp.
// NOTE(review): presumably the caller's epilogue restores the stack
// pointer - confirm against the stub that invokes fast_exp().
// - edx is set to 14 or 15 on the overflow/underflow paths but is not read
// afterwards in this function.
// NOTE(review): these look like error codes inherited from the LIBM
// source - confirm whether any caller consumes them.
void MacroAssembler::fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
Register eax, Register ecx, Register edx, Register tmp) {
Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, L_2TAG_PACKET_10_0_2, L_2TAG_PACKET_11_0_2;
// L_2TAG_PACKET_12_0_2 is declared but never bound or jumped to below.
Label L_2TAG_PACKET_12_0_2;
assert_different_registers(tmp, eax, ecx, edx);
address static_const_table = (address)_static_const_table;
// Reserve 120 bytes of scratch stack and preserve tmp before reusing it as
// the constant-table base pointer.
subl(rsp, 120);
movl(Address(rsp, 64), tmp);
lea(tmp, ExternalAddress(static_const_table));
// Load x and duplicate it into both lanes of xmm0.
movsd(xmm0, Address(rsp, 128));
unpcklpd(xmm0, xmm0);
movdqu(xmm1, Address(tmp, 64)); // 0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL
movdqu(xmm6, Address(tmp, 48)); // 0x00000000UL, 0x43380000UL, 0x00000000UL, 0x43380000UL
movdqu(xmm2, Address(tmp, 80)); // 0xfefa0000UL, 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL
movdqu(xmm3, Address(tmp, 96)); // 0xbc9e3b3aUL, 0x3d1cf79aUL, 0xbc9e3b3aUL, 0x3d1cf79aUL
// Range check on e = top 16 bits of x with the sign masked off.
// edx = (16527 - e) | (e - 15504) has its sign bit set exactly when e lies
// outside [15504, 16527]; the unsigned compare against INT_MIN tests that bit.
pextrw(eax, xmm0, 3);
andl(eax, 32767);
movl(edx, 16527);
subl(edx, eax);
subl(eax, 15504);
orl(edx, eax);
cmpl(edx, INT_MIN);
jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2);
// --- Main path ---
// xmm1 = x * table[64] + table[48]; the copy in xmm7 keeps the rounded
// integer m in its low bits. Subtracting table[48] again leaves m as a double.
mulpd(xmm1, xmm0);
addpd(xmm1, xmm6);
movapd(xmm7, xmm1);
subpd(xmm1, xmm6);
// Reduced argument: xmm0 = x - m * table[80] - m * table[96]
// (the second subtraction happens further down).
mulpd(xmm2, xmm1);
movdqu(xmm4, Address(tmp, 128)); // 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL, 0x3fa55555UL
mulpd(xmm3, xmm1);
movdqu(xmm5, Address(tmp, 144)); // 0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL
subpd(xmm0, xmm2);
// Split m: ecx = (m & 63) * 16 is the byte offset into the 16-byte table
// entries; eax = edx = m >> 6 (arithmetic shift).
movdl(eax, xmm7);
movl(ecx, eax);
andl(ecx, 63);
shll(ecx, 4);
sarl(eax, 6);
movl(edx, eax);
// Build exponent bits in xmm7: ((m & 0xffffffc0) + 0xffc0) << 46.
movdqu(xmm6, Address(tmp, 16)); // 0xffffffc0UL, 0x00000000UL, 0xffffffc0UL, 0x00000000UL
pand(xmm7, xmm6);
movdqu(xmm6, Address(tmp, 32)); // 0x0000ffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL
paddq(xmm7, xmm6);
psllq(xmm7, 46);
subpd(xmm0, xmm3);
// xmm2 = table entry for (m & 63); the entries start at byte offset 160.
movdqu(xmm2, Address(tmp, ecx, Address::times_1, 160));
// Polynomial evaluation on the reduced argument, using both lanes.
mulpd(xmm4, xmm0);
movapd(xmm6, xmm0);
movapd(xmm1, xmm0);
mulpd(xmm6, xmm6);
mulpd(xmm0, xmm6);
addpd(xmm5, xmm4);
mulsd(xmm0, xmm6);
mulpd(xmm6, Address(tmp, 112)); // 0xfffffffeUL, 0x3fdfffffUL, 0xfffffffeUL, 0x3fdfffffUL
// Add the low double of the table entry to the sum, then move the entry's
// high quadword down so it can be combined with the exponent bits.
addsd(xmm1, xmm2);
unpckhpd(xmm2, xmm2);
mulpd(xmm0, xmm5);
addsd(xmm1, xmm0);
// xmm2 = table mantissa bits OR-ed with the exponent bits from xmm7.
por(xmm2, xmm7);
unpckhpd(xmm0, xmm0);
addsd(xmm0, xmm1);
addsd(xmm0, xmm6);
// If (m >> 6) + 894 is above 1916 (unsigned), take the split-scaling path.
addl(edx, 894);
cmpl(edx, 1916);
jcc(Assembler::above, L_2TAG_PACKET_1_0_2);
// Common case: result = xmm0 * xmm2 + xmm2.
mulsd(xmm0, xmm2);
addsd(xmm0, xmm2);
jmp(L_2TAG_PACKET_2_0_2);
bind(L_2TAG_PACKET_1_0_2);
// Split-scaling path. Save the x87 control word at rsp + 24 and load a copy
// with bits 8 and 9 set (768 == 0x300, the precision-control field).
fnstcw(Address(rsp, 24));
movzwl(edx, Address(rsp, 24));
orl(edx, 768);
movw(Address(rsp, 28), edx);
fldcw(Address(rsp, 28));
// Split the exponent n (eax) in two: eax = n >> 1, edx = n - (n >> 1).
movl(edx, eax);
sarl(eax, 1);
subl(edx, eax);
// xmm6 = xmm2 with its sign/exponent bits cleared (pandn with table[0]),
// then given the biased exponent eax + 1023.
movdqu(xmm6, Address(tmp, 0)); // 0x00000000UL, 0xfff00000UL, 0x00000000UL, 0xfff00000UL
pandn(xmm6, xmm2);
addl(eax, 1023);
movdl(xmm3, eax);
psllq(xmm3, 52);
por(xmm6, xmm3);
// xmm4 = a double whose exponent field is edx + 1023 and mantissa is zero.
addl(edx, 1023);
movdl(xmm4, edx);
psllq(xmm4, 52);
// x87 sequence: load xmm0 and xmm6, combine them with fmula/faddp, then
// multiply by xmm4 and store the product back through rsp + 8.
movsd(Address(rsp, 8), xmm0);
fld_d(Address(rsp, 8));
movsd(Address(rsp, 16), xmm6);
fld_d(Address(rsp, 16));
fmula(1);
faddp(1);
movsd(Address(rsp, 8), xmm4);
fld_d(Address(rsp, 8));
fmulp(1);
fstp_d(Address(rsp, 8));
movsd(xmm0, Address(rsp, 8));
// Restore the saved x87 control word.
fldcw(Address(rsp, 24));
// Classify the result by its exponent field (32752 == 0x7ff0):
// all ones -> L_2TAG_PACKET_3_0_2, zero -> L_2TAG_PACKET_4_0_2,
// anything else is returned normally.
pextrw(ecx, xmm0, 3);
andl(ecx, 32752);
cmpl(ecx, 32752);
jcc(Assembler::aboveEqual, L_2TAG_PACKET_3_0_2);
cmpl(ecx, 0);
jcc(Assembler::equal, L_2TAG_PACKET_4_0_2);
jmp(L_2TAG_PACKET_2_0_2);
// NOTE(review): the instructions from here up to the next bind() follow an
// unconditional jmp and no label is bound in between, so they are emitted
// but can never execute.
cmpl(ecx, INT_MIN);
jcc(Assembler::below, L_2TAG_PACKET_3_0_2);
cmpl(ecx, -1064950997);
jcc(Assembler::below, L_2TAG_PACKET_2_0_2);
jcc(Assembler::above, L_2TAG_PACKET_4_0_2);
movl(edx, Address(rsp, 128));
cmpl(edx, -17155601);
jcc(Assembler::below, L_2TAG_PACKET_2_0_2);
jmp(L_2TAG_PACKET_4_0_2);
bind(L_2TAG_PACKET_3_0_2);
// Result has an all-ones exponent field: code 14.
movl(edx, 14);
jmp(L_2TAG_PACKET_5_0_2);
bind(L_2TAG_PACKET_4_0_2);
// Result has a zero exponent field: code 15.
movl(edx, 15);
bind(L_2TAG_PACKET_5_0_2);
// Push the computed result onto the x87 stack and reload the original
// argument into xmm0 before leaving.
movsd(Address(rsp, 0), xmm0);
movsd(xmm0, Address(rsp, 128));
fld_d(Address(rsp, 0));
jmp(L_2TAG_PACKET_6_0_2);
bind(L_2TAG_PACKET_7_0_2);
// Reached from L_2TAG_PACKET_0_0_2 with eax = high word of x, sign cleared,
// and eax >= 0x40900000. 2146435072 == 0x7ff00000: Inf or NaN.
cmpl(eax, 2146435072);
jcc(Assembler::aboveEqual, L_2TAG_PACKET_8_0_2);
// Finite and large: branch on the sign bit of x.
movl(eax, Address(rsp, 132));
cmpl(eax, INT_MIN);
jcc(Assembler::aboveEqual, L_2TAG_PACKET_9_0_2);
// Positive: square the largest finite double so the product overflows.
movsd(xmm0, Address(tmp, 1208)); // 0xffffffffUL, 0x7fefffffUL
mulsd(xmm0, xmm0);
movl(edx, 14);
jmp(L_2TAG_PACKET_5_0_2);
bind(L_2TAG_PACKET_9_0_2);
// Negative: square table[1216] (0x0010000000000000) so the product underflows.
movsd(xmm0, Address(tmp, 1216));
mulsd(xmm0, xmm0);
movl(edx, 15);
jmp(L_2TAG_PACKET_5_0_2);
bind(L_2TAG_PACKET_8_0_2);
// Exponent field is all ones. A nonzero mantissa (high word above
// 0x7ff00000 or a nonzero low word) means NaN.
movl(edx, Address(rsp, 128));
cmpl(eax, 2146435072);
jcc(Assembler::above, L_2TAG_PACKET_10_0_2);
cmpl(edx, 0);
jcc(Assembler::notEqual, L_2TAG_PACKET_10_0_2);
// Infinity: +Inf returns table[1192] (+Inf), -Inf returns table[1200] (0).
movl(eax, Address(rsp, 132));
cmpl(eax, 2146435072);
jcc(Assembler::notEqual, L_2TAG_PACKET_11_0_2);
movsd(xmm0, Address(tmp, 1192)); // 0x00000000UL, 0x7ff00000UL
jmp(L_2TAG_PACKET_2_0_2);
bind(L_2TAG_PACKET_11_0_2);
movsd(xmm0, Address(tmp, 1200)); // 0x00000000UL, 0x00000000UL
jmp(L_2TAG_PACKET_2_0_2);
bind(L_2TAG_PACKET_10_0_2);
// NaN: return x + x.
movsd(xmm0, Address(rsp, 128));
addsd(xmm0, xmm0);
jmp(L_2TAG_PACKET_2_0_2);
bind(L_2TAG_PACKET_0_0_2);
// Outside the main range. 1083179008 == 0x40900000: a high word (sign
// cleared) at or above it is handled at L_2TAG_PACKET_7_0_2; otherwise |x|
// is tiny and the result is x + 1.0.
movl(eax, Address(rsp, 132));
andl(eax, 2147483647);
cmpl(eax, 1083179008);
jcc(Assembler::aboveEqual, L_2TAG_PACKET_7_0_2);
movsd(xmm0, Address(rsp, 128));
addsd(xmm0, Address(tmp, 1184)); // 0x00000000UL, 0x3ff00000UL
jmp(L_2TAG_PACKET_2_0_2);
bind(L_2TAG_PACKET_2_0_2);
// Normal exit: move the result from xmm0 to the x87 stack through memory.
movsd(Address(rsp, 48), xmm0);
fld_d(Address(rsp, 48));
bind(L_2TAG_PACKET_6_0_2);
// Common exit: restore the caller's tmp register.
movl(tmp, Address(rsp, 64));
}

View File

@ -1,344 +0,0 @@
/*
* Copyright (c) 2016, 2021, Intel Corporation. All rights reserved.
* Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved.
* Intel Math Library (LIBM) Source Code
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "asm/assembler.hpp"
#include "asm/assembler.inline.hpp"
#include "macroAssembler_x86.hpp"
#include "utilities/globalDefinitions.hpp"
/******************************************************************************/
// ALGORITHM DESCRIPTION - LOG()
// ---------------------
//
// x=2^k * mx, mx in [1,2)
//
// Get B~1/mx based on the output of rcpss instruction (B0)
// B = int((B0*2^7+0.5))/2^7
//
// Reduced argument: r=B*mx-1.0 (computed accurately in high and low parts)
//
// Result: k*log(2) - log(B) + p(r) if |x-1| >= small value (2^-6) and
// p(r) is a degree 7 polynomial
// -log(B) read from data table (high, low parts)
// Result is formed from high and low parts
//
// Special cases:
// log(NaN) = quiet NaN, and raise invalid exception
// log(+INF) = that INF
// log(0) = -INF with divide-by-zero exception raised
// log(1) = +0
// log(x) = NaN with invalid exception raised if x < -0, including -INF
//
/******************************************************************************/
// The 32 bit code is at most SSE2 compliant
//
// Constant pool for fast_log(): 536 32-bit words (2144 bytes), 16-byte aligned.
// Each double occupies two consecutive words, low word first. Layout as far as
// it is visible from the offsets fast_log() uses:
//
// [0, 2064) 16-byte entries, each a pair of doubles, loaded with
// movdqu(xmm0, Address(tmp, edx)) where edx is a multiple of 16
// derived from the rcpss result. Per the algorithm description
// these hold -log(B) as high and low parts.
// 2064 0x3fa62e42fefa3800, 0x3ceef35793c76730 (low double loaded by movsd)
// 2080 0x3fc2492492492492, 0xbfd0000000000000 packed coefficients
// 2096 0xbfc5555e3d6fb175, 0x3fd5555555555555 packed coefficients
// 2112 0x3fc999999999999a, 0xbfe0000000000000 packed coefficients
// 2128 0xffffe00000000000 (twice) mask applied with pand
ATTRIBUTE_ALIGNED(16) static const juint _static_const_table_log[] =
{
0xfefa3800UL, 0x3fe62e42UL, 0x93c76730UL, 0x3d2ef357UL, 0xaa241800UL,
0x3fe5ee82UL, 0x0cda46beUL, 0x3d220238UL, 0x5c364800UL, 0x3fe5af40UL,
0xac10c9fbUL, 0x3d2dfa63UL, 0x26bb8c00UL, 0x3fe5707aUL, 0xff3303ddUL,
0x3d09980bUL, 0x26867800UL, 0x3fe5322eUL, 0x5d257531UL, 0x3d05ccc4UL,
0x835a5000UL, 0x3fe4f45aUL, 0x6d93b8fbUL, 0xbd2e6c51UL, 0x6f970c00UL,
0x3fe4b6fdUL, 0xed4c541cUL, 0x3cef7115UL, 0x27e8a400UL, 0x3fe47a15UL,
0xf94d60aaUL, 0xbd22cb6aUL, 0xf2f92400UL, 0x3fe43d9fUL, 0x481051f7UL,
0xbcfd984fUL, 0x2125cc00UL, 0x3fe4019cUL, 0x30f0c74cUL, 0xbd26ce79UL,
0x0c36c000UL, 0x3fe3c608UL, 0x7cfe13c2UL, 0xbd02b736UL, 0x17197800UL,
0x3fe38ae2UL, 0xbb5569a4UL, 0xbd218b7aUL, 0xad9d8c00UL, 0x3fe35028UL,
0x9527e6acUL, 0x3d10b83fUL, 0x44340800UL, 0x3fe315daUL, 0xc5a0ed9cUL,
0xbd274e93UL, 0x57b0e000UL, 0x3fe2dbf5UL, 0x07b9dc11UL, 0xbd17a6e5UL,
0x6d0ec000UL, 0x3fe2a278UL, 0xe797882dUL, 0x3d206d2bUL, 0x1134dc00UL,
0x3fe26962UL, 0x05226250UL, 0xbd0b61f1UL, 0xd8bebc00UL, 0x3fe230b0UL,
0x6e48667bUL, 0x3d12fc06UL, 0x5fc61800UL, 0x3fe1f863UL, 0xc9fe81d3UL,
0xbd2a7242UL, 0x49ae6000UL, 0x3fe1c078UL, 0xed70e667UL, 0x3cccacdeUL,
0x40f23c00UL, 0x3fe188eeUL, 0xf8ab4650UL, 0x3d14cc4eUL, 0xf6f29800UL,
0x3fe151c3UL, 0xa293ae49UL, 0xbd2edd97UL, 0x23c75c00UL, 0x3fe11af8UL,
0xbb9ddcb2UL, 0xbd258647UL, 0x8611cc00UL, 0x3fe0e489UL, 0x07801742UL,
0x3d1c2998UL, 0xe2d05400UL, 0x3fe0ae76UL, 0x887e7e27UL, 0x3d1f486bUL,
0x0533c400UL, 0x3fe078bfUL, 0x41edf5fdUL, 0x3d268122UL, 0xbe760400UL,
0x3fe04360UL, 0xe79539e0UL, 0xbd04c45fUL, 0xe5b20800UL, 0x3fe00e5aUL,
0xb1727b1cUL, 0xbd053ba3UL, 0xaf7a4800UL, 0x3fdfb358UL, 0x3c164935UL,
0x3d0085faUL, 0xee031800UL, 0x3fdf4aa7UL, 0x6f014a8bUL, 0x3d12cde5UL,
0x56b41000UL, 0x3fdee2a1UL, 0x5a470251UL, 0x3d2f27f4UL, 0xc3ddb000UL,
0x3fde7b42UL, 0x5372bd08UL, 0xbd246550UL, 0x1a272800UL, 0x3fde148aUL,
0x07322938UL, 0xbd1326b2UL, 0x484c9800UL, 0x3fddae75UL, 0x60dc616aUL,
0xbd1ea42dUL, 0x46def800UL, 0x3fdd4902UL, 0xe9a767a8UL, 0x3d235bafUL,
0x18064800UL, 0x3fdce42fUL, 0x3ec7a6b0UL, 0xbd0797c3UL, 0xc7455800UL,
0x3fdc7ff9UL, 0xc15249aeUL, 0xbd29b6ddUL, 0x693fa000UL, 0x3fdc1c60UL,
0x7fe8e180UL, 0x3d2cec80UL, 0x1b80e000UL, 0x3fdbb961UL, 0xf40a666dUL,
0x3d27d85bUL, 0x04462800UL, 0x3fdb56faUL, 0x2d841995UL, 0x3d109525UL,
0x5248d000UL, 0x3fdaf529UL, 0x52774458UL, 0xbd217cc5UL, 0x3c8ad800UL,
0x3fda93edUL, 0xbea77a5dUL, 0x3d1e36f2UL, 0x0224f800UL, 0x3fda3344UL,
0x7f9d79f5UL, 0x3d23c645UL, 0xea15f000UL, 0x3fd9d32bUL, 0x10d0c0b0UL,
0xbd26279eUL, 0x43135800UL, 0x3fd973a3UL, 0xa502d9f0UL, 0xbd152313UL,
0x635bf800UL, 0x3fd914a8UL, 0x2ee6307dUL, 0xbd1766b5UL, 0xa88b3000UL,
0x3fd8b639UL, 0xe5e70470UL, 0xbd205ae1UL, 0x776dc800UL, 0x3fd85855UL,
0x3333778aUL, 0x3d2fd56fUL, 0x3bd81800UL, 0x3fd7fafaUL, 0xc812566aUL,
0xbd272090UL, 0x687cf800UL, 0x3fd79e26UL, 0x2efd1778UL, 0x3d29ec7dUL,
0x76c67800UL, 0x3fd741d8UL, 0x49dc60b3UL, 0x3d2d8b09UL, 0xe6af1800UL,
0x3fd6e60eUL, 0x7c222d87UL, 0x3d172165UL, 0x3e9c6800UL, 0x3fd68ac8UL,
0x2756eba0UL, 0x3d20a0d3UL, 0x0b3ab000UL, 0x3fd63003UL, 0xe731ae00UL,
0xbd2db623UL, 0xdf596000UL, 0x3fd5d5bdUL, 0x08a465dcUL, 0xbd0a0b2aUL,
0x53c8d000UL, 0x3fd57bf7UL, 0xee5d40efUL, 0x3d1fadedUL, 0x0738a000UL,
0x3fd522aeUL, 0x8164c759UL, 0x3d2ebe70UL, 0x9e173000UL, 0x3fd4c9e0UL,
0x1b0ad8a4UL, 0xbd2e2089UL, 0xc271c800UL, 0x3fd4718dUL, 0x0967d675UL,
0xbd2f27ceUL, 0x23d5e800UL, 0x3fd419b4UL, 0xec90e09dUL, 0x3d08e436UL,
0x77333000UL, 0x3fd3c252UL, 0xb606bd5cUL, 0x3d183b54UL, 0x76be1000UL,
0x3fd36b67UL, 0xb0f177c8UL, 0x3d116ecdUL, 0xe1d36000UL, 0x3fd314f1UL,
0xd3213cb8UL, 0xbd28e27aUL, 0x7cdc9000UL, 0x3fd2bef0UL, 0x4a5004f4UL,
0x3d2a9cfaUL, 0x1134d800UL, 0x3fd26962UL, 0xdf5bb3b6UL, 0x3d2c93c1UL,
0x6d0eb800UL, 0x3fd21445UL, 0xba46baeaUL, 0x3d0a87deUL, 0x635a6800UL,
0x3fd1bf99UL, 0x5147bdb7UL, 0x3d2ca6edUL, 0xcbacf800UL, 0x3fd16b5cUL,
0xf7a51681UL, 0x3d2b9acdUL, 0x8227e800UL, 0x3fd1178eUL, 0x63a5f01cUL,
0xbd2c210eUL, 0x67616000UL, 0x3fd0c42dUL, 0x163ceae9UL, 0x3d27188bUL,
0x604d5800UL, 0x3fd07138UL, 0x16ed4e91UL, 0x3cf89cdbUL, 0x5626c800UL,
0x3fd01eaeUL, 0x1485e94aUL, 0xbd16f08cUL, 0x6cb3b000UL, 0x3fcf991cUL,
0xca0cdf30UL, 0x3d1bcbecUL, 0xe4dd0000UL, 0x3fcef5adUL, 0x65bb8e11UL,
0xbcca2115UL, 0xffe71000UL, 0x3fce530eUL, 0x6041f430UL, 0x3cc21227UL,
0xb0d49000UL, 0x3fcdb13dUL, 0xf715b035UL, 0xbd2aff2aUL, 0xf2656000UL,
0x3fcd1037UL, 0x75b6f6e4UL, 0xbd084a7eUL, 0xc6f01000UL, 0x3fcc6ffbUL,
0xc5962bd2UL, 0xbcf1ec72UL, 0x383be000UL, 0x3fcbd087UL, 0x595412b6UL,
0xbd2d4bc4UL, 0x575bd000UL, 0x3fcb31d8UL, 0x4eace1aaUL, 0xbd0c358dUL,
0x3c8ae000UL, 0x3fca93edUL, 0x50562169UL, 0xbd287243UL, 0x07089000UL,
0x3fc9f6c4UL, 0x6865817aUL, 0x3d29904dUL, 0xdcf70000UL, 0x3fc95a5aUL,
0x58a0ff6fUL, 0x3d07f228UL, 0xeb390000UL, 0x3fc8beafUL, 0xaae92cd1UL,
0xbd073d54UL, 0x6551a000UL, 0x3fc823c1UL, 0x9a631e83UL, 0x3d1e0ddbUL,
0x85445000UL, 0x3fc7898dUL, 0x70914305UL, 0xbd1c6610UL, 0x8b757000UL,
0x3fc6f012UL, 0xe59c21e1UL, 0xbd25118dUL, 0xbe8c1000UL, 0x3fc6574eUL,
0x2c3c2e78UL, 0x3d19cf8bUL, 0x6b544000UL, 0x3fc5bf40UL, 0xeb68981cUL,
0xbd127023UL, 0xe4a1b000UL, 0x3fc527e5UL, 0xe5697dc7UL, 0x3d2633e8UL,
0x8333b000UL, 0x3fc4913dUL, 0x54fdb678UL, 0x3d258379UL, 0xa5993000UL,
0x3fc3fb45UL, 0x7e6a354dUL, 0xbd2cd1d8UL, 0xb0159000UL, 0x3fc365fcUL,
0x234b7289UL, 0x3cc62fa8UL, 0x0c868000UL, 0x3fc2d161UL, 0xcb81b4a1UL,
0x3d039d6cUL, 0x2a49c000UL, 0x3fc23d71UL, 0x8fd3df5cUL, 0x3d100d23UL,
0x7e23f000UL, 0x3fc1aa2bUL, 0x44389934UL, 0x3d2ca78eUL, 0x8227e000UL,
0x3fc1178eUL, 0xce2d07f2UL, 0x3d21ef78UL, 0xb59e4000UL, 0x3fc08598UL,
0x7009902cUL, 0xbd27e5ddUL, 0x39dbe000UL, 0x3fbfe891UL, 0x4fa10afdUL,
0xbd2534d6UL, 0x830a2000UL, 0x3fbec739UL, 0xafe645e0UL, 0xbd2dc068UL,
0x63844000UL, 0x3fbda727UL, 0x1fa71733UL, 0x3d1a8940UL, 0x01bc4000UL,
0x3fbc8858UL, 0xc65aacd3UL, 0x3d2646d1UL, 0x8dad6000UL, 0x3fbb6ac8UL,
0x2bf768e5UL, 0xbd139080UL, 0x40b1c000UL, 0x3fba4e76UL, 0xb94407c8UL,
0xbd0e42b6UL, 0x5d594000UL, 0x3fb9335eUL, 0x3abd47daUL, 0x3d23115cUL,
0x2f40e000UL, 0x3fb8197eUL, 0xf96ffdf7UL, 0x3d0f80dcUL, 0x0aeac000UL,
0x3fb700d3UL, 0xa99ded32UL, 0x3cec1e8dUL, 0x4d97a000UL, 0x3fb5e95aUL,
0x3c5d1d1eUL, 0xbd2c6906UL, 0x5d208000UL, 0x3fb4d311UL, 0x82f4e1efUL,
0xbcf53a25UL, 0xa7d1e000UL, 0x3fb3bdf5UL, 0xa5db4ed7UL, 0x3d2cc85eUL,
0xa4472000UL, 0x3fb2aa04UL, 0xae9c697dUL, 0xbd20b6e8UL, 0xd1466000UL,
0x3fb1973bUL, 0x560d9e9bUL, 0xbd25325dUL, 0xb59e4000UL, 0x3fb08598UL,
0x7009902cUL, 0xbd17e5ddUL, 0xc006c000UL, 0x3faeea31UL, 0x4fc93b7bUL,
0xbd0e113eUL, 0xcdddc000UL, 0x3faccb73UL, 0x47d82807UL, 0xbd1a68f2UL,
0xd0fb0000UL, 0x3faaaef2UL, 0x353bb42eUL, 0x3d20fc1aUL, 0x149fc000UL,
0x3fa894aaUL, 0xd05a267dUL, 0xbd197995UL, 0xf2d4c000UL, 0x3fa67c94UL,
0xec19afa2UL, 0xbd029efbUL, 0xd42e0000UL, 0x3fa466aeUL, 0x75bdfd28UL,
0xbd2c1673UL, 0x2f8d0000UL, 0x3fa252f3UL, 0xe021b67bUL, 0x3d283e9aUL,
0x89e74000UL, 0x3fa0415dUL, 0x5cf1d753UL, 0x3d0111c0UL, 0xec148000UL,
0x3f9c63d2UL, 0x3f9eb2f3UL, 0x3d2578c6UL, 0x28c90000UL, 0x3f984925UL,
0x325a0c34UL, 0xbd2aa0baUL, 0x25980000UL, 0x3f9432a9UL, 0x928637feUL,
0x3d098139UL, 0x58938000UL, 0x3f902056UL, 0x06e2f7d2UL, 0xbd23dc5bUL,
0xa3890000UL, 0x3f882448UL, 0xda74f640UL, 0xbd275577UL, 0x75890000UL,
0x3f801015UL, 0x999d2be8UL, 0xbd10c76bUL, 0x59580000UL, 0x3f700805UL,
0xcb31c67bUL, 0x3d2166afUL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
0x80000000UL, 0xfefa3800UL, 0x3fa62e42UL, 0x93c76730UL, 0x3ceef357UL,
0x92492492UL, 0x3fc24924UL, 0x00000000UL, 0xbfd00000UL, 0x3d6fb175UL,
0xbfc5555eUL, 0x55555555UL, 0x3fd55555UL, 0x9999999aUL, 0x3fc99999UL,
0x00000000UL, 0xbfe00000UL, 0x00000000UL, 0xffffe000UL, 0x00000000UL,
0xffffe000UL
};
// Emits the 32-bit x86 (SSE2 + x87) instruction sequence that evaluates the
// natural logarithm of a double.
//
// Stack protocol, as visible in the code below:
//  - 104 bytes of scratch are reserved below the incoming rsp. They are NOT
//    released in this routine. NOTE(review): presumably the enclosing stub
//    restores rsp (e.g. enter/leave) before returning -- confirm in the caller.
//  - The operand is loaded from [rsp + 112] (offset measured after the
//    adjustment), overwriting whatever xmm0 held on entry.
//  - tmp is spilled to [rsp + 40] on entry and reloaded on exit; in between it
//    holds the address of _static_const_table_log.
//  - The result is left on the x87 stack via fld_d.
//
//registers,
// input: xmm0
// scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
// rax, rdx, rcx, rbx (tmp)
void MacroAssembler::fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
Register eax, Register ecx, Register edx, Register tmp) {
Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2;
Label L_2TAG_PACKET_10_0_2;
assert_different_registers(tmp, eax, ecx, edx);
address static_const_table = (address)_static_const_table_log;
// Prologue: reserve scratch, preserve tmp, point tmp at the constant table.
subl(rsp, 104);
movl(Address(rsp, 40), tmp);
lea(tmp, ExternalAddress(static_const_table));
// xmm2 = 1.0 (16368 == 0x3FF0 placed in the top 16 bits of the low double).
xorpd(xmm2, xmm2);
movl(eax, 16368);
pinsrw(xmm2, eax, 3);
// xmm3 = fixed exponent pattern 0x77F0 (30704) in the top 16 bits; it is
// OR-ed onto the isolated mantissa of x further down.
xorpd(xmm3, xmm3);
movl(edx, 30704);
pinsrw(xmm3, edx, 3);
// x is taken from the stack, not from the incoming xmm0.
movsd(xmm0, Address(rsp, 112));
movapd(xmm1, xmm0);
// xmm4 = 0x8000 in the low dword; added to the rcpss result before the
// table index is extracted from it.
movl(ecx, 32768);
movdl(xmm4, ecx);
movsd(xmm5, Address(tmp, 2128)); // 0x00000000UL, 0xffffe000UL
// eax = top 16 bits of x: sign, 11-bit exponent, top 4 mantissa bits.
pextrw(eax, xmm0, 3);
// Build a single-precision value from the leading bits of x (with the
// exponent bits of 1.0 OR-ed in) and take its approximate reciprocal.
por(xmm0, xmm2);
psllq(xmm0, 5);
// ecx = exponent bias used on the normal path (0x3FE0, pre-shifted by 4).
movl(ecx, 16352);
psrlq(xmm0, 34);
rcpss(xmm0, xmm0);
// xmm1 = mantissa bits of x only (sign and exponent shifted out).
psllq(xmm1, 12);
pshufd(xmm6, xmm5, 228);
psrlq(xmm1, 12);
// Range check: after subtracting one exponent step (16), an unsigned compare
// against 0x7FE0 routes zero/denormal, Inf/NaN and negative inputs to the
// special-case code.
subl(eax, 16);
cmpl(eax, 32736);
jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2);
// Main path; the denormal path re-enters here after rescaling x.
bind(L_2TAG_PACKET_1_0_2);
paddd(xmm0, xmm4);
por(xmm1, xmm3);
// edx receives the adjusted reciprocal bits; the table index is cut from it below.
movdl(edx, xmm0);
psllq(xmm0, 29);
// Split the mantissa into a masked high part (xmm5) and the remainder (xmm1).
pand(xmm5, xmm1);
pand(xmm0, xmm6);
subsd(xmm1, xmm5);
mulpd(xmm5, xmm0);
// eax = 16 * unbiased exponent (mask keeps the exponent field, ecx is the bias).
andl(eax, 32752);
subl(eax, ecx);
cvtsi2sdl(xmm7, eax);
mulsd(xmm1, xmm0);
movsd(xmm6, Address(tmp, 2064)); // 0xfefa3800UL, 0x3fa62e42UL
movdqu(xmm3, Address(tmp, 2080)); // 0x92492492UL, 0x3fc24924UL, 0x00000000UL, 0xbfd00000UL
subsd(xmm5, xmm2);
// edx = byte offset of a 16-byte table entry (bits 16..23 of the reciprocal, times 16).
andl(edx, 16711680);
shrl(edx, 12);
movdqu(xmm0, Address(tmp, edx));
movdqu(xmm4, Address(tmp, 2096)); // 0x3d6fb175UL, 0xbfc5555eUL, 0x55555555UL, 0x3fd55555UL
addsd(xmm1, xmm5);
movdqu(xmm2, Address(tmp, 2112)); // 0x9999999aUL, 0x3fc99999UL, 0x00000000UL, 0xbfe00000UL
// Exponent contribution: xmm7 (16*k) times the constants at 2064 and 2072.
mulsd(xmm6, xmm7);
pshufd(xmm5, xmm1, 68);
mulsd(xmm7, Address(tmp, 2072)); // 0x93c76730UL, 0x3ceef357UL, 0x92492492UL, 0x3fc24924UL
// Polynomial evaluation on packed pairs, interleaved with the summation of
// the table entry, the exponent term and the reduced argument.
mulsd(xmm3, xmm1);
addsd(xmm0, xmm6);
mulpd(xmm4, xmm5);
mulpd(xmm5, xmm5);
pshufd(xmm6, xmm0, 228);
addsd(xmm0, xmm1);
addpd(xmm4, xmm2);
mulpd(xmm3, xmm5);
subsd(xmm6, xmm0);
mulsd(xmm4, xmm1);
pshufd(xmm2, xmm0, 238);
addsd(xmm1, xmm6);
mulsd(xmm5, xmm5);
addsd(xmm7, xmm2);
addpd(xmm4, xmm3);
addsd(xmm1, xmm7);
mulpd(xmm4, xmm5);
addsd(xmm1, xmm4);
pshufd(xmm5, xmm4, 238);
addsd(xmm1, xmm5);
// Final sum: the accumulated small terms in xmm1 are added into xmm0.
addsd(xmm0, xmm1);
jmp(L_2TAG_PACKET_2_0_2);
// Special-case dispatch: reload x and undo the earlier "- 16" on eax.
bind(L_2TAG_PACKET_0_0_2);
movsd(xmm0, Address(rsp, 112));
movdqu(xmm1, xmm0);
addl(eax, 16);
// Sign bit set -> negative handling.
cmpl(eax, 32768);
jcc(Assembler::aboveEqual, L_2TAG_PACKET_3_0_2);
// Exponent field zero -> zero or denormal handling.
cmpl(eax, 16);
jcc(Assembler::below, L_2TAG_PACKET_4_0_2);
// Remaining case (exponent all ones, sign clear) and NaNs: result = x + x.
bind(L_2TAG_PACKET_5_0_2);
addsd(xmm0, xmm0);
jmp(L_2TAG_PACKET_2_0_2);
// Negative input with exponent all ones. The first jcc consumes the flags of
// the cmpl executed just before the branch that leads here.
bind(L_2TAG_PACKET_6_0_2);
jcc(Assembler::above, L_2TAG_PACKET_5_0_2);
cmpl(edx, 0);
jcc(Assembler::above, L_2TAG_PACKET_5_0_2);
jmp(L_2TAG_PACKET_7_0_2);
// Negative input: edx = low 32 bits, ecx = high 32 bits with the sign shifted out.
bind(L_2TAG_PACKET_3_0_2);
movdl(edx, xmm1);
psrlq(xmm1, 32);
movdl(ecx, xmm1);
addl(ecx, ecx);
cmpl(ecx, -2097152);
jcc(Assembler::aboveEqual, L_2TAG_PACKET_6_0_2);
// All remaining bits zero means the input was -0.
orl(edx, ecx);
cmpl(edx, 0);
jcc(Assembler::equal, L_2TAG_PACKET_8_0_2);
// Result = 0 * Inf (32752 == 0x7FF0), i.e. NaN.
bind(L_2TAG_PACKET_7_0_2);
xorpd(xmm1, xmm1);
xorpd(xmm0, xmm0);
movl(eax, 32752);
pinsrw(xmm1, eax, 3);
// NOTE(review): edx is set to a small code here and in the divide-by-zero
// case but is not read again in this routine -- confirm whether it is needed.
movl(edx, 3);
mulsd(xmm0, xmm1);
// Common exit for the error results: spill the result to [rsp], reload xmm0
// with the original operand, push the result onto the x87 stack.
bind(L_2TAG_PACKET_9_0_2);
movsd(Address(rsp, 0), xmm0);
movsd(xmm0, Address(rsp, 112));
fld_d(Address(rsp, 0));
jmp(L_2TAG_PACKET_10_0_2);
// Zero input: result = -1.0 / 0.0 (49136 == 0xBFF0), i.e. -Inf.
bind(L_2TAG_PACKET_8_0_2);
xorpd(xmm1, xmm1);
xorpd(xmm0, xmm0);
movl(eax, 49136);
pinsrw(xmm0, eax, 3);
divsd(xmm0, xmm1);
movl(edx, 2);
jmp(L_2TAG_PACKET_9_0_2);
// Exponent field zero: +0 goes to the divide-by-zero case, anything else is
// a denormal.
bind(L_2TAG_PACKET_4_0_2);
movdl(edx, xmm1);
psrlq(xmm1, 32);
movdl(ecx, xmm1);
orl(edx, ecx);
cmpl(edx, 0);
jcc(Assembler::equal, L_2TAG_PACKET_8_0_2);
// Denormal: scale x by 2^128 (18416 == 0x47F0), repeat the setup done at the
// top of the routine, and use 18416 as the exponent bias so the scaling is
// cancelled when the exponent is extracted on the main path.
xorpd(xmm1, xmm1);
movl(eax, 18416);
pinsrw(xmm1, eax, 3);
mulsd(xmm0, xmm1);
movapd(xmm1, xmm0);
pextrw(eax, xmm0, 3);
por(xmm0, xmm2);
psllq(xmm0, 5);
movl(ecx, 18416);
psrlq(xmm0, 34);
rcpss(xmm0, xmm0);
psllq(xmm1, 12);
pshufd(xmm6, xmm5, 228);
psrlq(xmm1, 12);
jmp(L_2TAG_PACKET_1_0_2);
// Normal exit: move the SSE result onto the x87 stack through [rsp + 24].
bind(L_2TAG_PACKET_2_0_2);
movsd(Address(rsp, 24), xmm0);
fld_d(Address(rsp, 24));
// Epilogue shared by all paths: restore tmp. rsp is left adjusted.
bind(L_2TAG_PACKET_10_0_2);
movl(tmp, Address(rsp, 40));
}

View File

@ -1,357 +0,0 @@
/*
* Copyright (c) 2016, 2021, Intel Corporation. All rights reserved.
* Intel Math Library (LIBM) Source Code
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "asm/assembler.hpp"
#include "asm/assembler.inline.hpp"
#include "macroAssembler_x86.hpp"
#include "runtime/stubRoutines.hpp"
#include "utilities/globalDefinitions.hpp"
/******************************************************************************/
// ALGORITHM DESCRIPTION - LOG10()
// ---------------------
//
// Let x=2^k * mx, mx in [1,2)
//
// Get B~1/mx based on the output of rcpss instruction (B0)
// B = int((B0*LH*2^7+0.5))/2^7
// LH is a short approximation for log10(e)
//
// Reduced argument: r=B*mx-LH (computed accurately in high and low parts)
//
// Result: k*log10(2) - log(B) + p(r)
// p(r) is a degree 7 polynomial
// -log(B) read from data table (high, low parts)
// Result is formed from high and low parts
//
// Special cases:
// log10(0) = -INF with divide-by-zero exception raised
// log10(1) = +0
// log10(x) = NaN with invalid exception raised if x < -0, including -INF
// log10(+INF) = +INF
//
/******************************************************************************/
// The 32 bit code is at most SSE2 compliant
//
// Constant pool for fast_log10(). It is addressed by byte offset from the
// table base; every juint is 4 bytes and each source line below holds five.
//   [   0 .. 2063]  129 entries of 16 bytes, fetched by the indexed movdqu in
//                   fast_log10(). Per the algorithm description these are the
//                   (high, low) parts of -log(B) -- NOTE(review): confirm.
//                   The entry at offset 2048 is all zero.
//   [2064 .. 2071]  0x3f934413509f7800, multiplied by the scaled exponent
//   [2072 .. 2079]  0x3cdfef311f12b358, multiplied by the scaled exponent
//   [2080 .. 2127]  six doubles loaded as three packed pairs (presumably the
//                   polynomial coefficients -- confirm)
//   [2128 .. 2143]  two 64-bit masks: 0xfffffffff8000000, 0xffffe00000000000
//   [2144 .. 2151]  0x3fdbc00000000000 (0.43359375)
//   [2152 .. 2159]  0x3f5a7a6cbf2e4108
ATTRIBUTE_ALIGNED(16) static const juint _static_const_table_log10[] =
{
0x509f7800UL, 0x3fd34413UL, 0x1f12b358UL, 0x3d1fef31UL, 0x80333400UL,
0x3fd32418UL, 0xc671d9d0UL, 0xbcf542bfUL, 0x51195000UL, 0x3fd30442UL,
0x78a4b0c3UL, 0x3d18216aUL, 0x6fc79400UL, 0x3fd2e490UL, 0x80fa389dUL,
0xbc902869UL, 0x89d04000UL, 0x3fd2c502UL, 0x75c2f564UL, 0x3d040754UL,
0x4ddd1c00UL, 0x3fd2a598UL, 0xd219b2c3UL, 0xbcfa1d84UL, 0x6baa7c00UL,
0x3fd28651UL, 0xfd9abec1UL, 0x3d1be6d3UL, 0x94028800UL, 0x3fd2672dUL,
0xe289a455UL, 0xbd1ede5eUL, 0x78b86400UL, 0x3fd2482cUL, 0x6734d179UL,
0x3d1fe79bUL, 0xcca3c800UL, 0x3fd2294dUL, 0x981a40b8UL, 0xbced34eaUL,
0x439c5000UL, 0x3fd20a91UL, 0xcc392737UL, 0xbd1a9cc3UL, 0x92752c00UL,
0x3fd1ebf6UL, 0x03c9afe7UL, 0x3d1e98f8UL, 0x6ef8dc00UL, 0x3fd1cd7dUL,
0x71dae7f4UL, 0x3d08a86cUL, 0x8fe4dc00UL, 0x3fd1af25UL, 0xee9185a1UL,
0xbcff3412UL, 0xace59400UL, 0x3fd190eeUL, 0xc2cab353UL, 0x3cf17ed9UL,
0x7e925000UL, 0x3fd172d8UL, 0x6952c1b2UL, 0x3cf1521cUL, 0xbe694400UL,
0x3fd154e2UL, 0xcacb79caUL, 0xbd0bdc78UL, 0x26cbac00UL, 0x3fd1370dUL,
0xf71f4de1UL, 0xbd01f8beUL, 0x72fa0800UL, 0x3fd11957UL, 0x55bf910bUL,
0x3c946e2bUL, 0x5f106000UL, 0x3fd0fbc1UL, 0x39e639c1UL, 0x3d14a84bUL,
0xa802a800UL, 0x3fd0de4aUL, 0xd3f31d5dUL, 0xbd178385UL, 0x0b992000UL,
0x3fd0c0f3UL, 0x3843106fUL, 0xbd1f602fUL, 0x486ce800UL, 0x3fd0a3baUL,
0x8819497cUL, 0x3cef987aUL, 0x1de49400UL, 0x3fd086a0UL, 0x1caa0467UL,
0x3d0faec7UL, 0x4c30cc00UL, 0x3fd069a4UL, 0xa4424372UL, 0xbd1618fcUL,
0x94490000UL, 0x3fd04cc6UL, 0x946517d2UL, 0xbd18384bUL, 0xb7e84000UL,
0x3fd03006UL, 0xe0109c37UL, 0xbd19a6acUL, 0x798a0c00UL, 0x3fd01364UL,
0x5121e864UL, 0xbd164cf7UL, 0x38ce8000UL, 0x3fcfedbfUL, 0x46214d1aUL,
0xbcbbc402UL, 0xc8e62000UL, 0x3fcfb4efUL, 0xdab93203UL, 0x3d1e0176UL,
0x2cb02800UL, 0x3fcf7c5aUL, 0x2a2ea8e4UL, 0xbcfec86aUL, 0xeeeaa000UL,
0x3fcf43fdUL, 0xc18e49a4UL, 0x3cf110a8UL, 0x9bb6e800UL, 0x3fcf0bdaUL,
0x923cc9c0UL, 0xbd15ce99UL, 0xc093f000UL, 0x3fced3efUL, 0x4d4b51e9UL,
0x3d1a04c7UL, 0xec58f800UL, 0x3fce9c3cUL, 0x163cad59UL, 0x3cac8260UL,
0x9a907000UL, 0x3fce2d7dUL, 0x3fa93646UL, 0x3ce4a1c0UL, 0x37311000UL,
0x3fcdbf99UL, 0x32abd1fdUL, 0x3d07ea9dUL, 0x6744b800UL, 0x3fcd528cUL,
0x4dcbdfd4UL, 0xbd1b08e2UL, 0xe36de800UL, 0x3fcce653UL, 0x0b7b7f7fUL,
0xbd1b8f03UL, 0x77506800UL, 0x3fcc7aecUL, 0xa821c9fbUL, 0x3d13c163UL,
0x00ff8800UL, 0x3fcc1053UL, 0x536bca76UL, 0xbd074ee5UL, 0x70719800UL,
0x3fcba684UL, 0xd7da9b6bUL, 0xbd1fbf16UL, 0xc6f8d800UL, 0x3fcb3d7dUL,
0xe2220bb3UL, 0x3d1a295dUL, 0x16c15800UL, 0x3fcad53cUL, 0xe724911eUL,
0xbcf55822UL, 0x82533800UL, 0x3fca6dbcUL, 0x6d982371UL, 0x3cac567cUL,
0x3c19e800UL, 0x3fca06fcUL, 0x84d17d80UL, 0x3d1da204UL, 0x85ef8000UL,
0x3fc9a0f8UL, 0x54466a6aUL, 0xbd002204UL, 0xb0ac2000UL, 0x3fc93baeUL,
0xd601fd65UL, 0x3d18840cUL, 0x1bb9b000UL, 0x3fc8d71cUL, 0x7bf58766UL,
0xbd14f897UL, 0x34aae800UL, 0x3fc8733eUL, 0x3af6ac24UL, 0xbd0f5c45UL,
0x76d68000UL, 0x3fc81012UL, 0x4303e1a1UL, 0xbd1f9a80UL, 0x6af57800UL,
0x3fc7ad96UL, 0x43fbcb46UL, 0x3cf4c33eUL, 0xa6c51000UL, 0x3fc74bc7UL,
0x70f0eac5UL, 0xbd192e3bUL, 0xccab9800UL, 0x3fc6eaa3UL, 0xc0093dfeUL,
0xbd0faf15UL, 0x8b60b800UL, 0x3fc68a28UL, 0xde78d5fdUL, 0xbc9ea4eeUL,
0x9d987000UL, 0x3fc62a53UL, 0x962bea6eUL, 0xbd194084UL, 0xc9b0e800UL,
0x3fc5cb22UL, 0x888dd999UL, 0x3d1fe201UL, 0xe1634800UL, 0x3fc56c93UL,
0x16ada7adUL, 0x3d1b1188UL, 0xc176c000UL, 0x3fc50ea4UL, 0x4159b5b5UL,
0xbcf09c08UL, 0x51766000UL, 0x3fc4b153UL, 0x84393d23UL, 0xbcf6a89cUL,
0x83695000UL, 0x3fc4549dUL, 0x9f0b8bbbUL, 0x3d1c4b8cUL, 0x538d5800UL,
0x3fc3f881UL, 0xf49df747UL, 0x3cf89b99UL, 0xc8138000UL, 0x3fc39cfcUL,
0xd503b834UL, 0xbd13b99fUL, 0xf0df0800UL, 0x3fc3420dUL, 0xf011b386UL,
0xbd05d8beUL, 0xe7466800UL, 0x3fc2e7b2UL, 0xf39c7bc2UL, 0xbd1bb94eUL,
0xcdd62800UL, 0x3fc28de9UL, 0x05e6d69bUL, 0xbd10ed05UL, 0xd015d800UL,
0x3fc234b0UL, 0xe29b6c9dUL, 0xbd1ff967UL, 0x224ea800UL, 0x3fc1dc06UL,
0x727711fcUL, 0xbcffb30dUL, 0x01540000UL, 0x3fc183e8UL, 0x39786c5aUL,
0x3cc23f57UL, 0xb24d9800UL, 0x3fc12c54UL, 0xc905a342UL, 0x3d003a1dUL,
0x82835800UL, 0x3fc0d54aUL, 0x9b9920c0UL, 0x3d03b25aUL, 0xc72ac000UL,
0x3fc07ec7UL, 0x46f26a24UL, 0x3cf0fa41UL, 0xdd35d800UL, 0x3fc028caUL,
0x41d9d6dcUL, 0x3d034a65UL, 0x52474000UL, 0x3fbfa6a4UL, 0x44f66449UL,
0x3d19cad3UL, 0x2da3d000UL, 0x3fbefcb8UL, 0x67832999UL, 0x3d18400fUL,
0x32a10000UL, 0x3fbe53ceUL, 0x9c0e3b1aUL, 0xbcff62fdUL, 0x556b7000UL,
0x3fbdabe3UL, 0x02976913UL, 0xbcf8243bUL, 0x97e88000UL, 0x3fbd04f4UL,
0xec793797UL, 0x3d1c0578UL, 0x09647000UL, 0x3fbc5effUL, 0x05fc0565UL,
0xbd1d799eUL, 0xc6426000UL, 0x3fbbb9ffUL, 0x4625f5edUL, 0x3d1f5723UL,
0xf7afd000UL, 0x3fbb15f3UL, 0xdd5aae61UL, 0xbd1a7e1eUL, 0xd358b000UL,
0x3fba72d8UL, 0x3314e4d3UL, 0x3d17bc91UL, 0x9b1f5000UL, 0x3fb9d0abUL,
0x9a4d514bUL, 0x3cf18c9bUL, 0x9cd4e000UL, 0x3fb92f69UL, 0x7e4496abUL,
0x3cf1f96dUL, 0x31f4f000UL, 0x3fb88f10UL, 0xf56479e7UL, 0x3d165818UL,
0xbf628000UL, 0x3fb7ef9cUL, 0x26bf486dUL, 0xbd1113a6UL, 0xb526b000UL,
0x3fb7510cUL, 0x1a1c3384UL, 0x3ca9898dUL, 0x8e31e000UL, 0x3fb6b35dUL,
0xb3875361UL, 0xbd0661acUL, 0xd01de000UL, 0x3fb6168cUL, 0x2a7cacfaUL,
0xbd1bdf10UL, 0x0af23000UL, 0x3fb57a98UL, 0xff868816UL, 0x3cf046d0UL,
0xd8ea0000UL, 0x3fb4df7cUL, 0x1515fbe7UL, 0xbd1fd529UL, 0xde3b2000UL,
0x3fb44538UL, 0x6e59a132UL, 0x3d1faeeeUL, 0xc8df9000UL, 0x3fb3abc9UL,
0xf1322361UL, 0xbd198807UL, 0x505f1000UL, 0x3fb3132dUL, 0x0888e6abUL,
0x3d1e5380UL, 0x359bd000UL, 0x3fb27b61UL, 0xdfbcbb22UL, 0xbcfe2724UL,
0x429ee000UL, 0x3fb1e463UL, 0x6eb4c58cUL, 0xbcfe4dd6UL, 0x4a673000UL,
0x3fb14e31UL, 0x4ce1ac9bUL, 0x3d1ba691UL, 0x28b96000UL, 0x3fb0b8c9UL,
0x8c7813b8UL, 0xbd0b3872UL, 0xc1f08000UL, 0x3fb02428UL, 0xc2bc8c2cUL,
0x3cb5ea6bUL, 0x05a1a000UL, 0x3faf209cUL, 0x72e8f18eUL, 0xbce8df84UL,
0xc0b5e000UL, 0x3fadfa6dUL, 0x9fdef436UL, 0x3d087364UL, 0xaf416000UL,
0x3facd5c2UL, 0x1068c3a9UL, 0x3d0827e7UL, 0xdb356000UL, 0x3fabb296UL,
0x120a34d3UL, 0x3d101a9fUL, 0x5dfea000UL, 0x3faa90e6UL, 0xdaded264UL,
0xbd14c392UL, 0x6034c000UL, 0x3fa970adUL, 0x1c9d06a9UL, 0xbd1b705eUL,
0x194c6000UL, 0x3fa851e8UL, 0x83996ad9UL, 0xbd0117bcUL, 0xcf4ac000UL,
0x3fa73492UL, 0xb1a94a62UL, 0xbca5ea42UL, 0xd67b4000UL, 0x3fa618a9UL,
0x75aed8caUL, 0xbd07119bUL, 0x9126c000UL, 0x3fa4fe29UL, 0x5291d533UL,
0x3d12658fUL, 0x6f4d4000UL, 0x3fa3e50eUL, 0xcd2c5cd9UL, 0x3d1d5c70UL,
0xee608000UL, 0x3fa2cd54UL, 0xd1008489UL, 0x3d1a4802UL, 0x9900e000UL,
0x3fa1b6f9UL, 0x54fb5598UL, 0xbd16593fUL, 0x06bb6000UL, 0x3fa0a1f9UL,
0x64ef57b4UL, 0xbd17636bUL, 0xb7940000UL, 0x3f9f1c9fUL, 0xee6a4737UL,
0x3cb5d479UL, 0x91aa0000UL, 0x3f9cf7f5UL, 0x3a16373cUL, 0x3d087114UL,
0x156b8000UL, 0x3f9ad5edUL, 0x836c554aUL, 0x3c6900b0UL, 0xd4764000UL,
0x3f98b67fUL, 0xed12f17bUL, 0xbcffc974UL, 0x77dec000UL, 0x3f9699a7UL,
0x232ce7eaUL, 0x3d1e35bbUL, 0xbfbf4000UL, 0x3f947f5dUL, 0xd84ffa6eUL,
0x3d0e0a49UL, 0x82c7c000UL, 0x3f92679cUL, 0x8d170e90UL, 0xbd14d9f2UL,
0xadd20000UL, 0x3f90525dUL, 0x86d9f88eUL, 0x3cdeb986UL, 0x86f10000UL,
0x3f8c7f36UL, 0xb9e0a517UL, 0x3ce29faaUL, 0xb75c8000UL, 0x3f885e9eUL,
0x542568cbUL, 0xbd1f7bdbUL, 0x46b30000UL, 0x3f8442e8UL, 0xb954e7d9UL,
0x3d1e5287UL, 0xb7e60000UL, 0x3f802c07UL, 0x22da0b17UL, 0xbd19fb27UL,
0x6c8b0000UL, 0x3f7833e3UL, 0x821271efUL, 0xbd190f96UL, 0x29910000UL,
0x3f701936UL, 0xbc3491a5UL, 0xbd1bcf45UL, 0x354a0000UL, 0x3f600fe3UL,
0xc0ff520aUL, 0xbd19d71cUL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
0x00000000UL, 0x509f7800UL, 0x3f934413UL, 0x1f12b358UL, 0x3cdfef31UL,
0xc1a5f12eUL, 0x40358874UL, 0x64d4ef0dUL, 0xc0089309UL, 0x385593b1UL,
0xc025c917UL, 0xdc963467UL, 0x3ffc6a02UL, 0x7f9d3aa1UL, 0x4016ab9fUL,
0xdc77b115UL, 0xbff27af2UL, 0xf8000000UL, 0xffffffffUL, 0x00000000UL,
0xffffe000UL, 0x00000000UL, 0x3fdbc000UL, 0xbf2e4108UL, 0x3f5a7a6cUL
};
// Emits the 32-bit x86 (SSE2 + x87) instruction sequence that evaluates the
// base-10 logarithm of a double.
//
// Stack protocol, as visible in the code below:
//  - 104 bytes of scratch are reserved below the incoming rsp. They are NOT
//    released in this routine. NOTE(review): presumably the enclosing stub
//    restores rsp (e.g. enter/leave) before returning -- confirm in the caller.
//  - The operand is loaded from [rsp + 112] (offset measured after the
//    adjustment), overwriting whatever xmm0 held on entry.
//  - tmp is spilled to [rsp + 40] on entry and reloaded on exit; in between it
//    holds the address of _static_const_table_log10.
//  - The result is left on the x87 stack via fld_d.
//
// The trailing comments on table loads list the 32-bit words actually found
// at that byte offset of _static_const_table_log10, lowest address first.
//
//registers,
// input: xmm0
// scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
// rax, rdx, rcx, rbx (tmp)
void MacroAssembler::fast_log10(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
Register eax, Register ecx, Register edx, Register tmp) {
Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, L_2TAG_PACKET_10_0_2;
assert_different_registers(tmp, eax, ecx, edx);
address static_const_table_log10 = (address)_static_const_table_log10;
// Prologue: reserve scratch, preserve tmp, point tmp at the constant table.
subl(rsp, 104);
movl(Address(rsp, 40), tmp);
lea(tmp, ExternalAddress(static_const_table_log10));
// xmm2 = 1.0 (16368 == 0x3FF0 placed in the top 16 bits of the low double).
xorpd(xmm2, xmm2);
movl(eax, 16368);
pinsrw(xmm2, eax, 3);
// xmm7 = 0x3EDE0000, the single-precision encoding of 0.43359375 (the same
// value as the double constant at table offset 2144, i.e. LH).
movl(ecx, 1054736384);
movdl(xmm7, ecx);
// xmm3 = fixed exponent pattern 0x77F0 (30704) in the top 16 bits; it is
// OR-ed onto the isolated mantissa of x further down.
xorpd(xmm3, xmm3);
movl(edx, 30704);
pinsrw(xmm3, edx, 3);
// x is taken from the stack, not from the incoming xmm0.
movsd(xmm0, Address(rsp, 112));
movdqu(xmm1, xmm0);
// xmm4 = 0x8000 in the low dword; added to the scaled reciprocal before the
// table index is extracted from it.
movl(edx, 32768);
movdl(xmm4, edx);
movdqu(xmm5, Address(tmp, 2128)); // masks: 0xf8000000UL, 0xffffffffUL, 0x00000000UL, 0xffffe000UL
// eax = top 16 bits of x: sign, 11-bit exponent, top 4 mantissa bits.
pextrw(eax, xmm0, 3);
por(xmm0, xmm2);
// ecx = exponent bias used on the normal path (0x3FE0, pre-shifted by 4).
movl(ecx, 16352);
psllq(xmm0, 5);
movsd(xmm2, Address(tmp, 2144)); // LH: 0x00000000UL, 0x3fdbc000UL
psrlq(xmm0, 34);
// B0: approximate reciprocal of the leading bits of x.
rcpss(xmm0, xmm0);
// xmm1 = mantissa bits of x only (sign and exponent shifted out).
psllq(xmm1, 12);
// xmm6 = xmm5 with its two 64-bit halves swapped (shuffle code 78).
pshufd(xmm6, xmm5, 78);
psrlq(xmm1, 12);
// Range check: after subtracting one exponent step (16), an unsigned compare
// against 0x7FE0 routes zero/denormal, Inf/NaN and negative inputs to the
// special-case code.
subl(eax, 16);
cmpl(eax, 32736);
jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2);
// Main path; the denormal path re-enters here after rescaling x.
bind(L_2TAG_PACKET_1_0_2);
// B0 * LH in single precision.
mulss(xmm0, xmm7);
por(xmm1, xmm3);
// Split the mantissa into a masked high part (xmm5) and the remainder (xmm1).
andpd(xmm5, xmm1);
paddd(xmm0, xmm4);
subsd(xmm1, xmm5);
// edx receives the adjusted product bits; the table index is cut from it below.
movdl(edx, xmm0);
psllq(xmm0, 29);
andpd(xmm0, xmm6);
// eax = 16 * unbiased exponent (mask keeps the exponent field, ecx is the bias).
andl(eax, 32752);
subl(eax, ecx);
cvtsi2sdl(xmm7, eax);
mulpd(xmm5, xmm0);
mulsd(xmm1, xmm0);
movsd(xmm6, Address(tmp, 2064)); // 0x509f7800UL, 0x3f934413UL
movdqu(xmm3, Address(tmp, 2080)); // 0xc1a5f12eUL, 0x40358874UL, 0x64d4ef0dUL, 0xc0089309UL
// Reduced argument: subtract LH (xmm2) from the high product.
subsd(xmm5, xmm2);
// edx = (bits 16..23 of the adjusted product) * 16; the load below applies a
// fixed displacement of -1504 to land on the matching 16-byte table entry.
andl(edx, 16711680);
shrl(edx, 12);
movdqu(xmm0, Address(tmp, edx, Address::times_1, -1504));
movdqu(xmm4, Address(tmp, 2096)); // 0x385593b1UL, 0xc025c917UL, 0xdc963467UL, 0x3ffc6a02UL
addsd(xmm1, xmm5);
movdqu(xmm2, Address(tmp, 2112)); // 0x7f9d3aa1UL, 0x4016ab9fUL, 0xdc77b115UL, 0xbff27af2UL
// Exponent contribution: xmm7 (16*k) times the constants at 2064 and 2072.
mulsd(xmm6, xmm7);
pshufd(xmm5, xmm1, 68);
mulsd(xmm7, Address(tmp, 2072)); // 0x1f12b358UL, 0x3cdfef31UL
// Polynomial evaluation on packed pairs, interleaved with the summation of
// the table entry, the exponent term and the reduced argument.
mulsd(xmm3, xmm1);
addsd(xmm0, xmm6);
mulpd(xmm4, xmm5);
movsd(xmm6, Address(tmp, 2152)); // 0xbf2e4108UL, 0x3f5a7a6cUL
mulpd(xmm5, xmm5);
addpd(xmm4, xmm2);
mulpd(xmm3, xmm5);
pshufd(xmm2, xmm0, 228);
addsd(xmm0, xmm1);
mulsd(xmm4, xmm1);
subsd(xmm2, xmm0);
mulsd(xmm6, xmm1);
addsd(xmm1, xmm2);
pshufd(xmm2, xmm0, 238);
mulsd(xmm5, xmm5);
addsd(xmm7, xmm2);
addsd(xmm1, xmm6);
addpd(xmm4, xmm3);
addsd(xmm1, xmm7);
mulpd(xmm4, xmm5);
addsd(xmm1, xmm4);
pshufd(xmm5, xmm4, 238);
addsd(xmm1, xmm5);
// Final sum: the accumulated small terms in xmm1 are added into xmm0.
addsd(xmm0, xmm1);
jmp(L_2TAG_PACKET_2_0_2);
// Special-case dispatch: reload x and undo the earlier "- 16" on eax.
bind(L_2TAG_PACKET_0_0_2);
movsd(xmm0, Address(rsp, 112)); // original operand, from the stack
movdqu(xmm1, xmm0);
addl(eax, 16);
// Sign bit set -> negative handling.
cmpl(eax, 32768);
jcc(Assembler::aboveEqual, L_2TAG_PACKET_3_0_2);
// Exponent field zero -> zero or denormal handling.
cmpl(eax, 16);
jcc(Assembler::below, L_2TAG_PACKET_4_0_2);
// Remaining case (exponent all ones, sign clear) and NaNs: result = x + x.
bind(L_2TAG_PACKET_5_0_2);
addsd(xmm0, xmm0);
jmp(L_2TAG_PACKET_2_0_2);
// Negative input with exponent all ones. The first jcc consumes the flags of
// the cmpl executed just before the branch that leads here.
bind(L_2TAG_PACKET_6_0_2);
jcc(Assembler::above, L_2TAG_PACKET_5_0_2);
cmpl(edx, 0);
jcc(Assembler::above, L_2TAG_PACKET_5_0_2);
jmp(L_2TAG_PACKET_7_0_2);
// Negative input: edx = low 32 bits, ecx = high 32 bits with the sign shifted out.
bind(L_2TAG_PACKET_3_0_2);
movdl(edx, xmm1);
psrlq(xmm1, 32);
movdl(ecx, xmm1);
addl(ecx, ecx);
cmpl(ecx, -2097152);
jcc(Assembler::aboveEqual, L_2TAG_PACKET_6_0_2);
// All remaining bits zero means the input was -0.
orl(edx, ecx);
cmpl(edx, 0);
jcc(Assembler::equal, L_2TAG_PACKET_8_0_2);
// Result = 0 * Inf (32752 == 0x7FF0), i.e. NaN.
bind(L_2TAG_PACKET_7_0_2);
xorpd(xmm1, xmm1);
xorpd(xmm0, xmm0);
movl(eax, 32752);
pinsrw(xmm1, eax, 3);
// NOTE(review): edx is set to a small code here and in the divide-by-zero
// case but is not read again in this routine -- confirm whether it is needed.
movl(edx, 9);
mulsd(xmm0, xmm1);
// Common exit for the error results: spill the result to [rsp], reload xmm0
// with the original operand, push the result onto the x87 stack.
bind(L_2TAG_PACKET_9_0_2);
movsd(Address(rsp, 0), xmm0);
movsd(xmm0, Address(rsp, 112)); // original operand, from the stack
fld_d(Address(rsp, 0));
jmp(L_2TAG_PACKET_10_0_2);
// Zero input: result = -1.0 / 0.0 (49136 == 0xBFF0), i.e. -Inf.
bind(L_2TAG_PACKET_8_0_2);
xorpd(xmm1, xmm1);
xorpd(xmm0, xmm0);
movl(eax, 49136);
pinsrw(xmm0, eax, 3);
divsd(xmm0, xmm1);
movl(edx, 8);
jmp(L_2TAG_PACKET_9_0_2);
// Exponent field zero: +0 goes to the divide-by-zero case, anything else is
// a denormal.
bind(L_2TAG_PACKET_4_0_2);
movdl(edx, xmm1);
psrlq(xmm1, 32);
movdl(ecx, xmm1);
orl(edx, ecx);
cmpl(edx, 0);
jcc(Assembler::equal, L_2TAG_PACKET_8_0_2);
// Denormal: scale x by 2^128 (18416 == 0x47F0) ...
xorpd(xmm1, xmm1);
movl(eax, 18416);
pinsrw(xmm1, eax, 3);
mulsd(xmm0, xmm1);
// ... rebuild 1.0 in xmm2 (it was overwritten with LH at the top) ...
xorpd(xmm2, xmm2);
movl(eax, 16368);
pinsrw(xmm2, eax, 3);
// ... and repeat the setup done at the top of the routine, using 18416 as the
// exponent bias so the scaling is cancelled when the exponent is extracted.
movdqu(xmm1, xmm0);
pextrw(eax, xmm0, 3);
por(xmm0, xmm2);
movl(ecx, 18416);
psllq(xmm0, 5);
movsd(xmm2, Address(tmp, 2144)); // LH: 0x00000000UL, 0x3fdbc000UL
psrlq(xmm0, 34);
rcpss(xmm0, xmm0);
psllq(xmm1, 12);
pshufd(xmm6, xmm5, 78);
psrlq(xmm1, 12);
jmp(L_2TAG_PACKET_1_0_2);
// Normal exit: move the SSE result onto the x87 stack through [rsp + 24].
bind(L_2TAG_PACKET_2_0_2);
movsd(Address(rsp, 24), xmm0);
fld_d(Address(rsp, 24));
// Epilogue shared by all paths: restore tmp. rsp is left adjusted.
bind(L_2TAG_PACKET_10_0_2);
movl(tmp, Address(rsp, 40));
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,41 +0,0 @@
/*
* Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "runtime/deoptimization.hpp"
#include "runtime/frame.inline.hpp"
#include "runtime/javaThread.hpp"
#include "runtime/stubRoutines.hpp"
// Implementation of the platform-specific part of StubRoutines - for
// a description of how to extend it, see the stubRoutines.hpp file.
//
// Out-of-line definitions of the StubRoutines::x86 static data members. Every
// member is defined as zero here.
// NOTE(review): presumably the real values are stored later, during stub
// generation -- confirm where these are written.
jint StubRoutines::x86::_fpu_cntrl_wrd_std = 0;
jint StubRoutines::x86::_fpu_cntrl_wrd_24 = 0;
jint StubRoutines::x86::_fpu_cntrl_wrd_trunc = 0;
jint StubRoutines::x86::_mxcsr_std = 0;
// Three-element arrays, all elements zero.
jint StubRoutines::x86::_fpu_subnormal_bias1[3] = { 0, 0, 0 };
jint StubRoutines::x86::_fpu_subnormal_bias2[3] = { 0, 0, 0 };

View File

@ -1,509 +0,0 @@
/*
* Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "asm/macroAssembler.hpp"
#include "compiler/disassembler.hpp"
#include "interpreter/interp_masm.hpp"
#include "interpreter/interpreter.hpp"
#include "interpreter/interpreterRuntime.hpp"
#include "interpreter/templateInterpreterGenerator.hpp"
#include "runtime/arguments.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubRoutines.hpp"
// Emission shorthand: "__ insn(...)" calls insn on the object returned by
// Disassembler::hook<InterpreterMacroAssembler>, which is handed the current
// source file, source line and _masm.
// NOTE(review): presumably the hook records file/line for annotating
// disassembly and returns _masm -- confirm in disassembler.hpp.
#define __ Disassembler::hook<InterpreterMacroAssembler>(__FILE__, __LINE__, _masm)->
// Emits the slow signature handler: it passes the method, the locals pointer
// and the current stack pointer to InterpreterRuntime::slow_signature_handler
// and then returns. Yields the address of the first emitted instruction.
address TemplateInterpreterGenerator::generate_slow_signature_handler() {
  // Register state expected by the emitted code:
  //   rbx: method
  //   rdi: pointer to locals
  //   rsp: end of copied parameters area
  //   rcx: temporary (receives a copy of rsp)
  address const handler_start = __ pc();
  address const runtime_fn =
      CAST_FROM_FN_PTR(address, InterpreterRuntime::slow_signature_handler);

  __ mov(rcx, rsp);
  __ call_VM(noreg, runtime_fn, rbx, rdi, rcx);
  __ ret(0);

  return handler_start;
}
/**
 * Interpreter entry for the static native method
 *   int java.util.zip.CRC32.update(int crc, int b)
 *
 * Fast path: fold one byte into the CRC using the CRC table and return
 * directly to the sender. If the safepoint poll fires, fall back to the
 * ordinary native entry instead.
 */
address TemplateInterpreterGenerator::generate_CRC32_update_entry() {
  assert(UseCRC32Intrinsics, "this intrinsic is not supported");

  // Register roles in the emitted code:
  //   rbx: Method*
  //   rsi: sender SP (must be preserved for the slow path; becomes SP on the
  //        fast path)
  //   rdx, rdi: scratch
  const Register crc = rax;  // running CRC, also the result register
  const Register val = rdx;  // the Java byte value to fold in
  const Register tbl = rdi;  // CRC table address

  address const fast_entry = __ pc();
  Label take_native_entry;

  // A pending safepoint means the full interpreter entry has to be used.
  __ get_thread(rdi);
  __ safepoint_poll(take_native_entry, rdi, false /* at_return */, false /* in_nmethod */);

  // No local frame and no stack alignment: only stub code is called and there
  // is no safepoint on this path.

  // The arguments sit on the Java expression stack in reverse order.
  __ movl(val, Address(rsp, wordSize));      // byte value
  __ movl(crc, Address(rsp, 2 * wordSize));  // initial CRC

  __ lea(tbl, ExternalAddress(StubRoutines::crc_table_addr()));
  __ notl(crc);  // ~crc
  __ update_byte_crc32(crc, val, tbl);
  __ notl(crc);  // ~crc

  // Result is in rax; return to the caller on the sender's stack.
  __ pop(rdi);       // return address
  __ mov(rsp, rsi);  // sp = sender sp
  __ jmp(rdi);

  // Slow path: the plain native entry.
  __ bind(take_native_entry);
  __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native));

  return fast_entry;
}
/**
 * Interpreter entry for the static native methods
 *   int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int len)
 *   int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len)
 *
 * Fast path: compute the start address of the data, call the
 * updateBytesCRC32 stub and return directly to the sender. If the safepoint
 * poll fires, fall back to the ordinary native entry instead.
 */
address TemplateInterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) {
  assert(UseCRC32Intrinsics, "this intrinsic is not supported");

  // Register roles in the emitted code:
  //   rbx: Method*
  //   rsi: sender SP (must be preserved for the slow path; becomes SP on the
  //        fast path)
  //   rdx, rdi: scratch
  const Register crc = rax;  // running CRC, also the result register
  const Register buf = rdx;  // address of the first byte to process
  const Register len = rdi;  // number of bytes

  // Expression stack layout on x86_32 (arguments are reversed; the first
  // argument slot is at ESP + 4):
  //
  //   updateBytes(int crc, byte[] b, int off, int len)
  //                   3        2        1        0
  //   updateByteBuffer(int crc, long buf, int off, int len)
  //                        4       2,3       1        0
  const bool from_byte_buffer = (kind == Interpreter::java_util_zip_CRC32_updateByteBuffer);
  const int crc_slot = from_byte_buffer ? 4 : 3;

  address const fast_entry = __ pc();
  Label take_native_entry;

  // A pending safepoint means the full interpreter entry has to be used.
  __ get_thread(rdi);
  __ safepoint_poll(take_native_entry, rdi, false /* at_return */, false /* in_nmethod */);

  // No local frame and no stack alignment: only stub code is called and there
  // is no safepoint on this path.

  __ movl(len, Address(rsp, 4 + 0));  // length

  // Start address: base (long buf, or byte[] plus array header) plus offset.
  __ movptr(buf, Address(rsp, 4 + 2 * wordSize));
  if (!from_byte_buffer) {
    __ addptr(buf, arrayOopDesc::base_offset_in_bytes(T_BYTE));  // + header size
  }
  __ addptr(buf, Address(rsp, 4 + 1 * wordSize));     // + offset
  __ movl(crc, Address(rsp, 4 + crc_slot * wordSize));  // initial CRC

  __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, StubRoutines::updateBytesCRC32()), crc, buf, len);

  // Result is in rax; return to the caller on the sender's stack.
  __ pop(rdi);       // return address
  __ mov(rsp, rsi);  // sp = sender sp
  __ jmp(rdi);

  // Slow path: the plain native entry.
  __ bind(take_native_entry);
  __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native));

  return fast_entry;
}
/**
 * Interpreter entry for the static native methods
 *   int java.util.zip.CRC32C.updateBytes(int crc, byte[] b, int off, int end)
 *   int java.util.zip.CRC32C.updateByteBuffer(int crc, long address, int off, int end)
 *
 * Computes the start address and length of the data, calls the
 * updateBytesCRC32C stub and returns directly to the sender. This entry emits
 * no safepoint poll and has no slow path.
 */
address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) {
  assert(UseCRC32CIntrinsics, "this intrinsic is not supported");

  const Register crc = rax;  // running CRC, also the result register
  const Register buf = rcx;  // address of the first byte to process
  const Register len = rdx;  // first holds 'end', then end - off

  // Expression stack layout on x86_32 (arguments are reversed; the first
  // argument slot is at ESP + 4):
  //
  //   CRC32C.updateBytes(int crc, byte[] b, int off, int end)
  //                          3        2        1        0
  //   CRC32C.updateByteBuffer(int crc, long address, int off, int end)
  //                               4        2,3          1        0
  const bool from_direct_buffer =
      (kind == Interpreter::java_util_zip_CRC32C_updateDirectByteBuffer);
  const int crc_slot = from_direct_buffer ? 4 : 3;

  address const entry_pc = __ pc();

  __ movl(len, Address(rsp, 4 + 0));             // end
  __ subl(len, Address(rsp, 4 + 1 * wordSize));  // end - offset == length

  // Start address: base (long address, or byte[] plus array header) plus offset.
  __ movptr(buf, Address(rsp, 4 + 2 * wordSize));
  if (!from_direct_buffer) {
    __ addptr(buf, arrayOopDesc::base_offset_in_bytes(T_BYTE));  // + header size
  }
  __ addptr(buf, Address(rsp, 4 + 1 * wordSize));     // + offset
  __ movl(crc, Address(rsp, 4 + crc_slot * wordSize));  // initial CRC

  __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, StubRoutines::updateBytesCRC32C()), crc, buf, len);

  // Result is in rax; return to the caller on the sender's stack.
  __ pop(rdi);       // return address
  __ mov(rsp, rsi);  // sp = sender sp
  __ jmp(rdi);

  return entry_pc;
}
/**
 * Interpreter entry for the static native method
 *   java.lang.Float.intBitsToFloat(int bits)
 *
 * Returns nullptr (no specialized entry is generated) when UseSSE < 1.
 */
address TemplateInterpreterGenerator::generate_Float_intBitsToFloat_entry() {
  if (UseSSE < 1) {
    return nullptr;
  }

  address const entry_pc = __ pc();

  // rsi holds the sender's SP.
  // No safepoint check here; the compiler intrinsic versions of this method
  // do not perform one either.

  // Move 'bits' into xmm0, where the interpreter returns its results.
  __ movflt(xmm0, Address(rsp, wordSize));

  // Return to the caller on the sender's stack.
  __ pop(rdi);       // return address
  __ mov(rsp, rsi);  // rsp = sender's SP
  __ jmp(rdi);

  return entry_pc;
}
/**
 * Interpreter entry for the static native method
 *   java.lang.Float.floatToRawIntBits(float value)
 *
 * Returns nullptr (no specialized entry is generated) when UseSSE < 1.
 */
address TemplateInterpreterGenerator::generate_Float_floatToRawIntBits_entry() {
  if (UseSSE < 1) {
    return nullptr;
  }

  address const entry_pc = __ pc();

  // rsi holds the sender's SP.
  // No safepoint check here; the compiler intrinsic versions of this method
  // do not perform one either.

  // Copy the raw bits of the floating-point argument into rax.
  __ movl(rax, Address(rsp, wordSize));

  // Return to the caller on the sender's stack.
  __ pop(rdi);       // return address
  __ mov(rsp, rsi);  // rsp = sender's SP
  __ jmp(rdi);

  return entry_pc;
}
/**
 * Interpreter entry for the static native method
 *   java.lang.Double.longBitsToDouble(long bits)
 *
 * Returns nullptr (no specialized entry is generated) when UseSSE < 2.
 */
address TemplateInterpreterGenerator::generate_Double_longBitsToDouble_entry() {
  if (UseSSE < 2) {
    return nullptr;
  }

  address const entry_pc = __ pc();

  // rsi holds the sender's SP.
  // No safepoint check here; the compiler intrinsic versions of this method
  // do not perform one either.

  // Move 'bits' into xmm0, where the interpreter returns its results.
  __ movdbl(xmm0, Address(rsp, wordSize));

  // Return to the caller on the sender's stack.
  __ pop(rdi);       // return address
  __ mov(rsp, rsi);  // rsp = sender's SP
  __ jmp(rdi);

  return entry_pc;
}
/**
 * Interpreter entry for the static native method
 *   java.lang.Double.doubleToRawLongBits(double value)
 *
 * Returns nullptr (no specialized entry is generated) when UseSSE < 2.
 */
address TemplateInterpreterGenerator::generate_Double_doubleToRawLongBits_entry() {
  if (UseSSE < 2) {
    return nullptr;
  }

  address const entry_pc = __ pc();

  // rsi holds the sender's SP.
  // No safepoint check here; the compiler intrinsic versions of this method
  // do not perform one either.

  // Copy the raw bits of the floating-point argument into the rdx:rax pair:
  // the word at 2*wordSize goes to rdx, the word at wordSize goes to rax.
  __ movl(rdx, Address(rsp, 2 * wordSize));
  __ movl(rax, Address(rsp, wordSize));

  // Return to the caller on the sender's stack.
  __ pop(rdi);       // return address
  __ mov(rsp, rsi);  // rsp = sender's SP
  __ jmp(rdi);

  return entry_pc;
}
/**
 * Interpreter entry for the static method
 *   java.lang.Float.float16ToFloat(short floatBinary16)
 *
 * Requires VM_Version::supports_float16().
 */
address TemplateInterpreterGenerator::generate_Float_float16ToFloat_entry() {
  assert(VM_Version::supports_float16(), "this intrinsic is not supported");

  address const entry_pc = __ pc();

  // rsi holds the sender's SP.

  // Sign-extend the 16-bit argument into rax, then convert it into xmm0.
  __ movswl(rax, Address(rsp, wordSize));
  __ flt16_to_flt(xmm0, rax);

  // Return to the caller on the sender's stack.
  __ pop(rdi);       // return address
  __ mov(rsp, rsi);  // rsp = sender's SP
  __ jmp(rdi);

  return entry_pc;
}
/**
 * Interpreter entry for the static method
 *   java.lang.Float.floatToFloat16(float value)
 *
 * Requires VM_Version::supports_float16().
 */
address TemplateInterpreterGenerator::generate_Float_floatToFloat16_entry() {
  assert(VM_Version::supports_float16(), "this intrinsic is not supported");

  address const entry_pc = __ pc();

  // rsi holds the sender's SP.

  // Load the float argument into xmm0 and convert it; the result lands in
  // rax, with xmm1 passed as the third operand of the conversion.
  __ movflt(xmm0, Address(rsp, wordSize));
  __ flt_to_flt16(rax, xmm0, xmm1);

  // Return to the caller on the sender's stack.
  __ pop(rdi);       // return address
  __ mov(rsp, rsi);  // rsp = sender's SP
  __ jmp(rdi);

  return entry_pc;
}
address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKind kind) {
// rbx,: Method*
// rcx: scratch
// rsi: sender sp
address entry_point = __ pc();
// These don't need a safepoint check because they aren't virtually
// callable. We won't enter these intrinsics from compiled code.
// If in the future we added an intrinsic which was virtually callable
// we'd have to worry about how to safepoint so that this code is used.
// mathematical functions inlined by compiler
// (interpreter must provide identical implementation
// in order to avoid monotonicity bugs when switching
// from interpreter to compiler in the middle of some
// computation)
//
// stack: [ ret adr ] <-- rsp
// [ lo(arg) ]
// [ hi(arg) ]
//
if (kind == Interpreter::java_lang_math_tanh) {
return nullptr;
}
if (kind == Interpreter::java_lang_math_fmaD) {
if (!UseFMA) {
return nullptr; // Generate a vanilla entry
}
__ movdbl(xmm2, Address(rsp, 5 * wordSize));
__ movdbl(xmm1, Address(rsp, 3 * wordSize));
__ movdbl(xmm0, Address(rsp, 1 * wordSize));
__ fmad(xmm0, xmm1, xmm2, xmm0);
__ pop(rdi); // get return address
__ mov(rsp, rsi); // set sp to sender sp
__ jmp(rdi);
return entry_point;
} else if (kind == Interpreter::java_lang_math_fmaF) {
if (!UseFMA) {
return nullptr; // Generate a vanilla entry
}
__ movflt(xmm2, Address(rsp, 3 * wordSize));
__ movflt(xmm1, Address(rsp, 2 * wordSize));
__ movflt(xmm0, Address(rsp, 1 * wordSize));
__ fmaf(xmm0, xmm1, xmm2, xmm0);
__ pop(rdi); // get return address
__ mov(rsp, rsi); // set sp to sender sp
__ jmp(rdi);
return entry_point;
}
__ fld_d(Address(rsp, 1*wordSize));
switch (kind) {
case Interpreter::java_lang_math_sin :
__ subptr(rsp, 2 * wordSize);
__ fstp_d(Address(rsp, 0));
if (VM_Version::supports_sse2() && StubRoutines::dsin() != nullptr) {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dsin())));
} else {
__ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dsin));
}
__ addptr(rsp, 2 * wordSize);
break;
case Interpreter::java_lang_math_cos :
__ subptr(rsp, 2 * wordSize);
__ fstp_d(Address(rsp, 0));
if (VM_Version::supports_sse2() && StubRoutines::dcos() != nullptr) {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dcos())));
} else {
__ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dcos));
}
__ addptr(rsp, 2 * wordSize);
break;
case Interpreter::java_lang_math_tan :
__ subptr(rsp, 2 * wordSize);
__ fstp_d(Address(rsp, 0));
if (StubRoutines::dtan() != nullptr) {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dtan())));
} else {
__ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dtan));
}
__ addptr(rsp, 2 * wordSize);
break;
case Interpreter::java_lang_math_sqrt:
__ fsqrt();
break;
case Interpreter::java_lang_math_abs:
__ fabs();
break;
case Interpreter::java_lang_math_log:
__ subptr(rsp, 2 * wordSize);
__ fstp_d(Address(rsp, 0));
if (StubRoutines::dlog() != nullptr) {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlog())));
} else {
__ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dlog));
}
__ addptr(rsp, 2 * wordSize);
break;
case Interpreter::java_lang_math_log10:
__ subptr(rsp, 2 * wordSize);
__ fstp_d(Address(rsp, 0));
if (StubRoutines::dlog10() != nullptr) {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlog10())));
} else {
__ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dlog10));
}
__ addptr(rsp, 2 * wordSize);
break;
case Interpreter::java_lang_math_pow:
__ fld_d(Address(rsp, 3*wordSize)); // second argument
__ subptr(rsp, 4 * wordSize);
__ fstp_d(Address(rsp, 0));
__ fstp_d(Address(rsp, 2 * wordSize));
if (StubRoutines::dpow() != nullptr) {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dpow())));
} else {
__ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dpow));
}
__ addptr(rsp, 4 * wordSize);
break;
case Interpreter::java_lang_math_exp:
__ subptr(rsp, 2*wordSize);
__ fstp_d(Address(rsp, 0));
if (StubRoutines::dexp() != nullptr) {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dexp())));
} else {
__ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dexp));
}
__ addptr(rsp, 2*wordSize);
break;
default :
ShouldNotReachHere();
}
// return double result in xmm0 for interpreter and compilers.
if (UseSSE >= 2) {
__ subptr(rsp, 2*wordSize);
__ fstp_d(Address(rsp, 0));
__ movdbl(xmm0, Address(rsp, 0));
__ addptr(rsp, 2*wordSize);
}
// done, result in FPU ST(0) or XMM0
__ pop(rdi); // get return address
__ mov(rsp, rsi); // set sp to sender sp
__ jmp(rdi);
return entry_point;
}
// Not supported: no entry is emitted for currentThread, so this generator
// always hands back nullptr.
address TemplateInterpreterGenerator::generate_currentThread() {
  return nullptr;
}

View File

@ -1,33 +0,0 @@
/*
* Copyright (c) 2020, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
#include "prims/upcallLinker.hpp"
// Placeholder implementation: it never builds a stub. Calling it trips
// ShouldNotCallThis(); the nullptr return only satisfies the signature.
// None of the arguments are read.
address UpcallLinker::make_upcall_stub(jobject receiver,
                                       Symbol* signature,
                                       BasicType* out_sig_bt,
                                       int total_out_args,
                                       BasicType ret_type,
                                       jobject jabi,
                                       jobject jconv,
                                       bool needs_return_buffer,
                                       int ret_buf_size) {
  ShouldNotCallThis();
  return nullptr;
}

View File

@ -1,265 +0,0 @@
/*
* Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "asm/macroAssembler.hpp"
#include "code/compiledIC.hpp"
#include "code/vtableStubs.hpp"
#include "interp_masm_x86.hpp"
#include "memory/resourceArea.hpp"
#include "oops/instanceKlass.hpp"
#include "oops/klassVtable.hpp"
#include "runtime/sharedRuntime.hpp"
#include "vmreg_x86.inline.hpp"
#ifdef COMPILER2
#include "opto/runtime.hpp"
#endif
// machine-dependent part of VtableStubs: create VtableStub of correct size and
// initialize its code
#define __ masm->
#ifndef PRODUCT
extern "C" void bad_compiled_vtable_index(JavaThread* thread, oop receiver, int index);
#endif
// These stubs are used by the compiler only.
// Argument registers, which must be preserved:
// rcx - receiver (always first argument)
// rdx - second argument (if any)
// Other registers that might be usable:
// rax - inline cache register (is interface for itable stub)
// rbx - method (used when calling out to interpreter)
// Available now, but may become callee-save at some point:
// rsi, rdi
// Note that rax and rdx are also used for return values.
// Creates and fills in the vtable stub for 'vtable_index'.
//
// The emitted code loads the receiver's klass from rcx, looks up the Method*
// for the given vtable index into rbx and jumps to that method's
// from_compiled entry. In non-PRODUCT builds with DebugVtables set, extra
// checks are emitted around the lookup.
//
// Returns the new stub, or nullptr if allocating the VtableStub failed.
// While emitting, the difference between assumed and actual instruction
// sequence sizes is accumulated in slop_bytes and passed to bookkeeping().
VtableStub* VtableStubs::create_vtable_stub(int vtable_index) {
// Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing.
const int stub_code_length = code_size_limit(true);
VtableStub* s = new(stub_code_length) VtableStub(true, vtable_index);
// Can be null if there is no free space in the code cache.
if (s == nullptr) {
return nullptr;
}
// Count unused bytes in instruction sequences of variable size.
// We add them to the computed buffer size in order to avoid
// overflow in subsequently generated stubs.
address start_pc;
int slop_bytes = 0;
int slop_delta = 0;
// No variance was detected in vtable stub sizes. Setting index_dependent_slop == 0 will unveil any deviation from this observation.
const int index_dependent_slop = 0;
ResourceMark rm;
// The assembler writes directly into the stub's own code area.
CodeBuffer cb(s->entry_point(), stub_code_length);
MacroAssembler* masm = new MacroAssembler(&cb);
#if (!defined(PRODUCT) && defined(COMPILER2))
if (CountCompiledCalls) {
__ incrementl(ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr()));
}
#endif
// get receiver (need to skip return address on top of stack)
assert(VtableStub::receiver_location() == rcx->as_VMReg(), "receiver expected in rcx");
// get receiver klass
// npe_addr records the pc of the klass load, which dereferences the receiver.
address npe_addr = __ pc();
__ movptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
#ifndef PRODUCT
if (DebugVtables) {
Label L;
start_pc = __ pc();
// check offset vs vtable length
__ cmpl(Address(rax, Klass::vtable_length_offset()), vtable_index*vtableEntry::size());
// 10 is the size assumed for the cmpl; whatever is not used counts as slop.
slop_delta = 10 - (__ pc() - start_pc); // cmpl varies in length, depending on data
slop_bytes += slop_delta;
assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta);
__ jcc(Assembler::greater, L);
__ movl(rbx, vtable_index);
// VTABLE TODO: find upper bound for call_VM length.
start_pc = __ pc();
__ call_VM(noreg, CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), rcx, rbx);
// 500 is the size assumed for the call_VM sequence.
slop_delta = 500 - (__ pc() - start_pc);
slop_bytes += slop_delta;
assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta);
__ bind(L);
}
#endif // PRODUCT
const Register method = rbx;
// load Method* and target address
start_pc = __ pc();
__ lookup_virtual_method(rax, vtable_index, method);
// 6 is the size assumed for the lookup sequence.
slop_delta = 6 - (int)(__ pc() - start_pc);
slop_bytes += slop_delta;
assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta);
#ifndef PRODUCT
if (DebugVtables) {
Label L;
// Stop only when method is non-null but its from_compiled entry is zero;
// a null method skips the check entirely.
__ cmpptr(method, NULL_WORD);
__ jcc(Assembler::equal, L);
__ cmpptr(Address(method, Method::from_compiled_offset()), NULL_WORD);
__ jcc(Assembler::notZero, L);
__ stop("Vtable entry is null");
__ bind(L);
}
#endif // PRODUCT
// rax: receiver klass
// method (rbx): Method*
// rcx: receiver
// ame_addr records the pc of the jump that dereferences the Method*.
address ame_addr = __ pc();
__ jmp( Address(method, Method::from_compiled_offset()));
masm->flush();
slop_bytes += index_dependent_slop; // add'l slop for size variance due to large itable offsets
bookkeeping(masm, tty, s, npe_addr, ame_addr, true, vtable_index, slop_bytes, index_dependent_slop);
return s;
}
// Creates and fills in the itable stub for 'itable_index'.
//
// The emitted code reads the resolved (REFC) and declaring (DEFC) interface
// klasses from the CompiledICData in rax, loads the receiver klass from rcx,
// runs lookup_interface_method_stub to find the Method* and jumps to that
// method's from_compiled entry. If the lookup branches to
// L_no_such_interface, the stub jumps to the "handle wrong method" stub.
//
// Returns the new stub, or nullptr if allocating the VtableStub failed.
VtableStub* VtableStubs::create_itable_stub(int itable_index) {
// Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing.
const int stub_code_length = code_size_limit(false);
VtableStub* s = new(stub_code_length) VtableStub(false, itable_index);
// Can be null if there is no free space in the code cache.
if (s == nullptr) {
return nullptr;
}
// Count unused bytes in instruction sequences of variable size.
// We add them to the computed buffer size in order to avoid
// overflow in subsequently generated stubs.
address start_pc;
int slop_bytes = 0;
int slop_delta = 0;
const int index_dependent_slop = (itable_index == 0) ? 4 : // code size change with transition from 8-bit to 32-bit constant (@index == 32).
(itable_index < 32) ? 3 : 0; // index == 0 generates even shorter code.
ResourceMark rm;
// The assembler writes directly into the stub's own code area.
CodeBuffer cb(s->entry_point(), stub_code_length);
MacroAssembler* masm = new MacroAssembler(&cb);
#if (!defined(PRODUCT) && defined(COMPILER2))
if (CountCompiledCalls) {
__ incrementl(ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr()));
}
#endif /* PRODUCT */
// Entry arguments:
// rax: CompiledICData
// rcx: Receiver
// Most registers are in use; we'll use rax, rbx, rcx, rdx, rsi, rdi
// (If we need to make rsi, rdi callee-save, do a push/pop here.)
const Register recv_klass_reg = rsi;
const Register holder_klass_reg = rax; // declaring interface klass (DEFC)
const Register resolved_klass_reg = rdi; // resolved interface klass (REFC)
const Register temp_reg = rdx;
const Register method = rbx;
const Register icdata_reg = rax;
const Register receiver = rcx;
// holder_klass_reg and icdata_reg are both rax, so the REFC load has to come
// first: the DEFC load overwrites the CompiledICData pointer.
__ movptr(resolved_klass_reg, Address(icdata_reg, CompiledICData::itable_refc_klass_offset()));
__ movptr(holder_klass_reg, Address(icdata_reg, CompiledICData::itable_defc_klass_offset()));
Label L_no_such_interface;
// get receiver klass (also an implicit null-check)
assert(VtableStub::receiver_location() == rcx->as_VMReg(), "receiver expected in rcx");
address npe_addr = __ pc();
__ load_klass(recv_klass_reg, rcx, noreg);
start_pc = __ pc();
// rdx is used as temp_reg below; it is saved here and popped again on both
// exit paths (before the jmp to the method and after L_no_such_interface).
__ push(rdx); // temp_reg
// Receiver subtype check against REFC.
// Get selected method from declaring class and itable index
__ lookup_interface_method_stub(recv_klass_reg, // input
holder_klass_reg, // input
resolved_klass_reg, // input
method, // output
temp_reg,
noreg,
receiver, // input (x86_32 only: to restore recv_klass value)
itable_index,
L_no_such_interface);
// Measured from start_pc, so the push above is included.
const ptrdiff_t lookupSize = __ pc() - start_pc;
// We expect we need index_dependent_slop extra bytes. Reason:
// The emitted code in lookup_interface_method changes when itable_index exceeds 31.
// For windows, a narrow estimate was found to be 104. Other OSes not tested.
const ptrdiff_t estimate = 104;
const ptrdiff_t codesize = lookupSize + index_dependent_slop;
slop_delta = (int)(estimate - codesize);
slop_bytes += slop_delta;
assert(slop_delta >= 0, "itable #%d: Code size estimate (%d) for lookup_interface_method too small, required: %d", itable_index, (int)estimate, (int)codesize);
// method (rbx): Method*
// rcx: receiver
#ifdef ASSERT
if (DebugVtables) {
Label L1;
// Stop only when method is non-null but its from_compiled entry is zero;
// a null method skips the check entirely.
__ cmpptr(method, NULL_WORD);
__ jcc(Assembler::equal, L1);
__ cmpptr(Address(method, Method::from_compiled_offset()), NULL_WORD);
__ jcc(Assembler::notZero, L1);
__ stop("Method* is null");
__ bind(L1);
}
#endif // ASSERT
__ pop(rdx);
// ame_addr records the pc of the jump that dereferences the Method*.
address ame_addr = __ pc();
__ jmp(Address(method, Method::from_compiled_offset()));
__ bind(L_no_such_interface);
// Handle IncompatibleClassChangeError in itable stubs.
// More detailed error message.
// We force resolving of the call site by jumping to the "handle
// wrong method" stub, and so let the interpreter runtime do all the
// dirty work.
__ pop(rdx);
__ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub()));
masm->flush();
slop_bytes += index_dependent_slop; // add'l slop for size variance due to large itable offsets
bookkeeping(masm, tty, s, npe_addr, ame_addr, false, itable_index, slop_bytes, index_dependent_slop);
return s;
}
// Alignment used for vtable stub code on this platform.
int VtableStub::pd_code_alignment() {
  // An x86 cache line is 64 bytes, but aligning stubs that coarsely would
  // waste space, so a single machine word is used instead.
  const unsigned int stub_alignment = wordSize;
  return stub_alignment;
}

File diff suppressed because it is too large Load Diff

View File

@ -1,525 +0,0 @@
#
# Copyright (c) 2004, 2024, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License version 2 only, as
# published by the Free Software Foundation.
#
# This code is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# version 2 for more details (a copy is included in the LICENSE file that
# accompanied this code).
#
# You should have received a copy of the GNU General Public License version
# 2 along with this work; if not, write to the Free Software Foundation,
# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
#
# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
# or visit www.oracle.com if you need additional information or have any
# questions.
#
#include "defs.S.inc"
# NOTE WELL! The _Copy functions are called directly
# from server-compiler-generated code via CallLeafNoFP,
# which means that they *must* either not use floating
# point or use it in the same manner as does the server
# compiler.
.text
# Set fpu to 53 bit precision. This happens too early to use a stub.
#
# Takes no arguments. The control word value 0x27f is pushed on the stack so
# that fldcw can load it from memory; the pop afterwards only rebalances the
# stack, leaving 0x27f in %eax as a side effect.
.p2align 4,,15
DECLARE_FUNC(fixcw):
pushl $0x27f
fldcw 0(%esp)
popl %eax
ret
# SpinPause: executes the two-byte rep/nop sequence (the encoding of the
# x86 PAUSE spin-wait hint) and returns 1 in %eax. Takes no arguments.
.p2align 4,,15
DECLARE_FUNC(SpinPause):
rep
nop
movl $1, %eax
ret
# Support for void Copy::arrayof_conjoint_bytes(void* from,
# void* to,
# size_t count)
#
# Overlap-safe copy of count bytes. The three arguments are read from the
# stack. The copy runs upward (acb_CopyRight) when to <= from or when to lies
# above from + count - 1; otherwise it runs downward (acb_CopyLeft) with the
# direction flag set. In both directions whole dwords are moved first and the
# remaining (count & 3) bytes afterwards. %eax, %ecx and %edx are scratch;
# %esi and %edi are saved and restored.
.p2align 4,,15
DECLARE_FUNC(_Copy_arrayof_conjoint_bytes):
pushl %esi
movl 4+12(%esp),%ecx # count
pushl %edi
movl 8+ 4(%esp),%esi # from
movl 8+ 8(%esp),%edi # to
# The jbe below still tests the flags of this cmpl (to vs. from); the leal
# in between does not modify flags.
cmpl %esi,%edi
leal -1(%esi,%ecx),%eax # from + count - 1
jbe acb_CopyRight
cmpl %eax,%edi
jbe acb_CopyLeft
# copy from low to high
acb_CopyRight:
# Three bytes or fewer: go straight to the byte loop.
cmpl $3,%ecx
jbe 5f
# Keep the byte count in %eax while %ecx becomes the dword count.
1: movl %ecx,%eax
shrl $2,%ecx
jz 4f
# More than 32 dwords: use the string instruction at label 3.
cmpl $32,%ecx
ja 3f
# copy aligned dwords
# %edi temporarily holds (to - from) so that (%edi,%esi,1) addresses the
# destination while only %esi is advanced; it is restored after the loop.
subl %esi,%edi
.p2align 4,,15
2: movl (%esi),%edx
movl %edx,(%edi,%esi,1)
addl $4,%esi
subl $1,%ecx
jnz 2b
addl %esi,%edi
jmp 4f
# copy aligned dwords
3: rep; smovl
4: movl %eax,%ecx
5: andl $3,%ecx
jz 7f
# copy suffix
xorl %eax,%eax
6: movb (%esi,%eax,1),%dl
movb %dl,(%edi,%eax,1)
addl $1,%eax
subl $1,%ecx
jnz 6b
7: popl %edi
popl %esi
ret
# copy from high to low
acb_CopyLeft:
# Direction flag set for the descending string copy; cleared again at label 7.
std
leal -4(%edi,%ecx),%edi # to + count - 4
movl %eax,%esi # from + count - 1
movl %ecx,%eax
subl $3,%esi # from + count - 4
cmpl $3,%ecx
jbe 5f
1: shrl $2,%ecx
jz 4f
cmpl $32,%ecx
jbe 2f # <= 32 dwords
rep; smovl
jmp 4f
.space 8
# Same (to - from) offset technique as in the upward loop, stepping down.
2: subl %esi,%edi
.p2align 4,,15
3: movl (%esi),%edx
movl %edx,(%edi,%esi,1)
subl $4,%esi
subl $1,%ecx
jnz 3b
addl %esi,%edi
4: movl %eax,%ecx
5: andl $3,%ecx
jz 7f
# Remaining bytes: %esi is moved up by 3 to the highest byte not yet copied,
# then bytes are copied one at a time going down.
subl %esi,%edi
addl $3,%esi
6: movb (%esi),%dl
movb %dl,(%edi,%esi,1)
subl $1,%esi
subl $1,%ecx
jnz 6b
7: cld
popl %edi
popl %esi
ret
# Support for void Copy::conjoint_jshorts_atomic(void* from,
# void* to,
# size_t count)
#
# Overlap-safe copy of count 16-bit elements. The copy runs upward
# (cs_CopyRight) when to <= from or when to lies above the last source
# element; otherwise it runs downward (cs_CopyLeft) with the direction flag
# set. Elements are moved as single words or as dwords, never as bytes.
# %eax, %ecx and %edx are scratch; %esi and %edi are saved and restored.
.p2align 4,,15
DECLARE_FUNC(_Copy_conjoint_jshorts_atomic):
pushl %esi
movl 4+12(%esp),%ecx # count
pushl %edi
movl 8+ 4(%esp),%esi # from
movl 8+ 8(%esp),%edi # to
# The jbe below still tests the flags of this cmpl (to vs. from); the leal
# in between does not modify flags.
cmpl %esi,%edi
leal -2(%esi,%ecx,2),%eax # from + count*2 - 2
jbe cs_CopyRight
cmpl %eax,%edi
jbe cs_CopyLeft
# copy from low to high
cs_CopyRight:
# align source address at dword address boundary
movl %esi,%eax # original from
andl $3,%eax # either 0 or 2
jz 1f # no prefix
# copy prefix
subl $1,%ecx
jl 5f # zero count
movw (%esi),%dx
movw %dx,(%edi)
addl %eax,%esi # %eax == 2
addl %eax,%edi
1: movl %ecx,%eax # word count less prefix
sarl %ecx # dword count
jz 4f # no dwords to move
cmpl $32,%ecx
jbe 2f # <= 32 dwords
# copy aligned dwords
rep; smovl
jmp 4f
# copy aligned dwords
# %edi temporarily holds (to - from) so that (%edi,%esi,1) addresses the
# destination while only %esi is advanced; it is restored after the loop.
2: subl %esi,%edi
.p2align 4,,15
3: movl (%esi),%edx
movl %edx,(%edi,%esi,1)
addl $4,%esi
subl $1,%ecx
jnz 3b
addl %esi,%edi
4: andl $1,%eax # suffix count
jz 5f # no suffix
# copy suffix
movw (%esi),%dx
movw %dx,(%edi)
5: popl %edi
popl %esi
ret
# copy from high to low
cs_CopyLeft:
# Direction flag set for the descending string copy; cleared again at label 5.
std
leal -4(%edi,%ecx,2),%edi # to + count*2 - 4
movl %eax,%esi # from + count*2 - 2
movl %ecx,%eax
subl $2,%esi # from + count*2 - 4
1: sarl %ecx # dword count
jz 4f # no dwords to move
cmpl $32,%ecx
ja 3f # > 32 dwords
subl %esi,%edi
.p2align 4,,15
2: movl (%esi),%edx
movl %edx,(%edi,%esi,1)
subl $4,%esi
subl $1,%ecx
jnz 2b
addl %esi,%edi
jmp 4f
3: rep; smovl
4: andl $1,%eax # suffix count
jz 5f # no suffix
# copy suffix
# Both pointers sit one dword below the last dword position handled; the
# leftover element is the upper word of that dword, hence the +2.
addl $2,%esi
addl $2,%edi
movw (%esi),%dx
movw %dx,(%edi)
5: cld
popl %edi
popl %esi
ret
# Support for void Copy::arrayof_conjoint_jshorts(void* from,
# void* to,
# size_t count)
#
# Overlap-safe copy of count 16-bit elements. Same structure as
# _Copy_conjoint_jshorts_atomic but without the prefix step that aligns the
# source to a dword boundary. Runs upward (acs_CopyRight) when to <= from or
# when to lies above the last source element, otherwise downward
# (acs_CopyLeft) with the direction flag set. %eax, %ecx and %edx are
# scratch; %esi and %edi are saved and restored.
.p2align 4,,15
DECLARE_FUNC(_Copy_arrayof_conjoint_jshorts):
pushl %esi
movl 4+12(%esp),%ecx # count
pushl %edi
movl 8+ 4(%esp),%esi # from
movl 8+ 8(%esp),%edi # to
# The jbe below still tests the flags of this cmpl (to vs. from); the leal
# in between does not modify flags.
cmpl %esi,%edi
leal -2(%esi,%ecx,2),%eax # from + count*2 - 2
jbe acs_CopyRight
cmpl %eax,%edi
jbe acs_CopyLeft
# copy from low to high
acs_CopyRight:
movl %ecx,%eax # word count
sarl %ecx # dword count
jz 4f # no dwords to move
cmpl $32,%ecx
jbe 2f # <= 32 dwords
# copy aligned dwords
rep; smovl
jmp 4f
# copy aligned dwords
.space 5
# %edi temporarily holds (to - from) so that (%edi,%esi,1) addresses the
# destination while only %esi is advanced; it is restored after the loop.
2: subl %esi,%edi
.p2align 4,,15
3: movl (%esi),%edx
movl %edx,(%edi,%esi,1)
addl $4,%esi
subl $1,%ecx
jnz 3b
addl %esi,%edi
4: andl $1,%eax # suffix count
jz 5f # no suffix
# copy suffix
movw (%esi),%dx
movw %dx,(%edi)
5: popl %edi
popl %esi
ret
# copy from high to low
acs_CopyLeft:
# Direction flag set for the descending string copy; cleared again at label 5.
std
leal -4(%edi,%ecx,2),%edi # to + count*2 - 4
movl %eax,%esi # from + count*2 - 2
movl %ecx,%eax
subl $2,%esi # from + count*2 - 4
sarl %ecx # dword count
jz 4f # no dwords to move
cmpl $32,%ecx
ja 3f # > 32 dwords
subl %esi,%edi
.p2align 4,,15
2: movl (%esi),%edx
movl %edx,(%edi,%esi,1)
subl $4,%esi
subl $1,%ecx
jnz 2b
addl %esi,%edi
jmp 4f
3: rep; smovl
4: andl $1,%eax # suffix count
jz 5f # no suffix
# copy suffix
# Both pointers sit one dword below the last dword position handled; the
# leftover element is the upper word of that dword, hence the +2.
addl $2,%esi
addl $2,%edi
movw (%esi),%dx
movw %dx,(%edi)
5: cld
popl %edi
popl %esi
ret
# Support for void Copy::conjoint_jints_atomic(void* from,
# void* to,
# size_t count)
# Equivalent to
# arrayof_conjoint_jints
#
# Overlap-safe copy of count 32-bit elements; both entry points share this
# body. Runs upward (ci_CopyRight) when to <= from or when to lies above the
# last source element, otherwise downward (ci_CopyLeft) with the direction
# flag set. Up to 32 elements are copied by an explicit dword loop, larger
# counts by the string instruction. %eax, %ecx and %edx are scratch; %esi and
# %edi are saved and restored.
.p2align 4,,15
DECLARE_FUNC(_Copy_conjoint_jints_atomic):
DECLARE_FUNC(_Copy_arrayof_conjoint_jints):
pushl %esi
movl 4+12(%esp),%ecx # count
pushl %edi
movl 8+ 4(%esp),%esi # from
movl 8+ 8(%esp),%edi # to
# The jbe below still tests the flags of this cmpl (to vs. from); the leal
# in between does not modify flags.
cmpl %esi,%edi
leal -4(%esi,%ecx,4),%eax # from + count*4 - 4
jbe ci_CopyRight
cmpl %eax,%edi
jbe ci_CopyLeft
# copy from low to high
ci_CopyRight:
cmpl $32,%ecx
jbe 2f # <= 32 dwords
rep; smovl
popl %edi
popl %esi
ret
.space 10
# %edi holds (to - from) so that (%edi,%esi,1) addresses the destination.
# The loop is entered at its decrement (label 4), so a count of zero copies
# nothing.
2: subl %esi,%edi
jmp 4f
.p2align 4,,15
3: movl (%esi),%edx
movl %edx,(%edi,%esi,1)
addl $4,%esi
4: subl $1,%ecx
jge 3b
popl %edi
popl %esi
ret
# copy from high to low
ci_CopyLeft:
# Direction flag is set here and cleared with cld on both exits below.
std
leal -4(%edi,%ecx,4),%edi # to + count*4 - 4
cmpl $32,%ecx
ja 4f # > 32 dwords
# The short loop walks %eax (last source element) downward and uses %edi as
# the (to - from) offset.
subl %eax,%edi # eax == from + count*4 - 4
jmp 3f
.p2align 4,,15
2: movl (%eax),%edx
movl %edx,(%edi,%eax,1)
subl $4,%eax
3: subl $1,%ecx
jge 2b
cld
popl %edi
popl %esi
ret
4: movl %eax,%esi # from + count*4 - 4
rep; smovl
cld
popl %edi
popl %esi
ret
# Support for void Copy::conjoint_jlongs_atomic(jlong* from,
# jlong* to,
# size_t count)
#
# 32-bit
#
# count treated as signed
#
# // if (from > to) {
# while (--count >= 0) {
# *to++ = *from++;
# }
# } else {
# while (--count >= 0) {
# to[count] = from[count];
# }
# }
#
# Each 64-bit element is moved with one fildll/fistpll pair, that is, through
# the x87 register stack, so every element is read and written by a single
# 8-byte memory access. No callee-saved registers are touched.
.p2align 4,,15
DECLARE_FUNC(_Copy_conjoint_jlongs_atomic):
movl 4+8(%esp),%ecx # count
movl 4+0(%esp),%eax # from
movl 4+4(%esp),%edx # to
cmpl %eax,%edx
jae cla_CopyLeft
# to < from: copy upward. %edx becomes (to - from) so that (%edx,%eax,1)
# addresses the destination while only %eax is advanced.
cla_CopyRight:
subl %eax,%edx
jmp 2f
.p2align 4,,15
1: fildll (%eax)
fistpll (%edx,%eax,1)
addl $8,%eax
2: subl $1,%ecx
jge 1b
ret
# to >= from: copy downward, indexing both arrays with the decremented count.
# The loop body sits before its entry label, so control enters at the
# decrement.
.p2align 4,,15
3: fildll (%eax,%ecx,8)
fistpll (%edx,%ecx,8)
cla_CopyLeft:
subl $1,%ecx
jge 3b
ret
# Support for void Copy::arrayof_conjoint_jshorts(void* from,
# void* to,
# size_t count)
#
# MMX variant of the overlap-safe 16-bit element copy. Upward copies
# (mmx_acs_CopyRight) of 33 or more dwords move 64 bytes per iteration
# through %mm0-%mm2; smaller upward copies and all downward copies
# (mmx_acs_CopyLeft) use plain dword moves or the string instruction.
# emms is executed after the MMX loop. %eax, %ecx and %edx are scratch;
# %esi and %edi are saved and restored.
.p2align 4,,15
DECLARE_FUNC(_mmx_Copy_arrayof_conjoint_jshorts):
pushl %esi
# count (element count, as the scaling by 2 below shows)
movl 4+12(%esp),%ecx
pushl %edi
# from
movl 8+ 4(%esp),%esi
# to
movl 8+ 8(%esp),%edi
# The jbe below still tests the flags of this cmpl (to vs. from); the leal
# in between does not modify flags. %eax = from + count*2 - 2.
cmpl %esi,%edi
leal -2(%esi,%ecx,2),%eax
jbe mmx_acs_CopyRight
cmpl %eax,%edi
jbe mmx_acs_CopyLeft
# copy from low to high
mmx_acs_CopyRight:
# %eax keeps the element count, %ecx becomes the dword count.
movl %ecx,%eax
sarl %ecx
je 5f
cmpl $33,%ecx
jae 3f
# Plain dword loop; also used for the dwords left over after the MMX loop.
# %edi temporarily holds (to - from) and is restored after the loop.
1: subl %esi,%edi
.p2align 4,,15
2: movl (%esi),%edx
movl %edx,(%edi,%esi,1)
addl $4,%esi
subl $1,%ecx
jnz 2b
addl %esi,%edi
jmp 5f
3: smovl # align to 8 bytes, we know we are 4 byte aligned to start
subl $1,%ecx
# Unrolled loop: 64 bytes (16 dwords) per iteration, loads and stores
# interleaved across three MMX registers.
4: .p2align 4,,15
movq 0(%esi),%mm0
addl $64,%edi
movq 8(%esi),%mm1
subl $16,%ecx
movq 16(%esi),%mm2
movq %mm0,-64(%edi)
movq 24(%esi),%mm0
movq %mm1,-56(%edi)
movq 32(%esi),%mm1
movq %mm2,-48(%edi)
movq 40(%esi),%mm2
movq %mm0,-40(%edi)
movq 48(%esi),%mm0
movq %mm1,-32(%edi)
movq 56(%esi),%mm1
movq %mm2,-24(%edi)
movq %mm0,-16(%edi)
addl $64,%esi
movq %mm1,-8(%edi)
cmpl $16,%ecx
jge 4b
emms
# Any dwords left (fewer than 16) go through the plain loop at label 1.
testl %ecx,%ecx
ja 1b
# Odd element count: copy the final word.
5: andl $1,%eax
je 7f
6: movw (%esi),%dx
movw %dx,(%edi)
7: popl %edi
popl %esi
ret
# copy from high to low
mmx_acs_CopyLeft:
# Direction flag set for the descending string copy; cleared again at label 6.
std
# %edi = to + count*2 - 4, %esi = from + count*2 - 4, %eax = element count.
leal -4(%edi,%ecx,2),%edi
movl %eax,%esi
movl %ecx,%eax
subl $2,%esi
sarl %ecx
je 4f
cmpl $32,%ecx
ja 3f
subl %esi,%edi
.p2align 4,,15
2: movl (%esi),%edx
movl %edx,(%edi,%esi,1)
subl $4,%esi
subl $1,%ecx
jnz 2b
addl %esi,%edi
jmp 4f
3: rep; smovl
4: andl $1,%eax
je 6f
# Both pointers sit one dword below the last dword position handled; the
# leftover element is the upper word of that dword, hence the +2.
addl $2,%esi
addl $2,%edi
5: movw (%esi),%dx
movw %dx,(%edi)
6: cld
popl %edi
popl %esi
ret
# Support for int64_t Atomic::cmpxchg(int64_t compare_value,
# volatile int64_t* dest,
# int64_t exchange_value)
#
# NOTE(review): the stack offsets read below imply that this helper receives
# its arguments in the order (exchange_value, dest, compare_value), which is
# not the order written in the signature above - confirm against the caller.
#
# Performs a locked cmpxchg8b on *dest with compare_value in %edx:%eax and
# exchange_value in %ecx:%ebx. The 64-bit result is whatever cmpxchg8b leaves
# in %edx:%eax. %ebx and %edi are saved and restored.
.p2align 4,,15
DECLARE_FUNC(_Atomic_cmpxchg_long):
# 8(%esp) : return PC
pushl %ebx # 4(%esp) : old %ebx
pushl %edi # 0(%esp) : old %edi
movl 12(%esp), %ebx # 12(%esp) : exchange_value (low)
movl 16(%esp), %ecx # 16(%esp) : exchange_value (high)
movl 24(%esp), %eax # 24(%esp) : compare_value (low)
movl 28(%esp), %edx # 28(%esp) : compare_value (high)
movl 20(%esp), %edi # 20(%esp) : dest
lock
cmpxchg8b (%edi)
popl %edi
popl %ebx
ret
# Support for int64_t Atomic::load and Atomic::store.
# void _Atomic_move_long(const volatile int64_t* src, volatile int64_t* dst)
#
# Copies one 64-bit value from *src to *dst through the x87 register stack
# (fildll then fistpll), so the value is read with one 8-byte load and
# written with one 8-byte store. Only %eax is used as scratch.
.p2align 4,,15
DECLARE_FUNC(_Atomic_move_long):
movl 4(%esp), %eax # src
fildll (%eax)
movl 8(%esp), %eax # dest
fistpll (%eax)
ret

View File

@ -1,518 +0,0 @@
#
# Copyright (c) 2004, 2024, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License version 2 only, as
# published by the Free Software Foundation.
#
# This code is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# version 2 for more details (a copy is included in the LICENSE file that
# accompanied this code).
#
# You should have received a copy of the GNU General Public License version
# 2 along with this work; if not, write to the Free Software Foundation,
# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
#
# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
# or visit www.oracle.com if you need additional information or have any
# questions.
#
#include "defs.S.inc"
# NOTE WELL! The _Copy functions are called directly
# from server-compiler-generated code via CallLeafNoFP,
# which means that they *must* either not use floating
# point or use it in the same manner as does the server
# compiler.
.text
# SpinPause: executes the two-byte rep/nop sequence (the encoding of the
# x86 PAUSE spin-wait hint) and returns 1 in %eax. Takes no arguments.
.p2align 4,,15
DECLARE_FUNC(SpinPause):
rep
nop
movl $1, %eax
ret
# Support for void Copy::arrayof_conjoint_bytes(void* from,
# void* to,
# size_t count)
#
# Overlap-safe copy of count bytes. The three arguments are read from the
# stack. The copy runs upward (acb_CopyRight) when to <= from or when to lies
# above from + count - 1; otherwise it runs downward (acb_CopyLeft) with the
# direction flag set. In both directions whole dwords are moved first and the
# remaining (count & 3) bytes afterwards. %eax, %ecx and %edx are scratch;
# %esi and %edi are saved and restored.
.p2align 4,,15
DECLARE_FUNC(_Copy_arrayof_conjoint_bytes):
pushl %esi
movl 4+12(%esp),%ecx # count
pushl %edi
movl 8+ 4(%esp),%esi # from
movl 8+ 8(%esp),%edi # to
# The jbe below still tests the flags of this cmpl (to vs. from); the leal
# in between does not modify flags.
cmpl %esi,%edi
leal -1(%esi,%ecx),%eax # from + count - 1
jbe acb_CopyRight
cmpl %eax,%edi
jbe acb_CopyLeft
# copy from low to high
acb_CopyRight:
# Three bytes or fewer: go straight to the byte loop.
cmpl $3,%ecx
jbe 5f
# Keep the byte count in %eax while %ecx becomes the dword count.
1: movl %ecx,%eax
shrl $2,%ecx
jz 4f
# More than 32 dwords: use the string instruction at label 3.
cmpl $32,%ecx
ja 3f
# copy aligned dwords
# %edi temporarily holds (to - from) so that (%edi,%esi,1) addresses the
# destination while only %esi is advanced; it is restored after the loop.
subl %esi,%edi
.p2align 4,,15
2: movl (%esi),%edx
movl %edx,(%edi,%esi,1)
addl $4,%esi
subl $1,%ecx
jnz 2b
addl %esi,%edi
jmp 4f
# copy aligned dwords
3: rep; smovl
4: movl %eax,%ecx
5: andl $3,%ecx
jz 7f
# copy suffix
xorl %eax,%eax
6: movb (%esi,%eax,1),%dl
movb %dl,(%edi,%eax,1)
addl $1,%eax
subl $1,%ecx
jnz 6b
7: popl %edi
popl %esi
ret
# copy from high to low
acb_CopyLeft:
# Direction flag set for the descending string copy; cleared again at label 7.
std
leal -4(%edi,%ecx),%edi # to + count - 4
movl %eax,%esi # from + count - 1
movl %ecx,%eax
subl $3,%esi # from + count - 4
cmpl $3,%ecx
jbe 5f
1: shrl $2,%ecx
jz 4f
cmpl $32,%ecx
jbe 2f # <= 32 dwords
rep; smovl
jmp 4f
.space 8
# Same (to - from) offset technique as in the upward loop, stepping down.
2: subl %esi,%edi
.p2align 4,,15
3: movl (%esi),%edx
movl %edx,(%edi,%esi,1)
subl $4,%esi
subl $1,%ecx
jnz 3b
addl %esi,%edi
4: movl %eax,%ecx
5: andl $3,%ecx
jz 7f
# Remaining bytes: %esi is moved up by 3 to the highest byte not yet copied,
# then bytes are copied one at a time going down.
subl %esi,%edi
addl $3,%esi
6: movb (%esi),%dl
movb %dl,(%edi,%esi,1)
subl $1,%esi
subl $1,%ecx
jnz 6b
7: cld
popl %edi
popl %esi
ret
# Support for void Copy::conjoint_jshorts_atomic(void* from,
# void* to,
# size_t count)
#
# Overlap-safe copy of count 16-bit elements. The copy runs upward
# (cs_CopyRight) when to <= from or when to lies above the last source
# element; otherwise it runs downward (cs_CopyLeft) with the direction flag
# set. Elements are moved as single words or as dwords, never as bytes.
# %eax, %ecx and %edx are scratch; %esi and %edi are saved and restored.
.p2align 4,,15
DECLARE_FUNC(_Copy_conjoint_jshorts_atomic):
pushl %esi
movl 4+12(%esp),%ecx # count
pushl %edi
movl 8+ 4(%esp),%esi # from
movl 8+ 8(%esp),%edi # to
# The jbe below still tests the flags of this cmpl (to vs. from); the leal
# in between does not modify flags.
cmpl %esi,%edi
leal -2(%esi,%ecx,2),%eax # from + count*2 - 2
jbe cs_CopyRight
cmpl %eax,%edi
jbe cs_CopyLeft
# copy from low to high
cs_CopyRight:
# align source address at dword address boundary
movl %esi,%eax # original from
andl $3,%eax # either 0 or 2
jz 1f # no prefix
# copy prefix
subl $1,%ecx
jl 5f # zero count
movw (%esi),%dx
movw %dx,(%edi)
addl %eax,%esi # %eax == 2
addl %eax,%edi
1: movl %ecx,%eax # word count less prefix
sarl %ecx # dword count
jz 4f # no dwords to move
cmpl $32,%ecx
jbe 2f # <= 32 dwords
# copy aligned dwords
rep; smovl
jmp 4f
# copy aligned dwords
# %edi temporarily holds (to - from) so that (%edi,%esi,1) addresses the
# destination while only %esi is advanced; it is restored after the loop.
2: subl %esi,%edi
.p2align 4,,15
3: movl (%esi),%edx
movl %edx,(%edi,%esi,1)
addl $4,%esi
subl $1,%ecx
jnz 3b
addl %esi,%edi
4: andl $1,%eax # suffix count
jz 5f # no suffix
# copy suffix
movw (%esi),%dx
movw %dx,(%edi)
5: popl %edi
popl %esi
ret
# copy from high to low
cs_CopyLeft:
# Direction flag set for the descending string copy; cleared again at label 5.
std
leal -4(%edi,%ecx,2),%edi # to + count*2 - 4
movl %eax,%esi # from + count*2 - 2
movl %ecx,%eax
subl $2,%esi # from + count*2 - 4
1: sarl %ecx # dword count
jz 4f # no dwords to move
cmpl $32,%ecx
ja 3f # > 32 dwords
subl %esi,%edi
.p2align 4,,15
2: movl (%esi),%edx
movl %edx,(%edi,%esi,1)
subl $4,%esi
subl $1,%ecx
jnz 2b
addl %esi,%edi
jmp 4f
3: rep; smovl
4: andl $1,%eax # suffix count
jz 5f # no suffix
# copy suffix
# Both pointers sit one dword below the last dword position handled; the
# leftover element is the upper word of that dword, hence the +2.
addl $2,%esi
addl $2,%edi
movw (%esi),%dx
movw %dx,(%edi)
5: cld
popl %edi
popl %esi
ret
# Support for void Copy::arrayof_conjoint_jshorts(void* from,
# void* to,
# size_t count)
#
# Overlap-safe copy of count 16-bit elements. Same structure as
# _Copy_conjoint_jshorts_atomic but without the prefix step that aligns the
# source to a dword boundary. Runs upward (acs_CopyRight) when to <= from or
# when to lies above the last source element, otherwise downward
# (acs_CopyLeft) with the direction flag set. %eax, %ecx and %edx are
# scratch; %esi and %edi are saved and restored.
.p2align 4,,15
DECLARE_FUNC(_Copy_arrayof_conjoint_jshorts):
pushl %esi
movl 4+12(%esp),%ecx # count
pushl %edi
movl 8+ 4(%esp),%esi # from
movl 8+ 8(%esp),%edi # to
# The jbe below still tests the flags of this cmpl (to vs. from); the leal
# in between does not modify flags.
cmpl %esi,%edi
leal -2(%esi,%ecx,2),%eax # from + count*2 - 2
jbe acs_CopyRight
cmpl %eax,%edi
jbe acs_CopyLeft
# copy from low to high
acs_CopyRight:
movl %ecx,%eax # word count
sarl %ecx # dword count
jz 4f # no dwords to move
cmpl $32,%ecx
jbe 2f # <= 32 dwords
# copy aligned dwords
rep; smovl
jmp 4f
# copy aligned dwords
.space 5
# %edi temporarily holds (to - from) so that (%edi,%esi,1) addresses the
# destination while only %esi is advanced; it is restored after the loop.
2: subl %esi,%edi
.p2align 4,,15
3: movl (%esi),%edx
movl %edx,(%edi,%esi,1)
addl $4,%esi
subl $1,%ecx
jnz 3b
addl %esi,%edi
4: andl $1,%eax # suffix count
jz 5f # no suffix
# copy suffix
movw (%esi),%dx
movw %dx,(%edi)
5: popl %edi
popl %esi
ret
# copy from high to low
acs_CopyLeft:
# Direction flag set for the descending string copy; cleared again at label 5.
std
leal -4(%edi,%ecx,2),%edi # to + count*2 - 4
movl %eax,%esi # from + count*2 - 2
movl %ecx,%eax
subl $2,%esi # from + count*2 - 4
sarl %ecx # dword count
jz 4f # no dwords to move
cmpl $32,%ecx
ja 3f # > 32 dwords
subl %esi,%edi
.p2align 4,,15
2: movl (%esi),%edx
movl %edx,(%edi,%esi,1)
subl $4,%esi
subl $1,%ecx
jnz 2b
addl %esi,%edi
jmp 4f
3: rep; smovl
4: andl $1,%eax # suffix count
jz 5f # no suffix
# copy suffix
# Both pointers sit one dword below the last dword position handled; the
# leftover element is the upper word of that dword, hence the +2.
addl $2,%esi
addl $2,%edi
movw (%esi),%dx
movw %dx,(%edi)
5: cld
popl %edi
popl %esi
ret
# Support for void Copy::conjoint_jints_atomic(void* from,
# void* to,
# size_t count)
# Equivalent to
# arrayof_conjoint_jints
#
# Overlap-safe copy of count 32-bit elements; both entry points share this
# body. Runs upward (ci_CopyRight) when to <= from or when to lies above the
# last source element, otherwise downward (ci_CopyLeft) with the direction
# flag set. Up to 32 elements are copied by an explicit dword loop, larger
# counts by the string instruction. %eax, %ecx and %edx are scratch; %esi and
# %edi are saved and restored.
.p2align 4,,15
DECLARE_FUNC(_Copy_conjoint_jints_atomic):
DECLARE_FUNC(_Copy_arrayof_conjoint_jints):
pushl %esi
movl 4+12(%esp),%ecx # count
pushl %edi
movl 8+ 4(%esp),%esi # from
movl 8+ 8(%esp),%edi # to
# The jbe below still tests the flags of this cmpl (to vs. from); the leal
# in between does not modify flags.
cmpl %esi,%edi
leal -4(%esi,%ecx,4),%eax # from + count*4 - 4
jbe ci_CopyRight
cmpl %eax,%edi
jbe ci_CopyLeft
# copy from low to high
ci_CopyRight:
cmpl $32,%ecx
jbe 2f # <= 32 dwords
rep; smovl
popl %edi
popl %esi
ret
.space 10
# %edi holds (to - from) so that (%edi,%esi,1) addresses the destination.
# The loop is entered at its decrement (label 4), so a count of zero copies
# nothing.
2: subl %esi,%edi
jmp 4f
.p2align 4,,15
3: movl (%esi),%edx
movl %edx,(%edi,%esi,1)
addl $4,%esi
4: subl $1,%ecx
jge 3b
popl %edi
popl %esi
ret
# copy from high to low
ci_CopyLeft:
# Direction flag is set here and cleared with cld on both exits below.
std
leal -4(%edi,%ecx,4),%edi # to + count*4 - 4
cmpl $32,%ecx
ja 4f # > 32 dwords
# The short loop walks %eax (last source element) downward and uses %edi as
# the (to - from) offset.
subl %eax,%edi # eax == from + count*4 - 4
jmp 3f
.p2align 4,,15
2: movl (%eax),%edx
movl %edx,(%edi,%eax,1)
subl $4,%eax
3: subl $1,%ecx
jge 2b
cld
popl %edi
popl %esi
ret
4: movl %eax,%esi # from + count*4 - 4
rep; smovl
cld
popl %edi
popl %esi
ret
# Support for void Copy::conjoint_jlongs_atomic(jlong* from,
# jlong* to,
# size_t count)
#
# 32-bit
#
# count treated as signed
#
# Each 64-bit element is moved with one fildll/fistpll pair, that is, through
# the x87 register stack, so every element is read and written by a single
# 8-byte memory access. No callee-saved registers are touched.
# The pseudo-code for the two loops follows.
/*
#
# if (from > to) {
# while (--count >= 0) {
# *to++ = *from++;
# }
# } else {
# while (--count >= 0) {
# to[count] = from[count];
# }
# }
*/
.p2align 4,,15
DECLARE_FUNC(_Copy_conjoint_jlongs_atomic):
movl 4+8(%esp),%ecx # count
movl 4+0(%esp),%eax # from
movl 4+4(%esp),%edx # to
cmpl %eax,%edx
jae cla_CopyLeft
# to < from: copy upward. %edx becomes (to - from) so that (%edx,%eax,1)
# addresses the destination while only %eax is advanced.
cla_CopyRight:
subl %eax,%edx
jmp 2f
.p2align 4,,15
1: fildll (%eax)
fistpll (%edx,%eax,1)
addl $8,%eax
2: subl $1,%ecx
jge 1b
ret
# to >= from: copy downward, indexing both arrays with the decremented count.
# The loop body sits before its entry label, so control enters at the
# decrement.
.p2align 4,,15
3: fildll (%eax,%ecx,8)
fistpll (%edx,%ecx,8)
cla_CopyLeft:
subl $1,%ecx
jge 3b
ret
# Support for void Copy::arrayof_conjoint_jshorts(void* from,
# void* to,
# size_t count)
#
# MMX variant of the overlap-safe 16-bit element copy. Upward copies
# (mmx_acs_CopyRight) of 33 or more dwords move 64 bytes per iteration
# through %mm0-%mm2; smaller upward copies and all downward copies
# (mmx_acs_CopyLeft) use plain dword moves or the string instruction.
# emms is executed after the MMX loop. %eax, %ecx and %edx are scratch;
# %esi and %edi are saved and restored.
.p2align 4,,15
DECLARE_FUNC(_mmx_Copy_arrayof_conjoint_jshorts):
pushl %esi
# count (element count, as the scaling by 2 below shows)
movl 4+12(%esp),%ecx
pushl %edi
# from
movl 8+ 4(%esp),%esi
# to
movl 8+ 8(%esp),%edi
# The jbe below still tests the flags of this cmpl (to vs. from); the leal
# in between does not modify flags. %eax = from + count*2 - 2.
cmpl %esi,%edi
leal -2(%esi,%ecx,2),%eax
jbe mmx_acs_CopyRight
cmpl %eax,%edi
jbe mmx_acs_CopyLeft
# copy from low to high
mmx_acs_CopyRight:
# %eax keeps the element count, %ecx becomes the dword count.
movl %ecx,%eax
sarl %ecx
je 5f
cmpl $33,%ecx
jae 3f
# Plain dword loop; also used for the dwords left over after the MMX loop.
# %edi temporarily holds (to - from) and is restored after the loop.
1: subl %esi,%edi
.p2align 4,,15
2: movl (%esi),%edx
movl %edx,(%edi,%esi,1)
addl $4,%esi
subl $1,%ecx
jnz 2b
addl %esi,%edi
jmp 5f
3: smovl # align to 8 bytes, we know we are 4 byte aligned to start
subl $1,%ecx
# Unrolled loop: 64 bytes (16 dwords) per iteration, loads and stores
# interleaved across three MMX registers.
4: .p2align 4,,15
movq 0(%esi),%mm0
addl $64,%edi
movq 8(%esi),%mm1
subl $16,%ecx
movq 16(%esi),%mm2
movq %mm0,-64(%edi)
movq 24(%esi),%mm0
movq %mm1,-56(%edi)
movq 32(%esi),%mm1
movq %mm2,-48(%edi)
movq 40(%esi),%mm2
movq %mm0,-40(%edi)
movq 48(%esi),%mm0
movq %mm1,-32(%edi)
movq 56(%esi),%mm1
movq %mm2,-24(%edi)
movq %mm0,-16(%edi)
addl $64,%esi
movq %mm1,-8(%edi)
cmpl $16,%ecx
jge 4b
emms
# Any dwords left (fewer than 16) go through the plain loop at label 1.
testl %ecx,%ecx
ja 1b
# Odd element count: copy the final word.
5: andl $1,%eax
je 7f
6: movw (%esi),%dx
movw %dx,(%edi)
7: popl %edi
popl %esi
ret
# copy from high to low
mmx_acs_CopyLeft:
# Direction flag set for the descending string copy; cleared again at label 6.
std
# %edi = to + count*2 - 4, %esi = from + count*2 - 4, %eax = element count.
leal -4(%edi,%ecx,2),%edi
movl %eax,%esi
movl %ecx,%eax
subl $2,%esi
sarl %ecx
je 4f
cmpl $32,%ecx
ja 3f
subl %esi,%edi
.p2align 4,,15
2: movl (%esi),%edx
movl %edx,(%edi,%esi,1)
subl $4,%esi
subl $1,%ecx
jnz 2b
addl %esi,%edi
jmp 4f
3: rep; smovl
4: andl $1,%eax
je 6f
# Both pointers sit one dword below the last dword position handled; the
# leftover element is the upper word of that dword, hence the +2.
addl $2,%esi
addl $2,%edi
5: movw (%esi),%dx
movw %dx,(%edi)
6: cld
popl %edi
popl %esi
ret
# Support for jlong Atomic::cmpxchg(volatile jlong* dest,
# jlong compare_value,
# jlong exchange_value)
#
# NOTE(review): the stack offsets read below imply that this helper receives
# its arguments in the order (exchange_value, dest, compare_value), which is
# not the order written in the signature above - confirm against the caller.
#
# Performs a locked cmpxchg8b on *dest with compare_value in %edx:%eax and
# exchange_value in %ecx:%ebx. The 64-bit result is whatever cmpxchg8b leaves
# in %edx:%eax. %ebx and %edi are saved and restored.
.p2align 4,,15
DECLARE_FUNC(_Atomic_cmpxchg_long):
# 8(%esp) : return PC
pushl %ebx # 4(%esp) : old %ebx
pushl %edi # 0(%esp) : old %edi
movl 12(%esp), %ebx # 12(%esp) : exchange_value (low)
movl 16(%esp), %ecx # 16(%esp) : exchange_value (high)
movl 24(%esp), %eax # 24(%esp) : compare_value (low)
movl 28(%esp), %edx # 28(%esp) : compare_value (high)
movl 20(%esp), %edi # 20(%esp) : dest
lock cmpxchg8b (%edi)
popl %edi
popl %ebx
ret
# Support for jlong Atomic::load and Atomic::store.
# void _Atomic_move_long(const volatile jlong* src, volatile jlong* dst)
#
# Copies one 64-bit value from *src to *dst through the x87 register stack
# (fildll then fistpll), so the value is read with one 8-byte load and
# written with one 8-byte store. Only %eax is used as scratch.
.p2align 4,,15
DECLARE_FUNC(_Atomic_move_long):
movl 4(%esp), %eax # src
fildll (%eax)
movl 8(%esp), %eax # dest
fistpll (%eax)
ret

View File

@ -1,41 +0,0 @@
#
# Copyright (c) 2022 SAP SE. All rights reserved.
# Copyright (c) 2022, 2024, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License version 2 only, as
# published by the Free Software Foundation.
#
# This code is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# version 2 for more details (a copy is included in the LICENSE file that
# accompanied this code).
#
# You should have received a copy of the GNU General Public License version
# 2 along with this work; if not, write to the Free Software Foundation,
# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
#
# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
# or visit www.oracle.com if you need additional information or have any
# questions.
#
#include "defs.S.inc"
.text
# Support for int SafeFetch32(int* address, int defaultval);
#
# 8(%esp) : default value
# 4(%esp) : crash address
# 0(%esp) : return pc
#
# The routine is split by two extra labels. _SafeFetch32_fault marks the one
# instruction that dereferences the address; _SafeFetch32_continuation is a
# separate tail that returns the default value instead.
# NOTE(review): presumably a fault handler elsewhere resumes execution at
# _SafeFetch32_continuation when the load at _SafeFetch32_fault traps -
# nothing in this file jumps there, so confirm against the handler.
DECLARE_FUNC(SafeFetch32_impl):
movl 4(%esp),%ecx # load address from stack
DECLARE_FUNC(_SafeFetch32_fault):
movl (%ecx), %eax # load target value, may fault
ret
DECLARE_FUNC(_SafeFetch32_continuation):
movl 8(%esp),%eax # load default value from stack
ret