mirror of
https://github.com/openjdk/jdk.git
synced 2026-01-28 03:58:21 +00:00
8345169: Implement JEP 503: Remove the 32-bit x86 Port
Reviewed-by: ihse, mdoerr, vlivanov, kvn, coleenp, dholmes
This commit is contained in:
parent
eb6e8288c6
commit
ee710fec21
@ -75,10 +75,11 @@ AC_DEFUN_ONCE([BASIC_SETUP_PATHS],
|
||||
AC_MSG_NOTICE([Rewriting ORIGINAL_PATH to $REWRITTEN_PATH])
|
||||
fi
|
||||
|
||||
if test "x$OPENJDK_TARGET_CPU" = xx86 && test "x$with_jvm_variants" != xzero; then
|
||||
AC_MSG_ERROR([32-bit x86 builds are not supported])
|
||||
fi
|
||||
|
||||
if test "x$OPENJDK_TARGET_OS" = "xwindows"; then
|
||||
if test "x$OPENJDK_TARGET_CPU_BITS" = "x32"; then
|
||||
AC_MSG_ERROR([32-bit Windows builds are not supported])
|
||||
fi
|
||||
BASIC_SETUP_PATHS_WINDOWS
|
||||
fi
|
||||
|
||||
|
||||
@ -666,17 +666,7 @@ AC_DEFUN([PLATFORM_CHECK_DEPRECATION],
|
||||
[
|
||||
AC_ARG_ENABLE(deprecated-ports, [AS_HELP_STRING([--enable-deprecated-ports@<:@=yes/no@:>@],
|
||||
[Suppress the error when configuring for a deprecated port @<:@no@:>@])])
|
||||
# Unfortunately, variants have not been parsed yet, so we have to check the configure option
|
||||
# directly. Allow only the directly specified Zero variant, treat any other mix as containing
|
||||
# something non-Zero.
|
||||
if test "x$OPENJDK_TARGET_CPU" = xx86 && test "x$with_jvm_variants" != xzero; then
|
||||
if test "x$enable_deprecated_ports" = "xyes"; then
|
||||
AC_MSG_WARN([The 32-bit x86 port is deprecated and may be removed in a future release.])
|
||||
else
|
||||
AC_MSG_ERROR(m4_normalize([The 32-bit x86 port is deprecated and may be removed in a future release.
|
||||
Use --enable-deprecated-ports=yes to suppress this error.]))
|
||||
fi
|
||||
fi
|
||||
# There are currently no deprecated ports. Add deprecation warnings for any future deprecated port here.
|
||||
])
|
||||
|
||||
AC_DEFUN_ONCE([PLATFORM_SETUP_OPENJDK_BUILD_OS_VERSION],
|
||||
|
||||
@ -1,42 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2020, 2025, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
|
||||
#include "prims/downcallLinker.hpp"
|
||||
|
||||
// Placeholder implementation: no downcall stub is generated here. The call is
// reported through Unimplemented(); the nullptr result only satisfies the
// declared return type.
RuntimeStub* DowncallLinker::make_downcall_stub(
    BasicType* signature,
    int num_args,
    BasicType ret_bt,
    const ABIDescriptor& abi,
    const GrowableArray<VMStorage>& input_registers,
    const GrowableArray<VMStorage>& output_registers,
    bool needs_return_buffer,
    int captured_state_mask,
    bool needs_transition)
{
  Unimplemented();
  return nullptr;
}
|
||||
|
||||
// Placeholder implementation: emits no code and reports the call through
// Unimplemented().
void DowncallLinker::StubGenerator::pd_add_offset_to_oop(
    VMStorage reg_oop,
    VMStorage reg_offset,
    VMStorage tmp1,
    VMStorage tmp2) const
{
  Unimplemented();
}
|
||||
@ -1,54 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2022, 2025, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
|
||||
#include "code/vmreg.hpp"
|
||||
#include "prims/foreignGlobals.hpp"
|
||||
#include "utilities/debug.hpp"
|
||||
|
||||
class MacroAssembler;
|
||||
|
||||
// Always answers false: the foreign linker is reported as unsupported, and the
// remaining entry points in this file are Unimplemented() placeholders.
bool ForeignGlobals::is_foreign_linker_supported()
{
  return false;
}
|
||||
|
||||
// Placeholder implementation: jabi is never inspected. The call is reported
// through Unimplemented(); the value-initialized descriptor only satisfies the
// declared return type.
const ABIDescriptor ForeignGlobals::parse_abi_descriptor(jobject jabi)
{
  Unimplemented();
  return {};
}
|
||||
|
||||
// Placeholder implementation: reports the call through Unimplemented(); the -1
// result only satisfies the declared return type.
int RegSpiller::pd_reg_size(VMStorage reg)
{
  Unimplemented();
  return -1;
}
|
||||
|
||||
// Placeholder implementation: emits no store and reports the call through
// Unimplemented().
void RegSpiller::pd_store_reg(MacroAssembler* masm, int offset, VMStorage reg)
{
  Unimplemented();
}
|
||||
|
||||
// Placeholder implementation: emits no load and reports the call through
// Unimplemented().
void RegSpiller::pd_load_reg(MacroAssembler* masm, int offset, VMStorage reg)
{
  Unimplemented();
}
|
||||
|
||||
// Placeholder implementation: emits no shuffle code and reports the call
// through Unimplemented().
void ArgumentShuffle::pd_generate(MacroAssembler* masm,
                                  VMStorage tmp,
                                  int in_stk_bias,
                                  int out_stk_bias) const
{
  Unimplemented();
}
|
||||
@ -1,71 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2018, Red Hat, Inc. All rights reserved.
|
||||
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
//
|
||||
// This code is free software; you can redistribute it and/or modify it
|
||||
// under the terms of the GNU General Public License version 2 only, as
|
||||
// published by the Free Software Foundation.
|
||||
//
|
||||
// This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
// version 2 for more details (a copy is included in the LICENSE file that
|
||||
// accompanied this code).
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License version
|
||||
// 2 along with this work; if not, write to the Free Software Foundation,
|
||||
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
//
|
||||
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
// or visit www.oracle.com if you need additional information or have any
|
||||
// questions.
|
||||
//
|
||||
//
|
||||
|
||||
source_hpp %{
|
||||
#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp"
|
||||
#include "gc/shenandoah/c2/shenandoahSupport.hpp"
|
||||
%}
|
||||
|
||||
// Shenandoah pointer compare-and-swap. Matches both the strong and the weak
// CAS ideal nodes and delegates code emission to the Shenandoah barrier-set
// assembler in non-exchange mode, with the outcome reported in 'res'.
instruct compareAndSwapP_shenandoah(rRegI res, memory mem_ptr,
                                    eRegP tmp1, eRegP tmp2,
                                    eAXRegP oldval, eRegP newval,
                                    eFlagsReg cr) %{
  match(Set res (ShenandoahCompareAndSwapP mem_ptr (Binary oldval newval)));
  match(Set res (ShenandoahWeakCompareAndSwapP mem_ptr (Binary oldval newval)));
  // Two scratch pointers are required; flags and the expected value are clobbered.
  effect(TEMP tmp1, TEMP tmp2, KILL cr, KILL oldval);

  format %{ "shenandoah_cas_oop $mem_ptr,$newval" %}

  ins_encode %{
    ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm,
                                                   $res$$Register,
                                                   $mem_ptr$$Address,
                                                   $oldval$$Register,
                                                   $newval$$Register,
                                                   false, // swap
                                                   $tmp1$$Register,
                                                   $tmp2$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}
|
||||
|
||||
// Shenandoah pointer compare-and-exchange. The result is produced in 'oldval'
// (the match rule sets oldval), so no separate result register is passed to
// the barrier-set assembler (noreg) and exchange mode is requested.
instruct compareAndExchangeP_shenandoah(memory mem_ptr,
                                        eAXRegP oldval, eRegP newval,
                                        eRegP tmp1, eRegP tmp2,
                                        eFlagsReg cr) %{
  match(Set oldval (ShenandoahCompareAndExchangeP mem_ptr (Binary oldval newval)));
  // Flags are clobbered; two scratch pointers are required.
  effect(KILL cr, TEMP tmp1, TEMP tmp2);
  ins_cost(1000);

  format %{ "shenandoah_cas_oop $mem_ptr,$newval" %}

  ins_encode %{
    ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm,
                                                   noreg,
                                                   $mem_ptr$$Address,
                                                   $oldval$$Register,
                                                   $newval$$Register,
                                                   true, // exchange
                                                   $tmp1$$Register,
                                                   $tmp2$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}
|
||||
@ -1,145 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 1998, 2025, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "interpreter/interp_masm.hpp"
|
||||
#include "interpreter/interpreter.hpp"
|
||||
#include "interpreter/interpreterRuntime.hpp"
|
||||
#include "memory/allocation.inline.hpp"
|
||||
#include "oops/method.hpp"
|
||||
#include "oops/oop.inline.hpp"
|
||||
#include "runtime/handles.inline.hpp"
|
||||
#include "runtime/icache.hpp"
|
||||
#include "runtime/interfaceSupport.inline.hpp"
|
||||
#include "runtime/signature.hpp"
|
||||
|
||||
|
||||
#define __ _masm->
|
||||
|
||||
|
||||
// Implementation of SignatureHandlerGenerator
|
||||
// Sets up the signature iteration for 'method' and creates the assembler that
// will emit the handler code into 'buffer'.
InterpreterRuntime::SignatureHandlerGenerator::SignatureHandlerGenerator(
    const methodHandle& method, CodeBuffer* buffer)
  : NativeSignatureIterator(method)
{
  _masm = new MacroAssembler(buffer);
}
|
||||
|
||||
// int argument: copy one 32-bit slot from the current Java local to the
// outgoing slot one past the current JNI offset.
void InterpreterRuntime::SignatureHandlerGenerator::pass_int()
{
  const int java_slot   = offset();
  const int native_slot = jni_offset() + 1;
  move(java_slot, native_slot);
}
|
||||
|
||||
// float argument: handled exactly like an int, as a raw 32-bit slot copy.
void InterpreterRuntime::SignatureHandlerGenerator::pass_float()
{
  const int java_slot   = offset();
  const int native_slot = jni_offset() + 1;
  move(java_slot, native_slot);
}
|
||||
|
||||
// long argument: copied as two 32-bit halves. The two Java slots land in the
// outgoing area in swapped order (slot offset() -> +2, slot offset()+1 -> +1).
void InterpreterRuntime::SignatureHandlerGenerator::pass_long()
{
  const int java_slot   = offset();
  const int native_base = jni_offset();
  move(java_slot,     native_base + 2);
  move(java_slot + 1, native_base + 1);
}
|
||||
|
||||
// Object argument: boxed by box(), which passes the address of the local slot
// rather than its contents (or null when the slot holds null).
void InterpreterRuntime::SignatureHandlerGenerator::pass_object()
{
  const int java_slot   = offset();
  const int native_slot = jni_offset() + 1;
  box(java_slot, native_slot);
}
|
||||
|
||||
void InterpreterRuntime::SignatureHandlerGenerator::move(int from_offset, int to_offset) {
|
||||
__ movl(temp(), Address(from(), Interpreter::local_offset_in_bytes(from_offset)));
|
||||
__ movl(Address(to(), to_offset * wordSize), temp());
|
||||
}
|
||||
|
||||
|
||||
void InterpreterRuntime::SignatureHandlerGenerator::box(int from_offset, int to_offset) {
|
||||
__ lea(temp(), Address(from(), Interpreter::local_offset_in_bytes(from_offset)));
|
||||
__ cmpptr(Address(from(), Interpreter::local_offset_in_bytes(from_offset)), NULL_WORD); // do not use temp() to avoid AGI
|
||||
Label L;
|
||||
__ jcc(Assembler::notZero, L);
|
||||
__ movptr(temp(), NULL_WORD);
|
||||
__ bind(L);
|
||||
__ movptr(Address(to(), to_offset * wordSize), temp());
|
||||
}
|
||||
|
||||
|
||||
// Emits the complete signature handler: the per-argument copy code produced by
// iterating over 'fingerprint', followed by code that returns the address of
// the method's result handler in rax.
void InterpreterRuntime::SignatureHandlerGenerator::generate(uint64_t fingerprint)
{
  // Argument handling code, one pass_* callback per argument.
  iterate(fingerprint);

  // Hand the result handler back to the caller in rax.
  const address handler_for_result =
      (address)Interpreter::result_handler(method()->result_type());
  __ lea(rax, ExternalAddress(handler_for_result));

  __ ret(0);
  __ flush();
}
|
||||
|
||||
|
||||
// Base register used to address the Java locals (source of the copies).
Register InterpreterRuntime::SignatureHandlerGenerator::from()
{
  return rdi;
}
|
||||
// Base register used to address the outgoing argument words (destination).
Register InterpreterRuntime::SignatureHandlerGenerator::to()
{
  return rsp;
}
|
||||
// Scratch register used to carry each value between load and store.
Register InterpreterRuntime::SignatureHandlerGenerator::temp()
{
  return rcx;
}
|
||||
|
||||
|
||||
// Implementation of SignatureHandlerLibrary
|
||||
|
||||
// Platform hook invoked with a handler address; intentionally does nothing here.
void SignatureHandlerLibrary::pd_set_handler(address handler)
{
}
|
||||
|
||||
class SlowSignatureHandler: public NativeSignatureIterator {
|
||||
private:
|
||||
address _from;
|
||||
intptr_t* _to;
|
||||
|
||||
virtual void pass_int() {
|
||||
*_to++ = *(jint *)(_from+Interpreter::local_offset_in_bytes(0));
|
||||
_from -= Interpreter::stackElementSize;
|
||||
}
|
||||
|
||||
virtual void pass_float() {
|
||||
*_to++ = *(jint *)(_from+Interpreter::local_offset_in_bytes(0));
|
||||
_from -= Interpreter::stackElementSize;
|
||||
}
|
||||
|
||||
virtual void pass_long() {
|
||||
_to[0] = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(1));
|
||||
_to[1] = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(0));
|
||||
_to += 2;
|
||||
_from -= 2*Interpreter::stackElementSize;
|
||||
}
|
||||
|
||||
virtual void pass_object() {
|
||||
// pass address of from
|
||||
intptr_t from_addr = (intptr_t)(_from + Interpreter::local_offset_in_bytes(0));
|
||||
*_to++ = (*(intptr_t*)from_addr == 0) ? NULL_WORD : from_addr;
|
||||
_from -= Interpreter::stackElementSize;
|
||||
}
|
||||
|
||||
public:
|
||||
SlowSignatureHandler(const methodHandle& method, address from, intptr_t* to) :
|
||||
NativeSignatureIterator(method) {
|
||||
_from = from;
|
||||
_to = to + (is_static() ? 2 : 1);
|
||||
}
|
||||
};
|
||||
|
||||
JRT_ENTRY(address, InterpreterRuntime::slow_signature_handler(JavaThread* current, Method* method, intptr_t* from, intptr_t* to))
|
||||
methodHandle m(current, (Method*)method);
|
||||
assert(m->is_native(), "sanity check");
|
||||
// handle arguments
|
||||
SlowSignatureHandler(m, (address)from, to + 1).iterate((uint64_t)CONST64(-1));
|
||||
// return result handler
|
||||
return Interpreter::result_handler(m->result_type());
|
||||
JRT_END
|
||||
@ -1,323 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004, 2025, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "asm/macroAssembler.hpp"
|
||||
#include "memory/resourceArea.hpp"
|
||||
#include "prims/jniFastGetField.hpp"
|
||||
#include "prims/jvm_misc.hpp"
|
||||
#include "prims/jvmtiExport.hpp"
|
||||
#include "runtime/os.inline.hpp"
|
||||
#include "runtime/safepoint.hpp"
|
||||
#include "runtime/stubRoutines.hpp"
|
||||
|
||||
#define __ masm->
|
||||
|
||||
#define BUFFER_SIZE 30
|
||||
|
||||
// Instead of issuing lfence for LoadLoad barrier, we create data dependency
|
||||
// between loads, which is much more efficient than lfence.
|
||||
|
||||
// Generates the fast JNI accessor stub for a boolean, byte, char, short or int
// field and returns the stub's entry address.
//
// The stub reads the safepoint counter, speculatively loads the field, and
// then re-reads the counter through an address that is data dependent on the
// loaded value. If the counter's low bit is set, the counter changed, or (when
// field access events can be posted) a field access watch is set, the stub
// tail-calls the regular jni_Get<Type>Field implementation instead.
//
// Instead of issuing lfence for the LoadLoad barrier, a data dependency between
// the loads is created, which is much more efficient than lfence.
address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) {
  const char *name = nullptr;
  switch (type) {
    case T_BOOLEAN: name = "jni_fast_GetBooleanField"; break;
    case T_BYTE:    name = "jni_fast_GetByteField";    break;
    case T_CHAR:    name = "jni_fast_GetCharField";    break;
    case T_SHORT:   name = "jni_fast_GetShortField";   break;
    case T_INT:     name = "jni_fast_GetIntField";     break;
    default:        ShouldNotReachHere();
  }
  ResourceMark rm;
  BufferBlob* blob = BufferBlob::create(name, BUFFER_SIZE*wordSize);
  CodeBuffer cbuf(blob);
  MacroAssembler* masm = new MacroAssembler(&cbuf);
  address fast_entry = __ pc();

  Label slow;

  // stack layout:    offset from rsp (in words):
  //  return pc        0
  //  jni env          1
  //  obj              2
  //  jfieldID         3

  ExternalAddress counter(SafepointSynchronize::safepoint_counter_addr());
  __ mov32 (rcx, counter);
  __ testb (rcx, 1);
  __ jcc (Assembler::notZero, slow);

  if (JvmtiExport::can_post_field_access()) {
    // Check to see if a field access watch has been set before we
    // take the fast path.
    __ cmp32(ExternalAddress((address) JvmtiExport::get_field_access_count_addr()), 0);
    __ jcc(Assembler::notZero, slow);
  }

  __ mov(rax, rcx);
  __ andptr(rax, 1);                         // rax must end up 0 (low bit was tested above)
  __ movptr(rdx, Address(rsp, rax, Address::times_1, 2*wordSize));
                                             // obj; since rax is 0 the address is unchanged,
                                             // but rdx is now data dependent on rcx.
  __ movptr(rax, Address(rsp, 3*wordSize));  // jfieldID

  __ clear_jobject_tag(rdx);

  __ movptr(rdx, Address(rdx, 0));           // *obj
  __ shrptr (rax, 2);                        // offset

  // Record the pc of the speculative load in the shared pc list.
  assert(count < LIST_CAPACITY, "LIST_CAPACITY too small");
  speculative_load_pclist[count] = __ pc();
  switch (type) {
    case T_BOOLEAN: __ movzbl (rax, Address(rdx, rax, Address::times_1)); break;
    case T_BYTE:    __ movsbl (rax, Address(rdx, rax, Address::times_1)); break;
    case T_CHAR:    __ movzwl (rax, Address(rdx, rax, Address::times_1)); break;
    case T_SHORT:   __ movswl (rax, Address(rdx, rax, Address::times_1)); break;
    case T_INT:     __ movl   (rax, Address(rdx, rax, Address::times_1)); break;
    default:        ShouldNotReachHere();
  }

  // Re-read the counter through an address that depends on the loaded value:
  // counter_addr ^ rax ^ rax == counter_addr, so rdx still points at the
  // counter but is data dependent on rax.
  __ lea(rdx, counter);
  __ xorptr(rdx, rax);
  __ xorptr(rdx, rax);
  __ cmp32(rcx, Address(rdx, 0));
  __ jcc (Assembler::notEqual, slow);

  __ ret (0);

  // Record the slow-case entry paired with the speculative load above.
  slowcase_entry_pclist[count++] = __ pc();
  __ bind (slow);
  address slow_case_addr = nullptr;
  switch (type) {
    case T_BOOLEAN: slow_case_addr = jni_GetBooleanField_addr(); break;
    case T_BYTE:    slow_case_addr = jni_GetByteField_addr();    break;
    case T_CHAR:    slow_case_addr = jni_GetCharField_addr();    break;
    case T_SHORT:   slow_case_addr = jni_GetShortField_addr();   break;
    case T_INT:     slow_case_addr = jni_GetIntField_addr();     break;
    default:        ShouldNotReachHere();
  }
  // tail call
  __ jump (RuntimeAddress(slow_case_addr));

  __ flush ();

  return fast_entry;
}
|
||||
|
||||
// Entry address of the fast accessor stub for boolean fields.
address JNI_FastGetField::generate_fast_get_boolean_field()
{
  const BasicType field_type = T_BOOLEAN;
  return generate_fast_get_int_field0(field_type);
}
|
||||
|
||||
// Entry address of the fast accessor stub for byte fields.
address JNI_FastGetField::generate_fast_get_byte_field()
{
  const BasicType field_type = T_BYTE;
  return generate_fast_get_int_field0(field_type);
}
|
||||
|
||||
// Entry address of the fast accessor stub for char fields.
address JNI_FastGetField::generate_fast_get_char_field()
{
  const BasicType field_type = T_CHAR;
  return generate_fast_get_int_field0(field_type);
}
|
||||
|
||||
// Entry address of the fast accessor stub for short fields.
address JNI_FastGetField::generate_fast_get_short_field()
{
  const BasicType field_type = T_SHORT;
  return generate_fast_get_int_field0(field_type);
}
|
||||
|
||||
// Entry address of the fast accessor stub for int fields.
address JNI_FastGetField::generate_fast_get_int_field()
{
  const BasicType field_type = T_INT;
  return generate_fast_get_int_field0(field_type);
}
|
||||
|
||||
// Generates the fast JNI accessor stub for long fields and returns the stub's
// entry address.
//
// Same protocol as the int variant, except that the field is read with two
// speculative loads (result in rax and rdx) and rsi is saved on the stack and
// used as an extra scratch register. Both loads get their own pc list entry,
// and both entries share the same slow-case pc.
address JNI_FastGetField::generate_fast_get_long_field() {
  const char *name = "jni_fast_GetLongField";
  ResourceMark rm;
  BufferBlob* blob = BufferBlob::create(name, BUFFER_SIZE*wordSize);
  CodeBuffer cbuf(blob);
  MacroAssembler* masm = new MacroAssembler(&cbuf);
  address fast_entry = __ pc();

  Label slow;

  // stack layout:    offset from rsp (in words):
  //  old rsi          0
  //  return pc        1
  //  jni env          2
  //  obj              3
  //  jfieldID         4

  ExternalAddress counter(SafepointSynchronize::safepoint_counter_addr());

  __ push (rsi);
  __ mov32 (rcx, counter);
  __ testb (rcx, 1);
  __ jcc (Assembler::notZero, slow);

  if (JvmtiExport::can_post_field_access()) {
    // Check to see if a field access watch has been set before we
    // take the fast path.
    __ cmp32(ExternalAddress((address) JvmtiExport::get_field_access_count_addr()), 0);
    __ jcc(Assembler::notZero, slow);
  }

  __ mov(rax, rcx);
  __ andptr(rax, 1);                         // rax must end up 0 (low bit was tested above)
  __ movptr(rdx, Address(rsp, rax, Address::times_1, 3*wordSize));
                                             // obj; since rax is 0 the address is unchanged,
                                             // but rdx is now data dependent on rcx.
  __ movptr(rsi, Address(rsp, 4*wordSize));  // jfieldID

  __ clear_jobject_tag(rdx);

  __ movptr(rdx, Address(rdx, 0));           // *obj
  __ shrptr(rsi, 2);                         // offset

  // Two speculative loads, hence two pc list entries.
  assert(count < LIST_CAPACITY-1, "LIST_CAPACITY too small");
  speculative_load_pclist[count++] = __ pc();
  __ movptr(rax, Address(rdx, rsi, Address::times_1));
  speculative_load_pclist[count] = __ pc();
  __ movl(rdx, Address(rdx, rsi, Address::times_1, 4));

  // Re-read the counter through an address that depends on both loaded values:
  // counter_addr ^ rdx ^ rax ^ rdx ^ rax == counter_addr, so rsi still points
  // at the counter but is data dependent on both rax and rdx.
  __ lea(rsi, counter);
  __ xorptr(rsi, rdx);
  __ xorptr(rsi, rax);
  __ xorptr(rsi, rdx);
  __ xorptr(rsi, rax);
  __ cmp32(rcx, Address(rsi, 0));
  __ jcc (Assembler::notEqual, slow);

  __ pop (rsi);

  __ ret (0);

  // Both speculative loads share this slow-case entry.
  slowcase_entry_pclist[count-1] = __ pc();
  slowcase_entry_pclist[count++] = __ pc();
  __ bind (slow);
  __ pop (rsi);
  address slow_case_addr = jni_GetLongField_addr();
  // tail call
  __ jump (RuntimeAddress(slow_case_addr));

  __ flush ();

  return fast_entry;
}
|
||||
|
||||
// Generates the fast JNI accessor stub for float or double fields and returns
// the stub's entry address.
//
// Same protocol as the int variant, but the field is loaded onto the FPU stack.
// To build the data dependency for the counter re-check, the loaded value is
// stored as a single-precision float just below rsp and read back into rax.
// When the re-check fails, the speculatively loaded value is popped from the
// FPU stack before the regular jni_Get<Type>Field implementation is called.
address JNI_FastGetField::generate_fast_get_float_field0(BasicType type) {
  const char *name = nullptr;
  switch (type) {
    case T_FLOAT:  name = "jni_fast_GetFloatField";  break;
    case T_DOUBLE: name = "jni_fast_GetDoubleField"; break;
    default:       ShouldNotReachHere();
  }
  ResourceMark rm;
  BufferBlob* blob = BufferBlob::create(name, BUFFER_SIZE*wordSize);
  CodeBuffer cbuf(blob);
  MacroAssembler* masm = new MacroAssembler(&cbuf);
  address fast_entry = __ pc();

  Label slow_with_pop, slow;

  // stack layout:    offset from rsp (in words):
  //  return pc        0
  //  jni env          1
  //  obj              2
  //  jfieldID         3

  ExternalAddress counter(SafepointSynchronize::safepoint_counter_addr());

  __ mov32 (rcx, counter);
  __ testb (rcx, 1);
  __ jcc (Assembler::notZero, slow);

  if (JvmtiExport::can_post_field_access()) {
    // Check to see if a field access watch has been set before we
    // take the fast path.
    __ cmp32(ExternalAddress((address) JvmtiExport::get_field_access_count_addr()), 0);
    __ jcc(Assembler::notZero, slow);
  }

  __ mov(rax, rcx);
  __ andptr(rax, 1);                         // rax must end up 0 (low bit was tested above)
  __ movptr(rdx, Address(rsp, rax, Address::times_1, 2*wordSize));
                                             // obj; since rax is 0 the address is unchanged,
                                             // but rdx is now data dependent on rcx.
  __ movptr(rax, Address(rsp, 3*wordSize));  // jfieldID

  __ clear_jobject_tag(rdx);

  __ movptr(rdx, Address(rdx, 0));           // *obj
  __ shrptr(rax, 2);                         // offset

  // Record the pc of the speculative load in the shared pc list.
  assert(count < LIST_CAPACITY, "LIST_CAPACITY too small");
  speculative_load_pclist[count] = __ pc();
  switch (type) {
    case T_FLOAT:  __ fld_s (Address(rdx, rax, Address::times_1)); break;
    case T_DOUBLE: __ fld_d (Address(rdx, rax, Address::times_1)); break;
    default:       ShouldNotReachHere();
  }

  // Spill the loaded value below rsp and read it back into rax so that the
  // counter address can be made data dependent on the field access.
  __ fst_s (Address(rsp, -4));
  __ lea(rdx, counter);
  __ movl (rax, Address(rsp, -4));
  // garbage hi-order bits on 64bit are harmless.
  // counter_addr ^ rax ^ rax == counter_addr, so rdx still points at the
  // counter but is data dependent on rax.
  __ xorptr(rdx, rax);
  __ xorptr(rdx, rax);
  __ cmp32(rcx, Address(rdx, 0));
  __ jcc (Assembler::notEqual, slow_with_pop);

  __ ret (0);

  __ bind (slow_with_pop);
  // invalid load. pop FPU stack.
  __ fstp_d (0);

  // The slow-case entry is recorded after the pop: control arriving at 'slow'
  // directly skips the FPU pop above.
  slowcase_entry_pclist[count++] = __ pc();
  __ bind (slow);
  address slow_case_addr = nullptr;
  switch (type) {
    case T_FLOAT:  slow_case_addr = jni_GetFloatField_addr();  break;
    case T_DOUBLE: slow_case_addr = jni_GetDoubleField_addr(); break;
    default:       ShouldNotReachHere();
  }
  // tail call
  __ jump (RuntimeAddress(slow_case_addr));

  __ flush ();

  return fast_entry;
}
|
||||
|
||||
// Entry address of the fast accessor stub for float fields.
address JNI_FastGetField::generate_fast_get_float_field()
{
  const BasicType field_type = T_FLOAT;
  return generate_fast_get_float_field0(field_type);
}
|
||||
|
||||
// Entry address of the fast accessor stub for double fields.
address JNI_FastGetField::generate_fast_get_double_field()
{
  const BasicType field_type = T_DOUBLE;
  return generate_fast_get_float_field0(field_type);
}
|
||||
@ -1,52 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2022, 2025, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "macroAssembler_x86.hpp"
|
||||
|
||||
ATTRIBUTE_ALIGNED(16) static const juint _ONES[] = {
|
||||
0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0xbff00000UL
|
||||
};
|
||||
address MacroAssembler::ONES = (address)_ONES;
|
||||
|
||||
ATTRIBUTE_ALIGNED(16) static const juint _PI4_INV[] = {
|
||||
0x6dc9c883UL, 0x3ff45f30UL
|
||||
};
|
||||
address MacroAssembler::PI4_INV = (address)_PI4_INV;
|
||||
|
||||
ATTRIBUTE_ALIGNED(16) static const juint _PI4X3[] = {
|
||||
0x54443000UL, 0xbfe921fbUL, 0x3b39a000UL, 0x3d373dcbUL, 0xe0e68948UL,
|
||||
0xba845c06UL
|
||||
};
|
||||
address MacroAssembler::PI4X3 = (address)_PI4X3;
|
||||
|
||||
ATTRIBUTE_ALIGNED(16) static const juint _PI4X4[] = {
|
||||
0x54400000UL, 0xbfe921fbUL, 0x1a600000UL, 0xbdc0b461UL, 0x2e000000UL,
|
||||
0xbb93198aUL, 0x252049c1UL, 0xb96b839aUL
|
||||
};
|
||||
address MacroAssembler::PI4X4 = (address)_PI4X4;
|
||||
|
||||
ATTRIBUTE_ALIGNED(16) static const juint _L_2IL0FLOATPACKET_0[] = {
|
||||
0xffffffffUL, 0x7fffffffUL, 0x00000000UL, 0x00000000UL
|
||||
};
|
||||
address MacroAssembler::L_2IL0FLOATPACKET_0 = (address)_L_2IL0FLOATPACKET_0;
|
||||
@ -1,427 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2016, 2021, Intel Corporation. All rights reserved.
|
||||
* Intel Math Library (LIBM) Source Code
|
||||
*
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "asm/assembler.hpp"
|
||||
#include "asm/assembler.inline.hpp"
|
||||
#include "macroAssembler_x86.hpp"
|
||||
#include "runtime/stubRoutines.hpp"
|
||||
#include "utilities/globalDefinitions.hpp"
|
||||
|
||||
/******************************************************************************/
|
||||
// ALGORITHM DESCRIPTION - COS()
|
||||
// ---------------------
|
||||
//
|
||||
// 1. RANGE REDUCTION
|
||||
//
|
||||
// We perform an initial range reduction from X to r with
|
||||
//
|
||||
// X =~= N * pi/32 + r
|
||||
//
|
||||
// so that |r| <= pi/64 + epsilon. We restrict inputs to those
|
||||
// where |N| <= 932560. Beyond this, the range reduction is
|
||||
// insufficiently accurate. For extremely small inputs,
|
||||
// denormalization can occur internally, impacting performance.
|
||||
// This means that the main path is actually only taken for
|
||||
// 2^-252 <= |X| < 90112.
|
||||
//
|
||||
// To avoid branches, we perform the range reduction to full
|
||||
// accuracy each time.
|
||||
//
|
||||
// X - N * (P_1 + P_2 + P_3)
|
||||
//
|
||||
// where P_1 and P_2 are 32-bit numbers (so multiplication by N
|
||||
// is exact) and P_3 is a 53-bit number. Together, these
|
||||
// approximate pi well enough for all cases in the restricted
|
||||
// range.
|
||||
//
|
||||
// The main reduction sequence is:
|
||||
//
|
||||
// y = 32/pi * x
|
||||
// N = integer(y)
|
||||
// (computed by adding and subtracting off SHIFTER)
|
||||
//
|
||||
// m_1 = N * P_1
|
||||
// m_2 = N * P_2
|
||||
// r_1 = x - m_1
|
||||
// r = r_1 - m_2
|
||||
// (this r can be used for most of the calculation)
|
||||
//
|
||||
// c_1 = r_1 - r
|
||||
// m_3 = N * P_3
|
||||
// c_2 = c_1 - m_2
|
||||
// c = c_2 - m_3
|
||||
//
|
||||
// 2. MAIN ALGORITHM
|
||||
//
|
||||
// The algorithm uses a table lookup based on B = M * pi / 32
|
||||
// where M = N mod 64. The stored values are:
|
||||
// sigma closest power of 2 to cos(B)
|
||||
// C_hl 53-bit cos(B) - sigma
|
||||
// S_hi + S_lo 2 * 53-bit sin(B)
|
||||
//
|
||||
// The computation is organized as follows:
|
||||
//
|
||||
// sin(B + r + c) = [sin(B) + sigma * r] +
|
||||
// r * (cos(B) - sigma) +
|
||||
// sin(B) * [cos(r + c) - 1] +
|
||||
// cos(B) * [sin(r + c) - r]
|
||||
//
|
||||
// which is approximately:
|
||||
//
|
||||
// [S_hi + sigma * r] +
|
||||
// C_hl * r +
|
||||
// S_lo + S_hi * [(cos(r) - 1) - r * c] +
|
||||
// (C_hl + sigma) * [(sin(r) - r) + c]
|
||||
//
|
||||
// and this is what is actually computed. We separate this sum
|
||||
// into four parts:
|
||||
//
|
||||
// hi + med + pols + corr
|
||||
//
|
||||
// where
|
||||
//
|
||||
// hi = S_hi + sigma r
|
||||
// med = C_hl * r
|
||||
// pols = S_hi * (cos(r) - 1) + (C_hl + sigma) * (sin(r) - r)
|
||||
// corr = S_lo + c * ((C_hl + sigma) - S_hi * r)
|
||||
//
|
||||
// 3. POLYNOMIAL
|
||||
//
|
||||
// The polynomial S_hi * (cos(r) - 1) + (C_hl + sigma) *
|
||||
// (sin(r) - r) can be rearranged freely, since it is quite
|
||||
// small, so we exploit parallelism to the fullest.
|
||||
//
|
||||
// psc4 = SC_4 * r_1
|
||||
// msc4 = psc4 * r
|
||||
// r2 = r * r
|
||||
// msc2 = SC_2 * r2
|
||||
// r4 = r2 * r2
|
||||
// psc3 = SC_3 + msc4
|
||||
// psc1 = SC_1 + msc2
|
||||
// msc3 = r4 * psc3
|
||||
// sincospols = psc1 + msc3
|
||||
// pols = sincospols *
|
||||
// <S_hi * r^2 | (C_hl + sigma) * r^3>
|
||||
//
|
||||
// 4. CORRECTION TERM
|
||||
//
|
||||
// This is where the "c" component of the range reduction is
|
||||
// taken into account; recall that just "r" is used for most of
|
||||
// the calculation.
|
||||
//
|
||||
// -c = m_3 - c_2
|
||||
// -d = S_hi * r - (C_hl + sigma)
|
||||
// corr = -c * -d + S_lo
|
||||
//
|
||||
// 5. COMPENSATED SUMMATIONS
|
||||
//
|
||||
// The two successive compensated summations add up the high
|
||||
// and medium parts, leaving just the low parts to add up at
|
||||
// the end.
|
||||
//
|
||||
// rs = sigma * r
|
||||
// res_int = S_hi + rs
|
||||
// k_0 = S_hi - res_int
|
||||
// k_2 = k_0 + rs
|
||||
// med = C_hl * r
|
||||
// res_hi = res_int + med
|
||||
// k_1 = res_int - res_hi
|
||||
// k_3 = k_1 + med
|
||||
//
|
||||
// 6. FINAL SUMMATION
|
||||
//
|
||||
// We now add up all the small parts:
|
||||
//
|
||||
// res_lo = pols(hi) + pols(lo) + corr + k_1 + k_3
|
||||
//
|
||||
// Now the overall result is just:
|
||||
//
|
||||
// res_hi + res_lo
|
||||
//
|
||||
// 7. SMALL ARGUMENTS
|
||||
//
|
||||
// Inputs with |X| < 2^-252 are treated specially as
|
||||
// 1 - |x|.
|
||||
//
|
||||
// Special cases:
|
||||
// cos(NaN) = quiet NaN, and raise invalid exception
|
||||
// cos(INF) = NaN and raise invalid exception
|
||||
// cos(0) = 1
|
||||
//
|
||||
/******************************************************************************/
|
||||
|
||||
// The 32 bit code is at most SSE2 compliant
|
||||
|
||||
// Constant pool read by MacroAssembler::fast_cos(). Values are 32-bit words;
// each double is stored low word first. Offsets are bytes from the table base.
//
//   [0, 2048)     64 entries of 32 bytes, indexed by M = 0..63 (B = M * pi/32).
//                 Decoding the words against the algorithm description gives
//                 the per-entry layout:
//                   +0   C_hl   (cos(B) - sigma)
//                   +8   S_hi   (sin(B), high part)
//                   +16  S_lo   (sin(B), low part)
//                   +24  sigma  (power of 2 closest to cos(B))
//   [2048, 2256)  polynomial coefficients and scalar constants, laid out in
//                 16-byte rows (annotated below).
ATTRIBUTE_ALIGNED(16) static const juint _static_const_table_cos[] =
{
  // M = 0..7
  0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x3ff00000UL,
  0x176d6d31UL, 0xbf73b92eUL, 0xbc29b42cUL, 0x3fb917a6UL, 0xe0000000UL, 0xbc3e2718UL, 0x00000000UL, 0x3ff00000UL,
  0x011469fbUL, 0xbf93ad06UL, 0x3c69a60bUL, 0x3fc8f8b8UL, 0xc0000000UL, 0xbc626d19UL, 0x00000000UL, 0x3ff00000UL,
  0x939d225aUL, 0xbfa60beaUL, 0x2ed59f06UL, 0x3fd29406UL, 0xa0000000UL, 0xbc75d28dUL, 0x00000000UL, 0x3ff00000UL,
  0x866b95cfUL, 0xbfb37ca1UL, 0xa6aea963UL, 0x3fd87de2UL, 0xe0000000UL, 0xbc672cedUL, 0x00000000UL, 0x3ff00000UL,
  0x73fa1279UL, 0xbfbe3a68UL, 0x3806f63bUL, 0x3fde2b5dUL, 0x20000000UL, 0x3c5e0d89UL, 0x00000000UL, 0x3ff00000UL,
  0x5bc57974UL, 0xbfc59267UL, 0x39ae68c8UL, 0x3fe1c73bUL, 0x20000000UL, 0x3c8b25ddUL, 0x00000000UL, 0x3ff00000UL,
  0x53aba2fdUL, 0xbfcd0dfeUL, 0x25091dd6UL, 0x3fe44cf3UL, 0x20000000UL, 0x3c68076aUL, 0x00000000UL, 0x3ff00000UL,
  // M = 8..15
  0x99fcef32UL, 0x3fca8279UL, 0x667f3bcdUL, 0x3fe6a09eUL, 0x20000000UL, 0xbc8bdd34UL, 0x00000000UL, 0x3fe00000UL,
  0x94247758UL, 0x3fc133ccUL, 0x6b151741UL, 0x3fe8bc80UL, 0x20000000UL, 0xbc82c5e1UL, 0x00000000UL, 0x3fe00000UL,
  0x9ae68c87UL, 0x3fac73b3UL, 0x290ea1a3UL, 0x3fea9b66UL, 0xe0000000UL, 0x3c39f630UL, 0x00000000UL, 0x3fe00000UL,
  0x7f909c4eUL, 0xbf9d4a2cUL, 0xf180bdb1UL, 0x3fec38b2UL, 0x80000000UL, 0xbc76e0b1UL, 0x00000000UL, 0x3fe00000UL,
  0x65455a75UL, 0xbfbe0875UL, 0xcf328d46UL, 0x3fed906bUL, 0x20000000UL, 0x3c7457e6UL, 0x00000000UL, 0x3fe00000UL,
  0x76acf82dUL, 0x3fa4a031UL, 0x56c62ddaUL, 0x3fee9f41UL, 0xe0000000UL, 0x3c8760b1UL, 0x00000000UL, 0x3fd00000UL,
  0x0e5967d5UL, 0xbfac1d1fUL, 0xcff75cb0UL, 0x3fef6297UL, 0x20000000UL, 0x3c756217UL, 0x00000000UL, 0x3fd00000UL,
  0x0f592f50UL, 0xbf9ba165UL, 0xa3d12526UL, 0x3fefd88dUL, 0x40000000UL, 0xbc887df6UL, 0x00000000UL, 0x3fc00000UL,
  // M = 16..23
  0x00000000UL, 0x00000000UL, 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
  0x0f592f50UL, 0x3f9ba165UL, 0xa3d12526UL, 0x3fefd88dUL, 0x40000000UL, 0xbc887df6UL, 0x00000000UL, 0xbfc00000UL,
  0x0e5967d5UL, 0x3fac1d1fUL, 0xcff75cb0UL, 0x3fef6297UL, 0x20000000UL, 0x3c756217UL, 0x00000000UL, 0xbfd00000UL,
  0x76acf82dUL, 0xbfa4a031UL, 0x56c62ddaUL, 0x3fee9f41UL, 0xe0000000UL, 0x3c8760b1UL, 0x00000000UL, 0xbfd00000UL,
  0x65455a75UL, 0x3fbe0875UL, 0xcf328d46UL, 0x3fed906bUL, 0x20000000UL, 0x3c7457e6UL, 0x00000000UL, 0xbfe00000UL,
  0x7f909c4eUL, 0x3f9d4a2cUL, 0xf180bdb1UL, 0x3fec38b2UL, 0x80000000UL, 0xbc76e0b1UL, 0x00000000UL, 0xbfe00000UL,
  0x9ae68c87UL, 0xbfac73b3UL, 0x290ea1a3UL, 0x3fea9b66UL, 0xe0000000UL, 0x3c39f630UL, 0x00000000UL, 0xbfe00000UL,
  0x94247758UL, 0xbfc133ccUL, 0x6b151741UL, 0x3fe8bc80UL, 0x20000000UL, 0xbc82c5e1UL, 0x00000000UL, 0xbfe00000UL,
  // M = 24..31
  0x99fcef32UL, 0xbfca8279UL, 0x667f3bcdUL, 0x3fe6a09eUL, 0x20000000UL, 0xbc8bdd34UL, 0x00000000UL, 0xbfe00000UL,
  0x53aba2fdUL, 0x3fcd0dfeUL, 0x25091dd6UL, 0x3fe44cf3UL, 0x20000000UL, 0x3c68076aUL, 0x00000000UL, 0xbff00000UL,
  0x5bc57974UL, 0x3fc59267UL, 0x39ae68c8UL, 0x3fe1c73bUL, 0x20000000UL, 0x3c8b25ddUL, 0x00000000UL, 0xbff00000UL,
  0x73fa1279UL, 0x3fbe3a68UL, 0x3806f63bUL, 0x3fde2b5dUL, 0x20000000UL, 0x3c5e0d89UL, 0x00000000UL, 0xbff00000UL,
  0x866b95cfUL, 0x3fb37ca1UL, 0xa6aea963UL, 0x3fd87de2UL, 0xe0000000UL, 0xbc672cedUL, 0x00000000UL, 0xbff00000UL,
  0x939d225aUL, 0x3fa60beaUL, 0x2ed59f06UL, 0x3fd29406UL, 0xa0000000UL, 0xbc75d28dUL, 0x00000000UL, 0xbff00000UL,
  0x011469fbUL, 0x3f93ad06UL, 0x3c69a60bUL, 0x3fc8f8b8UL, 0xc0000000UL, 0xbc626d19UL, 0x00000000UL, 0xbff00000UL,
  0x176d6d31UL, 0x3f73b92eUL, 0xbc29b42cUL, 0x3fb917a6UL, 0xe0000000UL, 0xbc3e2718UL, 0x00000000UL, 0xbff00000UL,
  // M = 32..39
  0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0xbff00000UL,
  0x176d6d31UL, 0x3f73b92eUL, 0xbc29b42cUL, 0xbfb917a6UL, 0xe0000000UL, 0x3c3e2718UL, 0x00000000UL, 0xbff00000UL,
  0x011469fbUL, 0x3f93ad06UL, 0x3c69a60bUL, 0xbfc8f8b8UL, 0xc0000000UL, 0x3c626d19UL, 0x00000000UL, 0xbff00000UL,
  0x939d225aUL, 0x3fa60beaUL, 0x2ed59f06UL, 0xbfd29406UL, 0xa0000000UL, 0x3c75d28dUL, 0x00000000UL, 0xbff00000UL,
  0x866b95cfUL, 0x3fb37ca1UL, 0xa6aea963UL, 0xbfd87de2UL, 0xe0000000UL, 0x3c672cedUL, 0x00000000UL, 0xbff00000UL,
  0x73fa1279UL, 0x3fbe3a68UL, 0x3806f63bUL, 0xbfde2b5dUL, 0x20000000UL, 0xbc5e0d89UL, 0x00000000UL, 0xbff00000UL,
  0x5bc57974UL, 0x3fc59267UL, 0x39ae68c8UL, 0xbfe1c73bUL, 0x20000000UL, 0xbc8b25ddUL, 0x00000000UL, 0xbff00000UL,
  0x53aba2fdUL, 0x3fcd0dfeUL, 0x25091dd6UL, 0xbfe44cf3UL, 0x20000000UL, 0xbc68076aUL, 0x00000000UL, 0xbff00000UL,
  // M = 40..47
  0x99fcef32UL, 0xbfca8279UL, 0x667f3bcdUL, 0xbfe6a09eUL, 0x20000000UL, 0x3c8bdd34UL, 0x00000000UL, 0xbfe00000UL,
  0x94247758UL, 0xbfc133ccUL, 0x6b151741UL, 0xbfe8bc80UL, 0x20000000UL, 0x3c82c5e1UL, 0x00000000UL, 0xbfe00000UL,
  0x9ae68c87UL, 0xbfac73b3UL, 0x290ea1a3UL, 0xbfea9b66UL, 0xe0000000UL, 0xbc39f630UL, 0x00000000UL, 0xbfe00000UL,
  0x7f909c4eUL, 0x3f9d4a2cUL, 0xf180bdb1UL, 0xbfec38b2UL, 0x80000000UL, 0x3c76e0b1UL, 0x00000000UL, 0xbfe00000UL,
  0x65455a75UL, 0x3fbe0875UL, 0xcf328d46UL, 0xbfed906bUL, 0x20000000UL, 0xbc7457e6UL, 0x00000000UL, 0xbfe00000UL,
  0x76acf82dUL, 0xbfa4a031UL, 0x56c62ddaUL, 0xbfee9f41UL, 0xe0000000UL, 0xbc8760b1UL, 0x00000000UL, 0xbfd00000UL,
  0x0e5967d5UL, 0x3fac1d1fUL, 0xcff75cb0UL, 0xbfef6297UL, 0x20000000UL, 0xbc756217UL, 0x00000000UL, 0xbfd00000UL,
  0x0f592f50UL, 0x3f9ba165UL, 0xa3d12526UL, 0xbfefd88dUL, 0x40000000UL, 0x3c887df6UL, 0x00000000UL, 0xbfc00000UL,
  // M = 48..55
  0x00000000UL, 0x00000000UL, 0x00000000UL, 0xbff00000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
  0x0f592f50UL, 0xbf9ba165UL, 0xa3d12526UL, 0xbfefd88dUL, 0x40000000UL, 0x3c887df6UL, 0x00000000UL, 0x3fc00000UL,
  0x0e5967d5UL, 0xbfac1d1fUL, 0xcff75cb0UL, 0xbfef6297UL, 0x20000000UL, 0xbc756217UL, 0x00000000UL, 0x3fd00000UL,
  0x76acf82dUL, 0x3fa4a031UL, 0x56c62ddaUL, 0xbfee9f41UL, 0xe0000000UL, 0xbc8760b1UL, 0x00000000UL, 0x3fd00000UL,
  0x65455a75UL, 0xbfbe0875UL, 0xcf328d46UL, 0xbfed906bUL, 0x20000000UL, 0xbc7457e6UL, 0x00000000UL, 0x3fe00000UL,
  0x7f909c4eUL, 0xbf9d4a2cUL, 0xf180bdb1UL, 0xbfec38b2UL, 0x80000000UL, 0x3c76e0b1UL, 0x00000000UL, 0x3fe00000UL,
  0x9ae68c87UL, 0x3fac73b3UL, 0x290ea1a3UL, 0xbfea9b66UL, 0xe0000000UL, 0xbc39f630UL, 0x00000000UL, 0x3fe00000UL,
  0x94247758UL, 0x3fc133ccUL, 0x6b151741UL, 0xbfe8bc80UL, 0x20000000UL, 0x3c82c5e1UL, 0x00000000UL, 0x3fe00000UL,
  // M = 56..63
  0x99fcef32UL, 0x3fca8279UL, 0x667f3bcdUL, 0xbfe6a09eUL, 0x20000000UL, 0x3c8bdd34UL, 0x00000000UL, 0x3fe00000UL,
  0x53aba2fdUL, 0xbfcd0dfeUL, 0x25091dd6UL, 0xbfe44cf3UL, 0x20000000UL, 0xbc68076aUL, 0x00000000UL, 0x3ff00000UL,
  0x5bc57974UL, 0xbfc59267UL, 0x39ae68c8UL, 0xbfe1c73bUL, 0x20000000UL, 0xbc8b25ddUL, 0x00000000UL, 0x3ff00000UL,
  0x73fa1279UL, 0xbfbe3a68UL, 0x3806f63bUL, 0xbfde2b5dUL, 0x20000000UL, 0xbc5e0d89UL, 0x00000000UL, 0x3ff00000UL,
  0x866b95cfUL, 0xbfb37ca1UL, 0xa6aea963UL, 0xbfd87de2UL, 0xe0000000UL, 0x3c672cedUL, 0x00000000UL, 0x3ff00000UL,
  0x939d225aUL, 0xbfa60beaUL, 0x2ed59f06UL, 0xbfd29406UL, 0xa0000000UL, 0x3c75d28dUL, 0x00000000UL, 0x3ff00000UL,
  0x011469fbUL, 0xbf93ad06UL, 0x3c69a60bUL, 0xbfc8f8b8UL, 0xc0000000UL, 0x3c626d19UL, 0x00000000UL, 0x3ff00000UL,
  0x176d6d31UL, 0xbf73b92eUL, 0xbc29b42cUL, 0xbfb917a6UL, 0xe0000000UL, 0x3c3e2718UL, 0x00000000UL, 0x3ff00000UL,

  // Scalar / packed constants, one 16-byte row per line.
  0x55555555UL, 0xbfc55555UL, 0x00000000UL, 0xbfe00000UL,  // 2048: -1/6, -1/2
  0x11111111UL, 0x3f811111UL, 0x55555555UL, 0x3fa55555UL,  // 2064: 1/120, 1/24
  0x1a01a01aUL, 0xbf2a01a0UL, 0x16c16c17UL, 0xbf56c16cUL,  // 2080: -1/5040, -1/720
  0xa556c734UL, 0x3ec71de3UL, 0x1a01a01aUL, 0x3efa01a0UL,  // 2096: ~1/362880, 1/40320
  0x1a600000UL, 0x3d90b461UL, 0x1a600000UL, 0x3d90b461UL,  // 2112: multiplied by N to form m_2 (P_2, duplicated)
  0x54400000UL, 0x3fb921fbUL, 0x00000000UL, 0x00000000UL,  // 2128: multiplied by N to form m_1 (P_1, ~pi/32)
  0x2e037073UL, 0x3b63198aUL, 0x00000000UL, 0x00000000UL,  // 2144: multiplied by N to form m_3 (P_3)
  0x6dc9c883UL, 0x40245f30UL, 0x00000000UL, 0x00000000UL,  // 2160: multiplied by x to form y (~32/pi)
  0x00000000UL, 0x43380000UL, 0x00000000UL, 0x00000000UL,  // 2176: not referenced by fast_cos()
  0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x00000000UL,  // 2192: 1.0
  0x00000000UL, 0x80000000UL, 0x00000000UL, 0x00000000UL,  // 2208: -0.0 (multiplier on the Inf/NaN path)
  0x00000000UL, 0x80000000UL, 0x00000000UL, 0x00000000UL,  // 2224: sign-bit mask
  0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL   // 2240: 0.5, 0.5
};
|
||||
//registers,
|
||||
// input: (rbp + 8)
|
||||
// scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
|
||||
// eax, ecx, edx, ebx (tmp)
|
||||
|
||||
// Code generated by Intel C compiler for LIBM library
|
||||
|
||||
void MacroAssembler::fast_cos(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
|
||||
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
|
||||
Register eax, Register ecx, Register edx, Register tmp) {
|
||||
Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
|
||||
Label start;
|
||||
|
||||
assert_different_registers(tmp, eax, ecx, edx);
|
||||
|
||||
address static_const_table_cos = (address)_static_const_table_cos;
|
||||
|
||||
bind(start);
|
||||
subl(rsp, 120);
|
||||
movl(Address(rsp, 56), tmp);
|
||||
lea(tmp, ExternalAddress(static_const_table_cos));
|
||||
movsd(xmm0, Address(rsp, 128));
|
||||
pextrw(eax, xmm0, 3);
|
||||
andl(eax, 32767);
|
||||
subl(eax, 12336);
|
||||
cmpl(eax, 4293);
|
||||
jcc(Assembler::above, L_2TAG_PACKET_0_0_2);
|
||||
movsd(xmm1, Address(tmp, 2160));
|
||||
mulsd(xmm1, xmm0);
|
||||
movdqu(xmm5, Address(tmp, 2240));
|
||||
movsd(xmm4, Address(tmp, 2224));
|
||||
pand(xmm4, xmm0);
|
||||
por(xmm5, xmm4);
|
||||
movsd(xmm3, Address(tmp, 2128));
|
||||
movdqu(xmm2, Address(tmp, 2112));
|
||||
addpd(xmm1, xmm5);
|
||||
cvttsd2sil(edx, xmm1);
|
||||
cvtsi2sdl(xmm1, edx);
|
||||
mulsd(xmm3, xmm1);
|
||||
unpcklpd(xmm1, xmm1);
|
||||
addl(edx, 1865232);
|
||||
movdqu(xmm4, xmm0);
|
||||
andl(edx, 63);
|
||||
movdqu(xmm5, Address(tmp, 2096));
|
||||
lea(eax, Address(tmp, 0));
|
||||
shll(edx, 5);
|
||||
addl(eax, edx);
|
||||
mulpd(xmm2, xmm1);
|
||||
subsd(xmm0, xmm3);
|
||||
mulsd(xmm1, Address(tmp, 2144));
|
||||
subsd(xmm4, xmm3);
|
||||
movsd(xmm7, Address(eax, 8));
|
||||
unpcklpd(xmm0, xmm0);
|
||||
movapd(xmm3, xmm4);
|
||||
subsd(xmm4, xmm2);
|
||||
mulpd(xmm5, xmm0);
|
||||
subpd(xmm0, xmm2);
|
||||
movdqu(xmm6, Address(tmp, 2064));
|
||||
mulsd(xmm7, xmm4);
|
||||
subsd(xmm3, xmm4);
|
||||
mulpd(xmm5, xmm0);
|
||||
mulpd(xmm0, xmm0);
|
||||
subsd(xmm3, xmm2);
|
||||
movdqu(xmm2, Address(eax, 0));
|
||||
subsd(xmm1, xmm3);
|
||||
movsd(xmm3, Address(eax, 24));
|
||||
addsd(xmm2, xmm3);
|
||||
subsd(xmm7, xmm2);
|
||||
mulsd(xmm2, xmm4);
|
||||
mulpd(xmm6, xmm0);
|
||||
mulsd(xmm3, xmm4);
|
||||
mulpd(xmm2, xmm0);
|
||||
mulpd(xmm0, xmm0);
|
||||
addpd(xmm5, Address(tmp, 2080));
|
||||
mulsd(xmm4, Address(eax, 0));
|
||||
addpd(xmm6, Address(tmp, 2048));
|
||||
mulpd(xmm5, xmm0);
|
||||
movapd(xmm0, xmm3);
|
||||
addsd(xmm3, Address(eax, 8));
|
||||
mulpd(xmm1, xmm7);
|
||||
movapd(xmm7, xmm4);
|
||||
addsd(xmm4, xmm3);
|
||||
addpd(xmm6, xmm5);
|
||||
movsd(xmm5, Address(eax, 8));
|
||||
subsd(xmm5, xmm3);
|
||||
subsd(xmm3, xmm4);
|
||||
addsd(xmm1, Address(eax, 16));
|
||||
mulpd(xmm6, xmm2);
|
||||
addsd(xmm5, xmm0);
|
||||
addsd(xmm3, xmm7);
|
||||
addsd(xmm1, xmm5);
|
||||
addsd(xmm1, xmm3);
|
||||
addsd(xmm1, xmm6);
|
||||
unpckhpd(xmm6, xmm6);
|
||||
addsd(xmm1, xmm6);
|
||||
addsd(xmm4, xmm1);
|
||||
movsd(Address(rsp, 0), xmm4);
|
||||
fld_d(Address(rsp, 0));
|
||||
jmp(L_2TAG_PACKET_1_0_2);
|
||||
|
||||
bind(L_2TAG_PACKET_0_0_2);
|
||||
jcc(Assembler::greater, L_2TAG_PACKET_2_0_2);
|
||||
pextrw(eax, xmm0, 3);
|
||||
andl(eax, 32767);
|
||||
pinsrw(xmm0, eax, 3);
|
||||
movsd(xmm1, Address(tmp, 2192));
|
||||
subsd(xmm1, xmm0);
|
||||
movsd(Address(rsp, 0), xmm1);
|
||||
fld_d(Address(rsp, 0));
|
||||
jmp(L_2TAG_PACKET_1_0_2);
|
||||
|
||||
bind(L_2TAG_PACKET_2_0_2);
|
||||
movl(eax, Address(rsp, 132));
|
||||
andl(eax, 2146435072);
|
||||
cmpl(eax, 2146435072);
|
||||
jcc(Assembler::equal, L_2TAG_PACKET_3_0_2);
|
||||
subl(rsp, 32);
|
||||
movsd(Address(rsp, 0), xmm0);
|
||||
lea(eax, Address(rsp, 40));
|
||||
movl(Address(rsp, 8), eax);
|
||||
movl(eax, 1);
|
||||
movl(Address(rsp, 12), eax);
|
||||
call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlibm_sin_cos_huge())));
|
||||
addl(rsp, 32);
|
||||
fld_d(Address(rsp, 8));
|
||||
jmp(L_2TAG_PACKET_1_0_2);
|
||||
|
||||
bind(L_2TAG_PACKET_3_0_2);
|
||||
fld_d(Address(rsp, 128));
|
||||
fmul_d(Address(tmp, 2208));
|
||||
|
||||
bind(L_2TAG_PACKET_1_0_2);
|
||||
movl(tmp, Address(rsp, 56));
|
||||
}
|
||||
@ -1,329 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2016, 2021, Intel Corporation. All rights reserved.
|
||||
* Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved.
|
||||
* Intel Math Library (LIBM) Source Code
|
||||
*
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "asm/assembler.hpp"
|
||||
#include "asm/assembler.inline.hpp"
|
||||
#include "macroAssembler_x86.hpp"
|
||||
#include "runtime/stubRoutines.hpp"
|
||||
#include "utilities/globalDefinitions.hpp"
|
||||
|
||||
/******************************************************************************/
|
||||
// ALGORITHM DESCRIPTION - EXP()
|
||||
// ---------------------
|
||||
//
|
||||
// Description:
|
||||
// Let K = 64 (table size).
|
||||
// (exponents are written inline with ^)
|
||||
// e^x = 2^(x/log(2)) = 2^n * T[j] * (1 + P(y))
|
||||
// where
|
||||
// x = m*log(2)/K + y, y in [-log(2)/K..log(2)/K]
|
||||
// m = n*K + j, m,n,j - signed integer, j in [-K/2..K/2]
|
||||
// and the values 2^(j/K)
|
||||
// are tabulated as T[j] = T_hi[j] * (1 + T_lo[j]).
|
||||
//
|
||||
// P(y) is a minimax polynomial approximation of exp(y)-1
|
||||
// on the small interval [-log(2)/K..log(2)/K] (coefficients were calculated with Maple V).
|
||||
//
|
||||
// To avoid problems with arithmetic overflow and underflow,
|
||||
// the value 2^n
|
||||
// is safely computed as 2^n1 * 2^n2, where n1 is in [-BIAS/2..BIAS/2]
|
||||
// where BIAS is a value of exponent bias.
|
||||
//
|
||||
// Special cases:
|
||||
// exp(NaN) = NaN
|
||||
// exp(+INF) = +INF
|
||||
// exp(-INF) = 0
|
||||
// exp(x) = 1 for subnormals
|
||||
// for finite argument, only exp(0)=1 is exact
|
||||
// For IEEE double
|
||||
// if x > 709.782712893383973096 then exp(x) overflow
|
||||
// if x < -745.133219101941108420 then exp(x) underflow
|
||||
//
|
||||
/******************************************************************************/
|
||||
|
||||
// The 32 bit code is at most SSE2 compliant
|
||||
|
||||
// Constant pool read by MacroAssembler::fast_exp(). Values are 32-bit words;
// each 64-bit quantity is stored low word first. Offsets are bytes from the
// table base; every line below is one 16-byte row.
//
//   [0, 160)      masks and packed scalar constants
//   [160, 1184)   64 entries of 16 bytes, selected by (m & 63) << 4. The low
//                 8 bytes are a small double; the high 8 bytes have a zero
//                 exponent field and are ORed with exponent bits by the code
//                 (presumably T_lo[j] and the mantissa of T_hi[j] from the
//                 algorithm description).
//   [1184, 1224)  special-case results
ATTRIBUTE_ALIGNED(16) static const juint _static_const_table[] =
{
  0x00000000UL, 0xfff00000UL, 0x00000000UL, 0xfff00000UL,  //   0: sign + exponent mask
  0xffffffc0UL, 0x00000000UL, 0xffffffc0UL, 0x00000000UL,  //  16: clears the low 6 bits of m
  0x0000ffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL,  //  32: 1023 << 6 (exponent bias)
  0x00000000UL, 0x43380000UL, 0x00000000UL, 0x43380000UL,  //  48: added then subtracted to round to integer
  0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL,  //  64: ~64/log(2)
  0xfefa0000UL, 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL,  //  80: ~log(2)/64, high part
  0xbc9e3b3aUL, 0x3d1cf79aUL, 0xbc9e3b3aUL, 0x3d1cf79aUL,  //  96: log(2)/64, low part
  0xfffffffeUL, 0x3fdfffffUL, 0xfffffffeUL, 0x3fdfffffUL,  // 112: ~0.5
  0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL, 0x3fa55555UL,  // 128: polynomial coefficients (~1/720, ~1/24)
  0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL,  // 144: polynomial coefficients (~1/120, ~1/6)

  // j = 0..15 (offset 160)
  0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
  0x0e03754dUL, 0x3cad7bbfUL, 0x3e778060UL, 0x00002c9aUL,
  0x3567f613UL, 0x3c8cd252UL, 0xd3158574UL, 0x000059b0UL,
  0x61e6c861UL, 0x3c60f74eUL, 0x18759bc8UL, 0x00008745UL,
  0x5d837b6cUL, 0x3c979aa6UL, 0x6cf9890fUL, 0x0000b558UL,
  0x702f9cd1UL, 0x3c3ebe3dUL, 0x32d3d1a2UL, 0x0000e3ecUL,
  0x1e63bcd8UL, 0x3ca3516eUL, 0xd0125b50UL, 0x00011301UL,
  0x26f0387bUL, 0x3ca4c554UL, 0xaea92ddfUL, 0x0001429aUL,
  0x62523fb6UL, 0x3ca95153UL, 0x3c7d517aUL, 0x000172b8UL,
  0x3f1353bfUL, 0x3c8b898cUL, 0xeb6fcb75UL, 0x0001a35bUL,
  0x3e3a2f5fUL, 0x3c9aecf7UL, 0x3168b9aaUL, 0x0001d487UL,
  0x44a6c38dUL, 0x3c8a6f41UL, 0x88628cd6UL, 0x0002063bUL,
  0xe3a8a894UL, 0x3c968efdUL, 0x6e756238UL, 0x0002387aUL,
  0x981fe7f2UL, 0x3c80472bUL, 0x65e27cddUL, 0x00026b45UL,
  0x6d09ab31UL, 0x3c82f7e1UL, 0xf51fdee1UL, 0x00029e9dUL,
  0x720c0ab3UL, 0x3c8b3782UL, 0xa6e4030bUL, 0x0002d285UL,
  // j = 16..31
  0x4db0abb6UL, 0x3c834d75UL, 0x0a31b715UL, 0x000306feUL,
  0x5dd3f84aUL, 0x3c8fdd39UL, 0xb26416ffUL, 0x00033c08UL,
  0xcc187d29UL, 0x3ca12f8cUL, 0x373aa9caUL, 0x000371a7UL,
  0x738b5e8bUL, 0x3ca7d229UL, 0x34e59ff6UL, 0x0003a7dbUL,
  0xa72a4c6dUL, 0x3c859f48UL, 0x4c123422UL, 0x0003dea6UL,
  0x259d9205UL, 0x3ca8b846UL, 0x21f72e29UL, 0x0004160aUL,
  0x60c2ac12UL, 0x3c4363edUL, 0x6061892dUL, 0x00044e08UL,
  0xdaa10379UL, 0x3c6ecce1UL, 0xb5c13cd0UL, 0x000486a2UL,
  0xbb7aafb0UL, 0x3c7690ceUL, 0xd5362a27UL, 0x0004bfdaUL,
  0x9b282a09UL, 0x3ca083ccUL, 0x769d2ca6UL, 0x0004f9b2UL,
  0xc1aae707UL, 0x3ca509b0UL, 0x569d4f81UL, 0x0005342bUL,
  0x18fdd78eUL, 0x3c933505UL, 0x36b527daUL, 0x00056f47UL,
  0xe21c5409UL, 0x3c9063e1UL, 0xdd485429UL, 0x0005ab07UL,
  0x2b64c035UL, 0x3c9432e6UL, 0x15ad2148UL, 0x0005e76fUL,
  0x99f08c0aUL, 0x3ca01284UL, 0xb03a5584UL, 0x0006247eUL,
  0x0073dc06UL, 0x3c99f087UL, 0x82552224UL, 0x00066238UL,
  // j = 32..47
  0x0da05571UL, 0x3c998d4dUL, 0x667f3bccUL, 0x0006a09eUL,
  0x86ce4786UL, 0x3ca52bb9UL, 0x3c651a2eUL, 0x0006dfb2UL,
  0x206f0dabUL, 0x3ca32092UL, 0xe8ec5f73UL, 0x00071f75UL,
  0x8e17a7a6UL, 0x3ca06122UL, 0x564267c8UL, 0x00075febUL,
  0x461e9f86UL, 0x3ca244acUL, 0x73eb0186UL, 0x0007a114UL,
  0xabd66c55UL, 0x3c65ebe1UL, 0x36cf4e62UL, 0x0007e2f3UL,
  0xbbff67d0UL, 0x3c96fe9fUL, 0x994cce12UL, 0x00082589UL,
  0x14c801dfUL, 0x3c951f14UL, 0x9b4492ecUL, 0x000868d9UL,
  0xc1f0eab4UL, 0x3c8db72fUL, 0x422aa0dbUL, 0x0008ace5UL,
  0x59f35f44UL, 0x3c7bf683UL, 0x99157736UL, 0x0008f1aeUL,
  0x9c06283cUL, 0x3ca360baUL, 0xb0cdc5e4UL, 0x00093737UL,
  0x20f962aaUL, 0x3c95e8d1UL, 0x9fde4e4fUL, 0x00097d82UL,
  0x2b91ce27UL, 0x3c71affcUL, 0x82a3f090UL, 0x0009c491UL,
  0x589a2ebdUL, 0x3c9b6d34UL, 0x7b5de564UL, 0x000a0c66UL,
  0x9ab89880UL, 0x3c95277cUL, 0xb23e255cUL, 0x000a5503UL,
  0x6e735ab3UL, 0x3c846984UL, 0x5579fdbfUL, 0x000a9e6bUL,
  // j = 48..63
  0x92cb3387UL, 0x3c8c1a77UL, 0x995ad3adUL, 0x000ae89fUL,
  0xdc2d1d96UL, 0x3ca22466UL, 0xb84f15faUL, 0x000b33a2UL,
  0xb19505aeUL, 0x3ca1112eUL, 0xf2fb5e46UL, 0x000b7f76UL,
  0x0a5fddcdUL, 0x3c74ffd7UL, 0x904bc1d2UL, 0x000bcc1eUL,
  0x30af0cb3UL, 0x3c736eaeUL, 0xdd85529cUL, 0x000c199bUL,
  0xd10959acUL, 0x3c84e08fUL, 0x2e57d14bUL, 0x000c67f1UL,
  0x6c921968UL, 0x3c676b2cUL, 0xdcef9069UL, 0x000cb720UL,
  0x36df99b3UL, 0x3c937009UL, 0x4a07897bUL, 0x000d072dUL,
  0xa63d07a7UL, 0x3c74a385UL, 0xdcfba487UL, 0x000d5818UL,
  0xd5c192acUL, 0x3c8e5a50UL, 0x03db3285UL, 0x000da9e6UL,
  0x1c4a9792UL, 0x3c98bb73UL, 0x337b9b5eUL, 0x000dfc97UL,
  0x603a88d3UL, 0x3c74b604UL, 0xe78b3ff6UL, 0x000e502eUL,
  0x92094926UL, 0x3c916f27UL, 0xa2a490d9UL, 0x000ea4afUL,
  0x41aa2008UL, 0x3c8ec3bcUL, 0xee615a27UL, 0x000efa1bUL,
  0x31d185eeUL, 0x3c8a64a9UL, 0x5b6e4540UL, 0x000f5076UL,
  0x4d91cd9dUL, 0x3c77893bUL, 0x819e90d8UL, 0x000fa7c1UL,

  // Special-case results.
  0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x7ff00000UL,  // 1184: 1.0;  1192: +Inf
  0x00000000UL, 0x00000000UL, 0xffffffffUL, 0x7fefffffUL,  // 1200: 0.0;  1208: largest finite double
  0x00000000UL, 0x00100000UL                               // 1216: smallest normal double
};
|
||||
|
||||
//registers,
|
||||
// input: (rbp + 8)
|
||||
// scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
|
||||
// rax, rdx, rcx, rbx (tmp)
|
||||
|
||||
// Code generated by Intel C compiler for LIBM library
|
||||
|
||||
void MacroAssembler::fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
|
||||
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
|
||||
Register eax, Register ecx, Register edx, Register tmp) {
|
||||
Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
|
||||
Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
|
||||
Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, L_2TAG_PACKET_10_0_2, L_2TAG_PACKET_11_0_2;
|
||||
Label L_2TAG_PACKET_12_0_2;
|
||||
|
||||
assert_different_registers(tmp, eax, ecx, edx);
|
||||
address static_const_table = (address)_static_const_table;
|
||||
|
||||
subl(rsp, 120);
|
||||
movl(Address(rsp, 64), tmp);
|
||||
lea(tmp, ExternalAddress(static_const_table));
|
||||
movsd(xmm0, Address(rsp, 128));
|
||||
unpcklpd(xmm0, xmm0);
|
||||
movdqu(xmm1, Address(tmp, 64)); // 0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL
|
||||
movdqu(xmm6, Address(tmp, 48)); // 0x00000000UL, 0x43380000UL, 0x00000000UL, 0x43380000UL
|
||||
movdqu(xmm2, Address(tmp, 80)); // 0xfefa0000UL, 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL
|
||||
movdqu(xmm3, Address(tmp, 96)); // 0xbc9e3b3aUL, 0x3d1cf79aUL, 0xbc9e3b3aUL, 0x3d1cf79aUL
|
||||
pextrw(eax, xmm0, 3);
|
||||
andl(eax, 32767);
|
||||
movl(edx, 16527);
|
||||
subl(edx, eax);
|
||||
subl(eax, 15504);
|
||||
orl(edx, eax);
|
||||
cmpl(edx, INT_MIN);
|
||||
jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2);
|
||||
mulpd(xmm1, xmm0);
|
||||
addpd(xmm1, xmm6);
|
||||
movapd(xmm7, xmm1);
|
||||
subpd(xmm1, xmm6);
|
||||
mulpd(xmm2, xmm1);
|
||||
movdqu(xmm4, Address(tmp, 128)); // 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL, 0x3fa55555UL
|
||||
mulpd(xmm3, xmm1);
|
||||
movdqu(xmm5, Address(tmp, 144)); // 0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL
|
||||
subpd(xmm0, xmm2);
|
||||
movdl(eax, xmm7);
|
||||
movl(ecx, eax);
|
||||
andl(ecx, 63);
|
||||
shll(ecx, 4);
|
||||
sarl(eax, 6);
|
||||
movl(edx, eax);
|
||||
movdqu(xmm6, Address(tmp, 16)); // 0xffffffc0UL, 0x00000000UL, 0xffffffc0UL, 0x00000000UL
|
||||
pand(xmm7, xmm6);
|
||||
movdqu(xmm6, Address(tmp, 32)); // 0x0000ffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL
|
||||
paddq(xmm7, xmm6);
|
||||
psllq(xmm7, 46);
|
||||
subpd(xmm0, xmm3);
|
||||
movdqu(xmm2, Address(tmp, ecx, Address::times_1, 160));
|
||||
mulpd(xmm4, xmm0);
|
||||
movapd(xmm6, xmm0);
|
||||
movapd(xmm1, xmm0);
|
||||
mulpd(xmm6, xmm6);
|
||||
mulpd(xmm0, xmm6);
|
||||
addpd(xmm5, xmm4);
|
||||
mulsd(xmm0, xmm6);
|
||||
mulpd(xmm6, Address(tmp, 112)); // 0xfffffffeUL, 0x3fdfffffUL, 0xfffffffeUL, 0x3fdfffffUL
|
||||
addsd(xmm1, xmm2);
|
||||
unpckhpd(xmm2, xmm2);
|
||||
mulpd(xmm0, xmm5);
|
||||
addsd(xmm1, xmm0);
|
||||
por(xmm2, xmm7);
|
||||
unpckhpd(xmm0, xmm0);
|
||||
addsd(xmm0, xmm1);
|
||||
addsd(xmm0, xmm6);
|
||||
addl(edx, 894);
|
||||
cmpl(edx, 1916);
|
||||
jcc(Assembler::above, L_2TAG_PACKET_1_0_2);
|
||||
mulsd(xmm0, xmm2);
|
||||
addsd(xmm0, xmm2);
|
||||
jmp(L_2TAG_PACKET_2_0_2);
|
||||
|
||||
bind(L_2TAG_PACKET_1_0_2);
|
||||
fnstcw(Address(rsp, 24));
|
||||
movzwl(edx, Address(rsp, 24));
|
||||
orl(edx, 768);
|
||||
movw(Address(rsp, 28), edx);
|
||||
fldcw(Address(rsp, 28));
|
||||
movl(edx, eax);
|
||||
sarl(eax, 1);
|
||||
subl(edx, eax);
|
||||
movdqu(xmm6, Address(tmp, 0)); // 0x00000000UL, 0xfff00000UL, 0x00000000UL, 0xfff00000UL
|
||||
pandn(xmm6, xmm2);
|
||||
addl(eax, 1023);
|
||||
movdl(xmm3, eax);
|
||||
psllq(xmm3, 52);
|
||||
por(xmm6, xmm3);
|
||||
addl(edx, 1023);
|
||||
movdl(xmm4, edx);
|
||||
psllq(xmm4, 52);
|
||||
movsd(Address(rsp, 8), xmm0);
|
||||
fld_d(Address(rsp, 8));
|
||||
movsd(Address(rsp, 16), xmm6);
|
||||
fld_d(Address(rsp, 16));
|
||||
fmula(1);
|
||||
faddp(1);
|
||||
movsd(Address(rsp, 8), xmm4);
|
||||
fld_d(Address(rsp, 8));
|
||||
fmulp(1);
|
||||
fstp_d(Address(rsp, 8));
|
||||
movsd(xmm0, Address(rsp, 8));
|
||||
fldcw(Address(rsp, 24));
|
||||
pextrw(ecx, xmm0, 3);
|
||||
andl(ecx, 32752);
|
||||
cmpl(ecx, 32752);
|
||||
jcc(Assembler::aboveEqual, L_2TAG_PACKET_3_0_2);
|
||||
cmpl(ecx, 0);
|
||||
jcc(Assembler::equal, L_2TAG_PACKET_4_0_2);
|
||||
jmp(L_2TAG_PACKET_2_0_2);
|
||||
cmpl(ecx, INT_MIN);
|
||||
jcc(Assembler::below, L_2TAG_PACKET_3_0_2);
|
||||
cmpl(ecx, -1064950997);
|
||||
jcc(Assembler::below, L_2TAG_PACKET_2_0_2);
|
||||
jcc(Assembler::above, L_2TAG_PACKET_4_0_2);
|
||||
movl(edx, Address(rsp, 128));
|
||||
cmpl(edx, -17155601);
|
||||
jcc(Assembler::below, L_2TAG_PACKET_2_0_2);
|
||||
jmp(L_2TAG_PACKET_4_0_2);
|
||||
|
||||
bind(L_2TAG_PACKET_3_0_2);
|
||||
movl(edx, 14);
|
||||
jmp(L_2TAG_PACKET_5_0_2);
|
||||
|
||||
bind(L_2TAG_PACKET_4_0_2);
|
||||
movl(edx, 15);
|
||||
|
||||
bind(L_2TAG_PACKET_5_0_2);
|
||||
movsd(Address(rsp, 0), xmm0);
|
||||
movsd(xmm0, Address(rsp, 128));
|
||||
fld_d(Address(rsp, 0));
|
||||
jmp(L_2TAG_PACKET_6_0_2);
|
||||
|
||||
bind(L_2TAG_PACKET_7_0_2);
|
||||
cmpl(eax, 2146435072);
|
||||
jcc(Assembler::aboveEqual, L_2TAG_PACKET_8_0_2);
|
||||
movl(eax, Address(rsp, 132));
|
||||
cmpl(eax, INT_MIN);
|
||||
jcc(Assembler::aboveEqual, L_2TAG_PACKET_9_0_2);
|
||||
movsd(xmm0, Address(tmp, 1208)); // 0xffffffffUL, 0x7fefffffUL
|
||||
mulsd(xmm0, xmm0);
|
||||
movl(edx, 14);
|
||||
jmp(L_2TAG_PACKET_5_0_2);
|
||||
|
||||
bind(L_2TAG_PACKET_9_0_2);
|
||||
movsd(xmm0, Address(tmp, 1216));
|
||||
mulsd(xmm0, xmm0);
|
||||
movl(edx, 15);
|
||||
jmp(L_2TAG_PACKET_5_0_2);
|
||||
|
||||
bind(L_2TAG_PACKET_8_0_2);
|
||||
movl(edx, Address(rsp, 128));
|
||||
cmpl(eax, 2146435072);
|
||||
jcc(Assembler::above, L_2TAG_PACKET_10_0_2);
|
||||
cmpl(edx, 0);
|
||||
jcc(Assembler::notEqual, L_2TAG_PACKET_10_0_2);
|
||||
movl(eax, Address(rsp, 132));
|
||||
cmpl(eax, 2146435072);
|
||||
jcc(Assembler::notEqual, L_2TAG_PACKET_11_0_2);
|
||||
movsd(xmm0, Address(tmp, 1192)); // 0x00000000UL, 0x7ff00000UL
|
||||
jmp(L_2TAG_PACKET_2_0_2);
|
||||
|
||||
bind(L_2TAG_PACKET_11_0_2);
|
||||
movsd(xmm0, Address(tmp, 1200)); // 0x00000000UL, 0x00000000UL
|
||||
jmp(L_2TAG_PACKET_2_0_2);
|
||||
|
||||
bind(L_2TAG_PACKET_10_0_2);
|
||||
movsd(xmm0, Address(rsp, 128));
|
||||
addsd(xmm0, xmm0);
|
||||
jmp(L_2TAG_PACKET_2_0_2);
|
||||
|
||||
bind(L_2TAG_PACKET_0_0_2);
|
||||
movl(eax, Address(rsp, 132));
|
||||
andl(eax, 2147483647);
|
||||
cmpl(eax, 1083179008);
|
||||
jcc(Assembler::aboveEqual, L_2TAG_PACKET_7_0_2);
|
||||
movsd(xmm0, Address(rsp, 128));
|
||||
addsd(xmm0, Address(tmp, 1184)); // 0x00000000UL, 0x3ff00000UL
|
||||
jmp(L_2TAG_PACKET_2_0_2);
|
||||
|
||||
bind(L_2TAG_PACKET_2_0_2);
|
||||
movsd(Address(rsp, 48), xmm0);
|
||||
fld_d(Address(rsp, 48));
|
||||
|
||||
bind(L_2TAG_PACKET_6_0_2);
|
||||
movl(tmp, Address(rsp, 64));
|
||||
}
|
||||
@ -1,344 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2016, 2021, Intel Corporation. All rights reserved.
|
||||
* Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved.
|
||||
* Intel Math Library (LIBM) Source Code
|
||||
*
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "asm/assembler.hpp"
|
||||
#include "asm/assembler.inline.hpp"
|
||||
#include "macroAssembler_x86.hpp"
|
||||
#include "utilities/globalDefinitions.hpp"
|
||||
|
||||
/******************************************************************************/
|
||||
// ALGORITHM DESCRIPTION - LOG()
|
||||
// ---------------------
|
||||
//
|
||||
// x=2^k * mx, mx in [1,2)
|
||||
//
|
||||
// Get B~1/mx based on the output of rcpss instruction (B0)
|
||||
// B = int((B0*2^7+0.5))/2^7
|
||||
//
|
||||
// Reduced argument: r=B*mx-1.0 (computed accurately in high and low parts)
|
||||
//
|
||||
// Result: k*log(2) - log(B) + p(r) if |x-1| >= small value (2^-6) and
|
||||
// p(r) is a degree 7 polynomial
|
||||
// -log(B) read from data table (high, low parts)
|
||||
// Result is formed from high and low parts
|
||||
//
|
||||
// Special cases:
|
||||
// log(NaN) = quiet NaN, and raise invalid exception
|
||||
// log(+INF) = that INF
|
||||
// log(0) = -INF with divide-by-zero exception raised
|
||||
// log(1) = +0
|
||||
// log(x) = NaN with invalid exception raised if x < -0, including -INF
|
||||
//
|
||||
/******************************************************************************/
|
||||
|
||||
// The 32 bit code is at most SSE2 compliant
|
||||
//
|
||||
// Constant data for MacroAssembler::fast_log. fast_log addresses it by byte
// offset from the table base; the offsets below assume 4-byte juint elements,
// which is what the offset comments inside fast_log rely on.
//
//   [0, 2064)  16-byte lookup entries, selected in fast_log by
//              Address(tmp, edx) with edx a multiple of 16. Per the
//              algorithm description above these are -log(B) as a high and
//              a low part.
//   2064       words 0xfefa3800, 0x3fa62e42: same mantissa as the high part
//              of entry 0 with the exponent field lowered by 4 (that value
//              divided by 16).
//   2072       words 0x93c76730, 0x3ceef357: likewise the low part of
//              entry 0 divided by 16.
//   2080, 2096, 2112
//              three 16-byte groups loaded with movdqu; presumably the
//              coefficients of the polynomial p(r) -- confirm.
//   2128       words 0x00000000, 0xffffe000: a bit mask (used with pand in
//              fast_log) that keeps the upper bits of a double.
ATTRIBUTE_ALIGNED(16) static const juint _static_const_table_log[] =
|
||||
{
|
||||
0xfefa3800UL, 0x3fe62e42UL, 0x93c76730UL, 0x3d2ef357UL, 0xaa241800UL,
|
||||
0x3fe5ee82UL, 0x0cda46beUL, 0x3d220238UL, 0x5c364800UL, 0x3fe5af40UL,
|
||||
0xac10c9fbUL, 0x3d2dfa63UL, 0x26bb8c00UL, 0x3fe5707aUL, 0xff3303ddUL,
|
||||
0x3d09980bUL, 0x26867800UL, 0x3fe5322eUL, 0x5d257531UL, 0x3d05ccc4UL,
|
||||
0x835a5000UL, 0x3fe4f45aUL, 0x6d93b8fbUL, 0xbd2e6c51UL, 0x6f970c00UL,
|
||||
0x3fe4b6fdUL, 0xed4c541cUL, 0x3cef7115UL, 0x27e8a400UL, 0x3fe47a15UL,
|
||||
0xf94d60aaUL, 0xbd22cb6aUL, 0xf2f92400UL, 0x3fe43d9fUL, 0x481051f7UL,
|
||||
0xbcfd984fUL, 0x2125cc00UL, 0x3fe4019cUL, 0x30f0c74cUL, 0xbd26ce79UL,
|
||||
0x0c36c000UL, 0x3fe3c608UL, 0x7cfe13c2UL, 0xbd02b736UL, 0x17197800UL,
|
||||
0x3fe38ae2UL, 0xbb5569a4UL, 0xbd218b7aUL, 0xad9d8c00UL, 0x3fe35028UL,
|
||||
0x9527e6acUL, 0x3d10b83fUL, 0x44340800UL, 0x3fe315daUL, 0xc5a0ed9cUL,
|
||||
0xbd274e93UL, 0x57b0e000UL, 0x3fe2dbf5UL, 0x07b9dc11UL, 0xbd17a6e5UL,
|
||||
0x6d0ec000UL, 0x3fe2a278UL, 0xe797882dUL, 0x3d206d2bUL, 0x1134dc00UL,
|
||||
0x3fe26962UL, 0x05226250UL, 0xbd0b61f1UL, 0xd8bebc00UL, 0x3fe230b0UL,
|
||||
0x6e48667bUL, 0x3d12fc06UL, 0x5fc61800UL, 0x3fe1f863UL, 0xc9fe81d3UL,
|
||||
0xbd2a7242UL, 0x49ae6000UL, 0x3fe1c078UL, 0xed70e667UL, 0x3cccacdeUL,
|
||||
0x40f23c00UL, 0x3fe188eeUL, 0xf8ab4650UL, 0x3d14cc4eUL, 0xf6f29800UL,
|
||||
0x3fe151c3UL, 0xa293ae49UL, 0xbd2edd97UL, 0x23c75c00UL, 0x3fe11af8UL,
|
||||
0xbb9ddcb2UL, 0xbd258647UL, 0x8611cc00UL, 0x3fe0e489UL, 0x07801742UL,
|
||||
0x3d1c2998UL, 0xe2d05400UL, 0x3fe0ae76UL, 0x887e7e27UL, 0x3d1f486bUL,
|
||||
0x0533c400UL, 0x3fe078bfUL, 0x41edf5fdUL, 0x3d268122UL, 0xbe760400UL,
|
||||
0x3fe04360UL, 0xe79539e0UL, 0xbd04c45fUL, 0xe5b20800UL, 0x3fe00e5aUL,
|
||||
0xb1727b1cUL, 0xbd053ba3UL, 0xaf7a4800UL, 0x3fdfb358UL, 0x3c164935UL,
|
||||
0x3d0085faUL, 0xee031800UL, 0x3fdf4aa7UL, 0x6f014a8bUL, 0x3d12cde5UL,
|
||||
0x56b41000UL, 0x3fdee2a1UL, 0x5a470251UL, 0x3d2f27f4UL, 0xc3ddb000UL,
|
||||
0x3fde7b42UL, 0x5372bd08UL, 0xbd246550UL, 0x1a272800UL, 0x3fde148aUL,
|
||||
0x07322938UL, 0xbd1326b2UL, 0x484c9800UL, 0x3fddae75UL, 0x60dc616aUL,
|
||||
0xbd1ea42dUL, 0x46def800UL, 0x3fdd4902UL, 0xe9a767a8UL, 0x3d235bafUL,
|
||||
0x18064800UL, 0x3fdce42fUL, 0x3ec7a6b0UL, 0xbd0797c3UL, 0xc7455800UL,
|
||||
0x3fdc7ff9UL, 0xc15249aeUL, 0xbd29b6ddUL, 0x693fa000UL, 0x3fdc1c60UL,
|
||||
0x7fe8e180UL, 0x3d2cec80UL, 0x1b80e000UL, 0x3fdbb961UL, 0xf40a666dUL,
|
||||
0x3d27d85bUL, 0x04462800UL, 0x3fdb56faUL, 0x2d841995UL, 0x3d109525UL,
|
||||
0x5248d000UL, 0x3fdaf529UL, 0x52774458UL, 0xbd217cc5UL, 0x3c8ad800UL,
|
||||
0x3fda93edUL, 0xbea77a5dUL, 0x3d1e36f2UL, 0x0224f800UL, 0x3fda3344UL,
|
||||
0x7f9d79f5UL, 0x3d23c645UL, 0xea15f000UL, 0x3fd9d32bUL, 0x10d0c0b0UL,
|
||||
0xbd26279eUL, 0x43135800UL, 0x3fd973a3UL, 0xa502d9f0UL, 0xbd152313UL,
|
||||
0x635bf800UL, 0x3fd914a8UL, 0x2ee6307dUL, 0xbd1766b5UL, 0xa88b3000UL,
|
||||
0x3fd8b639UL, 0xe5e70470UL, 0xbd205ae1UL, 0x776dc800UL, 0x3fd85855UL,
|
||||
0x3333778aUL, 0x3d2fd56fUL, 0x3bd81800UL, 0x3fd7fafaUL, 0xc812566aUL,
|
||||
0xbd272090UL, 0x687cf800UL, 0x3fd79e26UL, 0x2efd1778UL, 0x3d29ec7dUL,
|
||||
0x76c67800UL, 0x3fd741d8UL, 0x49dc60b3UL, 0x3d2d8b09UL, 0xe6af1800UL,
|
||||
0x3fd6e60eUL, 0x7c222d87UL, 0x3d172165UL, 0x3e9c6800UL, 0x3fd68ac8UL,
|
||||
0x2756eba0UL, 0x3d20a0d3UL, 0x0b3ab000UL, 0x3fd63003UL, 0xe731ae00UL,
|
||||
0xbd2db623UL, 0xdf596000UL, 0x3fd5d5bdUL, 0x08a465dcUL, 0xbd0a0b2aUL,
|
||||
0x53c8d000UL, 0x3fd57bf7UL, 0xee5d40efUL, 0x3d1fadedUL, 0x0738a000UL,
|
||||
0x3fd522aeUL, 0x8164c759UL, 0x3d2ebe70UL, 0x9e173000UL, 0x3fd4c9e0UL,
|
||||
0x1b0ad8a4UL, 0xbd2e2089UL, 0xc271c800UL, 0x3fd4718dUL, 0x0967d675UL,
|
||||
0xbd2f27ceUL, 0x23d5e800UL, 0x3fd419b4UL, 0xec90e09dUL, 0x3d08e436UL,
|
||||
0x77333000UL, 0x3fd3c252UL, 0xb606bd5cUL, 0x3d183b54UL, 0x76be1000UL,
|
||||
0x3fd36b67UL, 0xb0f177c8UL, 0x3d116ecdUL, 0xe1d36000UL, 0x3fd314f1UL,
|
||||
0xd3213cb8UL, 0xbd28e27aUL, 0x7cdc9000UL, 0x3fd2bef0UL, 0x4a5004f4UL,
|
||||
0x3d2a9cfaUL, 0x1134d800UL, 0x3fd26962UL, 0xdf5bb3b6UL, 0x3d2c93c1UL,
|
||||
0x6d0eb800UL, 0x3fd21445UL, 0xba46baeaUL, 0x3d0a87deUL, 0x635a6800UL,
|
||||
0x3fd1bf99UL, 0x5147bdb7UL, 0x3d2ca6edUL, 0xcbacf800UL, 0x3fd16b5cUL,
|
||||
0xf7a51681UL, 0x3d2b9acdUL, 0x8227e800UL, 0x3fd1178eUL, 0x63a5f01cUL,
|
||||
0xbd2c210eUL, 0x67616000UL, 0x3fd0c42dUL, 0x163ceae9UL, 0x3d27188bUL,
|
||||
0x604d5800UL, 0x3fd07138UL, 0x16ed4e91UL, 0x3cf89cdbUL, 0x5626c800UL,
|
||||
0x3fd01eaeUL, 0x1485e94aUL, 0xbd16f08cUL, 0x6cb3b000UL, 0x3fcf991cUL,
|
||||
0xca0cdf30UL, 0x3d1bcbecUL, 0xe4dd0000UL, 0x3fcef5adUL, 0x65bb8e11UL,
|
||||
0xbcca2115UL, 0xffe71000UL, 0x3fce530eUL, 0x6041f430UL, 0x3cc21227UL,
|
||||
0xb0d49000UL, 0x3fcdb13dUL, 0xf715b035UL, 0xbd2aff2aUL, 0xf2656000UL,
|
||||
0x3fcd1037UL, 0x75b6f6e4UL, 0xbd084a7eUL, 0xc6f01000UL, 0x3fcc6ffbUL,
|
||||
0xc5962bd2UL, 0xbcf1ec72UL, 0x383be000UL, 0x3fcbd087UL, 0x595412b6UL,
|
||||
0xbd2d4bc4UL, 0x575bd000UL, 0x3fcb31d8UL, 0x4eace1aaUL, 0xbd0c358dUL,
|
||||
0x3c8ae000UL, 0x3fca93edUL, 0x50562169UL, 0xbd287243UL, 0x07089000UL,
|
||||
0x3fc9f6c4UL, 0x6865817aUL, 0x3d29904dUL, 0xdcf70000UL, 0x3fc95a5aUL,
|
||||
0x58a0ff6fUL, 0x3d07f228UL, 0xeb390000UL, 0x3fc8beafUL, 0xaae92cd1UL,
|
||||
0xbd073d54UL, 0x6551a000UL, 0x3fc823c1UL, 0x9a631e83UL, 0x3d1e0ddbUL,
|
||||
0x85445000UL, 0x3fc7898dUL, 0x70914305UL, 0xbd1c6610UL, 0x8b757000UL,
|
||||
0x3fc6f012UL, 0xe59c21e1UL, 0xbd25118dUL, 0xbe8c1000UL, 0x3fc6574eUL,
|
||||
0x2c3c2e78UL, 0x3d19cf8bUL, 0x6b544000UL, 0x3fc5bf40UL, 0xeb68981cUL,
|
||||
0xbd127023UL, 0xe4a1b000UL, 0x3fc527e5UL, 0xe5697dc7UL, 0x3d2633e8UL,
|
||||
0x8333b000UL, 0x3fc4913dUL, 0x54fdb678UL, 0x3d258379UL, 0xa5993000UL,
|
||||
0x3fc3fb45UL, 0x7e6a354dUL, 0xbd2cd1d8UL, 0xb0159000UL, 0x3fc365fcUL,
|
||||
0x234b7289UL, 0x3cc62fa8UL, 0x0c868000UL, 0x3fc2d161UL, 0xcb81b4a1UL,
|
||||
0x3d039d6cUL, 0x2a49c000UL, 0x3fc23d71UL, 0x8fd3df5cUL, 0x3d100d23UL,
|
||||
0x7e23f000UL, 0x3fc1aa2bUL, 0x44389934UL, 0x3d2ca78eUL, 0x8227e000UL,
|
||||
0x3fc1178eUL, 0xce2d07f2UL, 0x3d21ef78UL, 0xb59e4000UL, 0x3fc08598UL,
|
||||
0x7009902cUL, 0xbd27e5ddUL, 0x39dbe000UL, 0x3fbfe891UL, 0x4fa10afdUL,
|
||||
0xbd2534d6UL, 0x830a2000UL, 0x3fbec739UL, 0xafe645e0UL, 0xbd2dc068UL,
|
||||
0x63844000UL, 0x3fbda727UL, 0x1fa71733UL, 0x3d1a8940UL, 0x01bc4000UL,
|
||||
0x3fbc8858UL, 0xc65aacd3UL, 0x3d2646d1UL, 0x8dad6000UL, 0x3fbb6ac8UL,
|
||||
0x2bf768e5UL, 0xbd139080UL, 0x40b1c000UL, 0x3fba4e76UL, 0xb94407c8UL,
|
||||
0xbd0e42b6UL, 0x5d594000UL, 0x3fb9335eUL, 0x3abd47daUL, 0x3d23115cUL,
|
||||
0x2f40e000UL, 0x3fb8197eUL, 0xf96ffdf7UL, 0x3d0f80dcUL, 0x0aeac000UL,
|
||||
0x3fb700d3UL, 0xa99ded32UL, 0x3cec1e8dUL, 0x4d97a000UL, 0x3fb5e95aUL,
|
||||
0x3c5d1d1eUL, 0xbd2c6906UL, 0x5d208000UL, 0x3fb4d311UL, 0x82f4e1efUL,
|
||||
0xbcf53a25UL, 0xa7d1e000UL, 0x3fb3bdf5UL, 0xa5db4ed7UL, 0x3d2cc85eUL,
|
||||
0xa4472000UL, 0x3fb2aa04UL, 0xae9c697dUL, 0xbd20b6e8UL, 0xd1466000UL,
|
||||
0x3fb1973bUL, 0x560d9e9bUL, 0xbd25325dUL, 0xb59e4000UL, 0x3fb08598UL,
|
||||
0x7009902cUL, 0xbd17e5ddUL, 0xc006c000UL, 0x3faeea31UL, 0x4fc93b7bUL,
|
||||
0xbd0e113eUL, 0xcdddc000UL, 0x3faccb73UL, 0x47d82807UL, 0xbd1a68f2UL,
|
||||
0xd0fb0000UL, 0x3faaaef2UL, 0x353bb42eUL, 0x3d20fc1aUL, 0x149fc000UL,
|
||||
0x3fa894aaUL, 0xd05a267dUL, 0xbd197995UL, 0xf2d4c000UL, 0x3fa67c94UL,
|
||||
0xec19afa2UL, 0xbd029efbUL, 0xd42e0000UL, 0x3fa466aeUL, 0x75bdfd28UL,
|
||||
0xbd2c1673UL, 0x2f8d0000UL, 0x3fa252f3UL, 0xe021b67bUL, 0x3d283e9aUL,
|
||||
0x89e74000UL, 0x3fa0415dUL, 0x5cf1d753UL, 0x3d0111c0UL, 0xec148000UL,
|
||||
0x3f9c63d2UL, 0x3f9eb2f3UL, 0x3d2578c6UL, 0x28c90000UL, 0x3f984925UL,
|
||||
0x325a0c34UL, 0xbd2aa0baUL, 0x25980000UL, 0x3f9432a9UL, 0x928637feUL,
|
||||
0x3d098139UL, 0x58938000UL, 0x3f902056UL, 0x06e2f7d2UL, 0xbd23dc5bUL,
|
||||
0xa3890000UL, 0x3f882448UL, 0xda74f640UL, 0xbd275577UL, 0x75890000UL,
|
||||
0x3f801015UL, 0x999d2be8UL, 0xbd10c76bUL, 0x59580000UL, 0x3f700805UL,
|
||||
0xcb31c67bUL, 0x3d2166afUL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
|
||||
0x80000000UL, 0xfefa3800UL, 0x3fa62e42UL, 0x93c76730UL, 0x3ceef357UL,
|
||||
0x92492492UL, 0x3fc24924UL, 0x00000000UL, 0xbfd00000UL, 0x3d6fb175UL,
|
||||
0xbfc5555eUL, 0x55555555UL, 0x3fd55555UL, 0x9999999aUL, 0x3fc99999UL,
|
||||
0x00000000UL, 0xbfe00000UL, 0x00000000UL, 0xffffe000UL, 0x00000000UL,
|
||||
0xffffe000UL
|
||||
};
|
||||
|
||||
//registers,
|
||||
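// input: the double at Address(rsp, 112) once the 104-byte frame is reserved; it is loaded into xmm0 (the incoming xmm0 value is overwritten, not read)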
|
||||
// scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
|
||||
// rax, rdx, rcx, rbx (tmp)
|
||||
// Emits the 32-bit x86 (SSE2) instruction sequence for log(x) that is
// outlined in the algorithm description above.
//
// What the emitted code does, as far as this function shows:
//  - reserves 104 bytes of stack, saves tmp at Address(rsp, 40) and points
//    tmp at _static_const_table_log;
//  - reads the double argument from Address(rsp, 112); the value held in
//    xmm0 on entry is overwritten, not read;
//  - leaves the result on the x87 stack (fld_d) and restores tmp.
// xmm0..xmm7, eax, ecx and edx are all written as scratch. tmp must be a
// different register from eax, ecx and edx (asserted below).
//
// NOTE(review): rsp is not re-adjusted in this function after
// subl(rsp, 104); presumably the enclosing stub tears the frame down --
// confirm against the caller.
void MacroAssembler::fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
|
||||
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
|
||||
Register eax, Register ecx, Register edx, Register tmp) {
|
||||
Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
|
||||
Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
|
||||
Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2;
|
||||
Label L_2TAG_PACKET_10_0_2;
|
||||
|
||||
assert_different_registers(tmp, eax, ecx, edx);
|
||||
address static_const_table = (address)_static_const_table_log;
|
||||
|
||||
subl(rsp, 104);
|
||||
movl(Address(rsp, 40), tmp);
|
||||
lea(tmp, ExternalAddress(static_const_table));
|
||||
xorpd(xmm2, xmm2);
|
||||
// 16368 == 0x3FF0, the top 16 bits of the double 1.0; xmm2 becomes 1.0.
movl(eax, 16368);
|
||||
pinsrw(xmm2, eax, 3);
|
||||
xorpd(xmm3, xmm3);
|
||||
// 30704 == 0x77F0: xmm3 becomes a double with exponent field 0x77F and a
// zero mantissa.
movl(edx, 30704);
|
||||
pinsrw(xmm3, edx, 3);
|
||||
movsd(xmm0, Address(rsp, 112));
|
||||
movapd(xmm1, xmm0);
|
||||
movl(ecx, 32768);
|
||||
movdl(xmm4, ecx);
|
||||
movsd(xmm5, Address(tmp, 2128)); // 0x00000000UL, 0xffffe000UL
|
||||
// eax = top 16 bits of x: sign, 11 exponent bits, top 4 mantissa bits.
pextrw(eax, xmm0, 3);
|
||||
por(xmm0, xmm2);
|
||||
psllq(xmm0, 5);
|
||||
// 16352 == 0x3FE0: exponent bias subtracted from eax on the main path.
movl(ecx, 16352);
|
||||
psrlq(xmm0, 34);
|
||||
rcpss(xmm0, xmm0);
|
||||
// Shifting left then right by 12 clears the sign and exponent bits of xmm1.
psllq(xmm1, 12);
|
||||
pshufd(xmm6, xmm5, 228);
|
||||
psrlq(xmm1, 12);
|
||||
// Unsigned range check: only top-16 values in [16, 32751] (sign clear,
// exponent field neither 0 nor 0x7FF) fall through to the main path.
subl(eax, 16);
|
||||
cmpl(eax, 32736);
|
||||
jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2);
|
||||
|
||||
// Main path; also re-entered from L_2TAG_PACKET_4_0_2 after a denormal
// argument has been rescaled.
bind(L_2TAG_PACKET_1_0_2);
|
||||
paddd(xmm0, xmm4);
|
||||
por(xmm1, xmm3);
|
||||
movdl(edx, xmm0);
|
||||
psllq(xmm0, 29);
|
||||
pand(xmm5, xmm1);
|
||||
pand(xmm0, xmm6);
|
||||
subsd(xmm1, xmm5);
|
||||
mulpd(xmm5, xmm0);
|
||||
// eax keeps only the exponent bits (mask 0x7FF0), then the bias in ecx is
// removed; xmm7 receives that difference converted to double.
andl(eax, 32752);
|
||||
subl(eax, ecx);
|
||||
cvtsi2sdl(xmm7, eax);
|
||||
mulsd(xmm1, xmm0);
|
||||
movsd(xmm6, Address(tmp, 2064)); // 0xfefa3800UL, 0x3fa62e42UL
|
||||
movdqu(xmm3, Address(tmp, 2080)); // 0x92492492UL, 0x3fc24924UL, 0x00000000UL, 0xbfd00000UL
|
||||
subsd(xmm5, xmm2);
|
||||
// edx = bits 16..23 of the value taken from xmm0 above, times 16: the byte
// offset of a 16-byte table entry.
andl(edx, 16711680);
|
||||
shrl(edx, 12);
|
||||
movdqu(xmm0, Address(tmp, edx));
|
||||
movdqu(xmm4, Address(tmp, 2096)); // 0x3d6fb175UL, 0xbfc5555eUL, 0x55555555UL, 0x3fd55555UL
|
||||
addsd(xmm1, xmm5);
|
||||
movdqu(xmm2, Address(tmp, 2112)); // 0x9999999aUL, 0x3fc99999UL, 0x00000000UL, 0xbfe00000UL
|
||||
mulsd(xmm6, xmm7);
|
||||
pshufd(xmm5, xmm1, 68);
|
||||
mulsd(xmm7, Address(tmp, 2072)); // 0x93c76730UL, 0x3ceef357UL, 0x92492492UL, 0x3fc24924UL
|
||||
mulsd(xmm3, xmm1);
|
||||
addsd(xmm0, xmm6);
|
||||
mulpd(xmm4, xmm5);
|
||||
mulpd(xmm5, xmm5);
|
||||
pshufd(xmm6, xmm0, 228);
|
||||
addsd(xmm0, xmm1);
|
||||
addpd(xmm4, xmm2);
|
||||
mulpd(xmm3, xmm5);
|
||||
subsd(xmm6, xmm0);
|
||||
mulsd(xmm4, xmm1);
|
||||
pshufd(xmm2, xmm0, 238);
|
||||
addsd(xmm1, xmm6);
|
||||
mulsd(xmm5, xmm5);
|
||||
addsd(xmm7, xmm2);
|
||||
addpd(xmm4, xmm3);
|
||||
addsd(xmm1, xmm7);
|
||||
mulpd(xmm4, xmm5);
|
||||
addsd(xmm1, xmm4);
|
||||
pshufd(xmm5, xmm4, 238);
|
||||
addsd(xmm1, xmm5);
|
||||
addsd(xmm0, xmm1);
|
||||
jmp(L_2TAG_PACKET_2_0_2);
|
||||
|
||||
// Special cases: reload x, undo the earlier "subl(eax, 16)" and classify.
bind(L_2TAG_PACKET_0_0_2);
|
||||
movsd(xmm0, Address(rsp, 112));
|
||||
movdqu(xmm1, xmm0);
|
||||
addl(eax, 16);
|
||||
// Sign bit set (top-16 value >= 0x8000).
cmpl(eax, 32768);
|
||||
jcc(Assembler::aboveEqual, L_2TAG_PACKET_3_0_2);
|
||||
// Exponent field is zero: zero or denormal.
cmpl(eax, 16);
|
||||
jcc(Assembler::below, L_2TAG_PACKET_4_0_2);
|
||||
|
||||
// Reached by fall-through with the sign clear and the exponent field all
// ones, or from L_2TAG_PACKET_6_0_2: the result is x + x.
bind(L_2TAG_PACKET_5_0_2);
|
||||
addsd(xmm0, xmm0);
|
||||
jmp(L_2TAG_PACKET_2_0_2);
|
||||
|
||||
// Entered only via the jcc in L_2TAG_PACKET_3_0_2, so the first jcc here
// still tests the flags of "cmpl(ecx, -2097152)".
bind(L_2TAG_PACKET_6_0_2);
|
||||
jcc(Assembler::above, L_2TAG_PACKET_5_0_2);
|
||||
cmpl(edx, 0);
|
||||
jcc(Assembler::above, L_2TAG_PACKET_5_0_2);
|
||||
jmp(L_2TAG_PACKET_7_0_2);
|
||||
|
||||
// Sign bit set: edx = low 32 bits of x, ecx = high 32 bits doubled (which
// shifts the sign bit out). -2097152 == 0xFFE00000, i.e. an exponent field
// of all ones after that shift.
bind(L_2TAG_PACKET_3_0_2);
|
||||
movdl(edx, xmm1);
|
||||
psrlq(xmm1, 32);
|
||||
movdl(ecx, xmm1);
|
||||
addl(ecx, ecx);
|
||||
cmpl(ecx, -2097152);
|
||||
jcc(Assembler::aboveEqual, L_2TAG_PACKET_6_0_2);
|
||||
// All remaining bits zero means x is -0.
orl(edx, ecx);
|
||||
cmpl(edx, 0);
|
||||
jcc(Assembler::equal, L_2TAG_PACKET_8_0_2);
|
||||
|
||||
// Result is 0.0 * +Inf (32752 == 0x7FF0 is the top word of +Inf), i.e. NaN.
// edx is set to 3 here but is not read again in this function.
bind(L_2TAG_PACKET_7_0_2);
|
||||
xorpd(xmm1, xmm1);
|
||||
xorpd(xmm0, xmm0);
|
||||
movl(eax, 32752);
|
||||
pinsrw(xmm1, eax, 3);
|
||||
movl(edx, 3);
|
||||
mulsd(xmm0, xmm1);
|
||||
|
||||
// Common exit for the two error results: spill the result, reload the
// original argument into xmm0, and push the result onto the x87 stack.
bind(L_2TAG_PACKET_9_0_2);
|
||||
movsd(Address(rsp, 0), xmm0);
|
||||
movsd(xmm0, Address(rsp, 112));
|
||||
fld_d(Address(rsp, 0));
|
||||
jmp(L_2TAG_PACKET_10_0_2);
|
||||
|
||||
// Zero argument: result is -1.0 / 0.0 (49136 == 0xBFF0 is the top word of
// -1.0). edx is set to 2 here but is not read again in this function.
bind(L_2TAG_PACKET_8_0_2);
|
||||
xorpd(xmm1, xmm1);
|
||||
xorpd(xmm0, xmm0);
|
||||
movl(eax, 49136);
|
||||
pinsrw(xmm0, eax, 3);
|
||||
divsd(xmm0, xmm1);
|
||||
movl(edx, 2);
|
||||
jmp(L_2TAG_PACKET_9_0_2);
|
||||
|
||||
// Exponent field zero with the sign clear: +0 goes to the zero handler;
// otherwise x is multiplied by the double whose top word is 18416 (0x47F0,
// exponent field 0x3FF + 128), the setup sequence is repeated on the scaled
// value, and ecx carries the matching bias into the main path.
bind(L_2TAG_PACKET_4_0_2);
|
||||
movdl(edx, xmm1);
|
||||
psrlq(xmm1, 32);
|
||||
movdl(ecx, xmm1);
|
||||
orl(edx, ecx);
|
||||
cmpl(edx, 0);
|
||||
jcc(Assembler::equal, L_2TAG_PACKET_8_0_2);
|
||||
xorpd(xmm1, xmm1);
|
||||
movl(eax, 18416);
|
||||
pinsrw(xmm1, eax, 3);
|
||||
mulsd(xmm0, xmm1);
|
||||
movapd(xmm1, xmm0);
|
||||
pextrw(eax, xmm0, 3);
|
||||
por(xmm0, xmm2);
|
||||
psllq(xmm0, 5);
|
||||
movl(ecx, 18416);
|
||||
psrlq(xmm0, 34);
|
||||
rcpss(xmm0, xmm0);
|
||||
psllq(xmm1, 12);
|
||||
pshufd(xmm6, xmm5, 228);
|
||||
psrlq(xmm1, 12);
|
||||
jmp(L_2TAG_PACKET_1_0_2);
|
||||
|
||||
// Normal exit: move the SSE result onto the x87 stack through memory.
bind(L_2TAG_PACKET_2_0_2);
|
||||
movsd(Address(rsp, 24), xmm0);
|
||||
fld_d(Address(rsp, 24));
|
||||
|
||||
// Restore the caller's tmp register saved at entry.
bind(L_2TAG_PACKET_10_0_2);
|
||||
movl(tmp, Address(rsp, 40));
|
||||
}
|
||||
@ -1,357 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2016, 2021, Intel Corporation. All rights reserved.
|
||||
* Intel Math Library (LIBM) Source Code
|
||||
*
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "asm/assembler.hpp"
|
||||
#include "asm/assembler.inline.hpp"
|
||||
#include "macroAssembler_x86.hpp"
|
||||
#include "runtime/stubRoutines.hpp"
|
||||
#include "utilities/globalDefinitions.hpp"
|
||||
|
||||
/******************************************************************************/
|
||||
// ALGORITHM DESCRIPTION - LOG10()
|
||||
// ---------------------
|
||||
//
|
||||
// Let x=2^k * mx, mx in [1,2)
|
||||
//
|
||||
// Get B~1/mx based on the output of rcpss instruction (B0)
|
||||
// B = int((B0*LH*2^7+0.5))/2^7
|
||||
// LH is a short approximation for log10(e)
|
||||
//
|
||||
// Reduced argument: r=B*mx-LH (computed accurately in high and low parts)
|
||||
//
|
||||
// Result: k*log10(2) - log(B) + p(r)
|
||||
// p(r) is a degree 7 polynomial
|
||||
// -log(B) read from data table (high, low parts)
|
||||
// Result is formed from high and low parts
|
||||
//
|
||||
// Special cases:
|
||||
// log10(0) = -INF with divide-by-zero exception raised
|
||||
// log10(1) = +0
|
||||
// log10(x) = NaN with invalid exception raised if x < -0, including -INF
|
||||
// log10(+INF) = +INF
|
||||
//
|
||||
/******************************************************************************/
|
||||
|
||||
// The 32 bit code is at most SSE2 compliant
|
||||
|
||||
// Constant data for MacroAssembler::fast_log10. fast_log10 addresses it by
// byte offset from the table base; the offsets below assume 4-byte juint
// elements.
//
//   [0, 2064)  16-byte lookup entries, selected in fast_log10 by
//              Address(tmp, edx, Address::times_1, -1504). Per the algorithm
//              description above these are -log(B) as a high and a low part.
//   2064       words 0x509f7800, 0x3f934413: same mantissa as the high part
//              of entry 0 with the exponent field lowered by 4 (that value
//              divided by 16).
//   2072       words 0x1f12b358, 0x3cdfef31: likewise the low part of
//              entry 0 divided by 16.
//   2080, 2096, 2112
//              three 16-byte groups loaded with movdqu; presumably the
//              coefficients of the polynomial p(r) -- confirm.
//   2128       words 0xf8000000, 0xffffffff, 0x00000000, 0xffffe000: two
//              64-bit bit masks (used with andpd in fast_log10).
//   2144       words 0x00000000, 0x3fdbc000: the double 0.43359375;
//              presumably "LH" from the algorithm description -- confirm.
//   2152       words 0xbf2e4108, 0x3f5a7a6c: multiplied into the reduced
//              argument in fast_log10.
ATTRIBUTE_ALIGNED(16) static const juint _static_const_table_log10[] =
|
||||
{
|
||||
0x509f7800UL, 0x3fd34413UL, 0x1f12b358UL, 0x3d1fef31UL, 0x80333400UL,
|
||||
0x3fd32418UL, 0xc671d9d0UL, 0xbcf542bfUL, 0x51195000UL, 0x3fd30442UL,
|
||||
0x78a4b0c3UL, 0x3d18216aUL, 0x6fc79400UL, 0x3fd2e490UL, 0x80fa389dUL,
|
||||
0xbc902869UL, 0x89d04000UL, 0x3fd2c502UL, 0x75c2f564UL, 0x3d040754UL,
|
||||
0x4ddd1c00UL, 0x3fd2a598UL, 0xd219b2c3UL, 0xbcfa1d84UL, 0x6baa7c00UL,
|
||||
0x3fd28651UL, 0xfd9abec1UL, 0x3d1be6d3UL, 0x94028800UL, 0x3fd2672dUL,
|
||||
0xe289a455UL, 0xbd1ede5eUL, 0x78b86400UL, 0x3fd2482cUL, 0x6734d179UL,
|
||||
0x3d1fe79bUL, 0xcca3c800UL, 0x3fd2294dUL, 0x981a40b8UL, 0xbced34eaUL,
|
||||
0x439c5000UL, 0x3fd20a91UL, 0xcc392737UL, 0xbd1a9cc3UL, 0x92752c00UL,
|
||||
0x3fd1ebf6UL, 0x03c9afe7UL, 0x3d1e98f8UL, 0x6ef8dc00UL, 0x3fd1cd7dUL,
|
||||
0x71dae7f4UL, 0x3d08a86cUL, 0x8fe4dc00UL, 0x3fd1af25UL, 0xee9185a1UL,
|
||||
0xbcff3412UL, 0xace59400UL, 0x3fd190eeUL, 0xc2cab353UL, 0x3cf17ed9UL,
|
||||
0x7e925000UL, 0x3fd172d8UL, 0x6952c1b2UL, 0x3cf1521cUL, 0xbe694400UL,
|
||||
0x3fd154e2UL, 0xcacb79caUL, 0xbd0bdc78UL, 0x26cbac00UL, 0x3fd1370dUL,
|
||||
0xf71f4de1UL, 0xbd01f8beUL, 0x72fa0800UL, 0x3fd11957UL, 0x55bf910bUL,
|
||||
0x3c946e2bUL, 0x5f106000UL, 0x3fd0fbc1UL, 0x39e639c1UL, 0x3d14a84bUL,
|
||||
0xa802a800UL, 0x3fd0de4aUL, 0xd3f31d5dUL, 0xbd178385UL, 0x0b992000UL,
|
||||
0x3fd0c0f3UL, 0x3843106fUL, 0xbd1f602fUL, 0x486ce800UL, 0x3fd0a3baUL,
|
||||
0x8819497cUL, 0x3cef987aUL, 0x1de49400UL, 0x3fd086a0UL, 0x1caa0467UL,
|
||||
0x3d0faec7UL, 0x4c30cc00UL, 0x3fd069a4UL, 0xa4424372UL, 0xbd1618fcUL,
|
||||
0x94490000UL, 0x3fd04cc6UL, 0x946517d2UL, 0xbd18384bUL, 0xb7e84000UL,
|
||||
0x3fd03006UL, 0xe0109c37UL, 0xbd19a6acUL, 0x798a0c00UL, 0x3fd01364UL,
|
||||
0x5121e864UL, 0xbd164cf7UL, 0x38ce8000UL, 0x3fcfedbfUL, 0x46214d1aUL,
|
||||
0xbcbbc402UL, 0xc8e62000UL, 0x3fcfb4efUL, 0xdab93203UL, 0x3d1e0176UL,
|
||||
0x2cb02800UL, 0x3fcf7c5aUL, 0x2a2ea8e4UL, 0xbcfec86aUL, 0xeeeaa000UL,
|
||||
0x3fcf43fdUL, 0xc18e49a4UL, 0x3cf110a8UL, 0x9bb6e800UL, 0x3fcf0bdaUL,
|
||||
0x923cc9c0UL, 0xbd15ce99UL, 0xc093f000UL, 0x3fced3efUL, 0x4d4b51e9UL,
|
||||
0x3d1a04c7UL, 0xec58f800UL, 0x3fce9c3cUL, 0x163cad59UL, 0x3cac8260UL,
|
||||
0x9a907000UL, 0x3fce2d7dUL, 0x3fa93646UL, 0x3ce4a1c0UL, 0x37311000UL,
|
||||
0x3fcdbf99UL, 0x32abd1fdUL, 0x3d07ea9dUL, 0x6744b800UL, 0x3fcd528cUL,
|
||||
0x4dcbdfd4UL, 0xbd1b08e2UL, 0xe36de800UL, 0x3fcce653UL, 0x0b7b7f7fUL,
|
||||
0xbd1b8f03UL, 0x77506800UL, 0x3fcc7aecUL, 0xa821c9fbUL, 0x3d13c163UL,
|
||||
0x00ff8800UL, 0x3fcc1053UL, 0x536bca76UL, 0xbd074ee5UL, 0x70719800UL,
|
||||
0x3fcba684UL, 0xd7da9b6bUL, 0xbd1fbf16UL, 0xc6f8d800UL, 0x3fcb3d7dUL,
|
||||
0xe2220bb3UL, 0x3d1a295dUL, 0x16c15800UL, 0x3fcad53cUL, 0xe724911eUL,
|
||||
0xbcf55822UL, 0x82533800UL, 0x3fca6dbcUL, 0x6d982371UL, 0x3cac567cUL,
|
||||
0x3c19e800UL, 0x3fca06fcUL, 0x84d17d80UL, 0x3d1da204UL, 0x85ef8000UL,
|
||||
0x3fc9a0f8UL, 0x54466a6aUL, 0xbd002204UL, 0xb0ac2000UL, 0x3fc93baeUL,
|
||||
0xd601fd65UL, 0x3d18840cUL, 0x1bb9b000UL, 0x3fc8d71cUL, 0x7bf58766UL,
|
||||
0xbd14f897UL, 0x34aae800UL, 0x3fc8733eUL, 0x3af6ac24UL, 0xbd0f5c45UL,
|
||||
0x76d68000UL, 0x3fc81012UL, 0x4303e1a1UL, 0xbd1f9a80UL, 0x6af57800UL,
|
||||
0x3fc7ad96UL, 0x43fbcb46UL, 0x3cf4c33eUL, 0xa6c51000UL, 0x3fc74bc7UL,
|
||||
0x70f0eac5UL, 0xbd192e3bUL, 0xccab9800UL, 0x3fc6eaa3UL, 0xc0093dfeUL,
|
||||
0xbd0faf15UL, 0x8b60b800UL, 0x3fc68a28UL, 0xde78d5fdUL, 0xbc9ea4eeUL,
|
||||
0x9d987000UL, 0x3fc62a53UL, 0x962bea6eUL, 0xbd194084UL, 0xc9b0e800UL,
|
||||
0x3fc5cb22UL, 0x888dd999UL, 0x3d1fe201UL, 0xe1634800UL, 0x3fc56c93UL,
|
||||
0x16ada7adUL, 0x3d1b1188UL, 0xc176c000UL, 0x3fc50ea4UL, 0x4159b5b5UL,
|
||||
0xbcf09c08UL, 0x51766000UL, 0x3fc4b153UL, 0x84393d23UL, 0xbcf6a89cUL,
|
||||
0x83695000UL, 0x3fc4549dUL, 0x9f0b8bbbUL, 0x3d1c4b8cUL, 0x538d5800UL,
|
||||
0x3fc3f881UL, 0xf49df747UL, 0x3cf89b99UL, 0xc8138000UL, 0x3fc39cfcUL,
|
||||
0xd503b834UL, 0xbd13b99fUL, 0xf0df0800UL, 0x3fc3420dUL, 0xf011b386UL,
|
||||
0xbd05d8beUL, 0xe7466800UL, 0x3fc2e7b2UL, 0xf39c7bc2UL, 0xbd1bb94eUL,
|
||||
0xcdd62800UL, 0x3fc28de9UL, 0x05e6d69bUL, 0xbd10ed05UL, 0xd015d800UL,
|
||||
0x3fc234b0UL, 0xe29b6c9dUL, 0xbd1ff967UL, 0x224ea800UL, 0x3fc1dc06UL,
|
||||
0x727711fcUL, 0xbcffb30dUL, 0x01540000UL, 0x3fc183e8UL, 0x39786c5aUL,
|
||||
0x3cc23f57UL, 0xb24d9800UL, 0x3fc12c54UL, 0xc905a342UL, 0x3d003a1dUL,
|
||||
0x82835800UL, 0x3fc0d54aUL, 0x9b9920c0UL, 0x3d03b25aUL, 0xc72ac000UL,
|
||||
0x3fc07ec7UL, 0x46f26a24UL, 0x3cf0fa41UL, 0xdd35d800UL, 0x3fc028caUL,
|
||||
0x41d9d6dcUL, 0x3d034a65UL, 0x52474000UL, 0x3fbfa6a4UL, 0x44f66449UL,
|
||||
0x3d19cad3UL, 0x2da3d000UL, 0x3fbefcb8UL, 0x67832999UL, 0x3d18400fUL,
|
||||
0x32a10000UL, 0x3fbe53ceUL, 0x9c0e3b1aUL, 0xbcff62fdUL, 0x556b7000UL,
|
||||
0x3fbdabe3UL, 0x02976913UL, 0xbcf8243bUL, 0x97e88000UL, 0x3fbd04f4UL,
|
||||
0xec793797UL, 0x3d1c0578UL, 0x09647000UL, 0x3fbc5effUL, 0x05fc0565UL,
|
||||
0xbd1d799eUL, 0xc6426000UL, 0x3fbbb9ffUL, 0x4625f5edUL, 0x3d1f5723UL,
|
||||
0xf7afd000UL, 0x3fbb15f3UL, 0xdd5aae61UL, 0xbd1a7e1eUL, 0xd358b000UL,
|
||||
0x3fba72d8UL, 0x3314e4d3UL, 0x3d17bc91UL, 0x9b1f5000UL, 0x3fb9d0abUL,
|
||||
0x9a4d514bUL, 0x3cf18c9bUL, 0x9cd4e000UL, 0x3fb92f69UL, 0x7e4496abUL,
|
||||
0x3cf1f96dUL, 0x31f4f000UL, 0x3fb88f10UL, 0xf56479e7UL, 0x3d165818UL,
|
||||
0xbf628000UL, 0x3fb7ef9cUL, 0x26bf486dUL, 0xbd1113a6UL, 0xb526b000UL,
|
||||
0x3fb7510cUL, 0x1a1c3384UL, 0x3ca9898dUL, 0x8e31e000UL, 0x3fb6b35dUL,
|
||||
0xb3875361UL, 0xbd0661acUL, 0xd01de000UL, 0x3fb6168cUL, 0x2a7cacfaUL,
|
||||
0xbd1bdf10UL, 0x0af23000UL, 0x3fb57a98UL, 0xff868816UL, 0x3cf046d0UL,
|
||||
0xd8ea0000UL, 0x3fb4df7cUL, 0x1515fbe7UL, 0xbd1fd529UL, 0xde3b2000UL,
|
||||
0x3fb44538UL, 0x6e59a132UL, 0x3d1faeeeUL, 0xc8df9000UL, 0x3fb3abc9UL,
|
||||
0xf1322361UL, 0xbd198807UL, 0x505f1000UL, 0x3fb3132dUL, 0x0888e6abUL,
|
||||
0x3d1e5380UL, 0x359bd000UL, 0x3fb27b61UL, 0xdfbcbb22UL, 0xbcfe2724UL,
|
||||
0x429ee000UL, 0x3fb1e463UL, 0x6eb4c58cUL, 0xbcfe4dd6UL, 0x4a673000UL,
|
||||
0x3fb14e31UL, 0x4ce1ac9bUL, 0x3d1ba691UL, 0x28b96000UL, 0x3fb0b8c9UL,
|
||||
0x8c7813b8UL, 0xbd0b3872UL, 0xc1f08000UL, 0x3fb02428UL, 0xc2bc8c2cUL,
|
||||
0x3cb5ea6bUL, 0x05a1a000UL, 0x3faf209cUL, 0x72e8f18eUL, 0xbce8df84UL,
|
||||
0xc0b5e000UL, 0x3fadfa6dUL, 0x9fdef436UL, 0x3d087364UL, 0xaf416000UL,
|
||||
0x3facd5c2UL, 0x1068c3a9UL, 0x3d0827e7UL, 0xdb356000UL, 0x3fabb296UL,
|
||||
0x120a34d3UL, 0x3d101a9fUL, 0x5dfea000UL, 0x3faa90e6UL, 0xdaded264UL,
|
||||
0xbd14c392UL, 0x6034c000UL, 0x3fa970adUL, 0x1c9d06a9UL, 0xbd1b705eUL,
|
||||
0x194c6000UL, 0x3fa851e8UL, 0x83996ad9UL, 0xbd0117bcUL, 0xcf4ac000UL,
|
||||
0x3fa73492UL, 0xb1a94a62UL, 0xbca5ea42UL, 0xd67b4000UL, 0x3fa618a9UL,
|
||||
0x75aed8caUL, 0xbd07119bUL, 0x9126c000UL, 0x3fa4fe29UL, 0x5291d533UL,
|
||||
0x3d12658fUL, 0x6f4d4000UL, 0x3fa3e50eUL, 0xcd2c5cd9UL, 0x3d1d5c70UL,
|
||||
0xee608000UL, 0x3fa2cd54UL, 0xd1008489UL, 0x3d1a4802UL, 0x9900e000UL,
|
||||
0x3fa1b6f9UL, 0x54fb5598UL, 0xbd16593fUL, 0x06bb6000UL, 0x3fa0a1f9UL,
|
||||
0x64ef57b4UL, 0xbd17636bUL, 0xb7940000UL, 0x3f9f1c9fUL, 0xee6a4737UL,
|
||||
0x3cb5d479UL, 0x91aa0000UL, 0x3f9cf7f5UL, 0x3a16373cUL, 0x3d087114UL,
|
||||
0x156b8000UL, 0x3f9ad5edUL, 0x836c554aUL, 0x3c6900b0UL, 0xd4764000UL,
|
||||
0x3f98b67fUL, 0xed12f17bUL, 0xbcffc974UL, 0x77dec000UL, 0x3f9699a7UL,
|
||||
0x232ce7eaUL, 0x3d1e35bbUL, 0xbfbf4000UL, 0x3f947f5dUL, 0xd84ffa6eUL,
|
||||
0x3d0e0a49UL, 0x82c7c000UL, 0x3f92679cUL, 0x8d170e90UL, 0xbd14d9f2UL,
|
||||
0xadd20000UL, 0x3f90525dUL, 0x86d9f88eUL, 0x3cdeb986UL, 0x86f10000UL,
|
||||
0x3f8c7f36UL, 0xb9e0a517UL, 0x3ce29faaUL, 0xb75c8000UL, 0x3f885e9eUL,
|
||||
0x542568cbUL, 0xbd1f7bdbUL, 0x46b30000UL, 0x3f8442e8UL, 0xb954e7d9UL,
|
||||
0x3d1e5287UL, 0xb7e60000UL, 0x3f802c07UL, 0x22da0b17UL, 0xbd19fb27UL,
|
||||
0x6c8b0000UL, 0x3f7833e3UL, 0x821271efUL, 0xbd190f96UL, 0x29910000UL,
|
||||
0x3f701936UL, 0xbc3491a5UL, 0xbd1bcf45UL, 0x354a0000UL, 0x3f600fe3UL,
|
||||
0xc0ff520aUL, 0xbd19d71cUL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
|
||||
0x00000000UL, 0x509f7800UL, 0x3f934413UL, 0x1f12b358UL, 0x3cdfef31UL,
|
||||
0xc1a5f12eUL, 0x40358874UL, 0x64d4ef0dUL, 0xc0089309UL, 0x385593b1UL,
|
||||
0xc025c917UL, 0xdc963467UL, 0x3ffc6a02UL, 0x7f9d3aa1UL, 0x4016ab9fUL,
|
||||
0xdc77b115UL, 0xbff27af2UL, 0xf8000000UL, 0xffffffffUL, 0x00000000UL,
|
||||
0xffffe000UL, 0x00000000UL, 0x3fdbc000UL, 0xbf2e4108UL, 0x3f5a7a6cUL
|
||||
};
|
||||
//registers,
|
||||
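// input: the double at Address(rsp, 112) once the 104-byte frame is reserved; it is loaded into xmm0 (the incoming xmm0 value is overwritten, not read)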
|
||||
// scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
|
||||
// rax, rdx, rcx, rbx (tmp)
|
||||
|
||||
// Emits the 32-bit x86 (SSE2) instruction sequence for log10(x) that is
// outlined in the algorithm description above.
//
// What the emitted code does, as far as this function shows:
//  - reserves 104 bytes of stack, saves tmp at Address(rsp, 40) and points
//    tmp at _static_const_table_log10;
//  - reads the double argument from Address(rsp, 112); the value held in
//    xmm0 on entry is overwritten, not read;
//  - leaves the result on the x87 stack (fld_d) and restores tmp.
// xmm0..xmm7, eax, ecx and edx are all written as scratch. tmp must be a
// different register from eax, ecx and edx (asserted below).
//
// The trailing comments on table loads list the 32-bit table words actually
// covered by each load (8 bytes for movsd/mulsd, 16 bytes for movdqu).
//
// NOTE(review): rsp is not re-adjusted in this function after
// subl(rsp, 104); presumably the enclosing stub tears the frame down --
// confirm against the caller.
void MacroAssembler::fast_log10(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
|
||||
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
|
||||
Register eax, Register ecx, Register edx, Register tmp) {
|
||||
|
||||
Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
|
||||
Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
|
||||
Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, L_2TAG_PACKET_10_0_2;
|
||||
|
||||
assert_different_registers(tmp, eax, ecx, edx);
|
||||
|
||||
address static_const_table_log10 = (address)_static_const_table_log10;
|
||||
|
||||
subl(rsp, 104);
|
||||
movl(Address(rsp, 40), tmp);
|
||||
lea(tmp, ExternalAddress(static_const_table_log10));
|
||||
xorpd(xmm2, xmm2);
|
||||
// 16368 == 0x3FF0, the top 16 bits of the double 1.0; xmm2 becomes 1.0.
movl(eax, 16368);
|
||||
pinsrw(xmm2, eax, 3);
|
||||
// 1054736384 == 0x3EDE0000, the single-precision bit pattern of 0.43359375
// (the same value as the double stored at table offset 2144).
movl(ecx, 1054736384);
|
||||
movdl(xmm7, ecx);
|
||||
xorpd(xmm3, xmm3);
|
||||
// 30704 == 0x77F0: xmm3 becomes a double with exponent field 0x77F and a
// zero mantissa.
movl(edx, 30704);
|
||||
pinsrw(xmm3, edx, 3);
|
||||
movsd(xmm0, Address(rsp, 112));
|
||||
movdqu(xmm1, xmm0);
|
||||
movl(edx, 32768);
|
||||
movdl(xmm4, edx);
|
||||
movdqu(xmm5, Address(tmp, 2128)); // 0xf8000000UL, 0xffffffffUL, 0x00000000UL, 0xffffe000UL
|
||||
// eax = top 16 bits of x: sign, 11 exponent bits, top 4 mantissa bits.
pextrw(eax, xmm0, 3);
|
||||
por(xmm0, xmm2);
|
||||
// 16352 == 0x3FE0: exponent bias subtracted from eax on the main path.
movl(ecx, 16352);
|
||||
psllq(xmm0, 5);
|
||||
movsd(xmm2, Address(tmp, 2144)); // 0x00000000UL, 0x3fdbc000UL
|
||||
psrlq(xmm0, 34);
|
||||
rcpss(xmm0, xmm0);
|
||||
// Shifting left then right by 12 clears the sign and exponent bits of xmm1.
psllq(xmm1, 12);
|
||||
// Shuffle control 78 swaps the two 64-bit halves of xmm5 into xmm6.
pshufd(xmm6, xmm5, 78);
|
||||
psrlq(xmm1, 12);
|
||||
// Unsigned range check: only top-16 values in [16, 32751] (sign clear,
// exponent field neither 0 nor 0x7FF) fall through to the main path.
subl(eax, 16);
|
||||
cmpl(eax, 32736);
|
||||
jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2);
|
||||
|
||||
// Main path; also re-entered from L_2TAG_PACKET_4_0_2 after a denormal
// argument has been rescaled.
bind(L_2TAG_PACKET_1_0_2);
|
||||
mulss(xmm0, xmm7);
|
||||
por(xmm1, xmm3);
|
||||
andpd(xmm5, xmm1);
|
||||
paddd(xmm0, xmm4);
|
||||
subsd(xmm1, xmm5);
|
||||
movdl(edx, xmm0);
|
||||
psllq(xmm0, 29);
|
||||
andpd(xmm0, xmm6);
|
||||
// eax keeps only the exponent bits (mask 0x7FF0), then the bias in ecx is
// removed; xmm7 receives that difference converted to double.
andl(eax, 32752);
|
||||
subl(eax, ecx);
|
||||
cvtsi2sdl(xmm7, eax);
|
||||
mulpd(xmm5, xmm0);
|
||||
mulsd(xmm1, xmm0);
|
||||
movsd(xmm6, Address(tmp, 2064)); // 0x509f7800UL, 0x3f934413UL
|
||||
movdqu(xmm3, Address(tmp, 2080)); // 0xc1a5f12eUL, 0x40358874UL, 0x64d4ef0dUL, 0xc0089309UL
|
||||
subsd(xmm5, xmm2);
|
||||
// edx = bits 16..23 of the value taken from xmm0 above, times 16; the table
// entry is read at that byte offset minus 1504.
andl(edx, 16711680);
|
||||
shrl(edx, 12);
|
||||
movdqu(xmm0, Address(tmp, edx, Address::times_1, -1504));
|
||||
movdqu(xmm4, Address(tmp, 2096)); // 0x385593b1UL, 0xc025c917UL, 0xdc963467UL, 0x3ffc6a02UL
|
||||
addsd(xmm1, xmm5);
|
||||
movdqu(xmm2, Address(tmp, 2112)); // 0x7f9d3aa1UL, 0x4016ab9fUL, 0xdc77b115UL, 0xbff27af2UL
|
||||
mulsd(xmm6, xmm7);
|
||||
pshufd(xmm5, xmm1, 68);
|
||||
mulsd(xmm7, Address(tmp, 2072)); // 0x1f12b358UL, 0x3cdfef31UL
|
||||
mulsd(xmm3, xmm1);
|
||||
addsd(xmm0, xmm6);
|
||||
mulpd(xmm4, xmm5);
|
||||
movsd(xmm6, Address(tmp, 2152)); // 0xbf2e4108UL, 0x3f5a7a6cUL
|
||||
mulpd(xmm5, xmm5);
|
||||
addpd(xmm4, xmm2);
|
||||
mulpd(xmm3, xmm5);
|
||||
pshufd(xmm2, xmm0, 228);
|
||||
addsd(xmm0, xmm1);
|
||||
mulsd(xmm4, xmm1);
|
||||
subsd(xmm2, xmm0);
|
||||
mulsd(xmm6, xmm1);
|
||||
addsd(xmm1, xmm2);
|
||||
pshufd(xmm2, xmm0, 238);
|
||||
mulsd(xmm5, xmm5);
|
||||
addsd(xmm7, xmm2);
|
||||
addsd(xmm1, xmm6);
|
||||
addpd(xmm4, xmm3);
|
||||
addsd(xmm1, xmm7);
|
||||
mulpd(xmm4, xmm5);
|
||||
addsd(xmm1, xmm4);
|
||||
pshufd(xmm5, xmm4, 238);
|
||||
addsd(xmm1, xmm5);
|
||||
addsd(xmm0, xmm1);
|
||||
jmp(L_2TAG_PACKET_2_0_2);
|
||||
|
||||
// Special cases: reload x, undo the earlier "subl(eax, 16)" and classify.
bind(L_2TAG_PACKET_0_0_2);
|
||||
movsd(xmm0, Address(rsp, 112)); // reload the original argument from the stack
|
||||
movdqu(xmm1, xmm0);
|
||||
addl(eax, 16);
|
||||
// Sign bit set (top-16 value >= 0x8000).
cmpl(eax, 32768);
|
||||
jcc(Assembler::aboveEqual, L_2TAG_PACKET_3_0_2);
|
||||
// Exponent field is zero: zero or denormal.
cmpl(eax, 16);
|
||||
jcc(Assembler::below, L_2TAG_PACKET_4_0_2);
|
||||
|
||||
// Reached by fall-through with the sign clear and the exponent field all
// ones, or from L_2TAG_PACKET_6_0_2: the result is x + x.
bind(L_2TAG_PACKET_5_0_2);
|
||||
addsd(xmm0, xmm0);
|
||||
jmp(L_2TAG_PACKET_2_0_2);
|
||||
|
||||
// Entered only via the jcc in L_2TAG_PACKET_3_0_2, so the first jcc here
// still tests the flags of "cmpl(ecx, -2097152)".
bind(L_2TAG_PACKET_6_0_2);
|
||||
jcc(Assembler::above, L_2TAG_PACKET_5_0_2);
|
||||
cmpl(edx, 0);
|
||||
jcc(Assembler::above, L_2TAG_PACKET_5_0_2);
|
||||
jmp(L_2TAG_PACKET_7_0_2);
|
||||
|
||||
// Sign bit set: edx = low 32 bits of x, ecx = high 32 bits doubled (which
// shifts the sign bit out). -2097152 == 0xFFE00000, i.e. an exponent field
// of all ones after that shift.
bind(L_2TAG_PACKET_3_0_2);
|
||||
movdl(edx, xmm1);
|
||||
psrlq(xmm1, 32);
|
||||
movdl(ecx, xmm1);
|
||||
addl(ecx, ecx);
|
||||
cmpl(ecx, -2097152);
|
||||
jcc(Assembler::aboveEqual, L_2TAG_PACKET_6_0_2);
|
||||
// All remaining bits zero means x is -0.
orl(edx, ecx);
|
||||
cmpl(edx, 0);
|
||||
jcc(Assembler::equal, L_2TAG_PACKET_8_0_2);
|
||||
|
||||
// Result is 0.0 * +Inf (32752 == 0x7FF0 is the top word of +Inf), i.e. NaN.
// edx is set to 9 here but is not read again in this function.
bind(L_2TAG_PACKET_7_0_2);
|
||||
xorpd(xmm1, xmm1);
|
||||
xorpd(xmm0, xmm0);
|
||||
movl(eax, 32752);
|
||||
pinsrw(xmm1, eax, 3);
|
||||
movl(edx, 9);
|
||||
mulsd(xmm0, xmm1);
|
||||
|
||||
// Common exit for the two error results: spill the result, reload the
// original argument into xmm0, and push the result onto the x87 stack.
bind(L_2TAG_PACKET_9_0_2);
|
||||
movsd(Address(rsp, 0), xmm0);
|
||||
movsd(xmm0, Address(rsp, 112)); // reload the original argument from the stack
|
||||
fld_d(Address(rsp, 0));
|
||||
jmp(L_2TAG_PACKET_10_0_2);
|
||||
|
||||
// Zero argument: result is -1.0 / 0.0 (49136 == 0xBFF0 is the top word of
// -1.0). edx is set to 8 here but is not read again in this function.
bind(L_2TAG_PACKET_8_0_2);
|
||||
xorpd(xmm1, xmm1);
|
||||
xorpd(xmm0, xmm0);
|
||||
movl(eax, 49136);
|
||||
pinsrw(xmm0, eax, 3);
|
||||
divsd(xmm0, xmm1);
|
||||
movl(edx, 8);
|
||||
jmp(L_2TAG_PACKET_9_0_2);
|
||||
|
||||
// Exponent field zero with the sign clear: +0 goes to the zero handler;
// otherwise x is multiplied by the double whose top word is 18416 (0x47F0,
// exponent field 0x3FF + 128), the setup sequence is repeated on the scaled
// value, and ecx carries the matching bias into the main path.
bind(L_2TAG_PACKET_4_0_2);
|
||||
movdl(edx, xmm1);
|
||||
psrlq(xmm1, 32);
|
||||
movdl(ecx, xmm1);
|
||||
orl(edx, ecx);
|
||||
cmpl(edx, 0);
|
||||
jcc(Assembler::equal, L_2TAG_PACKET_8_0_2);
|
||||
xorpd(xmm1, xmm1);
|
||||
movl(eax, 18416);
|
||||
pinsrw(xmm1, eax, 3);
|
||||
mulsd(xmm0, xmm1);
|
||||
// xmm2 was overwritten by the table load at offset 2144 during setup, so
// 1.0 is rebuilt here before it is OR-ed into xmm0.
xorpd(xmm2, xmm2);
|
||||
movl(eax, 16368);
|
||||
pinsrw(xmm2, eax, 3);
|
||||
movdqu(xmm1, xmm0);
|
||||
pextrw(eax, xmm0, 3);
|
||||
por(xmm0, xmm2);
|
||||
movl(ecx, 18416);
|
||||
psllq(xmm0, 5);
|
||||
movsd(xmm2, Address(tmp, 2144)); // 0x00000000UL, 0x3fdbc000UL
|
||||
psrlq(xmm0, 34);
|
||||
rcpss(xmm0, xmm0);
|
||||
psllq(xmm1, 12);
|
||||
pshufd(xmm6, xmm5, 78);
|
||||
psrlq(xmm1, 12);
|
||||
jmp(L_2TAG_PACKET_1_0_2);
|
||||
|
||||
// Normal exit: move the SSE result onto the x87 stack through memory.
bind(L_2TAG_PACKET_2_0_2);
|
||||
movsd(Address(rsp, 24), xmm0);
|
||||
fld_d(Address(rsp, 24));
|
||||
|
||||
// Restore the caller's tmp register saved at entry.
bind(L_2TAG_PACKET_10_0_2);
|
||||
movl(tmp, Address(rsp, 40));
|
||||
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -1,41 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "runtime/deoptimization.hpp"
|
||||
#include "runtime/frame.inline.hpp"
|
||||
#include "runtime/javaThread.hpp"
|
||||
#include "runtime/stubRoutines.hpp"
|
||||
|
||||
// Implementation of the platform-specific part of StubRoutines - for
|
||||
// a description of how to extend it, see the stubRoutines.hpp file.
|
||||
|
||||
// Out-of-line definitions of the StubRoutines::x86 static data members
// belonging to this platform file. Every member is zero-initialized here.
// NOTE(review): the names suggest x87 FPU control-word images, an MXCSR
// image and subnormal bias constants that are filled in elsewhere -- confirm
// against the code that writes them.
jint StubRoutines::x86::_fpu_cntrl_wrd_std = 0;
|
||||
jint StubRoutines::x86::_fpu_cntrl_wrd_24 = 0;
|
||||
jint StubRoutines::x86::_fpu_cntrl_wrd_trunc = 0;
|
||||
|
||||
jint StubRoutines::x86::_mxcsr_std = 0;
|
||||
|
||||
// Three-element jint arrays, all elements zero.
jint StubRoutines::x86::_fpu_subnormal_bias1[3] = { 0, 0, 0 };
|
||||
jint StubRoutines::x86::_fpu_subnormal_bias2[3] = { 0, 0, 0 };
|
||||
|
||||
@ -1,509 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "asm/macroAssembler.hpp"
|
||||
#include "compiler/disassembler.hpp"
|
||||
#include "interpreter/interp_masm.hpp"
|
||||
#include "interpreter/interpreter.hpp"
|
||||
#include "interpreter/interpreterRuntime.hpp"
|
||||
#include "interpreter/templateInterpreterGenerator.hpp"
|
||||
#include "runtime/arguments.hpp"
|
||||
#include "runtime/sharedRuntime.hpp"
|
||||
#include "runtime/stubRoutines.hpp"
|
||||
|
||||
#define __ Disassembler::hook<InterpreterMacroAssembler>(__FILE__, __LINE__, _masm)->
|
||||
|
||||
|
||||
// Emits the slow signature handler: a short routine that passes rbx, rdi and
// the current stack pointer to InterpreterRuntime::slow_signature_handler via
// call_VM and then returns.
//
// Register state the emitted code works with:
//   rbx: method
//   rcx: temporary
//   rdi: pointer to locals
//   rsp: end of copied parameters area
//
// Returns the address of the first emitted instruction.
address TemplateInterpreterGenerator::generate_slow_signature_handler() {
  const address handler_start = __ pc();
  const address runtime_handler =
      CAST_FROM_FN_PTR(address, InterpreterRuntime::slow_signature_handler);

  // rcx carries the stack pointer (end of the copied parameters) into the call.
  __ mov(rcx, rsp);
  __ call_VM(noreg, runtime_handler, rbx, rdi, rcx);
  __ ret(0);

  return handler_start;
}
|
||||
|
||||
/**
|
||||
* Method entry for static native methods:
|
||||
* int java.util.zip.CRC32.update(int crc, int b)
|
||||
*/
|
||||
address TemplateInterpreterGenerator::generate_CRC32_update_entry() {
|
||||
assert(UseCRC32Intrinsics, "this intrinsic is not supported");
|
||||
address entry = __ pc();
|
||||
|
||||
// rbx: Method*
|
||||
// rsi: senderSP must preserved for slow path, set SP to it on fast path
|
||||
// rdx: scratch
|
||||
// rdi: scratch
|
||||
|
||||
Label slow_path;
|
||||
// If we need a safepoint check, generate full interpreter entry.
|
||||
__ get_thread(rdi);
|
||||
__ safepoint_poll(slow_path, rdi, false /* at_return */, false /* in_nmethod */);
|
||||
|
||||
// We don't generate local frame and don't align stack because
|
||||
// we call stub code and there is no safepoint on this path.
|
||||
|
||||
// Load parameters
|
||||
const Register crc = rax; // crc
|
||||
const Register val = rdx; // source java byte value
|
||||
const Register tbl = rdi; // scratch
|
||||
|
||||
// Arguments are reversed on java expression stack
|
||||
__ movl(val, Address(rsp, wordSize)); // byte value
|
||||
__ movl(crc, Address(rsp, 2*wordSize)); // Initial CRC
|
||||
|
||||
__ lea(tbl, ExternalAddress(StubRoutines::crc_table_addr()));
|
||||
__ notl(crc); // ~crc
|
||||
__ update_byte_crc32(crc, val, tbl);
|
||||
__ notl(crc); // ~crc
|
||||
// result in rax
|
||||
|
||||
// _areturn
|
||||
__ pop(rdi); // get return address
|
||||
__ mov(rsp, rsi); // set sp to sender sp
|
||||
__ jmp(rdi);
|
||||
|
||||
// generate a vanilla native entry as the slow path
|
||||
__ bind(slow_path);
|
||||
__ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native));
|
||||
return entry;
|
||||
}
|
||||
|
||||
/**
|
||||
* Method entry for static native methods:
|
||||
* int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int len)
|
||||
* int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len)
|
||||
*/
|
||||
address TemplateInterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) {
|
||||
assert(UseCRC32Intrinsics, "this intrinsic is not supported");
|
||||
address entry = __ pc();
|
||||
|
||||
// rbx,: Method*
|
||||
// rsi: senderSP must preserved for slow path, set SP to it on fast path
|
||||
// rdx: scratch
|
||||
// rdi: scratch
|
||||
|
||||
Label slow_path;
|
||||
// If we need a safepoint check, generate full interpreter entry.
|
||||
__ get_thread(rdi);
|
||||
__ safepoint_poll(slow_path, rdi, false /* at_return */, false /* in_nmethod */);
|
||||
|
||||
// We don't generate local frame and don't align stack because
|
||||
// we call stub code and there is no safepoint on this path.
|
||||
|
||||
// Load parameters
|
||||
const Register crc = rax; // crc
|
||||
const Register buf = rdx; // source java byte array address
|
||||
const Register len = rdi; // length
|
||||
|
||||
// value x86_32
|
||||
// interp. arg ptr ESP + 4
|
||||
// int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int len)
|
||||
// 3 2 1 0
|
||||
// int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len)
|
||||
// 4 2,3 1 0
|
||||
|
||||
// Arguments are reversed on java expression stack
|
||||
__ movl(len, Address(rsp, 4 + 0)); // Length
|
||||
// Calculate address of start element
|
||||
if (kind == Interpreter::java_util_zip_CRC32_updateByteBuffer) {
|
||||
__ movptr(buf, Address(rsp, 4 + 2 * wordSize)); // long buf
|
||||
__ addptr(buf, Address(rsp, 4 + 1 * wordSize)); // + offset
|
||||
__ movl(crc, Address(rsp, 4 + 4 * wordSize)); // Initial CRC
|
||||
} else {
|
||||
__ movptr(buf, Address(rsp, 4 + 2 * wordSize)); // byte[] array
|
||||
__ addptr(buf, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size
|
||||
__ addptr(buf, Address(rsp, 4 + 1 * wordSize)); // + offset
|
||||
__ movl(crc, Address(rsp, 4 + 3 * wordSize)); // Initial CRC
|
||||
}
|
||||
|
||||
__ super_call_VM_leaf(CAST_FROM_FN_PTR(address, StubRoutines::updateBytesCRC32()), crc, buf, len);
|
||||
// result in rax
|
||||
|
||||
// _areturn
|
||||
__ pop(rdi); // get return address
|
||||
__ mov(rsp, rsi); // set sp to sender sp
|
||||
__ jmp(rdi);
|
||||
|
||||
// generate a vanilla native entry as the slow path
|
||||
__ bind(slow_path);
|
||||
__ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native));
|
||||
return entry;
|
||||
}
|
||||
|
||||
/**
|
||||
* Method entry for static native methods:
|
||||
* int java.util.zip.CRC32C.updateBytes(int crc, byte[] b, int off, int end)
|
||||
* int java.util.zip.CRC32C.updateByteBuffer(int crc, long address, int off, int end)
|
||||
*/
|
||||
address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) {
|
||||
assert(UseCRC32CIntrinsics, "this intrinsic is not supported");
|
||||
address entry = __ pc();
|
||||
// Load parameters
|
||||
const Register crc = rax; // crc
|
||||
const Register buf = rcx; // source java byte array address
|
||||
const Register len = rdx; // length
|
||||
const Register end = len;
|
||||
|
||||
// value x86_32
|
||||
// interp. arg ptr ESP + 4
|
||||
// int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int end)
|
||||
// 3 2 1 0
|
||||
// int java.util.zip.CRC32.updateByteBuffer(int crc, long address, int off, int end)
|
||||
// 4 2,3 1 0
|
||||
|
||||
// Arguments are reversed on java expression stack
|
||||
__ movl(end, Address(rsp, 4 + 0)); // end
|
||||
__ subl(len, Address(rsp, 4 + 1 * wordSize)); // end - offset == length
|
||||
// Calculate address of start element
|
||||
if (kind == Interpreter::java_util_zip_CRC32C_updateDirectByteBuffer) {
|
||||
__ movptr(buf, Address(rsp, 4 + 2 * wordSize)); // long address
|
||||
__ addptr(buf, Address(rsp, 4 + 1 * wordSize)); // + offset
|
||||
__ movl(crc, Address(rsp, 4 + 4 * wordSize)); // Initial CRC
|
||||
} else {
|
||||
__ movptr(buf, Address(rsp, 4 + 2 * wordSize)); // byte[] array
|
||||
__ addptr(buf, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size
|
||||
__ addptr(buf, Address(rsp, 4 + 1 * wordSize)); // + offset
|
||||
__ movl(crc, Address(rsp, 4 + 3 * wordSize)); // Initial CRC
|
||||
}
|
||||
__ super_call_VM_leaf(CAST_FROM_FN_PTR(address, StubRoutines::updateBytesCRC32C()), crc, buf, len);
|
||||
// result in rax
|
||||
// _areturn
|
||||
__ pop(rdi); // get return address
|
||||
__ mov(rsp, rsi); // set sp to sender sp
|
||||
__ jmp(rdi);
|
||||
|
||||
return entry;
|
||||
}
|
||||
|
||||
/**
|
||||
* Method entry for static native method:
|
||||
* java.lang.Float.intBitsToFloat(int bits)
|
||||
*/
|
||||
address TemplateInterpreterGenerator::generate_Float_intBitsToFloat_entry() {
|
||||
if (UseSSE >= 1) {
|
||||
address entry = __ pc();
|
||||
|
||||
// rsi: the sender's SP
|
||||
|
||||
// Skip safepoint check (compiler intrinsic versions of this method
|
||||
// do not perform safepoint checks either).
|
||||
|
||||
// Load 'bits' into xmm0 (interpreter returns results in xmm0)
|
||||
__ movflt(xmm0, Address(rsp, wordSize));
|
||||
|
||||
// Return
|
||||
__ pop(rdi); // get return address
|
||||
__ mov(rsp, rsi); // set rsp to the sender's SP
|
||||
__ jmp(rdi);
|
||||
return entry;
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
/**
|
||||
* Method entry for static native method:
|
||||
* java.lang.Float.floatToRawIntBits(float value)
|
||||
*/
|
||||
address TemplateInterpreterGenerator::generate_Float_floatToRawIntBits_entry() {
|
||||
if (UseSSE >= 1) {
|
||||
address entry = __ pc();
|
||||
|
||||
// rsi: the sender's SP
|
||||
|
||||
// Skip safepoint check (compiler intrinsic versions of this method
|
||||
// do not perform safepoint checks either).
|
||||
|
||||
// Load the parameter (a floating-point value) into rax.
|
||||
__ movl(rax, Address(rsp, wordSize));
|
||||
|
||||
// Return
|
||||
__ pop(rdi); // get return address
|
||||
__ mov(rsp, rsi); // set rsp to the sender's SP
|
||||
__ jmp(rdi);
|
||||
return entry;
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Method entry for static native method:
|
||||
* java.lang.Double.longBitsToDouble(long bits)
|
||||
*/
|
||||
address TemplateInterpreterGenerator::generate_Double_longBitsToDouble_entry() {
|
||||
if (UseSSE >= 2) {
|
||||
address entry = __ pc();
|
||||
|
||||
// rsi: the sender's SP
|
||||
|
||||
// Skip safepoint check (compiler intrinsic versions of this method
|
||||
// do not perform safepoint checks either).
|
||||
|
||||
// Load 'bits' into xmm0 (interpreter returns results in xmm0)
|
||||
__ movdbl(xmm0, Address(rsp, wordSize));
|
||||
|
||||
// Return
|
||||
__ pop(rdi); // get return address
|
||||
__ mov(rsp, rsi); // set rsp to the sender's SP
|
||||
__ jmp(rdi);
|
||||
return entry;
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
/**
|
||||
* Method entry for static native method:
|
||||
* java.lang.Double.doubleToRawLongBits(double value)
|
||||
*/
|
||||
address TemplateInterpreterGenerator::generate_Double_doubleToRawLongBits_entry() {
|
||||
if (UseSSE >= 2) {
|
||||
address entry = __ pc();
|
||||
|
||||
// rsi: the sender's SP
|
||||
|
||||
// Skip safepoint check (compiler intrinsic versions of this method
|
||||
// do not perform safepoint checks either).
|
||||
|
||||
// Load the parameter (a floating-point value) into rax.
|
||||
__ movl(rdx, Address(rsp, 2*wordSize));
|
||||
__ movl(rax, Address(rsp, wordSize));
|
||||
|
||||
// Return
|
||||
__ pop(rdi); // get return address
|
||||
__ mov(rsp, rsi); // set rsp to the sender's SP
|
||||
__ jmp(rdi);
|
||||
return entry;
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
/**
|
||||
* Method entry for static method:
|
||||
* java.lang.Float.float16ToFloat(short floatBinary16)
|
||||
*/
|
||||
address TemplateInterpreterGenerator::generate_Float_float16ToFloat_entry() {
|
||||
assert(VM_Version::supports_float16(), "this intrinsic is not supported");
|
||||
address entry = __ pc();
|
||||
|
||||
// rsi: the sender's SP
|
||||
|
||||
// Load value into xmm0 and convert
|
||||
__ movswl(rax, Address(rsp, wordSize));
|
||||
__ flt16_to_flt(xmm0, rax);
|
||||
|
||||
// Return
|
||||
__ pop(rdi); // get return address
|
||||
__ mov(rsp, rsi); // set rsp to the sender's SP
|
||||
__ jmp(rdi);
|
||||
return entry;
|
||||
}
|
||||
|
||||
/**
|
||||
* Method entry for static method:
|
||||
* java.lang.Float.floatToFloat16(float value)
|
||||
*/
|
||||
address TemplateInterpreterGenerator::generate_Float_floatToFloat16_entry() {
|
||||
assert(VM_Version::supports_float16(), "this intrinsic is not supported");
|
||||
address entry = __ pc();
|
||||
|
||||
// rsi: the sender's SP
|
||||
|
||||
// Load value into xmm0, convert and put result into rax
|
||||
__ movflt(xmm0, Address(rsp, wordSize));
|
||||
__ flt_to_flt16(rax, xmm0, xmm1);
|
||||
|
||||
// Return
|
||||
__ pop(rdi); // get return address
|
||||
__ mov(rsp, rsi); // set rsp to the sender's SP
|
||||
__ jmp(rdi);
|
||||
return entry;
|
||||
}
|
||||
|
||||
// Generates the interpreter entry for the java.lang.Math method selected by
// `kind`.
//
// Returns nullptr (so that a vanilla entry is generated instead) for tanh, and
// for fmaD/fmaF when UseFMA is off. Otherwise returns the address of the first
// emitted instruction. Any kind that is not handled below hits
// ShouldNotReachHere().
address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKind kind) {
  // Register state on entry:
  //   rbx: Method*
  //   rcx: scratch
  //   rsi: sender sp
  //
  // Stack on entry:
  //   [ ret adr ]  <-- rsp
  //   [ lo(arg) ]
  //   [ hi(arg) ]

  const address entry_point = __ pc();

  // These entries don't need a safepoint check because they aren't virtually
  // callable, and they are never entered from compiled code. Should an
  // intrinsic that is virtually callable be added in the future, safepointing
  // would have to be revisited so that this code is used.
  //
  // These are mathematical functions inlined by the compiler; the interpreter
  // must provide an identical implementation in order to avoid monotonicity
  // bugs when switching from interpreter to compiler in the middle of some
  // computation.

  if (kind == Interpreter::java_lang_math_tanh) {
    return nullptr;
  }

  // Common epilogue: pop the return address, restore the sender's sp, jump back.
  auto return_to_sender = [this]() {
    __ pop(rdi);       // get return address
    __ mov(rsp, rsi);  // set sp to sender sp
    __ jmp(rdi);
  };

  // Fused multiply-add is done entirely in xmm registers.
  const bool is_fma_double = (kind == Interpreter::java_lang_math_fmaD);
  const bool is_fma_float  = (kind == Interpreter::java_lang_math_fmaF);
  if (is_fma_double || is_fma_float) {
    if (!UseFMA) {
      return nullptr;  // Generate a vanilla entry
    }
    if (is_fma_double) {
      __ movdbl(xmm2, Address(rsp, 5 * wordSize));
      __ movdbl(xmm1, Address(rsp, 3 * wordSize));
      __ movdbl(xmm0, Address(rsp, 1 * wordSize));
      __ fmad(xmm0, xmm1, xmm2, xmm0);
    } else {
      __ movflt(xmm2, Address(rsp, 3 * wordSize));
      __ movflt(xmm1, Address(rsp, 2 * wordSize));
      __ movflt(xmm0, Address(rsp, 1 * wordSize));
      __ fmaf(xmm0, xmm1, xmm2, xmm0);
    }
    return_to_sender();
    return entry_point;
  }

  // Calls the generated stub when one is supplied, else the shared runtime routine.
  auto call_stub_or_runtime = [this](address stub, address runtime_fn) {
    if (stub != nullptr) {
      __ call(RuntimeAddress(stub));
    } else {
      __ call_VM_leaf0(runtime_fn);
    }
  };

  // One-operand case: spill ST(0) into a fresh two-word stack slot, make the
  // call, then release the slot again.
  auto call_unary = [this, &call_stub_or_runtime](address stub, address runtime_fn) {
    __ subptr(rsp, 2 * wordSize);
    __ fstp_d(Address(rsp, 0));
    call_stub_or_runtime(stub, runtime_fn);
    __ addptr(rsp, 2 * wordSize);
  };

  // All remaining kinds start with the double at rsp + wordSize on the x87 stack.
  __ fld_d(Address(rsp, 1 * wordSize));
  switch (kind) {
    case Interpreter::java_lang_math_sin:
      // The sin stub is only considered when SSE2 is supported.
      call_unary(VM_Version::supports_sse2() ? StubRoutines::dsin() : nullptr,
                 CAST_FROM_FN_PTR(address, SharedRuntime::dsin));
      break;
    case Interpreter::java_lang_math_cos:
      // The cos stub is only considered when SSE2 is supported.
      call_unary(VM_Version::supports_sse2() ? StubRoutines::dcos() : nullptr,
                 CAST_FROM_FN_PTR(address, SharedRuntime::dcos));
      break;
    case Interpreter::java_lang_math_tan:
      call_unary(StubRoutines::dtan(), CAST_FROM_FN_PTR(address, SharedRuntime::dtan));
      break;
    case Interpreter::java_lang_math_sqrt:
      __ fsqrt();
      break;
    case Interpreter::java_lang_math_abs:
      __ fabs();
      break;
    case Interpreter::java_lang_math_log:
      call_unary(StubRoutines::dlog(), CAST_FROM_FN_PTR(address, SharedRuntime::dlog));
      break;
    case Interpreter::java_lang_math_log10:
      call_unary(StubRoutines::dlog10(), CAST_FROM_FN_PTR(address, SharedRuntime::dlog10));
      break;
    case Interpreter::java_lang_math_pow:
      // Push the other operand as well, then spill both into a four-word slot.
      __ fld_d(Address(rsp, 3 * wordSize));
      __ subptr(rsp, 4 * wordSize);
      __ fstp_d(Address(rsp, 0));
      __ fstp_d(Address(rsp, 2 * wordSize));
      call_stub_or_runtime(StubRoutines::dpow(), CAST_FROM_FN_PTR(address, SharedRuntime::dpow));
      __ addptr(rsp, 4 * wordSize);
      break;
    case Interpreter::java_lang_math_exp:
      call_unary(StubRoutines::dexp(), CAST_FROM_FN_PTR(address, SharedRuntime::dexp));
      break;
    default:
      ShouldNotReachHere();
  }

  // Return the double result in xmm0 for interpreter and compilers: bounce
  // ST(0) through a temporary stack slot.
  if (UseSSE >= 2) {
    __ subptr(rsp, 2 * wordSize);
    __ fstp_d(Address(rsp, 0));
    __ movdbl(xmm0, Address(rsp, 0));
    __ addptr(rsp, 2 * wordSize);
  }

  // Done, result in FPU ST(0) or XMM0.
  return_to_sender();

  return entry_point;
}
|
||||
|
||||
// Not supported
|
||||
address TemplateInterpreterGenerator::generate_currentThread() { return nullptr; }
|
||||
|
||||
@ -1,33 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2020, 2025, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
|
||||
#include "prims/upcallLinker.hpp"
|
||||
|
||||
// Placeholder implementation of the upcall stub factory for this platform
// file. It must never be invoked: the body reports the call through
// ShouldNotCallThis() and none of the arguments are examined. The nullptr
// return only satisfies the signature.
address UpcallLinker::make_upcall_stub(jobject receiver,
                                       Symbol* signature,
                                       BasicType* out_sig_bt,
                                       int total_out_args,
                                       BasicType ret_type,
                                       jobject jabi,
                                       jobject jconv,
                                       bool needs_return_buffer,
                                       int ret_buf_size) {
  ShouldNotCallThis();
  return nullptr;
}
|
||||
@ -1,265 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "asm/macroAssembler.hpp"
|
||||
#include "code/compiledIC.hpp"
|
||||
#include "code/vtableStubs.hpp"
|
||||
#include "interp_masm_x86.hpp"
|
||||
#include "memory/resourceArea.hpp"
|
||||
#include "oops/instanceKlass.hpp"
|
||||
#include "oops/klassVtable.hpp"
|
||||
#include "runtime/sharedRuntime.hpp"
|
||||
#include "vmreg_x86.inline.hpp"
|
||||
#ifdef COMPILER2
|
||||
#include "opto/runtime.hpp"
|
||||
#endif
|
||||
|
||||
// machine-dependent part of VtableStubs: create VtableStub of correct size and
|
||||
// initialize its code
|
||||
|
||||
#define __ masm->
|
||||
|
||||
#ifndef PRODUCT
|
||||
extern "C" void bad_compiled_vtable_index(JavaThread* thread, oop receiver, int index);
|
||||
#endif
|
||||
|
||||
// These stubs are used by the compiler only.
|
||||
// Argument registers, which must be preserved:
|
||||
// rcx - receiver (always first argument)
|
||||
// rdx - second argument (if any)
|
||||
// Other registers that might be usable:
|
||||
// rax - inline cache register (is interface for itable stub)
|
||||
// rbx - method (used when calling out to interpreter)
|
||||
// Available now, but may become callee-save at some point:
|
||||
// rsi, rdi
|
||||
// Note that rax and rdx are also used for return values.
|
||||
|
||||
// Creates the vtable stub for `vtable_index`: allocates a VtableStub, emits
// code that loads the receiver's klass, looks up the Method* at the given
// vtable index and jumps to its from-compiled entry, then records the stub via
// bookkeeping().
//
// Returns the new stub, or nullptr when the VtableStub allocation fails.
VtableStub* VtableStubs::create_vtable_stub(int vtable_index) {
  // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing.
  const int stub_code_length = code_size_limit(true);
  VtableStub* stub = new(stub_code_length) VtableStub(true, vtable_index);
  if (stub == nullptr) {
    // Can happen if there is no free space in the code cache.
    return nullptr;
  }

  // Unused bytes in variable-size instruction sequences are counted and added
  // to the computed buffer size in order to avoid overflow in subsequently
  // generated stubs.
  address seq_start;
  int total_slop = 0;
  int seq_slop   = 0;
  // No variance was detected in vtable stub sizes. Setting index_dependent_slop == 0 will unveil any deviation from this observation.
  const int index_dependent_slop = 0;

  const Register method = rbx;

  ResourceMark rm;
  CodeBuffer code(stub->entry_point(), stub_code_length);
  // Note: the local must be called `masm`, the `__` macro expands to it.
  MacroAssembler* masm = new MacroAssembler(&code);

#if (!defined(PRODUCT) && defined(COMPILER2))
  if (CountCompiledCalls) {
    __ incrementl(ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr()));
  }
#endif

  // get receiver (need to skip return address on top of stack)
  assert(VtableStub::receiver_location() == rcx->as_VMReg(), "receiver expected in rcx");

  // Load the receiver klass; this pc is what bookkeeping() receives as npe_addr.
  address npe_addr = __ pc();
  __ movptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));

#ifndef PRODUCT
  if (DebugVtables) {
    Label index_ok;
    seq_start = __ pc();
    // check offset vs vtable length
    __ cmpl(Address(rax, Klass::vtable_length_offset()), vtable_index*vtableEntry::size());
    seq_slop = 10 - (__ pc() - seq_start);  // cmpl varies in length, depending on data
    total_slop += seq_slop;
    assert(seq_slop >= 0, "negative slop(%d) encountered, adjust code size estimate!", seq_slop);

    __ jcc(Assembler::greater, index_ok);
    __ movl(rbx, vtable_index);
    // VTABLE TODO: find upper bound for call_VM length.
    seq_start = __ pc();
    __ call_VM(noreg, CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), rcx, rbx);
    seq_slop = 500 - (__ pc() - seq_start);
    total_slop += seq_slop;
    assert(seq_slop >= 0, "negative slop(%d) encountered, adjust code size estimate!", seq_slop);
    __ bind(index_ok);
  }
#endif // PRODUCT

  // load Method* and target address
  seq_start = __ pc();
  __ lookup_virtual_method(rax, vtable_index, method);
  seq_slop = 6 - (int)(__ pc() - seq_start);
  total_slop += seq_slop;
  assert(seq_slop >= 0, "negative slop(%d) encountered, adjust code size estimate!", seq_slop);

#ifndef PRODUCT
  if (DebugVtables) {
    Label entry_ok;
    __ cmpptr(method, NULL_WORD);
    __ jcc(Assembler::equal, entry_ok);
    __ cmpptr(Address(method, Method::from_compiled_offset()), NULL_WORD);
    __ jcc(Assembler::notZero, entry_ok);
    __ stop("Vtable entry is null");
    __ bind(entry_ok);
  }
#endif // PRODUCT

  // rax: receiver klass
  // method (rbx): Method*
  // rcx: receiver
  // This pc is what bookkeeping() receives as ame_addr.
  address ame_addr = __ pc();
  __ jmp( Address(method, Method::from_compiled_offset()));

  masm->flush();
  total_slop += index_dependent_slop; // add'l slop for size variance due to large itable offsets
  bookkeeping(masm, tty, stub, npe_addr, ame_addr, true, vtable_index, total_slop, index_dependent_slop);

  return stub;
}
|
||||
|
||||
|
||||
// Creates the itable stub for `itable_index`: allocates a VtableStub, emits
// code that loads the interface klasses from the CompiledICData in rax, looks
// up the target Method* through lookup_interface_method_stub and jumps to its
// from-compiled entry. When the lookup branches to its failure label, the
// emitted code jumps to the "handle wrong method" stub instead. The stub is
// finally recorded via bookkeeping().
//
// Returns the new stub, or nullptr when the VtableStub allocation fails.
VtableStub* VtableStubs::create_itable_stub(int itable_index) {
  // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing.
  const int stub_code_length = code_size_limit(false);
  VtableStub* stub = new(stub_code_length) VtableStub(false, itable_index);
  if (stub == nullptr) {
    // Can happen if there is no free space in the code cache.
    return nullptr;
  }

  // Unused bytes in variable-size instruction sequences are counted and added
  // to the computed buffer size in order to avoid overflow in subsequently
  // generated stubs.
  address seq_start;
  int total_slop = 0;
  int seq_slop   = 0;

  // Index-dependent allowance. Original author notes: the code size changes
  // with the transition from an 8-bit to a 32-bit constant (@index == 32),
  // and index == 0 generates even shorter code.
  int index_dependent_slop;
  if (itable_index == 0) {
    index_dependent_slop = 4;
  } else if (itable_index < 32) {
    index_dependent_slop = 3;
  } else {
    index_dependent_slop = 0;
  }

  // Most registers are in use; we'll use rax, rbx, rcx, rdx, rsi, rdi
  // (If we need to make rsi, rdi callee-save, do a push/pop here.)
  const Register icdata_reg         = rax;
  const Register holder_klass_reg   = rax; // declaring interface klass (DEFC)
  const Register method             = rbx;
  const Register receiver           = rcx;
  const Register temp_reg           = rdx;
  const Register recv_klass_reg     = rsi;
  const Register resolved_klass_reg = rdi; // resolved interface klass (REFC)

  ResourceMark rm;
  CodeBuffer code(stub->entry_point(), stub_code_length);
  // Note: the local must be called `masm`, the `__` macro expands to it.
  MacroAssembler* masm = new MacroAssembler(&code);

#if (!defined(PRODUCT) && defined(COMPILER2))
  if (CountCompiledCalls) {
    __ incrementl(ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr()));
  }
#endif /* PRODUCT */

  // Entry arguments:
  //  rax: CompiledICData
  //  rcx: Receiver

  // icdata_reg and holder_klass_reg are both rax, so the REFC load has to come
  // first: the DEFC load overwrites the CompiledICData pointer.
  __ movptr(resolved_klass_reg, Address(icdata_reg, CompiledICData::itable_refc_klass_offset()));
  __ movptr(holder_klass_reg,   Address(icdata_reg, CompiledICData::itable_defc_klass_offset()));

  Label L_no_such_interface;

  // get receiver klass (also an implicit null-check)
  assert(VtableStub::receiver_location() == rcx->as_VMReg(), "receiver expected in rcx");
  address npe_addr = __ pc();
  __ load_klass(recv_klass_reg, rcx, noreg);

  seq_start = __ pc();
  __ push(rdx); // temp_reg

  // Receiver subtype check against REFC.
  // Get selected method from declaring class and itable index
  __ lookup_interface_method_stub(recv_klass_reg,     // input
                                  holder_klass_reg,   // input
                                  resolved_klass_reg, // input
                                  method,             // output
                                  temp_reg,
                                  noreg,
                                  receiver,           // input (x86_32 only: to restore recv_klass value)
                                  itable_index,
                                  L_no_such_interface);
  const ptrdiff_t lookupSize = __ pc() - seq_start;

  // We expect we need index_dependent_slop extra bytes. Reason:
  // The emitted code in lookup_interface_method changes when itable_index exceeds 31.
  // For windows, a narrow estimate was found to be 104. Other OSes not tested.
  const ptrdiff_t estimate = 104;
  const ptrdiff_t codesize = lookupSize + index_dependent_slop;
  seq_slop = (int)(estimate - codesize);
  total_slop += seq_slop;
  assert(seq_slop >= 0, "itable #%d: Code size estimate (%d) for lookup_interface_method too small, required: %d", itable_index, (int)estimate, (int)codesize);

  // method (rbx): Method*
  // rcx: receiver

#ifdef ASSERT
  if (DebugVtables) {
    Label method_ok;
    __ cmpptr(method, NULL_WORD);
    __ jcc(Assembler::equal, method_ok);
    __ cmpptr(Address(method, Method::from_compiled_offset()), NULL_WORD);
    __ jcc(Assembler::notZero, method_ok);
    __ stop("Method* is null");
    __ bind(method_ok);
  }
#endif // ASSERT

  // Undo the push of rdx, then dispatch. This pc is what bookkeeping()
  // receives as ame_addr.
  __ pop(rdx);
  address ame_addr = __ pc();
  __ jmp(Address(method, Method::from_compiled_offset()));

  __ bind(L_no_such_interface);
  // Handle IncompatibleClassChangeError in itable stubs.
  // More detailed error message.
  // We force resolving of the call site by jumping to the "handle
  // wrong method" stub, and so let the interpreter runtime do all the
  // dirty work.
  __ pop(rdx);
  __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub()));

  masm->flush();
  total_slop += index_dependent_slop; // add'l slop for size variance due to large itable offsets
  bookkeeping(masm, tty, stub, npe_addr, ame_addr, false, itable_index, total_slop, index_dependent_slop);

  return stub;
}
|
||||
|
||||
int VtableStub::pd_code_alignment() {
|
||||
// x86 cache line size is 64 bytes, but we want to limit alignment loss.
|
||||
const unsigned int icache_line_size = wordSize;
|
||||
return icache_line_size;
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@ -1,525 +0,0 @@
|
||||
#
|
||||
# Copyright (c) 2004, 2024, Oracle and/or its affiliates. All rights reserved.
|
||||
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
#
|
||||
# This code is free software; you can redistribute it and/or modify it
|
||||
# under the terms of the GNU General Public License version 2 only, as
|
||||
# published by the Free Software Foundation.
|
||||
#
|
||||
# This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
# version 2 for more details (a copy is included in the LICENSE file that
|
||||
# accompanied this code).
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License version
|
||||
# 2 along with this work; if not, write to the Free Software Foundation,
|
||||
# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
#
|
||||
# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
# or visit www.oracle.com if you need additional information or have any
|
||||
# questions.
|
||||
#
|
||||
|
||||
#include "defs.S.inc"
|
||||
|
||||
# NOTE WELL! The _Copy functions are called directly
|
||||
# from server-compiler-generated code via CallLeafNoFP,
|
||||
# which means that they *must* either not use floating
|
||||
# point or use it in the same manner as does the server
|
||||
# compiler.
|
||||
|
||||
.text
|
||||
|
||||
# Set fpu to 53 bit precision. This happens too early to use a stub.
# The routine takes no arguments: it pushes the new x87 control word,
# loads it from the stack with fldcw, and pops the temporary slot again.
|
||||
.p2align 4,,15
|
||||
DECLARE_FUNC(fixcw):
|
||||
pushl $0x27f # control word: exceptions masked, 53-bit precision, round to nearest
|
||||
fldcw 0(%esp) # load the control word from the slot just pushed
|
||||
popl %eax # rebalance the stack; the popped value in %eax is not used
|
||||
ret
|
||||
|
||||
# SpinPause: spin-wait hint for busy-wait loops.
# Takes no arguments; executes one pause hint and returns 1 in %eax.
.p2align 4,,15
|
||||
DECLARE_FUNC(SpinPause):
|
||||
rep
|
||||
nop # "rep; nop" is the encoding of the PAUSE instruction
|
||||
movl $1, %eax # return value: always 1
|
||||
ret
|
||||
|
||||
# Support for void Copy::arrayof_conjoint_bytes(void* from,
|
||||
# void* to,
|
||||
# size_t count)
|
||||
#
|
||||
.p2align 4,,15
|
||||
DECLARE_FUNC(_Copy_arrayof_conjoint_bytes):
|
||||
pushl %esi
|
||||
movl 4+12(%esp),%ecx # count
|
||||
pushl %edi
|
||||
movl 8+ 4(%esp),%esi # from
|
||||
movl 8+ 8(%esp),%edi # to
|
||||
cmpl %esi,%edi
|
||||
leal -1(%esi,%ecx),%eax # from + count - 1
|
||||
jbe acb_CopyRight
|
||||
cmpl %eax,%edi
|
||||
jbe acb_CopyLeft
|
||||
# copy from low to high
|
||||
acb_CopyRight:
|
||||
cmpl $3,%ecx
|
||||
jbe 5f
|
||||
1: movl %ecx,%eax
|
||||
shrl $2,%ecx
|
||||
jz 4f
|
||||
cmpl $32,%ecx
|
||||
ja 3f
|
||||
# copy aligned dwords
|
||||
subl %esi,%edi
|
||||
.p2align 4,,15
|
||||
2: movl (%esi),%edx
|
||||
movl %edx,(%edi,%esi,1)
|
||||
addl $4,%esi
|
||||
subl $1,%ecx
|
||||
jnz 2b
|
||||
addl %esi,%edi
|
||||
jmp 4f
|
||||
# copy aligned dwords
|
||||
3: rep; smovl
|
||||
4: movl %eax,%ecx
|
||||
5: andl $3,%ecx
|
||||
jz 7f
|
||||
# copy suffix
|
||||
xorl %eax,%eax
|
||||
6: movb (%esi,%eax,1),%dl
|
||||
movb %dl,(%edi,%eax,1)
|
||||
addl $1,%eax
|
||||
subl $1,%ecx
|
||||
jnz 6b
|
||||
7: popl %edi
|
||||
popl %esi
|
||||
ret
|
||||
acb_CopyLeft:
|
||||
std
|
||||
leal -4(%edi,%ecx),%edi # to + count - 4
|
||||
movl %eax,%esi # from + count - 1
|
||||
movl %ecx,%eax
|
||||
subl $3,%esi # from + count - 4
|
||||
cmpl $3,%ecx
|
||||
jbe 5f
|
||||
1: shrl $2,%ecx
|
||||
jz 4f
|
||||
cmpl $32,%ecx
|
||||
jbe 2f # <= 32 dwords
|
||||
rep; smovl
|
||||
jmp 4f
|
||||
.space 8
|
||||
2: subl %esi,%edi
|
||||
.p2align 4,,15
|
||||
3: movl (%esi),%edx
|
||||
movl %edx,(%edi,%esi,1)
|
||||
subl $4,%esi
|
||||
subl $1,%ecx
|
||||
jnz 3b
|
||||
addl %esi,%edi
|
||||
4: movl %eax,%ecx
|
||||
5: andl $3,%ecx
|
||||
jz 7f
|
||||
subl %esi,%edi
|
||||
addl $3,%esi
|
||||
6: movb (%esi),%dl
|
||||
movb %dl,(%edi,%esi,1)
|
||||
subl $1,%esi
|
||||
subl $1,%ecx
|
||||
jnz 6b
|
||||
7: cld
|
||||
popl %edi
|
||||
popl %esi
|
||||
ret
|
||||
|
||||
# Support for void Copy::conjoint_jshorts_atomic(void* from,
|
||||
# void* to,
|
||||
# size_t count)
|
||||
.p2align 4,,15
|
||||
DECLARE_FUNC(_Copy_conjoint_jshorts_atomic):
|
||||
pushl %esi
|
||||
movl 4+12(%esp),%ecx # count
|
||||
pushl %edi
|
||||
movl 8+ 4(%esp),%esi # from
|
||||
movl 8+ 8(%esp),%edi # to
|
||||
cmpl %esi,%edi
|
||||
leal -2(%esi,%ecx,2),%eax # from + count*2 - 2
|
||||
jbe cs_CopyRight
|
||||
cmpl %eax,%edi
|
||||
jbe cs_CopyLeft
|
||||
# copy from low to high
|
||||
cs_CopyRight:
|
||||
# align source address at dword address boundary
|
||||
movl %esi,%eax # original from
|
||||
andl $3,%eax # either 0 or 2
|
||||
jz 1f # no prefix
|
||||
# copy prefix
|
||||
subl $1,%ecx
|
||||
jl 5f # zero count
|
||||
movw (%esi),%dx
|
||||
movw %dx,(%edi)
|
||||
addl %eax,%esi # %eax == 2
|
||||
addl %eax,%edi
|
||||
1: movl %ecx,%eax # word count less prefix
|
||||
sarl %ecx # dword count
|
||||
jz 4f # no dwords to move
|
||||
cmpl $32,%ecx
|
||||
jbe 2f # <= 32 dwords
|
||||
# copy aligned dwords
|
||||
rep; smovl
|
||||
jmp 4f
|
||||
# copy aligned dwords
|
||||
2: subl %esi,%edi
|
||||
.p2align 4,,15
|
||||
3: movl (%esi),%edx
|
||||
movl %edx,(%edi,%esi,1)
|
||||
addl $4,%esi
|
||||
subl $1,%ecx
|
||||
jnz 3b
|
||||
addl %esi,%edi
|
||||
4: andl $1,%eax # suffix count
|
||||
jz 5f # no suffix
|
||||
# copy suffix
|
||||
movw (%esi),%dx
|
||||
movw %dx,(%edi)
|
||||
5: popl %edi
|
||||
popl %esi
|
||||
ret
|
||||
# copy from high to low
|
||||
cs_CopyLeft:
|
||||
std
|
||||
leal -4(%edi,%ecx,2),%edi # to + count*2 - 4
|
||||
movl %eax,%esi # from + count*2 - 2
|
||||
movl %ecx,%eax
|
||||
subl $2,%esi # from + count*2 - 4
|
||||
1: sarl %ecx # dword count
|
||||
jz 4f # no dwords to move
|
||||
cmpl $32,%ecx
|
||||
ja 3f # > 32 dwords
|
||||
subl %esi,%edi
|
||||
.p2align 4,,15
|
||||
2: movl (%esi),%edx
|
||||
movl %edx,(%edi,%esi,1)
|
||||
subl $4,%esi
|
||||
subl $1,%ecx
|
||||
jnz 2b
|
||||
addl %esi,%edi
|
||||
jmp 4f
|
||||
3: rep; smovl
|
||||
4: andl $1,%eax # suffix count
|
||||
jz 5f # no suffix
|
||||
# copy suffix
|
||||
addl $2,%esi
|
||||
addl $2,%edi
|
||||
movw (%esi),%dx
|
||||
movw %dx,(%edi)
|
||||
5: cld
|
||||
popl %edi
|
||||
popl %esi
|
||||
ret
|
||||
|
||||
# Support for void Copy::arrayof_conjoint_jshorts(void* from,
|
||||
# void* to,
|
||||
# size_t count)
|
||||
.p2align 4,,15
|
||||
DECLARE_FUNC(_Copy_arrayof_conjoint_jshorts):
|
||||
pushl %esi
|
||||
movl 4+12(%esp),%ecx # count
|
||||
pushl %edi
|
||||
movl 8+ 4(%esp),%esi # from
|
||||
movl 8+ 8(%esp),%edi # to
|
||||
cmpl %esi,%edi
|
||||
leal -2(%esi,%ecx,2),%eax # from + count*2 - 2
|
||||
jbe acs_CopyRight
|
||||
cmpl %eax,%edi
|
||||
jbe acs_CopyLeft
|
||||
acs_CopyRight:
|
||||
movl %ecx,%eax # word count
|
||||
sarl %ecx # dword count
|
||||
jz 4f # no dwords to move
|
||||
cmpl $32,%ecx
|
||||
jbe 2f # <= 32 dwords
|
||||
# copy aligned dwords
|
||||
rep; smovl
|
||||
jmp 4f
|
||||
# copy aligned dwords
|
||||
.space 5
|
||||
2: subl %esi,%edi
|
||||
.p2align 4,,15
|
||||
3: movl (%esi),%edx
|
||||
movl %edx,(%edi,%esi,1)
|
||||
addl $4,%esi
|
||||
subl $1,%ecx
|
||||
jnz 3b
|
||||
addl %esi,%edi
|
||||
4: andl $1,%eax # suffix count
|
||||
jz 5f # no suffix
|
||||
# copy suffix
|
||||
movw (%esi),%dx
|
||||
movw %dx,(%edi)
|
||||
5: popl %edi
|
||||
popl %esi
|
||||
ret
|
||||
acs_CopyLeft:
|
||||
std
|
||||
leal -4(%edi,%ecx,2),%edi # to + count*2 - 4
|
||||
movl %eax,%esi # from + count*2 - 2
|
||||
movl %ecx,%eax
|
||||
subl $2,%esi # from + count*2 - 4
|
||||
sarl %ecx # dword count
|
||||
jz 4f # no dwords to move
|
||||
cmpl $32,%ecx
|
||||
ja 3f # > 32 dwords
|
||||
subl %esi,%edi
|
||||
.p2align 4,,15
|
||||
2: movl (%esi),%edx
|
||||
movl %edx,(%edi,%esi,1)
|
||||
subl $4,%esi
|
||||
subl $1,%ecx
|
||||
jnz 2b
|
||||
addl %esi,%edi
|
||||
jmp 4f
|
||||
3: rep; smovl
|
||||
4: andl $1,%eax # suffix count
|
||||
jz 5f # no suffix
|
||||
# copy suffix
|
||||
addl $2,%esi
|
||||
addl $2,%edi
|
||||
movw (%esi),%dx
|
||||
movw %dx,(%edi)
|
||||
5: cld
|
||||
popl %edi
|
||||
popl %esi
|
||||
ret
|
||||
|
||||
# Support for void Copy::conjoint_jints_atomic(void* from,
|
||||
# void* to,
|
||||
# size_t count)
|
||||
# Equivalent to
|
||||
# arrayof_conjoint_jints
|
||||
.p2align 4,,15
|
||||
DECLARE_FUNC(_Copy_conjoint_jints_atomic):
|
||||
DECLARE_FUNC(_Copy_arrayof_conjoint_jints):
|
||||
pushl %esi
|
||||
movl 4+12(%esp),%ecx # count
|
||||
pushl %edi
|
||||
movl 8+ 4(%esp),%esi # from
|
||||
movl 8+ 8(%esp),%edi # to
|
||||
cmpl %esi,%edi
|
||||
leal -4(%esi,%ecx,4),%eax # from + count*4 - 4
|
||||
jbe ci_CopyRight
|
||||
cmpl %eax,%edi
|
||||
jbe ci_CopyLeft
|
||||
ci_CopyRight:
|
||||
cmpl $32,%ecx
|
||||
jbe 2f # <= 32 dwords
|
||||
rep; smovl
|
||||
popl %edi
|
||||
popl %esi
|
||||
ret
|
||||
.space 10
|
||||
2: subl %esi,%edi
|
||||
jmp 4f
|
||||
.p2align 4,,15
|
||||
3: movl (%esi),%edx
|
||||
movl %edx,(%edi,%esi,1)
|
||||
addl $4,%esi
|
||||
4: subl $1,%ecx
|
||||
jge 3b
|
||||
popl %edi
|
||||
popl %esi
|
||||
ret
|
||||
ci_CopyLeft:
|
||||
std
|
||||
leal -4(%edi,%ecx,4),%edi # to + count*4 - 4
|
||||
cmpl $32,%ecx
|
||||
ja 4f # > 32 dwords
|
||||
subl %eax,%edi # eax == from + count*4 - 4
|
||||
jmp 3f
|
||||
.p2align 4,,15
|
||||
2: movl (%eax),%edx
|
||||
movl %edx,(%edi,%eax,1)
|
||||
subl $4,%eax
|
||||
3: subl $1,%ecx
|
||||
jge 2b
|
||||
cld
|
||||
popl %edi
|
||||
popl %esi
|
||||
ret
|
||||
4: movl %eax,%esi # from + count*4 - 4
|
||||
rep; smovl
|
||||
cld
|
||||
popl %edi
|
||||
popl %esi
|
||||
ret
|
||||
|
||||
# Support for void Copy::conjoint_jlongs_atomic(jlong* from,
|
||||
# jlong* to,
|
||||
# size_t count)
|
||||
#
|
||||
# 32-bit
|
||||
#
|
||||
# count treated as signed
|
||||
#
|
||||
# // if (from > to) {
|
||||
# while (--count >= 0) {
|
||||
# *to++ = *from++;
|
||||
# }
|
||||
# } else {
|
||||
# while (--count >= 0) {
|
||||
# to[count] = from[count];
|
||||
# }
|
||||
# }
|
||||
.p2align 4,,15
|
||||
DECLARE_FUNC(_Copy_conjoint_jlongs_atomic):
|
||||
movl 4+8(%esp),%ecx # count
|
||||
movl 4+0(%esp),%eax # from
|
||||
movl 4+4(%esp),%edx # to
|
||||
cmpl %eax,%edx
|
||||
jae cla_CopyLeft
|
||||
cla_CopyRight:
|
||||
subl %eax,%edx
|
||||
jmp 2f
|
||||
.p2align 4,,15
|
||||
1: fildll (%eax)
|
||||
fistpll (%edx,%eax,1)
|
||||
addl $8,%eax
|
||||
2: subl $1,%ecx
|
||||
jge 1b
|
||||
ret
|
||||
.p2align 4,,15
|
||||
3: fildll (%eax,%ecx,8)
|
||||
fistpll (%edx,%ecx,8)
|
||||
cla_CopyLeft:
|
||||
subl $1,%ecx
|
||||
jge 3b
|
||||
ret
|
||||
|
||||
# Support for void Copy::arrayof_conjoint_jshorts(void* from,
|
||||
# void* to,
|
||||
# size_t count)
|
||||
.p2align 4,,15
|
||||
DECLARE_FUNC(_mmx_Copy_arrayof_conjoint_jshorts):
|
||||
pushl %esi
|
||||
movl 4+12(%esp),%ecx
|
||||
pushl %edi
|
||||
movl 8+ 4(%esp),%esi
|
||||
movl 8+ 8(%esp),%edi
|
||||
cmpl %esi,%edi
|
||||
leal -2(%esi,%ecx,2),%eax
|
||||
jbe mmx_acs_CopyRight
|
||||
cmpl %eax,%edi
|
||||
jbe mmx_acs_CopyLeft
|
||||
mmx_acs_CopyRight:
|
||||
movl %ecx,%eax
|
||||
sarl %ecx
|
||||
je 5f
|
||||
cmpl $33,%ecx
|
||||
jae 3f
|
||||
1: subl %esi,%edi
|
||||
.p2align 4,,15
|
||||
2: movl (%esi),%edx
|
||||
movl %edx,(%edi,%esi,1)
|
||||
addl $4,%esi
|
||||
subl $1,%ecx
|
||||
jnz 2b
|
||||
addl %esi,%edi
|
||||
jmp 5f
|
||||
3: smovl # align to 8 bytes, we know we are 4 byte aligned to start
|
||||
subl $1,%ecx
|
||||
4: .p2align 4,,15
|
||||
movq 0(%esi),%mm0
|
||||
addl $64,%edi
|
||||
movq 8(%esi),%mm1
|
||||
subl $16,%ecx
|
||||
movq 16(%esi),%mm2
|
||||
movq %mm0,-64(%edi)
|
||||
movq 24(%esi),%mm0
|
||||
movq %mm1,-56(%edi)
|
||||
movq 32(%esi),%mm1
|
||||
movq %mm2,-48(%edi)
|
||||
movq 40(%esi),%mm2
|
||||
movq %mm0,-40(%edi)
|
||||
movq 48(%esi),%mm0
|
||||
movq %mm1,-32(%edi)
|
||||
movq 56(%esi),%mm1
|
||||
movq %mm2,-24(%edi)
|
||||
movq %mm0,-16(%edi)
|
||||
addl $64,%esi
|
||||
movq %mm1,-8(%edi)
|
||||
cmpl $16,%ecx
|
||||
jge 4b
|
||||
emms
|
||||
testl %ecx,%ecx
|
||||
ja 1b
|
||||
5: andl $1,%eax
|
||||
je 7f
|
||||
6: movw (%esi),%dx
|
||||
movw %dx,(%edi)
|
||||
7: popl %edi
|
||||
popl %esi
|
||||
ret
|
||||
mmx_acs_CopyLeft:
|
||||
std
|
||||
leal -4(%edi,%ecx,2),%edi
|
||||
movl %eax,%esi
|
||||
movl %ecx,%eax
|
||||
subl $2,%esi
|
||||
sarl %ecx
|
||||
je 4f
|
||||
cmpl $32,%ecx
|
||||
ja 3f
|
||||
subl %esi,%edi
|
||||
.p2align 4,,15
|
||||
2: movl (%esi),%edx
|
||||
movl %edx,(%edi,%esi,1)
|
||||
subl $4,%esi
|
||||
subl $1,%ecx
|
||||
jnz 2b
|
||||
addl %esi,%edi
|
||||
jmp 4f
|
||||
3: rep; smovl
|
||||
4: andl $1,%eax
|
||||
je 6f
|
||||
addl $2,%esi
|
||||
addl $2,%edi
|
||||
5: movw (%esi),%dx
|
||||
movw %dx,(%edi)
|
||||
6: cld
|
||||
popl %edi
|
||||
popl %esi
|
||||
ret
|
||||
|
||||
|
||||
# Support for 64-bit Atomic::cmpxchg. As read from the stack below, the
|
||||
# C-level arguments of this helper are, in order: int64_t exchange_value,
|
||||
# volatile int64_t* dest, int64_t compare_value.
|
||||
# The result is whatever cmpxchg8b leaves in %edx:%eax.
|
||||
.p2align 4,,15
|
||||
DECLARE_FUNC(_Atomic_cmpxchg_long):
|
||||
# 8(%esp) : return PC
|
||||
pushl %ebx # 4(%esp) : old %ebx
|
||||
pushl %edi # 0(%esp) : old %edi
|
||||
movl 12(%esp), %ebx # 12(%esp) : exchange_value (low)
|
||||
movl 16(%esp), %ecx # 16(%esp) : exchange_value (high)
|
||||
movl 24(%esp), %eax # 24(%esp) : compare_value (low)
|
||||
movl 28(%esp), %edx # 28(%esp) : compare_value (high)
|
||||
movl 20(%esp), %edi # 20(%esp) : dest
|
||||
lock
|
||||
cmpxchg8b (%edi) # compares %edx:%eax with (%edi); stores %ecx:%ebx on match
|
||||
popl %edi # restore the two registers saved on entry
|
||||
popl %ebx
|
||||
ret
|
||||
|
||||
|
||||
# Support for int64_t Atomic::load and Atomic::store.
|
||||
# void _Atomic_move_long(const volatile int64_t* src, volatile int64_t* dst)
# Copies the 8 bytes at src to dst through the x87 register stack, using one
# 64-bit load and one 64-bit store rather than two 32-bit moves.
# NOTE(review): presumably chosen so that each access is a single 64-bit
# memory operation; confirm against the callers.
|
||||
.p2align 4,,15
|
||||
DECLARE_FUNC(_Atomic_move_long):
|
||||
movl 4(%esp), %eax # src
|
||||
fildll (%eax) # push the 64-bit integer at src onto the x87 stack
|
||||
movl 8(%esp), %eax # dest
|
||||
fistpll (%eax) # store it to dest and pop the x87 stack
|
||||
ret
|
||||
@ -1,518 +0,0 @@
|
||||
#
|
||||
# Copyright (c) 2004, 2024, Oracle and/or its affiliates. All rights reserved.
|
||||
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
#
|
||||
# This code is free software; you can redistribute it and/or modify it
|
||||
# under the terms of the GNU General Public License version 2 only, as
|
||||
# published by the Free Software Foundation.
|
||||
#
|
||||
# This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
# version 2 for more details (a copy is included in the LICENSE file that
|
||||
# accompanied this code).
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License version
|
||||
# 2 along with this work; if not, write to the Free Software Foundation,
|
||||
# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
#
|
||||
# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
# or visit www.oracle.com if you need additional information or have any
|
||||
# questions.
|
||||
#
|
||||
|
||||
#include "defs.S.inc"
|
||||
|
||||
# NOTE WELL! The _Copy functions are called directly
|
||||
# from server-compiler-generated code via CallLeafNoFP,
|
||||
# which means that they *must* either not use floating
|
||||
# point or use it in the same manner as does the server
|
||||
# compiler.
|
||||
|
||||
.text
|
||||
|
||||
# SpinPause: spin-wait hint for busy-wait loops.
# Takes no arguments; executes one pause hint and returns 1 in %eax.
.p2align 4,,15
|
||||
DECLARE_FUNC(SpinPause):
|
||||
rep
|
||||
nop # "rep; nop" is the encoding of the PAUSE instruction
|
||||
movl $1, %eax # return value: always 1
|
||||
ret
|
||||
|
||||
# Support for void Copy::arrayof_conjoint_bytes(void* from,
|
||||
# void* to,
|
||||
# size_t count)
|
||||
#
|
||||
.p2align 4,,15
|
||||
DECLARE_FUNC(_Copy_arrayof_conjoint_bytes):
|
||||
pushl %esi
|
||||
movl 4+12(%esp),%ecx # count
|
||||
pushl %edi
|
||||
movl 8+ 4(%esp),%esi # from
|
||||
movl 8+ 8(%esp),%edi # to
|
||||
cmpl %esi,%edi
|
||||
leal -1(%esi,%ecx),%eax # from + count - 1
|
||||
jbe acb_CopyRight
|
||||
cmpl %eax,%edi
|
||||
jbe acb_CopyLeft
|
||||
# copy from low to high
|
||||
acb_CopyRight:
|
||||
cmpl $3,%ecx
|
||||
jbe 5f
|
||||
1: movl %ecx,%eax
|
||||
shrl $2,%ecx
|
||||
jz 4f
|
||||
cmpl $32,%ecx
|
||||
ja 3f
|
||||
# copy aligned dwords
|
||||
subl %esi,%edi
|
||||
.p2align 4,,15
|
||||
2: movl (%esi),%edx
|
||||
movl %edx,(%edi,%esi,1)
|
||||
addl $4,%esi
|
||||
subl $1,%ecx
|
||||
jnz 2b
|
||||
addl %esi,%edi
|
||||
jmp 4f
|
||||
# copy aligned dwords
|
||||
3: rep; smovl
|
||||
4: movl %eax,%ecx
|
||||
5: andl $3,%ecx
|
||||
jz 7f
|
||||
# copy suffix
|
||||
xorl %eax,%eax
|
||||
6: movb (%esi,%eax,1),%dl
|
||||
movb %dl,(%edi,%eax,1)
|
||||
addl $1,%eax
|
||||
subl $1,%ecx
|
||||
jnz 6b
|
||||
7: popl %edi
|
||||
popl %esi
|
||||
ret
|
||||
acb_CopyLeft:
|
||||
std
|
||||
leal -4(%edi,%ecx),%edi # to + count - 4
|
||||
movl %eax,%esi # from + count - 1
|
||||
movl %ecx,%eax
|
||||
subl $3,%esi # from + count - 4
|
||||
cmpl $3,%ecx
|
||||
jbe 5f
|
||||
1: shrl $2,%ecx
|
||||
jz 4f
|
||||
cmpl $32,%ecx
|
||||
jbe 2f # <= 32 dwords
|
||||
rep; smovl
|
||||
jmp 4f
|
||||
.space 8
|
||||
2: subl %esi,%edi
|
||||
.p2align 4,,15
|
||||
3: movl (%esi),%edx
|
||||
movl %edx,(%edi,%esi,1)
|
||||
subl $4,%esi
|
||||
subl $1,%ecx
|
||||
jnz 3b
|
||||
addl %esi,%edi
|
||||
4: movl %eax,%ecx
|
||||
5: andl $3,%ecx
|
||||
jz 7f
|
||||
subl %esi,%edi
|
||||
addl $3,%esi
|
||||
6: movb (%esi),%dl
|
||||
movb %dl,(%edi,%esi,1)
|
||||
subl $1,%esi
|
||||
subl $1,%ecx
|
||||
jnz 6b
|
||||
7: cld
|
||||
popl %edi
|
||||
popl %esi
|
||||
ret
|
||||
|
||||
# Support for void Copy::conjoint_jshorts_atomic(void* from,
|
||||
# void* to,
|
||||
# size_t count)
|
||||
.p2align 4,,15
|
||||
DECLARE_FUNC(_Copy_conjoint_jshorts_atomic):
|
||||
pushl %esi
|
||||
movl 4+12(%esp),%ecx # count
|
||||
pushl %edi
|
||||
movl 8+ 4(%esp),%esi # from
|
||||
movl 8+ 8(%esp),%edi # to
|
||||
cmpl %esi,%edi
|
||||
leal -2(%esi,%ecx,2),%eax # from + count*2 - 2
|
||||
jbe cs_CopyRight
|
||||
cmpl %eax,%edi
|
||||
jbe cs_CopyLeft
|
||||
# copy from low to high
|
||||
cs_CopyRight:
|
||||
# align source address at dword address boundary
|
||||
movl %esi,%eax # original from
|
||||
andl $3,%eax # either 0 or 2
|
||||
jz 1f # no prefix
|
||||
# copy prefix
|
||||
subl $1,%ecx
|
||||
jl 5f # zero count
|
||||
movw (%esi),%dx
|
||||
movw %dx,(%edi)
|
||||
addl %eax,%esi # %eax == 2
|
||||
addl %eax,%edi
|
||||
1: movl %ecx,%eax # word count less prefix
|
||||
sarl %ecx # dword count
|
||||
jz 4f # no dwords to move
|
||||
cmpl $32,%ecx
|
||||
jbe 2f # <= 32 dwords
|
||||
# copy aligned dwords
|
||||
rep; smovl
|
||||
jmp 4f
|
||||
# copy aligned dwords
|
||||
2: subl %esi,%edi
|
||||
.p2align 4,,15
|
||||
3: movl (%esi),%edx
|
||||
movl %edx,(%edi,%esi,1)
|
||||
addl $4,%esi
|
||||
subl $1,%ecx
|
||||
jnz 3b
|
||||
addl %esi,%edi
|
||||
4: andl $1,%eax # suffix count
|
||||
jz 5f # no suffix
|
||||
# copy suffix
|
||||
movw (%esi),%dx
|
||||
movw %dx,(%edi)
|
||||
5: popl %edi
|
||||
popl %esi
|
||||
ret
|
||||
# copy from high to low
|
||||
cs_CopyLeft:
|
||||
std
|
||||
leal -4(%edi,%ecx,2),%edi # to + count*2 - 4
|
||||
movl %eax,%esi # from + count*2 - 2
|
||||
movl %ecx,%eax
|
||||
subl $2,%esi # from + count*2 - 4
|
||||
1: sarl %ecx # dword count
|
||||
jz 4f # no dwords to move
|
||||
cmpl $32,%ecx
|
||||
ja 3f # > 32 dwords
|
||||
subl %esi,%edi
|
||||
.p2align 4,,15
|
||||
2: movl (%esi),%edx
|
||||
movl %edx,(%edi,%esi,1)
|
||||
subl $4,%esi
|
||||
subl $1,%ecx
|
||||
jnz 2b
|
||||
addl %esi,%edi
|
||||
jmp 4f
|
||||
3: rep; smovl
|
||||
4: andl $1,%eax # suffix count
|
||||
jz 5f # no suffix
|
||||
# copy suffix
|
||||
addl $2,%esi
|
||||
addl $2,%edi
|
||||
movw (%esi),%dx
|
||||
movw %dx,(%edi)
|
||||
5: cld
|
||||
popl %edi
|
||||
popl %esi
|
||||
ret
|
||||
|
||||
# Support for void Copy::arrayof_conjoint_jshorts(void* from,
|
||||
# void* to,
|
||||
# size_t count)
|
||||
.p2align 4,,15
|
||||
DECLARE_FUNC(_Copy_arrayof_conjoint_jshorts):
|
||||
pushl %esi
|
||||
movl 4+12(%esp),%ecx # count
|
||||
pushl %edi
|
||||
movl 8+ 4(%esp),%esi # from
|
||||
movl 8+ 8(%esp),%edi # to
|
||||
cmpl %esi,%edi
|
||||
leal -2(%esi,%ecx,2),%eax # from + count*2 - 2
|
||||
jbe acs_CopyRight
|
||||
cmpl %eax,%edi
|
||||
jbe acs_CopyLeft
|
||||
acs_CopyRight:
|
||||
movl %ecx,%eax # word count
|
||||
sarl %ecx # dword count
|
||||
jz 4f # no dwords to move
|
||||
cmpl $32,%ecx
|
||||
jbe 2f # <= 32 dwords
|
||||
# copy aligned dwords
|
||||
rep; smovl
|
||||
jmp 4f
|
||||
# copy aligned dwords
|
||||
.space 5
|
||||
2: subl %esi,%edi
|
||||
.p2align 4,,15
|
||||
3: movl (%esi),%edx
|
||||
movl %edx,(%edi,%esi,1)
|
||||
addl $4,%esi
|
||||
subl $1,%ecx
|
||||
jnz 3b
|
||||
addl %esi,%edi
|
||||
4: andl $1,%eax # suffix count
|
||||
jz 5f # no suffix
|
||||
# copy suffix
|
||||
movw (%esi),%dx
|
||||
movw %dx,(%edi)
|
||||
5: popl %edi
|
||||
popl %esi
|
||||
ret
|
||||
acs_CopyLeft:
|
||||
std
|
||||
leal -4(%edi,%ecx,2),%edi # to + count*2 - 4
|
||||
movl %eax,%esi # from + count*2 - 2
|
||||
movl %ecx,%eax
|
||||
subl $2,%esi # from + count*2 - 4
|
||||
sarl %ecx # dword count
|
||||
jz 4f # no dwords to move
|
||||
cmpl $32,%ecx
|
||||
ja 3f # > 32 dwords
|
||||
subl %esi,%edi
|
||||
.p2align 4,,15
|
||||
2: movl (%esi),%edx
|
||||
movl %edx,(%edi,%esi,1)
|
||||
subl $4,%esi
|
||||
subl $1,%ecx
|
||||
jnz 2b
|
||||
addl %esi,%edi
|
||||
jmp 4f
|
||||
3: rep; smovl
|
||||
4: andl $1,%eax # suffix count
|
||||
jz 5f # no suffix
|
||||
# copy suffix
|
||||
addl $2,%esi
|
||||
addl $2,%edi
|
||||
movw (%esi),%dx
|
||||
movw %dx,(%edi)
|
||||
5: cld
|
||||
popl %edi
|
||||
popl %esi
|
||||
ret
|
||||
|
||||
# Support for void Copy::conjoint_jints_atomic(void* from,
|
||||
# void* to,
|
||||
# size_t count)
|
||||
# Equivalent to
|
||||
# arrayof_conjoint_jints
|
||||
.p2align 4,,15
|
||||
DECLARE_FUNC(_Copy_conjoint_jints_atomic):
|
||||
DECLARE_FUNC(_Copy_arrayof_conjoint_jints):
|
||||
pushl %esi
|
||||
movl 4+12(%esp),%ecx # count
|
||||
pushl %edi
|
||||
movl 8+ 4(%esp),%esi # from
|
||||
movl 8+ 8(%esp),%edi # to
|
||||
cmpl %esi,%edi
|
||||
leal -4(%esi,%ecx,4),%eax # from + count*4 - 4
|
||||
jbe ci_CopyRight
|
||||
cmpl %eax,%edi
|
||||
jbe ci_CopyLeft
|
||||
ci_CopyRight:
|
||||
cmpl $32,%ecx
|
||||
jbe 2f # <= 32 dwords
|
||||
rep; smovl
|
||||
popl %edi
|
||||
popl %esi
|
||||
ret
|
||||
.space 10
|
||||
2: subl %esi,%edi
|
||||
jmp 4f
|
||||
.p2align 4,,15
|
||||
3: movl (%esi),%edx
|
||||
movl %edx,(%edi,%esi,1)
|
||||
addl $4,%esi
|
||||
4: subl $1,%ecx
|
||||
jge 3b
|
||||
popl %edi
|
||||
popl %esi
|
||||
ret
|
||||
ci_CopyLeft:
|
||||
std
|
||||
leal -4(%edi,%ecx,4),%edi # to + count*4 - 4
|
||||
cmpl $32,%ecx
|
||||
ja 4f # > 32 dwords
|
||||
subl %eax,%edi # eax == from + count*4 - 4
|
||||
jmp 3f
|
||||
.p2align 4,,15
|
||||
2: movl (%eax),%edx
|
||||
movl %edx,(%edi,%eax,1)
|
||||
subl $4,%eax
|
||||
3: subl $1,%ecx
|
||||
jge 2b
|
||||
cld
|
||||
popl %edi
|
||||
popl %esi
|
||||
ret
|
||||
4: movl %eax,%esi # from + count*4 - 4
|
||||
rep; smovl
|
||||
cld
|
||||
popl %edi
|
||||
popl %esi
|
||||
ret
|
||||
|
||||
# Support for void Copy::conjoint_jlongs_atomic(jlong* from,
|
||||
# jlong* to,
|
||||
# size_t count)
|
||||
#
|
||||
# 32-bit
|
||||
#
|
||||
# count treated as signed
|
||||
/*
|
||||
#
|
||||
# if (from > to) {
|
||||
# while (--count >= 0) {
|
||||
# *to++ = *from++;
|
||||
# }
|
||||
# } else {
|
||||
# while (--count >= 0) {
|
||||
# to[count] = from[count];
|
||||
# }
|
||||
# }
|
||||
*/
|
||||
.p2align 4,,15
|
||||
DECLARE_FUNC(_Copy_conjoint_jlongs_atomic):
|
||||
movl 4+8(%esp),%ecx # count
|
||||
movl 4+0(%esp),%eax # from
|
||||
movl 4+4(%esp),%edx # to
|
||||
cmpl %eax,%edx
|
||||
jae cla_CopyLeft
|
||||
cla_CopyRight:
|
||||
subl %eax,%edx
|
||||
jmp 2f
|
||||
.p2align 4,,15
|
||||
1: fildll (%eax)
|
||||
fistpll (%edx,%eax,1)
|
||||
addl $8,%eax
|
||||
2: subl $1,%ecx
|
||||
jge 1b
|
||||
ret
|
||||
.p2align 4,,15
|
||||
3: fildll (%eax,%ecx,8)
|
||||
fistpll (%edx,%ecx,8)
|
||||
cla_CopyLeft:
|
||||
subl $1,%ecx
|
||||
jge 3b
|
||||
ret
|
||||
|
||||
# Support for void Copy::arrayof_conjoint_jshorts(void* from,
|
||||
# void* to,
|
||||
# size_t count)
|
||||
.p2align 4,,15
|
||||
DECLARE_FUNC(_mmx_Copy_arrayof_conjoint_jshorts):
|
||||
pushl %esi
|
||||
movl 4+12(%esp),%ecx
|
||||
pushl %edi
|
||||
movl 8+ 4(%esp),%esi
|
||||
movl 8+ 8(%esp),%edi
|
||||
cmpl %esi,%edi
|
||||
leal -2(%esi,%ecx,2),%eax
|
||||
jbe mmx_acs_CopyRight
|
||||
cmpl %eax,%edi
|
||||
jbe mmx_acs_CopyLeft
|
||||
mmx_acs_CopyRight:
|
||||
movl %ecx,%eax
|
||||
sarl %ecx
|
||||
je 5f
|
||||
cmpl $33,%ecx
|
||||
jae 3f
|
||||
1: subl %esi,%edi
|
||||
.p2align 4,,15
|
||||
2: movl (%esi),%edx
|
||||
movl %edx,(%edi,%esi,1)
|
||||
addl $4,%esi
|
||||
subl $1,%ecx
|
||||
jnz 2b
|
||||
addl %esi,%edi
|
||||
jmp 5f
|
||||
3: smovl # align to 8 bytes, we know we are 4 byte aligned to start
|
||||
subl $1,%ecx
|
||||
4: .p2align 4,,15
|
||||
movq 0(%esi),%mm0
|
||||
addl $64,%edi
|
||||
movq 8(%esi),%mm1
|
||||
subl $16,%ecx
|
||||
movq 16(%esi),%mm2
|
||||
movq %mm0,-64(%edi)
|
||||
movq 24(%esi),%mm0
|
||||
movq %mm1,-56(%edi)
|
||||
movq 32(%esi),%mm1
|
||||
movq %mm2,-48(%edi)
|
||||
movq 40(%esi),%mm2
|
||||
movq %mm0,-40(%edi)
|
||||
movq 48(%esi),%mm0
|
||||
movq %mm1,-32(%edi)
|
||||
movq 56(%esi),%mm1
|
||||
movq %mm2,-24(%edi)
|
||||
movq %mm0,-16(%edi)
|
||||
addl $64,%esi
|
||||
movq %mm1,-8(%edi)
|
||||
cmpl $16,%ecx
|
||||
jge 4b
|
||||
emms
|
||||
testl %ecx,%ecx
|
||||
ja 1b
|
||||
5: andl $1,%eax
|
||||
je 7f
|
||||
6: movw (%esi),%dx
|
||||
movw %dx,(%edi)
|
||||
7: popl %edi
|
||||
popl %esi
|
||||
ret
|
||||
mmx_acs_CopyLeft:
|
||||
std
|
||||
leal -4(%edi,%ecx,2),%edi
|
||||
movl %eax,%esi
|
||||
movl %ecx,%eax
|
||||
subl $2,%esi
|
||||
sarl %ecx
|
||||
je 4f
|
||||
cmpl $32,%ecx
|
||||
ja 3f
|
||||
subl %esi,%edi
|
||||
.p2align 4,,15
|
||||
2: movl (%esi),%edx
|
||||
movl %edx,(%edi,%esi,1)
|
||||
subl $4,%esi
|
||||
subl $1,%ecx
|
||||
jnz 2b
|
||||
addl %esi,%edi
|
||||
jmp 4f
|
||||
3: rep; smovl
|
||||
4: andl $1,%eax
|
||||
je 6f
|
||||
addl $2,%esi
|
||||
addl $2,%edi
|
||||
5: movw (%esi),%dx
|
||||
movw %dx,(%edi)
|
||||
6: cld
|
||||
popl %edi
|
||||
popl %esi
|
||||
ret
|
||||
|
||||
|
||||
# Support for jlong Atomic::cmpxchg(volatile jlong* dest,
|
||||
# jlong compare_value,
|
||||
# jlong exchange_value)
|
||||
#
# Note: the stack offsets read below show that this helper itself receives
# its arguments in the order exchange_value, dest, compare_value.
# The result is whatever cmpxchg8b leaves in %edx:%eax.
|
||||
.p2align 4,,15
|
||||
DECLARE_FUNC(_Atomic_cmpxchg_long):
|
||||
# 8(%esp) : return PC
|
||||
pushl %ebx # 4(%esp) : old %ebx
|
||||
pushl %edi # 0(%esp) : old %edi
|
||||
movl 12(%esp), %ebx # 12(%esp) : exchange_value (low)
|
||||
movl 16(%esp), %ecx # 16(%esp) : exchange_value (high)
|
||||
movl 24(%esp), %eax # 24(%esp) : compare_value (low)
|
||||
movl 28(%esp), %edx # 28(%esp) : compare_value (high)
|
||||
movl 20(%esp), %edi # 20(%esp) : dest
|
||||
lock cmpxchg8b (%edi) # compares %edx:%eax with (%edi); stores %ecx:%ebx on match
|
||||
popl %edi # restore the two registers saved on entry
|
||||
popl %ebx
|
||||
ret
|
||||
|
||||
|
||||
# Support for jlong Atomic::load and Atomic::store.
|
||||
# void _Atomic_move_long(const volatile jlong* src, volatile jlong* dst)
# Copies the 8 bytes at src to dst through the x87 register stack, using one
# 64-bit load and one 64-bit store rather than two 32-bit moves.
# NOTE(review): presumably chosen so that each access is a single 64-bit
# memory operation; confirm against the callers.
|
||||
.p2align 4,,15
|
||||
DECLARE_FUNC(_Atomic_move_long):
|
||||
movl 4(%esp), %eax # src
|
||||
fildll (%eax) # push the 64-bit integer at src onto the x87 stack
|
||||
movl 8(%esp), %eax # dest
|
||||
fistpll (%eax) # store it to dest and pop the x87 stack
|
||||
ret
|
||||
@ -1,41 +0,0 @@
|
||||
#
|
||||
# Copyright (c) 2022 SAP SE. All rights reserved.
|
||||
# Copyright (c) 2022, 2024, Oracle and/or its affiliates. All rights reserved.
|
||||
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
#
|
||||
# This code is free software; you can redistribute it and/or modify it
|
||||
# under the terms of the GNU General Public License version 2 only, as
|
||||
# published by the Free Software Foundation.
|
||||
#
|
||||
# This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
# version 2 for more details (a copy is included in the LICENSE file that
|
||||
# accompanied this code).
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License version
|
||||
# 2 along with this work; if not, write to the Free Software Foundation,
|
||||
# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
#
|
||||
# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
# or visit www.oracle.com if you need additional information or have any
|
||||
# questions.
|
||||
#
|
||||
|
||||
#include "defs.S.inc"
|
||||
|
||||
.text
|
||||
|
||||
# Support for int SafeFetch32(int* address, int defaultval);
|
||||
#
# Three entry labels are exported. SafeFetch32_impl is the function entry,
# _SafeFetch32_fault marks the single load that may fault, and
# _SafeFetch32_continuation is a second exit that returns the default value.
# NOTE(review): presumably a fault handler outside this file resumes at
# _SafeFetch32_continuation when the faulting pc is _SafeFetch32_fault; verify.
|
||||
# 8(%esp) : default value
|
||||
# 4(%esp) : crash address
|
||||
# 0(%esp) : return pc
|
||||
DECLARE_FUNC(SafeFetch32_impl):
|
||||
movl 4(%esp),%ecx # load address from stack
|
||||
DECLARE_FUNC(_SafeFetch32_fault):
|
||||
movl (%ecx), %eax # load target value, may fault
|
||||
ret
|
||||
DECLARE_FUNC(_SafeFetch32_continuation):
|
||||
movl 8(%esp),%eax # load default value from stack
|
||||
ret
|
||||
Loading…
x
Reference in New Issue
Block a user