diff --git a/make/autoconf/basic.m4 b/make/autoconf/basic.m4 index 19e6641b94d..6daba35547b 100644 --- a/make/autoconf/basic.m4 +++ b/make/autoconf/basic.m4 @@ -75,10 +75,11 @@ AC_DEFUN_ONCE([BASIC_SETUP_PATHS], AC_MSG_NOTICE([Rewriting ORIGINAL_PATH to $REWRITTEN_PATH]) fi + if test "x$OPENJDK_TARGET_CPU" = xx86 && test "x$with_jvm_variants" != xzero; then + AC_MSG_ERROR([32-bit x86 builds are not supported]) + fi + if test "x$OPENJDK_TARGET_OS" = "xwindows"; then - if test "x$OPENJDK_TARGET_CPU_BITS" = "x32"; then - AC_MSG_ERROR([32-bit Windows builds are not supported]) - fi BASIC_SETUP_PATHS_WINDOWS fi diff --git a/make/autoconf/platform.m4 b/make/autoconf/platform.m4 index 937d8c37927..31451d0c37f 100644 --- a/make/autoconf/platform.m4 +++ b/make/autoconf/platform.m4 @@ -666,17 +666,7 @@ AC_DEFUN([PLATFORM_CHECK_DEPRECATION], [ AC_ARG_ENABLE(deprecated-ports, [AS_HELP_STRING([--enable-deprecated-ports@<:@=yes/no@:>@], [Suppress the error when configuring for a deprecated port @<:@no@:>@])]) - # Unfortunately, variants have not been parsed yet, so we have to check the configure option - # directly. Allow only the directly specified Zero variant, treat any other mix as containing - # something non-Zero. - if test "x$OPENJDK_TARGET_CPU" = xx86 && test "x$with_jvm_variants" != xzero; then - if test "x$enable_deprecated_ports" = "xyes"; then - AC_MSG_WARN([The 32-bit x86 port is deprecated and may be removed in a future release.]) - else - AC_MSG_ERROR(m4_normalize([The 32-bit x86 port is deprecated and may be removed in a future release. - Use --enable-deprecated-ports=yes to suppress this error.])) - fi - fi + # There are no deprecated ports. Implement the deprecation warnings here. ]) AC_DEFUN_ONCE([PLATFORM_SETUP_OPENJDK_BUILD_OS_VERSION], diff --git a/src/hotspot/cpu/x86/downcallLinker_x86_32.cpp b/src/hotspot/cpu/x86/downcallLinker_x86_32.cpp deleted file mode 100644 index 3c7d93fc79e..00000000000 --- a/src/hotspot/cpu/x86/downcallLinker_x86_32.cpp +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Copyright (c) 2020, 2025, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - */ - -#include "prims/downcallLinker.hpp" - -RuntimeStub* DowncallLinker::make_downcall_stub(BasicType* signature, - int num_args, - BasicType ret_bt, - const ABIDescriptor& abi, - const GrowableArray& input_registers, - const GrowableArray& output_registers, - bool needs_return_buffer, - int captured_state_mask, - bool needs_transition) { - Unimplemented(); - return nullptr; -} - -void DowncallLinker::StubGenerator::pd_add_offset_to_oop(VMStorage reg_oop, VMStorage reg_offset, - VMStorage tmp1, VMStorage tmp2) const { - Unimplemented(); -} diff --git a/src/hotspot/cpu/x86/foreignGlobals_x86_32.cpp b/src/hotspot/cpu/x86/foreignGlobals_x86_32.cpp deleted file mode 100644 index 18aa454e61c..00000000000 --- a/src/hotspot/cpu/x86/foreignGlobals_x86_32.cpp +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright (c) 2022, 2025, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - */ - -#include "code/vmreg.hpp" -#include "prims/foreignGlobals.hpp" -#include "utilities/debug.hpp" - -class MacroAssembler; - -bool ForeignGlobals::is_foreign_linker_supported() { - return false; -} - -const ABIDescriptor ForeignGlobals::parse_abi_descriptor(jobject jabi) { - Unimplemented(); - return {}; -} - -int RegSpiller::pd_reg_size(VMStorage reg) { - Unimplemented(); - return -1; -} - -void RegSpiller::pd_store_reg(MacroAssembler* masm, int offset, VMStorage reg) { - Unimplemented(); -} - -void RegSpiller::pd_load_reg(MacroAssembler* masm, int offset, VMStorage reg) { - Unimplemented(); -} - -void ArgumentShuffle::pd_generate(MacroAssembler* masm, VMStorage tmp, int in_stk_bias, int out_stk_bias) const { - Unimplemented(); -} diff --git a/src/hotspot/cpu/x86/gc/shenandoah/shenandoah_x86_32.ad b/src/hotspot/cpu/x86/gc/shenandoah/shenandoah_x86_32.ad deleted file mode 100644 index 3cf82bf9fb1..00000000000 --- a/src/hotspot/cpu/x86/gc/shenandoah/shenandoah_x86_32.ad +++ /dev/null @@ -1,71 +0,0 @@ -// -// Copyright (c) 2018, Red Hat, Inc. All rights reserved. -// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -// -// This code is free software; you can redistribute it and/or modify it -// under the terms of the GNU General Public License version 2 only, as -// published by the Free Software Foundation. -// -// This code is distributed in the hope that it will be useful, but WITHOUT -// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -// version 2 for more details (a copy is included in the LICENSE file that -// accompanied this code). -// -// You should have received a copy of the GNU General Public License version -// 2 along with this work; if not, write to the Free Software Foundation, -// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -// -// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -// or visit www.oracle.com if you need additional information or have any -// questions. -// -// - -source_hpp %{ -#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp" -#include "gc/shenandoah/c2/shenandoahSupport.hpp" -%} - -instruct compareAndSwapP_shenandoah(rRegI res, - memory mem_ptr, - eRegP tmp1, eRegP tmp2, - eAXRegP oldval, eRegP newval, - eFlagsReg cr) -%{ - match(Set res (ShenandoahCompareAndSwapP mem_ptr (Binary oldval newval))); - match(Set res (ShenandoahWeakCompareAndSwapP mem_ptr (Binary oldval newval))); - effect(TEMP tmp1, TEMP tmp2, KILL cr, KILL oldval); - - format %{ "shenandoah_cas_oop $mem_ptr,$newval" %} - - ins_encode %{ - ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm, - $res$$Register, $mem_ptr$$Address, $oldval$$Register, $newval$$Register, - false, // swap - $tmp1$$Register, $tmp2$$Register - ); - %} - ins_pipe( pipe_cmpxchg ); -%} - -instruct compareAndExchangeP_shenandoah(memory mem_ptr, - eAXRegP oldval, eRegP newval, - eRegP tmp1, eRegP tmp2, - eFlagsReg cr) -%{ - match(Set oldval (ShenandoahCompareAndExchangeP mem_ptr (Binary oldval newval))); - effect(KILL cr, TEMP tmp1, TEMP tmp2); - ins_cost(1000); - - format %{ "shenandoah_cas_oop $mem_ptr,$newval" %} - - ins_encode %{ - ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm, - noreg, $mem_ptr$$Address, $oldval$$Register, $newval$$Register, - true, // exchange - $tmp1$$Register, $tmp2$$Register - ); - %} - ins_pipe( pipe_cmpxchg ); -%} diff --git a/src/hotspot/cpu/x86/interpreterRT_x86_32.cpp b/src/hotspot/cpu/x86/interpreterRT_x86_32.cpp deleted file mode 100644 index 14f11596924..00000000000 --- a/src/hotspot/cpu/x86/interpreterRT_x86_32.cpp +++ /dev/null @@ -1,145 +0,0 @@ -/* - * Copyright (c) 1998, 2025, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - * - */ - -#include "interpreter/interp_masm.hpp" -#include "interpreter/interpreter.hpp" -#include "interpreter/interpreterRuntime.hpp" -#include "memory/allocation.inline.hpp" -#include "oops/method.hpp" -#include "oops/oop.inline.hpp" -#include "runtime/handles.inline.hpp" -#include "runtime/icache.hpp" -#include "runtime/interfaceSupport.inline.hpp" -#include "runtime/signature.hpp" - - -#define __ _masm-> - - -// Implementation of SignatureHandlerGenerator -InterpreterRuntime::SignatureHandlerGenerator::SignatureHandlerGenerator(const methodHandle& method, CodeBuffer* buffer) : - NativeSignatureIterator(method) { - _masm = new MacroAssembler(buffer); -} - -void InterpreterRuntime::SignatureHandlerGenerator::pass_int() { - move(offset(), jni_offset() + 1); -} - -void InterpreterRuntime::SignatureHandlerGenerator::pass_float() { - move(offset(), jni_offset() + 1); -} - -void InterpreterRuntime::SignatureHandlerGenerator::pass_long() { - move(offset(), jni_offset() + 2); - move(offset() + 1, jni_offset() + 1); -} - -void InterpreterRuntime::SignatureHandlerGenerator::pass_object() { - box (offset(), jni_offset() + 1); -} - -void InterpreterRuntime::SignatureHandlerGenerator::move(int from_offset, int to_offset) { - __ movl(temp(), Address(from(), Interpreter::local_offset_in_bytes(from_offset))); - __ movl(Address(to(), to_offset * wordSize), temp()); -} - - -void InterpreterRuntime::SignatureHandlerGenerator::box(int from_offset, int to_offset) { - __ lea(temp(), Address(from(), Interpreter::local_offset_in_bytes(from_offset))); - __ cmpptr(Address(from(), Interpreter::local_offset_in_bytes(from_offset)), NULL_WORD); // do not use temp() to avoid AGI - Label L; - __ jcc(Assembler::notZero, L); - __ movptr(temp(), NULL_WORD); - __ bind(L); - __ movptr(Address(to(), to_offset * wordSize), temp()); -} - - -void InterpreterRuntime::SignatureHandlerGenerator::generate( uint64_t fingerprint) { - // generate code to handle arguments - iterate(fingerprint); - // return result handler - __ lea(rax, - ExternalAddress((address)Interpreter::result_handler(method()->result_type()))); - // return - __ ret(0); - __ flush(); -} - - -Register InterpreterRuntime::SignatureHandlerGenerator::from() { return rdi; } -Register InterpreterRuntime::SignatureHandlerGenerator::to() { return rsp; } -Register InterpreterRuntime::SignatureHandlerGenerator::temp() { return rcx; } - - -// Implementation of SignatureHandlerLibrary - -void SignatureHandlerLibrary::pd_set_handler(address handler) {} - -class SlowSignatureHandler: public NativeSignatureIterator { - private: - address _from; - intptr_t* _to; - - virtual void pass_int() { - *_to++ = *(jint *)(_from+Interpreter::local_offset_in_bytes(0)); - _from -= Interpreter::stackElementSize; - } - - virtual void pass_float() { - *_to++ = *(jint *)(_from+Interpreter::local_offset_in_bytes(0)); - _from -= Interpreter::stackElementSize; - } - - virtual void pass_long() { - _to[0] = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(1)); - _to[1] = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(0)); - _to += 2; - _from -= 2*Interpreter::stackElementSize; - } - - virtual void pass_object() { - // pass address of from - intptr_t from_addr = (intptr_t)(_from + Interpreter::local_offset_in_bytes(0)); - *_to++ = (*(intptr_t*)from_addr == 0) ? NULL_WORD : from_addr; - _from -= Interpreter::stackElementSize; - } - - public: - SlowSignatureHandler(const methodHandle& method, address from, intptr_t* to) : - NativeSignatureIterator(method) { - _from = from; - _to = to + (is_static() ? 2 : 1); - } -}; - -JRT_ENTRY(address, InterpreterRuntime::slow_signature_handler(JavaThread* current, Method* method, intptr_t* from, intptr_t* to)) - methodHandle m(current, (Method*)method); - assert(m->is_native(), "sanity check"); - // handle arguments - SlowSignatureHandler(m, (address)from, to + 1).iterate((uint64_t)CONST64(-1)); - // return result handler - return Interpreter::result_handler(m->result_type()); -JRT_END diff --git a/src/hotspot/cpu/x86/jniFastGetField_x86_32.cpp b/src/hotspot/cpu/x86/jniFastGetField_x86_32.cpp deleted file mode 100644 index eee82a5c682..00000000000 --- a/src/hotspot/cpu/x86/jniFastGetField_x86_32.cpp +++ /dev/null @@ -1,323 +0,0 @@ -/* - * Copyright (c) 2004, 2025, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - * - */ - -#include "asm/macroAssembler.hpp" -#include "memory/resourceArea.hpp" -#include "prims/jniFastGetField.hpp" -#include "prims/jvm_misc.hpp" -#include "prims/jvmtiExport.hpp" -#include "runtime/os.inline.hpp" -#include "runtime/safepoint.hpp" -#include "runtime/stubRoutines.hpp" - -#define __ masm-> - -#define BUFFER_SIZE 30 - -// Instead of issuing lfence for LoadLoad barrier, we create data dependency -// between loads, which is much more efficient than lfence. - -address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) { - const char *name = nullptr; - switch (type) { - case T_BOOLEAN: name = "jni_fast_GetBooleanField"; break; - case T_BYTE: name = "jni_fast_GetByteField"; break; - case T_CHAR: name = "jni_fast_GetCharField"; break; - case T_SHORT: name = "jni_fast_GetShortField"; break; - case T_INT: name = "jni_fast_GetIntField"; break; - default: ShouldNotReachHere(); - } - ResourceMark rm; - BufferBlob* blob = BufferBlob::create(name, BUFFER_SIZE*wordSize); - CodeBuffer cbuf(blob); - MacroAssembler* masm = new MacroAssembler(&cbuf); - address fast_entry = __ pc(); - - Label slow; - - // stack layout: offset from rsp (in words): - // return pc 0 - // jni env 1 - // obj 2 - // jfieldID 3 - - ExternalAddress counter(SafepointSynchronize::safepoint_counter_addr()); - __ mov32 (rcx, counter); - __ testb (rcx, 1); - __ jcc (Assembler::notZero, slow); - - if (JvmtiExport::can_post_field_access()) { - // Check to see if a field access watch has been set before we - // take the fast path. - __ cmp32(ExternalAddress((address) JvmtiExport::get_field_access_count_addr()), 0); - __ jcc(Assembler::notZero, slow); - } - - __ mov(rax, rcx); - __ andptr(rax, 1); // rax, must end up 0 - __ movptr(rdx, Address(rsp, rax, Address::times_1, 2*wordSize)); - // obj, notice rax, is 0. - // rdx is data dependent on rcx. - __ movptr(rax, Address(rsp, 3*wordSize)); // jfieldID - - __ clear_jobject_tag(rdx); - - __ movptr(rdx, Address(rdx, 0)); // *obj - __ shrptr (rax, 2); // offset - - assert(count < LIST_CAPACITY, "LIST_CAPACITY too small"); - speculative_load_pclist[count] = __ pc(); - switch (type) { - case T_BOOLEAN: __ movzbl (rax, Address(rdx, rax, Address::times_1)); break; - case T_BYTE: __ movsbl (rax, Address(rdx, rax, Address::times_1)); break; - case T_CHAR: __ movzwl (rax, Address(rdx, rax, Address::times_1)); break; - case T_SHORT: __ movswl (rax, Address(rdx, rax, Address::times_1)); break; - case T_INT: __ movl (rax, Address(rdx, rax, Address::times_1)); break; - default: ShouldNotReachHere(); - } - - Address ca1; - __ lea(rdx, counter); - __ xorptr(rdx, rax); - __ xorptr(rdx, rax); - __ cmp32(rcx, Address(rdx, 0)); - // ca1 is the same as ca because - // rax, ^ counter_addr ^ rax, = address - // ca1 is data dependent on rax,. - __ jcc (Assembler::notEqual, slow); - - __ ret (0); - - slowcase_entry_pclist[count++] = __ pc(); - __ bind (slow); - address slow_case_addr = nullptr; - switch (type) { - case T_BOOLEAN: slow_case_addr = jni_GetBooleanField_addr(); break; - case T_BYTE: slow_case_addr = jni_GetByteField_addr(); break; - case T_CHAR: slow_case_addr = jni_GetCharField_addr(); break; - case T_SHORT: slow_case_addr = jni_GetShortField_addr(); break; - case T_INT: slow_case_addr = jni_GetIntField_addr(); break; - default: ShouldNotReachHere(); - } - // tail call - __ jump (RuntimeAddress(slow_case_addr)); - - __ flush (); - - return fast_entry; -} - -address JNI_FastGetField::generate_fast_get_boolean_field() { - return generate_fast_get_int_field0(T_BOOLEAN); -} - -address JNI_FastGetField::generate_fast_get_byte_field() { - return generate_fast_get_int_field0(T_BYTE); -} - -address JNI_FastGetField::generate_fast_get_char_field() { - return generate_fast_get_int_field0(T_CHAR); -} - -address JNI_FastGetField::generate_fast_get_short_field() { - return generate_fast_get_int_field0(T_SHORT); -} - -address JNI_FastGetField::generate_fast_get_int_field() { - return generate_fast_get_int_field0(T_INT); -} - -address JNI_FastGetField::generate_fast_get_long_field() { - const char *name = "jni_fast_GetLongField"; - ResourceMark rm; - BufferBlob* blob = BufferBlob::create(name, BUFFER_SIZE*wordSize); - CodeBuffer cbuf(blob); - MacroAssembler* masm = new MacroAssembler(&cbuf); - address fast_entry = __ pc(); - - Label slow; - - // stack layout: offset from rsp (in words): - // old rsi 0 - // return pc 1 - // jni env 2 - // obj 3 - // jfieldID 4 - - ExternalAddress counter(SafepointSynchronize::safepoint_counter_addr()); - - __ push (rsi); - __ mov32 (rcx, counter); - __ testb (rcx, 1); - __ jcc (Assembler::notZero, slow); - - if (JvmtiExport::can_post_field_access()) { - // Check to see if a field access watch has been set before we - // take the fast path. - __ cmp32(ExternalAddress((address) JvmtiExport::get_field_access_count_addr()), 0); - __ jcc(Assembler::notZero, slow); - } - - __ mov(rax, rcx); - __ andptr(rax, 1); // rax, must end up 0 - __ movptr(rdx, Address(rsp, rax, Address::times_1, 3*wordSize)); - // obj, notice rax, is 0. - // rdx is data dependent on rcx. - __ movptr(rsi, Address(rsp, 4*wordSize)); // jfieldID - - __ clear_jobject_tag(rdx); - - __ movptr(rdx, Address(rdx, 0)); // *obj - __ shrptr(rsi, 2); // offset - - assert(count < LIST_CAPACITY-1, "LIST_CAPACITY too small"); - speculative_load_pclist[count++] = __ pc(); - __ movptr(rax, Address(rdx, rsi, Address::times_1)); - speculative_load_pclist[count] = __ pc(); - __ movl(rdx, Address(rdx, rsi, Address::times_1, 4)); - - __ lea(rsi, counter); - __ xorptr(rsi, rdx); - __ xorptr(rsi, rax); - __ xorptr(rsi, rdx); - __ xorptr(rsi, rax); - __ cmp32(rcx, Address(rsi, 0)); - // ca1 is the same as ca because - // rax, ^ rdx ^ counter_addr ^ rax, ^ rdx = address - // ca1 is data dependent on both rax, and rdx. - __ jcc (Assembler::notEqual, slow); - - __ pop (rsi); - - __ ret (0); - - slowcase_entry_pclist[count-1] = __ pc(); - slowcase_entry_pclist[count++] = __ pc(); - __ bind (slow); - __ pop (rsi); - address slow_case_addr = jni_GetLongField_addr();; - // tail call - __ jump (RuntimeAddress(slow_case_addr)); - - __ flush (); - - return fast_entry; -} - -address JNI_FastGetField::generate_fast_get_float_field0(BasicType type) { - const char *name = nullptr; - switch (type) { - case T_FLOAT: name = "jni_fast_GetFloatField"; break; - case T_DOUBLE: name = "jni_fast_GetDoubleField"; break; - default: ShouldNotReachHere(); - } - ResourceMark rm; - BufferBlob* blob = BufferBlob::create(name, BUFFER_SIZE*wordSize); - CodeBuffer cbuf(blob); - MacroAssembler* masm = new MacroAssembler(&cbuf); - address fast_entry = __ pc(); - - Label slow_with_pop, slow; - - // stack layout: offset from rsp (in words): - // return pc 0 - // jni env 1 - // obj 2 - // jfieldID 3 - - ExternalAddress counter(SafepointSynchronize::safepoint_counter_addr()); - - __ mov32 (rcx, counter); - __ testb (rcx, 1); - __ jcc (Assembler::notZero, slow); - - if (JvmtiExport::can_post_field_access()) { - // Check to see if a field access watch has been set before we - // take the fast path. - __ cmp32(ExternalAddress((address) JvmtiExport::get_field_access_count_addr()), 0); - __ jcc(Assembler::notZero, slow); - } - - __ mov(rax, rcx); - __ andptr(rax, 1); // rax, must end up 0 - __ movptr(rdx, Address(rsp, rax, Address::times_1, 2*wordSize)); - // obj, notice rax, is 0. - // rdx is data dependent on rcx. - __ movptr(rax, Address(rsp, 3*wordSize)); // jfieldID - - __ clear_jobject_tag(rdx); - - __ movptr(rdx, Address(rdx, 0)); // *obj - __ shrptr(rax, 2); // offset - - assert(count < LIST_CAPACITY, "LIST_CAPACITY too small"); - speculative_load_pclist[count] = __ pc(); - switch (type) { - case T_FLOAT: __ fld_s (Address(rdx, rax, Address::times_1)); break; - case T_DOUBLE: __ fld_d (Address(rdx, rax, Address::times_1)); break; - default: ShouldNotReachHere(); - } - - Address ca1; - __ fst_s (Address(rsp, -4)); - __ lea(rdx, counter); - __ movl (rax, Address(rsp, -4)); - // garbage hi-order bits on 64bit are harmless. - __ xorptr(rdx, rax); - __ xorptr(rdx, rax); - __ cmp32(rcx, Address(rdx, 0)); - // rax, ^ counter_addr ^ rax, = address - // ca1 is data dependent on the field - // access. - __ jcc (Assembler::notEqual, slow_with_pop); - - __ ret (0); - - __ bind (slow_with_pop); - // invalid load. pop FPU stack. - __ fstp_d (0); - - slowcase_entry_pclist[count++] = __ pc(); - __ bind (slow); - address slow_case_addr = nullptr; - switch (type) { - case T_FLOAT: slow_case_addr = jni_GetFloatField_addr(); break; - case T_DOUBLE: slow_case_addr = jni_GetDoubleField_addr(); break; - default: ShouldNotReachHere(); - } - // tail call - __ jump (RuntimeAddress(slow_case_addr)); - - __ flush (); - - return fast_entry; -} - -address JNI_FastGetField::generate_fast_get_float_field() { - return generate_fast_get_float_field0(T_FLOAT); -} - -address JNI_FastGetField::generate_fast_get_double_field() { - return generate_fast_get_float_field0(T_DOUBLE); -} diff --git a/src/hotspot/cpu/x86/macroAssembler_x86_32_constants.cpp b/src/hotspot/cpu/x86/macroAssembler_x86_32_constants.cpp deleted file mode 100644 index 6fdda4c2f71..00000000000 --- a/src/hotspot/cpu/x86/macroAssembler_x86_32_constants.cpp +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright (c) 2022, 2025, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - * - */ - -#include "macroAssembler_x86.hpp" - -ATTRIBUTE_ALIGNED(16) static const juint _ONES[] = { - 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0xbff00000UL -}; -address MacroAssembler::ONES = (address)_ONES; - -ATTRIBUTE_ALIGNED(16) static const juint _PI4_INV[] = { - 0x6dc9c883UL, 0x3ff45f30UL -}; -address MacroAssembler::PI4_INV = (address)_PI4_INV; - -ATTRIBUTE_ALIGNED(16) static const juint _PI4X3[] = { - 0x54443000UL, 0xbfe921fbUL, 0x3b39a000UL, 0x3d373dcbUL, 0xe0e68948UL, - 0xba845c06UL -}; -address MacroAssembler::PI4X3 = (address)_PI4X3; - -ATTRIBUTE_ALIGNED(16) static const juint _PI4X4[] = { - 0x54400000UL, 0xbfe921fbUL, 0x1a600000UL, 0xbdc0b461UL, 0x2e000000UL, - 0xbb93198aUL, 0x252049c1UL, 0xb96b839aUL -}; -address MacroAssembler::PI4X4 = (address)_PI4X4; - -ATTRIBUTE_ALIGNED(16) static const juint _L_2IL0FLOATPACKET_0[] = { - 0xffffffffUL, 0x7fffffffUL, 0x00000000UL, 0x00000000UL -}; -address MacroAssembler::L_2IL0FLOATPACKET_0 = (address)_L_2IL0FLOATPACKET_0; diff --git a/src/hotspot/cpu/x86/macroAssembler_x86_32_cos.cpp b/src/hotspot/cpu/x86/macroAssembler_x86_32_cos.cpp deleted file mode 100644 index dce16756a66..00000000000 --- a/src/hotspot/cpu/x86/macroAssembler_x86_32_cos.cpp +++ /dev/null @@ -1,427 +0,0 @@ -/* -* Copyright (c) 2016, 2021, Intel Corporation. All rights reserved. -* Intel Math Library (LIBM) Source Code -* -* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -* -* This code is free software; you can redistribute it and/or modify it -* under the terms of the GNU General Public License version 2 only, as -* published by the Free Software Foundation. -* -* This code is distributed in the hope that it will be useful, but WITHOUT -* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -* version 2 for more details (a copy is included in the LICENSE file that -* accompanied this code). -* -* You should have received a copy of the GNU General Public License version -* 2 along with this work; if not, write to the Free Software Foundation, -* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -* -* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -* or visit www.oracle.com if you need additional information or have any -* questions. -* -*/ - -#include "asm/assembler.hpp" -#include "asm/assembler.inline.hpp" -#include "macroAssembler_x86.hpp" -#include "runtime/stubRoutines.hpp" -#include "utilities/globalDefinitions.hpp" - -/******************************************************************************/ -// ALGORITHM DESCRIPTION - COS() -// --------------------- -// -// 1. RANGE REDUCTION -// -// We perform an initial range reduction from X to r with -// -// X =~= N * pi/32 + r -// -// so that |r| <= pi/64 + epsilon. We restrict inputs to those -// where |N| <= 932560. Beyond this, the range reduction is -// insufficiently accurate. For extremely small inputs, -// denormalization can occur internally, impacting performance. -// This means that the main path is actually only taken for -// 2^-252 <= |X| < 90112. -// -// To avoid branches, we perform the range reduction to full -// accuracy each time. -// -// X - N * (P_1 + P_2 + P_3) -// -// where P_1 and P_2 are 32-bit numbers (so multiplication by N -// is exact) and P_3 is a 53-bit number. Together, these -// approximate pi well enough for all cases in the restricted -// range. -// -// The main reduction sequence is: -// -// y = 32/pi * x -// N = integer(y) -// (computed by adding and subtracting off SHIFTER) -// -// m_1 = N * P_1 -// m_2 = N * P_2 -// r_1 = x - m_1 -// r = r_1 - m_2 -// (this r can be used for most of the calculation) -// -// c_1 = r_1 - r -// m_3 = N * P_3 -// c_2 = c_1 - m_2 -// c = c_2 - m_3 -// -// 2. MAIN ALGORITHM -// -// The algorithm uses a table lookup based on B = M * pi / 32 -// where M = N mod 64. The stored values are: -// sigma closest power of 2 to cos(B) -// C_hl 53-bit cos(B) - sigma -// S_hi + S_lo 2 * 53-bit sin(B) -// -// The computation is organized as follows: -// -// sin(B + r + c) = [sin(B) + sigma * r] + -// r * (cos(B) - sigma) + -// sin(B) * [cos(r + c) - 1] + -// cos(B) * [sin(r + c) - r] -// -// which is approximately: -// -// [S_hi + sigma * r] + -// C_hl * r + -// S_lo + S_hi * [(cos(r) - 1) - r * c] + -// (C_hl + sigma) * [(sin(r) - r) + c] -// -// and this is what is actually computed. We separate this sum -// into four parts: -// -// hi + med + pols + corr -// -// where -// -// hi = S_hi + sigma r -// med = C_hl * r -// pols = S_hi * (cos(r) - 1) + (C_hl + sigma) * (sin(r) - r) -// corr = S_lo + c * ((C_hl + sigma) - S_hi * r) -// -// 3. POLYNOMIAL -// -// The polynomial S_hi * (cos(r) - 1) + (C_hl + sigma) * -// (sin(r) - r) can be rearranged freely, since it is quite -// small, so we exploit parallelism to the fullest. -// -// psc4 = SC_4 * r_1 -// msc4 = psc4 * r -// r2 = r * r -// msc2 = SC_2 * r2 -// r4 = r2 * r2 -// psc3 = SC_3 + msc4 -// psc1 = SC_1 + msc2 -// msc3 = r4 * psc3 -// sincospols = psc1 + msc3 -// pols = sincospols * -// -// -// 4. CORRECTION TERM -// -// This is where the "c" component of the range reduction is -// taken into account; recall that just "r" is used for most of -// the calculation. -// -// -c = m_3 - c_2 -// -d = S_hi * r - (C_hl + sigma) -// corr = -c * -d + S_lo -// -// 5. COMPENSATED SUMMATIONS -// -// The two successive compensated summations add up the high -// and medium parts, leaving just the low parts to add up at -// the end. -// -// rs = sigma * r -// res_int = S_hi + rs -// k_0 = S_hi - res_int -// k_2 = k_0 + rs -// med = C_hl * r -// res_hi = res_int + med -// k_1 = res_int - res_hi -// k_3 = k_1 + med -// -// 6. FINAL SUMMATION -// -// We now add up all the small parts: -// -// res_lo = pols(hi) + pols(lo) + corr + k_1 + k_3 -// -// Now the overall result is just: -// -// res_hi + res_lo -// -// 7. SMALL ARGUMENTS -// -// Inputs with |X| < 2^-252 are treated specially as -// 1 - |x|. -// -// Special cases: -// cos(NaN) = quiet NaN, and raise invalid exception -// cos(INF) = NaN and raise invalid exception -// cos(0) = 1 -// -/******************************************************************************/ - -// The 32 bit code is at most SSE2 compliant - -ATTRIBUTE_ALIGNED(16) static const juint _static_const_table_cos[] = -{ - 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, - 0x00000000UL, 0x00000000UL, 0x3ff00000UL, 0x176d6d31UL, 0xbf73b92eUL, - 0xbc29b42cUL, 0x3fb917a6UL, 0xe0000000UL, 0xbc3e2718UL, 0x00000000UL, - 0x3ff00000UL, 0x011469fbUL, 0xbf93ad06UL, 0x3c69a60bUL, 0x3fc8f8b8UL, - 0xc0000000UL, 0xbc626d19UL, 0x00000000UL, 0x3ff00000UL, 0x939d225aUL, - 0xbfa60beaUL, 0x2ed59f06UL, 0x3fd29406UL, 0xa0000000UL, 0xbc75d28dUL, - 0x00000000UL, 0x3ff00000UL, 0x866b95cfUL, 0xbfb37ca1UL, 0xa6aea963UL, - 0x3fd87de2UL, 0xe0000000UL, 0xbc672cedUL, 0x00000000UL, 0x3ff00000UL, - 0x73fa1279UL, 0xbfbe3a68UL, 0x3806f63bUL, 0x3fde2b5dUL, 0x20000000UL, - 0x3c5e0d89UL, 0x00000000UL, 0x3ff00000UL, 0x5bc57974UL, 0xbfc59267UL, - 0x39ae68c8UL, 0x3fe1c73bUL, 0x20000000UL, 0x3c8b25ddUL, 0x00000000UL, - 0x3ff00000UL, 0x53aba2fdUL, 0xbfcd0dfeUL, 0x25091dd6UL, 0x3fe44cf3UL, - 0x20000000UL, 0x3c68076aUL, 0x00000000UL, 0x3ff00000UL, 0x99fcef32UL, - 0x3fca8279UL, 0x667f3bcdUL, 0x3fe6a09eUL, 0x20000000UL, 0xbc8bdd34UL, - 0x00000000UL, 0x3fe00000UL, 0x94247758UL, 0x3fc133ccUL, 0x6b151741UL, - 0x3fe8bc80UL, 0x20000000UL, 0xbc82c5e1UL, 0x00000000UL, 0x3fe00000UL, - 0x9ae68c87UL, 0x3fac73b3UL, 0x290ea1a3UL, 0x3fea9b66UL, 0xe0000000UL, - 0x3c39f630UL, 0x00000000UL, 0x3fe00000UL, 0x7f909c4eUL, 0xbf9d4a2cUL, - 0xf180bdb1UL, 0x3fec38b2UL, 0x80000000UL, 0xbc76e0b1UL, 0x00000000UL, - 0x3fe00000UL, 0x65455a75UL, 0xbfbe0875UL, 0xcf328d46UL, 0x3fed906bUL, - 0x20000000UL, 0x3c7457e6UL, 0x00000000UL, 0x3fe00000UL, 0x76acf82dUL, - 0x3fa4a031UL, 0x56c62ddaUL, 0x3fee9f41UL, 0xe0000000UL, 0x3c8760b1UL, - 0x00000000UL, 0x3fd00000UL, 0x0e5967d5UL, 0xbfac1d1fUL, 0xcff75cb0UL, - 0x3fef6297UL, 0x20000000UL, 0x3c756217UL, 0x00000000UL, 0x3fd00000UL, - 0x0f592f50UL, 0xbf9ba165UL, 0xa3d12526UL, 0x3fefd88dUL, 0x40000000UL, - 0xbc887df6UL, 0x00000000UL, 0x3fc00000UL, 0x00000000UL, 0x00000000UL, - 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, - 0x00000000UL, 0x0f592f50UL, 0x3f9ba165UL, 0xa3d12526UL, 0x3fefd88dUL, - 0x40000000UL, 0xbc887df6UL, 0x00000000UL, 0xbfc00000UL, 0x0e5967d5UL, - 0x3fac1d1fUL, 0xcff75cb0UL, 0x3fef6297UL, 0x20000000UL, 0x3c756217UL, - 0x00000000UL, 0xbfd00000UL, 0x76acf82dUL, 0xbfa4a031UL, 0x56c62ddaUL, - 0x3fee9f41UL, 0xe0000000UL, 0x3c8760b1UL, 0x00000000UL, 0xbfd00000UL, - 0x65455a75UL, 0x3fbe0875UL, 0xcf328d46UL, 0x3fed906bUL, 0x20000000UL, - 0x3c7457e6UL, 0x00000000UL, 0xbfe00000UL, 0x7f909c4eUL, 0x3f9d4a2cUL, - 0xf180bdb1UL, 0x3fec38b2UL, 0x80000000UL, 0xbc76e0b1UL, 0x00000000UL, - 0xbfe00000UL, 0x9ae68c87UL, 0xbfac73b3UL, 0x290ea1a3UL, 0x3fea9b66UL, - 0xe0000000UL, 0x3c39f630UL, 0x00000000UL, 0xbfe00000UL, 0x94247758UL, - 0xbfc133ccUL, 0x6b151741UL, 0x3fe8bc80UL, 0x20000000UL, 0xbc82c5e1UL, - 0x00000000UL, 0xbfe00000UL, 0x99fcef32UL, 0xbfca8279UL, 0x667f3bcdUL, - 0x3fe6a09eUL, 0x20000000UL, 0xbc8bdd34UL, 0x00000000UL, 0xbfe00000UL, - 0x53aba2fdUL, 0x3fcd0dfeUL, 0x25091dd6UL, 0x3fe44cf3UL, 0x20000000UL, - 0x3c68076aUL, 0x00000000UL, 0xbff00000UL, 0x5bc57974UL, 0x3fc59267UL, - 0x39ae68c8UL, 0x3fe1c73bUL, 0x20000000UL, 0x3c8b25ddUL, 0x00000000UL, - 0xbff00000UL, 0x73fa1279UL, 0x3fbe3a68UL, 0x3806f63bUL, 0x3fde2b5dUL, - 0x20000000UL, 0x3c5e0d89UL, 0x00000000UL, 0xbff00000UL, 0x866b95cfUL, - 0x3fb37ca1UL, 0xa6aea963UL, 0x3fd87de2UL, 0xe0000000UL, 0xbc672cedUL, - 0x00000000UL, 0xbff00000UL, 0x939d225aUL, 0x3fa60beaUL, 0x2ed59f06UL, - 0x3fd29406UL, 0xa0000000UL, 0xbc75d28dUL, 0x00000000UL, 0xbff00000UL, - 0x011469fbUL, 0x3f93ad06UL, 0x3c69a60bUL, 0x3fc8f8b8UL, 0xc0000000UL, - 0xbc626d19UL, 0x00000000UL, 0xbff00000UL, 0x176d6d31UL, 0x3f73b92eUL, - 0xbc29b42cUL, 0x3fb917a6UL, 0xe0000000UL, 0xbc3e2718UL, 0x00000000UL, - 0xbff00000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, - 0x00000000UL, 0x00000000UL, 0x00000000UL, 0xbff00000UL, 0x176d6d31UL, - 0x3f73b92eUL, 0xbc29b42cUL, 0xbfb917a6UL, 0xe0000000UL, 0x3c3e2718UL, - 0x00000000UL, 0xbff00000UL, 0x011469fbUL, 0x3f93ad06UL, 0x3c69a60bUL, - 0xbfc8f8b8UL, 0xc0000000UL, 0x3c626d19UL, 0x00000000UL, 0xbff00000UL, - 0x939d225aUL, 0x3fa60beaUL, 0x2ed59f06UL, 0xbfd29406UL, 0xa0000000UL, - 0x3c75d28dUL, 0x00000000UL, 0xbff00000UL, 0x866b95cfUL, 0x3fb37ca1UL, - 0xa6aea963UL, 0xbfd87de2UL, 0xe0000000UL, 0x3c672cedUL, 0x00000000UL, - 0xbff00000UL, 0x73fa1279UL, 0x3fbe3a68UL, 0x3806f63bUL, 0xbfde2b5dUL, - 0x20000000UL, 0xbc5e0d89UL, 0x00000000UL, 0xbff00000UL, 0x5bc57974UL, - 0x3fc59267UL, 0x39ae68c8UL, 0xbfe1c73bUL, 0x20000000UL, 0xbc8b25ddUL, - 0x00000000UL, 0xbff00000UL, 0x53aba2fdUL, 0x3fcd0dfeUL, 0x25091dd6UL, - 0xbfe44cf3UL, 0x20000000UL, 0xbc68076aUL, 0x00000000UL, 0xbff00000UL, - 0x99fcef32UL, 0xbfca8279UL, 0x667f3bcdUL, 0xbfe6a09eUL, 0x20000000UL, - 0x3c8bdd34UL, 0x00000000UL, 0xbfe00000UL, 0x94247758UL, 0xbfc133ccUL, - 0x6b151741UL, 0xbfe8bc80UL, 0x20000000UL, 0x3c82c5e1UL, 0x00000000UL, - 0xbfe00000UL, 0x9ae68c87UL, 0xbfac73b3UL, 0x290ea1a3UL, 0xbfea9b66UL, - 0xe0000000UL, 0xbc39f630UL, 0x00000000UL, 0xbfe00000UL, 0x7f909c4eUL, - 0x3f9d4a2cUL, 0xf180bdb1UL, 0xbfec38b2UL, 0x80000000UL, 0x3c76e0b1UL, - 0x00000000UL, 0xbfe00000UL, 0x65455a75UL, 0x3fbe0875UL, 0xcf328d46UL, - 0xbfed906bUL, 0x20000000UL, 0xbc7457e6UL, 0x00000000UL, 0xbfe00000UL, - 0x76acf82dUL, 0xbfa4a031UL, 0x56c62ddaUL, 0xbfee9f41UL, 0xe0000000UL, - 0xbc8760b1UL, 0x00000000UL, 0xbfd00000UL, 0x0e5967d5UL, 0x3fac1d1fUL, - 0xcff75cb0UL, 0xbfef6297UL, 0x20000000UL, 0xbc756217UL, 0x00000000UL, - 0xbfd00000UL, 0x0f592f50UL, 0x3f9ba165UL, 0xa3d12526UL, 0xbfefd88dUL, - 0x40000000UL, 0x3c887df6UL, 0x00000000UL, 0xbfc00000UL, 0x00000000UL, - 0x00000000UL, 0x00000000UL, 0xbff00000UL, 0x00000000UL, 0x00000000UL, - 0x00000000UL, 0x00000000UL, 0x0f592f50UL, 0xbf9ba165UL, 0xa3d12526UL, - 0xbfefd88dUL, 0x40000000UL, 0x3c887df6UL, 0x00000000UL, 0x3fc00000UL, - 0x0e5967d5UL, 0xbfac1d1fUL, 0xcff75cb0UL, 0xbfef6297UL, 0x20000000UL, - 0xbc756217UL, 0x00000000UL, 0x3fd00000UL, 0x76acf82dUL, 0x3fa4a031UL, - 0x56c62ddaUL, 0xbfee9f41UL, 0xe0000000UL, 0xbc8760b1UL, 0x00000000UL, - 0x3fd00000UL, 0x65455a75UL, 0xbfbe0875UL, 0xcf328d46UL, 0xbfed906bUL, - 0x20000000UL, 0xbc7457e6UL, 0x00000000UL, 0x3fe00000UL, 0x7f909c4eUL, - 0xbf9d4a2cUL, 0xf180bdb1UL, 0xbfec38b2UL, 0x80000000UL, 0x3c76e0b1UL, - 0x00000000UL, 0x3fe00000UL, 0x9ae68c87UL, 0x3fac73b3UL, 0x290ea1a3UL, - 0xbfea9b66UL, 0xe0000000UL, 0xbc39f630UL, 0x00000000UL, 0x3fe00000UL, - 0x94247758UL, 0x3fc133ccUL, 0x6b151741UL, 0xbfe8bc80UL, 0x20000000UL, - 0x3c82c5e1UL, 0x00000000UL, 0x3fe00000UL, 0x99fcef32UL, 0x3fca8279UL, - 0x667f3bcdUL, 0xbfe6a09eUL, 0x20000000UL, 0x3c8bdd34UL, 0x00000000UL, - 0x3fe00000UL, 0x53aba2fdUL, 0xbfcd0dfeUL, 0x25091dd6UL, 0xbfe44cf3UL, - 0x20000000UL, 0xbc68076aUL, 0x00000000UL, 0x3ff00000UL, 0x5bc57974UL, - 0xbfc59267UL, 0x39ae68c8UL, 0xbfe1c73bUL, 0x20000000UL, 0xbc8b25ddUL, - 0x00000000UL, 0x3ff00000UL, 0x73fa1279UL, 0xbfbe3a68UL, 0x3806f63bUL, - 0xbfde2b5dUL, 0x20000000UL, 0xbc5e0d89UL, 0x00000000UL, 0x3ff00000UL, - 0x866b95cfUL, 0xbfb37ca1UL, 0xa6aea963UL, 0xbfd87de2UL, 0xe0000000UL, - 0x3c672cedUL, 0x00000000UL, 0x3ff00000UL, 0x939d225aUL, 0xbfa60beaUL, - 0x2ed59f06UL, 0xbfd29406UL, 0xa0000000UL, 0x3c75d28dUL, 0x00000000UL, - 0x3ff00000UL, 0x011469fbUL, 0xbf93ad06UL, 0x3c69a60bUL, 0xbfc8f8b8UL, - 0xc0000000UL, 0x3c626d19UL, 0x00000000UL, 0x3ff00000UL, 0x176d6d31UL, - 0xbf73b92eUL, 0xbc29b42cUL, 0xbfb917a6UL, 0xe0000000UL, 0x3c3e2718UL, - 0x00000000UL, 0x3ff00000UL, 0x55555555UL, 0xbfc55555UL, 0x00000000UL, - 0xbfe00000UL, 0x11111111UL, 0x3f811111UL, 0x55555555UL, 0x3fa55555UL, - 0x1a01a01aUL, 0xbf2a01a0UL, 0x16c16c17UL, 0xbf56c16cUL, 0xa556c734UL, - 0x3ec71de3UL, 0x1a01a01aUL, 0x3efa01a0UL, 0x1a600000UL, 0x3d90b461UL, - 0x1a600000UL, 0x3d90b461UL, 0x54400000UL, 0x3fb921fbUL, 0x00000000UL, - 0x00000000UL, 0x2e037073UL, 0x3b63198aUL, 0x00000000UL, 0x00000000UL, - 0x6dc9c883UL, 0x40245f30UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, - 0x43380000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x3ff00000UL, - 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x80000000UL, 0x00000000UL, - 0x00000000UL, 0x00000000UL, 0x80000000UL, 0x00000000UL, 0x00000000UL, - 0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL -}; -//registers, -// input: (rbp + 8) -// scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7 -// eax, ecx, edx, ebx (tmp) - -// Code generated by Intel C compiler for LIBM library - -void MacroAssembler::fast_cos(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, - XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, - Register eax, Register ecx, Register edx, Register tmp) { - Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2; - Label start; - - assert_different_registers(tmp, eax, ecx, edx); - - address static_const_table_cos = (address)_static_const_table_cos; - - bind(start); - subl(rsp, 120); - movl(Address(rsp, 56), tmp); - lea(tmp, ExternalAddress(static_const_table_cos)); - movsd(xmm0, Address(rsp, 128)); - pextrw(eax, xmm0, 3); - andl(eax, 32767); - subl(eax, 12336); - cmpl(eax, 4293); - jcc(Assembler::above, L_2TAG_PACKET_0_0_2); - movsd(xmm1, Address(tmp, 2160)); - mulsd(xmm1, xmm0); - movdqu(xmm5, Address(tmp, 2240)); - movsd(xmm4, Address(tmp, 2224)); - pand(xmm4, xmm0); - por(xmm5, xmm4); - movsd(xmm3, Address(tmp, 2128)); - movdqu(xmm2, Address(tmp, 2112)); - addpd(xmm1, xmm5); - cvttsd2sil(edx, xmm1); - cvtsi2sdl(xmm1, edx); - mulsd(xmm3, xmm1); - unpcklpd(xmm1, xmm1); - addl(edx, 1865232); - movdqu(xmm4, xmm0); - andl(edx, 63); - movdqu(xmm5, Address(tmp, 2096)); - lea(eax, Address(tmp, 0)); - shll(edx, 5); - addl(eax, edx); - mulpd(xmm2, xmm1); - subsd(xmm0, xmm3); - mulsd(xmm1, Address(tmp, 2144)); - subsd(xmm4, xmm3); - movsd(xmm7, Address(eax, 8)); - unpcklpd(xmm0, xmm0); - movapd(xmm3, xmm4); - subsd(xmm4, xmm2); - mulpd(xmm5, xmm0); - subpd(xmm0, xmm2); - movdqu(xmm6, Address(tmp, 2064)); - mulsd(xmm7, xmm4); - subsd(xmm3, xmm4); - mulpd(xmm5, xmm0); - mulpd(xmm0, xmm0); - subsd(xmm3, xmm2); - movdqu(xmm2, Address(eax, 0)); - subsd(xmm1, xmm3); - movsd(xmm3, Address(eax, 24)); - addsd(xmm2, xmm3); - subsd(xmm7, xmm2); - mulsd(xmm2, xmm4); - mulpd(xmm6, xmm0); - mulsd(xmm3, xmm4); - mulpd(xmm2, xmm0); - mulpd(xmm0, xmm0); - addpd(xmm5, Address(tmp, 2080)); - mulsd(xmm4, Address(eax, 0)); - addpd(xmm6, Address(tmp, 2048)); - mulpd(xmm5, xmm0); - movapd(xmm0, xmm3); - addsd(xmm3, Address(eax, 8)); - mulpd(xmm1, xmm7); - movapd(xmm7, xmm4); - addsd(xmm4, xmm3); - addpd(xmm6, xmm5); - movsd(xmm5, Address(eax, 8)); - subsd(xmm5, xmm3); - subsd(xmm3, xmm4); - addsd(xmm1, Address(eax, 16)); - mulpd(xmm6, xmm2); - addsd(xmm5, xmm0); - addsd(xmm3, xmm7); - addsd(xmm1, xmm5); - addsd(xmm1, xmm3); - addsd(xmm1, xmm6); - unpckhpd(xmm6, xmm6); - addsd(xmm1, xmm6); - addsd(xmm4, xmm1); - movsd(Address(rsp, 0), xmm4); - fld_d(Address(rsp, 0)); - jmp(L_2TAG_PACKET_1_0_2); - - bind(L_2TAG_PACKET_0_0_2); - jcc(Assembler::greater, L_2TAG_PACKET_2_0_2); - pextrw(eax, xmm0, 3); - andl(eax, 32767); - pinsrw(xmm0, eax, 3); - movsd(xmm1, Address(tmp, 2192)); - subsd(xmm1, xmm0); - movsd(Address(rsp, 0), xmm1); - fld_d(Address(rsp, 0)); - jmp(L_2TAG_PACKET_1_0_2); - - bind(L_2TAG_PACKET_2_0_2); - movl(eax, Address(rsp, 132)); - andl(eax, 2146435072); - cmpl(eax, 2146435072); - jcc(Assembler::equal, L_2TAG_PACKET_3_0_2); - subl(rsp, 32); - movsd(Address(rsp, 0), xmm0); - lea(eax, Address(rsp, 40)); - movl(Address(rsp, 8), eax); - movl(eax, 1); - movl(Address(rsp, 12), eax); - call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlibm_sin_cos_huge()))); - addl(rsp, 32); - fld_d(Address(rsp, 8)); - jmp(L_2TAG_PACKET_1_0_2); - - bind(L_2TAG_PACKET_3_0_2); - fld_d(Address(rsp, 128)); - fmul_d(Address(tmp, 2208)); - - bind(L_2TAG_PACKET_1_0_2); - movl(tmp, Address(rsp, 56)); -} diff --git a/src/hotspot/cpu/x86/macroAssembler_x86_32_exp.cpp b/src/hotspot/cpu/x86/macroAssembler_x86_32_exp.cpp deleted file mode 100644 index 2e6c1a617bb..00000000000 --- a/src/hotspot/cpu/x86/macroAssembler_x86_32_exp.cpp +++ /dev/null @@ -1,329 +0,0 @@ -/* -* Copyright (c) 2016, 2021, Intel Corporation. All rights reserved. -* Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved. -* Intel Math Library (LIBM) Source Code -* -* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -* -* This code is free software; you can redistribute it and/or modify it -* under the terms of the GNU General Public License version 2 only, as -* published by the Free Software Foundation. -* -* This code is distributed in the hope that it will be useful, but WITHOUT -* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -* version 2 for more details (a copy is included in the LICENSE file that -* accompanied this code). -* -* You should have received a copy of the GNU General Public License version -* 2 along with this work; if not, write to the Free Software Foundation, -* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -* -* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -* or visit www.oracle.com if you need additional information or have any -* questions. -* -*/ - -#include "asm/assembler.hpp" -#include "asm/assembler.inline.hpp" -#include "macroAssembler_x86.hpp" -#include "runtime/stubRoutines.hpp" -#include "utilities/globalDefinitions.hpp" - -/******************************************************************************/ -// ALGORITHM DESCRIPTION - EXP() -// --------------------- -// -// Description: -// Let K = 64 (table size). -// x x/log(2) n -// e = 2 = 2 * T[j] * (1 + P(y)) -// where -// x = m*log(2)/K + y, y in [-log(2)/K..log(2)/K] -// m = n*K + j, m,n,j - signed integer, j in [-K/2..K/2] -// j/K -// values of 2 are tabulated as T[j] = T_hi[j] ( 1 + T_lo[j]). -// -// P(y) is a minimax polynomial approximation of exp(x)-1 -// on small interval [-log(2)/K..log(2)/K] (were calculated by Maple V). -// -// To avoid problems with arithmetic overflow and underflow, -// n n1 n2 -// value of 2 is safely computed as 2 * 2 where n1 in [-BIAS/2..BIAS/2] -// where BIAS is a value of exponent bias. -// -// Special cases: -// exp(NaN) = NaN -// exp(+INF) = +INF -// exp(-INF) = 0 -// exp(x) = 1 for subnormals -// for finite argument, only exp(0)=1 is exact -// For IEEE double -// if x > 709.782712893383973096 then exp(x) overflow -// if x < -745.133219101941108420 then exp(x) underflow -// -/******************************************************************************/ - -// The 32 bit code is at most SSE2 compliant - -ATTRIBUTE_ALIGNED(16) static const juint _static_const_table[] = -{ - 0x00000000UL, 0xfff00000UL, 0x00000000UL, 0xfff00000UL, 0xffffffc0UL, - 0x00000000UL, 0xffffffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL, - 0x0000ffc0UL, 0x00000000UL, 0x00000000UL, 0x43380000UL, 0x00000000UL, - 0x43380000UL, 0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL, - 0xfefa0000UL, 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL, 0xbc9e3b3aUL, - 0x3d1cf79aUL, 0xbc9e3b3aUL, 0x3d1cf79aUL, 0xfffffffeUL, 0x3fdfffffUL, - 0xfffffffeUL, 0x3fdfffffUL, 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL, - 0x3fa55555UL, 0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL, - 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x0e03754dUL, - 0x3cad7bbfUL, 0x3e778060UL, 0x00002c9aUL, 0x3567f613UL, 0x3c8cd252UL, - 0xd3158574UL, 0x000059b0UL, 0x61e6c861UL, 0x3c60f74eUL, 0x18759bc8UL, - 0x00008745UL, 0x5d837b6cUL, 0x3c979aa6UL, 0x6cf9890fUL, 0x0000b558UL, - 0x702f9cd1UL, 0x3c3ebe3dUL, 0x32d3d1a2UL, 0x0000e3ecUL, 0x1e63bcd8UL, - 0x3ca3516eUL, 0xd0125b50UL, 0x00011301UL, 0x26f0387bUL, 0x3ca4c554UL, - 0xaea92ddfUL, 0x0001429aUL, 0x62523fb6UL, 0x3ca95153UL, 0x3c7d517aUL, - 0x000172b8UL, 0x3f1353bfUL, 0x3c8b898cUL, 0xeb6fcb75UL, 0x0001a35bUL, - 0x3e3a2f5fUL, 0x3c9aecf7UL, 0x3168b9aaUL, 0x0001d487UL, 0x44a6c38dUL, - 0x3c8a6f41UL, 0x88628cd6UL, 0x0002063bUL, 0xe3a8a894UL, 0x3c968efdUL, - 0x6e756238UL, 0x0002387aUL, 0x981fe7f2UL, 0x3c80472bUL, 0x65e27cddUL, - 0x00026b45UL, 0x6d09ab31UL, 0x3c82f7e1UL, 0xf51fdee1UL, 0x00029e9dUL, - 0x720c0ab3UL, 0x3c8b3782UL, 0xa6e4030bUL, 0x0002d285UL, 0x4db0abb6UL, - 0x3c834d75UL, 0x0a31b715UL, 0x000306feUL, 0x5dd3f84aUL, 0x3c8fdd39UL, - 0xb26416ffUL, 0x00033c08UL, 0xcc187d29UL, 0x3ca12f8cUL, 0x373aa9caUL, - 0x000371a7UL, 0x738b5e8bUL, 0x3ca7d229UL, 0x34e59ff6UL, 0x0003a7dbUL, - 0xa72a4c6dUL, 0x3c859f48UL, 0x4c123422UL, 0x0003dea6UL, 0x259d9205UL, - 0x3ca8b846UL, 0x21f72e29UL, 0x0004160aUL, 0x60c2ac12UL, 0x3c4363edUL, - 0x6061892dUL, 0x00044e08UL, 0xdaa10379UL, 0x3c6ecce1UL, 0xb5c13cd0UL, - 0x000486a2UL, 0xbb7aafb0UL, 0x3c7690ceUL, 0xd5362a27UL, 0x0004bfdaUL, - 0x9b282a09UL, 0x3ca083ccUL, 0x769d2ca6UL, 0x0004f9b2UL, 0xc1aae707UL, - 0x3ca509b0UL, 0x569d4f81UL, 0x0005342bUL, 0x18fdd78eUL, 0x3c933505UL, - 0x36b527daUL, 0x00056f47UL, 0xe21c5409UL, 0x3c9063e1UL, 0xdd485429UL, - 0x0005ab07UL, 0x2b64c035UL, 0x3c9432e6UL, 0x15ad2148UL, 0x0005e76fUL, - 0x99f08c0aUL, 0x3ca01284UL, 0xb03a5584UL, 0x0006247eUL, 0x0073dc06UL, - 0x3c99f087UL, 0x82552224UL, 0x00066238UL, 0x0da05571UL, 0x3c998d4dUL, - 0x667f3bccUL, 0x0006a09eUL, 0x86ce4786UL, 0x3ca52bb9UL, 0x3c651a2eUL, - 0x0006dfb2UL, 0x206f0dabUL, 0x3ca32092UL, 0xe8ec5f73UL, 0x00071f75UL, - 0x8e17a7a6UL, 0x3ca06122UL, 0x564267c8UL, 0x00075febUL, 0x461e9f86UL, - 0x3ca244acUL, 0x73eb0186UL, 0x0007a114UL, 0xabd66c55UL, 0x3c65ebe1UL, - 0x36cf4e62UL, 0x0007e2f3UL, 0xbbff67d0UL, 0x3c96fe9fUL, 0x994cce12UL, - 0x00082589UL, 0x14c801dfUL, 0x3c951f14UL, 0x9b4492ecUL, 0x000868d9UL, - 0xc1f0eab4UL, 0x3c8db72fUL, 0x422aa0dbUL, 0x0008ace5UL, 0x59f35f44UL, - 0x3c7bf683UL, 0x99157736UL, 0x0008f1aeUL, 0x9c06283cUL, 0x3ca360baUL, - 0xb0cdc5e4UL, 0x00093737UL, 0x20f962aaUL, 0x3c95e8d1UL, 0x9fde4e4fUL, - 0x00097d82UL, 0x2b91ce27UL, 0x3c71affcUL, 0x82a3f090UL, 0x0009c491UL, - 0x589a2ebdUL, 0x3c9b6d34UL, 0x7b5de564UL, 0x000a0c66UL, 0x9ab89880UL, - 0x3c95277cUL, 0xb23e255cUL, 0x000a5503UL, 0x6e735ab3UL, 0x3c846984UL, - 0x5579fdbfUL, 0x000a9e6bUL, 0x92cb3387UL, 0x3c8c1a77UL, 0x995ad3adUL, - 0x000ae89fUL, 0xdc2d1d96UL, 0x3ca22466UL, 0xb84f15faUL, 0x000b33a2UL, - 0xb19505aeUL, 0x3ca1112eUL, 0xf2fb5e46UL, 0x000b7f76UL, 0x0a5fddcdUL, - 0x3c74ffd7UL, 0x904bc1d2UL, 0x000bcc1eUL, 0x30af0cb3UL, 0x3c736eaeUL, - 0xdd85529cUL, 0x000c199bUL, 0xd10959acUL, 0x3c84e08fUL, 0x2e57d14bUL, - 0x000c67f1UL, 0x6c921968UL, 0x3c676b2cUL, 0xdcef9069UL, 0x000cb720UL, - 0x36df99b3UL, 0x3c937009UL, 0x4a07897bUL, 0x000d072dUL, 0xa63d07a7UL, - 0x3c74a385UL, 0xdcfba487UL, 0x000d5818UL, 0xd5c192acUL, 0x3c8e5a50UL, - 0x03db3285UL, 0x000da9e6UL, 0x1c4a9792UL, 0x3c98bb73UL, 0x337b9b5eUL, - 0x000dfc97UL, 0x603a88d3UL, 0x3c74b604UL, 0xe78b3ff6UL, 0x000e502eUL, - 0x92094926UL, 0x3c916f27UL, 0xa2a490d9UL, 0x000ea4afUL, 0x41aa2008UL, - 0x3c8ec3bcUL, 0xee615a27UL, 0x000efa1bUL, 0x31d185eeUL, 0x3c8a64a9UL, - 0x5b6e4540UL, 0x000f5076UL, 0x4d91cd9dUL, 0x3c77893bUL, 0x819e90d8UL, - 0x000fa7c1UL, 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x7ff00000UL, - 0x00000000UL, 0x00000000UL, 0xffffffffUL, 0x7fefffffUL, 0x00000000UL, - 0x00100000UL -}; - -//registers, -// input: (rbp + 8) -// scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7 -// rax, rdx, rcx, rbx (tmp) - -// Code generated by Intel C compiler for LIBM library - -void MacroAssembler::fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, - XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, - Register eax, Register ecx, Register edx, Register tmp) { - Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2; - Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2; - Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, L_2TAG_PACKET_10_0_2, L_2TAG_PACKET_11_0_2; - Label L_2TAG_PACKET_12_0_2; - - assert_different_registers(tmp, eax, ecx, edx); - address static_const_table = (address)_static_const_table; - - subl(rsp, 120); - movl(Address(rsp, 64), tmp); - lea(tmp, ExternalAddress(static_const_table)); - movsd(xmm0, Address(rsp, 128)); - unpcklpd(xmm0, xmm0); - movdqu(xmm1, Address(tmp, 64)); // 0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL - movdqu(xmm6, Address(tmp, 48)); // 0x00000000UL, 0x43380000UL, 0x00000000UL, 0x43380000UL - movdqu(xmm2, Address(tmp, 80)); // 0xfefa0000UL, 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL - movdqu(xmm3, Address(tmp, 96)); // 0xbc9e3b3aUL, 0x3d1cf79aUL, 0xbc9e3b3aUL, 0x3d1cf79aUL - pextrw(eax, xmm0, 3); - andl(eax, 32767); - movl(edx, 16527); - subl(edx, eax); - subl(eax, 15504); - orl(edx, eax); - cmpl(edx, INT_MIN); - jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2); - mulpd(xmm1, xmm0); - addpd(xmm1, xmm6); - movapd(xmm7, xmm1); - subpd(xmm1, xmm6); - mulpd(xmm2, xmm1); - movdqu(xmm4, Address(tmp, 128)); // 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL, 0x3fa55555UL - mulpd(xmm3, xmm1); - movdqu(xmm5, Address(tmp, 144)); // 0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL - subpd(xmm0, xmm2); - movdl(eax, xmm7); - movl(ecx, eax); - andl(ecx, 63); - shll(ecx, 4); - sarl(eax, 6); - movl(edx, eax); - movdqu(xmm6, Address(tmp, 16)); // 0xffffffc0UL, 0x00000000UL, 0xffffffc0UL, 0x00000000UL - pand(xmm7, xmm6); - movdqu(xmm6, Address(tmp, 32)); // 0x0000ffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL - paddq(xmm7, xmm6); - psllq(xmm7, 46); - subpd(xmm0, xmm3); - movdqu(xmm2, Address(tmp, ecx, Address::times_1, 160)); - mulpd(xmm4, xmm0); - movapd(xmm6, xmm0); - movapd(xmm1, xmm0); - mulpd(xmm6, xmm6); - mulpd(xmm0, xmm6); - addpd(xmm5, xmm4); - mulsd(xmm0, xmm6); - mulpd(xmm6, Address(tmp, 112)); // 0xfffffffeUL, 0x3fdfffffUL, 0xfffffffeUL, 0x3fdfffffUL - addsd(xmm1, xmm2); - unpckhpd(xmm2, xmm2); - mulpd(xmm0, xmm5); - addsd(xmm1, xmm0); - por(xmm2, xmm7); - unpckhpd(xmm0, xmm0); - addsd(xmm0, xmm1); - addsd(xmm0, xmm6); - addl(edx, 894); - cmpl(edx, 1916); - jcc(Assembler::above, L_2TAG_PACKET_1_0_2); - mulsd(xmm0, xmm2); - addsd(xmm0, xmm2); - jmp(L_2TAG_PACKET_2_0_2); - - bind(L_2TAG_PACKET_1_0_2); - fnstcw(Address(rsp, 24)); - movzwl(edx, Address(rsp, 24)); - orl(edx, 768); - movw(Address(rsp, 28), edx); - fldcw(Address(rsp, 28)); - movl(edx, eax); - sarl(eax, 1); - subl(edx, eax); - movdqu(xmm6, Address(tmp, 0)); // 0x00000000UL, 0xfff00000UL, 0x00000000UL, 0xfff00000UL - pandn(xmm6, xmm2); - addl(eax, 1023); - movdl(xmm3, eax); - psllq(xmm3, 52); - por(xmm6, xmm3); - addl(edx, 1023); - movdl(xmm4, edx); - psllq(xmm4, 52); - movsd(Address(rsp, 8), xmm0); - fld_d(Address(rsp, 8)); - movsd(Address(rsp, 16), xmm6); - fld_d(Address(rsp, 16)); - fmula(1); - faddp(1); - movsd(Address(rsp, 8), xmm4); - fld_d(Address(rsp, 8)); - fmulp(1); - fstp_d(Address(rsp, 8)); - movsd(xmm0, Address(rsp, 8)); - fldcw(Address(rsp, 24)); - pextrw(ecx, xmm0, 3); - andl(ecx, 32752); - cmpl(ecx, 32752); - jcc(Assembler::aboveEqual, L_2TAG_PACKET_3_0_2); - cmpl(ecx, 0); - jcc(Assembler::equal, L_2TAG_PACKET_4_0_2); - jmp(L_2TAG_PACKET_2_0_2); - cmpl(ecx, INT_MIN); - jcc(Assembler::below, L_2TAG_PACKET_3_0_2); - cmpl(ecx, -1064950997); - jcc(Assembler::below, L_2TAG_PACKET_2_0_2); - jcc(Assembler::above, L_2TAG_PACKET_4_0_2); - movl(edx, Address(rsp, 128)); - cmpl(edx, -17155601); - jcc(Assembler::below, L_2TAG_PACKET_2_0_2); - jmp(L_2TAG_PACKET_4_0_2); - - bind(L_2TAG_PACKET_3_0_2); - movl(edx, 14); - jmp(L_2TAG_PACKET_5_0_2); - - bind(L_2TAG_PACKET_4_0_2); - movl(edx, 15); - - bind(L_2TAG_PACKET_5_0_2); - movsd(Address(rsp, 0), xmm0); - movsd(xmm0, Address(rsp, 128)); - fld_d(Address(rsp, 0)); - jmp(L_2TAG_PACKET_6_0_2); - - bind(L_2TAG_PACKET_7_0_2); - cmpl(eax, 2146435072); - jcc(Assembler::aboveEqual, L_2TAG_PACKET_8_0_2); - movl(eax, Address(rsp, 132)); - cmpl(eax, INT_MIN); - jcc(Assembler::aboveEqual, L_2TAG_PACKET_9_0_2); - movsd(xmm0, Address(tmp, 1208)); // 0xffffffffUL, 0x7fefffffUL - mulsd(xmm0, xmm0); - movl(edx, 14); - jmp(L_2TAG_PACKET_5_0_2); - - bind(L_2TAG_PACKET_9_0_2); - movsd(xmm0, Address(tmp, 1216)); - mulsd(xmm0, xmm0); - movl(edx, 15); - jmp(L_2TAG_PACKET_5_0_2); - - bind(L_2TAG_PACKET_8_0_2); - movl(edx, Address(rsp, 128)); - cmpl(eax, 2146435072); - jcc(Assembler::above, L_2TAG_PACKET_10_0_2); - cmpl(edx, 0); - jcc(Assembler::notEqual, L_2TAG_PACKET_10_0_2); - movl(eax, Address(rsp, 132)); - cmpl(eax, 2146435072); - jcc(Assembler::notEqual, L_2TAG_PACKET_11_0_2); - movsd(xmm0, Address(tmp, 1192)); // 0x00000000UL, 0x7ff00000UL - jmp(L_2TAG_PACKET_2_0_2); - - bind(L_2TAG_PACKET_11_0_2); - movsd(xmm0, Address(tmp, 1200)); // 0x00000000UL, 0x00000000UL - jmp(L_2TAG_PACKET_2_0_2); - - bind(L_2TAG_PACKET_10_0_2); - movsd(xmm0, Address(rsp, 128)); - addsd(xmm0, xmm0); - jmp(L_2TAG_PACKET_2_0_2); - - bind(L_2TAG_PACKET_0_0_2); - movl(eax, Address(rsp, 132)); - andl(eax, 2147483647); - cmpl(eax, 1083179008); - jcc(Assembler::aboveEqual, L_2TAG_PACKET_7_0_2); - movsd(xmm0, Address(rsp, 128)); - addsd(xmm0, Address(tmp, 1184)); // 0x00000000UL, 0x3ff00000UL - jmp(L_2TAG_PACKET_2_0_2); - - bind(L_2TAG_PACKET_2_0_2); - movsd(Address(rsp, 48), xmm0); - fld_d(Address(rsp, 48)); - - bind(L_2TAG_PACKET_6_0_2); - movl(tmp, Address(rsp, 64)); -} diff --git a/src/hotspot/cpu/x86/macroAssembler_x86_32_log.cpp b/src/hotspot/cpu/x86/macroAssembler_x86_32_log.cpp deleted file mode 100644 index abaabef6741..00000000000 --- a/src/hotspot/cpu/x86/macroAssembler_x86_32_log.cpp +++ /dev/null @@ -1,344 +0,0 @@ -/* -* Copyright (c) 2016, 2021, Intel Corporation. All rights reserved. -* Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved. -* Intel Math Library (LIBM) Source Code -* -* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -* -* This code is free software; you can redistribute it and/or modify it -* under the terms of the GNU General Public License version 2 only, as -* published by the Free Software Foundation. -* -* This code is distributed in the hope that it will be useful, but WITHOUT -* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -* version 2 for more details (a copy is included in the LICENSE file that -* accompanied this code). -* -* You should have received a copy of the GNU General Public License version -* 2 along with this work; if not, write to the Free Software Foundation, -* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -* -* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -* or visit www.oracle.com if you need additional information or have any -* questions. -* -*/ - -#include "asm/assembler.hpp" -#include "asm/assembler.inline.hpp" -#include "macroAssembler_x86.hpp" -#include "utilities/globalDefinitions.hpp" - -/******************************************************************************/ -// ALGORITHM DESCRIPTION - LOG() -// --------------------- -// -// x=2^k * mx, mx in [1,2) -// -// Get B~1/mx based on the output of rcpss instruction (B0) -// B = int((B0*2^7+0.5))/2^7 -// -// Reduced argument: r=B*mx-1.0 (computed accurately in high and low parts) -// -// Result: k*log(2) - log(B) + p(r) if |x-1| >= small value (2^-6) and -// p(r) is a degree 7 polynomial -// -log(B) read from data table (high, low parts) -// Result is formed from high and low parts -// -// Special cases: -// log(NaN) = quiet NaN, and raise invalid exception -// log(+INF) = that INF -// log(0) = -INF with divide-by-zero exception raised -// log(1) = +0 -// log(x) = NaN with invalid exception raised if x < -0, including -INF -// -/******************************************************************************/ - -// The 32 bit code is at most SSE2 compliant -// -ATTRIBUTE_ALIGNED(16) static const juint _static_const_table_log[] = -{ - 0xfefa3800UL, 0x3fe62e42UL, 0x93c76730UL, 0x3d2ef357UL, 0xaa241800UL, - 0x3fe5ee82UL, 0x0cda46beUL, 0x3d220238UL, 0x5c364800UL, 0x3fe5af40UL, - 0xac10c9fbUL, 0x3d2dfa63UL, 0x26bb8c00UL, 0x3fe5707aUL, 0xff3303ddUL, - 0x3d09980bUL, 0x26867800UL, 0x3fe5322eUL, 0x5d257531UL, 0x3d05ccc4UL, - 0x835a5000UL, 0x3fe4f45aUL, 0x6d93b8fbUL, 0xbd2e6c51UL, 0x6f970c00UL, - 0x3fe4b6fdUL, 0xed4c541cUL, 0x3cef7115UL, 0x27e8a400UL, 0x3fe47a15UL, - 0xf94d60aaUL, 0xbd22cb6aUL, 0xf2f92400UL, 0x3fe43d9fUL, 0x481051f7UL, - 0xbcfd984fUL, 0x2125cc00UL, 0x3fe4019cUL, 0x30f0c74cUL, 0xbd26ce79UL, - 0x0c36c000UL, 0x3fe3c608UL, 0x7cfe13c2UL, 0xbd02b736UL, 0x17197800UL, - 0x3fe38ae2UL, 0xbb5569a4UL, 0xbd218b7aUL, 0xad9d8c00UL, 0x3fe35028UL, - 0x9527e6acUL, 0x3d10b83fUL, 0x44340800UL, 0x3fe315daUL, 0xc5a0ed9cUL, - 0xbd274e93UL, 0x57b0e000UL, 0x3fe2dbf5UL, 0x07b9dc11UL, 0xbd17a6e5UL, - 0x6d0ec000UL, 0x3fe2a278UL, 0xe797882dUL, 0x3d206d2bUL, 0x1134dc00UL, - 0x3fe26962UL, 0x05226250UL, 0xbd0b61f1UL, 0xd8bebc00UL, 0x3fe230b0UL, - 0x6e48667bUL, 0x3d12fc06UL, 0x5fc61800UL, 0x3fe1f863UL, 0xc9fe81d3UL, - 0xbd2a7242UL, 0x49ae6000UL, 0x3fe1c078UL, 0xed70e667UL, 0x3cccacdeUL, - 0x40f23c00UL, 0x3fe188eeUL, 0xf8ab4650UL, 0x3d14cc4eUL, 0xf6f29800UL, - 0x3fe151c3UL, 0xa293ae49UL, 0xbd2edd97UL, 0x23c75c00UL, 0x3fe11af8UL, - 0xbb9ddcb2UL, 0xbd258647UL, 0x8611cc00UL, 0x3fe0e489UL, 0x07801742UL, - 0x3d1c2998UL, 0xe2d05400UL, 0x3fe0ae76UL, 0x887e7e27UL, 0x3d1f486bUL, - 0x0533c400UL, 0x3fe078bfUL, 0x41edf5fdUL, 0x3d268122UL, 0xbe760400UL, - 0x3fe04360UL, 0xe79539e0UL, 0xbd04c45fUL, 0xe5b20800UL, 0x3fe00e5aUL, - 0xb1727b1cUL, 0xbd053ba3UL, 0xaf7a4800UL, 0x3fdfb358UL, 0x3c164935UL, - 0x3d0085faUL, 0xee031800UL, 0x3fdf4aa7UL, 0x6f014a8bUL, 0x3d12cde5UL, - 0x56b41000UL, 0x3fdee2a1UL, 0x5a470251UL, 0x3d2f27f4UL, 0xc3ddb000UL, - 0x3fde7b42UL, 0x5372bd08UL, 0xbd246550UL, 0x1a272800UL, 0x3fde148aUL, - 0x07322938UL, 0xbd1326b2UL, 0x484c9800UL, 0x3fddae75UL, 0x60dc616aUL, - 0xbd1ea42dUL, 0x46def800UL, 0x3fdd4902UL, 0xe9a767a8UL, 0x3d235bafUL, - 0x18064800UL, 0x3fdce42fUL, 0x3ec7a6b0UL, 0xbd0797c3UL, 0xc7455800UL, - 0x3fdc7ff9UL, 0xc15249aeUL, 0xbd29b6ddUL, 0x693fa000UL, 0x3fdc1c60UL, - 0x7fe8e180UL, 0x3d2cec80UL, 0x1b80e000UL, 0x3fdbb961UL, 0xf40a666dUL, - 0x3d27d85bUL, 0x04462800UL, 0x3fdb56faUL, 0x2d841995UL, 0x3d109525UL, - 0x5248d000UL, 0x3fdaf529UL, 0x52774458UL, 0xbd217cc5UL, 0x3c8ad800UL, - 0x3fda93edUL, 0xbea77a5dUL, 0x3d1e36f2UL, 0x0224f800UL, 0x3fda3344UL, - 0x7f9d79f5UL, 0x3d23c645UL, 0xea15f000UL, 0x3fd9d32bUL, 0x10d0c0b0UL, - 0xbd26279eUL, 0x43135800UL, 0x3fd973a3UL, 0xa502d9f0UL, 0xbd152313UL, - 0x635bf800UL, 0x3fd914a8UL, 0x2ee6307dUL, 0xbd1766b5UL, 0xa88b3000UL, - 0x3fd8b639UL, 0xe5e70470UL, 0xbd205ae1UL, 0x776dc800UL, 0x3fd85855UL, - 0x3333778aUL, 0x3d2fd56fUL, 0x3bd81800UL, 0x3fd7fafaUL, 0xc812566aUL, - 0xbd272090UL, 0x687cf800UL, 0x3fd79e26UL, 0x2efd1778UL, 0x3d29ec7dUL, - 0x76c67800UL, 0x3fd741d8UL, 0x49dc60b3UL, 0x3d2d8b09UL, 0xe6af1800UL, - 0x3fd6e60eUL, 0x7c222d87UL, 0x3d172165UL, 0x3e9c6800UL, 0x3fd68ac8UL, - 0x2756eba0UL, 0x3d20a0d3UL, 0x0b3ab000UL, 0x3fd63003UL, 0xe731ae00UL, - 0xbd2db623UL, 0xdf596000UL, 0x3fd5d5bdUL, 0x08a465dcUL, 0xbd0a0b2aUL, - 0x53c8d000UL, 0x3fd57bf7UL, 0xee5d40efUL, 0x3d1fadedUL, 0x0738a000UL, - 0x3fd522aeUL, 0x8164c759UL, 0x3d2ebe70UL, 0x9e173000UL, 0x3fd4c9e0UL, - 0x1b0ad8a4UL, 0xbd2e2089UL, 0xc271c800UL, 0x3fd4718dUL, 0x0967d675UL, - 0xbd2f27ceUL, 0x23d5e800UL, 0x3fd419b4UL, 0xec90e09dUL, 0x3d08e436UL, - 0x77333000UL, 0x3fd3c252UL, 0xb606bd5cUL, 0x3d183b54UL, 0x76be1000UL, - 0x3fd36b67UL, 0xb0f177c8UL, 0x3d116ecdUL, 0xe1d36000UL, 0x3fd314f1UL, - 0xd3213cb8UL, 0xbd28e27aUL, 0x7cdc9000UL, 0x3fd2bef0UL, 0x4a5004f4UL, - 0x3d2a9cfaUL, 0x1134d800UL, 0x3fd26962UL, 0xdf5bb3b6UL, 0x3d2c93c1UL, - 0x6d0eb800UL, 0x3fd21445UL, 0xba46baeaUL, 0x3d0a87deUL, 0x635a6800UL, - 0x3fd1bf99UL, 0x5147bdb7UL, 0x3d2ca6edUL, 0xcbacf800UL, 0x3fd16b5cUL, - 0xf7a51681UL, 0x3d2b9acdUL, 0x8227e800UL, 0x3fd1178eUL, 0x63a5f01cUL, - 0xbd2c210eUL, 0x67616000UL, 0x3fd0c42dUL, 0x163ceae9UL, 0x3d27188bUL, - 0x604d5800UL, 0x3fd07138UL, 0x16ed4e91UL, 0x3cf89cdbUL, 0x5626c800UL, - 0x3fd01eaeUL, 0x1485e94aUL, 0xbd16f08cUL, 0x6cb3b000UL, 0x3fcf991cUL, - 0xca0cdf30UL, 0x3d1bcbecUL, 0xe4dd0000UL, 0x3fcef5adUL, 0x65bb8e11UL, - 0xbcca2115UL, 0xffe71000UL, 0x3fce530eUL, 0x6041f430UL, 0x3cc21227UL, - 0xb0d49000UL, 0x3fcdb13dUL, 0xf715b035UL, 0xbd2aff2aUL, 0xf2656000UL, - 0x3fcd1037UL, 0x75b6f6e4UL, 0xbd084a7eUL, 0xc6f01000UL, 0x3fcc6ffbUL, - 0xc5962bd2UL, 0xbcf1ec72UL, 0x383be000UL, 0x3fcbd087UL, 0x595412b6UL, - 0xbd2d4bc4UL, 0x575bd000UL, 0x3fcb31d8UL, 0x4eace1aaUL, 0xbd0c358dUL, - 0x3c8ae000UL, 0x3fca93edUL, 0x50562169UL, 0xbd287243UL, 0x07089000UL, - 0x3fc9f6c4UL, 0x6865817aUL, 0x3d29904dUL, 0xdcf70000UL, 0x3fc95a5aUL, - 0x58a0ff6fUL, 0x3d07f228UL, 0xeb390000UL, 0x3fc8beafUL, 0xaae92cd1UL, - 0xbd073d54UL, 0x6551a000UL, 0x3fc823c1UL, 0x9a631e83UL, 0x3d1e0ddbUL, - 0x85445000UL, 0x3fc7898dUL, 0x70914305UL, 0xbd1c6610UL, 0x8b757000UL, - 0x3fc6f012UL, 0xe59c21e1UL, 0xbd25118dUL, 0xbe8c1000UL, 0x3fc6574eUL, - 0x2c3c2e78UL, 0x3d19cf8bUL, 0x6b544000UL, 0x3fc5bf40UL, 0xeb68981cUL, - 0xbd127023UL, 0xe4a1b000UL, 0x3fc527e5UL, 0xe5697dc7UL, 0x3d2633e8UL, - 0x8333b000UL, 0x3fc4913dUL, 0x54fdb678UL, 0x3d258379UL, 0xa5993000UL, - 0x3fc3fb45UL, 0x7e6a354dUL, 0xbd2cd1d8UL, 0xb0159000UL, 0x3fc365fcUL, - 0x234b7289UL, 0x3cc62fa8UL, 0x0c868000UL, 0x3fc2d161UL, 0xcb81b4a1UL, - 0x3d039d6cUL, 0x2a49c000UL, 0x3fc23d71UL, 0x8fd3df5cUL, 0x3d100d23UL, - 0x7e23f000UL, 0x3fc1aa2bUL, 0x44389934UL, 0x3d2ca78eUL, 0x8227e000UL, - 0x3fc1178eUL, 0xce2d07f2UL, 0x3d21ef78UL, 0xb59e4000UL, 0x3fc08598UL, - 0x7009902cUL, 0xbd27e5ddUL, 0x39dbe000UL, 0x3fbfe891UL, 0x4fa10afdUL, - 0xbd2534d6UL, 0x830a2000UL, 0x3fbec739UL, 0xafe645e0UL, 0xbd2dc068UL, - 0x63844000UL, 0x3fbda727UL, 0x1fa71733UL, 0x3d1a8940UL, 0x01bc4000UL, - 0x3fbc8858UL, 0xc65aacd3UL, 0x3d2646d1UL, 0x8dad6000UL, 0x3fbb6ac8UL, - 0x2bf768e5UL, 0xbd139080UL, 0x40b1c000UL, 0x3fba4e76UL, 0xb94407c8UL, - 0xbd0e42b6UL, 0x5d594000UL, 0x3fb9335eUL, 0x3abd47daUL, 0x3d23115cUL, - 0x2f40e000UL, 0x3fb8197eUL, 0xf96ffdf7UL, 0x3d0f80dcUL, 0x0aeac000UL, - 0x3fb700d3UL, 0xa99ded32UL, 0x3cec1e8dUL, 0x4d97a000UL, 0x3fb5e95aUL, - 0x3c5d1d1eUL, 0xbd2c6906UL, 0x5d208000UL, 0x3fb4d311UL, 0x82f4e1efUL, - 0xbcf53a25UL, 0xa7d1e000UL, 0x3fb3bdf5UL, 0xa5db4ed7UL, 0x3d2cc85eUL, - 0xa4472000UL, 0x3fb2aa04UL, 0xae9c697dUL, 0xbd20b6e8UL, 0xd1466000UL, - 0x3fb1973bUL, 0x560d9e9bUL, 0xbd25325dUL, 0xb59e4000UL, 0x3fb08598UL, - 0x7009902cUL, 0xbd17e5ddUL, 0xc006c000UL, 0x3faeea31UL, 0x4fc93b7bUL, - 0xbd0e113eUL, 0xcdddc000UL, 0x3faccb73UL, 0x47d82807UL, 0xbd1a68f2UL, - 0xd0fb0000UL, 0x3faaaef2UL, 0x353bb42eUL, 0x3d20fc1aUL, 0x149fc000UL, - 0x3fa894aaUL, 0xd05a267dUL, 0xbd197995UL, 0xf2d4c000UL, 0x3fa67c94UL, - 0xec19afa2UL, 0xbd029efbUL, 0xd42e0000UL, 0x3fa466aeUL, 0x75bdfd28UL, - 0xbd2c1673UL, 0x2f8d0000UL, 0x3fa252f3UL, 0xe021b67bUL, 0x3d283e9aUL, - 0x89e74000UL, 0x3fa0415dUL, 0x5cf1d753UL, 0x3d0111c0UL, 0xec148000UL, - 0x3f9c63d2UL, 0x3f9eb2f3UL, 0x3d2578c6UL, 0x28c90000UL, 0x3f984925UL, - 0x325a0c34UL, 0xbd2aa0baUL, 0x25980000UL, 0x3f9432a9UL, 0x928637feUL, - 0x3d098139UL, 0x58938000UL, 0x3f902056UL, 0x06e2f7d2UL, 0xbd23dc5bUL, - 0xa3890000UL, 0x3f882448UL, 0xda74f640UL, 0xbd275577UL, 0x75890000UL, - 0x3f801015UL, 0x999d2be8UL, 0xbd10c76bUL, 0x59580000UL, 0x3f700805UL, - 0xcb31c67bUL, 0x3d2166afUL, 0x00000000UL, 0x00000000UL, 0x00000000UL, - 0x80000000UL, 0xfefa3800UL, 0x3fa62e42UL, 0x93c76730UL, 0x3ceef357UL, - 0x92492492UL, 0x3fc24924UL, 0x00000000UL, 0xbfd00000UL, 0x3d6fb175UL, - 0xbfc5555eUL, 0x55555555UL, 0x3fd55555UL, 0x9999999aUL, 0x3fc99999UL, - 0x00000000UL, 0xbfe00000UL, 0x00000000UL, 0xffffe000UL, 0x00000000UL, - 0xffffe000UL -}; - -//registers, -// input: xmm0 -// scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7 -// rax, rdx, rcx, rbx (tmp) -void MacroAssembler::fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, - XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, - Register eax, Register ecx, Register edx, Register tmp) { - Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2; - Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2; - Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2; - Label L_2TAG_PACKET_10_0_2; - - assert_different_registers(tmp, eax, ecx, edx); - address static_const_table = (address)_static_const_table_log; - - subl(rsp, 104); - movl(Address(rsp, 40), tmp); - lea(tmp, ExternalAddress(static_const_table)); - xorpd(xmm2, xmm2); - movl(eax, 16368); - pinsrw(xmm2, eax, 3); - xorpd(xmm3, xmm3); - movl(edx, 30704); - pinsrw(xmm3, edx, 3); - movsd(xmm0, Address(rsp, 112)); - movapd(xmm1, xmm0); - movl(ecx, 32768); - movdl(xmm4, ecx); - movsd(xmm5, Address(tmp, 2128)); // 0x00000000UL, 0xffffe000UL - pextrw(eax, xmm0, 3); - por(xmm0, xmm2); - psllq(xmm0, 5); - movl(ecx, 16352); - psrlq(xmm0, 34); - rcpss(xmm0, xmm0); - psllq(xmm1, 12); - pshufd(xmm6, xmm5, 228); - psrlq(xmm1, 12); - subl(eax, 16); - cmpl(eax, 32736); - jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2); - - bind(L_2TAG_PACKET_1_0_2); - paddd(xmm0, xmm4); - por(xmm1, xmm3); - movdl(edx, xmm0); - psllq(xmm0, 29); - pand(xmm5, xmm1); - pand(xmm0, xmm6); - subsd(xmm1, xmm5); - mulpd(xmm5, xmm0); - andl(eax, 32752); - subl(eax, ecx); - cvtsi2sdl(xmm7, eax); - mulsd(xmm1, xmm0); - movsd(xmm6, Address(tmp, 2064)); // 0xfefa3800UL, 0x3fa62e42UL - movdqu(xmm3, Address(tmp, 2080)); // 0x92492492UL, 0x3fc24924UL, 0x00000000UL, 0xbfd00000UL - subsd(xmm5, xmm2); - andl(edx, 16711680); - shrl(edx, 12); - movdqu(xmm0, Address(tmp, edx)); - movdqu(xmm4, Address(tmp, 2096)); // 0x3d6fb175UL, 0xbfc5555eUL, 0x55555555UL, 0x3fd55555UL - addsd(xmm1, xmm5); - movdqu(xmm2, Address(tmp, 2112)); // 0x9999999aUL, 0x3fc99999UL, 0x00000000UL, 0xbfe00000UL - mulsd(xmm6, xmm7); - pshufd(xmm5, xmm1, 68); - mulsd(xmm7, Address(tmp, 2072)); // 0x93c76730UL, 0x3ceef357UL, 0x92492492UL, 0x3fc24924UL - mulsd(xmm3, xmm1); - addsd(xmm0, xmm6); - mulpd(xmm4, xmm5); - mulpd(xmm5, xmm5); - pshufd(xmm6, xmm0, 228); - addsd(xmm0, xmm1); - addpd(xmm4, xmm2); - mulpd(xmm3, xmm5); - subsd(xmm6, xmm0); - mulsd(xmm4, xmm1); - pshufd(xmm2, xmm0, 238); - addsd(xmm1, xmm6); - mulsd(xmm5, xmm5); - addsd(xmm7, xmm2); - addpd(xmm4, xmm3); - addsd(xmm1, xmm7); - mulpd(xmm4, xmm5); - addsd(xmm1, xmm4); - pshufd(xmm5, xmm4, 238); - addsd(xmm1, xmm5); - addsd(xmm0, xmm1); - jmp(L_2TAG_PACKET_2_0_2); - - bind(L_2TAG_PACKET_0_0_2); - movsd(xmm0, Address(rsp, 112)); - movdqu(xmm1, xmm0); - addl(eax, 16); - cmpl(eax, 32768); - jcc(Assembler::aboveEqual, L_2TAG_PACKET_3_0_2); - cmpl(eax, 16); - jcc(Assembler::below, L_2TAG_PACKET_4_0_2); - - bind(L_2TAG_PACKET_5_0_2); - addsd(xmm0, xmm0); - jmp(L_2TAG_PACKET_2_0_2); - - bind(L_2TAG_PACKET_6_0_2); - jcc(Assembler::above, L_2TAG_PACKET_5_0_2); - cmpl(edx, 0); - jcc(Assembler::above, L_2TAG_PACKET_5_0_2); - jmp(L_2TAG_PACKET_7_0_2); - - bind(L_2TAG_PACKET_3_0_2); - movdl(edx, xmm1); - psrlq(xmm1, 32); - movdl(ecx, xmm1); - addl(ecx, ecx); - cmpl(ecx, -2097152); - jcc(Assembler::aboveEqual, L_2TAG_PACKET_6_0_2); - orl(edx, ecx); - cmpl(edx, 0); - jcc(Assembler::equal, L_2TAG_PACKET_8_0_2); - - bind(L_2TAG_PACKET_7_0_2); - xorpd(xmm1, xmm1); - xorpd(xmm0, xmm0); - movl(eax, 32752); - pinsrw(xmm1, eax, 3); - movl(edx, 3); - mulsd(xmm0, xmm1); - - bind(L_2TAG_PACKET_9_0_2); - movsd(Address(rsp, 0), xmm0); - movsd(xmm0, Address(rsp, 112)); - fld_d(Address(rsp, 0)); - jmp(L_2TAG_PACKET_10_0_2); - - bind(L_2TAG_PACKET_8_0_2); - xorpd(xmm1, xmm1); - xorpd(xmm0, xmm0); - movl(eax, 49136); - pinsrw(xmm0, eax, 3); - divsd(xmm0, xmm1); - movl(edx, 2); - jmp(L_2TAG_PACKET_9_0_2); - - bind(L_2TAG_PACKET_4_0_2); - movdl(edx, xmm1); - psrlq(xmm1, 32); - movdl(ecx, xmm1); - orl(edx, ecx); - cmpl(edx, 0); - jcc(Assembler::equal, L_2TAG_PACKET_8_0_2); - xorpd(xmm1, xmm1); - movl(eax, 18416); - pinsrw(xmm1, eax, 3); - mulsd(xmm0, xmm1); - movapd(xmm1, xmm0); - pextrw(eax, xmm0, 3); - por(xmm0, xmm2); - psllq(xmm0, 5); - movl(ecx, 18416); - psrlq(xmm0, 34); - rcpss(xmm0, xmm0); - psllq(xmm1, 12); - pshufd(xmm6, xmm5, 228); - psrlq(xmm1, 12); - jmp(L_2TAG_PACKET_1_0_2); - - bind(L_2TAG_PACKET_2_0_2); - movsd(Address(rsp, 24), xmm0); - fld_d(Address(rsp, 24)); - - bind(L_2TAG_PACKET_10_0_2); - movl(tmp, Address(rsp, 40)); -} diff --git a/src/hotspot/cpu/x86/macroAssembler_x86_32_log10.cpp b/src/hotspot/cpu/x86/macroAssembler_x86_32_log10.cpp deleted file mode 100644 index 1fc5f49cf75..00000000000 --- a/src/hotspot/cpu/x86/macroAssembler_x86_32_log10.cpp +++ /dev/null @@ -1,357 +0,0 @@ -/* -* Copyright (c) 2016, 2021, Intel Corporation. All rights reserved. -* Intel Math Library (LIBM) Source Code -* -* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -* -* This code is free software; you can redistribute it and/or modify it -* under the terms of the GNU General Public License version 2 only, as -* published by the Free Software Foundation. -* -* This code is distributed in the hope that it will be useful, but WITHOUT -* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -* version 2 for more details (a copy is included in the LICENSE file that -* accompanied this code). -* -* You should have received a copy of the GNU General Public License version -* 2 along with this work; if not, write to the Free Software Foundation, -* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -* -* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -* or visit www.oracle.com if you need additional information or have any -* questions. -* -*/ - -#include "asm/assembler.hpp" -#include "asm/assembler.inline.hpp" -#include "macroAssembler_x86.hpp" -#include "runtime/stubRoutines.hpp" -#include "utilities/globalDefinitions.hpp" - -/******************************************************************************/ -// ALGORITHM DESCRIPTION - LOG10() -// --------------------- -// -// Let x=2^k * mx, mx in [1,2) -// -// Get B~1/mx based on the output of rcpss instruction (B0) -// B = int((B0*LH*2^7+0.5))/2^7 -// LH is a short approximation for log10(e) -// -// Reduced argument: r=B*mx-LH (computed accurately in high and low parts) -// -// Result: k*log10(2) - log(B) + p(r) -// p(r) is a degree 7 polynomial -// -log(B) read from data table (high, low parts) -// Result is formed from high and low parts -// -// Special cases: -// log10(0) = -INF with divide-by-zero exception raised -// log10(1) = +0 -// log10(x) = NaN with invalid exception raised if x < -0, including -INF -// log10(+INF) = +INF -// -/******************************************************************************/ - -// The 32 bit code is at most SSE2 compliant - -ATTRIBUTE_ALIGNED(16) static const juint _static_const_table_log10[] = -{ - 0x509f7800UL, 0x3fd34413UL, 0x1f12b358UL, 0x3d1fef31UL, 0x80333400UL, - 0x3fd32418UL, 0xc671d9d0UL, 0xbcf542bfUL, 0x51195000UL, 0x3fd30442UL, - 0x78a4b0c3UL, 0x3d18216aUL, 0x6fc79400UL, 0x3fd2e490UL, 0x80fa389dUL, - 0xbc902869UL, 0x89d04000UL, 0x3fd2c502UL, 0x75c2f564UL, 0x3d040754UL, - 0x4ddd1c00UL, 0x3fd2a598UL, 0xd219b2c3UL, 0xbcfa1d84UL, 0x6baa7c00UL, - 0x3fd28651UL, 0xfd9abec1UL, 0x3d1be6d3UL, 0x94028800UL, 0x3fd2672dUL, - 0xe289a455UL, 0xbd1ede5eUL, 0x78b86400UL, 0x3fd2482cUL, 0x6734d179UL, - 0x3d1fe79bUL, 0xcca3c800UL, 0x3fd2294dUL, 0x981a40b8UL, 0xbced34eaUL, - 0x439c5000UL, 0x3fd20a91UL, 0xcc392737UL, 0xbd1a9cc3UL, 0x92752c00UL, - 0x3fd1ebf6UL, 0x03c9afe7UL, 0x3d1e98f8UL, 0x6ef8dc00UL, 0x3fd1cd7dUL, - 0x71dae7f4UL, 0x3d08a86cUL, 0x8fe4dc00UL, 0x3fd1af25UL, 0xee9185a1UL, - 0xbcff3412UL, 0xace59400UL, 0x3fd190eeUL, 0xc2cab353UL, 0x3cf17ed9UL, - 0x7e925000UL, 0x3fd172d8UL, 0x6952c1b2UL, 0x3cf1521cUL, 0xbe694400UL, - 0x3fd154e2UL, 0xcacb79caUL, 0xbd0bdc78UL, 0x26cbac00UL, 0x3fd1370dUL, - 0xf71f4de1UL, 0xbd01f8beUL, 0x72fa0800UL, 0x3fd11957UL, 0x55bf910bUL, - 0x3c946e2bUL, 0x5f106000UL, 0x3fd0fbc1UL, 0x39e639c1UL, 0x3d14a84bUL, - 0xa802a800UL, 0x3fd0de4aUL, 0xd3f31d5dUL, 0xbd178385UL, 0x0b992000UL, - 0x3fd0c0f3UL, 0x3843106fUL, 0xbd1f602fUL, 0x486ce800UL, 0x3fd0a3baUL, - 0x8819497cUL, 0x3cef987aUL, 0x1de49400UL, 0x3fd086a0UL, 0x1caa0467UL, - 0x3d0faec7UL, 0x4c30cc00UL, 0x3fd069a4UL, 0xa4424372UL, 0xbd1618fcUL, - 0x94490000UL, 0x3fd04cc6UL, 0x946517d2UL, 0xbd18384bUL, 0xb7e84000UL, - 0x3fd03006UL, 0xe0109c37UL, 0xbd19a6acUL, 0x798a0c00UL, 0x3fd01364UL, - 0x5121e864UL, 0xbd164cf7UL, 0x38ce8000UL, 0x3fcfedbfUL, 0x46214d1aUL, - 0xbcbbc402UL, 0xc8e62000UL, 0x3fcfb4efUL, 0xdab93203UL, 0x3d1e0176UL, - 0x2cb02800UL, 0x3fcf7c5aUL, 0x2a2ea8e4UL, 0xbcfec86aUL, 0xeeeaa000UL, - 0x3fcf43fdUL, 0xc18e49a4UL, 0x3cf110a8UL, 0x9bb6e800UL, 0x3fcf0bdaUL, - 0x923cc9c0UL, 0xbd15ce99UL, 0xc093f000UL, 0x3fced3efUL, 0x4d4b51e9UL, - 0x3d1a04c7UL, 0xec58f800UL, 0x3fce9c3cUL, 0x163cad59UL, 0x3cac8260UL, - 0x9a907000UL, 0x3fce2d7dUL, 0x3fa93646UL, 0x3ce4a1c0UL, 0x37311000UL, - 0x3fcdbf99UL, 0x32abd1fdUL, 0x3d07ea9dUL, 0x6744b800UL, 0x3fcd528cUL, - 0x4dcbdfd4UL, 0xbd1b08e2UL, 0xe36de800UL, 0x3fcce653UL, 0x0b7b7f7fUL, - 0xbd1b8f03UL, 0x77506800UL, 0x3fcc7aecUL, 0xa821c9fbUL, 0x3d13c163UL, - 0x00ff8800UL, 0x3fcc1053UL, 0x536bca76UL, 0xbd074ee5UL, 0x70719800UL, - 0x3fcba684UL, 0xd7da9b6bUL, 0xbd1fbf16UL, 0xc6f8d800UL, 0x3fcb3d7dUL, - 0xe2220bb3UL, 0x3d1a295dUL, 0x16c15800UL, 0x3fcad53cUL, 0xe724911eUL, - 0xbcf55822UL, 0x82533800UL, 0x3fca6dbcUL, 0x6d982371UL, 0x3cac567cUL, - 0x3c19e800UL, 0x3fca06fcUL, 0x84d17d80UL, 0x3d1da204UL, 0x85ef8000UL, - 0x3fc9a0f8UL, 0x54466a6aUL, 0xbd002204UL, 0xb0ac2000UL, 0x3fc93baeUL, - 0xd601fd65UL, 0x3d18840cUL, 0x1bb9b000UL, 0x3fc8d71cUL, 0x7bf58766UL, - 0xbd14f897UL, 0x34aae800UL, 0x3fc8733eUL, 0x3af6ac24UL, 0xbd0f5c45UL, - 0x76d68000UL, 0x3fc81012UL, 0x4303e1a1UL, 0xbd1f9a80UL, 0x6af57800UL, - 0x3fc7ad96UL, 0x43fbcb46UL, 0x3cf4c33eUL, 0xa6c51000UL, 0x3fc74bc7UL, - 0x70f0eac5UL, 0xbd192e3bUL, 0xccab9800UL, 0x3fc6eaa3UL, 0xc0093dfeUL, - 0xbd0faf15UL, 0x8b60b800UL, 0x3fc68a28UL, 0xde78d5fdUL, 0xbc9ea4eeUL, - 0x9d987000UL, 0x3fc62a53UL, 0x962bea6eUL, 0xbd194084UL, 0xc9b0e800UL, - 0x3fc5cb22UL, 0x888dd999UL, 0x3d1fe201UL, 0xe1634800UL, 0x3fc56c93UL, - 0x16ada7adUL, 0x3d1b1188UL, 0xc176c000UL, 0x3fc50ea4UL, 0x4159b5b5UL, - 0xbcf09c08UL, 0x51766000UL, 0x3fc4b153UL, 0x84393d23UL, 0xbcf6a89cUL, - 0x83695000UL, 0x3fc4549dUL, 0x9f0b8bbbUL, 0x3d1c4b8cUL, 0x538d5800UL, - 0x3fc3f881UL, 0xf49df747UL, 0x3cf89b99UL, 0xc8138000UL, 0x3fc39cfcUL, - 0xd503b834UL, 0xbd13b99fUL, 0xf0df0800UL, 0x3fc3420dUL, 0xf011b386UL, - 0xbd05d8beUL, 0xe7466800UL, 0x3fc2e7b2UL, 0xf39c7bc2UL, 0xbd1bb94eUL, - 0xcdd62800UL, 0x3fc28de9UL, 0x05e6d69bUL, 0xbd10ed05UL, 0xd015d800UL, - 0x3fc234b0UL, 0xe29b6c9dUL, 0xbd1ff967UL, 0x224ea800UL, 0x3fc1dc06UL, - 0x727711fcUL, 0xbcffb30dUL, 0x01540000UL, 0x3fc183e8UL, 0x39786c5aUL, - 0x3cc23f57UL, 0xb24d9800UL, 0x3fc12c54UL, 0xc905a342UL, 0x3d003a1dUL, - 0x82835800UL, 0x3fc0d54aUL, 0x9b9920c0UL, 0x3d03b25aUL, 0xc72ac000UL, - 0x3fc07ec7UL, 0x46f26a24UL, 0x3cf0fa41UL, 0xdd35d800UL, 0x3fc028caUL, - 0x41d9d6dcUL, 0x3d034a65UL, 0x52474000UL, 0x3fbfa6a4UL, 0x44f66449UL, - 0x3d19cad3UL, 0x2da3d000UL, 0x3fbefcb8UL, 0x67832999UL, 0x3d18400fUL, - 0x32a10000UL, 0x3fbe53ceUL, 0x9c0e3b1aUL, 0xbcff62fdUL, 0x556b7000UL, - 0x3fbdabe3UL, 0x02976913UL, 0xbcf8243bUL, 0x97e88000UL, 0x3fbd04f4UL, - 0xec793797UL, 0x3d1c0578UL, 0x09647000UL, 0x3fbc5effUL, 0x05fc0565UL, - 0xbd1d799eUL, 0xc6426000UL, 0x3fbbb9ffUL, 0x4625f5edUL, 0x3d1f5723UL, - 0xf7afd000UL, 0x3fbb15f3UL, 0xdd5aae61UL, 0xbd1a7e1eUL, 0xd358b000UL, - 0x3fba72d8UL, 0x3314e4d3UL, 0x3d17bc91UL, 0x9b1f5000UL, 0x3fb9d0abUL, - 0x9a4d514bUL, 0x3cf18c9bUL, 0x9cd4e000UL, 0x3fb92f69UL, 0x7e4496abUL, - 0x3cf1f96dUL, 0x31f4f000UL, 0x3fb88f10UL, 0xf56479e7UL, 0x3d165818UL, - 0xbf628000UL, 0x3fb7ef9cUL, 0x26bf486dUL, 0xbd1113a6UL, 0xb526b000UL, - 0x3fb7510cUL, 0x1a1c3384UL, 0x3ca9898dUL, 0x8e31e000UL, 0x3fb6b35dUL, - 0xb3875361UL, 0xbd0661acUL, 0xd01de000UL, 0x3fb6168cUL, 0x2a7cacfaUL, - 0xbd1bdf10UL, 0x0af23000UL, 0x3fb57a98UL, 0xff868816UL, 0x3cf046d0UL, - 0xd8ea0000UL, 0x3fb4df7cUL, 0x1515fbe7UL, 0xbd1fd529UL, 0xde3b2000UL, - 0x3fb44538UL, 0x6e59a132UL, 0x3d1faeeeUL, 0xc8df9000UL, 0x3fb3abc9UL, - 0xf1322361UL, 0xbd198807UL, 0x505f1000UL, 0x3fb3132dUL, 0x0888e6abUL, - 0x3d1e5380UL, 0x359bd000UL, 0x3fb27b61UL, 0xdfbcbb22UL, 0xbcfe2724UL, - 0x429ee000UL, 0x3fb1e463UL, 0x6eb4c58cUL, 0xbcfe4dd6UL, 0x4a673000UL, - 0x3fb14e31UL, 0x4ce1ac9bUL, 0x3d1ba691UL, 0x28b96000UL, 0x3fb0b8c9UL, - 0x8c7813b8UL, 0xbd0b3872UL, 0xc1f08000UL, 0x3fb02428UL, 0xc2bc8c2cUL, - 0x3cb5ea6bUL, 0x05a1a000UL, 0x3faf209cUL, 0x72e8f18eUL, 0xbce8df84UL, - 0xc0b5e000UL, 0x3fadfa6dUL, 0x9fdef436UL, 0x3d087364UL, 0xaf416000UL, - 0x3facd5c2UL, 0x1068c3a9UL, 0x3d0827e7UL, 0xdb356000UL, 0x3fabb296UL, - 0x120a34d3UL, 0x3d101a9fUL, 0x5dfea000UL, 0x3faa90e6UL, 0xdaded264UL, - 0xbd14c392UL, 0x6034c000UL, 0x3fa970adUL, 0x1c9d06a9UL, 0xbd1b705eUL, - 0x194c6000UL, 0x3fa851e8UL, 0x83996ad9UL, 0xbd0117bcUL, 0xcf4ac000UL, - 0x3fa73492UL, 0xb1a94a62UL, 0xbca5ea42UL, 0xd67b4000UL, 0x3fa618a9UL, - 0x75aed8caUL, 0xbd07119bUL, 0x9126c000UL, 0x3fa4fe29UL, 0x5291d533UL, - 0x3d12658fUL, 0x6f4d4000UL, 0x3fa3e50eUL, 0xcd2c5cd9UL, 0x3d1d5c70UL, - 0xee608000UL, 0x3fa2cd54UL, 0xd1008489UL, 0x3d1a4802UL, 0x9900e000UL, - 0x3fa1b6f9UL, 0x54fb5598UL, 0xbd16593fUL, 0x06bb6000UL, 0x3fa0a1f9UL, - 0x64ef57b4UL, 0xbd17636bUL, 0xb7940000UL, 0x3f9f1c9fUL, 0xee6a4737UL, - 0x3cb5d479UL, 0x91aa0000UL, 0x3f9cf7f5UL, 0x3a16373cUL, 0x3d087114UL, - 0x156b8000UL, 0x3f9ad5edUL, 0x836c554aUL, 0x3c6900b0UL, 0xd4764000UL, - 0x3f98b67fUL, 0xed12f17bUL, 0xbcffc974UL, 0x77dec000UL, 0x3f9699a7UL, - 0x232ce7eaUL, 0x3d1e35bbUL, 0xbfbf4000UL, 0x3f947f5dUL, 0xd84ffa6eUL, - 0x3d0e0a49UL, 0x82c7c000UL, 0x3f92679cUL, 0x8d170e90UL, 0xbd14d9f2UL, - 0xadd20000UL, 0x3f90525dUL, 0x86d9f88eUL, 0x3cdeb986UL, 0x86f10000UL, - 0x3f8c7f36UL, 0xb9e0a517UL, 0x3ce29faaUL, 0xb75c8000UL, 0x3f885e9eUL, - 0x542568cbUL, 0xbd1f7bdbUL, 0x46b30000UL, 0x3f8442e8UL, 0xb954e7d9UL, - 0x3d1e5287UL, 0xb7e60000UL, 0x3f802c07UL, 0x22da0b17UL, 0xbd19fb27UL, - 0x6c8b0000UL, 0x3f7833e3UL, 0x821271efUL, 0xbd190f96UL, 0x29910000UL, - 0x3f701936UL, 0xbc3491a5UL, 0xbd1bcf45UL, 0x354a0000UL, 0x3f600fe3UL, - 0xc0ff520aUL, 0xbd19d71cUL, 0x00000000UL, 0x00000000UL, 0x00000000UL, - 0x00000000UL, 0x509f7800UL, 0x3f934413UL, 0x1f12b358UL, 0x3cdfef31UL, - 0xc1a5f12eUL, 0x40358874UL, 0x64d4ef0dUL, 0xc0089309UL, 0x385593b1UL, - 0xc025c917UL, 0xdc963467UL, 0x3ffc6a02UL, 0x7f9d3aa1UL, 0x4016ab9fUL, - 0xdc77b115UL, 0xbff27af2UL, 0xf8000000UL, 0xffffffffUL, 0x00000000UL, - 0xffffe000UL, 0x00000000UL, 0x3fdbc000UL, 0xbf2e4108UL, 0x3f5a7a6cUL -}; -//registers, -// input: xmm0 -// scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7 -// rax, rdx, rcx, rbx (tmp) - -void MacroAssembler::fast_log10(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, - XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, - Register eax, Register ecx, Register edx, Register tmp) { - - Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2; - Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2; - Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, L_2TAG_PACKET_10_0_2; - - assert_different_registers(tmp, eax, ecx, edx); - - address static_const_table_log10 = (address)_static_const_table_log10; - - subl(rsp, 104); - movl(Address(rsp, 40), tmp); - lea(tmp, ExternalAddress(static_const_table_log10)); - xorpd(xmm2, xmm2); - movl(eax, 16368); - pinsrw(xmm2, eax, 3); - movl(ecx, 1054736384); - movdl(xmm7, ecx); - xorpd(xmm3, xmm3); - movl(edx, 30704); - pinsrw(xmm3, edx, 3); - movsd(xmm0, Address(rsp, 112)); - movdqu(xmm1, xmm0); - movl(edx, 32768); - movdl(xmm4, edx); - movdqu(xmm5, Address(tmp, 2128)); //0x3ffc6a02UL, 0x7f9d3aa1UL, 0x4016ab9fUL, 0xdc77b115UL - pextrw(eax, xmm0, 3); - por(xmm0, xmm2); - movl(ecx, 16352); - psllq(xmm0, 5); - movsd(xmm2, Address(tmp, 2144)); //0xbff27af2UL, 0xf8000000UL, 0xffffffffUL, 0x00000000UL - psrlq(xmm0, 34); - rcpss(xmm0, xmm0); - psllq(xmm1, 12); - pshufd(xmm6, xmm5, 78); - psrlq(xmm1, 12); - subl(eax, 16); - cmpl(eax, 32736); - jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2); - - bind(L_2TAG_PACKET_1_0_2); - mulss(xmm0, xmm7); - por(xmm1, xmm3); - andpd(xmm5, xmm1); - paddd(xmm0, xmm4); - subsd(xmm1, xmm5); - movdl(edx, xmm0); - psllq(xmm0, 29); - andpd(xmm0, xmm6); - andl(eax, 32752); - subl(eax, ecx); - cvtsi2sdl(xmm7, eax); - mulpd(xmm5, xmm0); - mulsd(xmm1, xmm0); - movsd(xmm6, Address(tmp, 2064)); //0xbd19d71cUL, 0x00000000UL, 0x00000000UL, 0x00000000UL - movdqu(xmm3, Address(tmp, 2080)); //0x00000000UL, 0x509f7800UL, 0x3f934413UL, 0x1f12b358UL - subsd(xmm5, xmm2); - andl(edx, 16711680); - shrl(edx, 12); - movdqu(xmm0, Address(tmp, edx, Address::times_1, -1504)); - movdqu(xmm4, Address(tmp, 2096)); //0x3cdfef31UL, 0xc1a5f12eUL, 0x40358874UL, 0x64d4ef0dUL - addsd(xmm1, xmm5); - movdqu(xmm2, Address(tmp, 2112)); //0xc0089309UL, 0x385593b1UL, 0xc025c917UL, 0xdc963467UL - mulsd(xmm6, xmm7); - pshufd(xmm5, xmm1, 68); - mulsd(xmm7, Address(tmp, 2072)); //0x00000000UL, 0x00000000UL, 0x00000000UL, 0x509f7800UL - mulsd(xmm3, xmm1); - addsd(xmm0, xmm6); - mulpd(xmm4, xmm5); - movsd(xmm6, Address(tmp, 2152)); //0xffffffffUL, 0x00000000UL, 0xffffe000UL, 0x00000000UL - mulpd(xmm5, xmm5); - addpd(xmm4, xmm2); - mulpd(xmm3, xmm5); - pshufd(xmm2, xmm0, 228); - addsd(xmm0, xmm1); - mulsd(xmm4, xmm1); - subsd(xmm2, xmm0); - mulsd(xmm6, xmm1); - addsd(xmm1, xmm2); - pshufd(xmm2, xmm0, 238); - mulsd(xmm5, xmm5); - addsd(xmm7, xmm2); - addsd(xmm1, xmm6); - addpd(xmm4, xmm3); - addsd(xmm1, xmm7); - mulpd(xmm4, xmm5); - addsd(xmm1, xmm4); - pshufd(xmm5, xmm4, 238); - addsd(xmm1, xmm5); - addsd(xmm0, xmm1); - jmp(L_2TAG_PACKET_2_0_2); - - bind(L_2TAG_PACKET_0_0_2); - movsd(xmm0, Address(rsp, 112)); //0xbcfa1d84UL, 0x6baa7c00UL, 0x3fd28651UL, 0xfd9abec1UL - movdqu(xmm1, xmm0); - addl(eax, 16); - cmpl(eax, 32768); - jcc(Assembler::aboveEqual, L_2TAG_PACKET_3_0_2); - cmpl(eax, 16); - jcc(Assembler::below, L_2TAG_PACKET_4_0_2); - - bind(L_2TAG_PACKET_5_0_2); - addsd(xmm0, xmm0); - jmp(L_2TAG_PACKET_2_0_2); - - bind(L_2TAG_PACKET_6_0_2); - jcc(Assembler::above, L_2TAG_PACKET_5_0_2); - cmpl(edx, 0); - jcc(Assembler::above, L_2TAG_PACKET_5_0_2); - jmp(L_2TAG_PACKET_7_0_2); - - bind(L_2TAG_PACKET_3_0_2); - movdl(edx, xmm1); - psrlq(xmm1, 32); - movdl(ecx, xmm1); - addl(ecx, ecx); - cmpl(ecx, -2097152); - jcc(Assembler::aboveEqual, L_2TAG_PACKET_6_0_2); - orl(edx, ecx); - cmpl(edx, 0); - jcc(Assembler::equal, L_2TAG_PACKET_8_0_2); - - bind(L_2TAG_PACKET_7_0_2); - xorpd(xmm1, xmm1); - xorpd(xmm0, xmm0); - movl(eax, 32752); - pinsrw(xmm1, eax, 3); - movl(edx, 9); - mulsd(xmm0, xmm1); - - bind(L_2TAG_PACKET_9_0_2); - movsd(Address(rsp, 0), xmm0); - movsd(xmm0, Address(rsp, 112)); //0xbcfa1d84UL, 0x6baa7c00UL, 0x3fd28651UL, 0xfd9abec1UL - fld_d(Address(rsp, 0)); - jmp(L_2TAG_PACKET_10_0_2); - - bind(L_2TAG_PACKET_8_0_2); - xorpd(xmm1, xmm1); - xorpd(xmm0, xmm0); - movl(eax, 49136); - pinsrw(xmm0, eax, 3); - divsd(xmm0, xmm1); - movl(edx, 8); - jmp(L_2TAG_PACKET_9_0_2); - - bind(L_2TAG_PACKET_4_0_2); - movdl(edx, xmm1); - psrlq(xmm1, 32); - movdl(ecx, xmm1); - orl(edx, ecx); - cmpl(edx, 0); - jcc(Assembler::equal, L_2TAG_PACKET_8_0_2); - xorpd(xmm1, xmm1); - movl(eax, 18416); - pinsrw(xmm1, eax, 3); - mulsd(xmm0, xmm1); - xorpd(xmm2, xmm2); - movl(eax, 16368); - pinsrw(xmm2, eax, 3); - movdqu(xmm1, xmm0); - pextrw(eax, xmm0, 3); - por(xmm0, xmm2); - movl(ecx, 18416); - psllq(xmm0, 5); - movsd(xmm2, Address(tmp, 2144)); //0xbff27af2UL, 0xf8000000UL, 0xffffffffUL, 0x00000000UL - psrlq(xmm0, 34); - rcpss(xmm0, xmm0); - psllq(xmm1, 12); - pshufd(xmm6, xmm5, 78); - psrlq(xmm1, 12); - jmp(L_2TAG_PACKET_1_0_2); - - bind(L_2TAG_PACKET_2_0_2); - movsd(Address(rsp, 24), xmm0); - fld_d(Address(rsp, 24)); - - bind(L_2TAG_PACKET_10_0_2); - movl(tmp, Address(rsp, 40)); - -} diff --git a/src/hotspot/cpu/x86/macroAssembler_x86_32_pow.cpp b/src/hotspot/cpu/x86/macroAssembler_x86_32_pow.cpp deleted file mode 100644 index 2d8a8ef91ac..00000000000 --- a/src/hotspot/cpu/x86/macroAssembler_x86_32_pow.cpp +++ /dev/null @@ -1,1855 +0,0 @@ -/* -* Copyright (c) 2016, 2021, Intel Corporation. All rights reserved. -* Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved. -* Intel Math Library (LIBM) Source Code -* -* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -* -* This code is free software; you can redistribute it and/or modify it -* under the terms of the GNU General Public License version 2 only, as -* published by the Free Software Foundation. -* -* This code is distributed in the hope that it will be useful, but WITHOUT -* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -* version 2 for more details (a copy is included in the LICENSE file that -* accompanied this code). -* -* You should have received a copy of the GNU General Public License version -* 2 along with this work; if not, write to the Free Software Foundation, -* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -* -* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -* or visit www.oracle.com if you need additional information or have any -* questions. -* -*/ - -#include "asm/assembler.hpp" -#include "asm/assembler.inline.hpp" -#include "macroAssembler_x86.hpp" -#include "runtime/stubRoutines.hpp" -#include "utilities/globalDefinitions.hpp" - -/******************************************************************************/ -// ALGORITHM DESCRIPTION - POW() -// --------------------- -// -// Let x=2^k * mx, mx in [1,2) -// -// log2(x) calculation: -// -// Get B~1/mx based on the output of rcpps instruction (B0) -// B = int((B0*LH*2^9+0.5))/2^9 -// LH is a short approximation for log2(e) -// -// Reduced argument, scaled by LH: -// r=B*mx-LH (computed accurately in high and low parts) -// -// log2(x) result: k - log2(B) + p(r) -// p(r) is a degree 8 polynomial -// -log2(B) read from data table (high, low parts) -// log2(x) is formed from high and low parts -// For |x| in [1-1/32, 1+1/16), a slower but more accurate computation -// based om the same table design is performed. -// -// Main path is taken if | floor(log2(|log2(|x|)|) + floor(log2|y|) | < 8, -// to filter out all potential OF/UF cases. -// exp2(y*log2(x)) is computed using an 8-bit index table and a degree 5 -// polynomial -// -// Special cases: -// pow(-0,y) = -INF and raises the divide-by-zero exception for y an odd -// integer < 0. -// pow(-0,y) = +INF and raises the divide-by-zero exception for y < 0 and -// not an odd integer. -// pow(-0,y) = -0 for y an odd integer > 0. -// pow(-0,y) = +0 for y > 0 and not an odd integer. -// pow(-1,-INF) = NaN. -// pow(+1,y) = NaN for any y, even a NaN. -// pow(x,-0) = 1 for any x, even a NaN. -// pow(x,y) = a NaN and raises the invalid exception for finite x < 0 and -// finite non-integer y. -// pow(x,-INF) = +INF for |x|<1. -// pow(x,-INF) = +0 for |x|>1. -// pow(x,+INF) = +0 for |x|<1. -// pow(x,+INF) = +INF for |x|>1. -// pow(-INF,y) = -0 for y an odd integer < 0. -// pow(-INF,y) = +0 for y < 0 and not an odd integer. -// pow(-INF,y) = -INF for y an odd integer > 0. -// pow(-INF,y) = +INF for y > 0 and not an odd integer. -// pow(+INF,y) = +0 for y <0. -// pow(+INF,y) = +INF for y >0. -// -/******************************************************************************/ - -// The 32 bit code is at most SSE2 compliant -ATTRIBUTE_ALIGNED(16) static const juint _static_const_table_pow[] = -{ - 0x00000000UL, 0xbfd61a00UL, 0x00000000UL, 0xbf5dabe1UL, 0xf8000000UL, - 0xffffffffUL, 0x00000000UL, 0xfffff800UL, 0x00000000UL, 0x3ff00000UL, - 0x00000000UL, 0x00000000UL, 0x20000000UL, 0x3feff00aUL, 0x96621f95UL, - 0x3e5b1856UL, 0xe0000000UL, 0x3fefe019UL, 0xe5916f9eUL, 0xbe325278UL, - 0x00000000UL, 0x3fefd02fUL, 0x859a1062UL, 0x3e595fb7UL, 0xc0000000UL, - 0x3fefc049UL, 0xb245f18fUL, 0xbe529c38UL, 0xe0000000UL, 0x3fefb069UL, - 0xad2880a7UL, 0xbe501230UL, 0x60000000UL, 0x3fefa08fUL, 0xc8e72420UL, - 0x3e597bd1UL, 0x80000000UL, 0x3fef90baUL, 0xc30c4500UL, 0xbe5d6c75UL, - 0xe0000000UL, 0x3fef80eaUL, 0x02c63f43UL, 0x3e2e1318UL, 0xc0000000UL, - 0x3fef7120UL, 0xb3d4ccccUL, 0xbe44c52aUL, 0x00000000UL, 0x3fef615cUL, - 0xdbd91397UL, 0xbe4e7d6cUL, 0xa0000000UL, 0x3fef519cUL, 0x65c5cd68UL, - 0xbe522dc8UL, 0xa0000000UL, 0x3fef41e2UL, 0x46d1306cUL, 0xbe5a840eUL, - 0xe0000000UL, 0x3fef322dUL, 0xd2980e94UL, 0x3e5071afUL, 0xa0000000UL, - 0x3fef227eUL, 0x773abadeUL, 0xbe5891e5UL, 0xa0000000UL, 0x3fef12d4UL, - 0xdc6bf46bUL, 0xbe5cccbeUL, 0xe0000000UL, 0x3fef032fUL, 0xbc7247faUL, - 0xbe2bab83UL, 0x80000000UL, 0x3feef390UL, 0xbcaa1e46UL, 0xbe53bb3bUL, - 0x60000000UL, 0x3feee3f6UL, 0x5f6c682dUL, 0xbe54c619UL, 0x80000000UL, - 0x3feed461UL, 0x5141e368UL, 0xbe4b6d86UL, 0xe0000000UL, 0x3feec4d1UL, - 0xec678f76UL, 0xbe369af6UL, 0x80000000UL, 0x3feeb547UL, 0x41301f55UL, - 0xbe2d4312UL, 0x60000000UL, 0x3feea5c2UL, 0x676da6bdUL, 0xbe4d8dd0UL, - 0x60000000UL, 0x3fee9642UL, 0x57a891c4UL, 0x3e51f991UL, 0xa0000000UL, - 0x3fee86c7UL, 0xe4eb491eUL, 0x3e579bf9UL, 0x20000000UL, 0x3fee7752UL, - 0xfddc4a2cUL, 0xbe3356e6UL, 0xc0000000UL, 0x3fee67e1UL, 0xd75b5bf1UL, - 0xbe449531UL, 0x80000000UL, 0x3fee5876UL, 0xbd423b8eUL, 0x3df54fe4UL, - 0x60000000UL, 0x3fee4910UL, 0x330e51b9UL, 0x3e54289cUL, 0x80000000UL, - 0x3fee39afUL, 0x8651a95fUL, 0xbe55aad6UL, 0xa0000000UL, 0x3fee2a53UL, - 0x5e98c708UL, 0xbe2fc4a9UL, 0xe0000000UL, 0x3fee1afcUL, 0x0989328dUL, - 0x3e23958cUL, 0x40000000UL, 0x3fee0babUL, 0xee642abdUL, 0xbe425dd8UL, - 0xa0000000UL, 0x3fedfc5eUL, 0xc394d236UL, 0x3e526362UL, 0x20000000UL, - 0x3feded17UL, 0xe104aa8eUL, 0x3e4ce247UL, 0xc0000000UL, 0x3fedddd4UL, - 0x265a9be4UL, 0xbe5bb77aUL, 0x40000000UL, 0x3fedce97UL, 0x0ecac52fUL, - 0x3e4a7cb1UL, 0xe0000000UL, 0x3fedbf5eUL, 0x124cb3b8UL, 0x3e257024UL, - 0x80000000UL, 0x3fedb02bUL, 0xe6d4febeUL, 0xbe2033eeUL, 0x20000000UL, - 0x3feda0fdUL, 0x39cca00eUL, 0xbe3ddabcUL, 0xc0000000UL, 0x3fed91d3UL, - 0xef8a552aUL, 0xbe543390UL, 0x40000000UL, 0x3fed82afUL, 0xb8e85204UL, - 0x3e513850UL, 0xe0000000UL, 0x3fed738fUL, 0x3d59fe08UL, 0xbe5db728UL, - 0x40000000UL, 0x3fed6475UL, 0x3aa7ead1UL, 0x3e58804bUL, 0xc0000000UL, - 0x3fed555fUL, 0xf8a35ba9UL, 0xbe5298b0UL, 0x00000000UL, 0x3fed464fUL, - 0x9a88dd15UL, 0x3e5a8cdbUL, 0x40000000UL, 0x3fed3743UL, 0xb0b0a190UL, - 0x3e598635UL, 0x80000000UL, 0x3fed283cUL, 0xe2113295UL, 0xbe5c1119UL, - 0x80000000UL, 0x3fed193aUL, 0xafbf1728UL, 0xbe492e9cUL, 0x60000000UL, - 0x3fed0a3dUL, 0xe4a4ccf3UL, 0x3e19b90eUL, 0x20000000UL, 0x3fecfb45UL, - 0xba3cbeb8UL, 0x3e406b50UL, 0xc0000000UL, 0x3fecec51UL, 0x110f7dddUL, - 0x3e0d6806UL, 0x40000000UL, 0x3fecdd63UL, 0x7dd7d508UL, 0xbe5a8943UL, - 0x80000000UL, 0x3fecce79UL, 0x9b60f271UL, 0xbe50676aUL, 0x80000000UL, - 0x3fecbf94UL, 0x0b9ad660UL, 0x3e59174fUL, 0x60000000UL, 0x3fecb0b4UL, - 0x00823d9cUL, 0x3e5bbf72UL, 0x20000000UL, 0x3feca1d9UL, 0x38a6ec89UL, - 0xbe4d38f9UL, 0x80000000UL, 0x3fec9302UL, 0x3a0b7d8eUL, 0x3e53dbfdUL, - 0xc0000000UL, 0x3fec8430UL, 0xc6826b34UL, 0xbe27c5c9UL, 0xc0000000UL, - 0x3fec7563UL, 0x0c706381UL, 0xbe593653UL, 0x60000000UL, 0x3fec669bUL, - 0x7df34ec7UL, 0x3e461ab5UL, 0xe0000000UL, 0x3fec57d7UL, 0x40e5e7e8UL, - 0xbe5c3daeUL, 0x00000000UL, 0x3fec4919UL, 0x5602770fUL, 0xbe55219dUL, - 0xc0000000UL, 0x3fec3a5eUL, 0xec7911ebUL, 0x3e5a5d25UL, 0x60000000UL, - 0x3fec2ba9UL, 0xb39ea225UL, 0xbe53c00bUL, 0x80000000UL, 0x3fec1cf8UL, - 0x967a212eUL, 0x3e5a8ddfUL, 0x60000000UL, 0x3fec0e4cUL, 0x580798bdUL, - 0x3e5f53abUL, 0x00000000UL, 0x3febffa5UL, 0xb8282df6UL, 0xbe46b874UL, - 0x20000000UL, 0x3febf102UL, 0xe33a6729UL, 0x3e54963fUL, 0x00000000UL, - 0x3febe264UL, 0x3b53e88aUL, 0xbe3adce1UL, 0x60000000UL, 0x3febd3caUL, - 0xc2585084UL, 0x3e5cde9fUL, 0x80000000UL, 0x3febc535UL, 0xa335c5eeUL, - 0xbe39fd9cUL, 0x20000000UL, 0x3febb6a5UL, 0x7325b04dUL, 0x3e42ba15UL, - 0x60000000UL, 0x3feba819UL, 0x1564540fUL, 0x3e3a9f35UL, 0x40000000UL, - 0x3feb9992UL, 0x83fff592UL, 0xbe5465ceUL, 0xa0000000UL, 0x3feb8b0fUL, - 0xb9da63d3UL, 0xbe4b1a0aUL, 0x80000000UL, 0x3feb7c91UL, 0x6d6f1ea4UL, - 0x3e557657UL, 0x00000000UL, 0x3feb6e18UL, 0x5e80a1bfUL, 0x3e4ddbb6UL, - 0x00000000UL, 0x3feb5fa3UL, 0x1c9eacb5UL, 0x3e592877UL, 0xa0000000UL, - 0x3feb5132UL, 0x6d40beb3UL, 0xbe51858cUL, 0xa0000000UL, 0x3feb42c6UL, - 0xd740c67bUL, 0x3e427ad2UL, 0x40000000UL, 0x3feb345fUL, 0xa3e0cceeUL, - 0xbe5c2fc4UL, 0x40000000UL, 0x3feb25fcUL, 0x8e752b50UL, 0xbe3da3c2UL, - 0xc0000000UL, 0x3feb179dUL, 0xa892e7deUL, 0x3e1fb481UL, 0xc0000000UL, - 0x3feb0943UL, 0x21ed71e9UL, 0xbe365206UL, 0x20000000UL, 0x3feafaeeUL, - 0x0e1380a3UL, 0x3e5c5b7bUL, 0x20000000UL, 0x3feaec9dUL, 0x3c3d640eUL, - 0xbe5dbbd0UL, 0x60000000UL, 0x3feade50UL, 0x8f97a715UL, 0x3e3a8ec5UL, - 0x20000000UL, 0x3fead008UL, 0x23ab2839UL, 0x3e2fe98aUL, 0x40000000UL, - 0x3feac1c4UL, 0xf4bbd50fUL, 0x3e54d8f6UL, 0xe0000000UL, 0x3feab384UL, - 0x14757c4dUL, 0xbe48774cUL, 0xc0000000UL, 0x3feaa549UL, 0x7c7b0eeaUL, - 0x3e5b51bbUL, 0x20000000UL, 0x3fea9713UL, 0xf56f7013UL, 0x3e386200UL, - 0xe0000000UL, 0x3fea88e0UL, 0xbe428ebeUL, 0xbe514af5UL, 0xe0000000UL, - 0x3fea7ab2UL, 0x8d0e4496UL, 0x3e4f9165UL, 0x60000000UL, 0x3fea6c89UL, - 0xdbacc5d5UL, 0xbe5c063bUL, 0x20000000UL, 0x3fea5e64UL, 0x3f19d970UL, - 0xbe5a0c8cUL, 0x20000000UL, 0x3fea5043UL, 0x09ea3e6bUL, 0x3e5065dcUL, - 0x80000000UL, 0x3fea4226UL, 0x78df246cUL, 0x3e5e05f6UL, 0x40000000UL, - 0x3fea340eUL, 0x4057d4a0UL, 0x3e431b2bUL, 0x40000000UL, 0x3fea25faUL, - 0x82867bb5UL, 0x3e4b76beUL, 0xa0000000UL, 0x3fea17eaUL, 0x9436f40aUL, - 0xbe5aad39UL, 0x20000000UL, 0x3fea09dfUL, 0x4b5253b3UL, 0x3e46380bUL, - 0x00000000UL, 0x3fe9fbd8UL, 0x8fc52466UL, 0xbe386f9bUL, 0x20000000UL, - 0x3fe9edd5UL, 0x22d3f344UL, 0xbe538347UL, 0x60000000UL, 0x3fe9dfd6UL, - 0x1ac33522UL, 0x3e5dbc53UL, 0x00000000UL, 0x3fe9d1dcUL, 0xeabdff1dUL, - 0x3e40fc0cUL, 0xe0000000UL, 0x3fe9c3e5UL, 0xafd30e73UL, 0xbe585e63UL, - 0xe0000000UL, 0x3fe9b5f3UL, 0xa52f226aUL, 0xbe43e8f9UL, 0x20000000UL, - 0x3fe9a806UL, 0xecb8698dUL, 0xbe515b36UL, 0x80000000UL, 0x3fe99a1cUL, - 0xf2b4e89dUL, 0x3e48b62bUL, 0x20000000UL, 0x3fe98c37UL, 0x7c9a88fbUL, - 0x3e44414cUL, 0x00000000UL, 0x3fe97e56UL, 0xda015741UL, 0xbe5d13baUL, - 0xe0000000UL, 0x3fe97078UL, 0x5fdace06UL, 0x3e51b947UL, 0x00000000UL, - 0x3fe962a0UL, 0x956ca094UL, 0x3e518785UL, 0x40000000UL, 0x3fe954cbUL, - 0x01164c1dUL, 0x3e5d5b57UL, 0xc0000000UL, 0x3fe946faUL, 0xe63b3767UL, - 0xbe4f84e7UL, 0x40000000UL, 0x3fe9392eUL, 0xe57cc2a9UL, 0x3e34eda3UL, - 0xe0000000UL, 0x3fe92b65UL, 0x8c75b544UL, 0x3e5766a0UL, 0xc0000000UL, - 0x3fe91da1UL, 0x37d1d087UL, 0xbe5e2ab1UL, 0x80000000UL, 0x3fe90fe1UL, - 0xa953dc20UL, 0x3e5fa1f3UL, 0x80000000UL, 0x3fe90225UL, 0xdbd3f369UL, - 0x3e47d6dbUL, 0xa0000000UL, 0x3fe8f46dUL, 0x1c9be989UL, 0xbe5e2b0aUL, - 0xa0000000UL, 0x3fe8e6b9UL, 0x3c93d76aUL, 0x3e5c8618UL, 0xe0000000UL, - 0x3fe8d909UL, 0x2182fc9aUL, 0xbe41aa9eUL, 0x20000000UL, 0x3fe8cb5eUL, - 0xe6b3539dUL, 0xbe530d19UL, 0x60000000UL, 0x3fe8bdb6UL, 0x49e58cc3UL, - 0xbe3bb374UL, 0xa0000000UL, 0x3fe8b012UL, 0xa7cfeb8fUL, 0x3e56c412UL, - 0x00000000UL, 0x3fe8a273UL, 0x8d52bc19UL, 0x3e1429b8UL, 0x60000000UL, - 0x3fe894d7UL, 0x4dc32c6cUL, 0xbe48604cUL, 0xc0000000UL, 0x3fe8873fUL, - 0x0c868e56UL, 0xbe564ee5UL, 0x00000000UL, 0x3fe879acUL, 0x56aee828UL, - 0x3e5e2fd8UL, 0x60000000UL, 0x3fe86c1cUL, 0x7ceab8ecUL, 0x3e493365UL, - 0xc0000000UL, 0x3fe85e90UL, 0x78d4dadcUL, 0xbe4f7f25UL, 0x00000000UL, - 0x3fe85109UL, 0x0ccd8280UL, 0x3e31e7a2UL, 0x40000000UL, 0x3fe84385UL, - 0x34ba4e15UL, 0x3e328077UL, 0x80000000UL, 0x3fe83605UL, 0xa670975aUL, - 0xbe53eee5UL, 0xa0000000UL, 0x3fe82889UL, 0xf61b77b2UL, 0xbe43a20aUL, - 0xa0000000UL, 0x3fe81b11UL, 0x13e6643bUL, 0x3e5e5fe5UL, 0xc0000000UL, - 0x3fe80d9dUL, 0x82cc94e8UL, 0xbe5ff1f9UL, 0xa0000000UL, 0x3fe8002dUL, - 0x8a0c9c5dUL, 0xbe42b0e7UL, 0x60000000UL, 0x3fe7f2c1UL, 0x22a16f01UL, - 0x3e5d9ea0UL, 0x20000000UL, 0x3fe7e559UL, 0xc38cd451UL, 0x3e506963UL, - 0xc0000000UL, 0x3fe7d7f4UL, 0x9902bc71UL, 0x3e4503d7UL, 0x40000000UL, - 0x3fe7ca94UL, 0xdef2a3c0UL, 0x3e3d98edUL, 0xa0000000UL, 0x3fe7bd37UL, - 0xed49abb0UL, 0x3e24c1ffUL, 0xe0000000UL, 0x3fe7afdeUL, 0xe3b0be70UL, - 0xbe40c467UL, 0x00000000UL, 0x3fe7a28aUL, 0xaf9f193cUL, 0xbe5dff6cUL, - 0xe0000000UL, 0x3fe79538UL, 0xb74cf6b6UL, 0xbe258ed0UL, 0xa0000000UL, - 0x3fe787ebUL, 0x1d9127c7UL, 0x3e345fb0UL, 0x40000000UL, 0x3fe77aa2UL, - 0x1028c21dUL, 0xbe4619bdUL, 0xa0000000UL, 0x3fe76d5cUL, 0x7cb0b5e4UL, - 0x3e40f1a2UL, 0xe0000000UL, 0x3fe7601aUL, 0x2b1bc4adUL, 0xbe32e8bbUL, - 0xe0000000UL, 0x3fe752dcUL, 0x6839f64eUL, 0x3e41f57bUL, 0xc0000000UL, - 0x3fe745a2UL, 0xc4121f7eUL, 0xbe52c40aUL, 0x60000000UL, 0x3fe7386cUL, - 0xd6852d72UL, 0xbe5c4e6bUL, 0xc0000000UL, 0x3fe72b39UL, 0x91d690f7UL, - 0xbe57f88fUL, 0xe0000000UL, 0x3fe71e0aUL, 0x627a2159UL, 0xbe4425d5UL, - 0xc0000000UL, 0x3fe710dfUL, 0x50a54033UL, 0x3e422b7eUL, 0x60000000UL, - 0x3fe703b8UL, 0x3b0b5f91UL, 0x3e5d3857UL, 0xe0000000UL, 0x3fe6f694UL, - 0x84d628a2UL, 0xbe51f090UL, 0x00000000UL, 0x3fe6e975UL, 0x306d8894UL, - 0xbe414d83UL, 0xe0000000UL, 0x3fe6dc58UL, 0x30bf24aaUL, 0xbe4650caUL, - 0x80000000UL, 0x3fe6cf40UL, 0xd4628d69UL, 0xbe5db007UL, 0xc0000000UL, - 0x3fe6c22bUL, 0xa2aae57bUL, 0xbe31d279UL, 0xc0000000UL, 0x3fe6b51aUL, - 0x860edf7eUL, 0xbe2d4c4aUL, 0x80000000UL, 0x3fe6a80dUL, 0xf3559341UL, - 0xbe5f7e98UL, 0xe0000000UL, 0x3fe69b03UL, 0xa885899eUL, 0xbe5c2011UL, - 0xe0000000UL, 0x3fe68dfdUL, 0x2bdc6d37UL, 0x3e224a82UL, 0xa0000000UL, - 0x3fe680fbUL, 0xc12ad1b9UL, 0xbe40cf56UL, 0x00000000UL, 0x3fe673fdUL, - 0x1bcdf659UL, 0xbdf52f2dUL, 0x00000000UL, 0x3fe66702UL, 0x5df10408UL, - 0x3e5663e0UL, 0xc0000000UL, 0x3fe65a0aUL, 0xa4070568UL, 0xbe40b12fUL, - 0x00000000UL, 0x3fe64d17UL, 0x71c54c47UL, 0x3e5f5e8bUL, 0x00000000UL, - 0x3fe64027UL, 0xbd4b7e83UL, 0x3e42ead6UL, 0xa0000000UL, 0x3fe6333aUL, - 0x61598bd2UL, 0xbe4c48d4UL, 0xc0000000UL, 0x3fe62651UL, 0x6f538d61UL, - 0x3e548401UL, 0xa0000000UL, 0x3fe6196cUL, 0x14344120UL, 0xbe529af6UL, - 0x00000000UL, 0x3fe60c8bUL, 0x5982c587UL, 0xbe3e1e4fUL, 0x00000000UL, - 0x3fe5ffadUL, 0xfe51d4eaUL, 0xbe4c897aUL, 0x80000000UL, 0x3fe5f2d2UL, - 0xfd46ebe1UL, 0x3e552e00UL, 0xa0000000UL, 0x3fe5e5fbUL, 0xa4695699UL, - 0x3e5ed471UL, 0x60000000UL, 0x3fe5d928UL, 0x80d118aeUL, 0x3e456b61UL, - 0xa0000000UL, 0x3fe5cc58UL, 0x304c330bUL, 0x3e54dc29UL, 0x80000000UL, - 0x3fe5bf8cUL, 0x0af2dedfUL, 0xbe3aa9bdUL, 0xe0000000UL, 0x3fe5b2c3UL, - 0x15fc9258UL, 0xbe479a37UL, 0xc0000000UL, 0x3fe5a5feUL, 0x9292c7eaUL, - 0x3e188650UL, 0x20000000UL, 0x3fe5993dUL, 0x33b4d380UL, 0x3e5d6d93UL, - 0x20000000UL, 0x3fe58c7fUL, 0x02fd16c7UL, 0x3e2fe961UL, 0xa0000000UL, - 0x3fe57fc4UL, 0x4a05edb6UL, 0xbe4d55b4UL, 0xa0000000UL, 0x3fe5730dUL, - 0x3d443abbUL, 0xbe5e6954UL, 0x00000000UL, 0x3fe5665aUL, 0x024acfeaUL, - 0x3e50e61bUL, 0x00000000UL, 0x3fe559aaUL, 0xcc9edd09UL, 0xbe325403UL, - 0x60000000UL, 0x3fe54cfdUL, 0x1fe26950UL, 0x3e5d500eUL, 0x60000000UL, - 0x3fe54054UL, 0x6c5ae164UL, 0xbe4a79b4UL, 0xc0000000UL, 0x3fe533aeUL, - 0x154b0287UL, 0xbe401571UL, 0xa0000000UL, 0x3fe5270cUL, 0x0673f401UL, - 0xbe56e56bUL, 0xe0000000UL, 0x3fe51a6dUL, 0x751b639cUL, 0x3e235269UL, - 0xa0000000UL, 0x3fe50dd2UL, 0x7c7b2bedUL, 0x3ddec887UL, 0xc0000000UL, - 0x3fe5013aUL, 0xafab4e17UL, 0x3e5e7575UL, 0x60000000UL, 0x3fe4f4a6UL, - 0x2e308668UL, 0x3e59aed6UL, 0x80000000UL, 0x3fe4e815UL, 0xf33e2a76UL, - 0xbe51f184UL, 0xe0000000UL, 0x3fe4db87UL, 0x839f3e3eUL, 0x3e57db01UL, - 0xc0000000UL, 0x3fe4cefdUL, 0xa9eda7bbUL, 0x3e535e0fUL, 0x00000000UL, - 0x3fe4c277UL, 0x2a8f66a5UL, 0x3e5ce451UL, 0xc0000000UL, 0x3fe4b5f3UL, - 0x05192456UL, 0xbe4e8518UL, 0xc0000000UL, 0x3fe4a973UL, 0x4aa7cd1dUL, - 0x3e46784aUL, 0x40000000UL, 0x3fe49cf7UL, 0x8e23025eUL, 0xbe5749f2UL, - 0x00000000UL, 0x3fe4907eUL, 0x18d30215UL, 0x3e360f39UL, 0x20000000UL, - 0x3fe48408UL, 0x63dcf2f3UL, 0x3e5e00feUL, 0xc0000000UL, 0x3fe47795UL, - 0x46182d09UL, 0xbe5173d9UL, 0xa0000000UL, 0x3fe46b26UL, 0x8f0e62aaUL, - 0xbe48f281UL, 0xe0000000UL, 0x3fe45ebaUL, 0x5775c40cUL, 0xbe56aad4UL, - 0x60000000UL, 0x3fe45252UL, 0x0fe25f69UL, 0x3e48bd71UL, 0x40000000UL, - 0x3fe445edUL, 0xe9989ec5UL, 0x3e590d97UL, 0x80000000UL, 0x3fe4398bUL, - 0xb3d9ffe3UL, 0x3e479dbcUL, 0x20000000UL, 0x3fe42d2dUL, 0x388e4d2eUL, - 0xbe5eed80UL, 0xe0000000UL, 0x3fe420d1UL, 0x6f797c18UL, 0x3e554b4cUL, - 0x20000000UL, 0x3fe4147aUL, 0x31048bb4UL, 0xbe5b1112UL, 0x80000000UL, - 0x3fe40825UL, 0x2efba4f9UL, 0x3e48ebc7UL, 0x40000000UL, 0x3fe3fbd4UL, - 0x50201119UL, 0x3e40b701UL, 0x40000000UL, 0x3fe3ef86UL, 0x0a4db32cUL, - 0x3e551de8UL, 0xa0000000UL, 0x3fe3e33bUL, 0x0c9c148bUL, 0xbe50c1f6UL, - 0x20000000UL, 0x3fe3d6f4UL, 0xc9129447UL, 0x3e533fa0UL, 0x00000000UL, - 0x3fe3cab0UL, 0xaae5b5a0UL, 0xbe22b68eUL, 0x20000000UL, 0x3fe3be6fUL, - 0x02305e8aUL, 0xbe54fc08UL, 0x60000000UL, 0x3fe3b231UL, 0x7f908258UL, - 0x3e57dc05UL, 0x00000000UL, 0x3fe3a5f7UL, 0x1a09af78UL, 0x3e08038bUL, - 0xe0000000UL, 0x3fe399bfUL, 0x490643c1UL, 0xbe5dbe42UL, 0xe0000000UL, - 0x3fe38d8bUL, 0x5e8ad724UL, 0xbe3c2b72UL, 0x20000000UL, 0x3fe3815bUL, - 0xc67196b6UL, 0x3e1713cfUL, 0xa0000000UL, 0x3fe3752dUL, 0x6182e429UL, - 0xbe3ec14cUL, 0x40000000UL, 0x3fe36903UL, 0xab6eb1aeUL, 0x3e5a2cc5UL, - 0x40000000UL, 0x3fe35cdcUL, 0xfe5dc064UL, 0xbe5c5878UL, 0x40000000UL, - 0x3fe350b8UL, 0x0ba6b9e4UL, 0x3e51619bUL, 0x80000000UL, 0x3fe34497UL, - 0x857761aaUL, 0x3e5fff53UL, 0x00000000UL, 0x3fe3387aUL, 0xf872d68cUL, - 0x3e484f4dUL, 0xa0000000UL, 0x3fe32c5fUL, 0x087e97c2UL, 0x3e52842eUL, - 0x80000000UL, 0x3fe32048UL, 0x73d6d0c0UL, 0xbe503edfUL, 0x80000000UL, - 0x3fe31434UL, 0x0c1456a1UL, 0xbe5f72adUL, 0xa0000000UL, 0x3fe30823UL, - 0x83a1a4d5UL, 0xbe5e65ccUL, 0xe0000000UL, 0x3fe2fc15UL, 0x855a7390UL, - 0xbe506438UL, 0x40000000UL, 0x3fe2f00bUL, 0xa2898287UL, 0x3e3d22a2UL, - 0xe0000000UL, 0x3fe2e403UL, 0x8b56f66fUL, 0xbe5aa5fdUL, 0x80000000UL, - 0x3fe2d7ffUL, 0x52db119aUL, 0x3e3a2e3dUL, 0x60000000UL, 0x3fe2cbfeUL, - 0xe2ddd4c0UL, 0xbe586469UL, 0x40000000UL, 0x3fe2c000UL, 0x6b01bf10UL, - 0x3e352b9dUL, 0x40000000UL, 0x3fe2b405UL, 0xb07a1cdfUL, 0x3e5c5cdaUL, - 0x80000000UL, 0x3fe2a80dUL, 0xc7b5f868UL, 0xbe5668b3UL, 0xc0000000UL, - 0x3fe29c18UL, 0x185edf62UL, 0xbe563d66UL, 0x00000000UL, 0x3fe29027UL, - 0xf729e1ccUL, 0x3e59a9a0UL, 0x80000000UL, 0x3fe28438UL, 0x6433c727UL, - 0xbe43cc89UL, 0x00000000UL, 0x3fe2784dUL, 0x41782631UL, 0xbe30750cUL, - 0xa0000000UL, 0x3fe26c64UL, 0x914911b7UL, 0xbe58290eUL, 0x40000000UL, - 0x3fe2607fUL, 0x3dcc73e1UL, 0xbe4269cdUL, 0x00000000UL, 0x3fe2549dUL, - 0x2751bf70UL, 0xbe5a6998UL, 0xc0000000UL, 0x3fe248bdUL, 0x4248b9fbUL, - 0xbe4ddb00UL, 0x80000000UL, 0x3fe23ce1UL, 0xf35cf82fUL, 0x3e561b71UL, - 0x60000000UL, 0x3fe23108UL, 0x8e481a2dUL, 0x3e518fb9UL, 0x60000000UL, - 0x3fe22532UL, 0x5ab96edcUL, 0xbe5fafc5UL, 0x40000000UL, 0x3fe2195fUL, - 0x80943911UL, 0xbe07f819UL, 0x40000000UL, 0x3fe20d8fUL, 0x386f2d6cUL, - 0xbe54ba8bUL, 0x40000000UL, 0x3fe201c2UL, 0xf29664acUL, 0xbe5eb815UL, - 0x20000000UL, 0x3fe1f5f8UL, 0x64f03390UL, 0x3e5e320cUL, 0x20000000UL, - 0x3fe1ea31UL, 0x747ff696UL, 0x3e5ef0a5UL, 0x40000000UL, 0x3fe1de6dUL, - 0x3e9ceb51UL, 0xbe5f8d27UL, 0x20000000UL, 0x3fe1d2acUL, 0x4ae0b55eUL, - 0x3e5faa21UL, 0x20000000UL, 0x3fe1c6eeUL, 0x28569a5eUL, 0x3e598a4fUL, - 0x20000000UL, 0x3fe1bb33UL, 0x54b33e07UL, 0x3e46130aUL, 0x20000000UL, - 0x3fe1af7bUL, 0x024f1078UL, 0xbe4dbf93UL, 0x00000000UL, 0x3fe1a3c6UL, - 0xb0783bfaUL, 0x3e419248UL, 0xe0000000UL, 0x3fe19813UL, 0x2f02b836UL, - 0x3e4e02b7UL, 0xc0000000UL, 0x3fe18c64UL, 0x28dec9d4UL, 0x3e09064fUL, - 0x80000000UL, 0x3fe180b8UL, 0x45cbf406UL, 0x3e5b1f46UL, 0x40000000UL, - 0x3fe1750fUL, 0x03d9964cUL, 0x3e5b0a79UL, 0x00000000UL, 0x3fe16969UL, - 0x8b5b882bUL, 0xbe238086UL, 0xa0000000UL, 0x3fe15dc5UL, 0x73bad6f8UL, - 0xbdf1fca4UL, 0x20000000UL, 0x3fe15225UL, 0x5385769cUL, 0x3e5e8d76UL, - 0xa0000000UL, 0x3fe14687UL, 0x1676dc6bUL, 0x3e571d08UL, 0x20000000UL, - 0x3fe13aedUL, 0xa8c41c7fUL, 0xbe598a25UL, 0x60000000UL, 0x3fe12f55UL, - 0xc4e1aaf0UL, 0x3e435277UL, 0xa0000000UL, 0x3fe123c0UL, 0x403638e1UL, - 0xbe21aa7cUL, 0xc0000000UL, 0x3fe1182eUL, 0x557a092bUL, 0xbdd0116bUL, - 0xc0000000UL, 0x3fe10c9fUL, 0x7d779f66UL, 0x3e4a61baUL, 0xc0000000UL, - 0x3fe10113UL, 0x2b09c645UL, 0xbe5d586eUL, 0x20000000UL, 0x3fe0ea04UL, - 0xea2cad46UL, 0x3e5aa97cUL, 0x20000000UL, 0x3fe0d300UL, 0x23190e54UL, - 0x3e50f1a7UL, 0xa0000000UL, 0x3fe0bc07UL, 0x1379a5a6UL, 0xbe51619dUL, - 0x60000000UL, 0x3fe0a51aUL, 0x926a3d4aUL, 0x3e5cf019UL, 0xa0000000UL, - 0x3fe08e38UL, 0xa8c24358UL, 0x3e35241eUL, 0x20000000UL, 0x3fe07762UL, - 0x24317e7aUL, 0x3e512cfaUL, 0x00000000UL, 0x3fe06097UL, 0xfd9cf274UL, - 0xbe55bef3UL, 0x00000000UL, 0x3fe049d7UL, 0x3689b49dUL, 0xbe36d26dUL, - 0x40000000UL, 0x3fe03322UL, 0xf72ef6c4UL, 0xbe54cd08UL, 0xa0000000UL, - 0x3fe01c78UL, 0x23702d2dUL, 0xbe5900bfUL, 0x00000000UL, 0x3fe005daUL, - 0x3f59c14cUL, 0x3e57d80bUL, 0x40000000UL, 0x3fdfde8dUL, 0xad67766dUL, - 0xbe57fad4UL, 0x40000000UL, 0x3fdfb17cUL, 0x644f4ae7UL, 0x3e1ee43bUL, - 0x40000000UL, 0x3fdf8481UL, 0x903234d2UL, 0x3e501a86UL, 0x40000000UL, - 0x3fdf579cUL, 0xafe9e509UL, 0xbe267c3eUL, 0x00000000UL, 0x3fdf2acdUL, - 0xb7dfda0bUL, 0xbe48149bUL, 0x40000000UL, 0x3fdefe13UL, 0x3b94305eUL, - 0x3e5f4ea7UL, 0x80000000UL, 0x3fded16fUL, 0x5d95da61UL, 0xbe55c198UL, - 0x00000000UL, 0x3fdea4e1UL, 0x406960c9UL, 0xbdd99a19UL, 0x00000000UL, - 0x3fde7868UL, 0xd22f3539UL, 0x3e470c78UL, 0x80000000UL, 0x3fde4c04UL, - 0x83eec535UL, 0xbe3e1232UL, 0x40000000UL, 0x3fde1fb6UL, 0x3dfbffcbUL, - 0xbe4b7d71UL, 0x40000000UL, 0x3fddf37dUL, 0x7e1be4e0UL, 0xbe5b8f8fUL, - 0x40000000UL, 0x3fddc759UL, 0x46dae887UL, 0xbe350458UL, 0x80000000UL, - 0x3fdd9b4aUL, 0xed6ecc49UL, 0xbe5f0045UL, 0x80000000UL, 0x3fdd6f50UL, - 0x2e9e883cUL, 0x3e2915daUL, 0x80000000UL, 0x3fdd436bUL, 0xf0bccb32UL, - 0x3e4a68c9UL, 0x80000000UL, 0x3fdd179bUL, 0x9bbfc779UL, 0xbe54a26aUL, - 0x00000000UL, 0x3fdcebe0UL, 0x7cea33abUL, 0x3e43c6b7UL, 0x40000000UL, - 0x3fdcc039UL, 0xe740fd06UL, 0x3e5526c2UL, 0x40000000UL, 0x3fdc94a7UL, - 0x9eadeb1aUL, 0xbe396d8dUL, 0xc0000000UL, 0x3fdc6929UL, 0xf0a8f95aUL, - 0xbe5c0ab2UL, 0x80000000UL, 0x3fdc3dc0UL, 0x6ee2693bUL, 0x3e0992e6UL, - 0xc0000000UL, 0x3fdc126bUL, 0x5ac6b581UL, 0xbe2834b6UL, 0x40000000UL, - 0x3fdbe72bUL, 0x8cc226ffUL, 0x3e3596a6UL, 0x00000000UL, 0x3fdbbbffUL, - 0xf92a74bbUL, 0x3e3c5813UL, 0x00000000UL, 0x3fdb90e7UL, 0x479664c0UL, - 0xbe50d644UL, 0x00000000UL, 0x3fdb65e3UL, 0x5004975bUL, 0xbe55258fUL, - 0x00000000UL, 0x3fdb3af3UL, 0xe4b23194UL, 0xbe588407UL, 0xc0000000UL, - 0x3fdb1016UL, 0xe65d4d0aUL, 0x3e527c26UL, 0x80000000UL, 0x3fdae54eUL, - 0x814fddd6UL, 0x3e5962a2UL, 0x40000000UL, 0x3fdaba9aUL, 0xe19d0913UL, - 0xbe562f4eUL, 0x80000000UL, 0x3fda8ff9UL, 0x43cfd006UL, 0xbe4cfdebUL, - 0x40000000UL, 0x3fda656cUL, 0x686f0a4eUL, 0x3e5e47a8UL, 0xc0000000UL, - 0x3fda3af2UL, 0x7200d410UL, 0x3e5e1199UL, 0xc0000000UL, 0x3fda108cUL, - 0xabd2266eUL, 0x3e5ee4d1UL, 0x40000000UL, 0x3fd9e63aUL, 0x396f8f2cUL, - 0x3e4dbffbUL, 0x00000000UL, 0x3fd9bbfbUL, 0xe32b25ddUL, 0x3e5c3a54UL, - 0x40000000UL, 0x3fd991cfUL, 0x431e4035UL, 0xbe457925UL, 0x80000000UL, - 0x3fd967b6UL, 0x7bed3dd3UL, 0x3e40c61dUL, 0x00000000UL, 0x3fd93db1UL, - 0xd7449365UL, 0x3e306419UL, 0x80000000UL, 0x3fd913beUL, 0x1746e791UL, - 0x3e56fcfcUL, 0x40000000UL, 0x3fd8e9dfUL, 0xf3a9028bUL, 0xbe5041b9UL, - 0xc0000000UL, 0x3fd8c012UL, 0x56840c50UL, 0xbe26e20aUL, 0x40000000UL, - 0x3fd89659UL, 0x19763102UL, 0xbe51f466UL, 0x80000000UL, 0x3fd86cb2UL, - 0x7032de7cUL, 0xbe4d298aUL, 0x80000000UL, 0x3fd8431eUL, 0xdeb39fabUL, - 0xbe4361ebUL, 0x40000000UL, 0x3fd8199dUL, 0x5d01cbe0UL, 0xbe5425b3UL, - 0x80000000UL, 0x3fd7f02eUL, 0x3ce99aa9UL, 0x3e146fa8UL, 0x80000000UL, - 0x3fd7c6d2UL, 0xd1a262b9UL, 0xbe5a1a69UL, 0xc0000000UL, 0x3fd79d88UL, - 0x8606c236UL, 0x3e423a08UL, 0x80000000UL, 0x3fd77451UL, 0x8fd1e1b7UL, - 0x3e5a6a63UL, 0xc0000000UL, 0x3fd74b2cUL, 0xe491456aUL, 0x3e42c1caUL, - 0x40000000UL, 0x3fd7221aUL, 0x4499a6d7UL, 0x3e36a69aUL, 0x00000000UL, - 0x3fd6f91aUL, 0x5237df94UL, 0xbe0f8f02UL, 0x00000000UL, 0x3fd6d02cUL, - 0xb6482c6eUL, 0xbe5abcf7UL, 0x00000000UL, 0x3fd6a750UL, 0x1919fd61UL, - 0xbe57ade2UL, 0x00000000UL, 0x3fd67e86UL, 0xaa7a994dUL, 0xbe3f3fbdUL, - 0x00000000UL, 0x3fd655ceUL, 0x67db014cUL, 0x3e33c550UL, 0x00000000UL, - 0x3fd62d28UL, 0xa82856b7UL, 0xbe1409d1UL, 0xc0000000UL, 0x3fd60493UL, - 0x1e6a300dUL, 0x3e55d899UL, 0x80000000UL, 0x3fd5dc11UL, 0x1222bd5cUL, - 0xbe35bfc0UL, 0xc0000000UL, 0x3fd5b3a0UL, 0x6e8dc2d3UL, 0x3e5d4d79UL, - 0x00000000UL, 0x3fd58b42UL, 0xe0e4ace6UL, 0xbe517303UL, 0x80000000UL, - 0x3fd562f4UL, 0xb306e0a8UL, 0x3e5edf0fUL, 0xc0000000UL, 0x3fd53ab8UL, - 0x6574bc54UL, 0x3e5ee859UL, 0x80000000UL, 0x3fd5128eUL, 0xea902207UL, - 0x3e5f6188UL, 0xc0000000UL, 0x3fd4ea75UL, 0x9f911d79UL, 0x3e511735UL, - 0x80000000UL, 0x3fd4c26eUL, 0xf9c77397UL, 0xbe5b1643UL, 0x40000000UL, - 0x3fd49a78UL, 0x15fc9258UL, 0x3e479a37UL, 0x80000000UL, 0x3fd47293UL, - 0xd5a04dd9UL, 0xbe426e56UL, 0xc0000000UL, 0x3fd44abfUL, 0xe04042f5UL, - 0x3e56f7c6UL, 0x40000000UL, 0x3fd422fdUL, 0x1d8bf2c8UL, 0x3e5d8810UL, - 0x00000000UL, 0x3fd3fb4cUL, 0x88a8ddeeUL, 0xbe311454UL, 0xc0000000UL, - 0x3fd3d3abUL, 0x3e3b5e47UL, 0xbe5d1b72UL, 0x40000000UL, 0x3fd3ac1cUL, - 0xc2ab5d59UL, 0x3e31b02bUL, 0xc0000000UL, 0x3fd3849dUL, 0xd4e34b9eUL, - 0x3e51cb2fUL, 0x40000000UL, 0x3fd35d30UL, 0x177204fbUL, 0xbe2b8cd7UL, - 0x80000000UL, 0x3fd335d3UL, 0xfcd38c82UL, 0xbe4356e1UL, 0x80000000UL, - 0x3fd30e87UL, 0x64f54accUL, 0xbe4e6224UL, 0x00000000UL, 0x3fd2e74cUL, - 0xaa7975d9UL, 0x3e5dc0feUL, 0x80000000UL, 0x3fd2c021UL, 0x516dab3fUL, - 0xbe50ffa3UL, 0x40000000UL, 0x3fd29907UL, 0x2bfb7313UL, 0x3e5674a2UL, - 0xc0000000UL, 0x3fd271fdUL, 0x0549fc99UL, 0x3e385d29UL, 0xc0000000UL, - 0x3fd24b04UL, 0x55b63073UL, 0xbe500c6dUL, 0x00000000UL, 0x3fd2241cUL, - 0x3f91953aUL, 0x3e389977UL, 0xc0000000UL, 0x3fd1fd43UL, 0xa1543f71UL, - 0xbe3487abUL, 0xc0000000UL, 0x3fd1d67bUL, 0x4ec8867cUL, 0x3df6a2dcUL, - 0x00000000UL, 0x3fd1afc4UL, 0x4328e3bbUL, 0x3e41d9c0UL, 0x80000000UL, - 0x3fd1891cUL, 0x2e1cda84UL, 0x3e3bdd87UL, 0x40000000UL, 0x3fd16285UL, - 0x4b5331aeUL, 0xbe53128eUL, 0x00000000UL, 0x3fd13bfeUL, 0xb9aec164UL, - 0xbe52ac98UL, 0xc0000000UL, 0x3fd11586UL, 0xd91e1316UL, 0xbe350630UL, - 0x80000000UL, 0x3fd0ef1fUL, 0x7cacc12cUL, 0x3e3f5219UL, 0x40000000UL, - 0x3fd0c8c8UL, 0xbce277b7UL, 0x3e3d30c0UL, 0x00000000UL, 0x3fd0a281UL, - 0x2a63447dUL, 0xbe541377UL, 0x80000000UL, 0x3fd07c49UL, 0xfac483b5UL, - 0xbe5772ecUL, 0xc0000000UL, 0x3fd05621UL, 0x36b8a570UL, 0xbe4fd4bdUL, - 0xc0000000UL, 0x3fd03009UL, 0xbae505f7UL, 0xbe450388UL, 0x80000000UL, - 0x3fd00a01UL, 0x3e35aeadUL, 0xbe5430fcUL, 0x80000000UL, 0x3fcfc811UL, - 0x707475acUL, 0x3e38806eUL, 0x80000000UL, 0x3fcf7c3fUL, 0xc91817fcUL, - 0xbe40cceaUL, 0x80000000UL, 0x3fcf308cUL, 0xae05d5e9UL, 0xbe4919b8UL, - 0x80000000UL, 0x3fcee4f8UL, 0xae6cc9e6UL, 0xbe530b94UL, 0x00000000UL, - 0x3fce9983UL, 0x1efe3e8eUL, 0x3e57747eUL, 0x00000000UL, 0x3fce4e2dUL, - 0xda78d9bfUL, 0xbe59a608UL, 0x00000000UL, 0x3fce02f5UL, 0x8abe2c2eUL, - 0x3e4a35adUL, 0x00000000UL, 0x3fcdb7dcUL, 0x1495450dUL, 0xbe0872ccUL, - 0x80000000UL, 0x3fcd6ce1UL, 0x86ee0ba0UL, 0xbe4f59a0UL, 0x00000000UL, - 0x3fcd2205UL, 0xe81ca888UL, 0x3e5402c3UL, 0x00000000UL, 0x3fccd747UL, - 0x3b4424b9UL, 0x3e5dfdc3UL, 0x80000000UL, 0x3fcc8ca7UL, 0xd305b56cUL, - 0x3e202da6UL, 0x00000000UL, 0x3fcc4226UL, 0x399a6910UL, 0xbe482a1cUL, - 0x80000000UL, 0x3fcbf7c2UL, 0x747f7938UL, 0xbe587372UL, 0x80000000UL, - 0x3fcbad7cUL, 0x6fc246a0UL, 0x3e50d83dUL, 0x00000000UL, 0x3fcb6355UL, - 0xee9e9be5UL, 0xbe5c35bdUL, 0x80000000UL, 0x3fcb194aUL, 0x8416c0bcUL, - 0x3e546d4fUL, 0x00000000UL, 0x3fcacf5eUL, 0x49f7f08fUL, 0x3e56da76UL, - 0x00000000UL, 0x3fca858fUL, 0x5dc30de2UL, 0x3e5f390cUL, 0x00000000UL, - 0x3fca3bdeUL, 0x950583b6UL, 0xbe5e4169UL, 0x80000000UL, 0x3fc9f249UL, - 0x33631553UL, 0x3e52aeb1UL, 0x00000000UL, 0x3fc9a8d3UL, 0xde8795a6UL, - 0xbe59a504UL, 0x00000000UL, 0x3fc95f79UL, 0x076bf41eUL, 0x3e5122feUL, - 0x80000000UL, 0x3fc9163cUL, 0x2914c8e7UL, 0x3e3dd064UL, 0x00000000UL, - 0x3fc8cd1dUL, 0x3a30eca3UL, 0xbe21b4aaUL, 0x80000000UL, 0x3fc8841aUL, - 0xb2a96650UL, 0xbe575444UL, 0x80000000UL, 0x3fc83b34UL, 0x2376c0cbUL, - 0xbe2a74c7UL, 0x80000000UL, 0x3fc7f26bUL, 0xd8a0b653UL, 0xbe5181b6UL, - 0x00000000UL, 0x3fc7a9bfUL, 0x32257882UL, 0xbe4a78b4UL, 0x00000000UL, - 0x3fc7612fUL, 0x1eee8bd9UL, 0xbe1bfe9dUL, 0x80000000UL, 0x3fc718bbUL, - 0x0c603cc4UL, 0x3e36fdc9UL, 0x80000000UL, 0x3fc6d064UL, 0x3728b8cfUL, - 0xbe1e542eUL, 0x80000000UL, 0x3fc68829UL, 0xc79a4067UL, 0x3e5c380fUL, - 0x00000000UL, 0x3fc6400bUL, 0xf69eac69UL, 0x3e550a84UL, 0x80000000UL, - 0x3fc5f808UL, 0xb7a780a4UL, 0x3e5d9224UL, 0x80000000UL, 0x3fc5b022UL, - 0xad9dfb1eUL, 0xbe55242fUL, 0x00000000UL, 0x3fc56858UL, 0x659b18beUL, - 0xbe4bfda3UL, 0x80000000UL, 0x3fc520a9UL, 0x66ee3631UL, 0xbe57d769UL, - 0x80000000UL, 0x3fc4d916UL, 0x1ec62819UL, 0x3e2427f7UL, 0x80000000UL, - 0x3fc4919fUL, 0xdec25369UL, 0xbe435431UL, 0x00000000UL, 0x3fc44a44UL, - 0xa8acfc4bUL, 0xbe3c62e8UL, 0x00000000UL, 0x3fc40304UL, 0xcf1d3eabUL, - 0xbdfba29fUL, 0x80000000UL, 0x3fc3bbdfUL, 0x79aba3eaUL, 0xbdf1b7c8UL, - 0x80000000UL, 0x3fc374d6UL, 0xb8d186daUL, 0xbe5130cfUL, 0x80000000UL, - 0x3fc32de8UL, 0x9d74f152UL, 0x3e2285b6UL, 0x00000000UL, 0x3fc2e716UL, - 0x50ae7ca9UL, 0xbe503920UL, 0x80000000UL, 0x3fc2a05eUL, 0x6caed92eUL, - 0xbe533924UL, 0x00000000UL, 0x3fc259c2UL, 0x9cb5034eUL, 0xbe510e31UL, - 0x80000000UL, 0x3fc21340UL, 0x12c4d378UL, 0xbe540b43UL, 0x80000000UL, - 0x3fc1ccd9UL, 0xcc418706UL, 0x3e59887aUL, 0x00000000UL, 0x3fc1868eUL, - 0x921f4106UL, 0xbe528e67UL, 0x80000000UL, 0x3fc1405cUL, 0x3969441eUL, - 0x3e5d8051UL, 0x00000000UL, 0x3fc0fa46UL, 0xd941ef5bUL, 0x3e5f9079UL, - 0x80000000UL, 0x3fc0b44aUL, 0x5a3e81b2UL, 0xbe567691UL, 0x00000000UL, - 0x3fc06e69UL, 0x9d66afe7UL, 0xbe4d43fbUL, 0x00000000UL, 0x3fc028a2UL, - 0x0a92a162UL, 0xbe52f394UL, 0x00000000UL, 0x3fbfc5eaUL, 0x209897e5UL, - 0x3e529e37UL, 0x00000000UL, 0x3fbf3ac5UL, 0x8458bd7bUL, 0x3e582831UL, - 0x00000000UL, 0x3fbeafd5UL, 0xb8d8b4b8UL, 0xbe486b4aUL, 0x00000000UL, - 0x3fbe2518UL, 0xe0a3b7b6UL, 0x3e5bafd2UL, 0x00000000UL, 0x3fbd9a90UL, - 0x2bf2710eUL, 0x3e383b2bUL, 0x00000000UL, 0x3fbd103cUL, 0x73eb6ab7UL, - 0xbe56d78dUL, 0x00000000UL, 0x3fbc861bUL, 0x32ceaff5UL, 0xbe32dc5aUL, - 0x00000000UL, 0x3fbbfc2eUL, 0xbee04cb7UL, 0xbe4a71a4UL, 0x00000000UL, - 0x3fbb7274UL, 0x35ae9577UL, 0x3e38142fUL, 0x00000000UL, 0x3fbae8eeUL, - 0xcbaddab4UL, 0xbe5490f0UL, 0x00000000UL, 0x3fba5f9aUL, 0x95ce1114UL, - 0x3e597c71UL, 0x00000000UL, 0x3fb9d67aUL, 0x6d7c0f78UL, 0x3e3abc2dUL, - 0x00000000UL, 0x3fb94d8dUL, 0x2841a782UL, 0xbe566cbcUL, 0x00000000UL, - 0x3fb8c4d2UL, 0x6ed429c6UL, 0xbe3cfff9UL, 0x00000000UL, 0x3fb83c4aUL, - 0xe4a49fbbUL, 0xbe552964UL, 0x00000000UL, 0x3fb7b3f4UL, 0x2193d81eUL, - 0xbe42fa72UL, 0x00000000UL, 0x3fb72bd0UL, 0xdd70c122UL, 0x3e527a8cUL, - 0x00000000UL, 0x3fb6a3dfUL, 0x03108a54UL, 0xbe450393UL, 0x00000000UL, - 0x3fb61c1fUL, 0x30ff7954UL, 0x3e565840UL, 0x00000000UL, 0x3fb59492UL, - 0xdedd460cUL, 0xbe5422b5UL, 0x00000000UL, 0x3fb50d36UL, 0x950f9f45UL, - 0xbe5313f6UL, 0x00000000UL, 0x3fb4860bUL, 0x582cdcb1UL, 0x3e506d39UL, - 0x00000000UL, 0x3fb3ff12UL, 0x7216d3a6UL, 0x3e4aa719UL, 0x00000000UL, - 0x3fb3784aUL, 0x57a423fdUL, 0x3e5a9b9fUL, 0x00000000UL, 0x3fb2f1b4UL, - 0x7a138b41UL, 0xbe50b418UL, 0x00000000UL, 0x3fb26b4eUL, 0x2fbfd7eaUL, - 0x3e23a53eUL, 0x00000000UL, 0x3fb1e519UL, 0x18913ccbUL, 0x3e465fc1UL, - 0x00000000UL, 0x3fb15f15UL, 0x7ea24e21UL, 0x3e042843UL, 0x00000000UL, - 0x3fb0d941UL, 0x7c6d9c77UL, 0x3e59f61eUL, 0x00000000UL, 0x3fb0539eUL, - 0x114efd44UL, 0x3e4ccab7UL, 0x00000000UL, 0x3faf9c56UL, 0x1777f657UL, - 0x3e552f65UL, 0x00000000UL, 0x3fae91d2UL, 0xc317b86aUL, 0xbe5a61e0UL, - 0x00000000UL, 0x3fad87acUL, 0xb7664efbUL, 0xbe41f64eUL, 0x00000000UL, - 0x3fac7de6UL, 0x5d3d03a9UL, 0x3e0807a0UL, 0x00000000UL, 0x3fab7480UL, - 0x743c38ebUL, 0xbe3726e1UL, 0x00000000UL, 0x3faa6b78UL, 0x06a253f1UL, - 0x3e5ad636UL, 0x00000000UL, 0x3fa962d0UL, 0xa35f541bUL, 0x3e5a187aUL, - 0x00000000UL, 0x3fa85a88UL, 0x4b86e446UL, 0xbe508150UL, 0x00000000UL, - 0x3fa7529cUL, 0x2589cacfUL, 0x3e52938aUL, 0x00000000UL, 0x3fa64b10UL, - 0xaf6b11f2UL, 0xbe3454cdUL, 0x00000000UL, 0x3fa543e2UL, 0x97506fefUL, - 0xbe5fdec5UL, 0x00000000UL, 0x3fa43d10UL, 0xe75f7dd9UL, 0xbe388dd3UL, - 0x00000000UL, 0x3fa3369cUL, 0xa4139632UL, 0xbdea5177UL, 0x00000000UL, - 0x3fa23086UL, 0x352d6f1eUL, 0xbe565ad6UL, 0x00000000UL, 0x3fa12accUL, - 0x77449eb7UL, 0xbe50d5c7UL, 0x00000000UL, 0x3fa0256eUL, 0x7478da78UL, - 0x3e404724UL, 0x00000000UL, 0x3f9e40dcUL, 0xf59cef7fUL, 0xbe539d0aUL, - 0x00000000UL, 0x3f9c3790UL, 0x1511d43cUL, 0x3e53c2c8UL, 0x00000000UL, - 0x3f9a2f00UL, 0x9b8bff3cUL, 0xbe43b3e1UL, 0x00000000UL, 0x3f982724UL, - 0xad1e22a5UL, 0x3e46f0bdUL, 0x00000000UL, 0x3f962000UL, 0x130d9356UL, - 0x3e475ba0UL, 0x00000000UL, 0x3f941994UL, 0x8f86f883UL, 0xbe513d0bUL, - 0x00000000UL, 0x3f9213dcUL, 0x914d0dc8UL, 0xbe534335UL, 0x00000000UL, - 0x3f900ed8UL, 0x2d73e5e7UL, 0xbe22ba75UL, 0x00000000UL, 0x3f8c1510UL, - 0xc5b7d70eUL, 0x3e599c5dUL, 0x00000000UL, 0x3f880de0UL, 0x8a27857eUL, - 0xbe3d28c8UL, 0x00000000UL, 0x3f840810UL, 0xda767328UL, 0x3e531b3dUL, - 0x00000000UL, 0x3f8003b0UL, 0x77bacaf3UL, 0xbe5f04e3UL, 0x00000000UL, - 0x3f780150UL, 0xdf4b0720UL, 0x3e5a8bffUL, 0x00000000UL, 0x3f6ffc40UL, - 0x34c48e71UL, 0xbe3fcd99UL, 0x00000000UL, 0x3f5ff6c0UL, 0x1ad218afUL, - 0xbe4c78a7UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x80000000UL, - 0x00000000UL, 0xfffff800UL, 0x00000000UL, 0xfffff800UL, 0x00000000UL, - 0x3ff72000UL, 0x161bb241UL, 0xbf5dabe1UL, 0x6dc96112UL, 0xbf836578UL, - 0xee241472UL, 0xbf9b0301UL, 0x9f95985aUL, 0xbfb528dbUL, 0xb3841d2aUL, - 0xbfd619b6UL, 0x518775e3UL, 0x3f9004f2UL, 0xac8349bbUL, 0x3fa76c9bUL, - 0x486ececcUL, 0x3fc4635eUL, 0x161bb241UL, 0xbf5dabe1UL, 0x9f95985aUL, - 0xbfb528dbUL, 0xf8b5787dUL, 0x3ef2531eUL, 0x486ececbUL, 0x3fc4635eUL, - 0x412055ccUL, 0xbdd61bb2UL, 0x00000000UL, 0xfffffff8UL, 0x00000000UL, - 0xffffffffUL, 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x3b700000UL, - 0xfa5abcbfUL, 0x3ff00b1aUL, 0xa7609f71UL, 0xbc84f6b2UL, 0xa9fb3335UL, - 0x3ff0163dUL, 0x9ab8cdb7UL, 0x3c9b6129UL, 0x143b0281UL, 0x3ff02168UL, - 0x0fc54eb6UL, 0xbc82bf31UL, 0x3e778061UL, 0x3ff02c9aUL, 0x535b085dUL, - 0xbc719083UL, 0x2e11bbccUL, 0x3ff037d4UL, 0xeeade11aUL, 0x3c656811UL, - 0xe86e7f85UL, 0x3ff04315UL, 0x1977c96eUL, 0xbc90a31cUL, 0x72f654b1UL, - 0x3ff04e5fUL, 0x3aa0d08cUL, 0x3c84c379UL, 0xd3158574UL, 0x3ff059b0UL, - 0xa475b465UL, 0x3c8d73e2UL, 0x0e3c1f89UL, 0x3ff0650aUL, 0x5799c397UL, - 0xbc95cb7bUL, 0x29ddf6deUL, 0x3ff0706bUL, 0xe2b13c27UL, 0xbc8c91dfUL, - 0x2b72a836UL, 0x3ff07bd4UL, 0x54458700UL, 0x3c832334UL, 0x18759bc8UL, - 0x3ff08745UL, 0x4bb284ffUL, 0x3c6186beUL, 0xf66607e0UL, 0x3ff092bdUL, - 0x800a3fd1UL, 0xbc968063UL, 0xcac6f383UL, 0x3ff09e3eUL, 0x18316136UL, - 0x3c914878UL, 0x9b1f3919UL, 0x3ff0a9c7UL, 0x873d1d38UL, 0x3c85d16cUL, - 0x6cf9890fUL, 0x3ff0b558UL, 0x4adc610bUL, 0x3c98a62eUL, 0x45e46c85UL, - 0x3ff0c0f1UL, 0x06d21cefUL, 0x3c94f989UL, 0x2b7247f7UL, 0x3ff0cc92UL, - 0x16e24f71UL, 0x3c901edcUL, 0x23395decUL, 0x3ff0d83bUL, 0xe43f316aUL, - 0xbc9bc14dUL, 0x32d3d1a2UL, 0x3ff0e3ecUL, 0x27c57b52UL, 0x3c403a17UL, - 0x5fdfa9c5UL, 0x3ff0efa5UL, 0xbc54021bUL, 0xbc949db9UL, 0xaffed31bUL, - 0x3ff0fb66UL, 0xc44ebd7bUL, 0xbc6b9bedUL, 0x28d7233eUL, 0x3ff10730UL, - 0x1692fdd5UL, 0x3c8d46ebUL, 0xd0125b51UL, 0x3ff11301UL, 0x39449b3aUL, - 0xbc96c510UL, 0xab5e2ab6UL, 0x3ff11edbUL, 0xf703fb72UL, 0xbc9ca454UL, - 0xc06c31ccUL, 0x3ff12abdUL, 0xb36ca5c7UL, 0xbc51b514UL, 0x14f204abUL, - 0x3ff136a8UL, 0xba48dcf0UL, 0xbc67108fUL, 0xaea92de0UL, 0x3ff1429aUL, - 0x9af1369eUL, 0xbc932fbfUL, 0x934f312eUL, 0x3ff14e95UL, 0x39bf44abUL, - 0xbc8b91e8UL, 0xc8a58e51UL, 0x3ff15a98UL, 0xb9eeab0aUL, 0x3c82406aUL, - 0x5471c3c2UL, 0x3ff166a4UL, 0x82ea1a32UL, 0x3c58f23bUL, 0x3c7d517bUL, - 0x3ff172b8UL, 0xb9d78a76UL, 0xbc819041UL, 0x8695bbc0UL, 0x3ff17ed4UL, - 0xe2ac5a64UL, 0x3c709e3fUL, 0x388c8deaUL, 0x3ff18af9UL, 0xd1970f6cUL, - 0xbc911023UL, 0x58375d2fUL, 0x3ff19726UL, 0x85f17e08UL, 0x3c94aaddUL, - 0xeb6fcb75UL, 0x3ff1a35bUL, 0x7b4968e4UL, 0x3c8e5b4cUL, 0xf8138a1cUL, - 0x3ff1af99UL, 0xa4b69280UL, 0x3c97bf85UL, 0x84045cd4UL, 0x3ff1bbe0UL, - 0x352ef607UL, 0xbc995386UL, 0x95281c6bUL, 0x3ff1c82fUL, 0x8010f8c9UL, - 0x3c900977UL, 0x3168b9aaUL, 0x3ff1d487UL, 0x00a2643cUL, 0x3c9e016eUL, - 0x5eb44027UL, 0x3ff1e0e7UL, 0x088cb6deUL, 0xbc96fdd8UL, 0x22fcd91dUL, - 0x3ff1ed50UL, 0x027bb78cUL, 0xbc91df98UL, 0x8438ce4dUL, 0x3ff1f9c1UL, - 0xa097af5cUL, 0xbc9bf524UL, 0x88628cd6UL, 0x3ff2063bUL, 0x814a8495UL, - 0x3c8dc775UL, 0x3578a819UL, 0x3ff212beUL, 0x2cfcaac9UL, 0x3c93592dUL, - 0x917ddc96UL, 0x3ff21f49UL, 0x9494a5eeUL, 0x3c82a97eUL, 0xa27912d1UL, - 0x3ff22bddUL, 0x5577d69fUL, 0x3c8d34fbUL, 0x6e756238UL, 0x3ff2387aUL, - 0xb6c70573UL, 0x3c99b07eUL, 0xfb82140aUL, 0x3ff2451fUL, 0x911ca996UL, - 0x3c8acfccUL, 0x4fb2a63fUL, 0x3ff251ceUL, 0xbef4f4a4UL, 0x3c8ac155UL, - 0x711ece75UL, 0x3ff25e85UL, 0x4ac31b2cUL, 0x3c93e1a2UL, 0x65e27cddUL, - 0x3ff26b45UL, 0x9940e9d9UL, 0x3c82bd33UL, 0x341ddf29UL, 0x3ff2780eUL, - 0x05f9e76cUL, 0x3c9e067cUL, 0xe1f56381UL, 0x3ff284dfUL, 0x8c3f0d7eUL, - 0xbc9a4c3aUL, 0x7591bb70UL, 0x3ff291baUL, 0x28401cbdUL, 0xbc82cc72UL, - 0xf51fdee1UL, 0x3ff29e9dUL, 0xafad1255UL, 0x3c8612e8UL, 0x66d10f13UL, - 0x3ff2ab8aUL, 0x191690a7UL, 0xbc995743UL, 0xd0dad990UL, 0x3ff2b87fUL, - 0xd6381aa4UL, 0xbc410adcUL, 0x39771b2fUL, 0x3ff2c57eUL, 0xa6eb5124UL, - 0xbc950145UL, 0xa6e4030bUL, 0x3ff2d285UL, 0x54db41d5UL, 0x3c900247UL, - 0x1f641589UL, 0x3ff2df96UL, 0xfbbce198UL, 0x3c9d16cfUL, 0xa93e2f56UL, - 0x3ff2ecafUL, 0x45d52383UL, 0x3c71ca0fUL, 0x4abd886bUL, 0x3ff2f9d2UL, - 0x532bda93UL, 0xbc653c55UL, 0x0a31b715UL, 0x3ff306feUL, 0xd23182e4UL, - 0x3c86f46aUL, 0xedeeb2fdUL, 0x3ff31432UL, 0xf3f3fcd1UL, 0x3c8959a3UL, - 0xfc4cd831UL, 0x3ff32170UL, 0x8e18047cUL, 0x3c8a9ce7UL, 0x3ba8ea32UL, - 0x3ff32eb8UL, 0x3cb4f318UL, 0xbc9c45e8UL, 0xb26416ffUL, 0x3ff33c08UL, - 0x843659a6UL, 0x3c932721UL, 0x66e3fa2dUL, 0x3ff34962UL, 0x930881a4UL, - 0xbc835a75UL, 0x5f929ff1UL, 0x3ff356c5UL, 0x5c4e4628UL, 0xbc8b5ceeUL, - 0xa2de883bUL, 0x3ff36431UL, 0xa06cb85eUL, 0xbc8c3144UL, 0x373aa9cbUL, - 0x3ff371a7UL, 0xbf42eae2UL, 0xbc963aeaUL, 0x231e754aUL, 0x3ff37f26UL, - 0x9eceb23cUL, 0xbc99f5caUL, 0x6d05d866UL, 0x3ff38caeUL, 0x3c9904bdUL, - 0xbc9e958dUL, 0x1b7140efUL, 0x3ff39a40UL, 0xfc8e2934UL, 0xbc99a9a5UL, - 0x34e59ff7UL, 0x3ff3a7dbUL, 0xd661f5e3UL, 0xbc75e436UL, 0xbfec6cf4UL, - 0x3ff3b57fUL, 0xe26fff18UL, 0x3c954c66UL, 0xc313a8e5UL, 0x3ff3c32dUL, - 0x375d29c3UL, 0xbc9efff8UL, 0x44ede173UL, 0x3ff3d0e5UL, 0x8c284c71UL, - 0x3c7fe8d0UL, 0x4c123422UL, 0x3ff3dea6UL, 0x11f09ebcUL, 0x3c8ada09UL, - 0xdf1c5175UL, 0x3ff3ec70UL, 0x7b8c9bcaUL, 0xbc8af663UL, 0x04ac801cUL, - 0x3ff3fa45UL, 0xf956f9f3UL, 0xbc97d023UL, 0xc367a024UL, 0x3ff40822UL, - 0xb6f4d048UL, 0x3c8bddf8UL, 0x21f72e2aUL, 0x3ff4160aUL, 0x1c309278UL, - 0xbc5ef369UL, 0x2709468aUL, 0x3ff423fbUL, 0xc0b314ddUL, 0xbc98462dUL, - 0xd950a897UL, 0x3ff431f5UL, 0xe35f7999UL, 0xbc81c7ddUL, 0x3f84b9d4UL, - 0x3ff43ffaUL, 0x9704c003UL, 0x3c8880beUL, 0x6061892dUL, 0x3ff44e08UL, - 0x04ef80d0UL, 0x3c489b7aUL, 0x42a7d232UL, 0x3ff45c20UL, 0x82fb1f8eUL, - 0xbc686419UL, 0xed1d0057UL, 0x3ff46a41UL, 0xd1648a76UL, 0x3c9c944bUL, - 0x668b3237UL, 0x3ff4786dUL, 0xed445733UL, 0xbc9c20f0UL, 0xb5c13cd0UL, - 0x3ff486a2UL, 0xb69062f0UL, 0x3c73c1a3UL, 0xe192aed2UL, 0x3ff494e1UL, - 0x5e499ea0UL, 0xbc83b289UL, 0xf0d7d3deUL, 0x3ff4a32aUL, 0xf3d1be56UL, - 0x3c99cb62UL, 0xea6db7d7UL, 0x3ff4b17dUL, 0x7f2897f0UL, 0xbc8125b8UL, - 0xd5362a27UL, 0x3ff4bfdaUL, 0xafec42e2UL, 0x3c7d4397UL, 0xb817c114UL, - 0x3ff4ce41UL, 0x690abd5dUL, 0x3c905e29UL, 0x99fddd0dUL, 0x3ff4dcb2UL, - 0xbc6a7833UL, 0x3c98ecdbUL, 0x81d8abffUL, 0x3ff4eb2dUL, 0x2e5d7a52UL, - 0xbc95257dUL, 0x769d2ca7UL, 0x3ff4f9b2UL, 0xd25957e3UL, 0xbc94b309UL, - 0x7f4531eeUL, 0x3ff50841UL, 0x49b7465fUL, 0x3c7a249bUL, 0xa2cf6642UL, - 0x3ff516daUL, 0x69bd93efUL, 0xbc8f7685UL, 0xe83f4eefUL, 0x3ff5257dUL, - 0x43efef71UL, 0xbc7c998dUL, 0x569d4f82UL, 0x3ff5342bUL, 0x1db13cadUL, - 0xbc807abeUL, 0xf4f6ad27UL, 0x3ff542e2UL, 0x192d5f7eUL, 0x3c87926dUL, - 0xca5d920fUL, 0x3ff551a4UL, 0xefede59bUL, 0xbc8d689cUL, 0xdde910d2UL, - 0x3ff56070UL, 0x168eebf0UL, 0xbc90fb6eUL, 0x36b527daUL, 0x3ff56f47UL, - 0x011d93adUL, 0x3c99bb2cUL, 0xdbe2c4cfUL, 0x3ff57e27UL, 0x8a57b9c4UL, - 0xbc90b98cUL, 0xd497c7fdUL, 0x3ff58d12UL, 0x5b9a1de8UL, 0x3c8295e1UL, - 0x27ff07ccUL, 0x3ff59c08UL, 0xe467e60fUL, 0xbc97e2ceUL, 0xdd485429UL, - 0x3ff5ab07UL, 0x054647adUL, 0x3c96324cUL, 0xfba87a03UL, 0x3ff5ba11UL, - 0x4c233e1aUL, 0xbc9b77a1UL, 0x8a5946b7UL, 0x3ff5c926UL, 0x816986a2UL, - 0x3c3c4b1bUL, 0x90998b93UL, 0x3ff5d845UL, 0xa8b45643UL, 0xbc9cd6a7UL, - 0x15ad2148UL, 0x3ff5e76fUL, 0x3080e65eUL, 0x3c9ba6f9UL, 0x20dceb71UL, - 0x3ff5f6a3UL, 0xe3cdcf92UL, 0xbc89eaddUL, 0xb976dc09UL, 0x3ff605e1UL, - 0x9b56de47UL, 0xbc93e242UL, 0xe6cdf6f4UL, 0x3ff6152aUL, 0x4ab84c27UL, - 0x3c9e4b3eUL, 0xb03a5585UL, 0x3ff6247eUL, 0x7e40b497UL, 0xbc9383c1UL, - 0x1d1929fdUL, 0x3ff633ddUL, 0xbeb964e5UL, 0x3c984710UL, 0x34ccc320UL, - 0x3ff64346UL, 0x759d8933UL, 0xbc8c483cUL, 0xfebc8fb7UL, 0x3ff652b9UL, - 0xc9a73e09UL, 0xbc9ae3d5UL, 0x82552225UL, 0x3ff66238UL, 0x87591c34UL, - 0xbc9bb609UL, 0xc70833f6UL, 0x3ff671c1UL, 0x586c6134UL, 0xbc8e8732UL, - 0xd44ca973UL, 0x3ff68155UL, 0x44f73e65UL, 0x3c6038aeUL, 0xb19e9538UL, - 0x3ff690f4UL, 0x9aeb445dUL, 0x3c8804bdUL, 0x667f3bcdUL, 0x3ff6a09eUL, - 0x13b26456UL, 0xbc9bdd34UL, 0xfa75173eUL, 0x3ff6b052UL, 0x2c9a9d0eUL, - 0x3c7a38f5UL, 0x750bdabfUL, 0x3ff6c012UL, 0x67ff0b0dUL, 0xbc728956UL, - 0xddd47645UL, 0x3ff6cfdcUL, 0xb6f17309UL, 0x3c9c7aa9UL, 0x3c651a2fUL, - 0x3ff6dfb2UL, 0x683c88abUL, 0xbc6bbe3aUL, 0x98593ae5UL, 0x3ff6ef92UL, - 0x9e1ac8b2UL, 0xbc90b974UL, 0xf9519484UL, 0x3ff6ff7dUL, 0x25860ef6UL, - 0xbc883c0fUL, 0x66f42e87UL, 0x3ff70f74UL, 0xd45aa65fUL, 0x3c59d644UL, - 0xe8ec5f74UL, 0x3ff71f75UL, 0x86887a99UL, 0xbc816e47UL, 0x86ead08aUL, - 0x3ff72f82UL, 0x2cd62c72UL, 0xbc920aa0UL, 0x48a58174UL, 0x3ff73f9aUL, - 0x6c65d53cUL, 0xbc90a8d9UL, 0x35d7cbfdUL, 0x3ff74fbdUL, 0x618a6e1cUL, - 0x3c9047fdUL, 0x564267c9UL, 0x3ff75febUL, 0x57316dd3UL, 0xbc902459UL, - 0xb1ab6e09UL, 0x3ff77024UL, 0x169147f8UL, 0x3c9b7877UL, 0x4fde5d3fUL, - 0x3ff78069UL, 0x0a02162dUL, 0x3c9866b8UL, 0x38ac1cf6UL, 0x3ff790b9UL, - 0x62aadd3eUL, 0x3c9349a8UL, 0x73eb0187UL, 0x3ff7a114UL, 0xee04992fUL, - 0xbc841577UL, 0x0976cfdbUL, 0x3ff7b17bUL, 0x8468dc88UL, 0xbc9bebb5UL, - 0x0130c132UL, 0x3ff7c1edUL, 0xd1164dd6UL, 0x3c9f124cUL, 0x62ff86f0UL, - 0x3ff7d26aUL, 0xfb72b8b4UL, 0x3c91bddbUL, 0x36cf4e62UL, 0x3ff7e2f3UL, - 0xba15797eUL, 0x3c705d02UL, 0x8491c491UL, 0x3ff7f387UL, 0xcf9311aeUL, - 0xbc807f11UL, 0x543e1a12UL, 0x3ff80427UL, 0x626d972bUL, 0xbc927c86UL, - 0xadd106d9UL, 0x3ff814d2UL, 0x0d151d4dUL, 0x3c946437UL, 0x994cce13UL, - 0x3ff82589UL, 0xd41532d8UL, 0xbc9d4c1dUL, 0x1eb941f7UL, 0x3ff8364cUL, - 0x31df2bd5UL, 0x3c999b9aUL, 0x4623c7adUL, 0x3ff8471aUL, 0xa341cdfbUL, - 0xbc88d684UL, 0x179f5b21UL, 0x3ff857f4UL, 0xf8b216d0UL, 0xbc5ba748UL, - 0x9b4492edUL, 0x3ff868d9UL, 0x9bd4f6baUL, 0xbc9fc6f8UL, 0xd931a436UL, - 0x3ff879caUL, 0xd2db47bdUL, 0x3c85d2d7UL, 0xd98a6699UL, 0x3ff88ac7UL, - 0xf37cb53aUL, 0x3c9994c2UL, 0xa478580fUL, 0x3ff89bd0UL, 0x4475202aUL, - 0x3c9d5395UL, 0x422aa0dbUL, 0x3ff8ace5UL, 0x56864b27UL, 0x3c96e9f1UL, - 0xbad61778UL, 0x3ff8be05UL, 0xfc43446eUL, 0x3c9ecb5eUL, 0x16b5448cUL, - 0x3ff8cf32UL, 0x32e9e3aaUL, 0xbc70d55eUL, 0x5e0866d9UL, 0x3ff8e06aUL, - 0x6fc9b2e6UL, 0xbc97114aUL, 0x99157736UL, 0x3ff8f1aeUL, 0xa2e3976cUL, - 0x3c85cc13UL, 0xd0282c8aUL, 0x3ff902feUL, 0x85fe3fd2UL, 0x3c9592caUL, - 0x0b91ffc6UL, 0x3ff9145bUL, 0x2e582524UL, 0xbc9dd679UL, 0x53aa2fe2UL, - 0x3ff925c3UL, 0xa639db7fUL, 0xbc83455fUL, 0xb0cdc5e5UL, 0x3ff93737UL, - 0x81b57ebcUL, 0xbc675fc7UL, 0x2b5f98e5UL, 0x3ff948b8UL, 0x797d2d99UL, - 0xbc8dc3d6UL, 0xcbc8520fUL, 0x3ff95a44UL, 0x96a5f039UL, 0xbc764b7cUL, - 0x9a7670b3UL, 0x3ff96bddUL, 0x7f19c896UL, 0xbc5ba596UL, 0x9fde4e50UL, - 0x3ff97d82UL, 0x7c1b85d1UL, 0xbc9d185bUL, 0xe47a22a2UL, 0x3ff98f33UL, - 0xa24c78ecUL, 0x3c7cabdaUL, 0x70ca07baUL, 0x3ff9a0f1UL, 0x91cee632UL, - 0xbc9173bdUL, 0x4d53fe0dUL, 0x3ff9b2bbUL, 0x4df6d518UL, 0xbc9dd84eUL, - 0x82a3f090UL, 0x3ff9c491UL, 0xb071f2beUL, 0x3c7c7c46UL, 0x194bb8d5UL, - 0x3ff9d674UL, 0xa3dd8233UL, 0xbc9516beUL, 0x19e32323UL, 0x3ff9e863UL, - 0x78e64c6eUL, 0x3c7824caUL, 0x8d07f29eUL, 0x3ff9fa5eUL, 0xaaf1faceUL, - 0xbc84a9ceUL, 0x7b5de565UL, 0x3ffa0c66UL, 0x5d1cd533UL, 0xbc935949UL, - 0xed8eb8bbUL, 0x3ffa1e7aUL, 0xee8be70eUL, 0x3c9c6618UL, 0xec4a2d33UL, - 0x3ffa309bUL, 0x7ddc36abUL, 0x3c96305cUL, 0x80460ad8UL, 0x3ffa42c9UL, - 0x589fb120UL, 0xbc9aa780UL, 0xb23e255dUL, 0x3ffa5503UL, 0xdb8d41e1UL, - 0xbc9d2f6eUL, 0x8af46052UL, 0x3ffa674aUL, 0x30670366UL, 0x3c650f56UL, - 0x1330b358UL, 0x3ffa799eUL, 0xcac563c7UL, 0x3c9bcb7eUL, 0x53c12e59UL, - 0x3ffa8bfeUL, 0xb2ba15a9UL, 0xbc94f867UL, 0x5579fdbfUL, 0x3ffa9e6bUL, - 0x0ef7fd31UL, 0x3c90fac9UL, 0x21356ebaUL, 0x3ffab0e5UL, 0xdae94545UL, - 0x3c889c31UL, 0xbfd3f37aUL, 0x3ffac36bUL, 0xcae76cd0UL, 0xbc8f9234UL, - 0x3a3c2774UL, 0x3ffad5ffUL, 0xb6b1b8e5UL, 0x3c97ef3bUL, 0x995ad3adUL, - 0x3ffae89fUL, 0x345dcc81UL, 0x3c97a1cdUL, 0xe622f2ffUL, 0x3ffafb4cUL, - 0x0f315ecdUL, 0xbc94b2fcUL, 0x298db666UL, 0x3ffb0e07UL, 0x4c80e425UL, - 0xbc9bdef5UL, 0x6c9a8952UL, 0x3ffb20ceUL, 0x4a0756ccUL, 0x3c94dd02UL, - 0xb84f15fbUL, 0x3ffb33a2UL, 0x3084d708UL, 0xbc62805eUL, 0x15b749b1UL, - 0x3ffb4684UL, 0xe9df7c90UL, 0xbc7f763dUL, 0x8de5593aUL, 0x3ffb5972UL, - 0xbbba6de3UL, 0xbc9c71dfUL, 0x29f1c52aUL, 0x3ffb6c6eUL, 0x52883f6eUL, - 0x3c92a8f3UL, 0xf2fb5e47UL, 0x3ffb7f76UL, 0x7e54ac3bUL, 0xbc75584fUL, - 0xf22749e4UL, 0x3ffb928cUL, 0x54cb65c6UL, 0xbc9b7216UL, 0x30a1064aUL, - 0x3ffba5b0UL, 0x0e54292eUL, 0xbc9efcd3UL, 0xb79a6f1fUL, 0x3ffbb8e0UL, - 0xc9696205UL, 0xbc3f52d1UL, 0x904bc1d2UL, 0x3ffbcc1eUL, 0x7a2d9e84UL, - 0x3c823dd0UL, 0xc3f3a207UL, 0x3ffbdf69UL, 0x60ea5b53UL, 0xbc3c2623UL, - 0x5bd71e09UL, 0x3ffbf2c2UL, 0x3f6b9c73UL, 0xbc9efdcaUL, 0x6141b33dUL, - 0x3ffc0628UL, 0xa1fbca34UL, 0xbc8d8a5aUL, 0xdd85529cUL, 0x3ffc199bUL, - 0x895048ddUL, 0x3c811065UL, 0xd9fa652cUL, 0x3ffc2d1cUL, 0x17c8a5d7UL, - 0xbc96e516UL, 0x5fffd07aUL, 0x3ffc40abUL, 0xe083c60aUL, 0x3c9b4537UL, - 0x78fafb22UL, 0x3ffc5447UL, 0x2493b5afUL, 0x3c912f07UL, 0x2e57d14bUL, - 0x3ffc67f1UL, 0xff483cadUL, 0x3c92884dUL, 0x8988c933UL, 0x3ffc7ba8UL, - 0xbe255559UL, 0xbc8e76bbUL, 0x9406e7b5UL, 0x3ffc8f6dUL, 0x48805c44UL, - 0x3c71acbcUL, 0x5751c4dbUL, 0x3ffca340UL, 0xd10d08f5UL, 0xbc87f2beUL, - 0xdcef9069UL, 0x3ffcb720UL, 0xd1e949dbUL, 0x3c7503cbUL, 0x2e6d1675UL, - 0x3ffccb0fUL, 0x86009092UL, 0xbc7d220fUL, 0x555dc3faUL, 0x3ffcdf0bUL, - 0x53829d72UL, 0xbc8dd83bUL, 0x5b5bab74UL, 0x3ffcf315UL, 0xb86dff57UL, - 0xbc9a08e9UL, 0x4a07897cUL, 0x3ffd072dUL, 0x43797a9cUL, 0xbc9cbc37UL, - 0x2b08c968UL, 0x3ffd1b53UL, 0x219a36eeUL, 0x3c955636UL, 0x080d89f2UL, - 0x3ffd2f87UL, 0x719d8578UL, 0xbc9d487bUL, 0xeacaa1d6UL, 0x3ffd43c8UL, - 0xbf5a1614UL, 0x3c93db53UL, 0xdcfba487UL, 0x3ffd5818UL, 0xd75b3707UL, - 0x3c82ed02UL, 0xe862e6d3UL, 0x3ffd6c76UL, 0x4a8165a0UL, 0x3c5fe87aUL, - 0x16c98398UL, 0x3ffd80e3UL, 0x8beddfe8UL, 0xbc911ec1UL, 0x71ff6075UL, - 0x3ffd955dUL, 0xbb9af6beUL, 0x3c9a052dUL, 0x03db3285UL, 0x3ffda9e6UL, - 0x696db532UL, 0x3c9c2300UL, 0xd63a8315UL, 0x3ffdbe7cUL, 0x926b8be4UL, - 0xbc9b76f1UL, 0xf301b460UL, 0x3ffdd321UL, 0x78f018c3UL, 0x3c92da57UL, - 0x641c0658UL, 0x3ffde7d5UL, 0x8e79ba8fUL, 0xbc9ca552UL, 0x337b9b5fUL, - 0x3ffdfc97UL, 0x4f184b5cUL, 0xbc91a5cdUL, 0x6b197d17UL, 0x3ffe1167UL, - 0xbd5c7f44UL, 0xbc72b529UL, 0x14f5a129UL, 0x3ffe2646UL, 0x817a1496UL, - 0xbc97b627UL, 0x3b16ee12UL, 0x3ffe3b33UL, 0x31fdc68bUL, 0xbc99f4a4UL, - 0xe78b3ff6UL, 0x3ffe502eUL, 0x80a9cc8fUL, 0x3c839e89UL, 0x24676d76UL, - 0x3ffe6539UL, 0x7522b735UL, 0xbc863ff8UL, 0xfbc74c83UL, 0x3ffe7a51UL, - 0xca0c8de2UL, 0x3c92d522UL, 0x77cdb740UL, 0x3ffe8f79UL, 0x80b054b1UL, - 0xbc910894UL, 0xa2a490daUL, 0x3ffea4afUL, 0x179c2893UL, 0xbc9e9c23UL, - 0x867cca6eUL, 0x3ffeb9f4UL, 0x2293e4f2UL, 0x3c94832fUL, 0x2d8e67f1UL, - 0x3ffecf48UL, 0xb411ad8cUL, 0xbc9c93f3UL, 0xa2188510UL, 0x3ffee4aaUL, - 0xa487568dUL, 0x3c91c68dUL, 0xee615a27UL, 0x3ffefa1bUL, 0x86a4b6b0UL, - 0x3c9dc7f4UL, 0x1cb6412aUL, 0x3fff0f9cUL, 0x65181d45UL, 0xbc932200UL, - 0x376bba97UL, 0x3fff252bUL, 0xbf0d8e43UL, 0x3c93a1a5UL, 0x48dd7274UL, - 0x3fff3ac9UL, 0x3ed837deUL, 0xbc795a5aUL, 0x5b6e4540UL, 0x3fff5076UL, - 0x2dd8a18bUL, 0x3c99d3e1UL, 0x798844f8UL, 0x3fff6632UL, 0x3539343eUL, - 0x3c9fa37bUL, 0xad9cbe14UL, 0x3fff7bfdUL, 0xd006350aUL, 0xbc9dbb12UL, - 0x02243c89UL, 0x3fff91d8UL, 0xa779f689UL, 0xbc612ea8UL, 0x819e90d8UL, - 0x3fffa7c1UL, 0xf3a5931eUL, 0x3c874853UL, 0x3692d514UL, 0x3fffbdbaUL, - 0x15098eb6UL, 0xbc796773UL, 0x2b8f71f1UL, 0x3fffd3c2UL, 0x966579e7UL, - 0x3c62eb74UL, 0x6b2a23d9UL, 0x3fffe9d9UL, 0x7442fde3UL, 0x3c74a603UL, - 0xe78a6731UL, 0x3f55d87fUL, 0xd704a0c0UL, 0x3fac6b08UL, 0x6fba4e77UL, - 0x3f83b2abUL, 0xff82c58fUL, 0x3fcebfbdUL, 0xfefa39efUL, 0x3fe62e42UL, - 0x00000000UL, 0x00000000UL, 0xfefa39efUL, 0x3fe62e42UL, 0xfefa39efUL, - 0xbfe62e42UL, 0xf8000000UL, 0xffffffffUL, 0xf8000000UL, 0xffffffffUL, - 0x00000000UL, 0x80000000UL, 0x00000000UL, 0x00000000UL - -}; - -ATTRIBUTE_ALIGNED(8) static const double _DOUBLE2 = 2.0; -ATTRIBUTE_ALIGNED(8) static const double _DOUBLE0 = 0.0; -ATTRIBUTE_ALIGNED(8) static const double _DOUBLE0DOT5 = 0.5; - -//registers, -// input: xmm0, xmm1 -// scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7 -// eax, edx, ecx, ebx - -// Code generated by Intel C compiler for LIBM library - -void MacroAssembler::fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) { - Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2; - Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2; - Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, L_2TAG_PACKET_10_0_2, L_2TAG_PACKET_11_0_2; - Label L_2TAG_PACKET_12_0_2, L_2TAG_PACKET_13_0_2, L_2TAG_PACKET_14_0_2, L_2TAG_PACKET_15_0_2; - Label L_2TAG_PACKET_16_0_2, L_2TAG_PACKET_17_0_2, L_2TAG_PACKET_18_0_2, L_2TAG_PACKET_19_0_2; - Label L_2TAG_PACKET_20_0_2, L_2TAG_PACKET_21_0_2, L_2TAG_PACKET_22_0_2, L_2TAG_PACKET_23_0_2; - Label L_2TAG_PACKET_24_0_2, L_2TAG_PACKET_25_0_2, L_2TAG_PACKET_26_0_2, L_2TAG_PACKET_27_0_2; - Label L_2TAG_PACKET_28_0_2, L_2TAG_PACKET_29_0_2, L_2TAG_PACKET_30_0_2, L_2TAG_PACKET_31_0_2; - Label L_2TAG_PACKET_32_0_2, L_2TAG_PACKET_33_0_2, L_2TAG_PACKET_34_0_2, L_2TAG_PACKET_35_0_2; - Label L_2TAG_PACKET_36_0_2, L_2TAG_PACKET_37_0_2, L_2TAG_PACKET_38_0_2, L_2TAG_PACKET_39_0_2; - Label L_2TAG_PACKET_40_0_2, L_2TAG_PACKET_41_0_2, L_2TAG_PACKET_42_0_2, L_2TAG_PACKET_43_0_2; - Label L_2TAG_PACKET_44_0_2, L_2TAG_PACKET_45_0_2, L_2TAG_PACKET_46_0_2, L_2TAG_PACKET_47_0_2; - Label L_2TAG_PACKET_48_0_2, L_2TAG_PACKET_49_0_2, L_2TAG_PACKET_50_0_2, L_2TAG_PACKET_51_0_2; - Label L_2TAG_PACKET_52_0_2, L_2TAG_PACKET_53_0_2, L_2TAG_PACKET_54_0_2, L_2TAG_PACKET_55_0_2; - Label L_2TAG_PACKET_56_0_2, L_2TAG_PACKET_57_0_2, L_2TAG_PACKET_58_0_2, start; - Label L_NOT_DOUBLE2, L_NOT_DOUBLE0DOT5; - - assert_different_registers(tmp, eax, ecx, edx); - - address static_const_table_pow = (address)_static_const_table_pow; - address DOUBLE2 = (address) &_DOUBLE2; - address DOUBLE0 = (address) &_DOUBLE0; - address DOUBLE0DOT5 = (address) &_DOUBLE0DOT5; - - subl(rsp, 120); - movl(Address(rsp, 64), tmp); - lea(tmp, ExternalAddress(static_const_table_pow)); - movsd(xmm0, Address(rsp, 128)); - movsd(xmm1, Address(rsp, 136)); - - // Special case: pow(x, 2.0) => x * x - ucomisd(xmm1, ExternalAddress(DOUBLE2)); - jccb(Assembler::notEqual, L_NOT_DOUBLE2); - jccb(Assembler::parity, L_NOT_DOUBLE2); - mulsd(xmm0, xmm0); - jmp(L_2TAG_PACKET_21_0_2); - - bind(L_NOT_DOUBLE2); - // Special case: pow(x, 0.5) => sqrt(x) - ucomisd(xmm1, ExternalAddress(DOUBLE0DOT5)); // For pow(x, y), check whether y == 0.5 - jccb(Assembler::notEqual, L_NOT_DOUBLE0DOT5); - jccb(Assembler::parity, L_NOT_DOUBLE0DOT5); - ucomisd(xmm0, ExternalAddress(DOUBLE0)); - // According to the API specs, pow(-0.0, 0.5) = 0.0 and sqrt(-0.0) = -0.0. - // So pow(-0.0, 0.5) shouldn't be replaced with sqrt(-0.0). - // -0.0/+0.0 are both excluded since floating-point comparison doesn't distinguish -0.0 from +0.0. - jccb(Assembler::belowEqual, L_NOT_DOUBLE0DOT5); // pow(x, 0.5) => sqrt(x) only for x > 0.0 - sqrtsd(xmm0, xmm0); - jmp(L_2TAG_PACKET_21_0_2); - - bind(L_NOT_DOUBLE0DOT5); - xorpd(xmm2, xmm2); - movl(eax, 16368); - pinsrw(xmm2, eax, 3); - movl(ecx, 1069088768); - movdl(xmm7, ecx); - movsd(Address(rsp, 16), xmm1); - xorpd(xmm1, xmm1); - movl(edx, 30704); - pinsrw(xmm1, edx, 3); - movsd(Address(rsp, 8), xmm0); - movdqu(xmm3, xmm0); - movl(edx, 8192); - movdl(xmm4, edx); - movdqu(xmm6, Address(tmp, 8240)); - pextrw(eax, xmm0, 3); - por(xmm0, xmm2); - psllq(xmm0, 5); - movsd(xmm2, Address(tmp, 8256)); - psrlq(xmm0, 34); - movl(edx, eax); - andl(edx, 32752); - subl(edx, 16368); - movl(ecx, edx); - sarl(edx, 31); - addl(ecx, edx); - xorl(ecx, edx); - rcpss(xmm0, xmm0); - psllq(xmm3, 12); - addl(ecx, 16); - bsrl(ecx, ecx); - psrlq(xmm3, 12); - movl(Address(rsp, 24), rsi); - subl(eax, 16); - cmpl(eax, 32736); - jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2); - movl(rsi, 0); - - bind(L_2TAG_PACKET_1_0_2); - mulss(xmm0, xmm7); - movl(edx, -1); - subl(ecx, 4); - shll(edx); - movdl(xmm5, edx); - por(xmm3, xmm1); - subl(eax, 16351); - cmpl(eax, 1); - jcc(Assembler::belowEqual, L_2TAG_PACKET_2_0_2); - paddd(xmm0, xmm4); - psllq(xmm5, 32); - movdl(edx, xmm0); - psllq(xmm0, 29); - pand(xmm5, xmm3); - - bind(L_2TAG_PACKET_3_0_2); - pand(xmm0, xmm6); - subsd(xmm3, xmm5); - subl(eax, 1); - sarl(eax, 4); - cvtsi2sdl(xmm7, eax); - mulpd(xmm5, xmm0); - - bind(L_2TAG_PACKET_4_0_2); - mulsd(xmm3, xmm0); - movdqu(xmm1, Address(tmp, 8272)); - subsd(xmm5, xmm2); - movdqu(xmm4, Address(tmp, 8288)); - movl(ecx, eax); - sarl(eax, 31); - addl(ecx, eax); - xorl(eax, ecx); - addl(eax, 1); - bsrl(eax, eax); - unpcklpd(xmm5, xmm3); - movdqu(xmm6, Address(tmp, 8304)); - addsd(xmm3, xmm5); - andl(edx, 16760832); - shrl(edx, 10); - addpd(xmm5, Address(tmp, edx, Address::times_1, -3616)); - movdqu(xmm0, Address(tmp, 8320)); - pshufd(xmm2, xmm3, 68); - mulsd(xmm3, xmm3); - mulpd(xmm1, xmm2); - mulpd(xmm4, xmm2); - addsd(xmm5, xmm7); - mulsd(xmm2, xmm3); - addpd(xmm6, xmm1); - mulsd(xmm3, xmm3); - addpd(xmm0, xmm4); - movsd(xmm1, Address(rsp, 16)); - movzwl(ecx, Address(rsp, 22)); - pshufd(xmm7, xmm5, 238); - movsd(xmm4, Address(tmp, 8368)); - mulpd(xmm6, xmm2); - pshufd(xmm3, xmm3, 68); - mulpd(xmm0, xmm2); - shll(eax, 4); - subl(eax, 15872); - andl(ecx, 32752); - addl(eax, ecx); - mulpd(xmm3, xmm6); - cmpl(eax, 624); - jcc(Assembler::aboveEqual, L_2TAG_PACKET_5_0_2); - xorpd(xmm6, xmm6); - movl(edx, 17080); - pinsrw(xmm6, edx, 3); - movdqu(xmm2, xmm1); - pand(xmm4, xmm1); - subsd(xmm1, xmm4); - mulsd(xmm4, xmm5); - addsd(xmm0, xmm7); - mulsd(xmm1, xmm5); - movdqu(xmm7, xmm6); - addsd(xmm6, xmm4); - addpd(xmm3, xmm0); - movdl(edx, xmm6); - subsd(xmm6, xmm7); - pshufd(xmm0, xmm3, 238); - subsd(xmm4, xmm6); - addsd(xmm0, xmm3); - movl(ecx, edx); - andl(edx, 255); - addl(edx, edx); - movdqu(xmm5, Address(tmp, edx, Address::times_8, 8384)); - addsd(xmm4, xmm1); - mulsd(xmm2, xmm0); - movdqu(xmm7, Address(tmp, 12480)); - movdqu(xmm3, Address(tmp, 12496)); - shll(ecx, 12); - xorl(ecx, rsi); - andl(ecx, -1048576); - movdl(xmm6, ecx); - addsd(xmm2, xmm4); - movsd(xmm1, Address(tmp, 12512)); - pshufd(xmm0, xmm2, 68); - pshufd(xmm4, xmm2, 68); - mulpd(xmm0, xmm0); - movl(rsi, Address(rsp, 24)); - mulpd(xmm7, xmm4); - pshufd(xmm6, xmm6, 17); - mulsd(xmm1, xmm2); - mulsd(xmm0, xmm0); - paddd(xmm5, xmm6); - addpd(xmm3, xmm7); - mulsd(xmm1, xmm5); - pshufd(xmm6, xmm5, 238); - mulpd(xmm0, xmm3); - addsd(xmm1, xmm6); - pshufd(xmm3, xmm0, 238); - mulsd(xmm0, xmm5); - mulsd(xmm3, xmm5); - addsd(xmm0, xmm1); - addsd(xmm0, xmm3); - addsd(xmm0, xmm5); - movsd(Address(rsp, 0), xmm0); - fld_d(Address(rsp, 0)); - jmp(L_2TAG_PACKET_6_0_2); - - bind(L_2TAG_PACKET_7_0_2); - movsd(xmm0, Address(rsp, 128)); - movsd(xmm1, Address(rsp, 136)); - mulsd(xmm0, xmm1); - movsd(Address(rsp, 0), xmm0); - fld_d(Address(rsp, 0)); - jmp(L_2TAG_PACKET_6_0_2); - - bind(L_2TAG_PACKET_0_0_2); - addl(eax, 16); - movl(edx, 32752); - andl(edx, eax); - cmpl(edx, 32752); - jcc(Assembler::equal, L_2TAG_PACKET_8_0_2); - testl(eax, 32768); - jcc(Assembler::notEqual, L_2TAG_PACKET_9_0_2); - - bind(L_2TAG_PACKET_10_0_2); - movl(ecx, Address(rsp, 16)); - xorl(edx, edx); - testl(ecx, ecx); - movl(ecx, 1); - cmovl(Assembler::notEqual, edx, ecx); - orl(edx, Address(rsp, 20)); - cmpl(edx, 1072693248); - jcc(Assembler::equal, L_2TAG_PACKET_7_0_2); - movsd(xmm0, Address(rsp, 8)); - movsd(xmm3, Address(rsp, 8)); - movdl(edx, xmm3); - psrlq(xmm3, 32); - movdl(ecx, xmm3); - orl(edx, ecx); - cmpl(edx, 0); - jcc(Assembler::equal, L_2TAG_PACKET_11_0_2); - xorpd(xmm3, xmm3); - movl(eax, 18416); - pinsrw(xmm3, eax, 3); - mulsd(xmm0, xmm3); - xorpd(xmm2, xmm2); - movl(eax, 16368); - pinsrw(xmm2, eax, 3); - movdqu(xmm3, xmm0); - pextrw(eax, xmm0, 3); - por(xmm0, xmm2); - movl(ecx, 18416); - psllq(xmm0, 5); - movsd(xmm2, Address(tmp, 8256)); - psrlq(xmm0, 34); - rcpss(xmm0, xmm0); - psllq(xmm3, 12); - movdqu(xmm6, Address(tmp, 8240)); - psrlq(xmm3, 12); - mulss(xmm0, xmm7); - movl(edx, -1024); - movdl(xmm5, edx); - por(xmm3, xmm1); - paddd(xmm0, xmm4); - psllq(xmm5, 32); - movdl(edx, xmm0); - psllq(xmm0, 29); - pand(xmm5, xmm3); - movl(rsi, 0); - pand(xmm0, xmm6); - subsd(xmm3, xmm5); - andl(eax, 32752); - subl(eax, 18416); - sarl(eax, 4); - cvtsi2sdl(xmm7, eax); - mulpd(xmm5, xmm0); - jmp(L_2TAG_PACKET_4_0_2); - - bind(L_2TAG_PACKET_12_0_2); - movl(ecx, Address(rsp, 16)); - xorl(edx, edx); - testl(ecx, ecx); - movl(ecx, 1); - cmovl(Assembler::notEqual, edx, ecx); - orl(edx, Address(rsp, 20)); - cmpl(edx, 1072693248); - jcc(Assembler::equal, L_2TAG_PACKET_7_0_2); - movsd(xmm0, Address(rsp, 8)); - movsd(xmm3, Address(rsp, 8)); - movdl(edx, xmm3); - psrlq(xmm3, 32); - movdl(ecx, xmm3); - orl(edx, ecx); - cmpl(edx, 0); - jcc(Assembler::equal, L_2TAG_PACKET_11_0_2); - xorpd(xmm3, xmm3); - movl(eax, 18416); - pinsrw(xmm3, eax, 3); - mulsd(xmm0, xmm3); - xorpd(xmm2, xmm2); - movl(eax, 16368); - pinsrw(xmm2, eax, 3); - movdqu(xmm3, xmm0); - pextrw(eax, xmm0, 3); - por(xmm0, xmm2); - movl(ecx, 18416); - psllq(xmm0, 5); - movsd(xmm2, Address(tmp, 8256)); - psrlq(xmm0, 34); - rcpss(xmm0, xmm0); - psllq(xmm3, 12); - movdqu(xmm6, Address(tmp, 8240)); - psrlq(xmm3, 12); - mulss(xmm0, xmm7); - movl(edx, -1024); - movdl(xmm5, edx); - por(xmm3, xmm1); - paddd(xmm0, xmm4); - psllq(xmm5, 32); - movdl(edx, xmm0); - psllq(xmm0, 29); - pand(xmm5, xmm3); - movl(rsi, INT_MIN); - pand(xmm0, xmm6); - subsd(xmm3, xmm5); - andl(eax, 32752); - subl(eax, 18416); - sarl(eax, 4); - cvtsi2sdl(xmm7, eax); - mulpd(xmm5, xmm0); - jmp(L_2TAG_PACKET_4_0_2); - - bind(L_2TAG_PACKET_5_0_2); - cmpl(eax, 0); - jcc(Assembler::less, L_2TAG_PACKET_13_0_2); - cmpl(eax, 752); - jcc(Assembler::aboveEqual, L_2TAG_PACKET_14_0_2); - - bind(L_2TAG_PACKET_15_0_2); - addsd(xmm0, xmm7); - movsd(xmm2, Address(tmp, 12544)); - addpd(xmm3, xmm0); - xorpd(xmm6, xmm6); - movl(eax, 17080); - pinsrw(xmm6, eax, 3); - pshufd(xmm0, xmm3, 238); - addsd(xmm0, xmm3); - movdqu(xmm3, xmm5); - addsd(xmm5, xmm0); - movdqu(xmm4, xmm2); - subsd(xmm3, xmm5); - movdqu(xmm7, xmm5); - pand(xmm5, xmm2); - movdqu(xmm2, xmm1); - pand(xmm4, xmm1); - subsd(xmm7, xmm5); - addsd(xmm0, xmm3); - subsd(xmm1, xmm4); - mulsd(xmm4, xmm5); - addsd(xmm0, xmm7); - mulsd(xmm2, xmm0); - movdqu(xmm7, xmm6); - mulsd(xmm1, xmm5); - addsd(xmm6, xmm4); - movdl(eax, xmm6); - subsd(xmm6, xmm7); - addsd(xmm2, xmm1); - movdqu(xmm7, Address(tmp, 12480)); - movdqu(xmm3, Address(tmp, 12496)); - subsd(xmm4, xmm6); - pextrw(edx, xmm6, 3); - movl(ecx, eax); - andl(eax, 255); - addl(eax, eax); - movdqu(xmm5, Address(tmp, eax, Address::times_8, 8384)); - addsd(xmm2, xmm4); - sarl(ecx, 8); - movl(eax, ecx); - sarl(ecx, 1); - subl(eax, ecx); - shll(ecx, 20); - xorl(ecx, rsi); - movdl(xmm6, ecx); - movsd(xmm1, Address(tmp, 12512)); - andl(edx, 32767); - cmpl(edx, 16529); - jcc(Assembler::above, L_2TAG_PACKET_14_0_2); - pshufd(xmm0, xmm2, 68); - pshufd(xmm4, xmm2, 68); - mulpd(xmm0, xmm0); - mulpd(xmm7, xmm4); - pshufd(xmm6, xmm6, 17); - mulsd(xmm1, xmm2); - mulsd(xmm0, xmm0); - paddd(xmm5, xmm6); - addpd(xmm3, xmm7); - mulsd(xmm1, xmm5); - pshufd(xmm6, xmm5, 238); - mulpd(xmm0, xmm3); - addsd(xmm1, xmm6); - pshufd(xmm3, xmm0, 238); - mulsd(xmm0, xmm5); - mulsd(xmm3, xmm5); - shll(eax, 4); - xorpd(xmm4, xmm4); - addl(eax, 16368); - pinsrw(xmm4, eax, 3); - addsd(xmm0, xmm1); - movl(rsi, Address(rsp, 24)); - addsd(xmm0, xmm3); - movdqu(xmm1, xmm0); - addsd(xmm0, xmm5); - mulsd(xmm0, xmm4); - pextrw(eax, xmm0, 3); - andl(eax, 32752); - jcc(Assembler::equal, L_2TAG_PACKET_16_0_2); - cmpl(eax, 32752); - jcc(Assembler::equal, L_2TAG_PACKET_17_0_2); - - bind(L_2TAG_PACKET_18_0_2); - movsd(Address(rsp, 0), xmm0); - fld_d(Address(rsp, 0)); - jmp(L_2TAG_PACKET_6_0_2); - - bind(L_2TAG_PACKET_8_0_2); - movsd(xmm1, Address(rsp, 16)); - movsd(xmm0, Address(rsp, 8)); - movdqu(xmm2, xmm0); - movdl(eax, xmm2); - psrlq(xmm2, 20); - movdl(edx, xmm2); - orl(eax, edx); - jcc(Assembler::equal, L_2TAG_PACKET_19_0_2); - addsd(xmm0, xmm0); - movdl(eax, xmm1); - psrlq(xmm1, 32); - movdl(edx, xmm1); - movl(ecx, edx); - addl(edx, edx); - orl(eax, edx); - jcc(Assembler::equal, L_2TAG_PACKET_20_0_2); - jmp(L_2TAG_PACKET_18_0_2); - - bind(L_2TAG_PACKET_20_0_2); - xorpd(xmm0, xmm0); - movl(eax, 16368); - pinsrw(xmm0, eax, 3); - movl(edx, 29); - jmp(L_2TAG_PACKET_21_0_2); - - bind(L_2TAG_PACKET_22_0_2); - movsd(xmm0, Address(rsp, 16)); - addpd(xmm0, xmm0); - jmp(L_2TAG_PACKET_18_0_2); - - bind(L_2TAG_PACKET_19_0_2); - movdl(eax, xmm1); - movdqu(xmm2, xmm1); - psrlq(xmm1, 32); - movdl(edx, xmm1); - movl(ecx, edx); - addl(edx, edx); - orl(eax, edx); - jcc(Assembler::equal, L_2TAG_PACKET_23_0_2); - pextrw(eax, xmm2, 3); - andl(eax, 32752); - cmpl(eax, 32752); - jcc(Assembler::notEqual, L_2TAG_PACKET_24_0_2); - movdl(eax, xmm2); - psrlq(xmm2, 20); - movdl(edx, xmm2); - orl(eax, edx); - jcc(Assembler::notEqual, L_2TAG_PACKET_22_0_2); - - bind(L_2TAG_PACKET_24_0_2); - pextrw(eax, xmm0, 3); - testl(eax, 32768); - jcc(Assembler::notEqual, L_2TAG_PACKET_25_0_2); - testl(ecx, INT_MIN); - jcc(Assembler::notEqual, L_2TAG_PACKET_26_0_2); - jmp(L_2TAG_PACKET_18_0_2); - - bind(L_2TAG_PACKET_27_0_2); - movsd(xmm1, Address(rsp, 16)); - movdl(eax, xmm1); - testl(eax, 1); - jcc(Assembler::notEqual, L_2TAG_PACKET_28_0_2); - testl(eax, 2); - jcc(Assembler::notEqual, L_2TAG_PACKET_29_0_2); - jmp(L_2TAG_PACKET_28_0_2); - - bind(L_2TAG_PACKET_25_0_2); - shrl(ecx, 20); - andl(ecx, 2047); - cmpl(ecx, 1075); - jcc(Assembler::above, L_2TAG_PACKET_28_0_2); - jcc(Assembler::equal, L_2TAG_PACKET_30_0_2); - cmpl(ecx, 1074); - jcc(Assembler::above, L_2TAG_PACKET_27_0_2); - cmpl(ecx, 1023); - jcc(Assembler::below, L_2TAG_PACKET_28_0_2); - movsd(xmm1, Address(rsp, 16)); - movl(eax, 17208); - xorpd(xmm3, xmm3); - pinsrw(xmm3, eax, 3); - movdqu(xmm4, xmm3); - addsd(xmm3, xmm1); - subsd(xmm4, xmm3); - addsd(xmm1, xmm4); - pextrw(eax, xmm1, 3); - andl(eax, 32752); - jcc(Assembler::notEqual, L_2TAG_PACKET_28_0_2); - movdl(eax, xmm3); - andl(eax, 1); - jcc(Assembler::equal, L_2TAG_PACKET_28_0_2); - - bind(L_2TAG_PACKET_29_0_2); - movsd(xmm1, Address(rsp, 16)); - pextrw(eax, xmm1, 3); - andl(eax, 32768); - jcc(Assembler::equal, L_2TAG_PACKET_18_0_2); - xorpd(xmm0, xmm0); - movl(eax, 32768); - pinsrw(xmm0, eax, 3); - jmp(L_2TAG_PACKET_18_0_2); - - bind(L_2TAG_PACKET_28_0_2); - movsd(xmm1, Address(rsp, 16)); - pextrw(eax, xmm1, 3); - andl(eax, 32768); - jcc(Assembler::notEqual, L_2TAG_PACKET_26_0_2); - - bind(L_2TAG_PACKET_31_0_2); - xorpd(xmm0, xmm0); - movl(eax, 32752); - pinsrw(xmm0, eax, 3); - jmp(L_2TAG_PACKET_18_0_2); - - bind(L_2TAG_PACKET_30_0_2); - movsd(xmm1, Address(rsp, 16)); - movdl(eax, xmm1); - andl(eax, 1); - jcc(Assembler::equal, L_2TAG_PACKET_28_0_2); - jmp(L_2TAG_PACKET_29_0_2); - - bind(L_2TAG_PACKET_32_0_2); - movdl(eax, xmm1); - psrlq(xmm1, 20); - movdl(edx, xmm1); - orl(eax, edx); - jcc(Assembler::equal, L_2TAG_PACKET_33_0_2); - movsd(xmm0, Address(rsp, 16)); - addsd(xmm0, xmm0); - jmp(L_2TAG_PACKET_18_0_2); - - bind(L_2TAG_PACKET_33_0_2); - movsd(xmm0, Address(rsp, 8)); - pextrw(eax, xmm0, 3); - cmpl(eax, 49136); - jcc(Assembler::notEqual, L_2TAG_PACKET_34_0_2); - movdl(ecx, xmm0); - psrlq(xmm0, 20); - movdl(edx, xmm0); - orl(ecx, edx); - jcc(Assembler::notEqual, L_2TAG_PACKET_34_0_2); - xorpd(xmm0, xmm0); - movl(eax, 32760); - pinsrw(xmm0, eax, 3); - jmp(L_2TAG_PACKET_18_0_2); - - bind(L_2TAG_PACKET_34_0_2); - movsd(xmm1, Address(rsp, 16)); - andl(eax, 32752); - subl(eax, 16368); - pextrw(edx, xmm1, 3); - xorpd(xmm0, xmm0); - xorl(eax, edx); - andl(eax, 32768); - jcc(Assembler::notEqual, L_2TAG_PACKET_18_0_2); - movl(ecx, 32752); - pinsrw(xmm0, ecx, 3); - jmp(L_2TAG_PACKET_18_0_2); - - bind(L_2TAG_PACKET_35_0_2); - movdl(eax, xmm1); - cmpl(edx, 17184); - jcc(Assembler::above, L_2TAG_PACKET_36_0_2); - testl(eax, 1); - jcc(Assembler::notEqual, L_2TAG_PACKET_37_0_2); - testl(eax, 2); - jcc(Assembler::equal, L_2TAG_PACKET_38_0_2); - jmp(L_2TAG_PACKET_39_0_2); - - bind(L_2TAG_PACKET_36_0_2); - testl(eax, 1); - jcc(Assembler::equal, L_2TAG_PACKET_38_0_2); - jmp(L_2TAG_PACKET_39_0_2); - - bind(L_2TAG_PACKET_9_0_2); - movsd(xmm2, Address(rsp, 8)); - movdl(eax, xmm2); - psrlq(xmm2, 31); - movdl(ecx, xmm2); - orl(eax, ecx); - jcc(Assembler::equal, L_2TAG_PACKET_11_0_2); - movsd(xmm1, Address(rsp, 16)); - pextrw(edx, xmm1, 3); - movdl(eax, xmm1); - movdqu(xmm2, xmm1); - psrlq(xmm2, 32); - movdl(ecx, xmm2); - addl(ecx, ecx); - orl(ecx, eax); - jcc(Assembler::equal, L_2TAG_PACKET_40_0_2); - andl(edx, 32752); - cmpl(edx, 32752); - jcc(Assembler::equal, L_2TAG_PACKET_32_0_2); - cmpl(edx, 17200); - jcc(Assembler::above, L_2TAG_PACKET_38_0_2); - cmpl(edx, 17184); - jcc(Assembler::aboveEqual, L_2TAG_PACKET_35_0_2); - cmpl(edx, 16368); - jcc(Assembler::below, L_2TAG_PACKET_37_0_2); - movl(eax, 17208); - xorpd(xmm2, xmm2); - pinsrw(xmm2, eax, 3); - movdqu(xmm4, xmm2); - addsd(xmm2, xmm1); - subsd(xmm4, xmm2); - addsd(xmm1, xmm4); - pextrw(eax, xmm1, 3); - andl(eax, 32767); - jcc(Assembler::notEqual, L_2TAG_PACKET_37_0_2); - movdl(eax, xmm2); - andl(eax, 1); - jcc(Assembler::equal, L_2TAG_PACKET_38_0_2); - - bind(L_2TAG_PACKET_39_0_2); - xorpd(xmm1, xmm1); - movl(edx, 30704); - pinsrw(xmm1, edx, 3); - movsd(xmm2, Address(tmp, 8256)); - movsd(xmm4, Address(rsp, 8)); - pextrw(eax, xmm4, 3); - movl(edx, 8192); - movdl(xmm4, edx); - andl(eax, 32767); - subl(eax, 16); - jcc(Assembler::less, L_2TAG_PACKET_12_0_2); - movl(edx, eax); - andl(edx, 32752); - subl(edx, 16368); - movl(ecx, edx); - sarl(edx, 31); - addl(ecx, edx); - xorl(ecx, edx); - addl(ecx, 16); - bsrl(ecx, ecx); - movl(rsi, INT_MIN); - jmp(L_2TAG_PACKET_1_0_2); - - bind(L_2TAG_PACKET_37_0_2); - xorpd(xmm1, xmm1); - movl(eax, 32752); - pinsrw(xmm1, eax, 3); - xorpd(xmm0, xmm0); - mulsd(xmm0, xmm1); - movl(edx, 28); - jmp(L_2TAG_PACKET_21_0_2); - - bind(L_2TAG_PACKET_38_0_2); - xorpd(xmm1, xmm1); - movl(edx, 30704); - pinsrw(xmm1, edx, 3); - movsd(xmm2, Address(tmp, 8256)); - movsd(xmm4, Address(rsp, 8)); - pextrw(eax, xmm4, 3); - movl(edx, 8192); - movdl(xmm4, edx); - andl(eax, 32767); - subl(eax, 16); - jcc(Assembler::less, L_2TAG_PACKET_10_0_2); - movl(edx, eax); - andl(edx, 32752); - subl(edx, 16368); - movl(ecx, edx); - sarl(edx, 31); - addl(ecx, edx); - xorl(ecx, edx); - addl(ecx, 16); - bsrl(ecx, ecx); - movl(rsi, 0); - jmp(L_2TAG_PACKET_1_0_2); - - bind(L_2TAG_PACKET_23_0_2); - xorpd(xmm0, xmm0); - movl(eax, 16368); - pinsrw(xmm0, eax, 3); - jmp(L_2TAG_PACKET_18_0_2); - - bind(L_2TAG_PACKET_26_0_2); - xorpd(xmm0, xmm0); - jmp(L_2TAG_PACKET_18_0_2); - - bind(L_2TAG_PACKET_13_0_2); - addl(eax, 384); - cmpl(eax, 0); - jcc(Assembler::less, L_2TAG_PACKET_41_0_2); - mulsd(xmm5, xmm1); - addsd(xmm0, xmm7); - shrl(rsi, 31); - addpd(xmm3, xmm0); - pshufd(xmm0, xmm3, 238); - addsd(xmm3, xmm0); - movsd(xmm4, Address(tmp, rsi, Address::times_8, 12528)); - mulsd(xmm1, xmm3); - xorpd(xmm0, xmm0); - movl(eax, 16368); - shll(rsi, 15); - orl(eax, rsi); - pinsrw(xmm0, eax, 3); - addsd(xmm5, xmm1); - movl(rsi, Address(rsp, 24)); - mulsd(xmm5, xmm4); - addsd(xmm0, xmm5); - jmp(L_2TAG_PACKET_18_0_2); - - bind(L_2TAG_PACKET_41_0_2); - movl(rsi, Address(rsp, 24)); - xorpd(xmm0, xmm0); - movl(eax, 16368); - pinsrw(xmm0, eax, 3); - jmp(L_2TAG_PACKET_18_0_2); - - bind(L_2TAG_PACKET_40_0_2); - xorpd(xmm0, xmm0); - movl(eax, 16368); - pinsrw(xmm0, eax, 3); - jmp(L_2TAG_PACKET_18_0_2); - - bind(L_2TAG_PACKET_42_0_2); - xorpd(xmm0, xmm0); - movl(eax, 16368); - pinsrw(xmm0, eax, 3); - movl(edx, 26); - jmp(L_2TAG_PACKET_21_0_2); - - bind(L_2TAG_PACKET_11_0_2); - movsd(xmm1, Address(rsp, 16)); - movdqu(xmm2, xmm1); - pextrw(eax, xmm1, 3); - andl(eax, 32752); - cmpl(eax, 32752); - jcc(Assembler::notEqual, L_2TAG_PACKET_43_0_2); - movdl(eax, xmm2); - psrlq(xmm2, 20); - movdl(edx, xmm2); - orl(eax, edx); - jcc(Assembler::notEqual, L_2TAG_PACKET_22_0_2); - - bind(L_2TAG_PACKET_43_0_2); - movdl(eax, xmm1); - psrlq(xmm1, 32); - movdl(edx, xmm1); - movl(ecx, edx); - addl(edx, edx); - orl(eax, edx); - jcc(Assembler::equal, L_2TAG_PACKET_42_0_2); - shrl(edx, 21); - cmpl(edx, 1075); - jcc(Assembler::above, L_2TAG_PACKET_44_0_2); - jcc(Assembler::equal, L_2TAG_PACKET_45_0_2); - cmpl(edx, 1023); - jcc(Assembler::below, L_2TAG_PACKET_44_0_2); - movsd(xmm1, Address(rsp, 16)); - movl(eax, 17208); - xorpd(xmm3, xmm3); - pinsrw(xmm3, eax, 3); - movdqu(xmm4, xmm3); - addsd(xmm3, xmm1); - subsd(xmm4, xmm3); - addsd(xmm1, xmm4); - pextrw(eax, xmm1, 3); - andl(eax, 32752); - jcc(Assembler::notEqual, L_2TAG_PACKET_44_0_2); - movdl(eax, xmm3); - andl(eax, 1); - jcc(Assembler::equal, L_2TAG_PACKET_44_0_2); - - bind(L_2TAG_PACKET_46_0_2); - movsd(xmm0, Address(rsp, 8)); - testl(ecx, INT_MIN); - jcc(Assembler::notEqual, L_2TAG_PACKET_47_0_2); - jmp(L_2TAG_PACKET_18_0_2); - - bind(L_2TAG_PACKET_45_0_2); - movsd(xmm1, Address(rsp, 16)); - movdl(eax, xmm1); - testl(eax, 1); - jcc(Assembler::notEqual, L_2TAG_PACKET_46_0_2); - - bind(L_2TAG_PACKET_44_0_2); - testl(ecx, INT_MIN); - jcc(Assembler::equal, L_2TAG_PACKET_26_0_2); - xorpd(xmm0, xmm0); - - bind(L_2TAG_PACKET_47_0_2); - movl(eax, 16368); - xorpd(xmm1, xmm1); - pinsrw(xmm1, eax, 3); - divsd(xmm1, xmm0); - movdqu(xmm0, xmm1); - movl(edx, 27); - jmp(L_2TAG_PACKET_21_0_2); - - bind(L_2TAG_PACKET_14_0_2); - movsd(xmm2, Address(rsp, 8)); - movsd(xmm6, Address(rsp, 16)); - pextrw(eax, xmm2, 3); - pextrw(edx, xmm6, 3); - movl(ecx, 32752); - andl(ecx, edx); - cmpl(ecx, 32752); - jcc(Assembler::equal, L_2TAG_PACKET_48_0_2); - andl(eax, 32752); - subl(eax, 16368); - xorl(edx, eax); - testl(edx, 32768); - jcc(Assembler::notEqual, L_2TAG_PACKET_49_0_2); - - bind(L_2TAG_PACKET_50_0_2); - movl(eax, 32736); - pinsrw(xmm0, eax, 3); - shrl(rsi, 16); - orl(eax, rsi); - pinsrw(xmm1, eax, 3); - movl(rsi, Address(rsp, 24)); - mulsd(xmm0, xmm1); - - bind(L_2TAG_PACKET_17_0_2); - movl(edx, 24); - - bind(L_2TAG_PACKET_21_0_2); - movsd(Address(rsp, 0), xmm0); - fld_d(Address(rsp, 0)); - jmp(L_2TAG_PACKET_6_0_2); - - bind(L_2TAG_PACKET_49_0_2); - movl(eax, 16); - pinsrw(xmm0, eax, 3); - mulsd(xmm0, xmm0); - testl(rsi, INT_MIN); - jcc(Assembler::equal, L_2TAG_PACKET_51_0_2); - movsd(xmm2, Address(tmp, 12560)); - xorpd(xmm0, xmm2); - - bind(L_2TAG_PACKET_51_0_2); - movl(rsi, Address(rsp, 24)); - movl(edx, 25); - jmp(L_2TAG_PACKET_21_0_2); - - bind(L_2TAG_PACKET_16_0_2); - pextrw(ecx, xmm5, 3); - pextrw(edx, xmm4, 3); - movl(eax, -1); - andl(ecx, 32752); - subl(ecx, 16368); - andl(edx, 32752); - addl(edx, ecx); - movl(ecx, -31); - sarl(edx, 4); - subl(ecx, edx); - jcc(Assembler::lessEqual, L_2TAG_PACKET_52_0_2); - cmpl(ecx, 20); - jcc(Assembler::above, L_2TAG_PACKET_53_0_2); - shll(eax); - - bind(L_2TAG_PACKET_52_0_2); - movdl(xmm0, eax); - psllq(xmm0, 32); - pand(xmm0, xmm5); - subsd(xmm5, xmm0); - addsd(xmm5, xmm1); - mulsd(xmm0, xmm4); - mulsd(xmm5, xmm4); - addsd(xmm0, xmm5); - - bind(L_2TAG_PACKET_53_0_2); - movl(edx, 25); - jmp(L_2TAG_PACKET_21_0_2); - - bind(L_2TAG_PACKET_2_0_2); - movzwl(ecx, Address(rsp, 22)); - movl(edx, INT_MIN); - movdl(xmm1, edx); - xorpd(xmm7, xmm7); - paddd(xmm0, xmm4); - psllq(xmm5, 32); - movdl(edx, xmm0); - psllq(xmm0, 29); - paddq(xmm1, xmm3); - pand(xmm5, xmm1); - andl(ecx, 32752); - cmpl(ecx, 16560); - jcc(Assembler::below, L_2TAG_PACKET_3_0_2); - pand(xmm0, xmm6); - subsd(xmm3, xmm5); - addl(eax, 16351); - shrl(eax, 4); - subl(eax, 1022); - cvtsi2sdl(xmm7, eax); - mulpd(xmm5, xmm0); - movsd(xmm4, Address(tmp, 0)); - mulsd(xmm3, xmm0); - movsd(xmm6, Address(tmp, 0)); - subsd(xmm5, xmm2); - movsd(xmm1, Address(tmp, 8)); - pshufd(xmm2, xmm3, 68); - unpcklpd(xmm5, xmm3); - addsd(xmm3, xmm5); - movsd(xmm0, Address(tmp, 8)); - andl(edx, 16760832); - shrl(edx, 10); - addpd(xmm7, Address(tmp, edx, Address::times_1, -3616)); - mulsd(xmm4, xmm5); - mulsd(xmm0, xmm5); - mulsd(xmm6, xmm2); - mulsd(xmm1, xmm2); - movdqu(xmm2, xmm5); - mulsd(xmm4, xmm5); - addsd(xmm5, xmm0); - movdqu(xmm0, xmm7); - addsd(xmm2, xmm3); - addsd(xmm7, xmm5); - mulsd(xmm6, xmm2); - subsd(xmm0, xmm7); - movdqu(xmm2, xmm7); - addsd(xmm7, xmm4); - addsd(xmm0, xmm5); - subsd(xmm2, xmm7); - addsd(xmm4, xmm2); - pshufd(xmm2, xmm5, 238); - movdqu(xmm5, xmm7); - addsd(xmm7, xmm2); - addsd(xmm4, xmm0); - movdqu(xmm0, Address(tmp, 8272)); - subsd(xmm5, xmm7); - addsd(xmm6, xmm4); - movdqu(xmm4, xmm7); - addsd(xmm5, xmm2); - addsd(xmm7, xmm1); - movdqu(xmm2, Address(tmp, 8336)); - subsd(xmm4, xmm7); - addsd(xmm6, xmm5); - addsd(xmm4, xmm1); - pshufd(xmm5, xmm7, 238); - movdqu(xmm1, xmm7); - addsd(xmm7, xmm5); - subsd(xmm1, xmm7); - addsd(xmm1, xmm5); - movdqu(xmm5, Address(tmp, 8352)); - pshufd(xmm3, xmm3, 68); - addsd(xmm6, xmm4); - addsd(xmm6, xmm1); - movdqu(xmm1, Address(tmp, 8304)); - mulpd(xmm0, xmm3); - mulpd(xmm2, xmm3); - pshufd(xmm4, xmm3, 68); - mulpd(xmm3, xmm3); - addpd(xmm0, xmm1); - addpd(xmm5, xmm2); - mulsd(xmm4, xmm3); - movsd(xmm2, Address(tmp, 16)); - mulpd(xmm3, xmm3); - movsd(xmm1, Address(rsp, 16)); - movzwl(ecx, Address(rsp, 22)); - mulpd(xmm0, xmm4); - pextrw(eax, xmm7, 3); - mulpd(xmm5, xmm4); - mulpd(xmm0, xmm3); - movsd(xmm4, Address(tmp, 8376)); - pand(xmm2, xmm7); - addsd(xmm5, xmm6); - subsd(xmm7, xmm2); - addpd(xmm5, xmm0); - andl(eax, 32752); - subl(eax, 16368); - andl(ecx, 32752); - cmpl(ecx, 32752); - jcc(Assembler::equal, L_2TAG_PACKET_48_0_2); - addl(ecx, eax); - cmpl(ecx, 16576); - jcc(Assembler::aboveEqual, L_2TAG_PACKET_54_0_2); - pshufd(xmm0, xmm5, 238); - pand(xmm4, xmm1); - movdqu(xmm3, xmm1); - addsd(xmm5, xmm0); - subsd(xmm1, xmm4); - xorpd(xmm6, xmm6); - movl(edx, 17080); - pinsrw(xmm6, edx, 3); - addsd(xmm7, xmm5); - mulsd(xmm4, xmm2); - mulsd(xmm1, xmm2); - movdqu(xmm5, xmm6); - mulsd(xmm3, xmm7); - addsd(xmm6, xmm4); - addsd(xmm1, xmm3); - movdqu(xmm7, Address(tmp, 12480)); - movdl(edx, xmm6); - subsd(xmm6, xmm5); - movdqu(xmm3, Address(tmp, 12496)); - movsd(xmm2, Address(tmp, 12512)); - subsd(xmm4, xmm6); - movl(ecx, edx); - andl(edx, 255); - addl(edx, edx); - movdqu(xmm5, Address(tmp, edx, Address::times_8, 8384)); - addsd(xmm4, xmm1); - pextrw(edx, xmm6, 3); - shrl(ecx, 8); - movl(eax, ecx); - shrl(ecx, 1); - subl(eax, ecx); - shll(ecx, 20); - movdl(xmm6, ecx); - pshufd(xmm0, xmm4, 68); - pshufd(xmm1, xmm4, 68); - mulpd(xmm0, xmm0); - mulpd(xmm7, xmm1); - pshufd(xmm6, xmm6, 17); - mulsd(xmm2, xmm4); - andl(edx, 32767); - cmpl(edx, 16529); - jcc(Assembler::above, L_2TAG_PACKET_14_0_2); - mulsd(xmm0, xmm0); - paddd(xmm5, xmm6); - addpd(xmm3, xmm7); - mulsd(xmm2, xmm5); - pshufd(xmm6, xmm5, 238); - mulpd(xmm0, xmm3); - addsd(xmm2, xmm6); - pshufd(xmm3, xmm0, 238); - addl(eax, 1023); - shll(eax, 20); - orl(eax, rsi); - movdl(xmm4, eax); - mulsd(xmm0, xmm5); - mulsd(xmm3, xmm5); - addsd(xmm0, xmm2); - psllq(xmm4, 32); - addsd(xmm0, xmm3); - movdqu(xmm1, xmm0); - addsd(xmm0, xmm5); - movl(rsi, Address(rsp, 24)); - mulsd(xmm0, xmm4); - pextrw(eax, xmm0, 3); - andl(eax, 32752); - jcc(Assembler::equal, L_2TAG_PACKET_16_0_2); - cmpl(eax, 32752); - jcc(Assembler::equal, L_2TAG_PACKET_17_0_2); - - bind(L_2TAG_PACKET_55_0_2); - movsd(Address(rsp, 0), xmm0); - fld_d(Address(rsp, 0)); - jmp(L_2TAG_PACKET_6_0_2); - - bind(L_2TAG_PACKET_48_0_2); - movl(rsi, Address(rsp, 24)); - - bind(L_2TAG_PACKET_56_0_2); - movsd(xmm0, Address(rsp, 8)); - movsd(xmm1, Address(rsp, 16)); - addsd(xmm1, xmm1); - xorpd(xmm2, xmm2); - movl(eax, 49136); - pinsrw(xmm2, eax, 3); - addsd(xmm2, xmm0); - pextrw(eax, xmm2, 3); - cmpl(eax, 0); - jcc(Assembler::notEqual, L_2TAG_PACKET_57_0_2); - xorpd(xmm0, xmm0); - movl(eax, 32760); - pinsrw(xmm0, eax, 3); - jmp(L_2TAG_PACKET_18_0_2); - - bind(L_2TAG_PACKET_57_0_2); - movdl(edx, xmm1); - movdqu(xmm3, xmm1); - psrlq(xmm3, 20); - movdl(ecx, xmm3); - orl(ecx, edx); - jcc(Assembler::equal, L_2TAG_PACKET_58_0_2); - addsd(xmm1, xmm1); - movdqu(xmm0, xmm1); - jmp(L_2TAG_PACKET_18_0_2); - - bind(L_2TAG_PACKET_58_0_2); - pextrw(eax, xmm0, 3); - andl(eax, 32752); - pextrw(edx, xmm1, 3); - xorpd(xmm0, xmm0); - subl(eax, 16368); - xorl(eax, edx); - testl(eax, 32768); - jcc(Assembler::notEqual, L_2TAG_PACKET_18_0_2); - movl(edx, 32752); - pinsrw(xmm0, edx, 3); - jmp(L_2TAG_PACKET_18_0_2); - - bind(L_2TAG_PACKET_54_0_2); - pextrw(eax, xmm1, 3); - pextrw(ecx, xmm2, 3); - xorl(eax, ecx); - testl(eax, 32768); - jcc(Assembler::equal, L_2TAG_PACKET_50_0_2); - jmp(L_2TAG_PACKET_49_0_2); - - bind(L_2TAG_PACKET_6_0_2); - movl(tmp, Address(rsp, 64)); - -} diff --git a/src/hotspot/cpu/x86/macroAssembler_x86_32_sin.cpp b/src/hotspot/cpu/x86/macroAssembler_x86_32_sin.cpp deleted file mode 100644 index cd593ba3356..00000000000 --- a/src/hotspot/cpu/x86/macroAssembler_x86_32_sin.cpp +++ /dev/null @@ -1,1742 +0,0 @@ -/* -* Copyright (c) 2016, 2021, Intel Corporation. All rights reserved. -* Intel Math Library (LIBM) Source Code -* -* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -* -* This code is free software; you can redistribute it and/or modify it -* under the terms of the GNU General Public License version 2 only, as -* published by the Free Software Foundation. -* -* This code is distributed in the hope that it will be useful, but WITHOUT -* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -* version 2 for more details (a copy is included in the LICENSE file that -* accompanied this code). -* -* You should have received a copy of the GNU General Public License version -* 2 along with this work; if not, write to the Free Software Foundation, -* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -* -* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -* or visit www.oracle.com if you need additional information or have any -* questions. -* -*/ - -#include "asm/assembler.hpp" -#include "asm/assembler.inline.hpp" -#include "macroAssembler_x86.hpp" -#include "runtime/stubRoutines.hpp" -#include "stubRoutines_x86.hpp" -#include "utilities/globalDefinitions.hpp" - -/******************************************************************************/ -// ALGORITHM DESCRIPTION - SIN() -// --------------------- -// -// 1. RANGE REDUCTION -// -// We perform an initial range reduction from X to r with -// -// X =~= N * pi/32 + r -// -// so that |r| <= pi/64 + epsilon. We restrict inputs to those -// where |N| <= 932560. Beyond this, the range reduction is -// insufficiently accurate. For extremely small inputs, -// denormalization can occur internally, impacting performance. -// This means that the main path is actually only taken for -// 2^-252 <= |X| < 90112. -// -// To avoid branches, we perform the range reduction to full -// accuracy each time. -// -// X - N * (P_1 + P_2 + P_3) -// -// where P_1 and P_2 are 32-bit numbers (so multiplication by N -// is exact) and P_3 is a 53-bit number. Together, these -// approximate pi well enough for all cases in the restricted -// range. -// -// The main reduction sequence is: -// -// y = 32/pi * x -// N = integer(y) -// (computed by adding and subtracting off SHIFTER) -// -// m_1 = N * P_1 -// m_2 = N * P_2 -// r_1 = x - m_1 -// r = r_1 - m_2 -// (this r can be used for most of the calculation) -// -// c_1 = r_1 - r -// m_3 = N * P_3 -// c_2 = c_1 - m_2 -// c = c_2 - m_3 -// -// 2. MAIN ALGORITHM -// -// The algorithm uses a table lookup based on B = M * pi / 32 -// where M = N mod 64. The stored values are: -// sigma closest power of 2 to cos(B) -// C_hl 53-bit cos(B) - sigma -// S_hi + S_lo 2 * 53-bit sin(B) -// -// The computation is organized as follows: -// -// sin(B + r + c) = [sin(B) + sigma * r] + -// r * (cos(B) - sigma) + -// sin(B) * [cos(r + c) - 1] + -// cos(B) * [sin(r + c) - r] -// -// which is approximately: -// -// [S_hi + sigma * r] + -// C_hl * r + -// S_lo + S_hi * [(cos(r) - 1) - r * c] + -// (C_hl + sigma) * [(sin(r) - r) + c] -// -// and this is what is actually computed. We separate this sum -// into four parts: -// -// hi + med + pols + corr -// -// where -// -// hi = S_hi + sigma r -// med = C_hl * r -// pols = S_hi * (cos(r) - 1) + (C_hl + sigma) * (sin(r) - r) -// corr = S_lo + c * ((C_hl + sigma) - S_hi * r) -// -// 3. POLYNOMIAL -// -// The polynomial S_hi * (cos(r) - 1) + (C_hl + sigma) * -// (sin(r) - r) can be rearranged freely, since it is quite -// small, so we exploit parallelism to the fullest. -// -// psc4 = SC_4 * r_1 -// msc4 = psc4 * r -// r2 = r * r -// msc2 = SC_2 * r2 -// r4 = r2 * r2 -// psc3 = SC_3 + msc4 -// psc1 = SC_1 + msc2 -// msc3 = r4 * psc3 -// sincospols = psc1 + msc3 -// pols = sincospols * -// -// -// 4. CORRECTION TERM -// -// This is where the "c" component of the range reduction is -// taken into account; recall that just "r" is used for most of -// the calculation. -// -// -c = m_3 - c_2 -// -d = S_hi * r - (C_hl + sigma) -// corr = -c * -d + S_lo -// -// 5. COMPENSATED SUMMATIONS -// -// The two successive compensated summations add up the high -// and medium parts, leaving just the low parts to add up at -// the end. -// -// rs = sigma * r -// res_int = S_hi + rs -// k_0 = S_hi - res_int -// k_2 = k_0 + rs -// med = C_hl * r -// res_hi = res_int + med -// k_1 = res_int - res_hi -// k_3 = k_1 + med -// -// 6. FINAL SUMMATION -// -// We now add up all the small parts: -// -// res_lo = pols(hi) + pols(lo) + corr + k_1 + k_3 -// -// Now the overall result is just: -// -// res_hi + res_lo -// -// 7. SMALL ARGUMENTS -// -// If |x| < SNN (SNN meaning the smallest normal number), we -// simply perform 0.1111111 cdots 1111 * x. For SNN <= |x|, we -// do 2^-55 * (2^55 * x - x). -// -// Special cases: -// sin(NaN) = quiet NaN, and raise invalid exception -// sin(INF) = NaN and raise invalid exception -// sin(+/-0) = +/-0 -// -/******************************************************************************/ - -// The 32 bit code is at most SSE2 compliant -ATTRIBUTE_ALIGNED(8) static const juint _zero_none[] = -{ - 0x00000000UL, 0x00000000UL, 0x00000000UL, 0xbff00000UL -}; - -ATTRIBUTE_ALIGNED(4) static const juint __4onpi_d[] = -{ - 0x6dc9c883UL, 0x3ff45f30UL -}; - -ATTRIBUTE_ALIGNED(4) static const juint _TWO_32H[] = -{ - 0x00000000UL, 0x41f80000UL -}; - -ATTRIBUTE_ALIGNED(4) static const juint _pi04_3d[] = -{ - 0x54442d00UL, 0x3fe921fbUL, 0x98cc5180UL, 0x3ce84698UL, 0xcbb5bf6cUL, - 0xb9dfc8f8UL -}; - -ATTRIBUTE_ALIGNED(4) static const juint _pi04_5d[] = -{ - 0x54400000UL, 0x3fe921fbUL, 0x1a600000UL, 0x3dc0b461UL, 0x2e000000UL, - 0x3b93198aUL, 0x25200000UL, 0x396b839aUL, 0x533e63a0UL, 0x37027044UL -}; - -ATTRIBUTE_ALIGNED(4) static const juint _SCALE[] = -{ - 0x00000000UL, 0x32600000UL -}; - -ATTRIBUTE_ALIGNED(4) static const juint _zeros[] = -{ - 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x80000000UL -}; - -ATTRIBUTE_ALIGNED(4) static const juint _pi04_2d[] = -{ - 0x54400000UL, 0x3fe921fbUL, 0x1a626331UL, 0x3dc0b461UL -}; - -ATTRIBUTE_ALIGNED(4) static const juint _TWO_12H[] = -{ - 0x00000000UL, 0x40b80000UL -}; - -ATTRIBUTE_ALIGNED(2) static const jushort __4onpi_31l[] = -{ - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x836e, 0xa2f9, - 0x40d8, 0x0000, 0x0000, 0x0000, 0x2a50, 0x9c88, 0x40b7, 0x0000, 0x0000, 0x0000, - 0xabe8, 0xfe13, 0x4099, 0x0000, 0x0000, 0x0000, 0x6ee0, 0xfa9a, 0x4079, 0x0000, - 0x0000, 0x0000, 0x9580, 0xdb62, 0x4058, 0x0000, 0x0000, 0x0000, 0x1c82, 0xc9e2, - 0x403d, 0x0000, 0x0000, 0x0000, 0xb1c0, 0xff28, 0x4019, 0x0000, 0x0000, 0x0000, - 0xef14, 0xaf7a, 0x3ffe, 0x0000, 0x0000, 0x0000, 0x48dc, 0xc36e, 0x3fdf, 0x0000, - 0x0000, 0x0000, 0x3740, 0xe909, 0x3fbe, 0x0000, 0x0000, 0x0000, 0x924a, 0xb801, - 0x3fa2, 0x0000, 0x0000, 0x0000, 0x3a32, 0xdd41, 0x3f83, 0x0000, 0x0000, 0x0000, - 0x8778, 0x873f, 0x3f62, 0x0000, 0x0000, 0x0000, 0x1298, 0xb1cb, 0x3f44, 0x0000, - 0x0000, 0x0000, 0xa208, 0x9cfb, 0x3f26, 0x0000, 0x0000, 0x0000, 0xbaec, 0xd7d4, - 0x3f06, 0x0000, 0x0000, 0x0000, 0xd338, 0x8909, 0x3ee7, 0x0000, 0x0000, 0x0000, - 0x68b8, 0xe04d, 0x3ec7, 0x0000, 0x0000, 0x0000, 0x4e64, 0xdf90, 0x3eaa, 0x0000, - 0x0000, 0x0000, 0xc1a8, 0xeb1c, 0x3e89, 0x0000, 0x0000, 0x0000, 0x2720, 0xce7d, - 0x3e6a, 0x0000, 0x0000, 0x0000, 0x77b8, 0x8bf1, 0x3e4b, 0x0000, 0x0000, 0x0000, - 0xec7e, 0xe4a0, 0x3e2e, 0x0000, 0x0000, 0x0000, 0xffbc, 0xf12f, 0x3e0f, 0x0000, - 0x0000, 0x0000, 0xfdc0, 0xb301, 0x3deb, 0x0000, 0x0000, 0x0000, 0xc5ac, 0x9788, - 0x3dd1, 0x0000, 0x0000, 0x0000, 0x47da, 0x829b, 0x3db2, 0x0000, 0x0000, 0x0000, - 0xd9e4, 0xa6cf, 0x3d93, 0x0000, 0x0000, 0x0000, 0x36e8, 0xf961, 0x3d73, 0x0000, - 0x0000, 0x0000, 0xf668, 0xf463, 0x3d54, 0x0000, 0x0000, 0x0000, 0x5168, 0xf2ff, - 0x3d35, 0x0000, 0x0000, 0x0000, 0x758e, 0xea4f, 0x3d17, 0x0000, 0x0000, 0x0000, - 0xf17a, 0xebe5, 0x3cf8, 0x0000, 0x0000, 0x0000, 0x9cfa, 0x9e83, 0x3cd9, 0x0000, - 0x0000, 0x0000, 0xa4ba, 0xe294, 0x3cba, 0x0000, 0x0000, 0x0000, 0xd7ec, 0x9afe, - 0x3c9a, 0x0000, 0x0000, 0x0000, 0xae80, 0x8fc6, 0x3c79, 0x0000, 0x0000, 0x0000, - 0x3304, 0x8560, 0x3c5c, 0x0000, 0x0000, 0x0000, 0x6d70, 0xdf8f, 0x3c3b, 0x0000, - 0x0000, 0x0000, 0x3ef0, 0xafc3, 0x3c1e, 0x0000, 0x0000, 0x0000, 0xd0d8, 0x826b, - 0x3bfe, 0x0000, 0x0000, 0x0000, 0x1c80, 0xed4f, 0x3bdd, 0x0000, 0x0000, 0x0000, - 0x730c, 0xb0af, 0x3bc1, 0x0000, 0x0000, 0x0000, 0x6660, 0xc219, 0x3ba2, 0x0000, - 0x0000, 0x0000, 0x940c, 0xabe2, 0x3b83, 0x0000, 0x0000, 0x0000, 0xdffc, 0x8408, - 0x3b64, 0x0000, 0x0000, 0x0000, 0x6b98, 0xc402, 0x3b45, 0x0000, 0x0000, 0x0000, - 0x1818, 0x9cc4, 0x3b26, 0x0000, 0x0000, 0x0000, 0x5390, 0xaab6, 0x3b05, 0x0000, - 0x0000, 0x0000, 0xb070, 0xd464, 0x3ae9, 0x0000, 0x0000, 0x0000, 0x231a, 0x9ef0, - 0x3aca, 0x0000, 0x0000, 0x0000, 0x0670, 0xd1f1, 0x3aaa, 0x0000, 0x0000, 0x0000, - 0x7738, 0xd9f3, 0x3a8a, 0x0000, 0x0000, 0x0000, 0xa834, 0x8092, 0x3a6c, 0x0000, - 0x0000, 0x0000, 0xb45c, 0xce23, 0x3a4d, 0x0000, 0x0000, 0x0000, 0x36e8, 0xb0e5, - 0x3a2d, 0x0000, 0x0000, 0x0000, 0xd156, 0xaf44, 0x3a10, 0x0000, 0x0000, 0x0000, - 0x9f52, 0x8c82, 0x39f1, 0x0000, 0x0000, 0x0000, 0x829c, 0xff83, 0x39d1, 0x0000, - 0x0000, 0x0000, 0x7d06, 0xefc6, 0x39b3, 0x0000, 0x0000, 0x0000, 0x93e0, 0xb0b7, - 0x3992, 0x0000, 0x0000, 0x0000, 0xedde, 0xc193, 0x3975, 0x0000, 0x0000, 0x0000, - 0xbbc0, 0xcf49, 0x3952, 0x0000, 0x0000, 0x0000, 0xbdf0, 0xd63c, 0x3937, 0x0000, - 0x0000, 0x0000, 0x1f34, 0x9f3a, 0x3918, 0x0000, 0x0000, 0x0000, 0x3f8e, 0xe579, - 0x38f9, 0x0000, 0x0000, 0x0000, 0x90c8, 0xc3f8, 0x38d9, 0x0000, 0x0000, 0x0000, - 0x48c0, 0xf8f8, 0x38b7, 0x0000, 0x0000, 0x0000, 0xed56, 0xafa6, 0x389c, 0x0000, - 0x0000, 0x0000, 0x8218, 0xb969, 0x387d, 0x0000, 0x0000, 0x0000, 0x1852, 0xec57, - 0x385e, 0x0000, 0x0000, 0x0000, 0x670c, 0xd674, 0x383e, 0x0000, 0x0000, 0x0000, - 0xad40, 0xc2c4, 0x3820, 0x0000, 0x0000, 0x0000, 0x2e80, 0xa696, 0x3801, 0x0000, - 0x0000, 0x0000, 0xd800, 0xc467, 0x37dc, 0x0000, 0x0000, 0x0000, 0x3c72, 0xc5ae, - 0x37c3, 0x0000, 0x0000, 0x0000, 0xb006, 0xac69, 0x37a4, 0x0000, 0x0000, 0x0000, - 0x34a0, 0x8cdf, 0x3782, 0x0000, 0x0000, 0x0000, 0x9ed2, 0xd25e, 0x3766, 0x0000, - 0x0000, 0x0000, 0x6fec, 0xaaaa, 0x3747, 0x0000, 0x0000, 0x0000, 0x6040, 0xfb5c, - 0x3726, 0x0000, 0x0000, 0x0000, 0x764c, 0xa3fc, 0x3708, 0x0000, 0x0000, 0x0000, - 0xb254, 0x954e, 0x36e9, 0x0000, 0x0000, 0x0000, 0x3e1c, 0xf5dc, 0x36ca, 0x0000, - 0x0000, 0x0000, 0x7b06, 0xc635, 0x36ac, 0x0000, 0x0000, 0x0000, 0xa8ba, 0xd738, - 0x368d, 0x0000, 0x0000, 0x0000, 0x06cc, 0xb24e, 0x366d, 0x0000, 0x0000, 0x0000, - 0x7108, 0xac76, 0x364f, 0x0000, 0x0000, 0x0000, 0x2324, 0xa7cb, 0x3630, 0x0000, - 0x0000, 0x0000, 0xac40, 0xef15, 0x360f, 0x0000, 0x0000, 0x0000, 0xae46, 0xd516, - 0x35f2, 0x0000, 0x0000, 0x0000, 0x615e, 0xe003, 0x35d3, 0x0000, 0x0000, 0x0000, - 0x0cf0, 0xefe7, 0x35b1, 0x0000, 0x0000, 0x0000, 0xfb50, 0xf98c, 0x3595, 0x0000, - 0x0000, 0x0000, 0x0abc, 0xf333, 0x3575, 0x0000, 0x0000, 0x0000, 0xdd60, 0xca3f, - 0x3555, 0x0000, 0x0000, 0x0000, 0x7eb6, 0xd87f, 0x3538, 0x0000, 0x0000, 0x0000, - 0x44f4, 0xb291, 0x3519, 0x0000, 0x0000, 0x0000, 0xff80, 0xc982, 0x34f6, 0x0000, - 0x0000, 0x0000, 0x9de0, 0xd9b8, 0x34db, 0x0000, 0x0000, 0x0000, 0xcd42, 0x9366, - 0x34bc, 0x0000, 0x0000, 0x0000, 0xbef0, 0xfaee, 0x349d, 0x0000, 0x0000, 0x0000, - 0xdac4, 0xb6f1, 0x347d, 0x0000, 0x0000, 0x0000, 0xf140, 0x94de, 0x345d, 0x0000, - 0x0000, 0x0000, 0xa218, 0x8b4b, 0x343e, 0x0000, 0x0000, 0x0000, 0x6380, 0xa135, - 0x341e, 0x0000, 0x0000, 0x0000, 0xb184, 0x8cb2, 0x3402, 0x0000, 0x0000, 0x0000, - 0x196e, 0xdc61, 0x33e3, 0x0000, 0x0000, 0x0000, 0x0c00, 0xde05, 0x33c4, 0x0000, - 0x0000, 0x0000, 0xef9a, 0xbd38, 0x33a5, 0x0000, 0x0000, 0x0000, 0xc1a0, 0xdf00, - 0x3385, 0x0000, 0x0000, 0x0000, 0x1090, 0x9973, 0x3365, 0x0000, 0x0000, 0x0000, - 0x4882, 0x8301, 0x3348, 0x0000, 0x0000, 0x0000, 0x7abe, 0xadc7, 0x3329, 0x0000, - 0x0000, 0x0000, 0x7cba, 0xec2b, 0x330a, 0x0000, 0x0000, 0x0000, 0xa520, 0x8f21, - 0x32e9, 0x0000, 0x0000, 0x0000, 0x710c, 0x8d36, 0x32cc, 0x0000, 0x0000, 0x0000, - 0x5212, 0xc6ed, 0x32ad, 0x0000, 0x0000, 0x0000, 0x7308, 0xfd76, 0x328d, 0x0000, - 0x0000, 0x0000, 0x5014, 0xd548, 0x326f, 0x0000, 0x0000, 0x0000, 0xd3f2, 0xb499, - 0x3250, 0x0000, 0x0000, 0x0000, 0x7f74, 0xa606, 0x3230, 0x0000, 0x0000, 0x0000, - 0xf0a8, 0xd720, 0x3212, 0x0000, 0x0000, 0x0000, 0x185c, 0xe20f, 0x31f2, 0x0000, - 0x0000, 0x0000, 0xa5a8, 0x8738, 0x31d4, 0x0000, 0x0000, 0x0000, 0xdd74, 0xcafb, - 0x31b4, 0x0000, 0x0000, 0x0000, 0x98b6, 0xbd8e, 0x3196, 0x0000, 0x0000, 0x0000, - 0xe9de, 0x977f, 0x3177, 0x0000, 0x0000, 0x0000, 0x67c0, 0x818d, 0x3158, 0x0000, - 0x0000, 0x0000, 0xe52a, 0x9322, 0x3139, 0x0000, 0x0000, 0x0000, 0xe568, 0x9b6c, - 0x3119, 0x0000, 0x0000, 0x0000, 0x2358, 0xaa0a, 0x30fa, 0x0000, 0x0000, 0x0000, - 0xe480, 0xe13b, 0x30d9, 0x0000, 0x0000, 0x0000, 0x3024, 0x90a1, 0x30bd, 0x0000, - 0x0000, 0x0000, 0x9620, 0xda30, 0x309d, 0x0000, 0x0000, 0x0000, 0x898a, 0xb388, - 0x307f, 0x0000, 0x0000, 0x0000, 0xb24c, 0xc891, 0x3060, 0x0000, 0x0000, 0x0000, - 0x8056, 0xf98b, 0x3041, 0x0000, 0x0000, 0x0000, 0x72a4, 0xa1ea, 0x3021, 0x0000, - 0x0000, 0x0000, 0x6af8, 0x9488, 0x3001, 0x0000, 0x0000, 0x0000, 0xe00c, 0xdfcb, - 0x2fe4, 0x0000, 0x0000, 0x0000, 0xeeec, 0xc941, 0x2fc4, 0x0000, 0x0000, 0x0000, - 0x53e0, 0xe70f, 0x2fa4, 0x0000, 0x0000, 0x0000, 0x8f60, 0x9c07, 0x2f85, 0x0000, - 0x0000, 0x0000, 0xb328, 0xc3e7, 0x2f68, 0x0000, 0x0000, 0x0000, 0x9404, 0xf8c7, - 0x2f48, 0x0000, 0x0000, 0x0000, 0x38e0, 0xc99f, 0x2f29, 0x0000, 0x0000, 0x0000, - 0x9778, 0xd984, 0x2f09, 0x0000, 0x0000, 0x0000, 0xe700, 0xd142, 0x2eea, 0x0000, - 0x0000, 0x0000, 0xd904, 0x9443, 0x2ecd, 0x0000, 0x0000, 0x0000, 0xd4ba, 0xae7e, - 0x2eae, 0x0000, 0x0000, 0x0000, 0x8e5e, 0x8524, 0x2e8f, 0x0000, 0x0000, 0x0000, - 0xb550, 0xc9ed, 0x2e6e, 0x0000, 0x0000, 0x0000, 0x53b8, 0x8648, 0x2e51, 0x0000, - 0x0000, 0x0000, 0xdae4, 0x87f9, 0x2e32, 0x0000, 0x0000, 0x0000, 0x2942, 0xd966, - 0x2e13, 0x0000, 0x0000, 0x0000, 0x4f28, 0xcf3c, 0x2df3, 0x0000, 0x0000, 0x0000, - 0xfa40, 0xc4ef, 0x2dd1, 0x0000, 0x0000, 0x0000, 0x4424, 0xbca7, 0x2db5, 0x0000, - 0x0000, 0x0000, 0x2e62, 0xcdc5, 0x2d97, 0x0000, 0x0000, 0x0000, 0xed88, 0x996b, - 0x2d78, 0x0000, 0x0000, 0x0000, 0x7c30, 0xd97d, 0x2d56, 0x0000, 0x0000, 0x0000, - 0xed26, 0xbf6e, 0x2d3a, 0x0000, 0x0000, 0x0000, 0x2918, 0x921b, 0x2d1a, 0x0000, - 0x0000, 0x0000, 0x4e24, 0xe84e, 0x2cfb, 0x0000, 0x0000, 0x0000, 0x6dc0, 0x92ec, - 0x2cdd, 0x0000, 0x0000, 0x0000, 0x4f2c, 0xacf8, 0x2cbd, 0x0000, 0x0000, 0x0000, - 0xc634, 0xf094, 0x2c9e, 0x0000, 0x0000, 0x0000, 0xdc70, 0xe5d3, 0x2c7e, 0x0000, - 0x0000, 0x0000, 0x2180, 0xa600, 0x2c5b, 0x0000, 0x0000, 0x0000, 0x8480, 0xd680, - 0x2c3c, 0x0000, 0x0000, 0x0000, 0x8b24, 0xd63b, 0x2c22, 0x0000, 0x0000, 0x0000, - 0x02e0, 0xaa47, 0x2c00, 0x0000, 0x0000, 0x0000, 0x9ad0, 0xee84, 0x2be3, 0x0000, - 0x0000, 0x0000, 0xf7dc, 0xf699, 0x2bc6, 0x0000, 0x0000, 0x0000, 0xddde, 0xe490, - 0x2ba7, 0x0000, 0x0000, 0x0000, 0x34a0, 0xb4fd, 0x2b85, 0x0000, 0x0000, 0x0000, - 0x91b4, 0x8ef6, 0x2b68, 0x0000, 0x0000, 0x0000, 0xa3e0, 0xa2a7, 0x2b47, 0x0000, - 0x0000, 0x0000, 0xcce4, 0x82b3, 0x2b2a, 0x0000, 0x0000, 0x0000, 0xe4be, 0x8207, - 0x2b0c, 0x0000, 0x0000, 0x0000, 0x1d92, 0xab43, 0x2aed, 0x0000, 0x0000, 0x0000, - 0xe818, 0xf9f6, 0x2acd, 0x0000, 0x0000, 0x0000, 0xff12, 0xba80, 0x2aaf, 0x0000, - 0x0000, 0x0000, 0x5254, 0x8529, 0x2a90, 0x0000, 0x0000, 0x0000, 0x1b88, 0xe032, - 0x2a71, 0x0000, 0x0000, 0x0000, 0x3248, 0xd86d, 0x2a50, 0x0000, 0x0000, 0x0000, - 0x3140, 0xc9d5, 0x2a2e, 0x0000, 0x0000, 0x0000, 0x14e6, 0xbd47, 0x2a14, 0x0000, - 0x0000, 0x0000, 0x5c10, 0xe544, 0x29f4, 0x0000, 0x0000, 0x0000, 0x9f50, 0x90b6, - 0x29d4, 0x0000, 0x0000, 0x0000, 0x9850, 0xab55, 0x29b6, 0x0000, 0x0000, 0x0000, - 0x2750, 0x9d07, 0x2998, 0x0000, 0x0000, 0x0000, 0x6700, 0x8bbb, 0x2973, 0x0000, - 0x0000, 0x0000, 0x5dba, 0xed31, 0x295a, 0x0000, 0x0000, 0x0000, 0x61dc, 0x85fe, - 0x293a, 0x0000, 0x0000, 0x0000, 0x9ba2, 0xd6b4, 0x291c, 0x0000, 0x0000, 0x0000, - 0x2d30, 0xe3a5, 0x28fb, 0x0000, 0x0000, 0x0000, 0x6630, 0xb566, 0x28dd, 0x0000, - 0x0000, 0x0000, 0x5ad4, 0xa829, 0x28bf, 0x0000, 0x0000, 0x0000, 0x89d8, 0xe290, - 0x28a0, 0x0000, 0x0000, 0x0000, 0x3916, 0xc428, 0x2881, 0x0000, 0x0000, 0x0000, - 0x0490, 0xbea4, 0x2860, 0x0000, 0x0000, 0x0000, 0xee06, 0x80ee, 0x2843, 0x0000, - 0x0000, 0x0000, 0xfc00, 0xf327, 0x2820, 0x0000, 0x0000, 0x0000, 0xea40, 0xa871, - 0x2800, 0x0000, 0x0000, 0x0000, 0x63d8, 0x9c26, 0x27e4, 0x0000, 0x0000, 0x0000, - 0x07ba, 0xc0c9, 0x27c7, 0x0000, 0x0000, 0x0000, 0x3fa2, 0x9797, 0x27a8, 0x0000, - 0x0000, 0x0000, 0x21c6, 0xfeca, 0x2789, 0x0000, 0x0000, 0x0000, 0xde40, 0x860d, - 0x2768, 0x0000, 0x0000, 0x0000, 0x9cc8, 0x98ce, 0x2749, 0x0000, 0x0000, 0x0000, - 0x3778, 0xa31c, 0x272a, 0x0000, 0x0000, 0x0000, 0xe778, 0xf6e2, 0x270b, 0x0000, - 0x0000, 0x0000, 0x59b8, 0xf841, 0x26ed, 0x0000, 0x0000, 0x0000, 0x02e0, 0xad04, - 0x26cd, 0x0000, 0x0000, 0x0000, 0x5a92, 0x9380, 0x26b0, 0x0000, 0x0000, 0x0000, - 0xc740, 0x8886, 0x268d, 0x0000, 0x0000, 0x0000, 0x0680, 0xfaf8, 0x266c, 0x0000, - 0x0000, 0x0000, 0xfb60, 0x897f, 0x2653, 0x0000, 0x0000, 0x0000, 0x8760, 0xf903, - 0x2634, 0x0000, 0x0000, 0x0000, 0xad2a, 0xc2c8, 0x2615, 0x0000, 0x0000, 0x0000, - 0x2d86, 0x8aef, 0x25f6, 0x0000, 0x0000, 0x0000, 0x1ef4, 0xe627, 0x25d6, 0x0000, - 0x0000, 0x0000, 0x09e4, 0x8020, 0x25b7, 0x0000, 0x0000, 0x0000, 0x7548, 0xd227, - 0x2598, 0x0000, 0x0000, 0x0000, 0x75dc, 0xfb5b, 0x2579, 0x0000, 0x0000, 0x0000, - 0xea84, 0xc8b6, 0x255a, 0x0000, 0x0000, 0x0000, 0xe4d0, 0x8145, 0x253b, 0x0000, - 0x0000, 0x0000, 0x3640, 0x9768, 0x251c, 0x0000, 0x0000, 0x0000, 0x246a, 0xccec, - 0x24fe, 0x0000, 0x0000, 0x0000, 0x51d0, 0xa075, 0x24dd, 0x0000, 0x0000, 0x0000, - 0x4638, 0xa385, 0x24bf, 0x0000, 0x0000, 0x0000, 0xd788, 0xd776, 0x24a1, 0x0000, - 0x0000, 0x0000, 0x1370, 0x8997, 0x2482, 0x0000, 0x0000, 0x0000, 0x1e88, 0x9b67, - 0x2462, 0x0000, 0x0000, 0x0000, 0x6c08, 0xd975, 0x2444, 0x0000, 0x0000, 0x0000, - 0xfdb0, 0xcfc0, 0x2422, 0x0000, 0x0000, 0x0000, 0x3100, 0xc026, 0x2406, 0x0000, - 0x0000, 0x0000, 0xc5b4, 0xae64, 0x23e6, 0x0000, 0x0000, 0x0000, 0x2280, 0xf687, - 0x23c3, 0x0000, 0x0000, 0x0000, 0x2de0, 0x9006, 0x23a9, 0x0000, 0x0000, 0x0000, - 0x24bc, 0xf631, 0x238a, 0x0000, 0x0000, 0x0000, 0xb8d4, 0xa975, 0x236b, 0x0000, - 0x0000, 0x0000, 0xd9a4, 0xb949, 0x234b, 0x0000, 0x0000, 0x0000, 0xb54e, 0xbd39, - 0x232d, 0x0000, 0x0000, 0x0000, 0x4aac, 0x9a52, 0x230e, 0x0000, 0x0000, 0x0000, - 0xbbbc, 0xd085, 0x22ef, 0x0000, 0x0000, 0x0000, 0xdf18, 0xc633, 0x22cf, 0x0000, - 0x0000, 0x0000, 0x16d0, 0xeca5, 0x22af, 0x0000, 0x0000, 0x0000, 0xf2a0, 0xdf6f, - 0x228e, 0x0000, 0x0000, 0x0000, 0x8c44, 0xe86b, 0x2272, 0x0000, 0x0000, 0x0000, - 0x35c0, 0xbbf4, 0x2253, 0x0000, 0x0000, 0x0000, 0x0c40, 0xdafb, 0x2230, 0x0000, - 0x0000, 0x0000, 0x92dc, 0x9935, 0x2216, 0x0000, 0x0000, 0x0000, 0x0ca0, 0xbda6, - 0x21f3, 0x0000, 0x0000, 0x0000, 0x5958, 0xa6fd, 0x21d6, 0x0000, 0x0000, 0x0000, - 0xa3dc, 0x9d7f, 0x21b9, 0x0000, 0x0000, 0x0000, 0x79dc, 0xfcb5, 0x2199, 0x0000, - 0x0000, 0x0000, 0xf264, 0xcebb, 0x217b, 0x0000, 0x0000, 0x0000, 0x0abe, 0x8308, - 0x215c, 0x0000, 0x0000, 0x0000, 0x30ae, 0xb463, 0x213d, 0x0000, 0x0000, 0x0000, - 0x6228, 0xb040, 0x211c, 0x0000, 0x0000, 0x0000, 0xc9b2, 0xf43b, 0x20ff, 0x0000, - 0x0000, 0x0000, 0x3d8e, 0xa4b3, 0x20e0, 0x0000, 0x0000, 0x0000, 0x84e6, 0x8dab, - 0x20c1, 0x0000, 0x0000, 0x0000, 0xa124, 0x9b74, 0x20a1, 0x0000, 0x0000, 0x0000, - 0xc276, 0xd497, 0x2083, 0x0000, 0x0000, 0x0000, 0x6354, 0xa466, 0x2063, 0x0000, - 0x0000, 0x0000, 0x8654, 0xaf0a, 0x2044, 0x0000, 0x0000, 0x0000, 0x1d20, 0xfa5c, - 0x2024, 0x0000, 0x0000, 0x0000, 0xbcd0, 0xf3f0, 0x2004, 0x0000, 0x0000, 0x0000, - 0xedf0, 0xf0b6, 0x1fe7, 0x0000, 0x0000, 0x0000, 0x45bc, 0x9182, 0x1fc9, 0x0000, - 0x0000, 0x0000, 0xe254, 0xdc85, 0x1faa, 0x0000, 0x0000, 0x0000, 0xb898, 0xe9b1, - 0x1f8a, 0x0000, 0x0000, 0x0000, 0x0ebe, 0xe6f0, 0x1f6c, 0x0000, 0x0000, 0x0000, - 0xa9b8, 0xf584, 0x1f4c, 0x0000, 0x0000, 0x0000, 0x12e8, 0xdf6b, 0x1f2e, 0x0000, - 0x0000, 0x0000, 0x9f9e, 0xcd55, 0x1f0f, 0x0000, 0x0000, 0x0000, 0x05a0, 0xec3a, - 0x1eef, 0x0000, 0x0000, 0x0000, 0xd8e0, 0x96f8, 0x1ed1, 0x0000, 0x0000, 0x0000, - 0x3bd4, 0xccc6, 0x1eb1, 0x0000, 0x0000, 0x0000, 0x4910, 0xb87b, 0x1e93, 0x0000, - 0x0000, 0x0000, 0xbefc, 0xd40b, 0x1e73, 0x0000, 0x0000, 0x0000, 0x317e, 0xa406, - 0x1e55, 0x0000, 0x0000, 0x0000, 0x6bb2, 0xc2b2, 0x1e36, 0x0000, 0x0000, 0x0000, - 0xb87e, 0xbb78, 0x1e17, 0x0000, 0x0000, 0x0000, 0xa03c, 0xdbbd, 0x1df7, 0x0000, - 0x0000, 0x0000, 0x5b6c, 0xe3c8, 0x1dd9, 0x0000, 0x0000, 0x0000, 0x8968, 0xca8e, - 0x1dba, 0x0000, 0x0000, 0x0000, 0xc024, 0xe6ab, 0x1d9a, 0x0000, 0x0000, 0x0000, - 0x4110, 0xd4eb, 0x1d7a, 0x0000, 0x0000, 0x0000, 0xa168, 0xbdb5, 0x1d5d, 0x0000, - 0x0000, 0x0000, 0x012e, 0xa5fa, 0x1d3e, 0x0000, 0x0000, 0x0000, 0x6838, 0x9c1f, - 0x1d1e, 0x0000, 0x0000, 0x0000, 0xa158, 0xaa76, 0x1d00, 0x0000, 0x0000, 0x0000, - 0x090a, 0xbd95, 0x1ce1, 0x0000, 0x0000, 0x0000, 0xf73e, 0x8b6d, 0x1cc2, 0x0000, - 0x0000, 0x0000, 0x5fda, 0xbcbf, 0x1ca3, 0x0000, 0x0000, 0x0000, 0xdbe8, 0xb89f, - 0x1c84, 0x0000, 0x0000, 0x0000, 0x6e4c, 0x96c7, 0x1c64, 0x0000, 0x0000, 0x0000, - 0x19c2, 0xf2a4, 0x1c46, 0x0000, 0x0000, 0x0000, 0xb800, 0xf855, 0x1c1e, 0x0000, - 0x0000, 0x0000, 0x87fc, 0x85ff, 0x1c08, 0x0000, 0x0000, 0x0000, 0x1418, 0x839f, - 0x1be9, 0x0000, 0x0000, 0x0000, 0x6186, 0xd9d8, 0x1bca, 0x0000, 0x0000, 0x0000, - 0xf500, 0xabaa, 0x1ba6, 0x0000, 0x0000, 0x0000, 0x7b36, 0xdafe, 0x1b8c, 0x0000, - 0x0000, 0x0000, 0xf394, 0xe6d8, 0x1b6c, 0x0000, 0x0000, 0x0000, 0x6efc, 0x9e55, - 0x1b4e, 0x0000, 0x0000, 0x0000, 0x5e10, 0xc523, 0x1b2e, 0x0000, 0x0000, 0x0000, - 0x8210, 0xb6f9, 0x1b0d, 0x0000, 0x0000, 0x0000, 0x9ab0, 0x96e3, 0x1af1, 0x0000, - 0x0000, 0x0000, 0x3864, 0x92e7, 0x1ad1, 0x0000, 0x0000, 0x0000, 0x9878, 0xdc65, - 0x1ab1, 0x0000, 0x0000, 0x0000, 0xfa20, 0xd6cb, 0x1a94, 0x0000, 0x0000, 0x0000, - 0x6c00, 0xa4e4, 0x1a70, 0x0000, 0x0000, 0x0000, 0xab40, 0xb41b, 0x1a53, 0x0000, - 0x0000, 0x0000, 0x43a4, 0x8ede, 0x1a37, 0x0000, 0x0000, 0x0000, 0x22e0, 0x9314, - 0x1a15, 0x0000, 0x0000, 0x0000, 0x6170, 0xb949, 0x19f8, 0x0000, 0x0000, 0x0000, - 0x6b00, 0xe056, 0x19d8, 0x0000, 0x0000, 0x0000, 0x9ba8, 0xa94c, 0x19b9, 0x0000, - 0x0000, 0x0000, 0xfaa0, 0xaa16, 0x199b, 0x0000, 0x0000, 0x0000, 0x899a, 0xf627, - 0x197d, 0x0000, 0x0000, 0x0000, 0x9f20, 0xfb70, 0x195d, 0x0000, 0x0000, 0x0000, - 0xa4b8, 0xc176, 0x193e, 0x0000, 0x0000, 0x0000, 0xb21c, 0x85c3, 0x1920, 0x0000, - 0x0000, 0x0000, 0x50d2, 0x9b19, 0x1901, 0x0000, 0x0000, 0x0000, 0xd4b0, 0xb708, - 0x18e0, 0x0000, 0x0000, 0x0000, 0xfb88, 0xf510, 0x18c1, 0x0000, 0x0000, 0x0000, - 0x31ec, 0xdc8d, 0x18a3, 0x0000, 0x0000, 0x0000, 0x3c00, 0xbff9, 0x1885, 0x0000, - 0x0000, 0x0000, 0x5020, 0xc30b, 0x1862, 0x0000, 0x0000, 0x0000, 0xd4f0, 0xda0c, - 0x1844, 0x0000, 0x0000, 0x0000, 0x20d2, 0x99a5, 0x1828, 0x0000, 0x0000, 0x0000, - 0x852e, 0xd159, 0x1809, 0x0000, 0x0000, 0x0000, 0x7cd8, 0x97a1, 0x17e9, 0x0000, - 0x0000, 0x0000, 0x423a, 0x997b, 0x17cb, 0x0000, 0x0000, 0x0000, 0xc1c0, 0xbe7d, - 0x17a8, 0x0000, 0x0000, 0x0000, 0xe8bc, 0xdcdd, 0x178d, 0x0000, 0x0000, 0x0000, - 0x8b28, 0xae06, 0x176e, 0x0000, 0x0000, 0x0000, 0x102e, 0xb8d4, 0x174f, 0x0000, - 0x0000, 0x0000, 0xaa00, 0xaa5c, 0x172f, 0x0000, 0x0000, 0x0000, 0x51f0, 0x9fc0, - 0x170e, 0x0000, 0x0000, 0x0000, 0xf858, 0xe181, 0x16f2, 0x0000, 0x0000, 0x0000, - 0x91a8, 0x8162, 0x16d3, 0x0000, 0x0000, 0x0000, 0x5f40, 0xcb6f, 0x16b1, 0x0000, - 0x0000, 0x0000, 0xbb50, 0xe55f, 0x1693, 0x0000, 0x0000, 0x0000, 0xacd2, 0xd895, - 0x1676, 0x0000, 0x0000, 0x0000, 0xef30, 0x97bf, 0x1654, 0x0000, 0x0000, 0x0000, - 0xf700, 0xb3d7, 0x1633, 0x0000, 0x0000, 0x0000, 0x3454, 0xa7b5, 0x1619, 0x0000, - 0x0000, 0x0000, 0x6b00, 0xa929, 0x15f6, 0x0000, 0x0000, 0x0000, 0x9f04, 0x89f7, - 0x15db, 0x0000, 0x0000, 0x0000, 0xad78, 0xd985, 0x15bc, 0x0000, 0x0000, 0x0000, - 0xa46a, 0xae3f, 0x159d, 0x0000, 0x0000, 0x0000, 0x63a0, 0xd0da, 0x157c, 0x0000, - 0x0000, 0x0000, 0x5e90, 0x817d, 0x155e, 0x0000, 0x0000, 0x0000, 0x1494, 0xb13f, - 0x1540, 0x0000, 0x0000, 0x0000, 0x0090, 0x9c40, 0x1521, 0x0000, 0x0000, 0x0000, - 0xdd70, 0xcc86, 0x1500, 0x0000, 0x0000, 0x0000, 0x64f8, 0xdb6f, 0x14e1, 0x0000, - 0x0000, 0x0000, 0xe22c, 0xac17, 0x14c3, 0x0000, 0x0000, 0x0000, 0x60e0, 0xa9ad, - 0x14a3, 0x0000, 0x0000, 0x0000, 0x4640, 0xd658, 0x1481, 0x0000, 0x0000, 0x0000, - 0x6490, 0xa181, 0x1467, 0x0000, 0x0000, 0x0000, 0x1df4, 0xaaa2, 0x1447, 0x0000, - 0x0000, 0x0000, 0xb94a, 0x8f61, 0x1429, 0x0000, 0x0000, 0x0000, 0x5198, 0x9d83, - 0x1409, 0x0000, 0x0000, 0x0000, 0x0f7a, 0xa818, 0x13eb, 0x0000, 0x0000, 0x0000, - 0xc45e, 0xc06c, 0x13cc, 0x0000, 0x0000, 0x0000, 0x4ec0, 0xfa29, 0x13a8, 0x0000, - 0x0000, 0x0000, 0x6418, 0x8cad, 0x138c, 0x0000, 0x0000, 0x0000, 0xbcc8, 0xe7d1, - 0x136f, 0x0000, 0x0000, 0x0000, 0xc934, 0xf9b0, 0x134f, 0x0000, 0x0000, 0x0000, - 0x6ce0, 0x98df, 0x1331, 0x0000, 0x0000, 0x0000, 0x3516, 0xe5e9, 0x1312, 0x0000, - 0x0000, 0x0000, 0xc6c0, 0xef8b, 0x12ef, 0x0000, 0x0000, 0x0000, 0xaf02, 0x913d, - 0x12d4, 0x0000, 0x0000, 0x0000, 0xd230, 0xe1d5, 0x12b5, 0x0000, 0x0000, 0x0000, - 0xfba8, 0xc232, 0x1295, 0x0000, 0x0000, 0x0000, 0x7ba4, 0xabeb, 0x1277, 0x0000, - 0x0000, 0x0000, 0x6e5c, 0xc692, 0x1258, 0x0000, 0x0000, 0x0000, 0x76a2, 0x9756, - 0x1239, 0x0000, 0x0000, 0x0000, 0xe180, 0xe423, 0x1214, 0x0000, 0x0000, 0x0000, - 0x8c3c, 0x90f8, 0x11fb, 0x0000, 0x0000, 0x0000, 0x9f3c, 0x9fd2, 0x11dc, 0x0000, - 0x0000, 0x0000, 0x53e0, 0xb73e, 0x11bd, 0x0000, 0x0000, 0x0000, 0x45be, 0x88d6, - 0x119e, 0x0000, 0x0000, 0x0000, 0x111a, 0x8bc0, 0x117f, 0x0000, 0x0000, 0x0000, - 0xe26a, 0xd7ff, 0x1160, 0x0000, 0x0000, 0x0000, 0xfb60, 0xdd8d, 0x113f, 0x0000, - 0x0000, 0x0000, 0x9370, 0xc108, 0x1120, 0x0000, 0x0000, 0x0000, 0x9654, 0x8baf, - 0x1103, 0x0000, 0x0000, 0x0000, 0xd6ec, 0xd6b9, 0x10e4, 0x0000, 0x0000, 0x0000, - 0x23e4, 0xd7b7, 0x10c4, 0x0000, 0x0000, 0x0000, 0x1aa6, 0xa847, 0x10a6, 0x0000, - 0x0000, 0x0000, 0xbee6, 0x9fef, 0x1087, 0x0000, 0x0000, 0x0000, 0x26d0, 0xa6eb, - 0x1066, 0x0000, 0x0000, 0x0000, 0x5b86, 0xa880, 0x1049, 0x0000, 0x0000, 0x0000, - 0x125c, 0xd971, 0x1029, 0x0000, 0x0000, 0x0000, 0x1f78, 0x9d18, 0x100a, 0x0000, - 0x0000, 0x0000, 0x0e84, 0xb15b, 0x0feb, 0x0000, 0x0000, 0x0000, 0xd0c0, 0xc150, - 0x0fcc, 0x0000, 0x0000, 0x0000, 0xa330, 0xc40c, 0x0fad, 0x0000, 0x0000, 0x0000, - 0x5202, 0xfc2c, 0x0f8f, 0x0000, 0x0000, 0x0000, 0x3f7c, 0xecf5, 0x0f6f, 0x0000, - 0x0000, 0x0000, 0xef44, 0xfdfd, 0x0f50, 0x0000, 0x0000, 0x0000, 0x3f6c, 0xab1b, - 0x0f31, 0x0000, 0x0000, 0x0000, 0xf658, 0x89ec, 0x0f11, 0x0000, 0x0000, 0x0000, - 0xbfc8, 0x9ba8, 0x0ef4, 0x0000, 0x0000, 0x0000, 0x3d40, 0xbe21, 0x0ed5, 0x0000, - 0x0000, 0x0000, 0xbbc4, 0xc70d, 0x0eb6, 0x0000, 0x0000, 0x0000, 0x5158, 0xdb16, - 0x0e96, 0x0000, 0x0000, 0x0000, 0xb5a8, 0xa8d8, 0x0e78, 0x0000, 0x0000, 0x0000, - 0xcccc, 0xb40e, 0x0e58, 0x0000, 0x0000, 0x0000, 0x448c, 0xcb62, 0x0e3a, 0x0000, - 0x0000, 0x0000, 0xf12a, 0x8aed, 0x0e1b, 0x0000, 0x0000, 0x0000, 0x79d0, 0xc59c, - 0x0dfb, 0x0000, 0x0000, 0x0000, 0x06b4, 0xcdc9, 0x0ddd, 0x0000, 0x0000, 0x0000, - 0xae70, 0xa979, 0x0dbe, 0x0000, 0x0000, 0x0000, 0x317c, 0xa8fb, 0x0d9e, 0x0000, - 0x0000, 0x0000, 0x5fe0, 0x8a50, 0x0d7d, 0x0000, 0x0000, 0x0000, 0x70b6, 0xfdfa, - 0x0d61, 0x0000, 0x0000, 0x0000, 0x1640, 0x9dc7, 0x0d41, 0x0000, 0x0000, 0x0000, - 0x9a9c, 0xdc50, 0x0d23, 0x0000, 0x0000, 0x0000, 0x4fcc, 0x9a9b, 0x0d04, 0x0000, - 0x0000, 0x0000, 0x7e48, 0x8f77, 0x0ce5, 0x0000, 0x0000, 0x0000, 0x84e4, 0xd4b9, - 0x0cc6, 0x0000, 0x0000, 0x0000, 0x84e0, 0xbd10, 0x0ca6, 0x0000, 0x0000, 0x0000, - 0x1b0a, 0xc8d9, 0x0c88, 0x0000, 0x0000, 0x0000, 0x6a48, 0xfc81, 0x0c68, 0x0000, - 0x0000, 0x0000, 0x070a, 0xbef6, 0x0c4a, 0x0000, 0x0000, 0x0000, 0x8a70, 0xf096, - 0x0c2b, 0x0000, 0x0000, 0x0000, 0xecc2, 0xc994, 0x0c0c, 0x0000, 0x0000, 0x0000, - 0x1540, 0x9537, 0x0bea, 0x0000, 0x0000, 0x0000, 0x1b02, 0xab5b, 0x0bce, 0x0000, - 0x0000, 0x0000, 0x5dc0, 0xb0c8, 0x0bad, 0x0000, 0x0000, 0x0000, 0xc928, 0xe034, - 0x0b8f, 0x0000, 0x0000, 0x0000, 0x2d12, 0xb4b0, 0x0b71, 0x0000, 0x0000, 0x0000, - 0x8fc2, 0xbb94, 0x0b52, 0x0000, 0x0000, 0x0000, 0xe236, 0xe22f, 0x0b33, 0x0000, - 0x0000, 0x0000, 0xb97c, 0xbe9e, 0x0b13, 0x0000, 0x0000, 0x0000, 0xe1a6, 0xe16d, - 0x0af5, 0x0000, 0x0000, 0x0000, 0xd330, 0xbaf0, 0x0ad6, 0x0000, 0x0000, 0x0000, - 0xc0bc, 0xbbd0, 0x0ab7, 0x0000, 0x0000, 0x0000, 0x8e66, 0xdd9b, 0x0a98, 0x0000, - 0x0000, 0x0000, 0xc95c, 0xf799, 0x0a79, 0x0000, 0x0000, 0x0000, 0xdac0, 0xbe4c, - 0x0a55, 0x0000, 0x0000, 0x0000, 0xafc0, 0xc378, 0x0a37, 0x0000, 0x0000, 0x0000, - 0xa880, 0xe341, 0x0a19, 0x0000, 0x0000, 0x0000, 0xc242, 0x81f6, 0x09fd, 0x0000, - 0x0000, 0x0000, 0x7470, 0xc777, 0x09de, 0x0000, 0x0000, 0x0000, 0x62bc, 0xb684, - 0x09be, 0x0000, 0x0000, 0x0000, 0x43ac, 0x8c58, 0x099f, 0x0000, 0x0000, 0x0000, - 0xcc3c, 0xf9ac, 0x0981, 0x0000, 0x0000, 0x0000, 0x1526, 0xb670, 0x0962, 0x0000, - 0x0000, 0x0000, 0xc9fe, 0xdf50, 0x0943, 0x0000, 0x0000, 0x0000, 0x6ae6, 0xc065, - 0x0924, 0x0000, 0x0000, 0x0000, 0xb114, 0xcf29, 0x0905, 0x0000, 0x0000, 0x0000, - 0xd388, 0x922a, 0x08e4, 0x0000, 0x0000, 0x0000, 0xcf54, 0xb926, 0x08c7, 0x0000, - 0x0000, 0x0000, 0x3826, 0xe855, 0x08a8, 0x0000, 0x0000, 0x0000, 0xe7c8, 0x829b, - 0x0888, 0x0000, 0x0000, 0x0000, 0x546c, 0xa903, 0x086a, 0x0000, 0x0000, 0x0000, - 0x8768, 0x99cc, 0x0849, 0x0000, 0x0000, 0x0000, 0x00ac, 0xf529, 0x082b, 0x0000, - 0x0000, 0x0000, 0x2658, 0x9f0b, 0x080c, 0x0000, 0x0000, 0x0000, 0xfe5c, 0x9e21, - 0x07ee, 0x0000, 0x0000, 0x0000, 0x6da2, 0x9910, 0x07cf, 0x0000, 0x0000, 0x0000, - 0x9220, 0xf9b3, 0x07b0, 0x0000, 0x0000, 0x0000, 0x3d90, 0xa541, 0x0791, 0x0000, - 0x0000, 0x0000, 0x6e4c, 0xe7cc, 0x0771, 0x0000, 0x0000, 0x0000, 0xa8fa, 0xe80a, - 0x0753, 0x0000, 0x0000, 0x0000, 0x4e14, 0xc3a7, 0x0734, 0x0000, 0x0000, 0x0000, - 0xf7e0, 0xbad9, 0x0712, 0x0000, 0x0000, 0x0000, 0xfea0, 0xeff2, 0x06f5, 0x0000, - 0x0000, 0x0000, 0xcef6, 0xbd48, 0x06d7, 0x0000, 0x0000, 0x0000, 0x7544, 0xf559, - 0x06b7, 0x0000, 0x0000, 0x0000, 0x2388, 0xf655, 0x0698, 0x0000, 0x0000, 0x0000, - 0xe900, 0xad56, 0x0676, 0x0000, 0x0000, 0x0000, 0x2cc0, 0x8437, 0x0659, 0x0000, - 0x0000, 0x0000, 0x3068, 0xc544, 0x063b, 0x0000, 0x0000, 0x0000, 0xdc70, 0xe73c, - 0x061b, 0x0000, 0x0000, 0x0000, 0xee50, 0x9d49, 0x05fc, 0x0000, 0x0000, 0x0000, - 0x93d2, 0x81f6, 0x05df, 0x0000, 0x0000, 0x0000, 0x941c, 0xadff, 0x05bf, 0x0000, - 0x0000, 0x0000, 0x2ce2, 0x8e45, 0x05a1, 0x0000, 0x0000, 0x0000, 0x4a60, 0x95fd, - 0x0581, 0x0000, 0x0000, 0x0000, 0x79f8, 0xb83a, 0x0563, 0x0000, 0x0000, 0x0000, - 0xcb58, 0xa1f5, 0x0543, 0x0000, 0x0000, 0x0000, 0x2a3a, 0xdc36, 0x0525, 0x0000, - 0x0000, 0x0000, 0x14ee, 0x890e, 0x0506, 0x0000, 0x0000, 0x0000, 0x8f20, 0xc432, - 0x04e3, 0x0000, 0x0000, 0x0000, 0x8440, 0xb21d, 0x04c6, 0x0000, 0x0000, 0x0000, - 0x5430, 0xf698, 0x04a7, 0x0000, 0x0000, 0x0000, 0x04ae, 0x8b20, 0x048a, 0x0000, - 0x0000, 0x0000, 0x04d0, 0xe872, 0x046b, 0x0000, 0x0000, 0x0000, 0xc78e, 0x8893, - 0x044c, 0x0000, 0x0000, 0x0000, 0x0f78, 0x9895, 0x042b, 0x0000, 0x0000, 0x0000, - 0x11d4, 0xdf2e, 0x040d, 0x0000, 0x0000, 0x0000, 0xe84c, 0x89d5, 0x03ef, 0x0000, - 0x0000, 0x0000, 0xf7be, 0x8a67, 0x03d0, 0x0000, 0x0000, 0x0000, 0x95d0, 0xc906, - 0x03b1, 0x0000, 0x0000, 0x0000, 0x64ce, 0xd96c, 0x0392, 0x0000, 0x0000, 0x0000, - 0x97ba, 0xa16f, 0x0373, 0x0000, 0x0000, 0x0000, 0x463c, 0xc51a, 0x0354, 0x0000, - 0x0000, 0x0000, 0xef0a, 0xe93e, 0x0335, 0x0000, 0x0000, 0x0000, 0x526a, 0xa466, - 0x0316, 0x0000, 0x0000, 0x0000, 0x4140, 0xa94d, 0x02f5, 0x0000, 0x0000, 0x0000, - 0xb4ec, 0xce68, 0x02d8, 0x0000, 0x0000, 0x0000, 0x4fa2, 0x8490, 0x02b9, 0x0000, - 0x0000, 0x0000, 0x4e60, 0xca98, 0x0298, 0x0000, 0x0000, 0x0000, 0x08dc, 0xe09c, - 0x027a, 0x0000, 0x0000, 0x0000, 0x2b90, 0xc7e3, 0x025c, 0x0000, 0x0000, 0x0000, - 0x5a7c, 0xf8ef, 0x023c, 0x0000, 0x0000, 0x0000, 0x5022, 0x9d58, 0x021e, 0x0000, - 0x0000, 0x0000, 0x553a, 0xe242, 0x01ff, 0x0000, 0x0000, 0x0000, 0x7e6e, 0xb54d, - 0x01e0, 0x0000, 0x0000, 0x0000, 0xd2d4, 0xa88c, 0x01c1, 0x0000, 0x0000, 0x0000, - 0x75b6, 0xfe6d, 0x01a2, 0x0000, 0x0000, 0x0000, 0x3bb2, 0xf04c, 0x0183, 0x0000, - 0x0000, 0x0000, 0xc2d0, 0xc046, 0x0163, 0x0000, 0x0000, 0x0000, 0x250c, 0xf9d6, - 0x0145, 0x0000, 0x0000, 0x0000, 0xb7b4, 0x8a0d, 0x0126, 0x0000, 0x0000, 0x0000, - 0x1a72, 0xe4f5, 0x0107, 0x0000, 0x0000, 0x0000, 0x825c, 0xa9b8, 0x00e8, 0x0000, - 0x0000, 0x0000, 0x6c90, 0xc9ad, 0x00c6, 0x0000, 0x0000, 0x0000, 0x4d00, 0xd1bb, - 0x00aa, 0x0000, 0x0000, 0x0000, 0xa4a0, 0xee01, 0x0087, 0x0000, 0x0000, 0x0000, - 0x89a8, 0xbe9f, 0x006b, 0x0000, 0x0000, 0x0000, 0x038e, 0xc80c, 0x004d, 0x0000, - 0x0000, 0x0000, 0xfe26, 0x8384, 0x002e, 0x0000, 0x0000, 0x0000, 0xcd90, 0xca57, - 0x000e, 0x0000 -}; - -void MacroAssembler::libm_reduce_pi04l(Register eax, Register ecx, Register edx, Register ebx, Register esi, Register edi, Register ebp, Register esp) { - Label B1_1, B1_2, B1_3, B1_4, B1_5, B1_6, B1_7, B1_8, B1_9, B1_10, B1_11, B1_12; - Label B1_13, B1_14, B1_15; - - assert_different_registers(ebx, eax, ecx, edx, esi, edi, ebp, esp); - - address zero_none = (address)_zero_none; - address _4onpi_d = (address)__4onpi_d; - address TWO_32H = (address)_TWO_32H; - address pi04_3d = (address)_pi04_3d; - address pi04_5d = (address)_pi04_5d; - address SCALE = (address)_SCALE; - address zeros = (address)_zeros; - address pi04_2d = (address)_pi04_2d; - address TWO_12H = (address)_TWO_12H; - address _4onpi_31l = (address)__4onpi_31l; - - bind(B1_1); - push(ebp); - movl(ebp, esp); - andl(esp, -16); - push(esi); - push(edi); - push(ebx); - subl(esp, 20); - movzwl(ebx, Address(ebp, 16)); - andl(ebx, 32767); - movl(eax, Address(ebp, 20)); - cmpl(ebx, 16413); - movl(esi, Address(ebp, 24)); - movl(Address(esp, 4), eax); - jcc(Assembler::greaterEqual, B1_8); - - bind(B1_2); - fld_x(Address(ebp, 8)); - fld_d(ExternalAddress(_4onpi_d)); //0x6dc9c883UL, 0x3ff45f30UL - fmul(1); - fstp_x(Address(esp, 8)); - movzwl(ecx, Address(esp, 16)); - negl(ecx); - addl(ecx, 30); - movl(eax, Address(esp, 12)); - shrl(eax); - cmpl(Address(esp, 4), 0); - jcc(Assembler::notEqual, B1_4); - - bind(B1_3); - lea(ecx, Address(eax, 1)); - andl(ecx, -2); - jmp(B1_5); - - bind(B1_4); - movl(ecx, eax); - addl(eax, Address(esp, 4)); - movl(edx, eax); - andl(edx, 1); - addl(ecx, edx); - - bind(B1_5); - fld_d(ExternalAddress(TWO_32H)); //0x00000000UL, 0x41f80000UL - cmpl(ebx, 16400); - movl(Address(esp, 0), ecx); - fild_s(Address(esp, 0)); - jcc(Assembler::greaterEqual, B1_7); - - bind(B1_6); - fld_d(ExternalAddress(pi04_3d)); //0x54442d00UL, 0x3fe921fbUL - fmul(1); - fsubp(3); - fxch(1); - fmul(2); - fld_s(2); - fadd(1); - fsubrp(1); - fld_s(0); - fxch(1); - fsuba(3); - fld_d(ExternalAddress(8 + pi04_3d)); //0x98cc5180UL, 0x3ce84698UL - fmul(3); - fsuba(2); - fxch(1); - fsub(2); - fsubrp(1); - faddp(3); - fld_d(ExternalAddress(16 + pi04_3d)); //0xcbb5bf6cUL, 0xb9dfc8f8UL - fmulp(2); - fld_s(1); - fsubr(1); - fsuba(1); - fxch(2); - fsubp(1); - faddp(2); - fxch(1); - jmp(B1_15); - - bind(B1_7); - fld_d(ExternalAddress(pi04_5d)); //0x54400000UL, 0x3fe921fbUL - fmul(1); - fsubp(3); - fxch(1); - fmul(2); - fld_s(2); - fadd(1); - fsubrp(1); - fld_s(0); - fxch(1); - fsuba(3); - fld_d(ExternalAddress(8 + pi04_5d)); //0x1a600000UL, 0x3dc0b461UL - fmul(3); - fsuba(2); - fxch(1); - fsub(2); - fsubrp(1); - faddp(3); - fld_d(ExternalAddress(16 + pi04_5d)); //0x2e000000UL, 0x3b93198aUL - fmul(2); - fld_s(0); - fsubr(2); - fsuba(2); - fxch(1); - fsubp(2); - fxch(1); - faddp(3); - fld_d(ExternalAddress(24 + pi04_5d)); //0x25200000UL, 0x396b839aUL - fmul(2); - fld_s(0); - fsubr(2); - fsuba(2); - fxch(1); - fsubp(2); - fxch(1); - faddp(3); - fld_d(ExternalAddress(32 + pi04_5d)); //0x533e63a0UL, 0x37027044UL - fmulp(2); - fld_s(1); - fsubr(1); - fsuba(1); - fxch(2); - fsubp(1); - faddp(2); - fxch(1); - jmp(B1_15); - - bind(B1_8); - fld_x(Address(ebp, 8)); - addl(ebx, -16417); - fmul_d(as_Address(ExternalAddress(SCALE))); //0x00000000UL, 0x32600000UL - movl(eax, -2078209981); - imull(ebx); - addl(edx, ebx); - movl(ecx, ebx); - sarl(edx, 4); - sarl(ecx, 31); - subl(edx, ecx); - movl(eax, edx); - shll(eax, 5); - fstp_x(Address(ebp, 8)); - fld_x(Address(ebp, 8)); - subl(eax, edx); - movl(Address(ebp, 8), 0); - subl(ebx, eax); - fld_x(Address(ebp, 8)); - cmpl(ebx, 17); - fsuba(1); - jcc(Assembler::less, B1_10); - - bind(B1_9); - lea(eax, Address(noreg, edx, Address::times_8)); - lea(ecx, Address(eax, edx, Address::times_4)); - incl(edx); - fld_x(Address(_4onpi_31l, RelocationHolder::none).plus_disp(ecx, Address::times_1)); - fmul(2); - fld_x(Address(12 + _4onpi_31l, RelocationHolder::none).plus_disp(ecx, Address::times_1)); - fmul(2); - fld_s(0); - fadd(2); - fsuba(2); - fxch(1); - faddp(2); - fld_s(1); - fadd(1); - fstp_x(Address(esp, 8)); - andl(Address(esp, 8), -16777216); - fld_x(Address(esp, 8)); - fsubp(1); - jmp(B1_11); - - bind(B1_10); - fld_d(ExternalAddress(zeros)); //0x00000000UL, 0x00000000UL - fld_s(0); - - bind(B1_11); - fld_s(0); - lea(eax, Address(noreg, edx, Address::times_8)); - fld_s(3); - lea(edx, Address(eax, edx, Address::times_4)); - fld_x(Address(_4onpi_31l, RelocationHolder::none).plus_disp(edx, Address::times_1)); - fmul(6); - movl(Address(esp, 0), edx); - fadda(2); - fxch(2); - fsuba(3); - fxch(2); - faddp(3); - fxch(2); - faddp(3); - fld_x(Address(12 + _4onpi_31l, RelocationHolder::none).plus_disp(edx, Address::times_1)); - fmula(2); - fld_s(2); - fadd(2); - fld_s(0); - fxch(1); - fsubra(3); - fxch(3); - fchs(); - faddp(4); - fxch(3); - faddp(4); - fxch(2); - fadd(3); - fxch(2); - fmul(5); - fadda(2); - fld_s(4); - fld_x(Address(24 + _4onpi_31l, RelocationHolder::none).plus_disp(edx, Address::times_1)); - fmula(1); - fxch(1); - fadda(4); - fxch(4); - fstp_x(Address(esp, 8)); - movzwl(ebx, Address(esp, 16)); - andl(ebx, 32767); - cmpl(ebx, 16415); - jcc(Assembler::greaterEqual, B1_13); - - bind(B1_12); - negl(ebx); - addl(ebx, 30); - movl(ecx, ebx); - movl(eax, Address(esp, 12)); - shrl(eax); - shll(eax); - movl(Address(esp, 12), eax); - movl(Address(esp, 8), 0); - shrl(eax); - jmp(B1_14); - - bind(B1_13); - negl(ebx); - addl(ebx, 30); - movl(ecx, ebx); - movl(edx, Address(esp, 8)); - shrl(edx); - shll(edx); - negl(ecx); - movl(eax, Address(esp, 12)); - shll(eax); - movl(ecx, ebx); - movl(Address(esp, 8), edx); - shrl(edx); - orl(eax, edx); - - bind(B1_14); - fld_x(Address(esp, 8)); - addl(eax, Address(esp, 4)); - fsubp(3); - fmul(6); - fld_s(4); - movl(edx, eax); - andl(edx, 1); - fadd(3); - movl(ecx, Address(esp, 0)); - fsuba(3); - fxch(3); - faddp(5); - fld_s(1); - fxch(3); - fadd_d(Address(zero_none, RelocationHolder::none).plus_disp(edx, Address::times_8)); - fadda(3); - fsub(3); - faddp(2); - fxch(1); - faddp(4); - fld_s(2); - fadd(2); - fsuba(2); - fxch(3); - faddp(2); - fxch(1); - faddp(3); - fld_s(0); - fadd(2); - fsuba(2); - fxch(1); - faddp(2); - fxch(1); - faddp(2); - fld_s(2); - fld_x(Address(36 + _4onpi_31l, RelocationHolder::none).plus_disp(ecx, Address::times_1)); - fmula(1); - fld_s(1); - fadd(3); - fsuba(3); - fxch(2); - faddp(3); - fxch(2); - faddp(3); - fxch(1); - fmul(4); - fld_s(0); - fadd(2); - fsuba(2); - fxch(1); - faddp(2); - fxch(1); - faddp(2); - fld_s(2); - fld_x(Address(48 + _4onpi_31l, RelocationHolder::none).plus_disp(ecx, Address::times_1)); - fmula(1); - fld_s(1); - fadd(3); - fsuba(3); - fxch(2); - faddp(3); - fxch(2); - faddp(3); - fld_s(3); - fxch(2); - fmul(5); - fld_x(Address(60 + _4onpi_31l, RelocationHolder::none).plus_disp(ecx, Address::times_1)); - fmula(3); - fxch(3); - faddp(1); - fld_s(0); - fadd(2); - fsuba(2); - fxch(1); - faddp(2); - fxch(1); - faddp(3); - fld_s(3); - fxch(2); - fmul(5); - fld_x(Address(72 + _4onpi_31l, RelocationHolder::none).plus_disp(ecx, Address::times_1)); - fmula(3); - fxch(3); - faddp(1); - fld_s(0); - fadd(2); - fsuba(2); - fxch(1); - faddp(2); - fxch(1); - faddp(3); - fxch(1); - fmulp(4); - fld_x(Address(84 + _4onpi_31l, RelocationHolder::none).plus_disp(ecx, Address::times_1)); - fmulp(3); - fxch(2); - faddp(3); - fld_s(2); - fadd(2); - fld_d(ExternalAddress(TWO_32H)); //0x00000000UL, 0x41f80000UL - fmul(1); - fadda(1); - fsubp(1); - fsuba(2); - fxch(3); - faddp(2); - faddp(1); - fld_d(ExternalAddress(pi04_2d)); //0x54400000UL, 0x3fe921fbUL - fld_s(0); - fmul(2); - fxch(2); - fadd(3); - fxch(1); - fmulp(3); - fmul_d(as_Address(ExternalAddress(8 + pi04_2d))); //0x1a626331UL, 0x3dc0b461UL - faddp(1); - - bind(B1_15); - fld_d(ExternalAddress(TWO_12H)); //0x00000000UL, 0x40b80000UL - fld_s(2); - fadd(2); - fmula(1); - fstp_x(Address(esp, 8)); - fld_x(Address(esp, 8)); - fadd(1); - fsubrp(1); - fst_d(Address(esi, 0)); - fsubp(2); - faddp(1); - fstp_d(Address(esi, 8)); - addl(esp, 20); - pop(ebx); - pop(edi); - pop(esi); - movl(esp, ebp); - pop(ebp); - ret(0); -} - - -ATTRIBUTE_ALIGNED(16) static const jushort _SP[] = -{ - 0xaaab, 0xaaaa, 0xaaaa, 0xaaaa, 0xbffc, 0x0000, 0x8887, 0x8888, 0x8888, 0x8888, - 0x3ff8, 0x0000, 0xc527, 0x0d00, 0x00d0, 0xd00d, 0xbff2, 0x0000, 0x45f6, 0xb616, - 0x1d2a, 0xb8ef, 0x3fec, 0x0000, 0x825b, 0x3997, 0x2b3f, 0xd732, 0xbfe5, 0x0000, - 0xbf33, 0x8bb4, 0x2fda, 0xb092, 0x3fde, 0x0000, 0x44a6, 0xed1a, 0x29ef, 0xd73e, - 0xbfd6, 0x0000, 0x8610, 0x307f, 0x62a1, 0xc921, 0x3fce, 0x0000 -}; - -ATTRIBUTE_ALIGNED(16) static const jushort _CP[] = -{ - 0x0000, 0x0000, 0x0000, 0x8000, 0xbffe, 0x0000, 0xaaa5, 0xaaaa, 0xaaaa, 0xaaaa, - 0x3ffa, 0x0000, 0x9c2f, 0x0b60, 0x60b6, 0xb60b, 0xbff5, 0x0000, 0xf024, 0x0cac, - 0x00d0, 0xd00d, 0x3fef, 0x0000, 0x03fe, 0x3f65, 0x7dbb, 0x93f2, 0xbfe9, 0x0000, - 0xd84d, 0xadee, 0xc698, 0x8f76, 0x3fe2, 0x0000, 0xdaba, 0xfe79, 0xea36, 0xc9c9, - 0xbfda, 0x0000, 0x3ac6, 0x0ba0, 0x07ce, 0xd585, 0x3fd2, 0x0000 -}; - -void MacroAssembler::libm_sincos_huge(XMMRegister xmm0, XMMRegister xmm1, Register eax, Register ecx, Register edx, Register ebx, Register esi, Register edi, Register ebp, Register esp) { - Label B1_1, B1_2, B1_3, B1_4, B1_5, B1_6, B1_7, B1_8, B1_9, B1_10, B1_11, B1_12; - Label B1_13, B1_14, B1_15, B1_16, B1_17, B1_18, B1_19, B1_20, B1_21, B1_22, B1_23; - Label B1_24, B1_25, B1_26, B1_27, B1_28, B1_29, B1_30, B1_31, B1_32, B1_33, B1_34; - Label B1_35, B1_36, B1_37, B1_38, B1_39, B1_40, B1_41, B1_42, B1_43, B1_46; - - assert_different_registers(ebx, eax, ecx, edx, esi, edi, ebp, esp); - - address CP = (address)_CP; - address SP = (address)_SP; - - bind(B1_1); - push(ebp); - movl(ebp, esp); - andl(esp, -64); - push(esi); - push(edi); - push(ebx); - subl(esp, 52); - movl(eax, Address(ebp, 16)); - movl(edx, Address(ebp, 20)); - movl(Address(esp, 32), eax); - movl(Address(esp, 36), edx); - - bind(B1_2); - fnstcw(Address(esp, 30)); - - bind(B1_3); - movsd(xmm1, Address(ebp, 8)); - movl(esi, Address(ebp, 12)); - movl(eax, esi); - andl(eax, 2147483647); - andps(xmm1, ExternalAddress(L_2IL0FLOATPACKET_0)); //0xffffffffUL, 0x7fffffffUL, 0x00000000UL, 0x00000000UL - shrl(esi, 31); - movl(Address(esp, 40), eax); - cmpl(eax, 1104150528); - movsd(Address(ebp, 8), xmm1); - jcc(Assembler::aboveEqual, B1_11); - - bind(B1_4); - movsd(xmm0, ExternalAddress(PI4_INV)); //0x6dc9c883UL, 0x3ff45f30UL - mulsd(xmm0, xmm1); - movzwl(edx, Address(esp, 30)); - movl(eax, edx); - andl(eax, 768); - movsd(Address(esp, 0), xmm0); - cmpl(eax, 768); - jcc(Assembler::equal, B1_42); - - bind(B1_5); - orl(edx, -64768); - movw(Address(esp, 28), edx); - - bind(B1_6); - fldcw(Address(esp, 28)); - - bind(B1_7); - movsd(xmm1, Address(ebp, 8)); - movl(ebx, 1); - - bind(B1_8); - movl(Address(esp, 12), ebx); - movl(ebx, Address(esp, 4)); - movl(eax, ebx); - movl(Address(esp, 8), esi); - movl(esi, ebx); - shrl(esi, 20); - andl(eax, 1048575); - movl(ecx, esi); - orl(eax, 1048576); - negl(ecx); - movl(edx, eax); - addl(ecx, 19); - addl(esi, 13); - movl(Address(esp, 24), ecx); - shrl(edx); - movl(ecx, esi); - shll(eax); - movl(ecx, Address(esp, 24)); - movl(esi, Address(esp, 0)); - shrl(esi); - orl(eax, esi); - cmpl(ebx, 1094713344); - movsd(Address(esp, 16), xmm1); - fld_d(Address(esp, 16)); - cmov32(Assembler::below, eax, edx); - movl(esi, Address(esp, 8)); - lea(edx, Address(eax, 1)); - movl(ebx, edx); - andl(ebx, -2); - movl(Address(esp, 16), ebx); - fild_s(Address(esp, 16)); - movl(ebx, Address(esp, 12)); - cmpl(Address(esp, 40), 1094713344); - jcc(Assembler::aboveEqual, B1_10); - - bind(B1_9); - fld_d(ExternalAddress(PI4X3)); //0x54443000UL, 0xbfe921fbUL - fmul(1); - faddp(2); - fld_d(ExternalAddress(PI4X3 + 8)); //0x3b39a000UL, 0x3d373dcbUL - fmul(1); - faddp(2); - fld_d(ExternalAddress(PI4X3 + 16)); //0xe0e68948UL, 0xba845c06UL - fmulp(1); - faddp(1); - jmp(B1_17); - - bind(B1_10); - fld_d(ExternalAddress(PI4X4)); //0x54400000UL, 0xbfe921fbUL - fmul(1); - faddp(2); - fld_d(ExternalAddress(PI4X4 + 8)); //0x1a600000UL, 0xbdc0b461UL - fmul(1); - faddp(2); - fld_d(ExternalAddress(PI4X4 + 16)); //0x2e000000UL, 0xbb93198aUL - fmul(1); - faddp(2); - fld_d(ExternalAddress(PI4X4 + 24)); //0x252049c1UL, 0xb96b839aUL - fmulp(1); - faddp(1); - jmp(B1_17); - - bind(B1_11); - movzwl(edx, Address(esp, 30)); - movl(eax, edx); - andl(eax, 768); - cmpl(eax, 768); - jcc(Assembler::equal, B1_43); - bind(B1_12); - orl(edx, -64768); - movw(Address(esp, 28), edx); - - bind(B1_13); - fldcw(Address(esp, 28)); - - bind(B1_14); - movsd(xmm1, Address(ebp, 8)); - movl(ebx, 1); - - bind(B1_15); - movsd(Address(esp, 16), xmm1); - fld_d(Address(esp, 16)); - addl(esp, -32); - lea(eax, Address(esp, 32)); - fstp_x(Address(esp, 0)); - movl(Address(esp, 12), 0); - movl(Address(esp, 16), eax); - call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlibm_reduce_pi04l()))); - - bind(B1_46); - addl(esp, 32); - - bind(B1_16); - fld_d(Address(esp, 0)); - lea(edx, Address(eax, 1)); - fld_d(Address(esp, 8)); - faddp(1); - - bind(B1_17); - movl(ecx, edx); - addl(eax, 3); - shrl(ecx, 2); - andl(ecx, 1); - shrl(eax, 2); - xorl(esi, ecx); - movl(ecx, Address(esp, 36)); - andl(eax, 1); - andl(ecx, 3); - cmpl(ecx, 3); - jcc(Assembler::notEqual, B1_25); - - bind(B1_18); - fld_x(ExternalAddress(84 + SP)); //0x8610, 0x307f, 0x62 - fld_s(1); - fmul((2)); - testb(edx, 2); - fmula((1)); - fld_x(ExternalAddress(72 + SP)); //0x44a6, 0xed1a, 0x29 - faddp(2); - fmula(1); - fld_x(ExternalAddress(60 + SP)); //0xbf33, 0x8bb4, 0x2f - faddp(2); - fmula(1); - fld_x(ExternalAddress(48 + SP)); //0x825b, 0x3997, 0x2b - faddp(2); - fmula(1); - fld_x(ExternalAddress(36 + SP)); //0x45f6, 0xb616, 0x1d - faddp(2); - fmula(1); - fld_x(ExternalAddress(24 + SP)); //0xc527, 0x0d00, 0x00 - faddp(2); - fmula(1); - fld_x(ExternalAddress(12 + SP)); //0x8887, 0x8888, 0x88 - faddp(2); - fmula(1); - fld_x(ExternalAddress(SP)); //0xaaab, 0xaaaa, 0xaa - faddp(2); - fmula(1); - fld_x(ExternalAddress(84 + CP)); //0x3ac6, 0x0ba0, 0x07 - fmul(1); - fld_x(ExternalAddress(72 + CP)); //0xdaba, 0xfe79, 0xea - faddp(1); - fmul(1); - fld_x(ExternalAddress(62 + CP)); //0xd84d, 0xadee, 0xc6 - faddp(1); - fmul(1); - fld_x(ExternalAddress(48 + CP)); //0x03fe, 0x3f65, 0x7d - faddp(1); - fmul(1); - fld_x(ExternalAddress(36 + CP)); //0xf024, 0x0cac, 0x00 - faddp(1); - fmul(1); - fld_x(ExternalAddress(24 + CP)); //0x9c2f, 0x0b60, 0x60 - faddp(1); - fmul(1); - fld_x(ExternalAddress(12 + CP)); //0xaaa5, 0xaaaa, 0xaa - faddp(1); - fmul(1); - fld_x(ExternalAddress(CP)); //0x0000, 0x0000, 0x00 - faddp(1); - fmulp(1); - fld_d(Address(ONES, RelocationHolder::none).plus_disp(esi, Address::times_8)); - fld_d(Address(ONES, RelocationHolder::none).plus_disp(eax, Address::times_8)); - jcc(Assembler::equal, B1_22); - - bind(B1_19); - fmulp(4); - testl(ebx, ebx); - fxch(2); - fmul(3); - movl(eax, Address(esp, 2)); - faddp(3); - fxch(2); - fstp_d(Address(eax, 0)); - fmula(1); - faddp(1); - fstp_d(Address(eax, 8)); - jcc(Assembler::equal, B1_21); - - bind(B1_20); - fldcw(Address(esp, 30)); - - bind(B1_21); - addl(esp, 52); - pop(ebx); - pop(edi); - pop(esi); - movl(esp, ebp); - pop(ebp); - ret(0); - - bind(B1_22); - fxch(1); - fmulp(4); - testl(ebx, ebx); - fxch(2); - fmul(3); - movl(eax, Address(esp, 32)); - faddp(3); - fxch(2); - fstp_d(Address(eax, 8)); - fmula(1); - faddp(1); - fstp_d(Address(eax, 0)); - jcc(Assembler::equal, B1_24); - - bind(B1_23); - fldcw(Address(esp, 30)); - - bind(B1_24); - addl(esp, 52); - pop(ebx); - pop(edi); - pop(esi); - movl(esp, ebp); - pop(ebp); - ret(0); - - bind(B1_25); - testb(Address(esp, 36), 2); - jcc(Assembler::equal, B1_33); - - bind(B1_26); - fld_s(0); - testb(edx, 2); - fmul(1); - fld_s(0); - fmul(1); - jcc(Assembler::equal, B1_30); - - bind(B1_27); - fstp_d(2); - fld_x(ExternalAddress(84 + CP)); //0x3ac6, 0x0ba0, 0x07 - testl(ebx, ebx); - fmul(2); - fld_x(ExternalAddress(72 + CP)); //0xdaba, 0xfe79, 0xea - fmul(3); - fld_x(ExternalAddress(60 + CP)); //0xd84d, 0xadee, 0xc6 - movl(eax, Address(rsp, 32)); - faddp(2); - fxch(1); - fmul(3); - fld_x(ExternalAddress(48 + CP)); //0x03fe, 0x3f65, 0x7d - faddp(2); - fxch(1); - fmul(3); - fld_x(ExternalAddress(36 + CP)); //0xf024, 0x0cac, 0x00 - faddp(2); - fxch(1); - fmul(3); - fld_x(ExternalAddress(24 + CP)); //0x9c2f, 0x0b60, 0x60 - faddp(2); - fxch(1); - fmul(3); - fld_x(ExternalAddress(12 + CP)); //0xaaa5, 0xaaaa, 0xaa - faddp(2); - fxch(1); - fmulp(3); - fld_x(ExternalAddress(CP)); //0x0000, 0x0000, 0x00 - faddp(1); - fmulp(1); - faddp(1); - fld_d(Address(ONES, RelocationHolder::none).plus_disp(rsi, Address::times_8)); - fmula(1); - faddp(1); - fstp_d(Address(eax, 8)); - jcc(Assembler::equal, B1_29); - - bind(B1_28); - fldcw(Address(esp, 30)); - - bind(B1_29); - addl(esp, 52); - pop(ebx); - pop(edi); - pop(esi); - movl(esp, ebp); - pop(ebp); - ret(0); - - bind(B1_30); - fld_x(ExternalAddress(84 + SP)); //0x8610, 0x307f, 0x62 - testl(ebx, ebx); - fmul(1); - fld_x(ExternalAddress(72 + SP)); //0x44a6, 0xed1a, 0x29 - fmul(2); - fld_x(ExternalAddress(60 + SP)); //0xbf33, 0x8bb4, 0x2f - movl(eax, Address(rsp, 32)); - faddp(2); - fxch(1); - fmul(2); - fld_x(ExternalAddress(48 + SP)); //0x825b, 0x3997, 0x2b - faddp(2); - fxch(1); - fmul(2); - fld_x(ExternalAddress(36 + SP)); //0x45f6, 0xb616, 0x1d - faddp(2); - fxch(1); - fmul(2); - fld_x(ExternalAddress(24 + SP)); //0xc527, 0x0d00, 0x00 - faddp(2); - fxch(1); - fmul(2); - fld_x(ExternalAddress(12 + SP)); //0x8887, 0x8888, 0x88 - faddp(2); - fxch(1); - fmulp(2); - fld_x(ExternalAddress(SP)); //0xaaab, 0xaaaa, 0xaa - faddp(1); - fmulp(2); - faddp(1); - fld_d(Address(ONES, RelocationHolder::none).plus_disp(rsi, Address::times_8)); - fmulp(2); - fmul(1); - faddp(1); - fstp_d(Address(eax, 8)); - jcc(Assembler::equal, B1_32); - - bind(B1_31); - fldcw(Address(esp, 30)); - - bind(B1_32); - addl(esp, 52); - pop(ebx); - pop(edi); - pop(esi); - movl(esp, ebp); - pop(ebp); - ret(0); - - bind(B1_33); - testb(Address(esp, 36), 1); - jcc(Assembler::equal, B1_41); - - bind(B1_34); - fld_s(0); - testb(edx, 2); - fmul(1); - fld_s(0); - fmul(1); - jcc(Assembler::equal, B1_38); - - bind(B1_35); - fld_x(ExternalAddress(84 + SP)); //0x8610, 0x307f, 0x62 - testl(ebx, ebx); - fmul(1); - fld_x(ExternalAddress(72 + SP)); //0x44a6, 0xed1a, 0x29 - fmul(2); - fld_x(ExternalAddress(60 + SP)); //0xbf33, 0x8bb4, 0x2f - faddp(2); - fxch(1); - fmul(2); - fld_x(ExternalAddress(48 + SP)); //0x825b, 0x3997, 0x2b - faddp(2); - fxch(1); - fmul(2); - fld_x(ExternalAddress(36 + SP)); //0x45f6, 0xb616, 0x1d - faddp(2); - fxch(1); - fmul(2); - fld_x(ExternalAddress(24 + SP)); //0xc527, 0x0d00, 0x00 - faddp(2); - fxch(1); - fmul(2); - fld_x(ExternalAddress(12 + SP)); //0x8887, 0x8888, 0x88 - faddp(2); - fxch(1); - fmulp(2); - fld_x(ExternalAddress(SP)); //0xaaab, 0xaaaa, 0xaa - faddp(1); - fmulp(2); - faddp(1); - fld_d(Address(ONES, RelocationHolder::none).plus_disp(eax, Address::times_8)); - fmulp(2); - fmul(1); - movl(eax, Address(esp, 32)); - faddp(1); - fstp_d(Address(eax, 0)); - jcc(Assembler::equal, B1_37); - - bind(B1_36); - fldcw(Address(esp, 30)); - - bind(B1_37); - addl(esp, 52); - pop(ebx); - pop(edi); - pop(esi); - movl(esp, ebp); - pop(ebp); - ret(0); - - bind(B1_38); - fstp_d(2); - fld_x(ExternalAddress(84 + CP)); //0x3ac6, 0x0ba0, 0x07 - testl(ebx, ebx); - fmul(2); - fld_x(ExternalAddress(72 + CP)); //0xdaba, 0xfe79, 0xea - fmul(3); - fld_x(ExternalAddress(60 + CP)); //0xd84d, 0xadee, 0xc6 - faddp(2); - fxch(1); - fmul(3); - fld_x(ExternalAddress(48 + CP)); //0x03fe, 0x3f65, 0x7d - faddp(2); - fxch(1); - fmul(3); - fld_x(ExternalAddress(36 + CP)); //0xf024, 0x0cac, 0x00 - faddp(2); - fxch(1); - fmul(3); - fld_x(ExternalAddress(24 + CP)); //0x9c2f, 0x0b60, 0x60 - faddp(2); - fxch(1); - fmul(3); - fld_x(ExternalAddress(12 + CP)); //0xaaa5, 0xaaaa, 0xaa - faddp(2); - fxch(1); - fmulp(3); - fld_x(ExternalAddress(CP)); //0x0000, 0x0000, 0x00 - faddp(1); - fmulp(1); - faddp(1); - fld_d(Address(ONES, RelocationHolder::none).plus_disp(eax, Address::times_8)); - fmula(1); - movl(eax, Address(esp, 32)); - faddp(1); - fstp_d(Address(eax, 0)); - jcc(Assembler::equal, B1_40); - - bind(B1_39); - fldcw(Address(esp, 30)); - bind(B1_40); - addl(esp, 52); - pop(ebx); - pop(edi); - pop(esi); - movl(esp, ebp); - pop(ebp); - ret(0); - bind(B1_41); - fstp_d(0); - addl(esp, 52); - pop(ebx); - pop(edi); - pop(esi); - movl(esp, ebp); - pop(ebp); - ret(0); - bind(B1_42); - xorl(ebx, ebx); - jmp(B1_8); - bind(B1_43); - xorl(ebx, ebx); - jmp(B1_15); -} - -ATTRIBUTE_ALIGNED(16) static const juint _static_const_table_sin[] = -{ - 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, - 0x00000000UL, 0x00000000UL, 0x3ff00000UL, 0x176d6d31UL, 0xbf73b92eUL, - 0xbc29b42cUL, 0x3fb917a6UL, 0xe0000000UL, 0xbc3e2718UL, 0x00000000UL, - 0x3ff00000UL, 0x011469fbUL, 0xbf93ad06UL, 0x3c69a60bUL, 0x3fc8f8b8UL, - 0xc0000000UL, 0xbc626d19UL, 0x00000000UL, 0x3ff00000UL, 0x939d225aUL, - 0xbfa60beaUL, 0x2ed59f06UL, 0x3fd29406UL, 0xa0000000UL, 0xbc75d28dUL, - 0x00000000UL, 0x3ff00000UL, 0x866b95cfUL, 0xbfb37ca1UL, 0xa6aea963UL, - 0x3fd87de2UL, 0xe0000000UL, 0xbc672cedUL, 0x00000000UL, 0x3ff00000UL, - 0x73fa1279UL, 0xbfbe3a68UL, 0x3806f63bUL, 0x3fde2b5dUL, 0x20000000UL, - 0x3c5e0d89UL, 0x00000000UL, 0x3ff00000UL, 0x5bc57974UL, 0xbfc59267UL, - 0x39ae68c8UL, 0x3fe1c73bUL, 0x20000000UL, 0x3c8b25ddUL, 0x00000000UL, - 0x3ff00000UL, 0x53aba2fdUL, 0xbfcd0dfeUL, 0x25091dd6UL, 0x3fe44cf3UL, - 0x20000000UL, 0x3c68076aUL, 0x00000000UL, 0x3ff00000UL, 0x99fcef32UL, - 0x3fca8279UL, 0x667f3bcdUL, 0x3fe6a09eUL, 0x20000000UL, 0xbc8bdd34UL, - 0x00000000UL, 0x3fe00000UL, 0x94247758UL, 0x3fc133ccUL, 0x6b151741UL, - 0x3fe8bc80UL, 0x20000000UL, 0xbc82c5e1UL, 0x00000000UL, 0x3fe00000UL, - 0x9ae68c87UL, 0x3fac73b3UL, 0x290ea1a3UL, 0x3fea9b66UL, 0xe0000000UL, - 0x3c39f630UL, 0x00000000UL, 0x3fe00000UL, 0x7f909c4eUL, 0xbf9d4a2cUL, - 0xf180bdb1UL, 0x3fec38b2UL, 0x80000000UL, 0xbc76e0b1UL, 0x00000000UL, - 0x3fe00000UL, 0x65455a75UL, 0xbfbe0875UL, 0xcf328d46UL, 0x3fed906bUL, - 0x20000000UL, 0x3c7457e6UL, 0x00000000UL, 0x3fe00000UL, 0x76acf82dUL, - 0x3fa4a031UL, 0x56c62ddaUL, 0x3fee9f41UL, 0xe0000000UL, 0x3c8760b1UL, - 0x00000000UL, 0x3fd00000UL, 0x0e5967d5UL, 0xbfac1d1fUL, 0xcff75cb0UL, - 0x3fef6297UL, 0x20000000UL, 0x3c756217UL, 0x00000000UL, 0x3fd00000UL, - 0x0f592f50UL, 0xbf9ba165UL, 0xa3d12526UL, 0x3fefd88dUL, 0x40000000UL, - 0xbc887df6UL, 0x00000000UL, 0x3fc00000UL, 0x00000000UL, 0x00000000UL, - 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, - 0x00000000UL, 0x0f592f50UL, 0x3f9ba165UL, 0xa3d12526UL, 0x3fefd88dUL, - 0x40000000UL, 0xbc887df6UL, 0x00000000UL, 0xbfc00000UL, 0x0e5967d5UL, - 0x3fac1d1fUL, 0xcff75cb0UL, 0x3fef6297UL, 0x20000000UL, 0x3c756217UL, - 0x00000000UL, 0xbfd00000UL, 0x76acf82dUL, 0xbfa4a031UL, 0x56c62ddaUL, - 0x3fee9f41UL, 0xe0000000UL, 0x3c8760b1UL, 0x00000000UL, 0xbfd00000UL, - 0x65455a75UL, 0x3fbe0875UL, 0xcf328d46UL, 0x3fed906bUL, 0x20000000UL, - 0x3c7457e6UL, 0x00000000UL, 0xbfe00000UL, 0x7f909c4eUL, 0x3f9d4a2cUL, - 0xf180bdb1UL, 0x3fec38b2UL, 0x80000000UL, 0xbc76e0b1UL, 0x00000000UL, - 0xbfe00000UL, 0x9ae68c87UL, 0xbfac73b3UL, 0x290ea1a3UL, 0x3fea9b66UL, - 0xe0000000UL, 0x3c39f630UL, 0x00000000UL, 0xbfe00000UL, 0x94247758UL, - 0xbfc133ccUL, 0x6b151741UL, 0x3fe8bc80UL, 0x20000000UL, 0xbc82c5e1UL, - 0x00000000UL, 0xbfe00000UL, 0x99fcef32UL, 0xbfca8279UL, 0x667f3bcdUL, - 0x3fe6a09eUL, 0x20000000UL, 0xbc8bdd34UL, 0x00000000UL, 0xbfe00000UL, - 0x53aba2fdUL, 0x3fcd0dfeUL, 0x25091dd6UL, 0x3fe44cf3UL, 0x20000000UL, - 0x3c68076aUL, 0x00000000UL, 0xbff00000UL, 0x5bc57974UL, 0x3fc59267UL, - 0x39ae68c8UL, 0x3fe1c73bUL, 0x20000000UL, 0x3c8b25ddUL, 0x00000000UL, - 0xbff00000UL, 0x73fa1279UL, 0x3fbe3a68UL, 0x3806f63bUL, 0x3fde2b5dUL, - 0x20000000UL, 0x3c5e0d89UL, 0x00000000UL, 0xbff00000UL, 0x866b95cfUL, - 0x3fb37ca1UL, 0xa6aea963UL, 0x3fd87de2UL, 0xe0000000UL, 0xbc672cedUL, - 0x00000000UL, 0xbff00000UL, 0x939d225aUL, 0x3fa60beaUL, 0x2ed59f06UL, - 0x3fd29406UL, 0xa0000000UL, 0xbc75d28dUL, 0x00000000UL, 0xbff00000UL, - 0x011469fbUL, 0x3f93ad06UL, 0x3c69a60bUL, 0x3fc8f8b8UL, 0xc0000000UL, - 0xbc626d19UL, 0x00000000UL, 0xbff00000UL, 0x176d6d31UL, 0x3f73b92eUL, - 0xbc29b42cUL, 0x3fb917a6UL, 0xe0000000UL, 0xbc3e2718UL, 0x00000000UL, - 0xbff00000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, - 0x00000000UL, 0x00000000UL, 0x00000000UL, 0xbff00000UL, 0x176d6d31UL, - 0x3f73b92eUL, 0xbc29b42cUL, 0xbfb917a6UL, 0xe0000000UL, 0x3c3e2718UL, - 0x00000000UL, 0xbff00000UL, 0x011469fbUL, 0x3f93ad06UL, 0x3c69a60bUL, - 0xbfc8f8b8UL, 0xc0000000UL, 0x3c626d19UL, 0x00000000UL, 0xbff00000UL, - 0x939d225aUL, 0x3fa60beaUL, 0x2ed59f06UL, 0xbfd29406UL, 0xa0000000UL, - 0x3c75d28dUL, 0x00000000UL, 0xbff00000UL, 0x866b95cfUL, 0x3fb37ca1UL, - 0xa6aea963UL, 0xbfd87de2UL, 0xe0000000UL, 0x3c672cedUL, 0x00000000UL, - 0xbff00000UL, 0x73fa1279UL, 0x3fbe3a68UL, 0x3806f63bUL, 0xbfde2b5dUL, - 0x20000000UL, 0xbc5e0d89UL, 0x00000000UL, 0xbff00000UL, 0x5bc57974UL, - 0x3fc59267UL, 0x39ae68c8UL, 0xbfe1c73bUL, 0x20000000UL, 0xbc8b25ddUL, - 0x00000000UL, 0xbff00000UL, 0x53aba2fdUL, 0x3fcd0dfeUL, 0x25091dd6UL, - 0xbfe44cf3UL, 0x20000000UL, 0xbc68076aUL, 0x00000000UL, 0xbff00000UL, - 0x99fcef32UL, 0xbfca8279UL, 0x667f3bcdUL, 0xbfe6a09eUL, 0x20000000UL, - 0x3c8bdd34UL, 0x00000000UL, 0xbfe00000UL, 0x94247758UL, 0xbfc133ccUL, - 0x6b151741UL, 0xbfe8bc80UL, 0x20000000UL, 0x3c82c5e1UL, 0x00000000UL, - 0xbfe00000UL, 0x9ae68c87UL, 0xbfac73b3UL, 0x290ea1a3UL, 0xbfea9b66UL, - 0xe0000000UL, 0xbc39f630UL, 0x00000000UL, 0xbfe00000UL, 0x7f909c4eUL, - 0x3f9d4a2cUL, 0xf180bdb1UL, 0xbfec38b2UL, 0x80000000UL, 0x3c76e0b1UL, - 0x00000000UL, 0xbfe00000UL, 0x65455a75UL, 0x3fbe0875UL, 0xcf328d46UL, - 0xbfed906bUL, 0x20000000UL, 0xbc7457e6UL, 0x00000000UL, 0xbfe00000UL, - 0x76acf82dUL, 0xbfa4a031UL, 0x56c62ddaUL, 0xbfee9f41UL, 0xe0000000UL, - 0xbc8760b1UL, 0x00000000UL, 0xbfd00000UL, 0x0e5967d5UL, 0x3fac1d1fUL, - 0xcff75cb0UL, 0xbfef6297UL, 0x20000000UL, 0xbc756217UL, 0x00000000UL, - 0xbfd00000UL, 0x0f592f50UL, 0x3f9ba165UL, 0xa3d12526UL, 0xbfefd88dUL, - 0x40000000UL, 0x3c887df6UL, 0x00000000UL, 0xbfc00000UL, 0x00000000UL, - 0x00000000UL, 0x00000000UL, 0xbff00000UL, 0x00000000UL, 0x00000000UL, - 0x00000000UL, 0x00000000UL, 0x0f592f50UL, 0xbf9ba165UL, 0xa3d12526UL, - 0xbfefd88dUL, 0x40000000UL, 0x3c887df6UL, 0x00000000UL, 0x3fc00000UL, - 0x0e5967d5UL, 0xbfac1d1fUL, 0xcff75cb0UL, 0xbfef6297UL, 0x20000000UL, - 0xbc756217UL, 0x00000000UL, 0x3fd00000UL, 0x76acf82dUL, 0x3fa4a031UL, - 0x56c62ddaUL, 0xbfee9f41UL, 0xe0000000UL, 0xbc8760b1UL, 0x00000000UL, - 0x3fd00000UL, 0x65455a75UL, 0xbfbe0875UL, 0xcf328d46UL, 0xbfed906bUL, - 0x20000000UL, 0xbc7457e6UL, 0x00000000UL, 0x3fe00000UL, 0x7f909c4eUL, - 0xbf9d4a2cUL, 0xf180bdb1UL, 0xbfec38b2UL, 0x80000000UL, 0x3c76e0b1UL, - 0x00000000UL, 0x3fe00000UL, 0x9ae68c87UL, 0x3fac73b3UL, 0x290ea1a3UL, - 0xbfea9b66UL, 0xe0000000UL, 0xbc39f630UL, 0x00000000UL, 0x3fe00000UL, - 0x94247758UL, 0x3fc133ccUL, 0x6b151741UL, 0xbfe8bc80UL, 0x20000000UL, - 0x3c82c5e1UL, 0x00000000UL, 0x3fe00000UL, 0x99fcef32UL, 0x3fca8279UL, - 0x667f3bcdUL, 0xbfe6a09eUL, 0x20000000UL, 0x3c8bdd34UL, 0x00000000UL, - 0x3fe00000UL, 0x53aba2fdUL, 0xbfcd0dfeUL, 0x25091dd6UL, 0xbfe44cf3UL, - 0x20000000UL, 0xbc68076aUL, 0x00000000UL, 0x3ff00000UL, 0x5bc57974UL, - 0xbfc59267UL, 0x39ae68c8UL, 0xbfe1c73bUL, 0x20000000UL, 0xbc8b25ddUL, - 0x00000000UL, 0x3ff00000UL, 0x73fa1279UL, 0xbfbe3a68UL, 0x3806f63bUL, - 0xbfde2b5dUL, 0x20000000UL, 0xbc5e0d89UL, 0x00000000UL, 0x3ff00000UL, - 0x866b95cfUL, 0xbfb37ca1UL, 0xa6aea963UL, 0xbfd87de2UL, 0xe0000000UL, - 0x3c672cedUL, 0x00000000UL, 0x3ff00000UL, 0x939d225aUL, 0xbfa60beaUL, - 0x2ed59f06UL, 0xbfd29406UL, 0xa0000000UL, 0x3c75d28dUL, 0x00000000UL, - 0x3ff00000UL, 0x011469fbUL, 0xbf93ad06UL, 0x3c69a60bUL, 0xbfc8f8b8UL, - 0xc0000000UL, 0x3c626d19UL, 0x00000000UL, 0x3ff00000UL, 0x176d6d31UL, - 0xbf73b92eUL, 0xbc29b42cUL, 0xbfb917a6UL, 0xe0000000UL, 0x3c3e2718UL, - 0x00000000UL, 0x3ff00000UL, 0x55555555UL, 0xbfc55555UL, 0x00000000UL, - 0xbfe00000UL, 0x11111111UL, 0x3f811111UL, 0x55555555UL, 0x3fa55555UL, - 0x1a01a01aUL, 0xbf2a01a0UL, 0x16c16c17UL, 0xbf56c16cUL, 0xa556c734UL, - 0x3ec71de3UL, 0x1a01a01aUL, 0x3efa01a0UL, 0x1a600000UL, 0x3d90b461UL, - 0x1a600000UL, 0x3d90b461UL, 0x54400000UL, 0x3fb921fbUL, 0x00000000UL, - 0x00000000UL, 0x2e037073UL, 0x3b63198aUL, 0x00000000UL, 0x00000000UL, - 0x6dc9c883UL, 0x40245f30UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, - 0x43380000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x43600000UL, - 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x3c800000UL, 0x00000000UL, - 0x00000000UL, 0xffffffffUL, 0x3fefffffUL, 0x00000000UL, 0x00000000UL, - 0x00000000UL, 0x80000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, - 0x80000000UL, 0x00000000UL, 0x80000000UL, 0x00000000UL, 0x3fe00000UL, - 0x00000000UL, 0x3fe00000UL -}; - -void MacroAssembler::fast_sin(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, - XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, - Register eax, Register ebx, Register edx) { - - Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2; - Label L_2TAG_PACKET_4_0_2; - - assert_different_registers(eax, ebx, edx); - - address static_const_table_sin = (address)_static_const_table_sin; - - subl(rsp, 120); - movl(Address(rsp, 56), ebx); - lea(ebx, ExternalAddress(static_const_table_sin)); - movsd(xmm0, Address(rsp, 128)); - pextrw(eax, xmm0, 3); - andl(eax, 32767); - subl(eax, 12336); - cmpl(eax, 4293); - jcc(Assembler::above, L_2TAG_PACKET_0_0_2); - movsd(xmm1, Address(ebx, 2160)); - mulsd(xmm1, xmm0); - movsd(xmm5, Address(ebx, 2272)); - movdqu(xmm4, Address(ebx, 2256)); - pand(xmm4, xmm0); - por(xmm5, xmm4); - movsd(xmm3, Address(ebx, 2128)); - movdqu(xmm2, Address(ebx, 2112)); - addpd(xmm1, xmm5); - cvttsd2sil(edx, xmm1); - cvtsi2sdl(xmm1, edx); - mulsd(xmm3, xmm1); - unpcklpd(xmm1, xmm1); - addl(edx, 1865216); - movdqu(xmm4, xmm0); - andl(edx, 63); - movdqu(xmm5, Address(ebx, 2096)); - lea(eax, Address(ebx, 0)); - shll(edx, 5); - addl(eax, edx); - mulpd(xmm2, xmm1); - subsd(xmm0, xmm3); - mulsd(xmm1, Address(ebx, 2144)); - subsd(xmm4, xmm3); - movsd(xmm7, Address(eax, 8)); - unpcklpd(xmm0, xmm0); - movapd(xmm3, xmm4); - subsd(xmm4, xmm2); - mulpd(xmm5, xmm0); - subpd(xmm0, xmm2); - movdqu(xmm6, Address(ebx, 2064)); - mulsd(xmm7, xmm4); - subsd(xmm3, xmm4); - mulpd(xmm5, xmm0); - mulpd(xmm0, xmm0); - subsd(xmm3, xmm2); - movdqu(xmm2, Address(eax, 0)); - subsd(xmm1, xmm3); - movsd(xmm3, Address(eax, 24)); - addsd(xmm2, xmm3); - subsd(xmm7, xmm2); - mulsd(xmm2, xmm4); - mulpd(xmm6, xmm0); - mulsd(xmm3, xmm4); - mulpd(xmm2, xmm0); - mulpd(xmm0, xmm0); - addpd(xmm5, Address(ebx, 2080)); - mulsd(xmm4, Address(eax, 0)); - addpd(xmm6, Address(ebx, 2048)); - mulpd(xmm5, xmm0); - movapd(xmm0, xmm3); - addsd(xmm3, Address(eax, 8)); - mulpd(xmm1, xmm7); - movapd(xmm7, xmm4); - addsd(xmm4, xmm3); - addpd(xmm6, xmm5); - movsd(xmm5, Address(eax, 8)); - subsd(xmm5, xmm3); - subsd(xmm3, xmm4); - addsd(xmm1, Address(eax, 16)); - mulpd(xmm6, xmm2); - addsd(xmm5, xmm0); - addsd(xmm3, xmm7); - addsd(xmm1, xmm5); - addsd(xmm1, xmm3); - addsd(xmm1, xmm6); - unpckhpd(xmm6, xmm6); - addsd(xmm1, xmm6); - addsd(xmm4, xmm1); - movsd(Address(rsp, 0), xmm4); - fld_d(Address(rsp, 0)); - jmp(L_2TAG_PACKET_1_0_2); - - bind(L_2TAG_PACKET_0_0_2); - jcc(Assembler::greater, L_2TAG_PACKET_2_0_2); - shrl(eax, 4); - cmpl(eax, 268434685); - jcc(Assembler::notEqual, L_2TAG_PACKET_3_0_2); - movsd(Address(rsp, 0), xmm0); - fld_d(Address(rsp, 0)); - jmp(L_2TAG_PACKET_1_0_2); - - bind(L_2TAG_PACKET_3_0_2); - movsd(xmm3, Address(ebx, 2192)); - mulsd(xmm3, xmm0); - subsd(xmm3, xmm0); - mulsd(xmm3, Address(ebx, 2208)); - movsd(Address(rsp, 0), xmm0); - fld_d(Address(rsp, 0)); - jmp(L_2TAG_PACKET_1_0_2); - - bind(L_2TAG_PACKET_2_0_2); - movl(eax, Address(rsp, 132)); - andl(eax, 2146435072); - cmpl(eax, 2146435072); - jcc(Assembler::equal, L_2TAG_PACKET_4_0_2); - subl(rsp, 32); - movsd(Address(rsp, 0), xmm0); - lea(eax, Address(rsp, 40)); - movl(Address(rsp, 8), eax); - movl(eax, 2); - movl(Address(rsp, 12), eax); - call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlibm_sin_cos_huge()))); - addl(rsp, 32); - fld_d(Address(rsp, 16)); - jmp(L_2TAG_PACKET_1_0_2); - bind(L_2TAG_PACKET_4_0_2); - fld_d(Address(rsp, 128)); - fmul_d(Address(ebx, 2240)); - bind(L_2TAG_PACKET_1_0_2); - movl(ebx, Address(rsp, 56)); -} diff --git a/src/hotspot/cpu/x86/macroAssembler_x86_32_tan.cpp b/src/hotspot/cpu/x86/macroAssembler_x86_32_tan.cpp deleted file mode 100644 index 4e8be8a1f1d..00000000000 --- a/src/hotspot/cpu/x86/macroAssembler_x86_32_tan.cpp +++ /dev/null @@ -1,1172 +0,0 @@ -/* -* Copyright (c) 2016, 2021, Intel Corporation. All rights reserved. -* Intel Math Library (LIBM) Source Code -* -* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -* -* This code is free software; you can redistribute it and/or modify it -* under the terms of the GNU General Public License version 2 only, as -* published by the Free Software Foundation. -* -* This code is distributed in the hope that it will be useful, but WITHOUT -* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -* version 2 for more details (a copy is included in the LICENSE file that -* accompanied this code). -* -* You should have received a copy of the GNU General Public License version -* 2 along with this work; if not, write to the Free Software Foundation, -* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -* -* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -* or visit www.oracle.com if you need additional information or have any -* questions. -* -*/ - -#include "asm/assembler.hpp" -#include "asm/assembler.inline.hpp" -#include "macroAssembler_x86.hpp" -#include "runtime/stubRoutines.hpp" -#include "utilities/globalDefinitions.hpp" - -/******************************************************************************/ -// ALGORITHM DESCRIPTION - TAN() -// --------------------- -// -// Polynomials coefficients and other constants. -// -// Note that in this algorithm, there is a different polynomial for -// each breakpoint, so there are 32 sets of polynomial coefficients -// as well as 32 instances of the other constants. -// -// The polynomial coefficients and constants are offset from the start -// of the main block as follows: -// -// 0: c8 | c0 -// 16: c9 | c1 -// 32: c10 | c2 -// 48: c11 | c3 -// 64: c12 | c4 -// 80: c13 | c5 -// 96: c14 | c6 -// 112: c15 | c7 -// 128: T_hi -// 136: T_lo -// 144: Sigma -// 152: T_hl -// 160: Tau -// 168: Mask -// 176: (end of block) -// -// The total table size is therefore 5632 bytes. -// -// Note that c0 and c1 are always zero. We could try storing -// other constants here, and just loading the low part of the -// SIMD register in these cases, after ensuring the high part -// is zero. -// -// The higher terms of the polynomial are computed in the *low* -// part of the SIMD register. This is so we can overlap the -// multiplication by r^8 and the unpacking of the other part. -// -// The constants are: -// T_hi + T_lo = accurate constant term in power series -// Sigma + T_hl = accurate coefficient of r in power series (Sigma=1 bit) -// Tau = multiplier for the reciprocal, always -1 or 0 -// -// The basic reconstruction formula using these constants is: -// -// High = tau * recip_hi + t_hi -// Med = (sgn * r + t_hl * r)_hi -// Low = (sgn * r + t_hl * r)_lo + -// tau * recip_lo + T_lo + (T_hl + sigma) * c + pol -// -// where pol = c0 + c1 * r + c2 * r^2 + ... + c15 * r^15 -// -// (c0 = c1 = 0, but using them keeps SIMD regularity) -// -// We then do a compensated sum High + Med, add the low parts together -// and then do the final sum. -// -// Here recip_hi + recip_lo is an accurate reciprocal of the remainder -// modulo pi/2 -// -// Special cases: -// tan(NaN) = quiet NaN, and raise invalid exception -// tan(INF) = NaN and raise invalid exception -// tan(+/-0) = +/-0 -// -/******************************************************************************/ - -// The 32 bit code is at most SSE2 compliant - -ATTRIBUTE_ALIGNED(16) static const jushort _TP[] = -{ - 0x4cd6, 0xaf6c, 0xc710, 0xc662, 0xbffd, 0x0000, 0x4b06, 0xb0ac, 0xd3b2, 0xcc2c, - 0x3ff9, 0x0000, 0x00e3, 0xc850, 0xaa28, 0x9533, 0xbff3, 0x0000, 0x2ff0, 0x466d, - 0x1a3b, 0xb266, 0x3fe5, 0x0000 -}; - -ATTRIBUTE_ALIGNED(16) static const jushort _TQ[] = -{ - 0x399c, 0x8391, 0x154c, 0x94ca, 0xbfff, 0x0000, 0xb6a3, 0xc36a, 0x44e2, 0x8a2c, - 0x3ffe, 0x0000, 0xb70f, 0xd068, 0xa6ce, 0xe9dd, 0xbff9, 0x0000, 0x820f, 0x51ce, - 0x7d76, 0x9bff, 0x3ff3, 0x0000 -}; - -ATTRIBUTE_ALIGNED(16) static const jushort _GP[] = -{ - 0xaaab, 0xaaaa, 0xaaaa, 0xaaaa, 0xbffd, 0x0000, 0xb62f, 0x0b60, 0x60b6, 0xb60b, - 0xbff9, 0x0000, 0xdfa7, 0x08aa, 0x55e0, 0x8ab3, 0xbff6, 0x0000, 0x85a0, 0xa819, - 0xbc99, 0xddeb, 0xbff2, 0x0000, 0x7065, 0x6a37, 0x795f, 0xb354, 0xbfef, 0x0000, - 0xa8f9, 0x83f1, 0x2ec8, 0x9140, 0xbfec, 0x0000, 0xf3ca, 0x8c96, 0x8e0b, 0xeb6d, - 0xbfe8, 0x0000, 0x355b, 0xd910, 0x67c9, 0xbed3, 0xbfe5, 0x0000, 0x286b, 0xb49e, - 0xb854, 0x9a98, 0xbfe2, 0x0000, 0x0871, 0x1a2f, 0x6477, 0xfcc4, 0xbfde, 0x0000, - 0xa559, 0x1da9, 0xaed2, 0xba76, 0xbfdb, 0x0000, 0x00a3, 0x7fea, 0x9bc3, 0xf205, - 0xbfd8, 0x0000 -}; - -void MacroAssembler::libm_tancot_huge(XMMRegister xmm0, XMMRegister xmm1, Register eax, Register ecx, Register edx, Register ebx, Register esi, Register edi, Register ebp, Register esp) { - Label B1_1, B1_2, B1_3, B1_4, B1_5, B1_6, B1_7, B1_8, B1_9, B1_10, B1_11, B1_12; - Label B1_13, B1_14, B1_15, B1_16, B1_17, B1_18, B1_19, B1_20, B1_21, B1_22, B1_23; - Label B1_24, B1_25, B1_26, B1_27, B1_28, B1_29, B1_30, B1_31, B1_32, B1_33, B1_34; - Label B1_35, B1_36, B1_37, B1_38, B1_39, B1_40, B1_43; - - assert_different_registers(ebx, eax, ecx, edx, esi, edi, ebp, esp); - - address TP = (address)_TP; - address TQ = (address)_TQ; - address GP = (address)_GP; - - bind(B1_1); - push(ebp); - movl(ebp, esp); - andl(esp, -64); - push(esi); - push(edi); - push(ebx); - subl(esp, 52); - movl(eax, Address(ebp, 16)); - movl(ebx, Address(ebp, 20)); - movl(Address(esp, 40), eax); - - bind(B1_2); - fnstcw(Address(esp, 38)); - - bind(B1_3); - movl(edx, Address(ebp, 12)); - movl(eax, edx); - andl(eax, 2147483647); - shrl(edx, 31); - movl(Address(esp, 44), edx); - cmpl(eax, 1104150528); - jcc(Assembler::aboveEqual, B1_11); - - bind(B1_4); - movsd(xmm1, Address(ebp, 8)); - movzwl(ecx, Address(esp, 38)); - movl(edx, ecx); - andl(edx, 768); - andps(xmm1, ExternalAddress(L_2IL0FLOATPACKET_0)); //0xffffffffUL, 0x7fffffffUL, 0x00000000UL, 0x00000000UL - cmpl(edx, 768); - movsd(xmm0, ExternalAddress(PI4_INV)); ////0x6dc9c883UL, 0x3ff45f30UL - mulsd(xmm0, xmm1); - movsd(Address(ebp, 8), xmm1); - movsd(Address(esp, 0), xmm0); - jcc(Assembler::equal, B1_39); - - bind(B1_5); - orl(ecx, -64768); - movw(Address(esp, 36), ecx); - - bind(B1_6); - fldcw(Address(esp, 36)); - - bind(B1_7); - movsd(xmm1, Address(ebp, 8)); - movl(edi, 1); - - bind(B1_8); - movl(Address(esp, 12), esi); - movl(esi, Address(esp, 4)); - movl(edx, esi); - movl(Address(esp, 24), edi); - movl(edi, esi); - shrl(edi, 20); - andl(edx, 1048575); - movl(ecx, edi); - orl(edx, 1048576); - negl(ecx); - addl(edi, 13); - movl(Address(esp, 8), ebx); - addl(ecx, 19); - movl(ebx, edx); - movl(Address(esp, 28), ecx); - shrl(ebx); - movl(ecx, edi); - shll(edx); - movl(ecx, Address(esp, 28)); - movl(edi, Address(esp, 0)); - shrl(edi); - orl(edx, edi); - cmpl(esi, 1094713344); - movsd(Address(esp, 16), xmm1); - fld_d(Address(esp, 16)); - cmov32(Assembler::below, edx, ebx); - movl(edi, Address(esp, 24)); - movl(esi, Address(esp, 12)); - lea(ebx, Address(edx, 1)); - andl(ebx, -2); - movl(Address(esp, 16), ebx); - cmpl(eax, 1094713344); - fild_s(Address(esp, 16)); - movl(ebx, Address(esp, 8)); - jcc(Assembler::aboveEqual, B1_10); - - bind(B1_9); - fld_d(ExternalAddress(PI4X3)); //0x54443000UL, 0xbfe921fbUL - fmul(1); - faddp(2); - fld_d(ExternalAddress(PI4X3 + 8)); //0x3b39a000UL, 0x3d373dcbUL - fmul(1); - faddp(2); - fld_d(ExternalAddress(PI4X3 + 16)); //0xe0e68948UL, 0xba845c06UL - fmulp(1); - faddp(1); - jmp(B1_17); - - bind(B1_10); - fld_d(ExternalAddress(PI4X4)); //0x54400000UL, 0xbfe921fbUL - fmul(1); - faddp(2); - fld_d(ExternalAddress(PI4X4 + 8)); //0x1a600000UL, 0xbdc0b461UL - fmul(1); - faddp(2); - fld_d(ExternalAddress(PI4X4 + 16)); //0x2e000000UL, 0xbb93198aUL - fmul(1); - faddp(2); - fld_d(ExternalAddress(PI4X4 + 24)); //0x252049c1UL, 0xb96b839aUL - fmulp(1); - faddp(1); - jmp(B1_17); - - bind(B1_11); - movzwl(edx, Address(esp, 38)); - movl(eax, edx); - andl(eax, 768); - cmpl(eax, 768); - jcc(Assembler::equal, B1_40); - - bind(B1_12); - orl(edx, -64768); - movw(Address(esp, 36), edx); - - bind(B1_13); - fldcw(Address(esp, 36)); - - bind(B1_14); - movl(edi, 1); - - bind(B1_15); - movsd(xmm0, Address(ebp, 8)); - addl(esp, -32); - andps(xmm0, ExternalAddress(L_2IL0FLOATPACKET_0)); //0xffffffffUL, 0x7fffffffUL, 0x00000000UL, 0x00000000UL - lea(eax, Address(esp, 32)); - movsd(Address(eax, 16), xmm0); - fld_d(Address(eax, 16)); - fstp_x(Address(esp, 0)); - movl(Address(esp, 12), 0); - movl(Address(esp, 16), eax); - call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlibm_reduce_pi04l()))); - - bind(B1_43); - movl(edx, eax); - addl(esp, 32); - - bind(B1_16); - fld_d(Address(esp, 0)); - fld_d(Address(esp, 8)); - faddp(1); - - bind(B1_17); - movl(eax, ebx); - andl(eax, 3); - cmpl(eax, 3); - jcc(Assembler::notEqual, B1_24); - - bind(B1_18); - fld_d(ExternalAddress(ONES)); - incl(edx); - fdiv(1); - testb(edx, 2); - fstp_x(Address(esp, 24)); - fld_s(0); - fmul(1); - fld_s(0); - fmul(1); - fld_x(ExternalAddress(36 + TP)); //0x2ff0, 0x466d, 0x1a - fmul(2); - fld_x(ExternalAddress(24 + TP)); //0x00e3, 0xc850, 0xaa - faddp(1); - fmul(2); - fld_x(ExternalAddress(12 + TP)); //0x4b06, 0xb0ac, 0xd3 - faddp(1); - fmul(2); - fld_x(ExternalAddress(36 + TQ)); //0x820f, 0x51ce, 0x7d - fmul(3); - fld_x(ExternalAddress(24 + TQ)); //0xb70f, 0xd068, 0xa6 - faddp(1); - fmul(3); - fld_x(ExternalAddress(12 + TQ)); //0xb6a3, 0xc36a, 0x44 - faddp(1); - fmul(3); - fld_x(ExternalAddress(TQ)); //0x399c, 0x8391, 0x15 - faddp(1); - fld_x(ExternalAddress(TP)); //0x4cd6, 0xaf6c, 0xc7 - faddp(2); - fld_x(ExternalAddress(132 + GP)); //0x00a3, 0x7fea, 0x9b - fmul(3); - fld_x(ExternalAddress(120 + GP)); //0xa559, 0x1da9, 0xae - fmul(4); - fld_x(ExternalAddress(108 + GP)); //0x0871, 0x1a2f, 0x64 - faddp(2); - fxch(1); - fmul(4); - fld_x(ExternalAddress(96 + GP)); //0x286b, 0xb49e, 0xb8 - faddp(2); - fxch(1); - fmul(4); - fld_x(ExternalAddress(84 + GP)); //0x355b, 0xd910, 0x67 - faddp(2); - fxch(1); - fmul(4); - fld_x(ExternalAddress(72 + GP)); //0x8c96, 0x8e0b, 0xeb - faddp(2); - fxch(1); - fmul(4); - fld_x(ExternalAddress(60 + GP)); //0xa8f9, 0x83f1, 0x2e - faddp(2); - fxch(1); - fmul(4); - fld_x(ExternalAddress(48 + GP)); //0x7065, 0x6a37, 0x79 - faddp(2); - fxch(1); - fmul(4); - fld_x(ExternalAddress(36 + GP)); //0x85a0, 0xa819, 0xbc - faddp(2); - fxch(1); - fmul(4); - fld_x(ExternalAddress(24 + GP)); //0xdfa7, 0x08aa, 0x55 - faddp(2); - fxch(1); - fmulp(4); - fld_x(ExternalAddress(12 + GP)); //0xb62f, 0x0b60, 0x60 - faddp(1); - fmul(4); - fmul(5); - fld_x(ExternalAddress(GP)); //0xaaab, 0xaaaa, 0xaa - faddp(4); - fxch(3); - fmul(5); - faddp(3); - jcc(Assembler::equal, B1_20); - - bind(B1_19); - fld_x(Address(esp, 24)); - fxch(1); - fdivrp(2); - fxch(1); - fmulp(3); - movl(eax, Address(esp, 44)); - xorl(eax, 1); - fxch(2); - fmul(3); - fld_d(Address(ONES, RelocationHolder::none).plus_disp(eax, Address::times_8)); - fmula(2); - fmula(3); - fxch(3); - faddp(2); - fxch(1); - fstp_d(Address(esp, 16)); - fmul(1); - fxch(1); - fmulp(2); - movsd(xmm0, Address(esp, 16)); - faddp(1); - fstp_d(Address(esp, 16)); - movsd(xmm1, Address(esp, 16)); - jmp(B1_21); - - bind(B1_20); - fdivrp(1); - fmulp(2); - fxch(1); - fmul(2); - movl(eax, Address(esp, 44)); - fld_d(Address(ONES, RelocationHolder::none).plus_disp(eax, Address::times_8)); - fmula(1); - fmula(3); - fxch(3); - faddp(1); - fstp_d(Address(esp, 16)); - fmul(1); - fld_x(Address(esp, 24)); - fmulp(2); - movsd(xmm0, Address(esp, 16)); - faddp(1); - fstp_d(Address(esp, 16)); - movsd(xmm1, Address(esp, 16)); - - bind(B1_21); - testl(edi, edi); - jcc(Assembler::equal, B1_23); - - bind(B1_22); - fldcw(Address(esp, 38)); - - bind(B1_23); - movl(eax, Address(esp, 40)); - movsd(Address(eax, 0), xmm0); - movsd(Address(eax, 8), xmm1); - addl(esp, 52); - pop(ebx); - pop(edi); - pop(esi); - movl(esp, ebp); - pop(ebp); - ret(0); - - bind(B1_24); - testb(ebx, 2); - jcc(Assembler::equal, B1_31); - - bind(B1_25); - incl(edx); - fld_s(0); - fmul(1); - testb(edx, 2); - jcc(Assembler::equal, B1_27); - - bind(B1_26); - fld_d(ExternalAddress(ONES)); - fdiv(2); - fld_s(1); - fmul(2); - fld_x(ExternalAddress(132 + GP)); //0x00a3, 0x7fea, 0x9b - fmul(1); - fld_x(ExternalAddress(120 + GP)); //0xa559, 0x1da9, 0xae - fmul(2); - fld_x(ExternalAddress(108 + GP)); //0x67c9, 0xbed3, 0xbf - movl(eax, Address(esp, 44)); - faddp(2); - fxch(1); - fmul(2); - xorl(eax, 1); - fld_x(ExternalAddress(96 + GP)); //0x286b, 0xb49e, 0xb8 - faddp(2); - fxch(1); - fmul(2); - fld_x(ExternalAddress(84 + GP)); //0x355b, 0xd910, 0x67 - faddp(2); - fxch(1); - fmul(2); - fld_x(ExternalAddress(72 + GP)); //0xf3ca, 0x8c96, 0x8e - faddp(2); - fxch(1); - fmul(2); - fld_x(ExternalAddress(60 + GP)); //0xa8f9, 0x83f1, 0x2e - faddp(2); - fxch(1); - fmul(2); - fld_x(ExternalAddress(48 + GP)); //0x7065, 0x6a37, 0x79 - faddp(2); - fxch(1); - fmul(2); - fld_x(ExternalAddress(36 + GP)); //0x85a0, 0xa819, 0xbc - faddp(2); - fxch(1); - fmul(2); - fld_x(ExternalAddress(24 + GP)); //0xdfa7, 0x08aa, 0x55 - faddp(2); - fxch(1); - fmulp(2); - fld_x(ExternalAddress(12 + GP)); //0xb62f, 0x0b60, 0x60 - faddp(1); - fmulp(3); - fld_x(ExternalAddress(GP)); //0xaaab, 0xaaaa, 0xaa - faddp(1); - fmul(3); - fxch(2); - fmulp(3); - fxch(1); - faddp(2); - fld_d(Address(ONES, RelocationHolder::none).plus_disp(eax, Address::times_8)); - fmula(2); - fmulp(1); - faddp(1); - fstp_d(Address(esp, 16)); - movsd(xmm0, Address(esp, 16)); - jmp(B1_28); - - bind(B1_27); - fld_x(ExternalAddress(36 + TP)); //0x2ff0, 0x466d, 0x1a - fmul(1); - fld_x(ExternalAddress(24 + TP)); //0x00e3, 0xc850, 0xaa - movl(eax, Address(esp, 44)); - faddp(1); - fmul(1); - fld_x(ExternalAddress(36 + TQ)); //0x820f, 0x51ce, 0x7d - fmul(2); - fld_x(ExternalAddress(24 + TQ)); //0xb70f, 0xd068, 0xa6 - faddp(1); - fmul(2); - fld_x(ExternalAddress(12 + TQ)); //0xb6a3, 0xc36a, 0x44 - faddp(1); - fmul(2); - fld_x(ExternalAddress(TQ)); //0x399c, 0x8391, 0x15 - faddp(1); - fld_x(ExternalAddress(12 + TP)); //0x4b06, 0xb0ac, 0xd3 - faddp(2); - fxch(1); - fmul(2); - fld_x(ExternalAddress(TP)); //0x4cd6, 0xaf6c, 0xc7 - faddp(1); - fdivrp(1); - fmulp(1); - fmul(1); - fld_d(Address(ONES, RelocationHolder::none).plus_disp(eax, Address::times_8)); - fmula(1); - fmulp(2); - faddp(1); - fstp_d(Address(esp, 16)); - movsd(xmm0, Address(esp, 16)); - - bind(B1_28); - testl(edi, edi); - jcc(Assembler::equal, B1_30); - - bind(B1_29); - fldcw(Address(esp, 38)); - - bind(B1_30); - movl(eax, Address(esp, 40)); - movsd(Address(eax, 0), xmm0); - addl(esp, 52); - pop(ebx); - pop(edi); - pop(esi); - movl(esp, ebp); - pop(ebp); - ret(0); - - bind(B1_31); - testb(ebx, 1); - jcc(Assembler::equal, B1_38); - - bind(B1_32); - incl(edx); - fld_s(0); - fmul(1); - testb(edx, 2); - jcc(Assembler::equal, B1_34); - - bind(B1_33); - fld_x(ExternalAddress(36 + TP)); //0x2ff0, 0x466d, 0x1a - fmul(1); - fld_x(ExternalAddress(24 + TP)); //0x00e3, 0xc850, 0xaa - movl(eax, Address(esp, 44)); - faddp(1); - fmul(1); - xorl(eax, 1); - fld_x(ExternalAddress(36 + TQ)); //0x820f, 0x51ce, 0x7d - fmul(2); - fld_x(ExternalAddress(24 + TQ)); //0xb70f, 0xd068, 0xa6 - faddp(1); - fmul(2); - fld_x(ExternalAddress(12 + TQ)); //0xb6a3, 0xc36a, 0x44 - faddp(1); - fmul(2); - fld_x(ExternalAddress(TQ)); //0x399c, 0x8391, 0x15 - faddp(1); - fld_x(ExternalAddress(12 + TP)); //0x4b06, 0xb0ac, 0xd3 - faddp(2); - fxch(1); - fmul(2); - fld_x(ExternalAddress(TP)); //0x4cd6, 0xaf6c, 0xc7 - faddp(1); - fdivrp(1); - fmulp(1); - fmul(1); - fld_d(Address(ONES, RelocationHolder::none).plus_disp(eax, Address::times_8)); - fmula(1); - fmulp(2); - faddp(1); - fstp_d(Address(esp, 16)); - movsd(xmm0, Address(esp, 16)); - jmp(B1_35); - - bind(B1_34); - fld_d(ExternalAddress(ONES)); - fdiv(2); - fld_s(1); - fmul(2); - fld_x(ExternalAddress(132 + GP)); //0x00a3, 0x7fea, 0x9b - fmul(1); - fld_x(ExternalAddress(120 + GP)); //0xa559, 0x1da9, 0xae - fmul(2); - fld_x(ExternalAddress(108 + GP)); //0x67c9, 0xbed3, 0xbf - movl(eax, Address(esp, 44)); - faddp(2); - fxch(1); - fmul(2); - fld_x(ExternalAddress(96 + GP)); //0x286b, 0xb49e, 0xb8 - faddp(2); - fxch(1); - fmul(2); - fld_x(ExternalAddress(84 + GP)); //0x355b, 0xd910, 0x67 - faddp(2); - fxch(1); - fmul(2); - fld_x(ExternalAddress(72 + GP)); //0xf3ca, 0x8c96, 0x8e - faddp(2); - fxch(1); - fmul(2); - fld_x(ExternalAddress(60 + GP)); //0xa8f9, 0x83f1, 0x2e - faddp(2); - fxch(1); - fmul(2); - fld_x(ExternalAddress(48 + GP)); //0x7065, 0x6a37, 0x79 - faddp(2); - fxch(1); - fmul(2); - fld_x(ExternalAddress(36 + GP)); //0x85a0, 0xa819, 0xbc - faddp(2); - fxch(1); - fmul(2); - fld_x(ExternalAddress(24 + GP)); //0xdfa7, 0x08aa, 0x55 - faddp(2); - fxch(1); - fmulp(2); - fld_x(ExternalAddress(12 + GP)); //0xb62f, 0x0b60, 0x60 - faddp(1); - fmulp(3); - fld_x(ExternalAddress(GP)); //0xaaab, 0xaaaa, 0xaa - faddp(1); - fmul(3); - fxch(2); - fmulp(3); - fxch(1); - faddp(2); - fld_d(Address(ONES, RelocationHolder::none).plus_disp(eax, Address::times_8)); - fmula(2); - fmulp(1); - faddp(1); - fstp_d(Address(esp, 16)); - movsd(xmm0, Address(esp, 16)); - - bind(B1_35); - testl(edi, edi); - jcc(Assembler::equal, B1_37); - - bind(B1_36); - fldcw(Address(esp, 38)); - - bind(B1_37); - movl(eax, Address(esp, 40)); - movsd(Address(eax, 8), xmm0); - addl(esp, 52); - pop(ebx); - pop(edi); - pop(esi); - mov(esp, ebp); - pop(ebp); - ret(0); - - bind(B1_38); - fstp_d(0); - addl(esp, 52); - pop(ebx); - pop(edi); - pop(esi); - mov(esp, ebp); - pop(ebp); - ret(0); - - bind(B1_39); - xorl(edi, edi); - jmp(B1_8); - - bind(B1_40); - xorl(edi, edi); - jmp(B1_15); -} - -ATTRIBUTE_ALIGNED(16) static const juint _static_const_table_tan[] = -{ - 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x882c10faUL, - 0x3f9664f4UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, - 0x00000000UL, 0x00000000UL, 0x55e6c23dUL, 0x3f8226e3UL, 0x55555555UL, - 0x3fd55555UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, - 0x0e157de0UL, 0x3f6d6d3dUL, 0x11111111UL, 0x3fc11111UL, 0x00000000UL, - 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x452b75e3UL, 0x3f57da36UL, - 0x1ba1ba1cUL, 0x3faba1baUL, 0x00000000UL, 0x00000000UL, 0x00000000UL, - 0x00000000UL, 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x00000000UL, - 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x4e435f9bUL, - 0x3f953f83UL, 0x00000000UL, 0x00000000UL, 0x3c6e8e46UL, 0x3f9b74eaUL, - 0x00000000UL, 0x00000000UL, 0xda5b7511UL, 0x3f85ad63UL, 0xdc230b9bUL, - 0x3fb97558UL, 0x26cb3788UL, 0x3f881308UL, 0x76fc4985UL, 0x3fd62ac9UL, - 0x77bb08baUL, 0x3f757c85UL, 0xb6247521UL, 0x3fb1381eUL, 0x5922170cUL, - 0x3f754e95UL, 0x8746482dUL, 0x3fc27f83UL, 0x11055b30UL, 0x3f64e391UL, - 0x3e666320UL, 0x3fa3e609UL, 0x0de9dae3UL, 0x3f6301dfUL, 0x1f1dca06UL, - 0x3fafa8aeUL, 0x8c5b2da2UL, 0x3fb936bbUL, 0x4e88f7a5UL, 0x3c587d05UL, - 0x00000000UL, 0x3ff00000UL, 0xa8935dd9UL, 0x3f83dde2UL, 0x00000000UL, - 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x5a279ea3UL, 0x3faa3407UL, - 0x00000000UL, 0x00000000UL, 0x432d65faUL, 0x3fa70153UL, 0x00000000UL, - 0x00000000UL, 0x891a4602UL, 0x3f9d03efUL, 0xd62ca5f8UL, 0x3fca77d9UL, - 0xb35f4628UL, 0x3f97a265UL, 0x433258faUL, 0x3fd8cf51UL, 0xb58fd909UL, - 0x3f8f88e3UL, 0x01771ceaUL, 0x3fc2b154UL, 0xf3562f8eUL, 0x3f888f57UL, - 0xc028a723UL, 0x3fc7370fUL, 0x20b7f9f0UL, 0x3f80f44cUL, 0x214368e9UL, - 0x3fb6dfaaUL, 0x28891863UL, 0x3f79b4b6UL, 0x172dbbf0UL, 0x3fb6cb8eUL, - 0xe0553158UL, 0x3fc975f5UL, 0x593fe814UL, 0x3c2ef5d3UL, 0x00000000UL, - 0x3ff00000UL, 0x03dec550UL, 0x3fa44203UL, 0x00000000UL, 0x00000000UL, - 0x00000000UL, 0x00000000UL, 0x9314533eUL, 0x3fbb8ec5UL, 0x00000000UL, - 0x00000000UL, 0x09aa36d0UL, 0x3fb6d3f4UL, 0x00000000UL, 0x00000000UL, - 0xdcb427fdUL, 0x3fb13950UL, 0xd87ab0bbUL, 0x3fd5335eUL, 0xce0ae8a5UL, - 0x3fabb382UL, 0x79143126UL, 0x3fddba41UL, 0x5f2b28d4UL, 0x3fa552f1UL, - 0x59f21a6dUL, 0x3fd015abUL, 0x22c27d95UL, 0x3fa0e984UL, 0xe19fc6aaUL, - 0x3fd0576cUL, 0x8f2c2950UL, 0x3f9a4898UL, 0xc0b3f22cUL, 0x3fc59462UL, - 0x1883a4b8UL, 0x3f94b61cUL, 0x3f838640UL, 0x3fc30eb8UL, 0x355c63dcUL, - 0x3fd36a08UL, 0x1dce993dUL, 0xbc6d704dUL, 0x00000000UL, 0x3ff00000UL, - 0x2b82ab63UL, 0x3fb78e92UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, - 0x00000000UL, 0x56f37042UL, 0x3fccfc56UL, 0x00000000UL, 0x00000000UL, - 0xaa563951UL, 0x3fc90125UL, 0x00000000UL, 0x00000000UL, 0x3d0e7c5dUL, - 0x3fc50533UL, 0x9bed9b2eUL, 0x3fdf0ed9UL, 0x5fe7c47cUL, 0x3fc1f250UL, - 0x96c125e5UL, 0x3fe2edd9UL, 0x5a02bbd8UL, 0x3fbe5c71UL, 0x86362c20UL, - 0x3fda08b7UL, 0x4b4435edUL, 0x3fb9d342UL, 0x4b494091UL, 0x3fd911bdUL, - 0xb56658beUL, 0x3fb5e4c7UL, 0x93a2fd76UL, 0x3fd3c092UL, 0xda271794UL, - 0x3fb29910UL, 0x3303df2bUL, 0x3fd189beUL, 0x99fcef32UL, 0x3fda8279UL, - 0xb68c1467UL, 0x3c708b2fUL, 0x00000000UL, 0x3ff00000UL, 0x980c4337UL, - 0x3fc5f619UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, - 0xcc03e501UL, 0x3fdff10fUL, 0x00000000UL, 0x00000000UL, 0x44a4e845UL, - 0x3fddb63bUL, 0x00000000UL, 0x00000000UL, 0x3768ad9fUL, 0x3fdb72a4UL, - 0x3dd01ccaUL, 0x3fe5fdb9UL, 0xa61d2811UL, 0x3fd972b2UL, 0x5645ad0bUL, - 0x3fe977f9UL, 0xd013b3abUL, 0x3fd78ca3UL, 0xbf0bf914UL, 0x3fe4f192UL, - 0x4d53e730UL, 0x3fd5d060UL, 0x3f8b9000UL, 0x3fe49933UL, 0xe2b82f08UL, - 0x3fd4322aUL, 0x5936a835UL, 0x3fe27ae1UL, 0xb1c61c9bUL, 0x3fd2b3fbUL, - 0xef478605UL, 0x3fe1659eUL, 0x190834ecUL, 0x3fe11ab7UL, 0xcdb625eaUL, - 0xbc8e564bUL, 0x00000000UL, 0x3ff00000UL, 0xb07217e3UL, 0x3fd248f1UL, - 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x2b2c49d0UL, - 0x3ff2de9cUL, 0x00000000UL, 0x00000000UL, 0x2655bc98UL, 0x3ff33e58UL, - 0x00000000UL, 0x00000000UL, 0xff691fa2UL, 0x3ff3972eUL, 0xe93463bdUL, - 0x3feeed87UL, 0x070e10a0UL, 0x3ff3f5b2UL, 0xf4d790a4UL, 0x3ff20c10UL, - 0xa04e8ea3UL, 0x3ff4541aUL, 0x386accd3UL, 0x3ff1369eUL, 0x222a66ddUL, - 0x3ff4b521UL, 0x22a9777eUL, 0x3ff20817UL, 0x52a04a6eUL, 0x3ff5178fUL, - 0xddaa0031UL, 0x3ff22137UL, 0x4447d47cUL, 0x3ff57c01UL, 0x1e9c7f1dUL, - 0x3ff29311UL, 0x2ab7f990UL, 0x3fe561b8UL, 0x209c7df1UL, 0x3c87a8c5UL, - 0x00000000UL, 0x3ff00000UL, 0x4170bcc6UL, 0x3fdc92d8UL, 0x00000000UL, - 0x00000000UL, 0x00000000UL, 0x00000000UL, 0xc7ab4d5aUL, 0x40085e24UL, - 0x00000000UL, 0x00000000UL, 0xe93ea75dUL, 0x400b963dUL, 0x00000000UL, - 0x00000000UL, 0x94a7f25aUL, 0x400f37e2UL, 0x4b6261cbUL, 0x3ff5f984UL, - 0x5a9dd812UL, 0x4011aab0UL, 0x74c30018UL, 0x3ffaf5a5UL, 0x7f2ce8e3UL, - 0x4013fe8bUL, 0xfe8e54faUL, 0x3ffd7334UL, 0x670d618dUL, 0x4016a10cUL, - 0x4db97058UL, 0x4000e012UL, 0x24df44ddUL, 0x40199c5fUL, 0x697d6eceUL, - 0x4003006eUL, 0x83298b82UL, 0x401cfc4dUL, 0x19d490d6UL, 0x40058c19UL, - 0x2ae42850UL, 0x3fea4300UL, 0x118e20e6UL, 0xbc7a6db8UL, 0x00000000UL, - 0x40000000UL, 0xe33345b8UL, 0xbfd4e526UL, 0x00000000UL, 0x00000000UL, - 0x00000000UL, 0x00000000UL, 0x65965966UL, 0x40219659UL, 0x00000000UL, - 0x00000000UL, 0x882c10faUL, 0x402664f4UL, 0x00000000UL, 0x00000000UL, - 0x83cd3723UL, 0x402c8342UL, 0x00000000UL, 0x40000000UL, 0x55e6c23dUL, - 0x403226e3UL, 0x55555555UL, 0x40055555UL, 0x34451939UL, 0x40371c96UL, - 0xaaaaaaabUL, 0x400aaaaaUL, 0x0e157de0UL, 0x403d6d3dUL, 0x11111111UL, - 0x40111111UL, 0xa738201fUL, 0x4042bbceUL, 0x05b05b06UL, 0x4015b05bUL, - 0x452b75e3UL, 0x4047da36UL, 0x1ba1ba1cUL, 0x401ba1baUL, 0x00000000UL, - 0x3ff00000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x40000000UL, - 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, - 0x00000000UL, 0x4f48b8d3UL, 0xbf33eaf9UL, 0x00000000UL, 0x00000000UL, - 0x0cf7586fUL, 0x3f20b8eaUL, 0x00000000UL, 0x00000000UL, 0xd0258911UL, - 0xbf0abaf3UL, 0x23e49fe9UL, 0xbfab5a8cUL, 0x2d53222eUL, 0x3ef60d15UL, - 0x21169451UL, 0x3fa172b2UL, 0xbb254dbcUL, 0xbee1d3b5UL, 0xdbf93b8eUL, - 0xbf84c7dbUL, 0x05b4630bUL, 0x3ecd3364UL, 0xee9aada7UL, 0x3f743924UL, - 0x794a8297UL, 0xbeb7b7b9UL, 0xe015f797UL, 0xbf5d41f5UL, 0xe41a4a56UL, - 0x3ea35dfbUL, 0xe4c2a251UL, 0x3f49a2abUL, 0x5af9e000UL, 0xbfce49ceUL, - 0x8c743719UL, 0x3d1eb860UL, 0x00000000UL, 0x00000000UL, 0x1b4863cfUL, - 0x3fd78294UL, 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0xfffffff8UL, - 0x535ad890UL, 0xbf2b9320UL, 0x00000000UL, 0x00000000UL, 0x018fdf1fUL, - 0x3f16d61dUL, 0x00000000UL, 0x00000000UL, 0x0359f1beUL, 0xbf0139e4UL, - 0xa4317c6dUL, 0xbfa67e17UL, 0x82672d0fUL, 0x3eebb405UL, 0x2f1b621eUL, - 0x3f9f455bUL, 0x51ccf238UL, 0xbed55317UL, 0xf437b9acUL, 0xbf804beeUL, - 0xc791a2b5UL, 0x3ec0e993UL, 0x919a1db2UL, 0x3f7080c2UL, 0x336a5b0eUL, - 0xbeaa48a2UL, 0x0a268358UL, 0xbf55a443UL, 0xdfd978e4UL, 0x3e94b61fUL, - 0xd7767a58UL, 0x3f431806UL, 0x2aea0000UL, 0xbfc9bbe8UL, 0x7723ea61UL, - 0xbd3a2369UL, 0x00000000UL, 0x00000000UL, 0xdf7796ffUL, 0x3fd6e642UL, - 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0xfffffff8UL, 0xb9ff07ceUL, - 0xbf231c78UL, 0x00000000UL, 0x00000000UL, 0xa5517182UL, 0x3f0ff0e0UL, - 0x00000000UL, 0x00000000UL, 0x790b4cbcUL, 0xbef66191UL, 0x848a46c6UL, - 0xbfa21ac0UL, 0xb16435faUL, 0x3ee1d3ecUL, 0x2a1aa832UL, 0x3f9c71eaUL, - 0xfdd299efUL, 0xbec9dd1aUL, 0x3f8dbaafUL, 0xbf793363UL, 0x309fc6eaUL, - 0x3eb415d6UL, 0xbee60471UL, 0x3f6b83baUL, 0x94a0a697UL, 0xbe9dae11UL, - 0x3e5c67b3UL, 0xbf4fd07bUL, 0x9a8f3e3eUL, 0x3e86bd75UL, 0xa4beb7a4UL, - 0x3f3d1eb1UL, 0x29cfc000UL, 0xbfc549ceUL, 0xbf159358UL, 0xbd397b33UL, - 0x00000000UL, 0x00000000UL, 0x871fee6cUL, 0x3fd666f0UL, 0x00000000UL, - 0x3ff00000UL, 0x00000000UL, 0xfffffff8UL, 0x7d98a556UL, 0xbf1a3958UL, - 0x00000000UL, 0x00000000UL, 0x9d88dc01UL, 0x3f0704c2UL, 0x00000000UL, - 0x00000000UL, 0x73742a2bUL, 0xbeed054aUL, 0x58844587UL, 0xbf9c2a13UL, - 0x55688a79UL, 0x3ed7a326UL, 0xee33f1d6UL, 0x3f9a48f4UL, 0xa8dc9888UL, - 0xbebf8939UL, 0xaad4b5b8UL, 0xbf72f746UL, 0x9102efa1UL, 0x3ea88f82UL, - 0xdabc29cfUL, 0x3f678228UL, 0x9289afb8UL, 0xbe90f456UL, 0x741fb4edUL, - 0xbf46f3a3UL, 0xa97f6663UL, 0x3e79b4bfUL, 0xca89ff3fUL, 0x3f36db70UL, - 0xa8a2a000UL, 0xbfc0ee13UL, 0x3da24be1UL, 0xbd338b9fUL, 0x00000000UL, - 0x00000000UL, 0x11cd6c69UL, 0x3fd601fdUL, 0x00000000UL, 0x3ff00000UL, - 0x00000000UL, 0xfffffff8UL, 0x1a154b97UL, 0xbf116b01UL, 0x00000000UL, - 0x00000000UL, 0x2d427630UL, 0x3f0147bfUL, 0x00000000UL, 0x00000000UL, - 0xb93820c8UL, 0xbee264d4UL, 0xbb6cbb18UL, 0xbf94ab8cUL, 0x888d4d92UL, - 0x3ed0568bUL, 0x60730f7cUL, 0x3f98b19bUL, 0xe4b1fb11UL, 0xbeb2f950UL, - 0x22cf9f74UL, 0xbf6b21cdUL, 0x4a3ff0a6UL, 0x3e9f499eUL, 0xfd2b83ceUL, - 0x3f64aad7UL, 0x637b73afUL, 0xbe83487cUL, 0xe522591aUL, 0xbf3fc092UL, - 0xa158e8bcUL, 0x3e6e3aaeUL, 0xe5e82ffaUL, 0x3f329d2fUL, 0xd636a000UL, - 0xbfb9477fUL, 0xc2c2d2bcUL, 0xbd135ef9UL, 0x00000000UL, 0x00000000UL, - 0xf2fdb123UL, 0x3fd5b566UL, 0x00000000UL, 0x3ff00000UL, 0x00000000UL, - 0xfffffff8UL, 0xc41acb64UL, 0xbf05448dUL, 0x00000000UL, 0x00000000UL, - 0xdbb03d6fUL, 0x3efb7ad2UL, 0x00000000UL, 0x00000000UL, 0x9e42962dUL, - 0xbed5aea5UL, 0x2579f8efUL, 0xbf8b2398UL, 0x288a1ed9UL, 0x3ec81441UL, - 0xb0198dc5UL, 0x3f979a3aUL, 0x2fdfe253UL, 0xbea57cd3UL, 0x5766336fUL, - 0xbf617caaUL, 0x600944c3UL, 0x3e954ed6UL, 0xa4e0aaf8UL, 0x3f62c646UL, - 0x6b8fb29cUL, 0xbe74e3a3UL, 0xdc4c0409UL, 0xbf33f952UL, 0x9bffe365UL, - 0x3e6301ecUL, 0xb8869e44UL, 0x3f2fc566UL, 0xe1e04000UL, 0xbfb0cc62UL, - 0x016b907fUL, 0xbd119cbcUL, 0x00000000UL, 0x00000000UL, 0xe6b9d8faUL, - 0x3fd57fb3UL, 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0xfffffff8UL, - 0x5daf22a6UL, 0xbef429d7UL, 0x00000000UL, 0x00000000UL, 0x06bca545UL, - 0x3ef7a27dUL, 0x00000000UL, 0x00000000UL, 0x7211c19aUL, 0xbec41c3eUL, - 0x956ed53eUL, 0xbf7ae3f4UL, 0xee750e72UL, 0x3ec3901bUL, 0x91d443f5UL, - 0x3f96f713UL, 0x36661e6cUL, 0xbe936e09UL, 0x506f9381UL, 0xbf5122e8UL, - 0xcb6dd43fUL, 0x3e9041b9UL, 0x6698b2ffUL, 0x3f61b0c7UL, 0x576bf12bUL, - 0xbe625a8aUL, 0xe5a0e9dcUL, 0xbf23499dUL, 0x110384ddUL, 0x3e5b1c2cUL, - 0x68d43db6UL, 0x3f2cb899UL, 0x6ecac000UL, 0xbfa0c414UL, 0xcd7dd58cUL, - 0x3d13500fUL, 0x00000000UL, 0x00000000UL, 0x85a2c8fbUL, 0x3fd55fe0UL, - 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0xfffffff8UL, 0x00000000UL, - 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x2bf70ebeUL, 0x3ef66a8fUL, - 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, - 0x00000000UL, 0xd644267fUL, 0x3ec22805UL, 0x16c16c17UL, 0x3f96c16cUL, - 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0xc4e09162UL, - 0x3e8d6db2UL, 0xbc011567UL, 0x3f61566aUL, 0x00000000UL, 0x00000000UL, - 0x00000000UL, 0x00000000UL, 0x1f79955cUL, 0x3e57da4eUL, 0x9334ef0bUL, - 0x3f2bbd77UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, - 0x00000000UL, 0x00000000UL, 0x55555555UL, 0x3fd55555UL, 0x00000000UL, - 0x3ff00000UL, 0x00000000UL, 0xfffffff8UL, 0x5daf22a6UL, 0x3ef429d7UL, - 0x00000000UL, 0x00000000UL, 0x06bca545UL, 0x3ef7a27dUL, 0x00000000UL, - 0x00000000UL, 0x7211c19aUL, 0x3ec41c3eUL, 0x956ed53eUL, 0x3f7ae3f4UL, - 0xee750e72UL, 0x3ec3901bUL, 0x91d443f5UL, 0x3f96f713UL, 0x36661e6cUL, - 0x3e936e09UL, 0x506f9381UL, 0x3f5122e8UL, 0xcb6dd43fUL, 0x3e9041b9UL, - 0x6698b2ffUL, 0x3f61b0c7UL, 0x576bf12bUL, 0x3e625a8aUL, 0xe5a0e9dcUL, - 0x3f23499dUL, 0x110384ddUL, 0x3e5b1c2cUL, 0x68d43db6UL, 0x3f2cb899UL, - 0x6ecac000UL, 0x3fa0c414UL, 0xcd7dd58cUL, 0xbd13500fUL, 0x00000000UL, - 0x00000000UL, 0x85a2c8fbUL, 0x3fd55fe0UL, 0x00000000UL, 0x3ff00000UL, - 0x00000000UL, 0xfffffff8UL, 0xc41acb64UL, 0x3f05448dUL, 0x00000000UL, - 0x00000000UL, 0xdbb03d6fUL, 0x3efb7ad2UL, 0x00000000UL, 0x00000000UL, - 0x9e42962dUL, 0x3ed5aea5UL, 0x2579f8efUL, 0x3f8b2398UL, 0x288a1ed9UL, - 0x3ec81441UL, 0xb0198dc5UL, 0x3f979a3aUL, 0x2fdfe253UL, 0x3ea57cd3UL, - 0x5766336fUL, 0x3f617caaUL, 0x600944c3UL, 0x3e954ed6UL, 0xa4e0aaf8UL, - 0x3f62c646UL, 0x6b8fb29cUL, 0x3e74e3a3UL, 0xdc4c0409UL, 0x3f33f952UL, - 0x9bffe365UL, 0x3e6301ecUL, 0xb8869e44UL, 0x3f2fc566UL, 0xe1e04000UL, - 0x3fb0cc62UL, 0x016b907fUL, 0x3d119cbcUL, 0x00000000UL, 0x00000000UL, - 0xe6b9d8faUL, 0x3fd57fb3UL, 0x00000000UL, 0x3ff00000UL, 0x00000000UL, - 0xfffffff8UL, 0x1a154b97UL, 0x3f116b01UL, 0x00000000UL, 0x00000000UL, - 0x2d427630UL, 0x3f0147bfUL, 0x00000000UL, 0x00000000UL, 0xb93820c8UL, - 0x3ee264d4UL, 0xbb6cbb18UL, 0x3f94ab8cUL, 0x888d4d92UL, 0x3ed0568bUL, - 0x60730f7cUL, 0x3f98b19bUL, 0xe4b1fb11UL, 0x3eb2f950UL, 0x22cf9f74UL, - 0x3f6b21cdUL, 0x4a3ff0a6UL, 0x3e9f499eUL, 0xfd2b83ceUL, 0x3f64aad7UL, - 0x637b73afUL, 0x3e83487cUL, 0xe522591aUL, 0x3f3fc092UL, 0xa158e8bcUL, - 0x3e6e3aaeUL, 0xe5e82ffaUL, 0x3f329d2fUL, 0xd636a000UL, 0x3fb9477fUL, - 0xc2c2d2bcUL, 0x3d135ef9UL, 0x00000000UL, 0x00000000UL, 0xf2fdb123UL, - 0x3fd5b566UL, 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0xfffffff8UL, - 0x7d98a556UL, 0x3f1a3958UL, 0x00000000UL, 0x00000000UL, 0x9d88dc01UL, - 0x3f0704c2UL, 0x00000000UL, 0x00000000UL, 0x73742a2bUL, 0x3eed054aUL, - 0x58844587UL, 0x3f9c2a13UL, 0x55688a79UL, 0x3ed7a326UL, 0xee33f1d6UL, - 0x3f9a48f4UL, 0xa8dc9888UL, 0x3ebf8939UL, 0xaad4b5b8UL, 0x3f72f746UL, - 0x9102efa1UL, 0x3ea88f82UL, 0xdabc29cfUL, 0x3f678228UL, 0x9289afb8UL, - 0x3e90f456UL, 0x741fb4edUL, 0x3f46f3a3UL, 0xa97f6663UL, 0x3e79b4bfUL, - 0xca89ff3fUL, 0x3f36db70UL, 0xa8a2a000UL, 0x3fc0ee13UL, 0x3da24be1UL, - 0x3d338b9fUL, 0x00000000UL, 0x00000000UL, 0x11cd6c69UL, 0x3fd601fdUL, - 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0xfffffff8UL, 0xb9ff07ceUL, - 0x3f231c78UL, 0x00000000UL, 0x00000000UL, 0xa5517182UL, 0x3f0ff0e0UL, - 0x00000000UL, 0x00000000UL, 0x790b4cbcUL, 0x3ef66191UL, 0x848a46c6UL, - 0x3fa21ac0UL, 0xb16435faUL, 0x3ee1d3ecUL, 0x2a1aa832UL, 0x3f9c71eaUL, - 0xfdd299efUL, 0x3ec9dd1aUL, 0x3f8dbaafUL, 0x3f793363UL, 0x309fc6eaUL, - 0x3eb415d6UL, 0xbee60471UL, 0x3f6b83baUL, 0x94a0a697UL, 0x3e9dae11UL, - 0x3e5c67b3UL, 0x3f4fd07bUL, 0x9a8f3e3eUL, 0x3e86bd75UL, 0xa4beb7a4UL, - 0x3f3d1eb1UL, 0x29cfc000UL, 0x3fc549ceUL, 0xbf159358UL, 0x3d397b33UL, - 0x00000000UL, 0x00000000UL, 0x871fee6cUL, 0x3fd666f0UL, 0x00000000UL, - 0x3ff00000UL, 0x00000000UL, 0xfffffff8UL, 0x535ad890UL, 0x3f2b9320UL, - 0x00000000UL, 0x00000000UL, 0x018fdf1fUL, 0x3f16d61dUL, 0x00000000UL, - 0x00000000UL, 0x0359f1beUL, 0x3f0139e4UL, 0xa4317c6dUL, 0x3fa67e17UL, - 0x82672d0fUL, 0x3eebb405UL, 0x2f1b621eUL, 0x3f9f455bUL, 0x51ccf238UL, - 0x3ed55317UL, 0xf437b9acUL, 0x3f804beeUL, 0xc791a2b5UL, 0x3ec0e993UL, - 0x919a1db2UL, 0x3f7080c2UL, 0x336a5b0eUL, 0x3eaa48a2UL, 0x0a268358UL, - 0x3f55a443UL, 0xdfd978e4UL, 0x3e94b61fUL, 0xd7767a58UL, 0x3f431806UL, - 0x2aea0000UL, 0x3fc9bbe8UL, 0x7723ea61UL, 0x3d3a2369UL, 0x00000000UL, - 0x00000000UL, 0xdf7796ffUL, 0x3fd6e642UL, 0x00000000UL, 0x3ff00000UL, - 0x00000000UL, 0xfffffff8UL, 0x4f48b8d3UL, 0x3f33eaf9UL, 0x00000000UL, - 0x00000000UL, 0x0cf7586fUL, 0x3f20b8eaUL, 0x00000000UL, 0x00000000UL, - 0xd0258911UL, 0x3f0abaf3UL, 0x23e49fe9UL, 0x3fab5a8cUL, 0x2d53222eUL, - 0x3ef60d15UL, 0x21169451UL, 0x3fa172b2UL, 0xbb254dbcUL, 0x3ee1d3b5UL, - 0xdbf93b8eUL, 0x3f84c7dbUL, 0x05b4630bUL, 0x3ecd3364UL, 0xee9aada7UL, - 0x3f743924UL, 0x794a8297UL, 0x3eb7b7b9UL, 0xe015f797UL, 0x3f5d41f5UL, - 0xe41a4a56UL, 0x3ea35dfbUL, 0xe4c2a251UL, 0x3f49a2abUL, 0x5af9e000UL, - 0x3fce49ceUL, 0x8c743719UL, 0xbd1eb860UL, 0x00000000UL, 0x00000000UL, - 0x1b4863cfUL, 0x3fd78294UL, 0x00000000UL, 0x3ff00000UL, 0x00000000UL, - 0xfffffff8UL, 0x65965966UL, 0xc0219659UL, 0x00000000UL, 0x00000000UL, - 0x882c10faUL, 0x402664f4UL, 0x00000000UL, 0x00000000UL, 0x83cd3723UL, - 0xc02c8342UL, 0x00000000UL, 0xc0000000UL, 0x55e6c23dUL, 0x403226e3UL, - 0x55555555UL, 0x40055555UL, 0x34451939UL, 0xc0371c96UL, 0xaaaaaaabUL, - 0xc00aaaaaUL, 0x0e157de0UL, 0x403d6d3dUL, 0x11111111UL, 0x40111111UL, - 0xa738201fUL, 0xc042bbceUL, 0x05b05b06UL, 0xc015b05bUL, 0x452b75e3UL, - 0x4047da36UL, 0x1ba1ba1cUL, 0x401ba1baUL, 0x00000000UL, 0xbff00000UL, - 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x40000000UL, 0x00000000UL, - 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, - 0xc7ab4d5aUL, 0xc0085e24UL, 0x00000000UL, 0x00000000UL, 0xe93ea75dUL, - 0x400b963dUL, 0x00000000UL, 0x00000000UL, 0x94a7f25aUL, 0xc00f37e2UL, - 0x4b6261cbUL, 0xbff5f984UL, 0x5a9dd812UL, 0x4011aab0UL, 0x74c30018UL, - 0x3ffaf5a5UL, 0x7f2ce8e3UL, 0xc013fe8bUL, 0xfe8e54faUL, 0xbffd7334UL, - 0x670d618dUL, 0x4016a10cUL, 0x4db97058UL, 0x4000e012UL, 0x24df44ddUL, - 0xc0199c5fUL, 0x697d6eceUL, 0xc003006eUL, 0x83298b82UL, 0x401cfc4dUL, - 0x19d490d6UL, 0x40058c19UL, 0x2ae42850UL, 0xbfea4300UL, 0x118e20e6UL, - 0x3c7a6db8UL, 0x00000000UL, 0x40000000UL, 0xe33345b8UL, 0xbfd4e526UL, - 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x2b2c49d0UL, - 0xbff2de9cUL, 0x00000000UL, 0x00000000UL, 0x2655bc98UL, 0x3ff33e58UL, - 0x00000000UL, 0x00000000UL, 0xff691fa2UL, 0xbff3972eUL, 0xe93463bdUL, - 0xbfeeed87UL, 0x070e10a0UL, 0x3ff3f5b2UL, 0xf4d790a4UL, 0x3ff20c10UL, - 0xa04e8ea3UL, 0xbff4541aUL, 0x386accd3UL, 0xbff1369eUL, 0x222a66ddUL, - 0x3ff4b521UL, 0x22a9777eUL, 0x3ff20817UL, 0x52a04a6eUL, 0xbff5178fUL, - 0xddaa0031UL, 0xbff22137UL, 0x4447d47cUL, 0x3ff57c01UL, 0x1e9c7f1dUL, - 0x3ff29311UL, 0x2ab7f990UL, 0xbfe561b8UL, 0x209c7df1UL, 0xbc87a8c5UL, - 0x00000000UL, 0x3ff00000UL, 0x4170bcc6UL, 0x3fdc92d8UL, 0x00000000UL, - 0x00000000UL, 0x00000000UL, 0x00000000UL, 0xcc03e501UL, 0xbfdff10fUL, - 0x00000000UL, 0x00000000UL, 0x44a4e845UL, 0x3fddb63bUL, 0x00000000UL, - 0x00000000UL, 0x3768ad9fUL, 0xbfdb72a4UL, 0x3dd01ccaUL, 0xbfe5fdb9UL, - 0xa61d2811UL, 0x3fd972b2UL, 0x5645ad0bUL, 0x3fe977f9UL, 0xd013b3abUL, - 0xbfd78ca3UL, 0xbf0bf914UL, 0xbfe4f192UL, 0x4d53e730UL, 0x3fd5d060UL, - 0x3f8b9000UL, 0x3fe49933UL, 0xe2b82f08UL, 0xbfd4322aUL, 0x5936a835UL, - 0xbfe27ae1UL, 0xb1c61c9bUL, 0x3fd2b3fbUL, 0xef478605UL, 0x3fe1659eUL, - 0x190834ecUL, 0xbfe11ab7UL, 0xcdb625eaUL, 0x3c8e564bUL, 0x00000000UL, - 0x3ff00000UL, 0xb07217e3UL, 0x3fd248f1UL, 0x00000000UL, 0x00000000UL, - 0x00000000UL, 0x00000000UL, 0x56f37042UL, 0xbfccfc56UL, 0x00000000UL, - 0x00000000UL, 0xaa563951UL, 0x3fc90125UL, 0x00000000UL, 0x00000000UL, - 0x3d0e7c5dUL, 0xbfc50533UL, 0x9bed9b2eUL, 0xbfdf0ed9UL, 0x5fe7c47cUL, - 0x3fc1f250UL, 0x96c125e5UL, 0x3fe2edd9UL, 0x5a02bbd8UL, 0xbfbe5c71UL, - 0x86362c20UL, 0xbfda08b7UL, 0x4b4435edUL, 0x3fb9d342UL, 0x4b494091UL, - 0x3fd911bdUL, 0xb56658beUL, 0xbfb5e4c7UL, 0x93a2fd76UL, 0xbfd3c092UL, - 0xda271794UL, 0x3fb29910UL, 0x3303df2bUL, 0x3fd189beUL, 0x99fcef32UL, - 0xbfda8279UL, 0xb68c1467UL, 0xbc708b2fUL, 0x00000000UL, 0x3ff00000UL, - 0x980c4337UL, 0x3fc5f619UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, - 0x00000000UL, 0x9314533eUL, 0xbfbb8ec5UL, 0x00000000UL, 0x00000000UL, - 0x09aa36d0UL, 0x3fb6d3f4UL, 0x00000000UL, 0x00000000UL, 0xdcb427fdUL, - 0xbfb13950UL, 0xd87ab0bbUL, 0xbfd5335eUL, 0xce0ae8a5UL, 0x3fabb382UL, - 0x79143126UL, 0x3fddba41UL, 0x5f2b28d4UL, 0xbfa552f1UL, 0x59f21a6dUL, - 0xbfd015abUL, 0x22c27d95UL, 0x3fa0e984UL, 0xe19fc6aaUL, 0x3fd0576cUL, - 0x8f2c2950UL, 0xbf9a4898UL, 0xc0b3f22cUL, 0xbfc59462UL, 0x1883a4b8UL, - 0x3f94b61cUL, 0x3f838640UL, 0x3fc30eb8UL, 0x355c63dcUL, 0xbfd36a08UL, - 0x1dce993dUL, 0x3c6d704dUL, 0x00000000UL, 0x3ff00000UL, 0x2b82ab63UL, - 0x3fb78e92UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, - 0x5a279ea3UL, 0xbfaa3407UL, 0x00000000UL, 0x00000000UL, 0x432d65faUL, - 0x3fa70153UL, 0x00000000UL, 0x00000000UL, 0x891a4602UL, 0xbf9d03efUL, - 0xd62ca5f8UL, 0xbfca77d9UL, 0xb35f4628UL, 0x3f97a265UL, 0x433258faUL, - 0x3fd8cf51UL, 0xb58fd909UL, 0xbf8f88e3UL, 0x01771ceaUL, 0xbfc2b154UL, - 0xf3562f8eUL, 0x3f888f57UL, 0xc028a723UL, 0x3fc7370fUL, 0x20b7f9f0UL, - 0xbf80f44cUL, 0x214368e9UL, 0xbfb6dfaaUL, 0x28891863UL, 0x3f79b4b6UL, - 0x172dbbf0UL, 0x3fb6cb8eUL, 0xe0553158UL, 0xbfc975f5UL, 0x593fe814UL, - 0xbc2ef5d3UL, 0x00000000UL, 0x3ff00000UL, 0x03dec550UL, 0x3fa44203UL, - 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x4e435f9bUL, - 0xbf953f83UL, 0x00000000UL, 0x00000000UL, 0x3c6e8e46UL, 0x3f9b74eaUL, - 0x00000000UL, 0x00000000UL, 0xda5b7511UL, 0xbf85ad63UL, 0xdc230b9bUL, - 0xbfb97558UL, 0x26cb3788UL, 0x3f881308UL, 0x76fc4985UL, 0x3fd62ac9UL, - 0x77bb08baUL, 0xbf757c85UL, 0xb6247521UL, 0xbfb1381eUL, 0x5922170cUL, - 0x3f754e95UL, 0x8746482dUL, 0x3fc27f83UL, 0x11055b30UL, 0xbf64e391UL, - 0x3e666320UL, 0xbfa3e609UL, 0x0de9dae3UL, 0x3f6301dfUL, 0x1f1dca06UL, - 0x3fafa8aeUL, 0x8c5b2da2UL, 0xbfb936bbUL, 0x4e88f7a5UL, 0xbc587d05UL, - 0x00000000UL, 0x3ff00000UL, 0xa8935dd9UL, 0x3f83dde2UL, 0x00000000UL, - 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x6dc9c883UL, 0x3fe45f30UL, - 0x6dc9c883UL, 0x40245f30UL, 0x00000000UL, 0x43780000UL, 0x00000000UL, - 0x43380000UL, 0x54444000UL, 0x3fb921fbUL, 0x54440000UL, 0x3fb921fbUL, - 0x67674000UL, 0xbd32e7b9UL, 0x4c4c0000UL, 0x3d468c23UL, 0x3707344aUL, - 0x3aa8a2e0UL, 0x03707345UL, 0x3ae98a2eUL, 0x00000000UL, 0x80000000UL, - 0x00000000UL, 0x80000000UL, 0x676733afUL, 0x3d32e7b9UL, 0x00000000UL, - 0x00000000UL, 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x00000000UL, - 0x00000000UL, 0x7ff00000UL, 0x00000000UL, 0x00000000UL, 0xfffc0000UL, - 0xffffffffUL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x43600000UL, - 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x3c800000UL, 0x00000000UL, - 0x00000000UL, 0x00000000UL, 0x3ca00000UL, 0x00000000UL, 0x00000000UL, - 0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL, 0x00000000UL, - 0x40300000UL, 0x00000000UL, 0x3ff00000UL -}; - -void MacroAssembler::fast_tan(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) { - - Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2; - Label L_2TAG_PACKET_4_0_2; - - assert_different_registers(tmp, eax, ecx, edx); - - address static_const_table_tan = (address)_static_const_table_tan; - - subl(rsp, 120); - movl(Address(rsp, 56), tmp); - lea(tmp, ExternalAddress(static_const_table_tan)); - movsd(xmm0, Address(rsp, 128)); - pextrw(eax, xmm0, 3); - andl(eax, 32767); - subl(eax, 14368); - cmpl(eax, 2216); - jcc(Assembler::above, L_2TAG_PACKET_0_0_2); - movdqu(xmm5, Address(tmp, 5840)); - movdqu(xmm6, Address(tmp, 5856)); - unpcklpd(xmm0, xmm0); - movdqu(xmm4, Address(tmp, 5712)); - andpd(xmm4, xmm0); - movdqu(xmm1, Address(tmp, 5632)); - mulpd(xmm1, xmm0); - por(xmm5, xmm4); - addpd(xmm1, xmm5); - movdqu(xmm7, xmm1); - unpckhpd(xmm7, xmm7); - cvttsd2sil(edx, xmm7); - cvttpd2dq(xmm1, xmm1); - cvtdq2pd(xmm1, xmm1); - mulpd(xmm1, xmm6); - movdqu(xmm3, Address(tmp, 5664)); - movsd(xmm5, Address(tmp, 5728)); - addl(edx, 469248); - movdqu(xmm4, Address(tmp, 5680)); - mulpd(xmm3, xmm1); - andl(edx, 31); - mulsd(xmm5, xmm1); - movl(ecx, edx); - mulpd(xmm4, xmm1); - shll(ecx, 1); - subpd(xmm0, xmm3); - mulpd(xmm1, Address(tmp, 5696)); - addl(edx, ecx); - shll(ecx, 2); - addl(edx, ecx); - addsd(xmm5, xmm0); - movdqu(xmm2, xmm0); - subpd(xmm0, xmm4); - movsd(xmm6, Address(tmp, 5744)); - shll(edx, 4); - lea(eax, Address(tmp, 0)); - andpd(xmm5, Address(tmp, 5776)); - movdqu(xmm3, xmm0); - addl(eax, edx); - subpd(xmm2, xmm0); - unpckhpd(xmm0, xmm0); - divsd(xmm6, xmm5); - subpd(xmm2, xmm4); - movdqu(xmm7, Address(eax, 16)); - subsd(xmm3, xmm5); - mulpd(xmm7, xmm0); - subpd(xmm2, xmm1); - movdqu(xmm1, Address(eax, 48)); - mulpd(xmm1, xmm0); - movdqu(xmm4, Address(eax, 96)); - mulpd(xmm4, xmm0); - addsd(xmm2, xmm3); - movdqu(xmm3, xmm0); - mulpd(xmm0, xmm0); - addpd(xmm7, Address(eax, 0)); - addpd(xmm1, Address(eax, 32)); - mulpd(xmm1, xmm0); - addpd(xmm4, Address(eax, 80)); - addpd(xmm7, xmm1); - movdqu(xmm1, Address(eax, 112)); - mulpd(xmm1, xmm0); - mulpd(xmm0, xmm0); - addpd(xmm4, xmm1); - movdqu(xmm1, Address(eax, 64)); - mulpd(xmm1, xmm0); - addpd(xmm7, xmm1); - movdqu(xmm1, xmm3); - mulpd(xmm3, xmm0); - mulsd(xmm0, xmm0); - mulpd(xmm1, Address(eax, 144)); - mulpd(xmm4, xmm3); - movdqu(xmm3, xmm1); - addpd(xmm7, xmm4); - movdqu(xmm4, xmm1); - mulsd(xmm0, xmm7); - unpckhpd(xmm7, xmm7); - addsd(xmm0, xmm7); - unpckhpd(xmm1, xmm1); - addsd(xmm3, xmm1); - subsd(xmm4, xmm3); - addsd(xmm1, xmm4); - movdqu(xmm4, xmm2); - movsd(xmm7, Address(eax, 144)); - unpckhpd(xmm2, xmm2); - addsd(xmm7, Address(eax, 152)); - mulsd(xmm7, xmm2); - addsd(xmm7, Address(eax, 136)); - addsd(xmm7, xmm1); - addsd(xmm0, xmm7); - movsd(xmm7, Address(tmp, 5744)); - mulsd(xmm4, xmm6); - movsd(xmm2, Address(eax, 168)); - andpd(xmm2, xmm6); - mulsd(xmm5, xmm2); - mulsd(xmm6, Address(eax, 160)); - subsd(xmm7, xmm5); - subsd(xmm2, Address(eax, 128)); - subsd(xmm7, xmm4); - mulsd(xmm7, xmm6); - movdqu(xmm4, xmm3); - subsd(xmm3, xmm2); - addsd(xmm2, xmm3); - subsd(xmm4, xmm2); - addsd(xmm0, xmm4); - subsd(xmm0, xmm7); - addsd(xmm0, xmm3); - movsd(Address(rsp, 0), xmm0); - fld_d(Address(rsp, 0)); - jmp(L_2TAG_PACKET_1_0_2); - - bind(L_2TAG_PACKET_0_0_2); - jcc(Assembler::greater, L_2TAG_PACKET_2_0_2); - shrl(eax, 4); - cmpl(eax, 268434558); - jcc(Assembler::notEqual, L_2TAG_PACKET_3_0_2); - movdqu(xmm3, xmm0); - mulsd(xmm3, Address(tmp, 5808)); - - bind(L_2TAG_PACKET_3_0_2); - movsd(xmm3, Address(tmp, 5792)); - mulsd(xmm3, xmm0); - addsd(xmm3, xmm0); - mulsd(xmm3, Address(tmp, 5808)); - movsd(Address(rsp, 0), xmm3); - fld_d(Address(rsp, 0)); - jmp(L_2TAG_PACKET_1_0_2); - - bind(L_2TAG_PACKET_2_0_2); - movq(xmm7, Address(tmp, 5712)); - andpd(xmm7, xmm0); - xorpd(xmm7, xmm0); - ucomisd(xmm7, Address(tmp, 5760)); - jcc(Assembler::equal, L_2TAG_PACKET_4_0_2); - subl(rsp, 32); - movsd(Address(rsp, 0), xmm0); - lea(eax, Address(rsp, 40)); - movl(Address(rsp, 8), eax); - movl(eax, 2); - movl(Address(rsp, 12), eax); - call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlibm_tan_cot_huge()))); - addl(rsp, 32); - fld_d(Address(rsp, 8)); - jmp(L_2TAG_PACKET_1_0_2); - - bind(L_2TAG_PACKET_4_0_2); - movq(Address(rsp, 0), xmm0); - fld_d(Address(rsp, 0)); - fsub_d(Address(rsp, 0)); - - bind(L_2TAG_PACKET_1_0_2); - movl(tmp, Address(rsp, 56)); -} diff --git a/src/hotspot/cpu/x86/sharedRuntime_x86_32.cpp b/src/hotspot/cpu/x86/sharedRuntime_x86_32.cpp deleted file mode 100644 index 8e5e54f244c..00000000000 --- a/src/hotspot/cpu/x86/sharedRuntime_x86_32.cpp +++ /dev/null @@ -1,2854 +0,0 @@ -/* - * Copyright (c) 2003, 2025, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - * - */ - -#include "asm/macroAssembler.hpp" -#include "asm/macroAssembler.inline.hpp" -#include "code/compiledIC.hpp" -#include "code/debugInfoRec.hpp" -#include "code/nativeInst.hpp" -#include "code/vtableStubs.hpp" -#include "compiler/oopMap.hpp" -#include "gc/shared/gcLocker.hpp" -#include "gc/shared/barrierSet.hpp" -#include "gc/shared/barrierSetAssembler.hpp" -#include "interpreter/interpreter.hpp" -#include "logging/log.hpp" -#include "memory/resourceArea.hpp" -#include "oops/klass.inline.hpp" -#include "prims/methodHandles.hpp" -#include "runtime/jniHandles.hpp" -#include "runtime/safepointMechanism.hpp" -#include "runtime/sharedRuntime.hpp" -#include "runtime/signature.hpp" -#include "runtime/stubRoutines.hpp" -#include "runtime/timerTrace.hpp" -#include "runtime/vframeArray.hpp" -#include "runtime/vm_version.hpp" -#include "utilities/align.hpp" -#include "vmreg_x86.inline.hpp" -#ifdef COMPILER1 -#include "c1/c1_Runtime1.hpp" -#endif -#ifdef COMPILER2 -#include "opto/runtime.hpp" -#endif - -#define __ masm-> - -#ifdef PRODUCT -#define BLOCK_COMMENT(str) /* nothing */ -#else -#define BLOCK_COMMENT(str) __ block_comment(str) -#endif // PRODUCT - -const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size; - -class RegisterSaver { - // Capture info about frame layout -#define DEF_XMM_OFFS(regnum) xmm ## regnum ## _off = xmm_off + (regnum)*16/BytesPerInt, xmm ## regnum ## H_off - enum layout { - fpu_state_off = 0, - fpu_state_end = fpu_state_off+FPUStateSizeInWords, - st0_off, st0H_off, - st1_off, st1H_off, - st2_off, st2H_off, - st3_off, st3H_off, - st4_off, st4H_off, - st5_off, st5H_off, - st6_off, st6H_off, - st7_off, st7H_off, - xmm_off, - DEF_XMM_OFFS(0), - DEF_XMM_OFFS(1), - DEF_XMM_OFFS(2), - DEF_XMM_OFFS(3), - DEF_XMM_OFFS(4), - DEF_XMM_OFFS(5), - DEF_XMM_OFFS(6), - DEF_XMM_OFFS(7), - flags_off = xmm7_off + 16/BytesPerInt + 1, // 16-byte stack alignment fill word - rdi_off, - rsi_off, - ignore_off, // extra copy of rbp, - rsp_off, - rbx_off, - rdx_off, - rcx_off, - rax_off, - // The frame sender code expects that rbp will be in the "natural" place and - // will override any oopMap setting for it. We must therefore force the layout - // so that it agrees with the frame sender code. - rbp_off, - return_off, // slot for return address - reg_save_size }; - enum { FPU_regs_live = flags_off - fpu_state_end }; - - public: - - static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, - int* total_frame_words, bool verify_fpu = true, bool save_vectors = false); - static void restore_live_registers(MacroAssembler* masm, bool restore_vectors = false); - - static int rax_offset() { return rax_off; } - static int rbx_offset() { return rbx_off; } - - // Offsets into the register save area - // Used by deoptimization when it is managing result register - // values on its own - - static int raxOffset(void) { return rax_off; } - static int rdxOffset(void) { return rdx_off; } - static int rbxOffset(void) { return rbx_off; } - static int xmm0Offset(void) { return xmm0_off; } - // This really returns a slot in the fp save area, which one is not important - static int fpResultOffset(void) { return st0_off; } - - // During deoptimization only the result register need to be restored - // all the other values have already been extracted. - - static void restore_result_registers(MacroAssembler* masm); - -}; - -OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, - int* total_frame_words, bool verify_fpu, bool save_vectors) { - int num_xmm_regs = XMMRegister::number_of_registers; - int ymm_bytes = num_xmm_regs * 16; - int zmm_bytes = num_xmm_regs * 32; -#ifdef COMPILER2 - int opmask_state_bytes = KRegister::number_of_registers * 8; - if (save_vectors) { - assert(UseAVX > 0, "Vectors larger than 16 byte long are supported only with AVX"); - assert(MaxVectorSize <= 64, "Only up to 64 byte long vectors are supported"); - // Save upper half of YMM registers - int vect_bytes = ymm_bytes; - if (UseAVX > 2) { - // Save upper half of ZMM registers as well - vect_bytes += zmm_bytes; - additional_frame_words += opmask_state_bytes / wordSize; - } - additional_frame_words += vect_bytes / wordSize; - } -#else - assert(!save_vectors, "vectors are generated only by C2"); -#endif - int frame_size_in_bytes = (reg_save_size + additional_frame_words) * wordSize; - int frame_words = frame_size_in_bytes / wordSize; - *total_frame_words = frame_words; - - assert(FPUStateSizeInWords == 27, "update stack layout"); - - // save registers, fpu state, and flags - // We assume caller has already has return address slot on the stack - // We push epb twice in this sequence because we want the real rbp, - // to be under the return like a normal enter and we want to use pusha - // We push by hand instead of using push. - __ enter(); - __ pusha(); - __ pushf(); - __ subptr(rsp,FPU_regs_live*wordSize); // Push FPU registers space - __ push_FPU_state(); // Save FPU state & init - - if (verify_fpu) { - // Some stubs may have non standard FPU control word settings so - // only check and reset the value when it required to be the - // standard value. The safepoint blob in particular can be used - // in methods which are using the 24 bit control word for - // optimized float math. - -#ifdef ASSERT - // Make sure the control word has the expected value - Label ok; - __ cmpw(Address(rsp, 0), StubRoutines::x86::fpu_cntrl_wrd_std()); - __ jccb(Assembler::equal, ok); - __ stop("corrupted control word detected"); - __ bind(ok); -#endif - - // Reset the control word to guard against exceptions being unmasked - // since fstp_d can cause FPU stack underflow exceptions. Write it - // into the on stack copy and then reload that to make sure that the - // current and future values are correct. - __ movw(Address(rsp, 0), StubRoutines::x86::fpu_cntrl_wrd_std()); - } - - __ frstor(Address(rsp, 0)); - if (!verify_fpu) { - // Set the control word so that exceptions are masked for the - // following code. - __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std())); - } - - int off = st0_off; - int delta = st1_off - off; - - // Save the FPU registers in de-opt-able form - for (int n = 0; n < FloatRegister::number_of_registers; n++) { - __ fstp_d(Address(rsp, off*wordSize)); - off += delta; - } - - off = xmm0_off; - delta = xmm1_off - off; - if(UseSSE == 1) { - // Save the XMM state - for (int n = 0; n < num_xmm_regs; n++) { - __ movflt(Address(rsp, off*wordSize), as_XMMRegister(n)); - off += delta; - } - } else if(UseSSE >= 2) { - // Save whole 128bit (16 bytes) XMM registers - for (int n = 0; n < num_xmm_regs; n++) { - __ movdqu(Address(rsp, off*wordSize), as_XMMRegister(n)); - off += delta; - } - } - -#ifdef COMPILER2 - if (save_vectors) { - __ subptr(rsp, ymm_bytes); - // Save upper half of YMM registers - for (int n = 0; n < num_xmm_regs; n++) { - __ vextractf128_high(Address(rsp, n*16), as_XMMRegister(n)); - } - if (UseAVX > 2) { - __ subptr(rsp, zmm_bytes); - // Save upper half of ZMM registers - for (int n = 0; n < num_xmm_regs; n++) { - __ vextractf64x4_high(Address(rsp, n*32), as_XMMRegister(n)); - } - __ subptr(rsp, opmask_state_bytes); - // Save opmask registers - for (int n = 0; n < KRegister::number_of_registers; n++) { - __ kmov(Address(rsp, n*8), as_KRegister(n)); - } - } - } -#else - assert(!save_vectors, "vectors are generated only by C2"); -#endif - - __ vzeroupper(); - - // Set an oopmap for the call site. This oopmap will map all - // oop-registers and debug-info registers as callee-saved. This - // will allow deoptimization at this safepoint to find all possible - // debug-info recordings, as well as let GC find all oops. - - OopMapSet *oop_maps = new OopMapSet(); - OopMap* map = new OopMap( frame_words, 0 ); - -#define STACK_OFFSET(x) VMRegImpl::stack2reg((x) + additional_frame_words) -#define NEXTREG(x) (x)->as_VMReg()->next() - - map->set_callee_saved(STACK_OFFSET(rax_off), rax->as_VMReg()); - map->set_callee_saved(STACK_OFFSET(rcx_off), rcx->as_VMReg()); - map->set_callee_saved(STACK_OFFSET(rdx_off), rdx->as_VMReg()); - map->set_callee_saved(STACK_OFFSET(rbx_off), rbx->as_VMReg()); - // rbp, location is known implicitly, no oopMap - map->set_callee_saved(STACK_OFFSET(rsi_off), rsi->as_VMReg()); - map->set_callee_saved(STACK_OFFSET(rdi_off), rdi->as_VMReg()); - - // %%% This is really a waste but we'll keep things as they were for now for the upper component - off = st0_off; - delta = st1_off - off; - for (int n = 0; n < FloatRegister::number_of_registers; n++) { - FloatRegister freg_name = as_FloatRegister(n); - map->set_callee_saved(STACK_OFFSET(off), freg_name->as_VMReg()); - map->set_callee_saved(STACK_OFFSET(off+1), NEXTREG(freg_name)); - off += delta; - } - off = xmm0_off; - delta = xmm1_off - off; - for (int n = 0; n < num_xmm_regs; n++) { - XMMRegister xmm_name = as_XMMRegister(n); - map->set_callee_saved(STACK_OFFSET(off), xmm_name->as_VMReg()); - map->set_callee_saved(STACK_OFFSET(off+1), NEXTREG(xmm_name)); - off += delta; - } -#undef NEXTREG -#undef STACK_OFFSET - - return map; -} - -void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) { - int opmask_state_bytes = 0; - int additional_frame_bytes = 0; - int num_xmm_regs = XMMRegister::number_of_registers; - int ymm_bytes = num_xmm_regs * 16; - int zmm_bytes = num_xmm_regs * 32; - // Recover XMM & FPU state -#ifdef COMPILER2 - if (restore_vectors) { - assert(UseAVX > 0, "Vectors larger than 16 byte long are supported only with AVX"); - assert(MaxVectorSize <= 64, "Only up to 64 byte long vectors are supported"); - // Save upper half of YMM registers - additional_frame_bytes = ymm_bytes; - if (UseAVX > 2) { - // Save upper half of ZMM registers as well - additional_frame_bytes += zmm_bytes; - opmask_state_bytes = KRegister::number_of_registers * 8; - additional_frame_bytes += opmask_state_bytes; - } - } -#else - assert(!restore_vectors, "vectors are generated only by C2"); -#endif - - int off = xmm0_off; - int delta = xmm1_off - off; - - __ vzeroupper(); - - if (UseSSE == 1) { - // Restore XMM registers - assert(additional_frame_bytes == 0, ""); - for (int n = 0; n < num_xmm_regs; n++) { - __ movflt(as_XMMRegister(n), Address(rsp, off*wordSize)); - off += delta; - } - } else if (UseSSE >= 2) { - // Restore whole 128bit (16 bytes) XMM registers. Do this before restoring YMM and - // ZMM because the movdqu instruction zeros the upper part of the XMM register. - for (int n = 0; n < num_xmm_regs; n++) { - __ movdqu(as_XMMRegister(n), Address(rsp, off*wordSize+additional_frame_bytes)); - off += delta; - } - } - - if (restore_vectors) { - off = additional_frame_bytes - ymm_bytes; - // Restore upper half of YMM registers. - for (int n = 0; n < num_xmm_regs; n++) { - __ vinsertf128_high(as_XMMRegister(n), Address(rsp, n*16+off)); - } - if (UseAVX > 2) { - // Restore upper half of ZMM registers. - off = opmask_state_bytes; - for (int n = 0; n < num_xmm_regs; n++) { - __ vinsertf64x4_high(as_XMMRegister(n), Address(rsp, n*32+off)); - } - for (int n = 0; n < KRegister::number_of_registers; n++) { - __ kmov(as_KRegister(n), Address(rsp, n*8)); - } - } - __ addptr(rsp, additional_frame_bytes); - } - - __ pop_FPU_state(); - __ addptr(rsp, FPU_regs_live*wordSize); // Pop FPU registers - - __ popf(); - __ popa(); - // Get the rbp, described implicitly by the frame sender code (no oopMap) - __ pop(rbp); -} - -void RegisterSaver::restore_result_registers(MacroAssembler* masm) { - - // Just restore result register. Only used by deoptimization. By - // now any callee save register that needs to be restore to a c2 - // caller of the deoptee has been extracted into the vframeArray - // and will be stuffed into the c2i adapter we create for later - // restoration so only result registers need to be restored here. - // - - __ frstor(Address(rsp, 0)); // Restore fpu state - - // Recover XMM & FPU state - if( UseSSE == 1 ) { - __ movflt(xmm0, Address(rsp, xmm0_off*wordSize)); - } else if( UseSSE >= 2 ) { - __ movdbl(xmm0, Address(rsp, xmm0_off*wordSize)); - } - __ movptr(rax, Address(rsp, rax_off*wordSize)); - __ movptr(rdx, Address(rsp, rdx_off*wordSize)); - // Pop all of the register save are off the stack except the return address - __ addptr(rsp, return_off * wordSize); -} - -// Is vector's size (in bytes) bigger than a size saved by default? -// 16 bytes XMM registers are saved by default using SSE2 movdqu instructions. -// Note, MaxVectorSize == 0 with UseSSE < 2 and vectors are not generated. -bool SharedRuntime::is_wide_vector(int size) { - return size > 16; -} - -// The java_calling_convention describes stack locations as ideal slots on -// a frame with no abi restrictions. Since we must observe abi restrictions -// (like the placement of the register window) the slots must be biased by -// the following value. -static int reg2offset_in(VMReg r) { - // Account for saved rbp, and return address - // This should really be in_preserve_stack_slots - return (r->reg2stack() + 2) * VMRegImpl::stack_slot_size; -} - -static int reg2offset_out(VMReg r) { - return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; -} - -// --------------------------------------------------------------------------- -// Read the array of BasicTypes from a signature, and compute where the -// arguments should go. Values in the VMRegPair regs array refer to 4-byte -// quantities. Values less than SharedInfo::stack0 are registers, those above -// refer to 4-byte stack slots. All stack slots are based off of the stack pointer -// as framesizes are fixed. -// VMRegImpl::stack0 refers to the first slot 0(sp). -// and VMRegImpl::stack0+1 refers to the memory word 4-byes higher. -// Register up to Register::number_of_registers are the 32-bit -// integer registers. - -// Pass first two oop/int args in registers ECX and EDX. -// Pass first two float/double args in registers XMM0 and XMM1. -// Doubles have precedence, so if you pass a mix of floats and doubles -// the doubles will grab the registers before the floats will. - -// Note: the INPUTS in sig_bt are in units of Java argument words, which are -// either 32-bit or 64-bit depending on the build. The OUTPUTS are in 32-bit -// units regardless of build. Of course for i486 there is no 64 bit build - - -// --------------------------------------------------------------------------- -// The compiled Java calling convention. -// Pass first two oop/int args in registers ECX and EDX. -// Pass first two float/double args in registers XMM0 and XMM1. -// Doubles have precedence, so if you pass a mix of floats and doubles -// the doubles will grab the registers before the floats will. -int SharedRuntime::java_calling_convention(const BasicType *sig_bt, - VMRegPair *regs, - int total_args_passed) { - uint stack = 0; // Starting stack position for args on stack - - - // Pass first two oop/int args in registers ECX and EDX. - uint reg_arg0 = 9999; - uint reg_arg1 = 9999; - - // Pass first two float/double args in registers XMM0 and XMM1. - // Doubles have precedence, so if you pass a mix of floats and doubles - // the doubles will grab the registers before the floats will. - // CNC - TURNED OFF FOR non-SSE. - // On Intel we have to round all doubles (and most floats) at - // call sites by storing to the stack in any case. - // UseSSE=0 ==> Don't Use ==> 9999+0 - // UseSSE=1 ==> Floats only ==> 9999+1 - // UseSSE>=2 ==> Floats or doubles ==> 9999+2 - enum { fltarg_dontuse = 9999+0, fltarg_float_only = 9999+1, fltarg_flt_dbl = 9999+2 }; - uint fargs = (UseSSE>=2) ? 2 : UseSSE; - uint freg_arg0 = 9999+fargs; - uint freg_arg1 = 9999+fargs; - - // Pass doubles & longs aligned on the stack. First count stack slots for doubles - int i; - for( i = 0; i < total_args_passed; i++) { - if( sig_bt[i] == T_DOUBLE ) { - // first 2 doubles go in registers - if( freg_arg0 == fltarg_flt_dbl ) freg_arg0 = i; - else if( freg_arg1 == fltarg_flt_dbl ) freg_arg1 = i; - else // Else double is passed low on the stack to be aligned. - stack += 2; - } else if( sig_bt[i] == T_LONG ) { - stack += 2; - } - } - int dstack = 0; // Separate counter for placing doubles - - // Now pick where all else goes. - for( i = 0; i < total_args_passed; i++) { - // From the type and the argument number (count) compute the location - switch( sig_bt[i] ) { - case T_SHORT: - case T_CHAR: - case T_BYTE: - case T_BOOLEAN: - case T_INT: - case T_ARRAY: - case T_OBJECT: - case T_ADDRESS: - if( reg_arg0 == 9999 ) { - reg_arg0 = i; - regs[i].set1(rcx->as_VMReg()); - } else if( reg_arg1 == 9999 ) { - reg_arg1 = i; - regs[i].set1(rdx->as_VMReg()); - } else { - regs[i].set1(VMRegImpl::stack2reg(stack++)); - } - break; - case T_FLOAT: - if( freg_arg0 == fltarg_flt_dbl || freg_arg0 == fltarg_float_only ) { - freg_arg0 = i; - regs[i].set1(xmm0->as_VMReg()); - } else if( freg_arg1 == fltarg_flt_dbl || freg_arg1 == fltarg_float_only ) { - freg_arg1 = i; - regs[i].set1(xmm1->as_VMReg()); - } else { - regs[i].set1(VMRegImpl::stack2reg(stack++)); - } - break; - case T_LONG: - assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "missing Half" ); - regs[i].set2(VMRegImpl::stack2reg(dstack)); - dstack += 2; - break; - case T_DOUBLE: - assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "missing Half" ); - if( freg_arg0 == (uint)i ) { - regs[i].set2(xmm0->as_VMReg()); - } else if( freg_arg1 == (uint)i ) { - regs[i].set2(xmm1->as_VMReg()); - } else { - regs[i].set2(VMRegImpl::stack2reg(dstack)); - dstack += 2; - } - break; - case T_VOID: regs[i].set_bad(); break; - break; - default: - ShouldNotReachHere(); - break; - } - } - - return stack; -} - -// Patch the callers callsite with entry to compiled code if it exists. -static void patch_callers_callsite(MacroAssembler *masm) { - Label L; - __ cmpptr(Address(rbx, in_bytes(Method::code_offset())), NULL_WORD); - __ jcc(Assembler::equal, L); - // Schedule the branch target address early. - // Call into the VM to patch the caller, then jump to compiled callee - // rax, isn't live so capture return address while we easily can - __ movptr(rax, Address(rsp, 0)); - __ pusha(); - __ pushf(); - - if (UseSSE == 1) { - __ subptr(rsp, 2*wordSize); - __ movflt(Address(rsp, 0), xmm0); - __ movflt(Address(rsp, wordSize), xmm1); - } - if (UseSSE >= 2) { - __ subptr(rsp, 4*wordSize); - __ movdbl(Address(rsp, 0), xmm0); - __ movdbl(Address(rsp, 2*wordSize), xmm1); - } -#ifdef COMPILER2 - // C2 may leave the stack dirty if not in SSE2+ mode - if (UseSSE >= 2) { - __ verify_FPU(0, "c2i transition should have clean FPU stack"); - } else { - __ empty_FPU_stack(); - } -#endif /* COMPILER2 */ - - // VM needs caller's callsite - __ push(rax); - // VM needs target method - __ push(rbx); - __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite))); - __ addptr(rsp, 2*wordSize); - - if (UseSSE == 1) { - __ movflt(xmm0, Address(rsp, 0)); - __ movflt(xmm1, Address(rsp, wordSize)); - __ addptr(rsp, 2*wordSize); - } - if (UseSSE >= 2) { - __ movdbl(xmm0, Address(rsp, 0)); - __ movdbl(xmm1, Address(rsp, 2*wordSize)); - __ addptr(rsp, 4*wordSize); - } - - __ popf(); - __ popa(); - __ bind(L); -} - - -static void move_c2i_double(MacroAssembler *masm, XMMRegister r, int st_off) { - int next_off = st_off - Interpreter::stackElementSize; - __ movdbl(Address(rsp, next_off), r); -} - -static void gen_c2i_adapter(MacroAssembler *masm, - int total_args_passed, - int comp_args_on_stack, - const BasicType *sig_bt, - const VMRegPair *regs, - Label& skip_fixup) { - // Before we get into the guts of the C2I adapter, see if we should be here - // at all. We've come from compiled code and are attempting to jump to the - // interpreter, which means the caller made a static call to get here - // (vcalls always get a compiled target if there is one). Check for a - // compiled target. If there is one, we need to patch the caller's call. - patch_callers_callsite(masm); - - __ bind(skip_fixup); - -#ifdef COMPILER2 - // C2 may leave the stack dirty if not in SSE2+ mode - if (UseSSE >= 2) { - __ verify_FPU(0, "c2i transition should have clean FPU stack"); - } else { - __ empty_FPU_stack(); - } -#endif /* COMPILER2 */ - - // Since all args are passed on the stack, total_args_passed * interpreter_ - // stack_element_size is the - // space we need. - int extraspace = total_args_passed * Interpreter::stackElementSize; - - // Get return address - __ pop(rax); - - // set senderSP value - __ movptr(rsi, rsp); - - __ subptr(rsp, extraspace); - - // Now write the args into the outgoing interpreter space - for (int i = 0; i < total_args_passed; i++) { - if (sig_bt[i] == T_VOID) { - assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); - continue; - } - - // st_off points to lowest address on stack. - int st_off = ((total_args_passed - 1) - i) * Interpreter::stackElementSize; - int next_off = st_off - Interpreter::stackElementSize; - - // Say 4 args: - // i st_off - // 0 12 T_LONG - // 1 8 T_VOID - // 2 4 T_OBJECT - // 3 0 T_BOOL - VMReg r_1 = regs[i].first(); - VMReg r_2 = regs[i].second(); - if (!r_1->is_valid()) { - assert(!r_2->is_valid(), ""); - continue; - } - - if (r_1->is_stack()) { - // memory to memory use fpu stack top - int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace; - - if (!r_2->is_valid()) { - __ movl(rdi, Address(rsp, ld_off)); - __ movptr(Address(rsp, st_off), rdi); - } else { - - // ld_off == LSW, ld_off+VMRegImpl::stack_slot_size == MSW - // st_off == MSW, st_off-wordSize == LSW - - __ movptr(rdi, Address(rsp, ld_off)); - __ movptr(Address(rsp, next_off), rdi); - __ movptr(rdi, Address(rsp, ld_off + wordSize)); - __ movptr(Address(rsp, st_off), rdi); - } - } else if (r_1->is_Register()) { - Register r = r_1->as_Register(); - if (!r_2->is_valid()) { - __ movl(Address(rsp, st_off), r); - } else { - // long/double in gpr - ShouldNotReachHere(); - } - } else { - assert(r_1->is_XMMRegister(), ""); - if (!r_2->is_valid()) { - __ movflt(Address(rsp, st_off), r_1->as_XMMRegister()); - } else { - assert(sig_bt[i] == T_DOUBLE || sig_bt[i] == T_LONG, "wrong type"); - move_c2i_double(masm, r_1->as_XMMRegister(), st_off); - } - } - } - - // Schedule the branch target address early. - __ movptr(rcx, Address(rbx, in_bytes(Method::interpreter_entry_offset()))); - // And repush original return address - __ push(rax); - __ jmp(rcx); -} - - -static void move_i2c_double(MacroAssembler *masm, XMMRegister r, Register saved_sp, int ld_off) { - int next_val_off = ld_off - Interpreter::stackElementSize; - __ movdbl(r, Address(saved_sp, next_val_off)); -} - -static void range_check(MacroAssembler* masm, Register pc_reg, Register temp_reg, - address code_start, address code_end, - Label& L_ok) { - Label L_fail; - __ lea(temp_reg, AddressLiteral(code_start, relocInfo::none)); - __ cmpptr(pc_reg, temp_reg); - __ jcc(Assembler::belowEqual, L_fail); - __ lea(temp_reg, AddressLiteral(code_end, relocInfo::none)); - __ cmpptr(pc_reg, temp_reg); - __ jcc(Assembler::below, L_ok); - __ bind(L_fail); -} - -void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, - int total_args_passed, - int comp_args_on_stack, - const BasicType *sig_bt, - const VMRegPair *regs) { - // Note: rsi contains the senderSP on entry. We must preserve it since - // we may do a i2c -> c2i transition if we lose a race where compiled - // code goes non-entrant while we get args ready. - - // Adapters can be frameless because they do not require the caller - // to perform additional cleanup work, such as correcting the stack pointer. - // An i2c adapter is frameless because the *caller* frame, which is interpreted, - // routinely repairs its own stack pointer (from interpreter_frame_last_sp), - // even if a callee has modified the stack pointer. - // A c2i adapter is frameless because the *callee* frame, which is interpreted, - // routinely repairs its caller's stack pointer (from sender_sp, which is set - // up via the senderSP register). - // In other words, if *either* the caller or callee is interpreted, we can - // get the stack pointer repaired after a call. - // This is why c2i and i2c adapters cannot be indefinitely composed. - // In particular, if a c2i adapter were to somehow call an i2c adapter, - // both caller and callee would be compiled methods, and neither would - // clean up the stack pointer changes performed by the two adapters. - // If this happens, control eventually transfers back to the compiled - // caller, but with an uncorrected stack, causing delayed havoc. - - // Pick up the return address - __ movptr(rax, Address(rsp, 0)); - - if (VerifyAdapterCalls && - (Interpreter::code() != nullptr || StubRoutines::final_stubs_code() != nullptr)) { - // So, let's test for cascading c2i/i2c adapters right now. - // assert(Interpreter::contains($return_addr) || - // StubRoutines::contains($return_addr), - // "i2c adapter must return to an interpreter frame"); - __ block_comment("verify_i2c { "); - Label L_ok; - if (Interpreter::code() != nullptr) { - range_check(masm, rax, rdi, - Interpreter::code()->code_start(), Interpreter::code()->code_end(), - L_ok); - } - if (StubRoutines::initial_stubs_code() != nullptr) { - range_check(masm, rax, rdi, - StubRoutines::initial_stubs_code()->code_begin(), - StubRoutines::initial_stubs_code()->code_end(), - L_ok); - } - if (StubRoutines::final_stubs_code() != nullptr) { - range_check(masm, rax, rdi, - StubRoutines::final_stubs_code()->code_begin(), - StubRoutines::final_stubs_code()->code_end(), - L_ok); - } - const char* msg = "i2c adapter must return to an interpreter frame"; - __ block_comment(msg); - __ stop(msg); - __ bind(L_ok); - __ block_comment("} verify_i2ce "); - } - - // Must preserve original SP for loading incoming arguments because - // we need to align the outgoing SP for compiled code. - __ movptr(rdi, rsp); - - // Cut-out for having no stack args. Since up to 2 int/oop args are passed - // in registers, we will occasionally have no stack args. - int comp_words_on_stack = 0; - if (comp_args_on_stack) { - // Sig words on the stack are greater-than VMRegImpl::stack0. Those in - // registers are below. By subtracting stack0, we either get a negative - // number (all values in registers) or the maximum stack slot accessed. - // int comp_args_on_stack = VMRegImpl::reg2stack(max_arg); - // Convert 4-byte stack slots to words. - comp_words_on_stack = align_up(comp_args_on_stack*4, wordSize)>>LogBytesPerWord; - // Round up to miminum stack alignment, in wordSize - comp_words_on_stack = align_up(comp_words_on_stack, 2); - __ subptr(rsp, comp_words_on_stack * wordSize); - } - - // Align the outgoing SP - __ andptr(rsp, -(StackAlignmentInBytes)); - - // push the return address on the stack (note that pushing, rather - // than storing it, yields the correct frame alignment for the callee) - __ push(rax); - - // Put saved SP in another register - const Register saved_sp = rax; - __ movptr(saved_sp, rdi); - - - // Will jump to the compiled code just as if compiled code was doing it. - // Pre-load the register-jump target early, to schedule it better. - __ movptr(rdi, Address(rbx, in_bytes(Method::from_compiled_offset()))); - - // Now generate the shuffle code. Pick up all register args and move the - // rest through the floating point stack top. - for (int i = 0; i < total_args_passed; i++) { - if (sig_bt[i] == T_VOID) { - // Longs and doubles are passed in native word order, but misaligned - // in the 32-bit build. - assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); - continue; - } - - // Pick up 0, 1 or 2 words from SP+offset. - - assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), - "scrambled load targets?"); - // Load in argument order going down. - int ld_off = (total_args_passed - i) * Interpreter::stackElementSize; - // Point to interpreter value (vs. tag) - int next_off = ld_off - Interpreter::stackElementSize; - // - // - // - VMReg r_1 = regs[i].first(); - VMReg r_2 = regs[i].second(); - if (!r_1->is_valid()) { - assert(!r_2->is_valid(), ""); - continue; - } - if (r_1->is_stack()) { - // Convert stack slot to an SP offset (+ wordSize to account for return address ) - int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size + wordSize; - - // We can use rsi as a temp here because compiled code doesn't need rsi as an input - // and if we end up going thru a c2i because of a miss a reasonable value of rsi - // we be generated. - if (!r_2->is_valid()) { - // __ fld_s(Address(saved_sp, ld_off)); - // __ fstp_s(Address(rsp, st_off)); - __ movl(rsi, Address(saved_sp, ld_off)); - __ movptr(Address(rsp, st_off), rsi); - } else { - // Interpreter local[n] == MSW, local[n+1] == LSW however locals - // are accessed as negative so LSW is at LOW address - - // ld_off is MSW so get LSW - // st_off is LSW (i.e. reg.first()) - // __ fld_d(Address(saved_sp, next_off)); - // __ fstp_d(Address(rsp, st_off)); - // - // We are using two VMRegs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE - // the interpreter allocates two slots but only uses one for thr T_LONG or T_DOUBLE case - // So we must adjust where to pick up the data to match the interpreter. - // - // Interpreter local[n] == MSW, local[n+1] == LSW however locals - // are accessed as negative so LSW is at LOW address - - // ld_off is MSW so get LSW - __ movptr(rsi, Address(saved_sp, next_off)); - __ movptr(Address(rsp, st_off), rsi); - __ movptr(rsi, Address(saved_sp, ld_off)); - __ movptr(Address(rsp, st_off + wordSize), rsi); - } - } else if (r_1->is_Register()) { // Register argument - Register r = r_1->as_Register(); - assert(r != rax, "must be different"); - if (r_2->is_valid()) { - // - // We are using two VMRegs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE - // the interpreter allocates two slots but only uses one for thr T_LONG or T_DOUBLE case - // So we must adjust where to pick up the data to match the interpreter. - - // this can be a misaligned move - __ movptr(r, Address(saved_sp, next_off)); - assert(r_2->as_Register() != rax, "need another temporary register"); - // Remember r_1 is low address (and LSB on x86) - // So r_2 gets loaded from high address regardless of the platform - __ movptr(r_2->as_Register(), Address(saved_sp, ld_off)); - } else { - __ movl(r, Address(saved_sp, ld_off)); - } - } else { - assert(r_1->is_XMMRegister(), ""); - if (!r_2->is_valid()) { - __ movflt(r_1->as_XMMRegister(), Address(saved_sp, ld_off)); - } else { - move_i2c_double(masm, r_1->as_XMMRegister(), saved_sp, ld_off); - } - } - } - - // 6243940 We might end up in handle_wrong_method if - // the callee is deoptimized as we race thru here. If that - // happens we don't want to take a safepoint because the - // caller frame will look interpreted and arguments are now - // "compiled" so it is much better to make this transition - // invisible to the stack walking code. Unfortunately if - // we try and find the callee by normal means a safepoint - // is possible. So we stash the desired callee in the thread - // and the vm will find there should this case occur. - - __ get_thread(rax); - __ movptr(Address(rax, JavaThread::callee_target_offset()), rbx); - - // move Method* to rax, in case we end up in an c2i adapter. - // the c2i adapters expect Method* in rax, (c2) because c2's - // resolve stubs return the result (the method) in rax,. - // I'd love to fix this. - __ mov(rax, rbx); - - __ jmp(rdi); -} - -// --------------------------------------------------------------- -AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm, - int total_args_passed, - int comp_args_on_stack, - const BasicType *sig_bt, - const VMRegPair *regs, - AdapterFingerPrint* fingerprint) { - address i2c_entry = __ pc(); - - gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs); - - // ------------------------------------------------------------------------- - // Generate a C2I adapter. On entry we know rbx, holds the Method* during calls - // to the interpreter. The args start out packed in the compiled layout. They - // need to be unpacked into the interpreter layout. This will almost always - // require some stack space. We grow the current (compiled) stack, then repack - // the args. We finally end in a jump to the generic interpreter entry point. - // On exit from the interpreter, the interpreter will restore our SP (lest the - // compiled code, which relies solely on SP and not EBP, get sick). - - address c2i_unverified_entry = __ pc(); - Label skip_fixup; - - Register data = rax; - Register receiver = rcx; - Register temp = rbx; - - { - __ ic_check(1 /* end_alignment */); - __ movptr(rbx, Address(data, CompiledICData::speculated_method_offset())); - // Method might have been compiled since the call site was patched to - // interpreted if that is the case treat it as a miss so we can get - // the call site corrected. - __ cmpptr(Address(rbx, in_bytes(Method::code_offset())), NULL_WORD); - __ jcc(Assembler::equal, skip_fixup); - } - - address c2i_entry = __ pc(); - - BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); - bs->c2i_entry_barrier(masm); - - gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup); - - return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry); -} - -int SharedRuntime::c_calling_convention(const BasicType *sig_bt, - VMRegPair *regs, - int total_args_passed) { - -// We return the amount of VMRegImpl stack slots we need to reserve for all -// the arguments NOT counting out_preserve_stack_slots. - - uint stack = 0; // All arguments on stack - - for( int i = 0; i < total_args_passed; i++) { - // From the type and the argument number (count) compute the location - switch( sig_bt[i] ) { - case T_BOOLEAN: - case T_CHAR: - case T_FLOAT: - case T_BYTE: - case T_SHORT: - case T_INT: - case T_OBJECT: - case T_ARRAY: - case T_ADDRESS: - case T_METADATA: - regs[i].set1(VMRegImpl::stack2reg(stack++)); - break; - case T_LONG: - case T_DOUBLE: // The stack numbering is reversed from Java - // Since C arguments do not get reversed, the ordering for - // doubles on the stack must be opposite the Java convention - assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "missing Half" ); - regs[i].set2(VMRegImpl::stack2reg(stack)); - stack += 2; - break; - case T_VOID: regs[i].set_bad(); break; - default: - ShouldNotReachHere(); - break; - } - } - return stack; -} - -int SharedRuntime::vector_calling_convention(VMRegPair *regs, - uint num_bits, - uint total_args_passed) { - Unimplemented(); - return 0; -} - -// A simple move of integer like type -static void simple_move32(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { - if (src.first()->is_stack()) { - if (dst.first()->is_stack()) { - // stack to stack - // __ ld(FP, reg2offset(src.first()), L5); - // __ st(L5, SP, reg2offset(dst.first())); - __ movl2ptr(rax, Address(rbp, reg2offset_in(src.first()))); - __ movptr(Address(rsp, reg2offset_out(dst.first())), rax); - } else { - // stack to reg - __ movl2ptr(dst.first()->as_Register(), Address(rbp, reg2offset_in(src.first()))); - } - } else if (dst.first()->is_stack()) { - // reg to stack - // no need to sign extend on 64bit - __ movptr(Address(rsp, reg2offset_out(dst.first())), src.first()->as_Register()); - } else { - if (dst.first() != src.first()) { - __ mov(dst.first()->as_Register(), src.first()->as_Register()); - } - } -} - -// An oop arg. Must pass a handle not the oop itself -static void object_move(MacroAssembler* masm, - OopMap* map, - int oop_handle_offset, - int framesize_in_slots, - VMRegPair src, - VMRegPair dst, - bool is_receiver, - int* receiver_offset) { - - // Because of the calling conventions we know that src can be a - // register or a stack location. dst can only be a stack location. - - assert(dst.first()->is_stack(), "must be stack"); - // must pass a handle. First figure out the location we use as a handle - - if (src.first()->is_stack()) { - // Oop is already on the stack as an argument - Register rHandle = rax; - Label nil; - __ xorptr(rHandle, rHandle); - __ cmpptr(Address(rbp, reg2offset_in(src.first())), NULL_WORD); - __ jcc(Assembler::equal, nil); - __ lea(rHandle, Address(rbp, reg2offset_in(src.first()))); - __ bind(nil); - __ movptr(Address(rsp, reg2offset_out(dst.first())), rHandle); - - int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots(); - map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots)); - if (is_receiver) { - *receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size; - } - } else { - // Oop is in a register we must store it to the space we reserve - // on the stack for oop_handles - const Register rOop = src.first()->as_Register(); - const Register rHandle = rax; - int oop_slot = (rOop == rcx ? 0 : 1) * VMRegImpl::slots_per_word + oop_handle_offset; - int offset = oop_slot*VMRegImpl::stack_slot_size; - Label skip; - __ movptr(Address(rsp, offset), rOop); - map->set_oop(VMRegImpl::stack2reg(oop_slot)); - __ xorptr(rHandle, rHandle); - __ cmpptr(rOop, NULL_WORD); - __ jcc(Assembler::equal, skip); - __ lea(rHandle, Address(rsp, offset)); - __ bind(skip); - // Store the handle parameter - __ movptr(Address(rsp, reg2offset_out(dst.first())), rHandle); - if (is_receiver) { - *receiver_offset = offset; - } - } -} - -// A float arg may have to do float reg int reg conversion -static void float_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { - assert(!src.second()->is_valid() && !dst.second()->is_valid(), "bad float_move"); - - // Because of the calling convention we know that src is either a stack location - // or an xmm register. dst can only be a stack location. - - assert(dst.first()->is_stack() && ( src.first()->is_stack() || src.first()->is_XMMRegister()), "bad parameters"); - - if (src.first()->is_stack()) { - __ movl(rax, Address(rbp, reg2offset_in(src.first()))); - __ movptr(Address(rsp, reg2offset_out(dst.first())), rax); - } else { - // reg to stack - __ movflt(Address(rsp, reg2offset_out(dst.first())), src.first()->as_XMMRegister()); - } -} - -// A long move -static void long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { - - // The only legal possibility for a long_move VMRegPair is: - // 1: two stack slots (possibly unaligned) - // as neither the java or C calling convention will use registers - // for longs. - - if (src.first()->is_stack() && dst.first()->is_stack()) { - assert(src.second()->is_stack() && dst.second()->is_stack(), "must be all stack"); - __ movptr(rax, Address(rbp, reg2offset_in(src.first()))); - __ movptr(rbx, Address(rbp, reg2offset_in(src.second()))); - __ movptr(Address(rsp, reg2offset_out(dst.first())), rax); - __ movptr(Address(rsp, reg2offset_out(dst.second())), rbx); - } else { - ShouldNotReachHere(); - } -} - -// A double move -static void double_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { - - // The only legal possibilities for a double_move VMRegPair are: - // The painful thing here is that like long_move a VMRegPair might be - - // Because of the calling convention we know that src is either - // 1: a single physical register (xmm registers only) - // 2: two stack slots (possibly unaligned) - // dst can only be a pair of stack slots. - - assert(dst.first()->is_stack() && (src.first()->is_XMMRegister() || src.first()->is_stack()), "bad args"); - - if (src.first()->is_stack()) { - // source is all stack - __ movptr(rax, Address(rbp, reg2offset_in(src.first()))); - __ movptr(rbx, Address(rbp, reg2offset_in(src.second()))); - __ movptr(Address(rsp, reg2offset_out(dst.first())), rax); - __ movptr(Address(rsp, reg2offset_out(dst.second())), rbx); - } else { - // reg to stack - // No worries about stack alignment - __ movdbl(Address(rsp, reg2offset_out(dst.first())), src.first()->as_XMMRegister()); - } -} - - -void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { - // We always ignore the frame_slots arg and just use the space just below frame pointer - // which by this time is free to use - switch (ret_type) { - case T_FLOAT: - __ fstp_s(Address(rbp, -wordSize)); - break; - case T_DOUBLE: - __ fstp_d(Address(rbp, -2*wordSize)); - break; - case T_VOID: break; - case T_LONG: - __ movptr(Address(rbp, -wordSize), rax); - __ movptr(Address(rbp, -2*wordSize), rdx); - break; - default: { - __ movptr(Address(rbp, -wordSize), rax); - } - } -} - -void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { - // We always ignore the frame_slots arg and just use the space just below frame pointer - // which by this time is free to use - switch (ret_type) { - case T_FLOAT: - __ fld_s(Address(rbp, -wordSize)); - break; - case T_DOUBLE: - __ fld_d(Address(rbp, -2*wordSize)); - break; - case T_LONG: - __ movptr(rax, Address(rbp, -wordSize)); - __ movptr(rdx, Address(rbp, -2*wordSize)); - break; - case T_VOID: break; - default: { - __ movptr(rax, Address(rbp, -wordSize)); - } - } -} - -static void verify_oop_args(MacroAssembler* masm, - const methodHandle& method, - const BasicType* sig_bt, - const VMRegPair* regs) { - Register temp_reg = rbx; // not part of any compiled calling seq - if (VerifyOops) { - for (int i = 0; i < method->size_of_parameters(); i++) { - if (is_reference_type(sig_bt[i])) { - VMReg r = regs[i].first(); - assert(r->is_valid(), "bad oop arg"); - if (r->is_stack()) { - __ movptr(temp_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize)); - __ verify_oop(temp_reg); - } else { - __ verify_oop(r->as_Register()); - } - } - } - } -} - -static void gen_special_dispatch(MacroAssembler* masm, - const methodHandle& method, - const BasicType* sig_bt, - const VMRegPair* regs) { - verify_oop_args(masm, method, sig_bt, regs); - vmIntrinsics::ID iid = method->intrinsic_id(); - - // Now write the args into the outgoing interpreter space - bool has_receiver = false; - Register receiver_reg = noreg; - int member_arg_pos = -1; - Register member_reg = noreg; - int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid); - if (ref_kind != 0) { - member_arg_pos = method->size_of_parameters() - 1; // trailing MemberName argument - member_reg = rbx; // known to be free at this point - has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind); - } else if (iid == vmIntrinsics::_invokeBasic) { - has_receiver = true; - } else { - fatal("unexpected intrinsic id %d", vmIntrinsics::as_int(iid)); - } - - if (member_reg != noreg) { - // Load the member_arg into register, if necessary. - SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs); - VMReg r = regs[member_arg_pos].first(); - if (r->is_stack()) { - __ movptr(member_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize)); - } else { - // no data motion is needed - member_reg = r->as_Register(); - } - } - - if (has_receiver) { - // Make sure the receiver is loaded into a register. - assert(method->size_of_parameters() > 0, "oob"); - assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object"); - VMReg r = regs[0].first(); - assert(r->is_valid(), "bad receiver arg"); - if (r->is_stack()) { - // Porting note: This assumes that compiled calling conventions always - // pass the receiver oop in a register. If this is not true on some - // platform, pick a temp and load the receiver from stack. - fatal("receiver always in a register"); - receiver_reg = rcx; // known to be free at this point - __ movptr(receiver_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize)); - } else { - // no data motion is needed - receiver_reg = r->as_Register(); - } - } - - // Figure out which address we are really jumping to: - MethodHandles::generate_method_handle_dispatch(masm, iid, - receiver_reg, member_reg, /*for_compiler_entry:*/ true); -} - -// --------------------------------------------------------------------------- -// Generate a native wrapper for a given method. The method takes arguments -// in the Java compiled code convention, marshals them to the native -// convention (handlizes oops, etc), transitions to native, makes the call, -// returns to java state (possibly blocking), unhandlizes any result and -// returns. -// -// Critical native functions are a shorthand for the use of -// GetPrimtiveArrayCritical and disallow the use of any other JNI -// functions. The wrapper is expected to unpack the arguments before -// passing them to the callee. Critical native functions leave the state _in_Java, -// since they cannot stop for GC. -// Some other parts of JNI setup are skipped like the tear down of the JNI handle -// block and the check for pending exceptions it's impossible for them -// to be thrown. -// -// -nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, - const methodHandle& method, - int compile_id, - BasicType* in_sig_bt, - VMRegPair* in_regs, - BasicType ret_type) { - if (method->is_method_handle_intrinsic()) { - vmIntrinsics::ID iid = method->intrinsic_id(); - intptr_t start = (intptr_t)__ pc(); - int vep_offset = ((intptr_t)__ pc()) - start; - gen_special_dispatch(masm, - method, - in_sig_bt, - in_regs); - int frame_complete = ((intptr_t)__ pc()) - start; // not complete, period - __ flush(); - int stack_slots = SharedRuntime::out_preserve_stack_slots(); // no out slots at all, actually - return nmethod::new_native_nmethod(method, - compile_id, - masm->code(), - vep_offset, - frame_complete, - stack_slots / VMRegImpl::slots_per_word, - in_ByteSize(-1), - in_ByteSize(-1), - (OopMapSet*)nullptr); - } - address native_func = method->native_function(); - assert(native_func != nullptr, "must have function"); - - // An OopMap for lock (and class if static) - OopMapSet *oop_maps = new OopMapSet(); - - // We have received a description of where all the java arg are located - // on entry to the wrapper. We need to convert these args to where - // the jni function will expect them. To figure out where they go - // we convert the java signature to a C signature by inserting - // the hidden arguments as arg[0] and possibly arg[1] (static method) - - const int total_in_args = method->size_of_parameters(); - int total_c_args = total_in_args + (method->is_static() ? 2 : 1); - - BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args); - VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args); - - int argc = 0; - out_sig_bt[argc++] = T_ADDRESS; - if (method->is_static()) { - out_sig_bt[argc++] = T_OBJECT; - } - - for (int i = 0; i < total_in_args ; i++ ) { - out_sig_bt[argc++] = in_sig_bt[i]; - } - - // Now figure out where the args must be stored and how much stack space - // they require. - int out_arg_slots; - out_arg_slots = c_calling_convention(out_sig_bt, out_regs, total_c_args); - - // Compute framesize for the wrapper. We need to handlize all oops in - // registers a max of 2 on x86. - - // Calculate the total number of stack slots we will need. - - // First count the abi requirement plus all of the outgoing args - int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots; - - // Now the space for the inbound oop handle area - int total_save_slots = 2 * VMRegImpl::slots_per_word; // 2 arguments passed in registers - - int oop_handle_offset = stack_slots; - stack_slots += total_save_slots; - - // Now any space we need for handlizing a klass if static method - - int klass_slot_offset = 0; - int klass_offset = -1; - int lock_slot_offset = 0; - bool is_static = false; - - if (method->is_static()) { - klass_slot_offset = stack_slots; - stack_slots += VMRegImpl::slots_per_word; - klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size; - is_static = true; - } - - // Plus a lock if needed - - if (method->is_synchronized()) { - lock_slot_offset = stack_slots; - stack_slots += VMRegImpl::slots_per_word; - } - - // Now a place (+2) to save return values or temp during shuffling - // + 2 for return address (which we own) and saved rbp, - stack_slots += 4; - - // Ok The space we have allocated will look like: - // - // - // FP-> | | - // |---------------------| - // | 2 slots for moves | - // |---------------------| - // | lock box (if sync) | - // |---------------------| <- lock_slot_offset (-lock_slot_rbp_offset) - // | klass (if static) | - // |---------------------| <- klass_slot_offset - // | oopHandle area | - // |---------------------| <- oop_handle_offset (a max of 2 registers) - // | outbound memory | - // | based arguments | - // | | - // |---------------------| - // | | - // SP-> | out_preserved_slots | - // - // - // **************************************************************************** - // WARNING - on Windows Java Natives use pascal calling convention and pop the - // arguments off of the stack after the jni call. Before the call we can use - // instructions that are SP relative. After the jni call we switch to FP - // relative instructions instead of re-adjusting the stack on windows. - // **************************************************************************** - - - // Now compute actual number of stack words we need rounding to make - // stack properly aligned. - stack_slots = align_up(stack_slots, StackAlignmentInSlots); - - int stack_size = stack_slots * VMRegImpl::stack_slot_size; - - intptr_t start = (intptr_t)__ pc(); - - // First thing make an ic check to see if we should even be here - - // We are free to use all registers as temps without saving them and - // restoring them except rbp. rbp is the only callee save register - // as far as the interpreter and the compiler(s) are concerned. - - - const Register receiver = rcx; - Label exception_pending; - - __ verify_oop(receiver); - // verified entry must be aligned for code patching. - __ ic_check(8 /* end_alignment */); - - int vep_offset = ((intptr_t)__ pc()) - start; - -#ifdef COMPILER1 - // For Object.hashCode, System.identityHashCode try to pull hashCode from object header if available. - if ((InlineObjectHash && method->intrinsic_id() == vmIntrinsics::_hashCode) || (method->intrinsic_id() == vmIntrinsics::_identityHashCode)) { - inline_check_hashcode_from_object_header(masm, method, rcx /*obj_reg*/, rax /*result*/); - } -#endif // COMPILER1 - - // The instruction at the verified entry point must be 5 bytes or longer - // because it can be patched on the fly by make_non_entrant. The stack bang - // instruction fits that requirement. - - // Generate stack overflow check - __ bang_stack_with_offset((int)StackOverflow::stack_shadow_zone_size()); - - // Generate a new frame for the wrapper. - __ enter(); - // -2 because return address is already present and so is saved rbp - __ subptr(rsp, stack_size - 2*wordSize); - - - BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); - bs->nmethod_entry_barrier(masm, nullptr /* slow_path */, nullptr /* continuation */); - - // Frame is now completed as far as size and linkage. - int frame_complete = ((intptr_t)__ pc()) - start; - - // Calculate the difference between rsp and rbp,. We need to know it - // after the native call because on windows Java Natives will pop - // the arguments and it is painful to do rsp relative addressing - // in a platform independent way. So after the call we switch to - // rbp, relative addressing. - - int fp_adjustment = stack_size - 2*wordSize; - -#ifdef COMPILER2 - // C2 may leave the stack dirty if not in SSE2+ mode - if (UseSSE >= 2) { - __ verify_FPU(0, "c2i transition should have clean FPU stack"); - } else { - __ empty_FPU_stack(); - } -#endif /* COMPILER2 */ - - // Compute the rbp, offset for any slots used after the jni call - - int lock_slot_rbp_offset = (lock_slot_offset*VMRegImpl::stack_slot_size) - fp_adjustment; - - // We use rdi as a thread pointer because it is callee save and - // if we load it once it is usable thru the entire wrapper - const Register thread = rdi; - - // We use rsi as the oop handle for the receiver/klass - // It is callee save so it survives the call to native - - const Register oop_handle_reg = rsi; - - __ get_thread(thread); - - // - // We immediately shuffle the arguments so that any vm call we have to - // make from here on out (sync slow path, jvmti, etc.) we will have - // captured the oops from our caller and have a valid oopMap for - // them. - - // ----------------- - // The Grand Shuffle - // - // Natives require 1 or 2 extra arguments over the normal ones: the JNIEnv* - // and, if static, the class mirror instead of a receiver. This pretty much - // guarantees that register layout will not match (and x86 doesn't use reg - // parms though amd does). Since the native abi doesn't use register args - // and the java conventions does we don't have to worry about collisions. - // All of our moved are reg->stack or stack->stack. - // We ignore the extra arguments during the shuffle and handle them at the - // last moment. The shuffle is described by the two calling convention - // vectors we have in our possession. We simply walk the java vector to - // get the source locations and the c vector to get the destinations. - - int c_arg = method->is_static() ? 2 : 1; - - // Record rsp-based slot for receiver on stack for non-static methods - int receiver_offset = -1; - - // This is a trick. We double the stack slots so we can claim - // the oops in the caller's frame. Since we are sure to have - // more args than the caller doubling is enough to make - // sure we can capture all the incoming oop args from the - // caller. - // - OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/); - - // Mark location of rbp, - // map->set_callee_saved(VMRegImpl::stack2reg( stack_slots - 2), stack_slots * 2, 0, rbp->as_VMReg()); - - // We know that we only have args in at most two integer registers (rcx, rdx). So rax, rbx - // Are free to temporaries if we have to do stack to steck moves. - // All inbound args are referenced based on rbp, and all outbound args via rsp. - - for (int i = 0; i < total_in_args ; i++, c_arg++ ) { - switch (in_sig_bt[i]) { - case T_ARRAY: - case T_OBJECT: - object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg], - ((i == 0) && (!is_static)), - &receiver_offset); - break; - case T_VOID: - break; - - case T_FLOAT: - float_move(masm, in_regs[i], out_regs[c_arg]); - break; - - case T_DOUBLE: - assert( i + 1 < total_in_args && - in_sig_bt[i + 1] == T_VOID && - out_sig_bt[c_arg+1] == T_VOID, "bad arg list"); - double_move(masm, in_regs[i], out_regs[c_arg]); - break; - - case T_LONG : - long_move(masm, in_regs[i], out_regs[c_arg]); - break; - - case T_ADDRESS: assert(false, "found T_ADDRESS in java args"); - - default: - simple_move32(masm, in_regs[i], out_regs[c_arg]); - } - } - - // Pre-load a static method's oop into rsi. Used both by locking code and - // the normal JNI call code. - if (method->is_static()) { - - // load opp into a register - __ movoop(oop_handle_reg, JNIHandles::make_local(method->method_holder()->java_mirror())); - - // Now handlize the static class mirror it's known not-null. - __ movptr(Address(rsp, klass_offset), oop_handle_reg); - map->set_oop(VMRegImpl::stack2reg(klass_slot_offset)); - - // Now get the handle - __ lea(oop_handle_reg, Address(rsp, klass_offset)); - // store the klass handle as second argument - __ movptr(Address(rsp, wordSize), oop_handle_reg); - } - - // Change state to native (we save the return address in the thread, since it might not - // be pushed on the stack when we do a stack traversal). It is enough that the pc() - // points into the right code segment. It does not have to be the correct return pc. - // We use the same pc/oopMap repeatedly when we call out - - intptr_t the_pc = (intptr_t) __ pc(); - oop_maps->add_gc_map(the_pc - start, map); - - __ set_last_Java_frame(thread, rsp, noreg, (address)the_pc, noreg); - - - // We have all of the arguments setup at this point. We must not touch any register - // argument registers at this point (what if we save/restore them there are no oop? - - if (DTraceMethodProbes) { - __ mov_metadata(rax, method()); - __ call_VM_leaf( - CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), - thread, rax); - } - - // RedefineClasses() tracing support for obsolete method entry - if (log_is_enabled(Trace, redefine, class, obsolete)) { - __ mov_metadata(rax, method()); - __ call_VM_leaf( - CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry), - thread, rax); - } - - // These are register definitions we need for locking/unlocking - const Register swap_reg = rax; // Must use rax, for cmpxchg instruction - const Register obj_reg = rcx; // Will contain the oop - const Register lock_reg = rdx; // Address of compiler lock object (BasicLock) - - Label slow_path_lock; - Label lock_done; - - // Lock a synchronized method - if (method->is_synchronized()) { - Label count_mon; - - const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes(); - - // Get the handle (the 2nd argument) - __ movptr(oop_handle_reg, Address(rsp, wordSize)); - - // Get address of the box - - __ lea(lock_reg, Address(rbp, lock_slot_rbp_offset)); - - // Load the oop from the handle - __ movptr(obj_reg, Address(oop_handle_reg, 0)); - - if (LockingMode == LM_MONITOR) { - __ jmp(slow_path_lock); - } else if (LockingMode == LM_LEGACY) { - // Load immediate 1 into swap_reg %rax, - __ movptr(swap_reg, 1); - - // Load (object->mark() | 1) into swap_reg %rax, - __ orptr(swap_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); - - // Save (object->mark() | 1) into BasicLock's displaced header - __ movptr(Address(lock_reg, mark_word_offset), swap_reg); - - // src -> dest iff dest == rax, else rax, <- dest - // *obj_reg = lock_reg iff *obj_reg == rax, else rax, = *(obj_reg) - __ lock(); - __ cmpxchgptr(lock_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); - __ jcc(Assembler::equal, count_mon); - - // Test if the oopMark is an obvious stack pointer, i.e., - // 1) (mark & 3) == 0, and - // 2) rsp <= mark < mark + os::pagesize() - // These 3 tests can be done by evaluating the following - // expression: ((mark - rsp) & (3 - os::vm_page_size())), - // assuming both stack pointer and pagesize have their - // least significant 2 bits clear. - // NOTE: the oopMark is in swap_reg %rax, as the result of cmpxchg - - __ subptr(swap_reg, rsp); - __ andptr(swap_reg, 3 - (int)os::vm_page_size()); - - // Save the test result, for recursive case, the result is zero - __ movptr(Address(lock_reg, mark_word_offset), swap_reg); - __ jcc(Assembler::notEqual, slow_path_lock); - } else { - assert(LockingMode == LM_LIGHTWEIGHT, "must be"); - // Lacking registers and thread on x86_32. Always take slow path. - __ jmp(slow_path_lock); - } - __ bind(count_mon); - __ inc_held_monitor_count(); - - // Slow path will re-enter here - __ bind(lock_done); - } - - - // Finally just about ready to make the JNI call - - // get JNIEnv* which is first argument to native - __ lea(rdx, Address(thread, in_bytes(JavaThread::jni_environment_offset()))); - __ movptr(Address(rsp, 0), rdx); - - // Now set thread in native - __ movl(Address(thread, JavaThread::thread_state_offset()), _thread_in_native); - - __ call(RuntimeAddress(native_func)); - - // Verify or restore cpu control state after JNI call - __ restore_cpu_control_state_after_jni(noreg); - - // WARNING - on Windows Java Natives use pascal calling convention and pop the - // arguments off of the stack. We could just re-adjust the stack pointer here - // and continue to do SP relative addressing but we instead switch to FP - // relative addressing. - - // Unpack native results. - switch (ret_type) { - case T_BOOLEAN: __ c2bool(rax); break; - case T_CHAR : __ andptr(rax, 0xFFFF); break; - case T_BYTE : __ sign_extend_byte (rax); break; - case T_SHORT : __ sign_extend_short(rax); break; - case T_INT : /* nothing to do */ break; - case T_DOUBLE : - case T_FLOAT : - // Result is in st0 we'll save as needed - break; - case T_ARRAY: // Really a handle - case T_OBJECT: // Really a handle - break; // can't de-handlize until after safepoint check - case T_VOID: break; - case T_LONG: break; - default : ShouldNotReachHere(); - } - - // Switch thread to "native transition" state before reading the synchronization state. - // This additional state is necessary because reading and testing the synchronization - // state is not atomic w.r.t. GC, as this scenario demonstrates: - // Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted. - // VM thread changes sync state to synchronizing and suspends threads for GC. - // Thread A is resumed to finish this native method, but doesn't block here since it - // didn't see any synchronization is progress, and escapes. - __ movl(Address(thread, JavaThread::thread_state_offset()), _thread_in_native_trans); - - // Force this write out before the read below - if (!UseSystemMemoryBarrier) { - __ membar(Assembler::Membar_mask_bits( - Assembler::LoadLoad | Assembler::LoadStore | - Assembler::StoreLoad | Assembler::StoreStore)); - } - - if (AlwaysRestoreFPU) { - // Make sure the control word is correct. - __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std())); - } - - // check for safepoint operation in progress and/or pending suspend requests - { Label Continue, slow_path; - - __ safepoint_poll(slow_path, thread, true /* at_return */, false /* in_nmethod */); - - __ cmpl(Address(thread, JavaThread::suspend_flags_offset()), 0); - __ jcc(Assembler::equal, Continue); - __ bind(slow_path); - - // Don't use call_VM as it will see a possible pending exception and forward it - // and never return here preventing us from clearing _last_native_pc down below. - // Also can't use call_VM_leaf either as it will check to see if rsi & rdi are - // preserved and correspond to the bcp/locals pointers. So we do a runtime call - // by hand. - // - __ vzeroupper(); - - save_native_result(masm, ret_type, stack_slots); - __ push(thread); - __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, - JavaThread::check_special_condition_for_native_trans))); - __ increment(rsp, wordSize); - // Restore any method result value - restore_native_result(masm, ret_type, stack_slots); - __ bind(Continue); - } - - // change thread state - __ movl(Address(thread, JavaThread::thread_state_offset()), _thread_in_Java); - - Label reguard; - Label reguard_done; - __ cmpl(Address(thread, JavaThread::stack_guard_state_offset()), StackOverflow::stack_guard_yellow_reserved_disabled); - __ jcc(Assembler::equal, reguard); - - // slow path reguard re-enters here - __ bind(reguard_done); - - // Handle possible exception (will unlock if necessary) - - // native result if any is live - - // Unlock - Label slow_path_unlock; - Label unlock_done; - if (method->is_synchronized()) { - - Label fast_done; - - // Get locked oop from the handle we passed to jni - __ movptr(obj_reg, Address(oop_handle_reg, 0)); - - if (LockingMode == LM_LEGACY) { - Label not_recur; - // Simple recursive lock? - __ cmpptr(Address(rbp, lock_slot_rbp_offset), NULL_WORD); - __ jcc(Assembler::notEqual, not_recur); - __ dec_held_monitor_count(); - __ jmpb(fast_done); - __ bind(not_recur); - } - - // Must save rax, if it is live now because cmpxchg must use it - if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { - save_native_result(masm, ret_type, stack_slots); - } - - if (LockingMode == LM_MONITOR) { - __ jmp(slow_path_unlock); - } else if (LockingMode == LM_LEGACY) { - // get old displaced header - __ movptr(rbx, Address(rbp, lock_slot_rbp_offset)); - - // get address of the stack lock - __ lea(rax, Address(rbp, lock_slot_rbp_offset)); - - // Atomic swap old header if oop still contains the stack lock - // src -> dest iff dest == rax, else rax, <- dest - // *obj_reg = rbx, iff *obj_reg == rax, else rax, = *(obj_reg) - __ lock(); - __ cmpxchgptr(rbx, Address(obj_reg, oopDesc::mark_offset_in_bytes())); - __ jcc(Assembler::notEqual, slow_path_unlock); - __ dec_held_monitor_count(); - } else { - assert(LockingMode == LM_LIGHTWEIGHT, "must be"); - __ lightweight_unlock(obj_reg, swap_reg, thread, lock_reg, slow_path_unlock); - __ dec_held_monitor_count(); - } - - // slow path re-enters here - __ bind(unlock_done); - if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { - restore_native_result(masm, ret_type, stack_slots); - } - - __ bind(fast_done); - } - - if (DTraceMethodProbes) { - // Tell dtrace about this method exit - save_native_result(masm, ret_type, stack_slots); - __ mov_metadata(rax, method()); - __ call_VM_leaf( - CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), - thread, rax); - restore_native_result(masm, ret_type, stack_slots); - } - - // We can finally stop using that last_Java_frame we setup ages ago - - __ reset_last_Java_frame(thread, false); - - // Unbox oop result, e.g. JNIHandles::resolve value. - if (is_reference_type(ret_type)) { - __ resolve_jobject(rax /* value */, - thread /* thread */, - rcx /* tmp */); - } - - if (CheckJNICalls) { - // clear_pending_jni_exception_check - __ movptr(Address(thread, JavaThread::pending_jni_exception_check_fn_offset()), NULL_WORD); - } - - // reset handle block - __ movptr(rcx, Address(thread, JavaThread::active_handles_offset())); - __ movl(Address(rcx, JNIHandleBlock::top_offset()), NULL_WORD); - - // Any exception pending? - __ cmpptr(Address(thread, in_bytes(Thread::pending_exception_offset())), NULL_WORD); - __ jcc(Assembler::notEqual, exception_pending); - - // no exception, we're almost done - - // check that only result value is on FPU stack - __ verify_FPU(ret_type == T_FLOAT || ret_type == T_DOUBLE ? 1 : 0, "native_wrapper normal exit"); - - // Fixup floating pointer results so that result looks like a return from a compiled method - if (ret_type == T_FLOAT) { - if (UseSSE >= 1) { - // Pop st0 and store as float and reload into xmm register - __ fstp_s(Address(rbp, -4)); - __ movflt(xmm0, Address(rbp, -4)); - } - } else if (ret_type == T_DOUBLE) { - if (UseSSE >= 2) { - // Pop st0 and store as double and reload into xmm register - __ fstp_d(Address(rbp, -8)); - __ movdbl(xmm0, Address(rbp, -8)); - } - } - - // Return - - __ leave(); - __ ret(0); - - // Unexpected paths are out of line and go here - - // Slow path locking & unlocking - if (method->is_synchronized()) { - - // BEGIN Slow path lock - - __ bind(slow_path_lock); - - // has last_Java_frame setup. No exceptions so do vanilla call not call_VM - // args are (oop obj, BasicLock* lock, JavaThread* thread) - __ push(thread); - __ push(lock_reg); - __ push(obj_reg); - __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C))); - __ addptr(rsp, 3*wordSize); - -#ifdef ASSERT - { Label L; - __ cmpptr(Address(thread, in_bytes(Thread::pending_exception_offset())), NULL_WORD); - __ jcc(Assembler::equal, L); - __ stop("no pending exception allowed on exit from monitorenter"); - __ bind(L); - } -#endif - __ jmp(lock_done); - - // END Slow path lock - - // BEGIN Slow path unlock - __ bind(slow_path_unlock); - __ vzeroupper(); - // Slow path unlock - - if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { - save_native_result(masm, ret_type, stack_slots); - } - // Save pending exception around call to VM (which contains an EXCEPTION_MARK) - - __ pushptr(Address(thread, in_bytes(Thread::pending_exception_offset()))); - __ movptr(Address(thread, in_bytes(Thread::pending_exception_offset())), NULL_WORD); - - - // should be a peal - // +wordSize because of the push above - // args are (oop obj, BasicLock* lock, JavaThread* thread) - __ push(thread); - __ lea(rax, Address(rbp, lock_slot_rbp_offset)); - __ push(rax); - - __ push(obj_reg); - __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C))); - __ addptr(rsp, 3*wordSize); -#ifdef ASSERT - { - Label L; - __ cmpptr(Address(thread, in_bytes(Thread::pending_exception_offset())), NULL_WORD); - __ jcc(Assembler::equal, L); - __ stop("no pending exception allowed on exit complete_monitor_unlocking_C"); - __ bind(L); - } -#endif /* ASSERT */ - - __ popptr(Address(thread, in_bytes(Thread::pending_exception_offset()))); - - if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { - restore_native_result(masm, ret_type, stack_slots); - } - __ jmp(unlock_done); - // END Slow path unlock - - } - - // SLOW PATH Reguard the stack if needed - - __ bind(reguard); - __ vzeroupper(); - save_native_result(masm, ret_type, stack_slots); - { - __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages))); - } - restore_native_result(masm, ret_type, stack_slots); - __ jmp(reguard_done); - - - // BEGIN EXCEPTION PROCESSING - - // Forward the exception - __ bind(exception_pending); - - // remove possible return value from FPU register stack - __ empty_FPU_stack(); - - // pop our frame - __ leave(); - // and forward the exception - __ jump(RuntimeAddress(StubRoutines::forward_exception_entry())); - - __ flush(); - - nmethod *nm = nmethod::new_native_nmethod(method, - compile_id, - masm->code(), - vep_offset, - frame_complete, - stack_slots / VMRegImpl::slots_per_word, - (is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)), - in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size), - oop_maps); - - return nm; - -} - -// this function returns the adjust size (in number of words) to a c2i adapter -// activation for use during deoptimization -int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals ) { - return (callee_locals - callee_parameters) * Interpreter::stackElementWords; -} - - -// Number of stack slots between incoming argument block and the start of -// a new frame. The PROLOG must add this many slots to the stack. The -// EPILOG must remove this many slots. Intel needs one slot for -// return address and one for rbp, (must save rbp) -uint SharedRuntime::in_preserve_stack_slots() { - return 2+VerifyStackAtCalls; -} - -uint SharedRuntime::out_preserve_stack_slots() { - return 0; -} - -VMReg SharedRuntime::thread_register() { - Unimplemented(); - return nullptr; -} - -//------------------------------generate_deopt_blob---------------------------- -void SharedRuntime::generate_deopt_blob() { - // allocate space for the code - ResourceMark rm; - // setup code generation tools - // note: the buffer code size must account for StackShadowPages=50 - const char* name = SharedRuntime::stub_name(SharedStubId::deopt_id); - CodeBuffer buffer(name, 1536, 1024); - MacroAssembler* masm = new MacroAssembler(&buffer); - int frame_size_in_words; - OopMap* map = nullptr; - // Account for the extra args we place on the stack - // by the time we call fetch_unroll_info - const int additional_words = 2; // deopt kind, thread - - OopMapSet *oop_maps = new OopMapSet(); - - // ------------- - // This code enters when returning to a de-optimized nmethod. A return - // address has been pushed on the stack, and return values are in - // registers. - // If we are doing a normal deopt then we were called from the patched - // nmethod from the point we returned to the nmethod. So the return - // address on the stack is wrong by NativeCall::instruction_size - // We will adjust the value to it looks like we have the original return - // address on the stack (like when we eagerly deoptimized). - // In the case of an exception pending with deoptimized then we enter - // with a return address on the stack that points after the call we patched - // into the exception handler. We have the following register state: - // rax,: exception - // rbx,: exception handler - // rdx: throwing pc - // So in this case we simply jam rdx into the useless return address and - // the stack looks just like we want. - // - // At this point we need to de-opt. We save the argument return - // registers. We call the first C routine, fetch_unroll_info(). This - // routine captures the return values and returns a structure which - // describes the current frame size and the sizes of all replacement frames. - // The current frame is compiled code and may contain many inlined - // functions, each with their own JVM state. We pop the current frame, then - // push all the new frames. Then we call the C routine unpack_frames() to - // populate these frames. Finally unpack_frames() returns us the new target - // address. Notice that callee-save registers are BLOWN here; they have - // already been captured in the vframeArray at the time the return PC was - // patched. - address start = __ pc(); - Label cont; - - // Prolog for non exception case! - - // Save everything in sight. - - map = RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words, false); - // Normal deoptimization - __ push(Deoptimization::Unpack_deopt); - __ jmp(cont); - - int reexecute_offset = __ pc() - start; - - // Reexecute case - // return address is the pc describes what bci to do re-execute at - - // No need to update map as each call to save_live_registers will produce identical oopmap - (void) RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words, false); - - __ push(Deoptimization::Unpack_reexecute); - __ jmp(cont); - - int exception_offset = __ pc() - start; - - // Prolog for exception case - - // all registers are dead at this entry point, except for rax, and - // rdx which contain the exception oop and exception pc - // respectively. Set them in TLS and fall thru to the - // unpack_with_exception_in_tls entry point. - - __ get_thread(rdi); - __ movptr(Address(rdi, JavaThread::exception_pc_offset()), rdx); - __ movptr(Address(rdi, JavaThread::exception_oop_offset()), rax); - - int exception_in_tls_offset = __ pc() - start; - - // new implementation because exception oop is now passed in JavaThread - - // Prolog for exception case - // All registers must be preserved because they might be used by LinearScan - // Exceptiop oop and throwing PC are passed in JavaThread - // tos: stack at point of call to method that threw the exception (i.e. only - // args are on the stack, no return address) - - // make room on stack for the return address - // It will be patched later with the throwing pc. The correct value is not - // available now because loading it from memory would destroy registers. - __ push(0); - - // Save everything in sight. - - // No need to update map as each call to save_live_registers will produce identical oopmap - (void) RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words, false); - - // Now it is safe to overwrite any register - - // store the correct deoptimization type - __ push(Deoptimization::Unpack_exception); - - // load throwing pc from JavaThread and patch it as the return address - // of the current frame. Then clear the field in JavaThread - __ get_thread(rdi); - __ movptr(rdx, Address(rdi, JavaThread::exception_pc_offset())); - __ movptr(Address(rbp, wordSize), rdx); - __ movptr(Address(rdi, JavaThread::exception_pc_offset()), NULL_WORD); - -#ifdef ASSERT - // verify that there is really an exception oop in JavaThread - __ movptr(rax, Address(rdi, JavaThread::exception_oop_offset())); - __ verify_oop(rax); - - // verify that there is no pending exception - Label no_pending_exception; - __ movptr(rax, Address(rdi, Thread::pending_exception_offset())); - __ testptr(rax, rax); - __ jcc(Assembler::zero, no_pending_exception); - __ stop("must not have pending exception here"); - __ bind(no_pending_exception); -#endif - - __ bind(cont); - - // Compiled code leaves the floating point stack dirty, empty it. - __ empty_FPU_stack(); - - - // Call C code. Need thread and this frame, but NOT official VM entry - // crud. We cannot block on this call, no GC can happen. - __ get_thread(rcx); - __ push(rcx); - // fetch_unroll_info needs to call last_java_frame() - __ set_last_Java_frame(rcx, noreg, noreg, nullptr, noreg); - - __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info))); - - // Need to have an oopmap that tells fetch_unroll_info where to - // find any register it might need. - - oop_maps->add_gc_map( __ pc()-start, map); - - // Discard args to fetch_unroll_info - __ pop(rcx); - __ pop(rcx); - - __ get_thread(rcx); - __ reset_last_Java_frame(rcx, false); - - // Load UnrollBlock into EDI - __ mov(rdi, rax); - - // Move the unpack kind to a safe place in the UnrollBlock because - // we are very short of registers - - Address unpack_kind(rdi, Deoptimization::UnrollBlock::unpack_kind_offset()); - // retrieve the deopt kind from the UnrollBlock. - __ movl(rax, unpack_kind); - - Label noException; - __ cmpl(rax, Deoptimization::Unpack_exception); // Was exception pending? - __ jcc(Assembler::notEqual, noException); - __ movptr(rax, Address(rcx, JavaThread::exception_oop_offset())); - __ movptr(rdx, Address(rcx, JavaThread::exception_pc_offset())); - __ movptr(Address(rcx, JavaThread::exception_oop_offset()), NULL_WORD); - __ movptr(Address(rcx, JavaThread::exception_pc_offset()), NULL_WORD); - - __ verify_oop(rax); - - // Overwrite the result registers with the exception results. - __ movptr(Address(rsp, RegisterSaver::raxOffset()*wordSize), rax); - __ movptr(Address(rsp, RegisterSaver::rdxOffset()*wordSize), rdx); - - __ bind(noException); - - // Stack is back to only having register save data on the stack. - // Now restore the result registers. Everything else is either dead or captured - // in the vframeArray. - - RegisterSaver::restore_result_registers(masm); - - // Non standard control word may be leaked out through a safepoint blob, and we can - // deopt at a poll point with the non standard control word. However, we should make - // sure the control word is correct after restore_result_registers. - __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std())); - - // All of the register save area has been popped of the stack. Only the - // return address remains. - - // Pop all the frames we must move/replace. - // - // Frame picture (youngest to oldest) - // 1: self-frame (no frame link) - // 2: deopting frame (no frame link) - // 3: caller of deopting frame (could be compiled/interpreted). - // - // Note: by leaving the return address of self-frame on the stack - // and using the size of frame 2 to adjust the stack - // when we are done the return to frame 3 will still be on the stack. - - // Pop deoptimized frame - __ addptr(rsp, Address(rdi,Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset())); - - // sp should be pointing at the return address to the caller (3) - - // Pick up the initial fp we should save - // restore rbp before stack bang because if stack overflow is thrown it needs to be pushed (and preserved) - __ movptr(rbp, Address(rdi, Deoptimization::UnrollBlock::initial_info_offset())); - -#ifdef ASSERT - // Compilers generate code that bang the stack by as much as the - // interpreter would need. So this stack banging should never - // trigger a fault. Verify that it does not on non product builds. - __ movl(rbx, Address(rdi ,Deoptimization::UnrollBlock::total_frame_sizes_offset())); - __ bang_stack_size(rbx, rcx); -#endif - - // Load array of frame pcs into ECX - __ movptr(rcx,Address(rdi,Deoptimization::UnrollBlock::frame_pcs_offset())); - - __ pop(rsi); // trash the old pc - - // Load array of frame sizes into ESI - __ movptr(rsi,Address(rdi,Deoptimization::UnrollBlock::frame_sizes_offset())); - - Address counter(rdi, Deoptimization::UnrollBlock::counter_temp_offset()); - - __ movl(rbx, Address(rdi, Deoptimization::UnrollBlock::number_of_frames_offset())); - __ movl(counter, rbx); - - // Now adjust the caller's stack to make up for the extra locals - // but record the original sp so that we can save it in the skeletal interpreter - // frame and the stack walking of interpreter_sender will get the unextended sp - // value and not the "real" sp value. - - Address sp_temp(rdi, Deoptimization::UnrollBlock::sender_sp_temp_offset()); - __ movptr(sp_temp, rsp); - __ movl2ptr(rbx, Address(rdi, Deoptimization::UnrollBlock::caller_adjustment_offset())); - __ subptr(rsp, rbx); - - // Push interpreter frames in a loop - Label loop; - __ bind(loop); - __ movptr(rbx, Address(rsi, 0)); // Load frame size - __ subptr(rbx, 2*wordSize); // we'll push pc and rbp, by hand - __ pushptr(Address(rcx, 0)); // save return address - __ enter(); // save old & set new rbp, - __ subptr(rsp, rbx); // Prolog! - __ movptr(rbx, sp_temp); // sender's sp - // This value is corrected by layout_activation_impl - __ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), NULL_WORD); - __ movptr(Address(rbp, frame::interpreter_frame_sender_sp_offset * wordSize), rbx); // Make it walkable - __ movptr(sp_temp, rsp); // pass to next frame - __ addptr(rsi, wordSize); // Bump array pointer (sizes) - __ addptr(rcx, wordSize); // Bump array pointer (pcs) - __ decrementl(counter); // decrement counter - __ jcc(Assembler::notZero, loop); - __ pushptr(Address(rcx, 0)); // save final return address - - // Re-push self-frame - __ enter(); // save old & set new rbp, - - // Return address and rbp, are in place - // We'll push additional args later. Just allocate a full sized - // register save area - __ subptr(rsp, (frame_size_in_words-additional_words - 2) * wordSize); - - // Restore frame locals after moving the frame - __ movptr(Address(rsp, RegisterSaver::raxOffset()*wordSize), rax); - __ movptr(Address(rsp, RegisterSaver::rdxOffset()*wordSize), rdx); - __ fstp_d(Address(rsp, RegisterSaver::fpResultOffset()*wordSize)); // Pop float stack and store in local - if( UseSSE>=2 ) __ movdbl(Address(rsp, RegisterSaver::xmm0Offset()*wordSize), xmm0); - if( UseSSE==1 ) __ movflt(Address(rsp, RegisterSaver::xmm0Offset()*wordSize), xmm0); - - // Set up the args to unpack_frame - - __ pushl(unpack_kind); // get the unpack_kind value - __ get_thread(rcx); - __ push(rcx); - - // set last_Java_sp, last_Java_fp - __ set_last_Java_frame(rcx, noreg, rbp, nullptr, noreg); - - // Call C code. Need thread but NOT official VM entry - // crud. We cannot block on this call, no GC can happen. Call should - // restore return values to their stack-slots with the new SP. - __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames))); - // Set an oopmap for the call site - oop_maps->add_gc_map( __ pc()-start, new OopMap( frame_size_in_words, 0 )); - - // rax, contains the return result type - __ push(rax); - - __ get_thread(rcx); - __ reset_last_Java_frame(rcx, false); - - // Collect return values - __ movptr(rax,Address(rsp, (RegisterSaver::raxOffset() + additional_words + 1)*wordSize)); - __ movptr(rdx,Address(rsp, (RegisterSaver::rdxOffset() + additional_words + 1)*wordSize)); - - // Clear floating point stack before returning to interpreter - __ empty_FPU_stack(); - - // Check if we should push the float or double return value. - Label results_done, yes_double_value; - __ cmpl(Address(rsp, 0), T_DOUBLE); - __ jcc (Assembler::zero, yes_double_value); - __ cmpl(Address(rsp, 0), T_FLOAT); - __ jcc (Assembler::notZero, results_done); - - // return float value as expected by interpreter - if( UseSSE>=1 ) __ movflt(xmm0, Address(rsp, (RegisterSaver::xmm0Offset() + additional_words + 1)*wordSize)); - else __ fld_d(Address(rsp, (RegisterSaver::fpResultOffset() + additional_words + 1)*wordSize)); - __ jmp(results_done); - - // return double value as expected by interpreter - __ bind(yes_double_value); - if( UseSSE>=2 ) __ movdbl(xmm0, Address(rsp, (RegisterSaver::xmm0Offset() + additional_words + 1)*wordSize)); - else __ fld_d(Address(rsp, (RegisterSaver::fpResultOffset() + additional_words + 1)*wordSize)); - - __ bind(results_done); - - // Pop self-frame. - __ leave(); // Epilog! - - // Jump to interpreter - __ ret(0); - - // ------------- - // make sure all code is generated - masm->flush(); - - _deopt_blob = DeoptimizationBlob::create( &buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words); - _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset); -} - -//------------------------------generate_handler_blob------ -// -// Generate a special Compile2Runtime blob that saves all registers, -// setup oopmap, and calls safepoint code to stop the compiled code for -// a safepoint. -// -SafepointBlob* SharedRuntime::generate_handler_blob(SharedStubId id, address call_ptr) { - - // Account for thread arg in our frame - const int additional_words = 1; - int frame_size_in_words; - - assert (StubRoutines::forward_exception_entry() != nullptr, "must be generated before"); - assert(is_polling_page_id(id), "expected a polling page stub id"); - - ResourceMark rm; - OopMapSet *oop_maps = new OopMapSet(); - OopMap* map; - - // allocate space for the code - // setup code generation tools - const char* name = SharedRuntime::stub_name(id); - CodeBuffer buffer(name, 2048, 1024); - MacroAssembler* masm = new MacroAssembler(&buffer); - - const Register java_thread = rdi; // callee-saved for VC++ - address start = __ pc(); - address call_pc = nullptr; - bool cause_return = (id == SharedStubId::polling_page_return_handler_id); - bool save_vectors = (id == SharedStubId::polling_page_vectors_safepoint_handler_id); - - // If cause_return is true we are at a poll_return and there is - // the return address on the stack to the caller on the nmethod - // that is safepoint. We can leave this return on the stack and - // effectively complete the return and safepoint in the caller. - // Otherwise we push space for a return address that the safepoint - // handler will install later to make the stack walking sensible. - if (!cause_return) - __ push(rbx); // Make room for return address (or push it again) - - map = RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words, false, save_vectors); - - // The following is basically a call_VM. However, we need the precise - // address of the call in order to generate an oopmap. Hence, we do all the - // work ourselves. - - // Push thread argument and setup last_Java_sp - __ get_thread(java_thread); - __ push(java_thread); - __ set_last_Java_frame(java_thread, noreg, noreg, nullptr, noreg); - - // if this was not a poll_return then we need to correct the return address now. - if (!cause_return) { - // Get the return pc saved by the signal handler and stash it in its appropriate place on the stack. - // Additionally, rbx is a callee saved register and we can look at it later to determine - // if someone changed the return address for us! - __ movptr(rbx, Address(java_thread, JavaThread::saved_exception_pc_offset())); - __ movptr(Address(rbp, wordSize), rbx); - } - - // do the call - __ call(RuntimeAddress(call_ptr)); - - // Set an oopmap for the call site. This oopmap will map all - // oop-registers and debug-info registers as callee-saved. This - // will allow deoptimization at this safepoint to find all possible - // debug-info recordings, as well as let GC find all oops. - - oop_maps->add_gc_map( __ pc() - start, map); - - // Discard arg - __ pop(rcx); - - Label noException; - - // Clear last_Java_sp again - __ get_thread(java_thread); - __ reset_last_Java_frame(java_thread, false); - - __ cmpptr(Address(java_thread, Thread::pending_exception_offset()), NULL_WORD); - __ jcc(Assembler::equal, noException); - - // Exception pending - RegisterSaver::restore_live_registers(masm, save_vectors); - - __ jump(RuntimeAddress(StubRoutines::forward_exception_entry())); - - __ bind(noException); - - Label no_adjust, bail, not_special; - if (!cause_return) { - // If our stashed return pc was modified by the runtime we avoid touching it - __ cmpptr(rbx, Address(rbp, wordSize)); - __ jccb(Assembler::notEqual, no_adjust); - - // Skip over the poll instruction. - // See NativeInstruction::is_safepoint_poll() - // Possible encodings: - // 85 00 test %eax,(%rax) - // 85 01 test %eax,(%rcx) - // 85 02 test %eax,(%rdx) - // 85 03 test %eax,(%rbx) - // 85 06 test %eax,(%rsi) - // 85 07 test %eax,(%rdi) - // - // 85 04 24 test %eax,(%rsp) - // 85 45 00 test %eax,0x0(%rbp) - -#ifdef ASSERT - __ movptr(rax, rbx); // remember where 0x85 should be, for verification below -#endif - // rsp/rbp base encoding takes 3 bytes with the following register values: - // rsp 0x04 - // rbp 0x05 - __ movzbl(rcx, Address(rbx, 1)); - __ andptr(rcx, 0x07); // looking for 0x04 .. 0x05 - __ subptr(rcx, 4); // looking for 0x00 .. 0x01 - __ cmpptr(rcx, 1); - __ jcc(Assembler::above, not_special); - __ addptr(rbx, 1); - __ bind(not_special); -#ifdef ASSERT - // Verify the correct encoding of the poll we're about to skip. - __ cmpb(Address(rax, 0), NativeTstRegMem::instruction_code_memXregl); - __ jcc(Assembler::notEqual, bail); - // Mask out the modrm bits - __ testb(Address(rax, 1), NativeTstRegMem::modrm_mask); - // rax encodes to 0, so if the bits are nonzero it's incorrect - __ jcc(Assembler::notZero, bail); -#endif - // Adjust return pc forward to step over the safepoint poll instruction - __ addptr(rbx, 2); - __ movptr(Address(rbp, wordSize), rbx); - } - - __ bind(no_adjust); - // Normal exit, register restoring and exit - RegisterSaver::restore_live_registers(masm, save_vectors); - - __ ret(0); - -#ifdef ASSERT - __ bind(bail); - __ stop("Attempting to adjust pc to skip safepoint poll but the return point is not what we expected"); -#endif - - // make sure all code is generated - masm->flush(); - - // Fill-out other meta info - return SafepointBlob::create(&buffer, oop_maps, frame_size_in_words); -} - -// -// generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss -// -// Generate a stub that calls into vm to find out the proper destination -// of a java call. All the argument registers are live at this point -// but since this is generic code we don't know what they are and the caller -// must do any gc of the args. -// -RuntimeStub* SharedRuntime::generate_resolve_blob(SharedStubId id, address destination) { - assert (StubRoutines::forward_exception_entry() != nullptr, "must be generated before"); - assert(is_resolve_id(id), "expected a resolve stub id"); - - // allocate space for the code - ResourceMark rm; - - const char* name = SharedRuntime::stub_name(id); - CodeBuffer buffer(name, 1000, 512); - MacroAssembler* masm = new MacroAssembler(&buffer); - - int frame_size_words; - enum frame_layout { - thread_off, - extra_words }; - - OopMapSet *oop_maps = new OopMapSet(); - OopMap* map = nullptr; - - int start = __ offset(); - - map = RegisterSaver::save_live_registers(masm, extra_words, &frame_size_words); - - int frame_complete = __ offset(); - - const Register thread = rdi; - __ get_thread(rdi); - - __ push(thread); - __ set_last_Java_frame(thread, noreg, rbp, nullptr, noreg); - - __ call(RuntimeAddress(destination)); - - - // Set an oopmap for the call site. - // We need this not only for callee-saved registers, but also for volatile - // registers that the compiler might be keeping live across a safepoint. - - oop_maps->add_gc_map( __ offset() - start, map); - - // rax, contains the address we are going to jump to assuming no exception got installed - - __ addptr(rsp, wordSize); - - // clear last_Java_sp - __ reset_last_Java_frame(thread, true); - // check for pending exceptions - Label pending; - __ cmpptr(Address(thread, Thread::pending_exception_offset()), NULL_WORD); - __ jcc(Assembler::notEqual, pending); - - // get the returned Method* - __ get_vm_result_2(rbx, thread); - __ movptr(Address(rsp, RegisterSaver::rbx_offset() * wordSize), rbx); - - __ movptr(Address(rsp, RegisterSaver::rax_offset() * wordSize), rax); - - RegisterSaver::restore_live_registers(masm); - - // We are back to the original state on entry and ready to go. - - __ jmp(rax); - - // Pending exception after the safepoint - - __ bind(pending); - - RegisterSaver::restore_live_registers(masm); - - // exception pending => remove activation and forward to exception handler - - __ get_thread(thread); - __ movptr(Address(thread, JavaThread::vm_result_offset()), NULL_WORD); - __ movptr(rax, Address(thread, Thread::pending_exception_offset())); - __ jump(RuntimeAddress(StubRoutines::forward_exception_entry())); - - // ------------- - // make sure all code is generated - masm->flush(); - - // return the blob - // frame_size_words or bytes?? - return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_words, oop_maps, true); -} - - //------------------------------------------------------------------------------------------------------------------------ - // Continuation point for throwing of implicit exceptions that are not handled in - // the current activation. Fabricates an exception oop and initiates normal - // exception dispatching in this frame. - // - // Previously the compiler (c2) allowed for callee save registers on Java calls. - // This is no longer true after adapter frames were removed but could possibly - // be brought back in the future if the interpreter code was reworked and it - // was deemed worthwhile. The comment below was left to describe what must - // happen here if callee saves were resurrected. As it stands now this stub - // could actually be a vanilla BufferBlob and have now oopMap at all. - // Since it doesn't make much difference we've chosen to leave it the - // way it was in the callee save days and keep the comment. - - // If we need to preserve callee-saved values we need a callee-saved oop map and - // therefore have to make these stubs into RuntimeStubs rather than BufferBlobs. - // If the compiler needs all registers to be preserved between the fault - // point and the exception handler then it must assume responsibility for that in - // AbstractCompiler::continuation_for_implicit_null_exception or - // continuation_for_implicit_division_by_zero_exception. All other implicit - // exceptions (e.g., NullPointerException or AbstractMethodError on entry) are - // either at call sites or otherwise assume that stack unwinding will be initiated, - // so caller saved registers were assumed volatile in the compiler. -RuntimeStub* SharedRuntime::generate_throw_exception(SharedStubId id, address runtime_entry) { - assert(is_throw_id(id), "expected a throw stub id"); - - const char* name = SharedRuntime::stub_name(id); - - // Information about frame layout at time of blocking runtime call. - // Note that we only have to preserve callee-saved registers since - // the compilers are responsible for supplying a continuation point - // if they expect all registers to be preserved. - enum layout { - thread_off, // last_java_sp - arg1_off, - arg2_off, - rbp_off, // callee saved register - ret_pc, - framesize - }; - - int insts_size = 256; - int locs_size = 32; - - ResourceMark rm; - const char* timer_msg = "SharedRuntime generate_throw_exception"; - TraceTime timer(timer_msg, TRACETIME_LOG(Info, startuptime)); - - CodeBuffer code(name, insts_size, locs_size); - OopMapSet* oop_maps = new OopMapSet(); - MacroAssembler* masm = new MacroAssembler(&code); - - address start = __ pc(); - - // This is an inlined and slightly modified version of call_VM - // which has the ability to fetch the return PC out of - // thread-local storage and also sets up last_Java_sp slightly - // differently than the real call_VM - Register java_thread = rbx; - __ get_thread(java_thread); - - __ enter(); // required for proper stackwalking of RuntimeStub frame - - // pc and rbp, already pushed - __ subptr(rsp, (framesize-2) * wordSize); // prolog - - // Frame is now completed as far as size and linkage. - - int frame_complete = __ pc() - start; - - // push java thread (becomes first argument of C function) - __ movptr(Address(rsp, thread_off * wordSize), java_thread); - // Set up last_Java_sp and last_Java_fp - __ set_last_Java_frame(java_thread, rsp, rbp, nullptr, noreg); - - // Call runtime - BLOCK_COMMENT("call runtime_entry"); - __ call(RuntimeAddress(runtime_entry)); - // Generate oop map - OopMap* map = new OopMap(framesize, 0); - oop_maps->add_gc_map(__ pc() - start, map); - - // restore the thread (cannot use the pushed argument since arguments - // may be overwritten by C code generated by an optimizing compiler); - // however can use the register value directly if it is callee saved. - __ get_thread(java_thread); - - __ reset_last_Java_frame(java_thread, true); - - __ leave(); // required for proper stackwalking of RuntimeStub frame - - // check for pending exceptions -#ifdef ASSERT - Label L; - __ cmpptr(Address(java_thread, Thread::pending_exception_offset()), NULL_WORD); - __ jcc(Assembler::notEqual, L); - __ should_not_reach_here(); - __ bind(L); -#endif /* ASSERT */ - __ jump(RuntimeAddress(StubRoutines::forward_exception_entry())); - - - RuntimeStub* stub = RuntimeStub::new_runtime_stub(name, &code, frame_complete, framesize, oop_maps, false); - return stub; -} - -#if INCLUDE_JFR - -static void jfr_prologue(address the_pc, MacroAssembler* masm) { - Register java_thread = rdi; - __ get_thread(java_thread); - __ set_last_Java_frame(java_thread, rsp, rbp, the_pc, noreg); - __ movptr(Address(rsp, 0), java_thread); -} - -// The handle is dereferenced through a load barrier. -static void jfr_epilogue(MacroAssembler* masm) { - Register java_thread = rdi; - __ get_thread(java_thread); - __ reset_last_Java_frame(java_thread, true); -} - -// For c2: c_rarg0 is junk, call to runtime to write a checkpoint. -// It returns a jobject handle to the event writer. -// The handle is dereferenced and the return value is the event writer oop. -RuntimeStub* SharedRuntime::generate_jfr_write_checkpoint() { - enum layout { - FPUState_off = 0, - rbp_off = FPUStateSizeInWords, - rdi_off, - rsi_off, - rcx_off, - rbx_off, - saved_argument_off, - saved_argument_off2, // 2nd half of double - framesize - }; - - int insts_size = 1024; - int locs_size = 64; - const char* name = SharedRuntime::stub_name(SharedStubId::jfr_write_checkpoint_id); - CodeBuffer code(name, insts_size, locs_size); - OopMapSet* oop_maps = new OopMapSet(); - MacroAssembler* masm = new MacroAssembler(&code); - - address start = __ pc(); - __ enter(); - int frame_complete = __ pc() - start; - address the_pc = __ pc(); - jfr_prologue(the_pc, masm); - __ call_VM_leaf(CAST_FROM_FN_PTR(address, JfrIntrinsicSupport::write_checkpoint), 1); - jfr_epilogue(masm); - __ resolve_global_jobject(rax, rdi, rdx); - __ leave(); - __ ret(0); - - OopMap* map = new OopMap(framesize, 1); // rbp - oop_maps->add_gc_map(the_pc - start, map); - - RuntimeStub* stub = // codeBlob framesize is in words (not VMRegImpl::slot_size) - RuntimeStub::new_runtime_stub(name, &code, frame_complete, - (framesize >> (LogBytesPerWord - LogBytesPerInt)), - oop_maps, false); - return stub; -} - -// For c2: call to return a leased buffer. -RuntimeStub* SharedRuntime::generate_jfr_return_lease() { - enum layout { - FPUState_off = 0, - rbp_off = FPUStateSizeInWords, - rdi_off, - rsi_off, - rcx_off, - rbx_off, - saved_argument_off, - saved_argument_off2, // 2nd half of double - framesize - }; - - int insts_size = 1024; - int locs_size = 64; - const char* name = SharedRuntime::stub_name(SharedStubId::jfr_return_lease_id); - CodeBuffer code(name, insts_size, locs_size); - OopMapSet* oop_maps = new OopMapSet(); - MacroAssembler* masm = new MacroAssembler(&code); - - address start = __ pc(); - __ enter(); - int frame_complete = __ pc() - start; - address the_pc = __ pc(); - jfr_prologue(the_pc, masm); - __ call_VM_leaf(CAST_FROM_FN_PTR(address, JfrIntrinsicSupport::return_lease), 1); - jfr_epilogue(masm); - __ leave(); - __ ret(0); - - OopMap* map = new OopMap(framesize, 1); // rbp - oop_maps->add_gc_map(the_pc - start, map); - - RuntimeStub* stub = // codeBlob framesize is in words (not VMRegImpl::slot_size) - RuntimeStub::new_runtime_stub(name, &code, frame_complete, - (framesize >> (LogBytesPerWord - LogBytesPerInt)), - oop_maps, false); - return stub; -} - -#endif // INCLUDE_JFR diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_32.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_32.cpp deleted file mode 100644 index 9ec556777b0..00000000000 --- a/src/hotspot/cpu/x86/stubGenerator_x86_32.cpp +++ /dev/null @@ -1,4314 +0,0 @@ -/* - * Copyright (c) 1999, 2025, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - * - */ - -#include "asm/macroAssembler.hpp" -#include "asm/macroAssembler.inline.hpp" -#include "compiler/oopMap.hpp" -#include "gc/shared/barrierSet.hpp" -#include "gc/shared/barrierSetAssembler.hpp" -#include "gc/shared/barrierSetNMethod.hpp" -#include "interpreter/interpreter.hpp" -#include "memory/universe.hpp" -#include "nativeInst_x86.hpp" -#include "oops/instanceOop.hpp" -#include "oops/method.hpp" -#include "oops/objArrayKlass.hpp" -#include "oops/oop.inline.hpp" -#include "prims/methodHandles.hpp" -#include "runtime/frame.inline.hpp" -#include "runtime/handles.inline.hpp" -#include "runtime/javaThread.hpp" -#include "runtime/sharedRuntime.hpp" -#include "runtime/stubCodeGenerator.hpp" -#include "runtime/stubRoutines.hpp" -#ifdef COMPILER2 -#include "opto/runtime.hpp" -#endif - -// Declaration and definition of StubGenerator (no .hpp file). -// For a more detailed description of the stub routine structure -// see the comment in stubRoutines.hpp - -#define __ _masm-> -#define a__ ((Assembler*)_masm)-> - -#ifdef PRODUCT -#define BLOCK_COMMENT(str) /* nothing */ -#else -#define BLOCK_COMMENT(str) __ block_comment(str) -#endif - -#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") - -const int FPU_CNTRL_WRD_MASK = 0xFFFF; - -ATTRIBUTE_ALIGNED(16) static const uint32_t KEY_SHUFFLE_MASK[] = { - 0x00010203UL, 0x04050607UL, 0x08090A0BUL, 0x0C0D0E0FUL, -}; - -ATTRIBUTE_ALIGNED(16) static const uint32_t COUNTER_SHUFFLE_MASK[] = { - 0x0C0D0E0FUL, 0x08090A0BUL, 0x04050607UL, 0x00010203UL, -}; - -ATTRIBUTE_ALIGNED(16) static const uint32_t GHASH_BYTE_SWAP_MASK[] = { - 0x0C0D0E0FUL, 0x08090A0BUL, 0x04050607UL, 0x00010203UL, -}; - -ATTRIBUTE_ALIGNED(16) static const uint32_t GHASH_LONG_SWAP_MASK[] = { - 0x0B0A0908UL, 0x0F0E0D0CUL, 0x03020100UL, 0x07060504UL, -}; - -// ------------------------------------------------------------------------------------------------------------------------- -// Stub Code definitions - -class StubGenerator: public StubCodeGenerator { - private: - -#ifdef PRODUCT -#define inc_counter_np(counter) ((void)0) -#else - void inc_counter_np_(uint& counter) { - __ incrementl(ExternalAddress((address)&counter)); - } -#define inc_counter_np(counter) \ - BLOCK_COMMENT("inc_counter " #counter); \ - inc_counter_np_(counter); -#endif //PRODUCT - - void inc_copy_counter_np(BasicType t) { -#ifndef PRODUCT - switch (t) { - case T_BYTE: inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr); return; - case T_SHORT: inc_counter_np(SharedRuntime::_jshort_array_copy_ctr); return; - case T_INT: inc_counter_np(SharedRuntime::_jint_array_copy_ctr); return; - case T_LONG: inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); return; - case T_OBJECT: inc_counter_np(SharedRuntime::_oop_array_copy_ctr); return; - default: ShouldNotReachHere(); - } -#endif //PRODUCT - } - - //------------------------------------------------------------------------------------------------------------------------ - // Call stubs are used to call Java from C - // - // [ return_from_Java ] <--- rsp - // [ argument word n ] - // ... - // -N [ argument word 1 ] - // -7 [ Possible padding for stack alignment ] - // -6 [ Possible padding for stack alignment ] - // -5 [ Possible padding for stack alignment ] - // -4 [ mxcsr save ] <--- rsp_after_call - // -3 [ saved rbx, ] - // -2 [ saved rsi ] - // -1 [ saved rdi ] - // 0 [ saved rbp, ] <--- rbp, - // 1 [ return address ] - // 2 [ ptr. to call wrapper ] - // 3 [ result ] - // 4 [ result_type ] - // 5 [ method ] - // 6 [ entry_point ] - // 7 [ parameters ] - // 8 [ parameter_size ] - // 9 [ thread ] - - - address generate_call_stub(address& return_address) { - StubGenStubId stub_id = StubGenStubId::call_stub_id; - StubCodeMark mark(this, stub_id); - address start = __ pc(); - - // stub code parameters / addresses - assert(frame::entry_frame_call_wrapper_offset == 2, "adjust this code"); - bool sse_save = false; - const Address rsp_after_call(rbp, -4 * wordSize); // same as in generate_catch_exception()! - const int locals_count_in_bytes (4*wordSize); - const Address mxcsr_save (rbp, -4 * wordSize); - const Address saved_rbx (rbp, -3 * wordSize); - const Address saved_rsi (rbp, -2 * wordSize); - const Address saved_rdi (rbp, -1 * wordSize); - const Address result (rbp, 3 * wordSize); - const Address result_type (rbp, 4 * wordSize); - const Address method (rbp, 5 * wordSize); - const Address entry_point (rbp, 6 * wordSize); - const Address parameters (rbp, 7 * wordSize); - const Address parameter_size(rbp, 8 * wordSize); - const Address thread (rbp, 9 * wordSize); // same as in generate_catch_exception()! - sse_save = UseSSE > 0; - - // stub code - __ enter(); - __ movptr(rcx, parameter_size); // parameter counter - __ shlptr(rcx, Interpreter::logStackElementSize); // convert parameter count to bytes - __ addptr(rcx, locals_count_in_bytes); // reserve space for register saves - __ subptr(rsp, rcx); - __ andptr(rsp, -(StackAlignmentInBytes)); // Align stack - - // save rdi, rsi, & rbx, according to C calling conventions - __ movptr(saved_rdi, rdi); - __ movptr(saved_rsi, rsi); - __ movptr(saved_rbx, rbx); - - // save and initialize %mxcsr - if (sse_save) { - Label skip_ldmx; - __ cmp32_mxcsr_std(mxcsr_save, rax); - __ jcc(Assembler::equal, skip_ldmx); - __ ldmxcsr(mxcsr_std); - __ bind(skip_ldmx); - } - - // make sure the control word is correct. - __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std())); - -#ifdef ASSERT - // make sure we have no pending exceptions - { Label L; - __ movptr(rcx, thread); - __ cmpptr(Address(rcx, Thread::pending_exception_offset()), NULL_WORD); - __ jcc(Assembler::equal, L); - __ stop("StubRoutines::call_stub: entered with pending exception"); - __ bind(L); - } -#endif - - // pass parameters if any - BLOCK_COMMENT("pass parameters if any"); - Label parameters_done; - __ movl(rcx, parameter_size); // parameter counter - __ testl(rcx, rcx); - __ jcc(Assembler::zero, parameters_done); - - // parameter passing loop - - Label loop; - // Copy Java parameters in reverse order (receiver last) - // Note that the argument order is inverted in the process - // source is rdx[rcx: N-1..0] - // dest is rsp[rbx: 0..N-1] - - __ movptr(rdx, parameters); // parameter pointer - __ xorptr(rbx, rbx); - - __ BIND(loop); - - // get parameter - __ movptr(rax, Address(rdx, rcx, Interpreter::stackElementScale(), -wordSize)); - __ movptr(Address(rsp, rbx, Interpreter::stackElementScale(), - Interpreter::expr_offset_in_bytes(0)), rax); // store parameter - __ increment(rbx); - __ decrement(rcx); - __ jcc(Assembler::notZero, loop); - - // call Java function - __ BIND(parameters_done); - __ movptr(rbx, method); // get Method* - __ movptr(rax, entry_point); // get entry_point - __ mov(rsi, rsp); // set sender sp - BLOCK_COMMENT("call Java function"); - __ call(rax); - - BLOCK_COMMENT("call_stub_return_address:"); - return_address = __ pc(); - -#ifdef COMPILER2 - { - Label L_skip; - if (UseSSE >= 2) { - __ verify_FPU(0, "call_stub_return"); - } else { - for (int i = 1; i < 8; i++) { - __ ffree(i); - } - - // UseSSE <= 1 so double result should be left on TOS - __ movl(rsi, result_type); - __ cmpl(rsi, T_DOUBLE); - __ jcc(Assembler::equal, L_skip); - if (UseSSE == 0) { - // UseSSE == 0 so float result should be left on TOS - __ cmpl(rsi, T_FLOAT); - __ jcc(Assembler::equal, L_skip); - } - __ ffree(0); - } - __ BIND(L_skip); - } -#endif // COMPILER2 - - // store result depending on type - // (everything that is not T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT) - __ movptr(rdi, result); - Label is_long, is_float, is_double, exit; - __ movl(rsi, result_type); - __ cmpl(rsi, T_LONG); - __ jcc(Assembler::equal, is_long); - __ cmpl(rsi, T_FLOAT); - __ jcc(Assembler::equal, is_float); - __ cmpl(rsi, T_DOUBLE); - __ jcc(Assembler::equal, is_double); - - // handle T_INT case - __ movl(Address(rdi, 0), rax); - __ BIND(exit); - - // check that FPU stack is empty - __ verify_FPU(0, "generate_call_stub"); - - // pop parameters - __ lea(rsp, rsp_after_call); - - // restore %mxcsr - if (sse_save) { - __ ldmxcsr(mxcsr_save); - } - - // restore rdi, rsi and rbx, - __ movptr(rbx, saved_rbx); - __ movptr(rsi, saved_rsi); - __ movptr(rdi, saved_rdi); - __ addptr(rsp, 4*wordSize); - - // return - __ pop(rbp); - __ ret(0); - - // handle return types different from T_INT - __ BIND(is_long); - __ movl(Address(rdi, 0 * wordSize), rax); - __ movl(Address(rdi, 1 * wordSize), rdx); - __ jmp(exit); - - __ BIND(is_float); - // interpreter uses xmm0 for return values - if (UseSSE >= 1) { - __ movflt(Address(rdi, 0), xmm0); - } else { - __ fstp_s(Address(rdi, 0)); - } - __ jmp(exit); - - __ BIND(is_double); - // interpreter uses xmm0 for return values - if (UseSSE >= 2) { - __ movdbl(Address(rdi, 0), xmm0); - } else { - __ fstp_d(Address(rdi, 0)); - } - __ jmp(exit); - - return start; - } - - - //------------------------------------------------------------------------------------------------------------------------ - // Return point for a Java call if there's an exception thrown in Java code. - // The exception is caught and transformed into a pending exception stored in - // JavaThread that can be tested from within the VM. - // - // Note: Usually the parameters are removed by the callee. In case of an exception - // crossing an activation frame boundary, that is not the case if the callee - // is compiled code => need to setup the rsp. - // - // rax,: exception oop - - address generate_catch_exception() { - StubGenStubId stub_id = StubGenStubId::catch_exception_id; - StubCodeMark mark(this, stub_id); - const Address rsp_after_call(rbp, -4 * wordSize); // same as in generate_call_stub()! - const Address thread (rbp, 9 * wordSize); // same as in generate_call_stub()! - address start = __ pc(); - - // get thread directly - __ movptr(rcx, thread); -#ifdef ASSERT - // verify that threads correspond - { Label L; - __ get_thread(rbx); - __ cmpptr(rbx, rcx); - __ jcc(Assembler::equal, L); - __ stop("StubRoutines::catch_exception: threads must correspond"); - __ bind(L); - } -#endif - // set pending exception - __ verify_oop(rax); - __ movptr(Address(rcx, Thread::pending_exception_offset()), rax); - __ lea(Address(rcx, Thread::exception_file_offset()), - ExternalAddress((address)__FILE__), noreg); - __ movl(Address(rcx, Thread::exception_line_offset()), __LINE__ ); - // complete return to VM - assert(StubRoutines::_call_stub_return_address != nullptr, "_call_stub_return_address must have been generated before"); - __ jump(RuntimeAddress(StubRoutines::_call_stub_return_address)); - - return start; - } - - - //------------------------------------------------------------------------------------------------------------------------ - // Continuation point for runtime calls returning with a pending exception. - // The pending exception check happened in the runtime or native call stub. - // The pending exception in Thread is converted into a Java-level exception. - // - // Contract with Java-level exception handlers: - // rax: exception - // rdx: throwing pc - // - // NOTE: At entry of this stub, exception-pc must be on stack !! - - address generate_forward_exception() { - StubGenStubId stub_id = StubGenStubId::forward_exception_id; - StubCodeMark mark(this, stub_id); - address start = __ pc(); - const Register thread = rcx; - - // other registers used in this stub - const Register exception_oop = rax; - const Register handler_addr = rbx; - const Register exception_pc = rdx; - - // Upon entry, the sp points to the return address returning into Java - // (interpreted or compiled) code; i.e., the return address becomes the - // throwing pc. - // - // Arguments pushed before the runtime call are still on the stack but - // the exception handler will reset the stack pointer -> ignore them. - // A potential result in registers can be ignored as well. - -#ifdef ASSERT - // make sure this code is only executed if there is a pending exception - { Label L; - __ get_thread(thread); - __ cmpptr(Address(thread, Thread::pending_exception_offset()), NULL_WORD); - __ jcc(Assembler::notEqual, L); - __ stop("StubRoutines::forward exception: no pending exception (1)"); - __ bind(L); - } -#endif - - // compute exception handler into rbx, - __ get_thread(thread); - __ movptr(exception_pc, Address(rsp, 0)); - BLOCK_COMMENT("call exception_handler_for_return_address"); - __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), thread, exception_pc); - __ mov(handler_addr, rax); - - // setup rax & rdx, remove return address & clear pending exception - __ get_thread(thread); - __ pop(exception_pc); - __ movptr(exception_oop, Address(thread, Thread::pending_exception_offset())); - __ movptr(Address(thread, Thread::pending_exception_offset()), NULL_WORD); - -#ifdef ASSERT - // make sure exception is set - { Label L; - __ testptr(exception_oop, exception_oop); - __ jcc(Assembler::notEqual, L); - __ stop("StubRoutines::forward exception: no pending exception (2)"); - __ bind(L); - } -#endif - - // Verify that there is really a valid exception in RAX. - __ verify_oop(exception_oop); - - // continue at exception handler (return address removed) - // rax: exception - // rbx: exception handler - // rdx: throwing pc - __ jmp(handler_addr); - - return start; - } - - //---------------------------------------------------------------------------------------------------- - // Support for void verify_mxcsr() - // - // This routine is used with -Xcheck:jni to verify that native - // JNI code does not return to Java code without restoring the - // MXCSR register to our expected state. - - - address generate_verify_mxcsr() { - StubGenStubId stub_id = StubGenStubId::verify_mxcsr_id; - StubCodeMark mark(this, stub_id); - address start = __ pc(); - - const Address mxcsr_save(rsp, 0); - - if (CheckJNICalls && UseSSE > 0 ) { - Label ok_ret; - __ push(rax); - __ subptr(rsp, wordSize); // allocate a temp location - __ cmp32_mxcsr_std(mxcsr_save, rax); - __ jcc(Assembler::equal, ok_ret); - - __ warn("MXCSR changed by native JNI code."); - - ExternalAddress mxcsr_std(StubRoutines::x86::addr_mxcsr_std()); - __ ldmxcsr(mxcsr_std); - - __ bind(ok_ret); - __ addptr(rsp, wordSize); - __ pop(rax); - } - - __ ret(0); - - return start; - } - - - //--------------------------------------------------------------------------- - // Support for void verify_fpu_cntrl_wrd() - // - // This routine is used with -Xcheck:jni to verify that native - // JNI code does not return to Java code without restoring the - // FP control word to our expected state. - - address generate_verify_fpu_cntrl_wrd() { - StubGenStubId stub_id = StubGenStubId::verify_fpu_cntrl_word_id; - StubCodeMark mark(this, stub_id); - address start = __ pc(); - - const Address fpu_cntrl_wrd_save(rsp, 0); - - if (CheckJNICalls) { - Label ok_ret; - __ push(rax); - __ subptr(rsp, wordSize); // allocate a temp location - __ fnstcw(fpu_cntrl_wrd_save); - __ movl(rax, fpu_cntrl_wrd_save); - __ andl(rax, FPU_CNTRL_WRD_MASK); - ExternalAddress fpu_std(StubRoutines::x86::addr_fpu_cntrl_wrd_std()); - __ cmp32(rax, fpu_std); - __ jcc(Assembler::equal, ok_ret); - - __ warn("Floating point control word changed by native JNI code."); - - __ fldcw(fpu_std); - - __ bind(ok_ret); - __ addptr(rsp, wordSize); - __ pop(rax); - } - - __ ret(0); - - return start; - } - - //--------------------------------------------------------------------------- - // Wrapper for slow-case handling of double-to-integer conversion - // d2i or f2i fast case failed either because it is nan or because - // of under/overflow. - // Input: FPU TOS: float value - // Output: rax, (rdx): integer (long) result - - address generate_d2i_wrapper(BasicType t, address fcn) { - StubGenStubId stub_id = StubGenStubId::d2i_wrapper_id; - StubCodeMark mark(this, stub_id); - address start = __ pc(); - - // Capture info about frame layout - enum layout { FPUState_off = 0, - rbp_off = FPUStateSizeInWords, - rdi_off, - rsi_off, - rcx_off, - rbx_off, - saved_argument_off, - saved_argument_off2, // 2nd half of double - framesize - }; - - assert(FPUStateSizeInWords == 27, "update stack layout"); - - // Save outgoing argument to stack across push_FPU_state() - __ subptr(rsp, wordSize * 2); - __ fstp_d(Address(rsp, 0)); - - // Save CPU & FPU state - __ push(rbx); - __ push(rcx); - __ push(rsi); - __ push(rdi); - __ push(rbp); - __ push_FPU_state(); - - // push_FPU_state() resets the FP top of stack - // Load original double into FP top of stack - __ fld_d(Address(rsp, saved_argument_off * wordSize)); - // Store double into stack as outgoing argument - __ subptr(rsp, wordSize*2); - __ fst_d(Address(rsp, 0)); - - // Prepare FPU for doing math in C-land - __ empty_FPU_stack(); - // Call the C code to massage the double. Result in EAX - if (t == T_INT) - { BLOCK_COMMENT("SharedRuntime::d2i"); } - else if (t == T_LONG) - { BLOCK_COMMENT("SharedRuntime::d2l"); } - __ call_VM_leaf( fcn, 2 ); - - // Restore CPU & FPU state - __ pop_FPU_state(); - __ pop(rbp); - __ pop(rdi); - __ pop(rsi); - __ pop(rcx); - __ pop(rbx); - __ addptr(rsp, wordSize * 2); - - __ ret(0); - - return start; - } - //--------------------------------------------------------------------------------------------------- - - address generate_vector_mask(StubGenStubId stub_id, int32_t mask) { - __ align(CodeEntryAlignment); - StubCodeMark mark(this, stub_id); - address start = __ pc(); - - for (int i = 0; i < 16; i++) { - __ emit_data(mask, relocInfo::none, 0); - } - - return start; - } - - address generate_count_leading_zeros_lut() { - __ align64(); - StubGenStubId stub_id = StubGenStubId::vector_count_leading_zeros_lut_id; - StubCodeMark mark(this, stub_id); - address start = __ pc(); - __ emit_data(0x02020304, relocInfo::none, 0); - __ emit_data(0x01010101, relocInfo::none, 0); - __ emit_data(0x00000000, relocInfo::none, 0); - __ emit_data(0x00000000, relocInfo::none, 0); - __ emit_data(0x02020304, relocInfo::none, 0); - __ emit_data(0x01010101, relocInfo::none, 0); - __ emit_data(0x00000000, relocInfo::none, 0); - __ emit_data(0x00000000, relocInfo::none, 0); - __ emit_data(0x02020304, relocInfo::none, 0); - __ emit_data(0x01010101, relocInfo::none, 0); - __ emit_data(0x00000000, relocInfo::none, 0); - __ emit_data(0x00000000, relocInfo::none, 0); - __ emit_data(0x02020304, relocInfo::none, 0); - __ emit_data(0x01010101, relocInfo::none, 0); - __ emit_data(0x00000000, relocInfo::none, 0); - __ emit_data(0x00000000, relocInfo::none, 0); - return start; - } - - - address generate_popcount_avx_lut() { - __ align64(); - StubGenStubId stub_id = StubGenStubId::vector_popcount_lut_id; - StubCodeMark mark(this, stub_id); - address start = __ pc(); - __ emit_data(0x02010100, relocInfo::none, 0); - __ emit_data(0x03020201, relocInfo::none, 0); - __ emit_data(0x03020201, relocInfo::none, 0); - __ emit_data(0x04030302, relocInfo::none, 0); - __ emit_data(0x02010100, relocInfo::none, 0); - __ emit_data(0x03020201, relocInfo::none, 0); - __ emit_data(0x03020201, relocInfo::none, 0); - __ emit_data(0x04030302, relocInfo::none, 0); - __ emit_data(0x02010100, relocInfo::none, 0); - __ emit_data(0x03020201, relocInfo::none, 0); - __ emit_data(0x03020201, relocInfo::none, 0); - __ emit_data(0x04030302, relocInfo::none, 0); - __ emit_data(0x02010100, relocInfo::none, 0); - __ emit_data(0x03020201, relocInfo::none, 0); - __ emit_data(0x03020201, relocInfo::none, 0); - __ emit_data(0x04030302, relocInfo::none, 0); - return start; - } - - - address generate_iota_indices() { - __ align(CodeEntryAlignment); - StubGenStubId stub_id = StubGenStubId::vector_iota_indices_id; - StubCodeMark mark(this, stub_id); - address start = __ pc(); - // B - __ emit_data(0x03020100, relocInfo::none, 0); - __ emit_data(0x07060504, relocInfo::none, 0); - __ emit_data(0x0B0A0908, relocInfo::none, 0); - __ emit_data(0x0F0E0D0C, relocInfo::none, 0); - __ emit_data(0x13121110, relocInfo::none, 0); - __ emit_data(0x17161514, relocInfo::none, 0); - __ emit_data(0x1B1A1918, relocInfo::none, 0); - __ emit_data(0x1F1E1D1C, relocInfo::none, 0); - __ emit_data(0x23222120, relocInfo::none, 0); - __ emit_data(0x27262524, relocInfo::none, 0); - __ emit_data(0x2B2A2928, relocInfo::none, 0); - __ emit_data(0x2F2E2D2C, relocInfo::none, 0); - __ emit_data(0x33323130, relocInfo::none, 0); - __ emit_data(0x37363534, relocInfo::none, 0); - __ emit_data(0x3B3A3938, relocInfo::none, 0); - __ emit_data(0x3F3E3D3C, relocInfo::none, 0); - - // W - __ emit_data(0x00010000, relocInfo::none, 0); - __ emit_data(0x00030002, relocInfo::none, 0); - __ emit_data(0x00050004, relocInfo::none, 0); - __ emit_data(0x00070006, relocInfo::none, 0); - __ emit_data(0x00090008, relocInfo::none, 0); - __ emit_data(0x000B000A, relocInfo::none, 0); - __ emit_data(0x000D000C, relocInfo::none, 0); - __ emit_data(0x000F000E, relocInfo::none, 0); - __ emit_data(0x00110010, relocInfo::none, 0); - __ emit_data(0x00130012, relocInfo::none, 0); - __ emit_data(0x00150014, relocInfo::none, 0); - __ emit_data(0x00170016, relocInfo::none, 0); - __ emit_data(0x00190018, relocInfo::none, 0); - __ emit_data(0x001B001A, relocInfo::none, 0); - __ emit_data(0x001D001C, relocInfo::none, 0); - __ emit_data(0x001F001E, relocInfo::none, 0); - - // D - __ emit_data(0x00000000, relocInfo::none, 0); - __ emit_data(0x00000001, relocInfo::none, 0); - __ emit_data(0x00000002, relocInfo::none, 0); - __ emit_data(0x00000003, relocInfo::none, 0); - __ emit_data(0x00000004, relocInfo::none, 0); - __ emit_data(0x00000005, relocInfo::none, 0); - __ emit_data(0x00000006, relocInfo::none, 0); - __ emit_data(0x00000007, relocInfo::none, 0); - __ emit_data(0x00000008, relocInfo::none, 0); - __ emit_data(0x00000009, relocInfo::none, 0); - __ emit_data(0x0000000A, relocInfo::none, 0); - __ emit_data(0x0000000B, relocInfo::none, 0); - __ emit_data(0x0000000C, relocInfo::none, 0); - __ emit_data(0x0000000D, relocInfo::none, 0); - __ emit_data(0x0000000E, relocInfo::none, 0); - __ emit_data(0x0000000F, relocInfo::none, 0); - - // Q - __ emit_data(0x00000000, relocInfo::none, 0); - __ emit_data(0x00000000, relocInfo::none, 0); - __ emit_data(0x00000001, relocInfo::none, 0); - __ emit_data(0x00000000, relocInfo::none, 0); - __ emit_data(0x00000002, relocInfo::none, 0); - __ emit_data(0x00000000, relocInfo::none, 0); - __ emit_data(0x00000003, relocInfo::none, 0); - __ emit_data(0x00000000, relocInfo::none, 0); - __ emit_data(0x00000004, relocInfo::none, 0); - __ emit_data(0x00000000, relocInfo::none, 0); - __ emit_data(0x00000005, relocInfo::none, 0); - __ emit_data(0x00000000, relocInfo::none, 0); - __ emit_data(0x00000006, relocInfo::none, 0); - __ emit_data(0x00000000, relocInfo::none, 0); - __ emit_data(0x00000007, relocInfo::none, 0); - __ emit_data(0x00000000, relocInfo::none, 0); - - // D - FP - __ emit_data(0x00000000, relocInfo::none, 0); // 0.0f - __ emit_data(0x3F800000, relocInfo::none, 0); // 1.0f - __ emit_data(0x40000000, relocInfo::none, 0); // 2.0f - __ emit_data(0x40400000, relocInfo::none, 0); // 3.0f - __ emit_data(0x40800000, relocInfo::none, 0); // 4.0f - __ emit_data(0x40A00000, relocInfo::none, 0); // 5.0f - __ emit_data(0x40C00000, relocInfo::none, 0); // 6.0f - __ emit_data(0x40E00000, relocInfo::none, 0); // 7.0f - __ emit_data(0x41000000, relocInfo::none, 0); // 8.0f - __ emit_data(0x41100000, relocInfo::none, 0); // 9.0f - __ emit_data(0x41200000, relocInfo::none, 0); // 10.0f - __ emit_data(0x41300000, relocInfo::none, 0); // 11.0f - __ emit_data(0x41400000, relocInfo::none, 0); // 12.0f - __ emit_data(0x41500000, relocInfo::none, 0); // 13.0f - __ emit_data(0x41600000, relocInfo::none, 0); // 14.0f - __ emit_data(0x41700000, relocInfo::none, 0); // 15.0f - - // Q - FP - __ emit_data(0x00000000, relocInfo::none, 0); // 0.0d - __ emit_data(0x00000000, relocInfo::none, 0); - __ emit_data(0x00000000, relocInfo::none, 0); // 1.0d - __ emit_data(0x3FF00000, relocInfo::none, 0); - __ emit_data(0x00000000, relocInfo::none, 0); // 2.0d - __ emit_data(0x40000000, relocInfo::none, 0); - __ emit_data(0x00000000, relocInfo::none, 0); // 3.0d - __ emit_data(0x40080000, relocInfo::none, 0); - __ emit_data(0x00000000, relocInfo::none, 0); // 4.0d - __ emit_data(0x40100000, relocInfo::none, 0); - __ emit_data(0x00000000, relocInfo::none, 0); // 5.0d - __ emit_data(0x40140000, relocInfo::none, 0); - __ emit_data(0x00000000, relocInfo::none, 0); // 6.0d - __ emit_data(0x40180000, relocInfo::none, 0); - __ emit_data(0x00000000, relocInfo::none, 0); // 7.0d - __ emit_data(0x401c0000, relocInfo::none, 0); - return start; - } - - address generate_vector_reverse_bit_lut() { - __ align(CodeEntryAlignment); - StubGenStubId stub_id = StubGenStubId::vector_reverse_bit_lut_id; - StubCodeMark mark(this, stub_id); - address start = __ pc(); - __ emit_data(0x0C040800, relocInfo::none, 0); - __ emit_data(0x0E060A02, relocInfo::none, 0); - __ emit_data(0x0D050901, relocInfo::none, 0); - __ emit_data(0x0F070B03, relocInfo::none, 0); - __ emit_data(0x0C040800, relocInfo::none, 0); - __ emit_data(0x0E060A02, relocInfo::none, 0); - __ emit_data(0x0D050901, relocInfo::none, 0); - __ emit_data(0x0F070B03, relocInfo::none, 0); - __ emit_data(0x0C040800, relocInfo::none, 0); - __ emit_data(0x0E060A02, relocInfo::none, 0); - __ emit_data(0x0D050901, relocInfo::none, 0); - __ emit_data(0x0F070B03, relocInfo::none, 0); - __ emit_data(0x0C040800, relocInfo::none, 0); - __ emit_data(0x0E060A02, relocInfo::none, 0); - __ emit_data(0x0D050901, relocInfo::none, 0); - __ emit_data(0x0F070B03, relocInfo::none, 0); - return start; - } - - address generate_vector_reverse_byte_perm_mask_long() { - __ align(CodeEntryAlignment); - StubGenStubId stub_id = StubGenStubId::vector_reverse_byte_perm_mask_long_id; - StubCodeMark mark(this, stub_id); - address start = __ pc(); - __ emit_data(0x04050607, relocInfo::none, 0); - __ emit_data(0x00010203, relocInfo::none, 0); - __ emit_data(0x0C0D0E0F, relocInfo::none, 0); - __ emit_data(0x08090A0B, relocInfo::none, 0); - __ emit_data(0x04050607, relocInfo::none, 0); - __ emit_data(0x00010203, relocInfo::none, 0); - __ emit_data(0x0C0D0E0F, relocInfo::none, 0); - __ emit_data(0x08090A0B, relocInfo::none, 0); - __ emit_data(0x04050607, relocInfo::none, 0); - __ emit_data(0x00010203, relocInfo::none, 0); - __ emit_data(0x0C0D0E0F, relocInfo::none, 0); - __ emit_data(0x08090A0B, relocInfo::none, 0); - __ emit_data(0x04050607, relocInfo::none, 0); - __ emit_data(0x00010203, relocInfo::none, 0); - __ emit_data(0x0C0D0E0F, relocInfo::none, 0); - __ emit_data(0x08090A0B, relocInfo::none, 0); - return start; - } - - address generate_vector_reverse_byte_perm_mask_int() { - __ align(CodeEntryAlignment); - StubGenStubId stub_id = StubGenStubId::vector_reverse_byte_perm_mask_int_id; - StubCodeMark mark(this, stub_id); - address start = __ pc(); - __ emit_data(0x00010203, relocInfo::none, 0); - __ emit_data(0x04050607, relocInfo::none, 0); - __ emit_data(0x08090A0B, relocInfo::none, 0); - __ emit_data(0x0C0D0E0F, relocInfo::none, 0); - __ emit_data(0x00010203, relocInfo::none, 0); - __ emit_data(0x04050607, relocInfo::none, 0); - __ emit_data(0x08090A0B, relocInfo::none, 0); - __ emit_data(0x0C0D0E0F, relocInfo::none, 0); - __ emit_data(0x00010203, relocInfo::none, 0); - __ emit_data(0x04050607, relocInfo::none, 0); - __ emit_data(0x08090A0B, relocInfo::none, 0); - __ emit_data(0x0C0D0E0F, relocInfo::none, 0); - __ emit_data(0x00010203, relocInfo::none, 0); - __ emit_data(0x04050607, relocInfo::none, 0); - __ emit_data(0x08090A0B, relocInfo::none, 0); - __ emit_data(0x0C0D0E0F, relocInfo::none, 0); - return start; - } - - address generate_vector_reverse_byte_perm_mask_short() { - __ align(CodeEntryAlignment); - StubGenStubId stub_id = StubGenStubId::vector_reverse_byte_perm_mask_short_id; - StubCodeMark mark(this, stub_id); - address start = __ pc(); - __ emit_data(0x02030001, relocInfo::none, 0); - __ emit_data(0x06070405, relocInfo::none, 0); - __ emit_data(0x0A0B0809, relocInfo::none, 0); - __ emit_data(0x0E0F0C0D, relocInfo::none, 0); - __ emit_data(0x02030001, relocInfo::none, 0); - __ emit_data(0x06070405, relocInfo::none, 0); - __ emit_data(0x0A0B0809, relocInfo::none, 0); - __ emit_data(0x0E0F0C0D, relocInfo::none, 0); - __ emit_data(0x02030001, relocInfo::none, 0); - __ emit_data(0x06070405, relocInfo::none, 0); - __ emit_data(0x0A0B0809, relocInfo::none, 0); - __ emit_data(0x0E0F0C0D, relocInfo::none, 0); - __ emit_data(0x02030001, relocInfo::none, 0); - __ emit_data(0x06070405, relocInfo::none, 0); - __ emit_data(0x0A0B0809, relocInfo::none, 0); - __ emit_data(0x0E0F0C0D, relocInfo::none, 0); - return start; - } - - address generate_vector_byte_shuffle_mask() { - __ align(CodeEntryAlignment); - StubGenStubId stub_id = StubGenStubId::vector_byte_shuffle_mask_id; - StubCodeMark mark(this, stub_id); - address start = __ pc(); - __ emit_data(0x70707070, relocInfo::none, 0); - __ emit_data(0x70707070, relocInfo::none, 0); - __ emit_data(0x70707070, relocInfo::none, 0); - __ emit_data(0x70707070, relocInfo::none, 0); - __ emit_data(0xF0F0F0F0, relocInfo::none, 0); - __ emit_data(0xF0F0F0F0, relocInfo::none, 0); - __ emit_data(0xF0F0F0F0, relocInfo::none, 0); - __ emit_data(0xF0F0F0F0, relocInfo::none, 0); - return start; - } - - address generate_vector_mask_long_double(StubGenStubId stub_id, int32_t maskhi, int32_t masklo) { - __ align(CodeEntryAlignment); - StubCodeMark mark(this, stub_id); - address start = __ pc(); - - for (int i = 0; i < 8; i++) { - __ emit_data(masklo, relocInfo::none, 0); - __ emit_data(maskhi, relocInfo::none, 0); - } - - return start; - } - - //---------------------------------------------------------------------------------------------------- - - address generate_vector_byte_perm_mask() { - __ align(CodeEntryAlignment); - StubGenStubId stub_id = StubGenStubId::vector_byte_perm_mask_id; - StubCodeMark mark(this, stub_id); - address start = __ pc(); - - __ emit_data(0x00000001, relocInfo::none, 0); - __ emit_data(0x00000000, relocInfo::none, 0); - __ emit_data(0x00000003, relocInfo::none, 0); - __ emit_data(0x00000000, relocInfo::none, 0); - __ emit_data(0x00000005, relocInfo::none, 0); - __ emit_data(0x00000000, relocInfo::none, 0); - __ emit_data(0x00000007, relocInfo::none, 0); - __ emit_data(0x00000000, relocInfo::none, 0); - __ emit_data(0x00000000, relocInfo::none, 0); - __ emit_data(0x00000000, relocInfo::none, 0); - __ emit_data(0x00000002, relocInfo::none, 0); - __ emit_data(0x00000000, relocInfo::none, 0); - __ emit_data(0x00000004, relocInfo::none, 0); - __ emit_data(0x00000000, relocInfo::none, 0); - __ emit_data(0x00000006, relocInfo::none, 0); - __ emit_data(0x00000000, relocInfo::none, 0); - - return start; - } - - address generate_vector_custom_i32(StubGenStubId stub_id, Assembler::AvxVectorLen len, - int32_t val0, int32_t val1, int32_t val2, int32_t val3, - int32_t val4 = 0, int32_t val5 = 0, int32_t val6 = 0, int32_t val7 = 0, - int32_t val8 = 0, int32_t val9 = 0, int32_t val10 = 0, int32_t val11 = 0, - int32_t val12 = 0, int32_t val13 = 0, int32_t val14 = 0, int32_t val15 = 0) { - __ align(CodeEntryAlignment); - StubCodeMark mark(this, stub_id); - address start = __ pc(); - - assert(len != Assembler::AVX_NoVec, "vector len must be specified"); - __ emit_data(val0, relocInfo::none, 0); - __ emit_data(val1, relocInfo::none, 0); - __ emit_data(val2, relocInfo::none, 0); - __ emit_data(val3, relocInfo::none, 0); - if (len >= Assembler::AVX_256bit) { - __ emit_data(val4, relocInfo::none, 0); - __ emit_data(val5, relocInfo::none, 0); - __ emit_data(val6, relocInfo::none, 0); - __ emit_data(val7, relocInfo::none, 0); - if (len >= Assembler::AVX_512bit) { - __ emit_data(val8, relocInfo::none, 0); - __ emit_data(val9, relocInfo::none, 0); - __ emit_data(val10, relocInfo::none, 0); - __ emit_data(val11, relocInfo::none, 0); - __ emit_data(val12, relocInfo::none, 0); - __ emit_data(val13, relocInfo::none, 0); - __ emit_data(val14, relocInfo::none, 0); - __ emit_data(val15, relocInfo::none, 0); - } - } - - return start; - } - - //---------------------------------------------------------------------------------------------------- - // Non-destructive plausibility checks for oops - - address generate_verify_oop() { - StubGenStubId stub_id = StubGenStubId::verify_oop_id; - StubCodeMark mark(this, stub_id); - address start = __ pc(); - - // Incoming arguments on stack after saving rax,: - // - // [tos ]: saved rdx - // [tos + 1]: saved EFLAGS - // [tos + 2]: return address - // [tos + 3]: char* error message - // [tos + 4]: oop object to verify - // [tos + 5]: saved rax, - saved by caller and bashed - - Label exit, error; - __ pushf(); - __ incrementl(ExternalAddress((address) StubRoutines::verify_oop_count_addr())); - __ push(rdx); // save rdx - // make sure object is 'reasonable' - __ movptr(rax, Address(rsp, 4 * wordSize)); // get object - __ testptr(rax, rax); - __ jcc(Assembler::zero, exit); // if obj is null it is ok - - // Check if the oop is in the right area of memory - const int oop_mask = Universe::verify_oop_mask(); - const int oop_bits = Universe::verify_oop_bits(); - __ mov(rdx, rax); - __ andptr(rdx, oop_mask); - __ cmpptr(rdx, oop_bits); - __ jcc(Assembler::notZero, error); - - // make sure klass is 'reasonable', which is not zero. - __ movptr(rax, Address(rax, oopDesc::klass_offset_in_bytes())); // get klass - __ testptr(rax, rax); - __ jcc(Assembler::zero, error); // if klass is null it is broken - - // return if everything seems ok - __ bind(exit); - __ movptr(rax, Address(rsp, 5 * wordSize)); // get saved rax, back - __ pop(rdx); // restore rdx - __ popf(); // restore EFLAGS - __ ret(3 * wordSize); // pop arguments - - // handle errors - __ bind(error); - __ movptr(rax, Address(rsp, 5 * wordSize)); // get saved rax, back - __ pop(rdx); // get saved rdx back - __ popf(); // get saved EFLAGS off stack -- will be ignored - __ pusha(); // push registers (eip = return address & msg are already pushed) - BLOCK_COMMENT("call MacroAssembler::debug"); - __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug32))); - __ hlt(); - return start; - } - - - // Copy 64 bytes chunks - // - // Inputs: - // from - source array address - // to_from - destination array address - from - // qword_count - 8-bytes element count, negative - // - void xmm_copy_forward(Register from, Register to_from, Register qword_count) { - assert( UseSSE >= 2, "supported cpu only" ); - Label L_copy_64_bytes_loop, L_copy_64_bytes, L_copy_8_bytes, L_exit; - - // Copy 64-byte chunks - __ jmpb(L_copy_64_bytes); - __ align(OptoLoopAlignment); - __ BIND(L_copy_64_bytes_loop); - - if (UseUnalignedLoadStores) { - if (UseAVX > 2) { - __ evmovdqul(xmm0, Address(from, 0), Assembler::AVX_512bit); - __ evmovdqul(Address(from, to_from, Address::times_1, 0), xmm0, Assembler::AVX_512bit); - } else if (UseAVX == 2) { - __ vmovdqu(xmm0, Address(from, 0)); - __ vmovdqu(Address(from, to_from, Address::times_1, 0), xmm0); - __ vmovdqu(xmm1, Address(from, 32)); - __ vmovdqu(Address(from, to_from, Address::times_1, 32), xmm1); - } else { - __ movdqu(xmm0, Address(from, 0)); - __ movdqu(Address(from, to_from, Address::times_1, 0), xmm0); - __ movdqu(xmm1, Address(from, 16)); - __ movdqu(Address(from, to_from, Address::times_1, 16), xmm1); - __ movdqu(xmm2, Address(from, 32)); - __ movdqu(Address(from, to_from, Address::times_1, 32), xmm2); - __ movdqu(xmm3, Address(from, 48)); - __ movdqu(Address(from, to_from, Address::times_1, 48), xmm3); - } - } else { - __ movq(xmm0, Address(from, 0)); - __ movq(Address(from, to_from, Address::times_1, 0), xmm0); - __ movq(xmm1, Address(from, 8)); - __ movq(Address(from, to_from, Address::times_1, 8), xmm1); - __ movq(xmm2, Address(from, 16)); - __ movq(Address(from, to_from, Address::times_1, 16), xmm2); - __ movq(xmm3, Address(from, 24)); - __ movq(Address(from, to_from, Address::times_1, 24), xmm3); - __ movq(xmm4, Address(from, 32)); - __ movq(Address(from, to_from, Address::times_1, 32), xmm4); - __ movq(xmm5, Address(from, 40)); - __ movq(Address(from, to_from, Address::times_1, 40), xmm5); - __ movq(xmm6, Address(from, 48)); - __ movq(Address(from, to_from, Address::times_1, 48), xmm6); - __ movq(xmm7, Address(from, 56)); - __ movq(Address(from, to_from, Address::times_1, 56), xmm7); - } - - __ addl(from, 64); - __ BIND(L_copy_64_bytes); - __ subl(qword_count, 8); - __ jcc(Assembler::greaterEqual, L_copy_64_bytes_loop); - - if (UseUnalignedLoadStores && (UseAVX == 2)) { - // clean upper bits of YMM registers - __ vpxor(xmm0, xmm0); - __ vpxor(xmm1, xmm1); - } - __ addl(qword_count, 8); - __ jccb(Assembler::zero, L_exit); - // - // length is too short, just copy qwords - // - __ BIND(L_copy_8_bytes); - __ movq(xmm0, Address(from, 0)); - __ movq(Address(from, to_from, Address::times_1), xmm0); - __ addl(from, 8); - __ decrement(qword_count); - __ jcc(Assembler::greater, L_copy_8_bytes); - __ BIND(L_exit); - } - - address generate_disjoint_copy(StubGenStubId stub_id, address* entry) { - BasicType t; - bool aligned; - Address::ScaleFactor sf; - bool dest_uninitialized; - - switch (stub_id) { - case jbyte_disjoint_arraycopy_id: - t = T_BYTE; - aligned = false; - sf = Address::times_1; - dest_uninitialized = false; - break; - case arrayof_jbyte_disjoint_arraycopy_id: - t = T_BYTE; - aligned = true; - sf = Address::times_1; - dest_uninitialized = false; - break; - case jshort_disjoint_arraycopy_id: - t = T_SHORT; - aligned = false; - sf = Address::times_2; - dest_uninitialized = false; - break; - case arrayof_jshort_disjoint_arraycopy_id: - t = T_SHORT; - aligned = true; - sf = Address::times_2; - dest_uninitialized = false; - break; - case jint_disjoint_arraycopy_id: - t = T_INT; - aligned = true; - sf = Address::times_4; - dest_uninitialized = false; - break; - case arrayof_jint_disjoint_arraycopy_id: - // since this is always aligned we can (should!) use the same - // stub as for case jint_disjoint_arraycopy - ShouldNotReachHere(); - break; - case jlong_disjoint_arraycopy_id: - case arrayof_jlong_disjoint_arraycopy_id: - // Handled by a special generator routine on 32 bit - ShouldNotReachHere(); - break; - case oop_disjoint_arraycopy_id: - t = T_OBJECT; - aligned = true; - sf = Address::times_ptr; - dest_uninitialized = false; - break; - case arrayof_oop_disjoint_arraycopy_id: - // since this is always aligned we can (should!) use the same - // stub as for case oop_disjoint_arraycopy - ShouldNotReachHere(); - break; - case oop_disjoint_arraycopy_uninit_id: - t = T_OBJECT; - aligned = true; - sf = Address::times_ptr; - dest_uninitialized = true; - break; - case arrayof_oop_disjoint_arraycopy_uninit_id: - // since this is always aligned we can (should!) use the same - // stub as for case oop_disjoint_arraycopy_uninit - ShouldNotReachHere(); - break; - default: - ShouldNotReachHere(); - break; - } - - __ align(CodeEntryAlignment); - StubCodeMark mark(this, stub_id); - address start = __ pc(); - - Label L_0_count, L_exit, L_skip_align1, L_skip_align2, L_copy_byte; - Label L_copy_2_bytes, L_copy_4_bytes, L_copy_64_bytes; - - int shift = Address::times_ptr - sf; - - const Register from = rsi; // source array address - const Register to = rdi; // destination array address - const Register count = rcx; // elements count - const Register to_from = to; // (to - from) - const Register saved_to = rdx; // saved destination array address - - __ enter(); // required for proper stackwalking of RuntimeStub frame - __ push(rsi); - __ push(rdi); - __ movptr(from , Address(rsp, 12+ 4)); - __ movptr(to , Address(rsp, 12+ 8)); - __ movl(count, Address(rsp, 12+ 12)); - - if (entry != nullptr) { - *entry = __ pc(); // Entry point from conjoint arraycopy stub. - BLOCK_COMMENT("Entry:"); - } - - if (t == T_OBJECT) { - __ testl(count, count); - __ jcc(Assembler::zero, L_0_count); - } - - DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_DISJOINT; - if (dest_uninitialized) { - decorators |= IS_DEST_UNINITIALIZED; - } - if (aligned) { - decorators |= ARRAYCOPY_ALIGNED; - } - - BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); - bs->arraycopy_prologue(_masm, decorators, t, from, to, count); - { - bool add_entry = (t != T_OBJECT && (!aligned || t == T_INT)); - // UnsafeMemoryAccess page error: continue after unsafe access - UnsafeMemoryAccessMark umam(this, add_entry, true); - __ subptr(to, from); // to --> to_from - __ cmpl(count, 2<arraycopy_epilogue(_masm, decorators, t, from, to, count); - - if (t == T_OBJECT) { - __ BIND(L_0_count); - } - inc_copy_counter_np(t); - __ pop(rdi); - __ pop(rsi); - __ leave(); // required for proper stackwalking of RuntimeStub frame - __ vzeroupper(); - __ xorptr(rax, rax); // return 0 - __ ret(0); - return start; - } - - - address generate_fill(StubGenStubId stub_id) { - BasicType t; - bool aligned; - switch(stub_id) { - case jbyte_fill_id: - t = T_BYTE; - aligned = false; - break; - case jshort_fill_id: - t = T_SHORT; - aligned = false; - break; - case jint_fill_id: - t = T_INT; - aligned = false; - break; - case arrayof_jbyte_fill_id: - t = T_BYTE; - aligned = true; - break; - case arrayof_jshort_fill_id: - t = T_SHORT; - aligned = true; - break; - case arrayof_jint_fill_id: - t = T_INT; - aligned = true; - break; - default: - ShouldNotReachHere(); - break; - } - - __ align(CodeEntryAlignment); - StubCodeMark mark(this, stub_id); - address start = __ pc(); - - BLOCK_COMMENT("Entry:"); - - const Register to = rdi; // source array address - const Register value = rdx; // value - const Register count = rsi; // elements count - - __ enter(); // required for proper stackwalking of RuntimeStub frame - __ push(rsi); - __ push(rdi); - __ movptr(to , Address(rsp, 12+ 4)); - __ movl(value, Address(rsp, 12+ 8)); - __ movl(count, Address(rsp, 12+ 12)); - - __ generate_fill(t, aligned, to, value, count, rax, xmm0); - - __ pop(rdi); - __ pop(rsi); - __ leave(); // required for proper stackwalking of RuntimeStub frame - __ ret(0); - return start; - } - - address generate_conjoint_copy(StubGenStubId stub_id, - address nooverlap_target, - address* entry) { - BasicType t; - bool aligned; - Address::ScaleFactor sf; - bool dest_uninitialized; - - switch (stub_id) { - case jbyte_arraycopy_id: - t = T_BYTE; - aligned = false; - sf = Address::times_1; - dest_uninitialized = false; - break; - case arrayof_jbyte_arraycopy_id: - t = T_BYTE; - aligned = true; - sf = Address::times_1; - dest_uninitialized = false; - break; - case jshort_arraycopy_id: - t = T_SHORT; - aligned = false; - sf = Address::times_2; - dest_uninitialized = false; - break; - case arrayof_jshort_arraycopy_id: - t = T_SHORT; - aligned = true; - sf = Address::times_2; - dest_uninitialized = false; - break; - case jint_arraycopy_id: - t = T_INT; - aligned = true; - sf = Address::times_4; - dest_uninitialized = false; - break; - case arrayof_jint_arraycopy_id: - // since this is always aligned we can (should!) use the same - // stub as for case jint_arraycopy - ShouldNotReachHere(); - break; - case jlong_arraycopy_id: - case arrayof_jlong_arraycopy_id: - // Handled by a special generator routine on 32 bit - ShouldNotReachHere(); - break; - case oop_arraycopy_id: - t = T_OBJECT; - aligned = true; - sf = Address::times_ptr; - dest_uninitialized = false; - break; - case arrayof_oop_arraycopy_id: - // since this is always aligned we can (should!) use the same - // stub as for case oop_arraycopy - ShouldNotReachHere(); - break; - case oop_arraycopy_uninit_id: - t = T_OBJECT; - aligned = true; - sf = Address::times_ptr; - dest_uninitialized = true; - break; - case arrayof_oop_arraycopy_uninit_id: - // since this is always aligned we can (should!) use the same - // stub as for case oop_arraycopy_uninit - ShouldNotReachHere(); - break; - default: - ShouldNotReachHere(); - break; - } - - __ align(CodeEntryAlignment); - StubCodeMark mark(this, stub_id); - address start = __ pc(); - - Label L_0_count, L_exit, L_skip_align1, L_skip_align2, L_copy_byte; - Label L_copy_2_bytes, L_copy_4_bytes, L_copy_8_bytes, L_copy_8_bytes_loop; - - int shift = Address::times_ptr - sf; - - const Register src = rax; // source array address - const Register dst = rdx; // destination array address - const Register from = rsi; // source array address - const Register to = rdi; // destination array address - const Register count = rcx; // elements count - const Register end = rax; // array end address - - __ enter(); // required for proper stackwalking of RuntimeStub frame - __ push(rsi); - __ push(rdi); - __ movptr(src , Address(rsp, 12+ 4)); // from - __ movptr(dst , Address(rsp, 12+ 8)); // to - __ movl2ptr(count, Address(rsp, 12+12)); // count - - if (entry != nullptr) { - *entry = __ pc(); // Entry point from generic arraycopy stub. - BLOCK_COMMENT("Entry:"); - } - - // nooverlap_target expects arguments in rsi and rdi. - __ mov(from, src); - __ mov(to , dst); - - // arrays overlap test: dispatch to disjoint stub if necessary. - RuntimeAddress nooverlap(nooverlap_target); - __ cmpptr(dst, src); - __ lea(end, Address(src, count, sf, 0)); // src + count * elem_size - __ jump_cc(Assembler::belowEqual, nooverlap); - __ cmpptr(dst, end); - __ jump_cc(Assembler::aboveEqual, nooverlap); - - if (t == T_OBJECT) { - __ testl(count, count); - __ jcc(Assembler::zero, L_0_count); - } - - DecoratorSet decorators = IN_HEAP | IS_ARRAY; - if (dest_uninitialized) { - decorators |= IS_DEST_UNINITIALIZED; - } - if (aligned) { - decorators |= ARRAYCOPY_ALIGNED; - } - - BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); - bs->arraycopy_prologue(_masm, decorators, t, from, to, count); - - { - bool add_entry = (t != T_OBJECT && (!aligned || t == T_INT)); - // UnsafeMemoryAccess page error: continue after unsafe access - UnsafeMemoryAccessMark umam(this, add_entry, true); - // copy from high to low - __ cmpl(count, 2<arraycopy_epilogue(_masm, decorators, t, from, to, count); - - if (t == T_OBJECT) { - __ BIND(L_0_count); - } - inc_copy_counter_np(t); - __ pop(rdi); - __ pop(rsi); - __ leave(); // required for proper stackwalking of RuntimeStub frame - __ xorptr(rax, rax); // return 0 - __ ret(0); - return start; - } - - - address generate_disjoint_long_copy(address* entry) { - __ align(CodeEntryAlignment); - StubGenStubId stub_id = StubGenStubId::jlong_disjoint_arraycopy_id; - StubCodeMark mark(this, stub_id); - address start = __ pc(); - - Label L_copy_8_bytes, L_copy_8_bytes_loop; - const Register from = rax; // source array address - const Register to = rdx; // destination array address - const Register count = rcx; // elements count - const Register to_from = rdx; // (to - from) - - __ enter(); // required for proper stackwalking of RuntimeStub frame - __ movptr(from , Address(rsp, 8+0)); // from - __ movptr(to , Address(rsp, 8+4)); // to - __ movl2ptr(count, Address(rsp, 8+8)); // count - - *entry = __ pc(); // Entry point from conjoint arraycopy stub. - BLOCK_COMMENT("Entry:"); - - { - // UnsafeMemoryAccess page error: continue after unsafe access - UnsafeMemoryAccessMark umam(this, true, true); - __ subptr(to, from); // to --> to_from - if (UseXMMForArrayCopy) { - xmm_copy_forward(from, to_from, count); - } else { - __ jmpb(L_copy_8_bytes); - __ align(OptoLoopAlignment); - __ BIND(L_copy_8_bytes_loop); - __ fild_d(Address(from, 0)); - __ fistp_d(Address(from, to_from, Address::times_1)); - __ addptr(from, 8); - __ BIND(L_copy_8_bytes); - __ decrement(count); - __ jcc(Assembler::greaterEqual, L_copy_8_bytes_loop); - } - } - inc_copy_counter_np(T_LONG); - __ leave(); // required for proper stackwalking of RuntimeStub frame - __ vzeroupper(); - __ xorptr(rax, rax); // return 0 - __ ret(0); - return start; - } - - address generate_conjoint_long_copy(address nooverlap_target, address* entry) { - __ align(CodeEntryAlignment); - StubGenStubId stub_id = StubGenStubId::jlong_arraycopy_id; - StubCodeMark mark(this, stub_id); - address start = __ pc(); - - Label L_copy_8_bytes, L_copy_8_bytes_loop; - const Register from = rax; // source array address - const Register to = rdx; // destination array address - const Register count = rcx; // elements count - const Register end_from = rax; // source array end address - - __ enter(); // required for proper stackwalking of RuntimeStub frame - __ movptr(from , Address(rsp, 8+0)); // from - __ movptr(to , Address(rsp, 8+4)); // to - __ movl2ptr(count, Address(rsp, 8+8)); // count - - *entry = __ pc(); // Entry point from generic arraycopy stub. - BLOCK_COMMENT("Entry:"); - - // arrays overlap test - __ cmpptr(to, from); - RuntimeAddress nooverlap(nooverlap_target); - __ jump_cc(Assembler::belowEqual, nooverlap); - __ lea(end_from, Address(from, count, Address::times_8, 0)); - __ cmpptr(to, end_from); - __ movptr(from, Address(rsp, 8)); // from - __ jump_cc(Assembler::aboveEqual, nooverlap); - - { - // UnsafeMemoryAccess page error: continue after unsafe access - UnsafeMemoryAccessMark umam(this, true, true); - - __ jmpb(L_copy_8_bytes); - - __ align(OptoLoopAlignment); - __ BIND(L_copy_8_bytes_loop); - if (UseXMMForArrayCopy) { - __ movq(xmm0, Address(from, count, Address::times_8)); - __ movq(Address(to, count, Address::times_8), xmm0); - } else { - __ fild_d(Address(from, count, Address::times_8)); - __ fistp_d(Address(to, count, Address::times_8)); - } - __ BIND(L_copy_8_bytes); - __ decrement(count); - __ jcc(Assembler::greaterEqual, L_copy_8_bytes_loop); - - } - inc_copy_counter_np(T_LONG); - __ leave(); // required for proper stackwalking of RuntimeStub frame - __ xorptr(rax, rax); // return 0 - __ ret(0); - return start; - } - - - // Helper for generating a dynamic type check. - // The sub_klass must be one of {rbx, rdx, rsi}. - // The temp is killed. - void generate_type_check(Register sub_klass, - Address& super_check_offset_addr, - Address& super_klass_addr, - Register temp, - Label* L_success, Label* L_failure) { - BLOCK_COMMENT("type_check:"); - - Label L_fallthrough; -#define LOCAL_JCC(assembler_con, label_ptr) \ - if (label_ptr != nullptr) __ jcc(assembler_con, *(label_ptr)); \ - else __ jcc(assembler_con, L_fallthrough) /*omit semi*/ - - // The following is a strange variation of the fast path which requires - // one less register, because needed values are on the argument stack. - // __ check_klass_subtype_fast_path(sub_klass, *super_klass*, temp, - // L_success, L_failure, null); - assert_different_registers(sub_klass, temp); - - int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); - - // if the pointers are equal, we are done (e.g., String[] elements) - __ cmpptr(sub_klass, super_klass_addr); - LOCAL_JCC(Assembler::equal, L_success); - - // check the supertype display: - __ movl2ptr(temp, super_check_offset_addr); - Address super_check_addr(sub_klass, temp, Address::times_1, 0); - __ movptr(temp, super_check_addr); // load displayed supertype - __ cmpptr(temp, super_klass_addr); // test the super type - LOCAL_JCC(Assembler::equal, L_success); - - // if it was a primary super, we can just fail immediately - __ cmpl(super_check_offset_addr, sc_offset); - LOCAL_JCC(Assembler::notEqual, L_failure); - - // The repne_scan instruction uses fixed registers, which will get spilled. - // We happen to know this works best when super_klass is in rax. - Register super_klass = temp; - __ movptr(super_klass, super_klass_addr); - __ check_klass_subtype_slow_path(sub_klass, super_klass, noreg, noreg, - L_success, L_failure); - - __ bind(L_fallthrough); - - if (L_success == nullptr) { BLOCK_COMMENT("L_success:"); } - if (L_failure == nullptr) { BLOCK_COMMENT("L_failure:"); } - -#undef LOCAL_JCC - } - - // - // Generate checkcasting array copy stub - // - // Input: - // 4(rsp) - source array address - // 8(rsp) - destination array address - // 12(rsp) - element count, can be zero - // 16(rsp) - size_t ckoff (super_check_offset) - // 20(rsp) - oop ckval (super_klass) - // - // Output: - // rax, == 0 - success - // rax, == -1^K - failure, where K is partial transfer count - // - address generate_checkcast_copy(StubGenStubId stub_id, address* entry) { - bool dest_uninitialized; - switch(stub_id) { - case checkcast_arraycopy_id: - dest_uninitialized = false; - break; - case checkcast_arraycopy_uninit_id: - dest_uninitialized = true; - break; - default: - ShouldNotReachHere(); - } - - __ align(CodeEntryAlignment); - StubCodeMark mark(this, stub_id); - address start = __ pc(); - - Label L_load_element, L_store_element, L_do_card_marks, L_done; - - // register use: - // rax, rdx, rcx -- loop control (end_from, end_to, count) - // rdi, rsi -- element access (oop, klass) - // rbx, -- temp - const Register from = rax; // source array address - const Register to = rdx; // destination array address - const Register length = rcx; // elements count - const Register elem = rdi; // each oop copied - const Register elem_klass = rsi; // each elem._klass (sub_klass) - const Register temp = rbx; // lone remaining temp - - __ enter(); // required for proper stackwalking of RuntimeStub frame - - __ push(rsi); - __ push(rdi); - __ push(rbx); - - Address from_arg(rsp, 16+ 4); // from - Address to_arg(rsp, 16+ 8); // to - Address length_arg(rsp, 16+12); // elements count - Address ckoff_arg(rsp, 16+16); // super_check_offset - Address ckval_arg(rsp, 16+20); // super_klass - - // Load up: - __ movptr(from, from_arg); - __ movptr(to, to_arg); - __ movl2ptr(length, length_arg); - - if (entry != nullptr) { - *entry = __ pc(); // Entry point from generic arraycopy stub. - BLOCK_COMMENT("Entry:"); - } - - //--------------------------------------------------------------- - // Assembler stub will be used for this call to arraycopy - // if the two arrays are subtypes of Object[] but the - // destination array type is not equal to or a supertype - // of the source type. Each element must be separately - // checked. - - // Loop-invariant addresses. They are exclusive end pointers. - Address end_from_addr(from, length, Address::times_ptr, 0); - Address end_to_addr(to, length, Address::times_ptr, 0); - - Register end_from = from; // re-use - Register end_to = to; // re-use - Register count = length; // re-use - - // Loop-variant addresses. They assume post-incremented count < 0. - Address from_element_addr(end_from, count, Address::times_ptr, 0); - Address to_element_addr(end_to, count, Address::times_ptr, 0); - Address elem_klass_addr(elem, oopDesc::klass_offset_in_bytes()); - - DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_CHECKCAST; - if (dest_uninitialized) { - decorators |= IS_DEST_UNINITIALIZED; - } - - BasicType type = T_OBJECT; - BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); - bs->arraycopy_prologue(_masm, decorators, type, from, to, count); - - // Copy from low to high addresses, indexed from the end of each array. - __ lea(end_from, end_from_addr); - __ lea(end_to, end_to_addr); - assert(length == count, ""); // else fix next line: - __ negptr(count); // negate and test the length - __ jccb(Assembler::notZero, L_load_element); - - // Empty array: Nothing to do. - __ xorptr(rax, rax); // return 0 on (trivial) success - __ jmp(L_done); - - // ======== begin loop ======== - // (Loop is rotated; its entry is L_load_element.) - // Loop control: - // for (count = -count; count != 0; count++) - // Base pointers src, dst are biased by 8*count,to last element. - __ align(OptoLoopAlignment); - - __ BIND(L_store_element); - __ movptr(to_element_addr, elem); // store the oop - __ increment(count); // increment the count toward zero - __ jccb(Assembler::zero, L_do_card_marks); - - // ======== loop entry is here ======== - __ BIND(L_load_element); - __ movptr(elem, from_element_addr); // load the oop - __ testptr(elem, elem); - __ jccb(Assembler::zero, L_store_element); - - // (Could do a trick here: Remember last successful non-null - // element stored and make a quick oop equality check on it.) - - __ movptr(elem_klass, elem_klass_addr); // query the object klass - generate_type_check(elem_klass, ckoff_arg, ckval_arg, temp, - &L_store_element, nullptr); - // (On fall-through, we have failed the element type check.) - // ======== end loop ======== - - // It was a real error; we must depend on the caller to finish the job. - // Register "count" = -1 * number of *remaining* oops, length_arg = *total* oops. - // Emit GC store barriers for the oops we have copied (length_arg + count), - // and report their number to the caller. - assert_different_registers(to, count, rax); - Label L_post_barrier; - __ addl(count, length_arg); // transfers = (length - remaining) - __ movl2ptr(rax, count); // save the value - __ notptr(rax); // report (-1^K) to caller (does not affect flags) - __ jccb(Assembler::notZero, L_post_barrier); - __ jmp(L_done); // K == 0, nothing was copied, skip post barrier - - // Come here on success only. - __ BIND(L_do_card_marks); - __ xorptr(rax, rax); // return 0 on success - __ movl2ptr(count, length_arg); - - __ BIND(L_post_barrier); - __ movptr(to, to_arg); // reload - bs->arraycopy_epilogue(_masm, decorators, type, from, to, count); - - // Common exit point (success or failure). - __ BIND(L_done); - __ pop(rbx); - __ pop(rdi); - __ pop(rsi); - inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr); - __ leave(); // required for proper stackwalking of RuntimeStub frame - __ ret(0); - - return start; - } - - // - // Generate 'unsafe' array copy stub - // Though just as safe as the other stubs, it takes an unscaled - // size_t argument instead of an element count. - // - // Input: - // 4(rsp) - source array address - // 8(rsp) - destination array address - // 12(rsp) - byte count, can be zero - // - // Output: - // rax, == 0 - success - // rax, == -1 - need to call System.arraycopy - // - // Examines the alignment of the operands and dispatches - // to a long, int, short, or byte copy loop. - // - address generate_unsafe_copy(address byte_copy_entry, - address short_copy_entry, - address int_copy_entry, - address long_copy_entry) { - - Label L_long_aligned, L_int_aligned, L_short_aligned; - - __ align(CodeEntryAlignment); - StubGenStubId stub_id = StubGenStubId::unsafe_arraycopy_id; - StubCodeMark mark(this, stub_id); - address start = __ pc(); - - const Register from = rax; // source array address - const Register to = rdx; // destination array address - const Register count = rcx; // elements count - - __ enter(); // required for proper stackwalking of RuntimeStub frame - __ push(rsi); - __ push(rdi); - Address from_arg(rsp, 12+ 4); // from - Address to_arg(rsp, 12+ 8); // to - Address count_arg(rsp, 12+12); // byte count - - // Load up: - __ movptr(from , from_arg); - __ movptr(to , to_arg); - __ movl2ptr(count, count_arg); - - // bump this on entry, not on exit: - inc_counter_np(SharedRuntime::_unsafe_array_copy_ctr); - - const Register bits = rsi; - __ mov(bits, from); - __ orptr(bits, to); - __ orptr(bits, count); - - __ testl(bits, BytesPerLong-1); - __ jccb(Assembler::zero, L_long_aligned); - - __ testl(bits, BytesPerInt-1); - __ jccb(Assembler::zero, L_int_aligned); - - __ testl(bits, BytesPerShort-1); - __ jump_cc(Assembler::notZero, RuntimeAddress(byte_copy_entry)); - - __ BIND(L_short_aligned); - __ shrptr(count, LogBytesPerShort); // size => short_count - __ movl(count_arg, count); // update 'count' - __ jump(RuntimeAddress(short_copy_entry)); - - __ BIND(L_int_aligned); - __ shrptr(count, LogBytesPerInt); // size => int_count - __ movl(count_arg, count); // update 'count' - __ jump(RuntimeAddress(int_copy_entry)); - - __ BIND(L_long_aligned); - __ shrptr(count, LogBytesPerLong); // size => qword_count - __ movl(count_arg, count); // update 'count' - __ pop(rdi); // Do pops here since jlong_arraycopy stub does not do it. - __ pop(rsi); - __ jump(RuntimeAddress(long_copy_entry)); - - return start; - } - - - // Perform range checks on the proposed arraycopy. - // Smashes src_pos and dst_pos. (Uses them up for temps.) - void arraycopy_range_checks(Register src, - Register src_pos, - Register dst, - Register dst_pos, - Address& length, - Label& L_failed) { - BLOCK_COMMENT("arraycopy_range_checks:"); - const Register src_end = src_pos; // source array end position - const Register dst_end = dst_pos; // destination array end position - __ addl(src_end, length); // src_pos + length - __ addl(dst_end, length); // dst_pos + length - - // if (src_pos + length > arrayOop(src)->length() ) FAIL; - __ cmpl(src_end, Address(src, arrayOopDesc::length_offset_in_bytes())); - __ jcc(Assembler::above, L_failed); - - // if (dst_pos + length > arrayOop(dst)->length() ) FAIL; - __ cmpl(dst_end, Address(dst, arrayOopDesc::length_offset_in_bytes())); - __ jcc(Assembler::above, L_failed); - - BLOCK_COMMENT("arraycopy_range_checks done"); - } - - - // - // Generate generic array copy stubs - // - // Input: - // 4(rsp) - src oop - // 8(rsp) - src_pos - // 12(rsp) - dst oop - // 16(rsp) - dst_pos - // 20(rsp) - element count - // - // Output: - // rax, == 0 - success - // rax, == -1^K - failure, where K is partial transfer count - // - address generate_generic_copy(address entry_jbyte_arraycopy, - address entry_jshort_arraycopy, - address entry_jint_arraycopy, - address entry_oop_arraycopy, - address entry_jlong_arraycopy, - address entry_checkcast_arraycopy) { - Label L_failed, L_failed_0, L_objArray; - - { int modulus = CodeEntryAlignment; - int target = modulus - 5; // 5 = sizeof jmp(L_failed) - int advance = target - (__ offset() % modulus); - if (advance < 0) advance += modulus; - if (advance > 0) __ nop(advance); - } - StubGenStubId stub_id = StubGenStubId::generic_arraycopy_id; - StubCodeMark mark(this, stub_id); - - // Short-hop target to L_failed. Makes for denser prologue code. - __ BIND(L_failed_0); - __ jmp(L_failed); - assert(__ offset() % CodeEntryAlignment == 0, "no further alignment needed"); - - __ align(CodeEntryAlignment); - address start = __ pc(); - - __ enter(); // required for proper stackwalking of RuntimeStub frame - __ push(rsi); - __ push(rdi); - - // bump this on entry, not on exit: - inc_counter_np(SharedRuntime::_generic_array_copy_ctr); - - // Input values - Address SRC (rsp, 12+ 4); - Address SRC_POS (rsp, 12+ 8); - Address DST (rsp, 12+12); - Address DST_POS (rsp, 12+16); - Address LENGTH (rsp, 12+20); - - //----------------------------------------------------------------------- - // Assembler stub will be used for this call to arraycopy - // if the following conditions are met: - // - // (1) src and dst must not be null. - // (2) src_pos must not be negative. - // (3) dst_pos must not be negative. - // (4) length must not be negative. - // (5) src klass and dst klass should be the same and not null. - // (6) src and dst should be arrays. - // (7) src_pos + length must not exceed length of src. - // (8) dst_pos + length must not exceed length of dst. - // - - const Register src = rax; // source array oop - const Register src_pos = rsi; - const Register dst = rdx; // destination array oop - const Register dst_pos = rdi; - const Register length = rcx; // transfer count - - // if (src == null) return -1; - __ movptr(src, SRC); // src oop - __ testptr(src, src); - __ jccb(Assembler::zero, L_failed_0); - - // if (src_pos < 0) return -1; - __ movl2ptr(src_pos, SRC_POS); // src_pos - __ testl(src_pos, src_pos); - __ jccb(Assembler::negative, L_failed_0); - - // if (dst == nullptr) return -1; - __ movptr(dst, DST); // dst oop - __ testptr(dst, dst); - __ jccb(Assembler::zero, L_failed_0); - - // if (dst_pos < 0) return -1; - __ movl2ptr(dst_pos, DST_POS); // dst_pos - __ testl(dst_pos, dst_pos); - __ jccb(Assembler::negative, L_failed_0); - - // if (length < 0) return -1; - __ movl2ptr(length, LENGTH); // length - __ testl(length, length); - __ jccb(Assembler::negative, L_failed_0); - - // if (src->klass() == nullptr) return -1; - Address src_klass_addr(src, oopDesc::klass_offset_in_bytes()); - Address dst_klass_addr(dst, oopDesc::klass_offset_in_bytes()); - const Register rcx_src_klass = rcx; // array klass - __ movptr(rcx_src_klass, Address(src, oopDesc::klass_offset_in_bytes())); - -#ifdef ASSERT - // assert(src->klass() != nullptr); - BLOCK_COMMENT("assert klasses not null"); - { Label L1, L2; - __ testptr(rcx_src_klass, rcx_src_klass); - __ jccb(Assembler::notZero, L2); // it is broken if klass is null - __ bind(L1); - __ stop("broken null klass"); - __ bind(L2); - __ cmpptr(dst_klass_addr, NULL_WORD); - __ jccb(Assembler::equal, L1); // this would be broken also - BLOCK_COMMENT("assert done"); - } -#endif //ASSERT - - // Load layout helper (32-bits) - // - // |array_tag| | header_size | element_type | |log2_element_size| - // 32 30 24 16 8 2 0 - // - // array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0 - // - - int lh_offset = in_bytes(Klass::layout_helper_offset()); - Address src_klass_lh_addr(rcx_src_klass, lh_offset); - - // Handle objArrays completely differently... - jint objArray_lh = Klass::array_layout_helper(T_OBJECT); - __ cmpl(src_klass_lh_addr, objArray_lh); - __ jcc(Assembler::equal, L_objArray); - - // if (src->klass() != dst->klass()) return -1; - __ cmpptr(rcx_src_klass, dst_klass_addr); - __ jccb(Assembler::notEqual, L_failed_0); - - const Register rcx_lh = rcx; // layout helper - assert(rcx_lh == rcx_src_klass, "known alias"); - __ movl(rcx_lh, src_klass_lh_addr); - - // if (!src->is_Array()) return -1; - __ cmpl(rcx_lh, Klass::_lh_neutral_value); - __ jcc(Assembler::greaterEqual, L_failed_0); // signed cmp - - // At this point, it is known to be a typeArray (array_tag 0x3). -#ifdef ASSERT - { Label L; - __ cmpl(rcx_lh, (Klass::_lh_array_tag_type_value << Klass::_lh_array_tag_shift)); - __ jcc(Assembler::greaterEqual, L); // signed cmp - __ stop("must be a primitive array"); - __ bind(L); - } -#endif - - assert_different_registers(src, src_pos, dst, dst_pos, rcx_lh); - arraycopy_range_checks(src, src_pos, dst, dst_pos, LENGTH, L_failed); - - // TypeArrayKlass - // - // src_addr = (src + array_header_in_bytes()) + (src_pos << log2elemsize); - // dst_addr = (dst + array_header_in_bytes()) + (dst_pos << log2elemsize); - // - const Register rsi_offset = rsi; // array offset - const Register src_array = src; // src array offset - const Register dst_array = dst; // dst array offset - const Register rdi_elsize = rdi; // log2 element size - - __ mov(rsi_offset, rcx_lh); - __ shrptr(rsi_offset, Klass::_lh_header_size_shift); - __ andptr(rsi_offset, Klass::_lh_header_size_mask); // array_offset - __ addptr(src_array, rsi_offset); // src array offset - __ addptr(dst_array, rsi_offset); // dst array offset - __ andptr(rcx_lh, Klass::_lh_log2_element_size_mask); // log2 elsize - - // next registers should be set before the jump to corresponding stub - const Register from = src; // source array address - const Register to = dst; // destination array address - const Register count = rcx; // elements count - // some of them should be duplicated on stack -#define FROM Address(rsp, 12+ 4) -#define TO Address(rsp, 12+ 8) // Not used now -#define COUNT Address(rsp, 12+12) // Only for oop arraycopy - - BLOCK_COMMENT("scale indexes to element size"); - __ movl2ptr(rsi, SRC_POS); // src_pos - __ shlptr(rsi); // src_pos << rcx (log2 elsize) - assert(src_array == from, ""); - __ addptr(from, rsi); // from = src_array + SRC_POS << log2 elsize - __ movl2ptr(rdi, DST_POS); // dst_pos - __ shlptr(rdi); // dst_pos << rcx (log2 elsize) - assert(dst_array == to, ""); - __ addptr(to, rdi); // to = dst_array + DST_POS << log2 elsize - __ movptr(FROM, from); // src_addr - __ mov(rdi_elsize, rcx_lh); // log2 elsize - __ movl2ptr(count, LENGTH); // elements count - - BLOCK_COMMENT("choose copy loop based on element size"); - __ cmpl(rdi_elsize, 0); - - __ jump_cc(Assembler::equal, RuntimeAddress(entry_jbyte_arraycopy)); - __ cmpl(rdi_elsize, LogBytesPerShort); - __ jump_cc(Assembler::equal, RuntimeAddress(entry_jshort_arraycopy)); - __ cmpl(rdi_elsize, LogBytesPerInt); - __ jump_cc(Assembler::equal, RuntimeAddress(entry_jint_arraycopy)); -#ifdef ASSERT - __ cmpl(rdi_elsize, LogBytesPerLong); - __ jccb(Assembler::notEqual, L_failed); -#endif - __ pop(rdi); // Do pops here since jlong_arraycopy stub does not do it. - __ pop(rsi); - __ jump(RuntimeAddress(entry_jlong_arraycopy)); - - __ BIND(L_failed); - __ xorptr(rax, rax); - __ notptr(rax); // return -1 - __ pop(rdi); - __ pop(rsi); - __ leave(); // required for proper stackwalking of RuntimeStub frame - __ ret(0); - - // ObjArrayKlass - __ BIND(L_objArray); - // live at this point: rcx_src_klass, src[_pos], dst[_pos] - - Label L_plain_copy, L_checkcast_copy; - // test array classes for subtyping - __ cmpptr(rcx_src_klass, dst_klass_addr); // usual case is exact equality - __ jccb(Assembler::notEqual, L_checkcast_copy); - - // Identically typed arrays can be copied without element-wise checks. - assert_different_registers(src, src_pos, dst, dst_pos, rcx_src_klass); - arraycopy_range_checks(src, src_pos, dst, dst_pos, LENGTH, L_failed); - - __ BIND(L_plain_copy); - __ movl2ptr(count, LENGTH); // elements count - __ movl2ptr(src_pos, SRC_POS); // reload src_pos - __ lea(from, Address(src, src_pos, Address::times_ptr, - arrayOopDesc::base_offset_in_bytes(T_OBJECT))); // src_addr - __ movl2ptr(dst_pos, DST_POS); // reload dst_pos - __ lea(to, Address(dst, dst_pos, Address::times_ptr, - arrayOopDesc::base_offset_in_bytes(T_OBJECT))); // dst_addr - __ movptr(FROM, from); // src_addr - __ movptr(TO, to); // dst_addr - __ movl(COUNT, count); // count - __ jump(RuntimeAddress(entry_oop_arraycopy)); - - __ BIND(L_checkcast_copy); - // live at this point: rcx_src_klass, dst[_pos], src[_pos] - { - // Handy offsets: - int ek_offset = in_bytes(ObjArrayKlass::element_klass_offset()); - int sco_offset = in_bytes(Klass::super_check_offset_offset()); - - Register rsi_dst_klass = rsi; - Register rdi_temp = rdi; - assert(rsi_dst_klass == src_pos, "expected alias w/ src_pos"); - assert(rdi_temp == dst_pos, "expected alias w/ dst_pos"); - Address dst_klass_lh_addr(rsi_dst_klass, lh_offset); - - // Before looking at dst.length, make sure dst is also an objArray. - __ movptr(rsi_dst_klass, dst_klass_addr); - __ cmpl(dst_klass_lh_addr, objArray_lh); - __ jccb(Assembler::notEqual, L_failed); - - // It is safe to examine both src.length and dst.length. - __ movl2ptr(src_pos, SRC_POS); // reload rsi - arraycopy_range_checks(src, src_pos, dst, dst_pos, LENGTH, L_failed); - // (Now src_pos and dst_pos are killed, but not src and dst.) - - // We'll need this temp (don't forget to pop it after the type check). - __ push(rbx); - Register rbx_src_klass = rbx; - - __ mov(rbx_src_klass, rcx_src_klass); // spill away from rcx - __ movptr(rsi_dst_klass, dst_klass_addr); - Address super_check_offset_addr(rsi_dst_klass, sco_offset); - Label L_fail_array_check; - generate_type_check(rbx_src_klass, - super_check_offset_addr, dst_klass_addr, - rdi_temp, nullptr, &L_fail_array_check); - // (On fall-through, we have passed the array type check.) - __ pop(rbx); - __ jmp(L_plain_copy); - - __ BIND(L_fail_array_check); - // Reshuffle arguments so we can call checkcast_arraycopy: - - // match initial saves for checkcast_arraycopy - // push(rsi); // already done; see above - // push(rdi); // already done; see above - // push(rbx); // already done; see above - - // Marshal outgoing arguments now, freeing registers. - Address from_arg(rsp, 16+ 4); // from - Address to_arg(rsp, 16+ 8); // to - Address length_arg(rsp, 16+12); // elements count - Address ckoff_arg(rsp, 16+16); // super_check_offset - Address ckval_arg(rsp, 16+20); // super_klass - - Address SRC_POS_arg(rsp, 16+ 8); - Address DST_POS_arg(rsp, 16+16); - Address LENGTH_arg(rsp, 16+20); - // push rbx, changed the incoming offsets (why not just use rbp,??) - // assert(SRC_POS_arg.disp() == SRC_POS.disp() + 4, ""); - - __ movptr(rbx, Address(rsi_dst_klass, ek_offset)); - __ movl2ptr(length, LENGTH_arg); // reload elements count - __ movl2ptr(src_pos, SRC_POS_arg); // reload src_pos - __ movl2ptr(dst_pos, DST_POS_arg); // reload dst_pos - - __ movptr(ckval_arg, rbx); // destination element type - __ movl(rbx, Address(rbx, sco_offset)); - __ movl(ckoff_arg, rbx); // corresponding class check offset - - __ movl(length_arg, length); // outgoing length argument - - __ lea(from, Address(src, src_pos, Address::times_ptr, - arrayOopDesc::base_offset_in_bytes(T_OBJECT))); - __ movptr(from_arg, from); - - __ lea(to, Address(dst, dst_pos, Address::times_ptr, - arrayOopDesc::base_offset_in_bytes(T_OBJECT))); - __ movptr(to_arg, to); - __ jump(RuntimeAddress(entry_checkcast_arraycopy)); - } - - return start; - } - - void generate_arraycopy_stubs() { - address entry; - address entry_jbyte_arraycopy; - address entry_jshort_arraycopy; - address entry_jint_arraycopy; - address entry_oop_arraycopy; - address entry_jlong_arraycopy; - address entry_checkcast_arraycopy; - - StubRoutines::_arrayof_jbyte_disjoint_arraycopy = - generate_disjoint_copy(StubGenStubId::arrayof_jbyte_disjoint_arraycopy_id, &entry); - StubRoutines::_arrayof_jbyte_arraycopy = - generate_conjoint_copy(StubGenStubId::arrayof_jbyte_arraycopy_id, entry, nullptr); - StubRoutines::_jbyte_disjoint_arraycopy = - generate_disjoint_copy(StubGenStubId::jbyte_disjoint_arraycopy_id, &entry); - StubRoutines::_jbyte_arraycopy = - generate_conjoint_copy(StubGenStubId::jbyte_arraycopy_id, entry, &entry_jbyte_arraycopy); - - StubRoutines::_arrayof_jshort_disjoint_arraycopy = - generate_disjoint_copy(StubGenStubId::arrayof_jshort_disjoint_arraycopy_id, &entry); - StubRoutines::_arrayof_jshort_arraycopy = - generate_conjoint_copy(StubGenStubId::arrayof_jshort_arraycopy_id, entry, nullptr); - StubRoutines::_jshort_disjoint_arraycopy = - generate_disjoint_copy(StubGenStubId::jshort_disjoint_arraycopy_id, &entry); - StubRoutines::_jshort_arraycopy = - generate_conjoint_copy(StubGenStubId::jshort_arraycopy_id, entry, &entry_jshort_arraycopy); - - // Next arrays are always aligned on 4 bytes at least. - StubRoutines::_jint_disjoint_arraycopy = - generate_disjoint_copy(StubGenStubId::jint_disjoint_arraycopy_id, &entry); - StubRoutines::_jint_arraycopy = - generate_conjoint_copy(StubGenStubId::jint_arraycopy_id, entry, &entry_jint_arraycopy); - - StubRoutines::_oop_disjoint_arraycopy = - generate_disjoint_copy(StubGenStubId::oop_disjoint_arraycopy_id, &entry); - StubRoutines::_oop_arraycopy = - generate_conjoint_copy(StubGenStubId::oop_arraycopy_id, entry, &entry_oop_arraycopy); - - StubRoutines::_oop_disjoint_arraycopy_uninit = - generate_disjoint_copy(StubGenStubId::oop_disjoint_arraycopy_uninit_id, &entry); - StubRoutines::_oop_arraycopy_uninit = - generate_conjoint_copy(StubGenStubId::oop_arraycopy_uninit_id, entry, nullptr); - - StubRoutines::_jlong_disjoint_arraycopy = - generate_disjoint_long_copy(&entry); - StubRoutines::_jlong_arraycopy = - generate_conjoint_long_copy(entry, &entry_jlong_arraycopy); - - StubRoutines::_jbyte_fill = generate_fill(StubGenStubId::jbyte_fill_id); - StubRoutines::_jshort_fill = generate_fill(StubGenStubId::jshort_fill_id); - StubRoutines::_jint_fill = generate_fill(StubGenStubId::jint_fill_id); - StubRoutines::_arrayof_jbyte_fill = generate_fill(StubGenStubId::arrayof_jbyte_fill_id); - StubRoutines::_arrayof_jshort_fill = generate_fill(StubGenStubId::arrayof_jshort_fill_id); - StubRoutines::_arrayof_jint_fill = generate_fill(StubGenStubId::arrayof_jint_fill_id); - - StubRoutines::_arrayof_jint_disjoint_arraycopy = StubRoutines::_jint_disjoint_arraycopy; - StubRoutines::_arrayof_oop_disjoint_arraycopy = StubRoutines::_oop_disjoint_arraycopy; - StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit = StubRoutines::_oop_disjoint_arraycopy_uninit; - StubRoutines::_arrayof_jlong_disjoint_arraycopy = StubRoutines::_jlong_disjoint_arraycopy; - - StubRoutines::_arrayof_jint_arraycopy = StubRoutines::_jint_arraycopy; - StubRoutines::_arrayof_oop_arraycopy = StubRoutines::_oop_arraycopy; - StubRoutines::_arrayof_oop_arraycopy_uninit = StubRoutines::_oop_arraycopy_uninit; - StubRoutines::_arrayof_jlong_arraycopy = StubRoutines::_jlong_arraycopy; - - StubRoutines::_checkcast_arraycopy = - generate_checkcast_copy(StubGenStubId::checkcast_arraycopy_id, &entry_checkcast_arraycopy); - StubRoutines::_checkcast_arraycopy_uninit = - generate_checkcast_copy(StubGenStubId::checkcast_arraycopy_uninit_id, nullptr); - - StubRoutines::_unsafe_arraycopy = - generate_unsafe_copy(entry_jbyte_arraycopy, - entry_jshort_arraycopy, - entry_jint_arraycopy, - entry_jlong_arraycopy); - - StubRoutines::_generic_arraycopy = - generate_generic_copy( entry_jbyte_arraycopy, - entry_jshort_arraycopy, - entry_jint_arraycopy, - entry_oop_arraycopy, - entry_jlong_arraycopy, - entry_checkcast_arraycopy); - } - - // AES intrinsic stubs - enum {AESBlockSize = 16}; - - address key_shuffle_mask_addr() { - return (address)KEY_SHUFFLE_MASK; - } - - address counter_shuffle_mask_addr() { - return (address)COUNTER_SHUFFLE_MASK; - } - - // Utility routine for loading a 128-bit key word in little endian format - // can optionally specify that the shuffle mask is already in an xmmregister - void load_key(XMMRegister xmmdst, Register key, int offset, XMMRegister xmm_shuf_mask = xnoreg) { - __ movdqu(xmmdst, Address(key, offset)); - if (xmm_shuf_mask != xnoreg) { - __ pshufb(xmmdst, xmm_shuf_mask); - } else { - __ pshufb(xmmdst, ExternalAddress(key_shuffle_mask_addr())); - } - } - - // aesenc using specified key+offset - // can optionally specify that the shuffle mask is already in an xmmregister - void aes_enc_key(XMMRegister xmmdst, XMMRegister xmmtmp, Register key, int offset, XMMRegister xmm_shuf_mask = xnoreg) { - load_key(xmmtmp, key, offset, xmm_shuf_mask); - __ aesenc(xmmdst, xmmtmp); - } - - // aesdec using specified key+offset - // can optionally specify that the shuffle mask is already in an xmmregister - void aes_dec_key(XMMRegister xmmdst, XMMRegister xmmtmp, Register key, int offset, XMMRegister xmm_shuf_mask = xnoreg) { - load_key(xmmtmp, key, offset, xmm_shuf_mask); - __ aesdec(xmmdst, xmmtmp); - } - - // Utility routine for increase 128bit counter (iv in CTR mode) - // XMM_128bit, D3, D2, D1, D0 - void inc_counter(Register reg, XMMRegister xmmdst, int inc_delta, Label& next_block) { - __ pextrd(reg, xmmdst, 0x0); - __ addl(reg, inc_delta); - __ pinsrd(xmmdst, reg, 0x0); - __ jcc(Assembler::carryClear, next_block); // jump if no carry - - __ pextrd(reg, xmmdst, 0x01); // Carry-> D1 - __ addl(reg, 0x01); - __ pinsrd(xmmdst, reg, 0x01); - __ jcc(Assembler::carryClear, next_block); // jump if no carry - - __ pextrd(reg, xmmdst, 0x02); // Carry-> D2 - __ addl(reg, 0x01); - __ pinsrd(xmmdst, reg, 0x02); - __ jcc(Assembler::carryClear, next_block); // jump if no carry - - __ pextrd(reg, xmmdst, 0x03); // Carry -> D3 - __ addl(reg, 0x01); - __ pinsrd(xmmdst, reg, 0x03); - - __ BIND(next_block); // next instruction - } - - - // Arguments: - // - // Inputs: - // c_rarg0 - source byte array address - // c_rarg1 - destination byte array address - // c_rarg2 - K (key) in little endian int array - // - address generate_aescrypt_encryptBlock() { - assert(UseAES, "need AES instructions and misaligned SSE support"); - __ align(CodeEntryAlignment); - StubGenStubId stub_id = StubGenStubId::aescrypt_encryptBlock_id; - StubCodeMark mark(this, stub_id); - Label L_doLast; - address start = __ pc(); - - const Register from = rdx; // source array address - const Register to = rdx; // destination array address - const Register key = rcx; // key array address - const Register keylen = rax; - const Address from_param(rbp, 8+0); - const Address to_param (rbp, 8+4); - const Address key_param (rbp, 8+8); - - const XMMRegister xmm_result = xmm0; - const XMMRegister xmm_key_shuf_mask = xmm1; - const XMMRegister xmm_temp1 = xmm2; - const XMMRegister xmm_temp2 = xmm3; - const XMMRegister xmm_temp3 = xmm4; - const XMMRegister xmm_temp4 = xmm5; - - __ enter(); // required for proper stackwalking of RuntimeStub frame - - __ movptr(from, from_param); - __ movptr(key, key_param); - - // keylen could be only {11, 13, 15} * 4 = {44, 52, 60} - __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); - - __ movdqu(xmm_key_shuf_mask, ExternalAddress(key_shuffle_mask_addr())); - __ movdqu(xmm_result, Address(from, 0)); // get 16 bytes of input - __ movptr(to, to_param); - - // For encryption, the java expanded key ordering is just what we need - - load_key(xmm_temp1, key, 0x00, xmm_key_shuf_mask); - __ pxor(xmm_result, xmm_temp1); - - load_key(xmm_temp1, key, 0x10, xmm_key_shuf_mask); - load_key(xmm_temp2, key, 0x20, xmm_key_shuf_mask); - load_key(xmm_temp3, key, 0x30, xmm_key_shuf_mask); - load_key(xmm_temp4, key, 0x40, xmm_key_shuf_mask); - - __ aesenc(xmm_result, xmm_temp1); - __ aesenc(xmm_result, xmm_temp2); - __ aesenc(xmm_result, xmm_temp3); - __ aesenc(xmm_result, xmm_temp4); - - load_key(xmm_temp1, key, 0x50, xmm_key_shuf_mask); - load_key(xmm_temp2, key, 0x60, xmm_key_shuf_mask); - load_key(xmm_temp3, key, 0x70, xmm_key_shuf_mask); - load_key(xmm_temp4, key, 0x80, xmm_key_shuf_mask); - - __ aesenc(xmm_result, xmm_temp1); - __ aesenc(xmm_result, xmm_temp2); - __ aesenc(xmm_result, xmm_temp3); - __ aesenc(xmm_result, xmm_temp4); - - load_key(xmm_temp1, key, 0x90, xmm_key_shuf_mask); - load_key(xmm_temp2, key, 0xa0, xmm_key_shuf_mask); - - __ cmpl(keylen, 44); - __ jccb(Assembler::equal, L_doLast); - - __ aesenc(xmm_result, xmm_temp1); - __ aesenc(xmm_result, xmm_temp2); - - load_key(xmm_temp1, key, 0xb0, xmm_key_shuf_mask); - load_key(xmm_temp2, key, 0xc0, xmm_key_shuf_mask); - - __ cmpl(keylen, 52); - __ jccb(Assembler::equal, L_doLast); - - __ aesenc(xmm_result, xmm_temp1); - __ aesenc(xmm_result, xmm_temp2); - - load_key(xmm_temp1, key, 0xd0, xmm_key_shuf_mask); - load_key(xmm_temp2, key, 0xe0, xmm_key_shuf_mask); - - __ BIND(L_doLast); - __ aesenc(xmm_result, xmm_temp1); - __ aesenclast(xmm_result, xmm_temp2); - __ movdqu(Address(to, 0), xmm_result); // store the result - __ xorptr(rax, rax); // return 0 - __ leave(); // required for proper stackwalking of RuntimeStub frame - __ ret(0); - - return start; - } - - - // Arguments: - // - // Inputs: - // c_rarg0 - source byte array address - // c_rarg1 - destination byte array address - // c_rarg2 - K (key) in little endian int array - // - address generate_aescrypt_decryptBlock() { - assert(UseAES, "need AES instructions and misaligned SSE support"); - __ align(CodeEntryAlignment); - StubGenStubId stub_id = StubGenStubId::aescrypt_decryptBlock_id; - StubCodeMark mark(this, stub_id); - Label L_doLast; - address start = __ pc(); - - const Register from = rdx; // source array address - const Register to = rdx; // destination array address - const Register key = rcx; // key array address - const Register keylen = rax; - const Address from_param(rbp, 8+0); - const Address to_param (rbp, 8+4); - const Address key_param (rbp, 8+8); - - const XMMRegister xmm_result = xmm0; - const XMMRegister xmm_key_shuf_mask = xmm1; - const XMMRegister xmm_temp1 = xmm2; - const XMMRegister xmm_temp2 = xmm3; - const XMMRegister xmm_temp3 = xmm4; - const XMMRegister xmm_temp4 = xmm5; - - __ enter(); // required for proper stackwalking of RuntimeStub frame - - __ movptr(from, from_param); - __ movptr(key, key_param); - - // keylen could be only {11, 13, 15} * 4 = {44, 52, 60} - __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); - - __ movdqu(xmm_key_shuf_mask, ExternalAddress(key_shuffle_mask_addr())); - __ movdqu(xmm_result, Address(from, 0)); - __ movptr(to, to_param); - - // for decryption java expanded key ordering is rotated one position from what we want - // so we start from 0x10 here and hit 0x00 last - // we don't know if the key is aligned, hence not using load-execute form - load_key(xmm_temp1, key, 0x10, xmm_key_shuf_mask); - load_key(xmm_temp2, key, 0x20, xmm_key_shuf_mask); - load_key(xmm_temp3, key, 0x30, xmm_key_shuf_mask); - load_key(xmm_temp4, key, 0x40, xmm_key_shuf_mask); - - __ pxor (xmm_result, xmm_temp1); - __ aesdec(xmm_result, xmm_temp2); - __ aesdec(xmm_result, xmm_temp3); - __ aesdec(xmm_result, xmm_temp4); - - load_key(xmm_temp1, key, 0x50, xmm_key_shuf_mask); - load_key(xmm_temp2, key, 0x60, xmm_key_shuf_mask); - load_key(xmm_temp3, key, 0x70, xmm_key_shuf_mask); - load_key(xmm_temp4, key, 0x80, xmm_key_shuf_mask); - - __ aesdec(xmm_result, xmm_temp1); - __ aesdec(xmm_result, xmm_temp2); - __ aesdec(xmm_result, xmm_temp3); - __ aesdec(xmm_result, xmm_temp4); - - load_key(xmm_temp1, key, 0x90, xmm_key_shuf_mask); - load_key(xmm_temp2, key, 0xa0, xmm_key_shuf_mask); - load_key(xmm_temp3, key, 0x00, xmm_key_shuf_mask); - - __ cmpl(keylen, 44); - __ jccb(Assembler::equal, L_doLast); - - __ aesdec(xmm_result, xmm_temp1); - __ aesdec(xmm_result, xmm_temp2); - - load_key(xmm_temp1, key, 0xb0, xmm_key_shuf_mask); - load_key(xmm_temp2, key, 0xc0, xmm_key_shuf_mask); - - __ cmpl(keylen, 52); - __ jccb(Assembler::equal, L_doLast); - - __ aesdec(xmm_result, xmm_temp1); - __ aesdec(xmm_result, xmm_temp2); - - load_key(xmm_temp1, key, 0xd0, xmm_key_shuf_mask); - load_key(xmm_temp2, key, 0xe0, xmm_key_shuf_mask); - - __ BIND(L_doLast); - __ aesdec(xmm_result, xmm_temp1); - __ aesdec(xmm_result, xmm_temp2); - - // for decryption the aesdeclast operation is always on key+0x00 - __ aesdeclast(xmm_result, xmm_temp3); - __ movdqu(Address(to, 0), xmm_result); // store the result - __ xorptr(rax, rax); // return 0 - __ leave(); // required for proper stackwalking of RuntimeStub frame - __ ret(0); - - return start; - } - - void handleSOERegisters(bool saving) { - const int saveFrameSizeInBytes = 4 * wordSize; - const Address saved_rbx (rbp, -3 * wordSize); - const Address saved_rsi (rbp, -2 * wordSize); - const Address saved_rdi (rbp, -1 * wordSize); - - if (saving) { - __ subptr(rsp, saveFrameSizeInBytes); - __ movptr(saved_rsi, rsi); - __ movptr(saved_rdi, rdi); - __ movptr(saved_rbx, rbx); - } else { - // restoring - __ movptr(rsi, saved_rsi); - __ movptr(rdi, saved_rdi); - __ movptr(rbx, saved_rbx); - } - } - - // Arguments: - // - // Inputs: - // c_rarg0 - source byte array address - // c_rarg1 - destination byte array address - // c_rarg2 - K (key) in little endian int array - // c_rarg3 - r vector byte array address - // c_rarg4 - input length - // - // Output: - // rax - input length - // - address generate_cipherBlockChaining_encryptAESCrypt() { - assert(UseAES, "need AES instructions and misaligned SSE support"); - __ align(CodeEntryAlignment); - StubGenStubId stub_id = StubGenStubId::cipherBlockChaining_encryptAESCrypt_id; - StubCodeMark mark(this, stub_id); - address start = __ pc(); - - Label L_exit, L_key_192_256, L_key_256, L_loopTop_128, L_loopTop_192, L_loopTop_256; - const Register from = rsi; // source array address - const Register to = rdx; // destination array address - const Register key = rcx; // key array address - const Register rvec = rdi; // r byte array initialized from initvector array address - // and left with the results of the last encryption block - const Register len_reg = rbx; // src len (must be multiple of blocksize 16) - const Register pos = rax; - - // xmm register assignments for the loops below - const XMMRegister xmm_result = xmm0; - const XMMRegister xmm_temp = xmm1; - // first 6 keys preloaded into xmm2-xmm7 - const int XMM_REG_NUM_KEY_FIRST = 2; - const int XMM_REG_NUM_KEY_LAST = 7; - const XMMRegister xmm_key0 = as_XMMRegister(XMM_REG_NUM_KEY_FIRST); - - __ enter(); // required for proper stackwalking of RuntimeStub frame - handleSOERegisters(true /*saving*/); - - // load registers from incoming parameters - const Address from_param(rbp, 8+0); - const Address to_param (rbp, 8+4); - const Address key_param (rbp, 8+8); - const Address rvec_param (rbp, 8+12); - const Address len_param (rbp, 8+16); - __ movptr(from , from_param); - __ movptr(to , to_param); - __ movptr(key , key_param); - __ movptr(rvec , rvec_param); - __ movptr(len_reg , len_param); - - const XMMRegister xmm_key_shuf_mask = xmm_temp; // used temporarily to swap key bytes up front - __ movdqu(xmm_key_shuf_mask, ExternalAddress(key_shuffle_mask_addr())); - // load up xmm regs 2 thru 7 with keys 0-5 - for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x00; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { - load_key(as_XMMRegister(rnum), key, offset, xmm_key_shuf_mask); - offset += 0x10; - } - - __ movdqu(xmm_result, Address(rvec, 0x00)); // initialize xmm_result with r vec - - // now split to different paths depending on the keylen (len in ints of AESCrypt.KLE array (52=192, or 60=256)) - __ movl(rax, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); - __ cmpl(rax, 44); - __ jcc(Assembler::notEqual, L_key_192_256); - - // 128 bit code follows here - __ movl(pos, 0); - __ align(OptoLoopAlignment); - __ BIND(L_loopTop_128); - __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input - __ pxor (xmm_result, xmm_temp); // xor with the current r vector - - __ pxor (xmm_result, xmm_key0); // do the aes rounds - for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { - __ aesenc(xmm_result, as_XMMRegister(rnum)); - } - for (int key_offset = 0x60; key_offset <= 0x90; key_offset += 0x10) { - aes_enc_key(xmm_result, xmm_temp, key, key_offset); - } - load_key(xmm_temp, key, 0xa0); - __ aesenclast(xmm_result, xmm_temp); - - __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output - // no need to store r to memory until we exit - __ addptr(pos, AESBlockSize); - __ subptr(len_reg, AESBlockSize); - __ jcc(Assembler::notEqual, L_loopTop_128); - - __ BIND(L_exit); - __ movdqu(Address(rvec, 0), xmm_result); // final value of r stored in rvec of CipherBlockChaining object - - handleSOERegisters(false /*restoring*/); - __ movptr(rax, len_param); // return length - __ leave(); // required for proper stackwalking of RuntimeStub frame - __ ret(0); - - __ BIND(L_key_192_256); - // here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256) - __ cmpl(rax, 52); - __ jcc(Assembler::notEqual, L_key_256); - - // 192-bit code follows here (could be changed to use more xmm registers) - __ movl(pos, 0); - __ align(OptoLoopAlignment); - __ BIND(L_loopTop_192); - __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input - __ pxor (xmm_result, xmm_temp); // xor with the current r vector - - __ pxor (xmm_result, xmm_key0); // do the aes rounds - for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { - __ aesenc(xmm_result, as_XMMRegister(rnum)); - } - for (int key_offset = 0x60; key_offset <= 0xb0; key_offset += 0x10) { - aes_enc_key(xmm_result, xmm_temp, key, key_offset); - } - load_key(xmm_temp, key, 0xc0); - __ aesenclast(xmm_result, xmm_temp); - - __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output - // no need to store r to memory until we exit - __ addptr(pos, AESBlockSize); - __ subptr(len_reg, AESBlockSize); - __ jcc(Assembler::notEqual, L_loopTop_192); - __ jmp(L_exit); - - __ BIND(L_key_256); - // 256-bit code follows here (could be changed to use more xmm registers) - __ movl(pos, 0); - __ align(OptoLoopAlignment); - __ BIND(L_loopTop_256); - __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input - __ pxor (xmm_result, xmm_temp); // xor with the current r vector - - __ pxor (xmm_result, xmm_key0); // do the aes rounds - for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { - __ aesenc(xmm_result, as_XMMRegister(rnum)); - } - for (int key_offset = 0x60; key_offset <= 0xd0; key_offset += 0x10) { - aes_enc_key(xmm_result, xmm_temp, key, key_offset); - } - load_key(xmm_temp, key, 0xe0); - __ aesenclast(xmm_result, xmm_temp); - - __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output - // no need to store r to memory until we exit - __ addptr(pos, AESBlockSize); - __ subptr(len_reg, AESBlockSize); - __ jcc(Assembler::notEqual, L_loopTop_256); - __ jmp(L_exit); - - return start; - } - - - // CBC AES Decryption. - // In 32-bit stub, because of lack of registers we do not try to parallelize 4 blocks at a time. - // - // Arguments: - // - // Inputs: - // c_rarg0 - source byte array address - // c_rarg1 - destination byte array address - // c_rarg2 - K (key) in little endian int array - // c_rarg3 - r vector byte array address - // c_rarg4 - input length - // - // Output: - // rax - input length - // - - address generate_cipherBlockChaining_decryptAESCrypt_Parallel() { - assert(UseAES, "need AES instructions and misaligned SSE support"); - __ align(CodeEntryAlignment); - StubGenStubId stub_id = StubGenStubId::cipherBlockChaining_decryptAESCrypt_id; - StubCodeMark mark(this, stub_id); - address start = __ pc(); - - const Register from = rsi; // source array address - const Register to = rdx; // destination array address - const Register key = rcx; // key array address - const Register rvec = rdi; // r byte array initialized from initvector array address - // and left with the results of the last encryption block - const Register len_reg = rbx; // src len (must be multiple of blocksize 16) - const Register pos = rax; - - const int PARALLEL_FACTOR = 4; - const int ROUNDS[3] = { 10, 12, 14 }; //aes rounds for key128, key192, key256 - - Label L_exit; - Label L_singleBlock_loopTop[3]; //128, 192, 256 - Label L_multiBlock_loopTop[3]; //128, 192, 256 - - const XMMRegister xmm_prev_block_cipher = xmm0; // holds cipher of previous block - const XMMRegister xmm_key_shuf_mask = xmm1; - - const XMMRegister xmm_key_tmp0 = xmm2; - const XMMRegister xmm_key_tmp1 = xmm3; - - // registers holding the six results in the parallelized loop - const XMMRegister xmm_result0 = xmm4; - const XMMRegister xmm_result1 = xmm5; - const XMMRegister xmm_result2 = xmm6; - const XMMRegister xmm_result3 = xmm7; - - __ enter(); // required for proper stackwalking of RuntimeStub frame - handleSOERegisters(true /*saving*/); - - // load registers from incoming parameters - const Address from_param(rbp, 8+0); - const Address to_param (rbp, 8+4); - const Address key_param (rbp, 8+8); - const Address rvec_param (rbp, 8+12); - const Address len_param (rbp, 8+16); - - __ movptr(from , from_param); - __ movptr(to , to_param); - __ movptr(key , key_param); - __ movptr(rvec , rvec_param); - __ movptr(len_reg , len_param); - - __ movdqu(xmm_key_shuf_mask, ExternalAddress(key_shuffle_mask_addr())); - __ movdqu(xmm_prev_block_cipher, Address(rvec, 0x00)); // initialize with initial rvec - - __ xorptr(pos, pos); - - // now split to different paths depending on the keylen (len in ints of AESCrypt.KLE array (52=192, or 60=256)) - // rvec is reused - __ movl(rvec, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); - __ cmpl(rvec, 52); - __ jcc(Assembler::equal, L_multiBlock_loopTop[1]); - __ cmpl(rvec, 60); - __ jcc(Assembler::equal, L_multiBlock_loopTop[2]); - -#define DoFour(opc, src_reg) \ - __ opc(xmm_result0, src_reg); \ - __ opc(xmm_result1, src_reg); \ - __ opc(xmm_result2, src_reg); \ - __ opc(xmm_result3, src_reg); \ - - for (int k = 0; k < 3; ++k) { - __ align(OptoLoopAlignment); - __ BIND(L_multiBlock_loopTop[k]); - __ cmpptr(len_reg, PARALLEL_FACTOR * AESBlockSize); // see if at least 4 blocks left - __ jcc(Assembler::less, L_singleBlock_loopTop[k]); - - __ movdqu(xmm_result0, Address(from, pos, Address::times_1, 0 * AESBlockSize)); // get next 4 blocks into xmmresult registers - __ movdqu(xmm_result1, Address(from, pos, Address::times_1, 1 * AESBlockSize)); - __ movdqu(xmm_result2, Address(from, pos, Address::times_1, 2 * AESBlockSize)); - __ movdqu(xmm_result3, Address(from, pos, Address::times_1, 3 * AESBlockSize)); - - // the java expanded key ordering is rotated one position from what we want - // so we start from 0x10 here and hit 0x00 last - load_key(xmm_key_tmp0, key, 0x10, xmm_key_shuf_mask); - DoFour(pxor, xmm_key_tmp0); //xor with first key - // do the aes dec rounds - for (int rnum = 1; rnum <= ROUNDS[k];) { - //load two keys at a time - //k1->0x20, ..., k9->0xa0, k10->0x00 - load_key(xmm_key_tmp1, key, (rnum + 1) * 0x10, xmm_key_shuf_mask); - load_key(xmm_key_tmp0, key, ((rnum + 2) % (ROUNDS[k] + 1)) * 0x10, xmm_key_shuf_mask); // hit 0x00 last! - DoFour(aesdec, xmm_key_tmp1); - rnum++; - if (rnum != ROUNDS[k]) { - DoFour(aesdec, xmm_key_tmp0); - } - else { - DoFour(aesdeclast, xmm_key_tmp0); - } - rnum++; - } - - // for each result, xor with the r vector of previous cipher block - __ pxor(xmm_result0, xmm_prev_block_cipher); - __ movdqu(xmm_prev_block_cipher, Address(from, pos, Address::times_1, 0 * AESBlockSize)); - __ pxor(xmm_result1, xmm_prev_block_cipher); - __ movdqu(xmm_prev_block_cipher, Address(from, pos, Address::times_1, 1 * AESBlockSize)); - __ pxor(xmm_result2, xmm_prev_block_cipher); - __ movdqu(xmm_prev_block_cipher, Address(from, pos, Address::times_1, 2 * AESBlockSize)); - __ pxor(xmm_result3, xmm_prev_block_cipher); - __ movdqu(xmm_prev_block_cipher, Address(from, pos, Address::times_1, 3 * AESBlockSize)); // this will carry over to next set of blocks - - // store 4 results into the next 64 bytes of output - __ movdqu(Address(to, pos, Address::times_1, 0 * AESBlockSize), xmm_result0); - __ movdqu(Address(to, pos, Address::times_1, 1 * AESBlockSize), xmm_result1); - __ movdqu(Address(to, pos, Address::times_1, 2 * AESBlockSize), xmm_result2); - __ movdqu(Address(to, pos, Address::times_1, 3 * AESBlockSize), xmm_result3); - - __ addptr(pos, 4 * AESBlockSize); - __ subptr(len_reg, 4 * AESBlockSize); - __ jmp(L_multiBlock_loopTop[k]); - - //singleBlock starts here - __ align(OptoLoopAlignment); - __ BIND(L_singleBlock_loopTop[k]); - __ cmpptr(len_reg, 0); // any blocks left? - __ jcc(Assembler::equal, L_exit); - __ movdqu(xmm_result0, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input - __ movdqa(xmm_result1, xmm_result0); - - load_key(xmm_key_tmp0, key, 0x10, xmm_key_shuf_mask); - __ pxor(xmm_result0, xmm_key_tmp0); - // do the aes dec rounds - for (int rnum = 1; rnum < ROUNDS[k]; rnum++) { - // the java expanded key ordering is rotated one position from what we want - load_key(xmm_key_tmp0, key, (rnum + 1) * 0x10, xmm_key_shuf_mask); - __ aesdec(xmm_result0, xmm_key_tmp0); - } - load_key(xmm_key_tmp0, key, 0x00, xmm_key_shuf_mask); - __ aesdeclast(xmm_result0, xmm_key_tmp0); - __ pxor(xmm_result0, xmm_prev_block_cipher); // xor with the current r vector - __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result0); // store into the next 16 bytes of output - // no need to store r to memory until we exit - __ movdqa(xmm_prev_block_cipher, xmm_result1); // set up next r vector with cipher input from this block - - __ addptr(pos, AESBlockSize); - __ subptr(len_reg, AESBlockSize); - __ jmp(L_singleBlock_loopTop[k]); - }//for 128/192/256 - - __ BIND(L_exit); - __ movptr(rvec, rvec_param); // restore this since reused earlier - __ movdqu(Address(rvec, 0), xmm_prev_block_cipher); // final value of r stored in rvec of CipherBlockChaining object - handleSOERegisters(false /*restoring*/); - __ movptr(rax, len_param); // return length - __ leave(); // required for proper stackwalking of RuntimeStub frame - __ ret(0); - - return start; - } - - // CTR AES crypt. - // In 32-bit stub, parallelize 4 blocks at a time - // Arguments: - // - // Inputs: - // c_rarg0 - source byte array address - // c_rarg1 - destination byte array address - // c_rarg2 - K (key) in little endian int array - // c_rarg3 - counter vector byte array address - // c_rarg4 - input length - // - // Output: - // rax - input length - // - address generate_counterMode_AESCrypt_Parallel() { - assert(UseAES, "need AES instructions and misaligned SSE support"); - __ align(CodeEntryAlignment); - StubGenStubId stub_id = StubGenStubId::counterMode_AESCrypt_id; - StubCodeMark mark(this, stub_id); - address start = __ pc(); - const Register from = rsi; // source array address - const Register to = rdx; // destination array address - const Register key = rcx; // key array address - const Register counter = rdi; // counter byte array initialized from initvector array address - // and updated with the incremented counter in the end - const Register len_reg = rbx; - const Register pos = rax; - - __ enter(); // required for proper stackwalking of RuntimeStub frame - handleSOERegisters(true /*saving*/); // save rbx, rsi, rdi - - // load registers from incoming parameters - const Address from_param(rbp, 8+0); - const Address to_param (rbp, 8+4); - const Address key_param (rbp, 8+8); - const Address rvec_param (rbp, 8+12); - const Address len_param (rbp, 8+16); - const Address saved_counter_param(rbp, 8 + 20); - const Address used_addr_param(rbp, 8 + 24); - - __ movptr(from , from_param); - __ movptr(to , to_param); - __ movptr(len_reg , len_param); - - // Use the partially used encrpyted counter from last invocation - Label L_exit_preLoop, L_preLoop_start; - - // Use the registers 'counter' and 'key' here in this preloop - // to hold of last 2 params 'used' and 'saved_encCounter_start' - Register used = counter; - Register saved_encCounter_start = key; - Register used_addr = saved_encCounter_start; - - __ movptr(used_addr, used_addr_param); - __ movptr(used, Address(used_addr, 0)); - __ movptr(saved_encCounter_start, saved_counter_param); - - __ BIND(L_preLoop_start); - __ cmpptr(used, 16); - __ jcc(Assembler::aboveEqual, L_exit_preLoop); - __ cmpptr(len_reg, 0); - __ jcc(Assembler::lessEqual, L_exit_preLoop); - __ movb(rax, Address(saved_encCounter_start, used)); - __ xorb(rax, Address(from, 0)); - __ movb(Address(to, 0), rax); - __ addptr(from, 1); - __ addptr(to, 1); - __ addptr(used, 1); - __ subptr(len_reg, 1); - - __ jmp(L_preLoop_start); - - __ BIND(L_exit_preLoop); - __ movptr(used_addr, used_addr_param); - __ movptr(used_addr, used_addr_param); - __ movl(Address(used_addr, 0), used); - - // load the parameters 'key' and 'counter' - __ movptr(key, key_param); - __ movptr(counter, rvec_param); - - // xmm register assignments for the loops below - const XMMRegister xmm_curr_counter = xmm0; - const XMMRegister xmm_counter_shuf_mask = xmm1; // need to be reloaded - const XMMRegister xmm_key_shuf_mask = xmm2; // need to be reloaded - const XMMRegister xmm_key = xmm3; - const XMMRegister xmm_result0 = xmm4; - const XMMRegister xmm_result1 = xmm5; - const XMMRegister xmm_result2 = xmm6; - const XMMRegister xmm_result3 = xmm7; - const XMMRegister xmm_from0 = xmm1; //reuse XMM register - const XMMRegister xmm_from1 = xmm2; - const XMMRegister xmm_from2 = xmm3; - const XMMRegister xmm_from3 = xmm4; - - //for key_128, key_192, key_256 - const int rounds[3] = {10, 12, 14}; - Label L_singleBlockLoopTop[3]; - Label L_multiBlock_loopTop[3]; - Label L_key192_top, L_key256_top; - Label L_incCounter[3][4]; // 3: different key length, 4: 4 blocks at a time - Label L_incCounter_single[3]; //for single block, key128, key192, key256 - Label L_processTail_insr[3], L_processTail_4_insr[3], L_processTail_2_insr[3], L_processTail_1_insr[3], L_processTail_exit_insr[3]; - Label L_processTail_extr[3], L_processTail_4_extr[3], L_processTail_2_extr[3], L_processTail_1_extr[3], L_processTail_exit_extr[3]; - - Label L_exit; - const int PARALLEL_FACTOR = 4; //because of the limited register number - - // initialize counter with initial counter - __ movdqu(xmm_curr_counter, Address(counter, 0x00)); - __ movdqu(xmm_counter_shuf_mask, ExternalAddress(counter_shuffle_mask_addr())); - __ pshufb(xmm_curr_counter, xmm_counter_shuf_mask); //counter is shuffled for increase - - // key length could be only {11, 13, 15} * 4 = {44, 52, 60} - __ movdqu(xmm_key_shuf_mask, ExternalAddress(key_shuffle_mask_addr())); - __ movl(rax, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); - __ cmpl(rax, 52); - __ jcc(Assembler::equal, L_key192_top); - __ cmpl(rax, 60); - __ jcc(Assembler::equal, L_key256_top); - - //key128 begins here - __ movptr(pos, 0); // init pos before L_multiBlock_loopTop - -#define CTR_DoFour(opc, src_reg) \ - __ opc(xmm_result0, src_reg); \ - __ opc(xmm_result1, src_reg); \ - __ opc(xmm_result2, src_reg); \ - __ opc(xmm_result3, src_reg); - - // k == 0 : generate code for key_128 - // k == 1 : generate code for key_192 - // k == 2 : generate code for key_256 - for (int k = 0; k < 3; ++k) { - //multi blocks starts here - __ align(OptoLoopAlignment); - __ BIND(L_multiBlock_loopTop[k]); - __ cmpptr(len_reg, PARALLEL_FACTOR * AESBlockSize); // see if at least PARALLEL_FACTOR blocks left - __ jcc(Assembler::less, L_singleBlockLoopTop[k]); - - __ movdqu(xmm_key_shuf_mask, ExternalAddress(key_shuffle_mask_addr())); - __ movdqu(xmm_counter_shuf_mask, ExternalAddress(counter_shuffle_mask_addr())); - - //load, then increase counters - CTR_DoFour(movdqa, xmm_curr_counter); - __ push(rbx); - inc_counter(rbx, xmm_result1, 0x01, L_incCounter[k][0]); - inc_counter(rbx, xmm_result2, 0x02, L_incCounter[k][1]); - inc_counter(rbx, xmm_result3, 0x03, L_incCounter[k][2]); - inc_counter(rbx, xmm_curr_counter, 0x04, L_incCounter[k][3]); - __ pop (rbx); - - load_key(xmm_key, key, 0x00, xmm_key_shuf_mask); // load Round 0 key. interleaving for better performance - - CTR_DoFour(pshufb, xmm_counter_shuf_mask); // after increased, shuffled counters back for PXOR - CTR_DoFour(pxor, xmm_key); //PXOR with Round 0 key - - for (int i = 1; i < rounds[k]; ++i) { - load_key(xmm_key, key, (0x10 * i), xmm_key_shuf_mask); - CTR_DoFour(aesenc, xmm_key); - } - load_key(xmm_key, key, (0x10 * rounds[k]), xmm_key_shuf_mask); - CTR_DoFour(aesenclast, xmm_key); - - // get next PARALLEL_FACTOR blocks into xmm_from registers - __ movdqu(xmm_from0, Address(from, pos, Address::times_1, 0 * AESBlockSize)); - __ movdqu(xmm_from1, Address(from, pos, Address::times_1, 1 * AESBlockSize)); - __ movdqu(xmm_from2, Address(from, pos, Address::times_1, 2 * AESBlockSize)); - - // PXOR with input text - __ pxor(xmm_result0, xmm_from0); //result0 is xmm4 - __ pxor(xmm_result1, xmm_from1); - __ pxor(xmm_result2, xmm_from2); - - // store PARALLEL_FACTOR results into the next 64 bytes of output - __ movdqu(Address(to, pos, Address::times_1, 0 * AESBlockSize), xmm_result0); - __ movdqu(Address(to, pos, Address::times_1, 1 * AESBlockSize), xmm_result1); - __ movdqu(Address(to, pos, Address::times_1, 2 * AESBlockSize), xmm_result2); - - // do it here after xmm_result0 is saved, because xmm_from3 reuse the same register of xmm_result0. - __ movdqu(xmm_from3, Address(from, pos, Address::times_1, 3 * AESBlockSize)); - __ pxor(xmm_result3, xmm_from3); - __ movdqu(Address(to, pos, Address::times_1, 3 * AESBlockSize), xmm_result3); - - __ addptr(pos, PARALLEL_FACTOR * AESBlockSize); // increase the length of crypt text - __ subptr(len_reg, PARALLEL_FACTOR * AESBlockSize); // decrease the remaining length - __ jmp(L_multiBlock_loopTop[k]); - - // singleBlock starts here - __ align(OptoLoopAlignment); - __ BIND(L_singleBlockLoopTop[k]); - __ cmpptr(len_reg, 0); - __ jcc(Assembler::equal, L_exit); - __ movdqu(xmm_key_shuf_mask, ExternalAddress(key_shuffle_mask_addr())); - __ movdqu(xmm_counter_shuf_mask, ExternalAddress(counter_shuffle_mask_addr())); - __ movdqa(xmm_result0, xmm_curr_counter); - load_key(xmm_key, key, 0x00, xmm_key_shuf_mask); - __ push(rbx);//rbx is used for increasing counter - inc_counter(rbx, xmm_curr_counter, 0x01, L_incCounter_single[k]); - __ pop (rbx); - __ pshufb(xmm_result0, xmm_counter_shuf_mask); - __ pxor(xmm_result0, xmm_key); - for (int i = 1; i < rounds[k]; i++) { - load_key(xmm_key, key, (0x10 * i), xmm_key_shuf_mask); - __ aesenc(xmm_result0, xmm_key); - } - load_key(xmm_key, key, (0x10 * rounds[k]), xmm_key_shuf_mask); - __ aesenclast(xmm_result0, xmm_key); - __ cmpptr(len_reg, AESBlockSize); - __ jcc(Assembler::less, L_processTail_insr[k]); - __ movdqu(xmm_from0, Address(from, pos, Address::times_1, 0 * AESBlockSize)); - __ pxor(xmm_result0, xmm_from0); - __ movdqu(Address(to, pos, Address::times_1, 0 * AESBlockSize), xmm_result0); - __ addptr(pos, AESBlockSize); - __ subptr(len_reg, AESBlockSize); - __ jmp(L_singleBlockLoopTop[k]); - - __ BIND(L_processTail_insr[k]); // Process the tail part of the input array - __ addptr(pos, len_reg); // 1. Insert bytes from src array into xmm_from0 register - __ testptr(len_reg, 8); - __ jcc(Assembler::zero, L_processTail_4_insr[k]); - __ subptr(pos,8); - __ pinsrd(xmm_from0, Address(from, pos), 0); - __ pinsrd(xmm_from0, Address(from, pos, Address::times_1, 4), 1); - __ BIND(L_processTail_4_insr[k]); - __ testptr(len_reg, 4); - __ jcc(Assembler::zero, L_processTail_2_insr[k]); - __ subptr(pos,4); - __ pslldq(xmm_from0, 4); - __ pinsrd(xmm_from0, Address(from, pos), 0); - __ BIND(L_processTail_2_insr[k]); - __ testptr(len_reg, 2); - __ jcc(Assembler::zero, L_processTail_1_insr[k]); - __ subptr(pos, 2); - __ pslldq(xmm_from0, 2); - __ pinsrw(xmm_from0, Address(from, pos), 0); - __ BIND(L_processTail_1_insr[k]); - __ testptr(len_reg, 1); - __ jcc(Assembler::zero, L_processTail_exit_insr[k]); - __ subptr(pos, 1); - __ pslldq(xmm_from0, 1); - __ pinsrb(xmm_from0, Address(from, pos), 0); - __ BIND(L_processTail_exit_insr[k]); - - __ movptr(saved_encCounter_start, saved_counter_param); - __ movdqu(Address(saved_encCounter_start, 0), xmm_result0); // 2. Perform pxor of the encrypted counter and plaintext Bytes. - __ pxor(xmm_result0, xmm_from0); // Also the encrypted counter is saved for next invocation. - - __ testptr(len_reg, 8); - __ jcc(Assembler::zero, L_processTail_4_extr[k]); // 3. Extract bytes from xmm_result0 into the dest. array - __ pextrd(Address(to, pos), xmm_result0, 0); - __ pextrd(Address(to, pos, Address::times_1, 4), xmm_result0, 1); - __ psrldq(xmm_result0, 8); - __ addptr(pos, 8); - __ BIND(L_processTail_4_extr[k]); - __ testptr(len_reg, 4); - __ jcc(Assembler::zero, L_processTail_2_extr[k]); - __ pextrd(Address(to, pos), xmm_result0, 0); - __ psrldq(xmm_result0, 4); - __ addptr(pos, 4); - __ BIND(L_processTail_2_extr[k]); - __ testptr(len_reg, 2); - __ jcc(Assembler::zero, L_processTail_1_extr[k]); - __ pextrb(Address(to, pos), xmm_result0, 0); - __ pextrb(Address(to, pos, Address::times_1, 1), xmm_result0, 1); - __ psrldq(xmm_result0, 2); - __ addptr(pos, 2); - __ BIND(L_processTail_1_extr[k]); - __ testptr(len_reg, 1); - __ jcc(Assembler::zero, L_processTail_exit_extr[k]); - __ pextrb(Address(to, pos), xmm_result0, 0); - - __ BIND(L_processTail_exit_extr[k]); - __ movptr(used_addr, used_addr_param); - __ movl(Address(used_addr, 0), len_reg); - __ jmp(L_exit); - } - - __ BIND(L_exit); - __ movdqu(xmm_counter_shuf_mask, ExternalAddress(counter_shuffle_mask_addr())); - __ pshufb(xmm_curr_counter, xmm_counter_shuf_mask); //counter is shuffled back. - __ movdqu(Address(counter, 0), xmm_curr_counter); //save counter back - handleSOERegisters(false /*restoring*/); - __ movptr(rax, len_param); // return length - __ leave(); // required for proper stackwalking of RuntimeStub frame - __ ret(0); - - __ BIND (L_key192_top); - __ movptr(pos, 0); // init pos before L_multiBlock_loopTop - __ jmp(L_multiBlock_loopTop[1]); //key192 - - __ BIND (L_key256_top); - __ movptr(pos, 0); // init pos before L_multiBlock_loopTop - __ jmp(L_multiBlock_loopTop[2]); //key192 - - return start; - } - - // ofs and limit are use for multi-block byte array. - // int com.sun.security.provider.MD5.implCompress(byte[] b, int ofs) - address generate_md5_implCompress(StubGenStubId stub_id) { - bool multi_block; - switch(stub_id) { - case StubGenStubId::md5_implCompress_id: - multi_block = false; - break; - case StubGenStubId::md5_implCompressMB_id: - multi_block = true; - break; - default: - ShouldNotReachHere(); - } - - __ align(CodeEntryAlignment); - StubCodeMark mark(this, stub_id); - address start = __ pc(); - - const Register buf_param = rbp; - const Address state_param(rsp, 0 * wordSize); - const Address ofs_param (rsp, 1 * wordSize); - const Address limit_param(rsp, 2 * wordSize); - - __ enter(); - __ push(rbx); - __ push(rdi); - __ push(rsi); - __ push(rbp); - __ subptr(rsp, 3 * wordSize); - - __ movptr(rsi, Address(rbp, 8 + 4)); - __ movptr(state_param, rsi); - if (multi_block) { - __ movptr(rsi, Address(rbp, 8 + 8)); - __ movptr(ofs_param, rsi); - __ movptr(rsi, Address(rbp, 8 + 12)); - __ movptr(limit_param, rsi); - } - __ movptr(buf_param, Address(rbp, 8 + 0)); // do it last because it override rbp - __ fast_md5(buf_param, state_param, ofs_param, limit_param, multi_block); - - __ addptr(rsp, 3 * wordSize); - __ pop(rbp); - __ pop(rsi); - __ pop(rdi); - __ pop(rbx); - __ leave(); - __ ret(0); - return start; - } - - address generate_upper_word_mask() { - __ align64(); - StubGenStubId stub_id = StubGenStubId::upper_word_mask_id; - StubCodeMark mark(this, stub_id); - address start = __ pc(); - __ emit_data(0x00000000, relocInfo::none, 0); - __ emit_data(0x00000000, relocInfo::none, 0); - __ emit_data(0x00000000, relocInfo::none, 0); - __ emit_data(0xFFFFFFFF, relocInfo::none, 0); - return start; - } - - address generate_shuffle_byte_flip_mask() { - __ align64(); - StubGenStubId stub_id = StubGenStubId::shuffle_byte_flip_mask_id; - StubCodeMark mark(this, stub_id); - address start = __ pc(); - __ emit_data(0x0c0d0e0f, relocInfo::none, 0); - __ emit_data(0x08090a0b, relocInfo::none, 0); - __ emit_data(0x04050607, relocInfo::none, 0); - __ emit_data(0x00010203, relocInfo::none, 0); - return start; - } - - // ofs and limit are use for multi-block byte array. - // int com.sun.security.provider.DigestBase.implCompressMultiBlock(byte[] b, int ofs, int limit) - address generate_sha1_implCompress(StubGenStubId stub_id) { - bool multi_block; - switch(stub_id) { - case StubGenStubId::sha1_implCompress_id: - multi_block = false; - break; - case StubGenStubId::sha1_implCompressMB_id: - multi_block = true; - break; - default: - ShouldNotReachHere(); - } - - __ align(CodeEntryAlignment); - StubCodeMark mark(this, stub_id); - address start = __ pc(); - - Register buf = rax; - Register state = rdx; - Register ofs = rcx; - Register limit = rdi; - - const Address buf_param(rbp, 8 + 0); - const Address state_param(rbp, 8 + 4); - const Address ofs_param(rbp, 8 + 8); - const Address limit_param(rbp, 8 + 12); - - const XMMRegister abcd = xmm0; - const XMMRegister e0 = xmm1; - const XMMRegister e1 = xmm2; - const XMMRegister msg0 = xmm3; - - const XMMRegister msg1 = xmm4; - const XMMRegister msg2 = xmm5; - const XMMRegister msg3 = xmm6; - const XMMRegister shuf_mask = xmm7; - - __ enter(); - __ subptr(rsp, 8 * wordSize); - handleSOERegisters(true /*saving*/); - - __ movptr(buf, buf_param); - __ movptr(state, state_param); - if (multi_block) { - __ movptr(ofs, ofs_param); - __ movptr(limit, limit_param); - } - - __ fast_sha1(abcd, e0, e1, msg0, msg1, msg2, msg3, shuf_mask, - buf, state, ofs, limit, rsp, multi_block); - - handleSOERegisters(false /*restoring*/); - __ addptr(rsp, 8 * wordSize); - __ leave(); - __ ret(0); - return start; - } - - address generate_pshuffle_byte_flip_mask() { - __ align64(); - StubGenStubId stub_id = StubGenStubId::pshuffle_byte_flip_mask_id; - StubCodeMark mark(this, stub_id); - address start = __ pc(); - __ emit_data(0x00010203, relocInfo::none, 0); - __ emit_data(0x04050607, relocInfo::none, 0); - __ emit_data(0x08090a0b, relocInfo::none, 0); - __ emit_data(0x0c0d0e0f, relocInfo::none, 0); - return start; - } - - // ofs and limit are use for multi-block byte array. - // int com.sun.security.provider.DigestBase.implCompressMultiBlock(byte[] b, int ofs, int limit) - address generate_sha256_implCompress(StubGenStubId stub_id) { - bool multi_block; - switch(stub_id) { - case StubGenStubId::sha256_implCompress_id: - multi_block = false; - break; - case StubGenStubId::sha256_implCompressMB_id: - multi_block = true; - break; - default: - ShouldNotReachHere(); - } - - __ align(CodeEntryAlignment); - StubCodeMark mark(this, stub_id); - address start = __ pc(); - - Register buf = rbx; - Register state = rsi; - Register ofs = rdx; - Register limit = rcx; - - const Address buf_param(rbp, 8 + 0); - const Address state_param(rbp, 8 + 4); - const Address ofs_param(rbp, 8 + 8); - const Address limit_param(rbp, 8 + 12); - - const XMMRegister msg = xmm0; - const XMMRegister state0 = xmm1; - const XMMRegister state1 = xmm2; - const XMMRegister msgtmp0 = xmm3; - - const XMMRegister msgtmp1 = xmm4; - const XMMRegister msgtmp2 = xmm5; - const XMMRegister msgtmp3 = xmm6; - const XMMRegister msgtmp4 = xmm7; - - __ enter(); - __ subptr(rsp, 8 * wordSize); - handleSOERegisters(true /*saving*/); - __ movptr(buf, buf_param); - __ movptr(state, state_param); - if (multi_block) { - __ movptr(ofs, ofs_param); - __ movptr(limit, limit_param); - } - - __ fast_sha256(msg, state0, state1, msgtmp0, msgtmp1, msgtmp2, msgtmp3, msgtmp4, - buf, state, ofs, limit, rsp, multi_block); - - handleSOERegisters(false); - __ addptr(rsp, 8 * wordSize); - __ leave(); - __ ret(0); - return start; - } - - // byte swap x86 long - address ghash_long_swap_mask_addr() { - return (address)GHASH_LONG_SWAP_MASK; - } - - // byte swap x86 byte array - address ghash_byte_swap_mask_addr() { - return (address)GHASH_BYTE_SWAP_MASK; - } - - /* Single and multi-block ghash operations */ - address generate_ghash_processBlocks() { - assert(UseGHASHIntrinsics, "need GHASH intrinsics and CLMUL support"); - __ align(CodeEntryAlignment); - Label L_ghash_loop, L_exit; - StubGenStubId stub_id = StubGenStubId::ghash_processBlocks_id; - StubCodeMark mark(this, stub_id); - - address start = __ pc(); - - const Register state = rdi; - const Register subkeyH = rsi; - const Register data = rdx; - const Register blocks = rcx; - - const Address state_param(rbp, 8+0); - const Address subkeyH_param(rbp, 8+4); - const Address data_param(rbp, 8+8); - const Address blocks_param(rbp, 8+12); - - const XMMRegister xmm_temp0 = xmm0; - const XMMRegister xmm_temp1 = xmm1; - const XMMRegister xmm_temp2 = xmm2; - const XMMRegister xmm_temp3 = xmm3; - const XMMRegister xmm_temp4 = xmm4; - const XMMRegister xmm_temp5 = xmm5; - const XMMRegister xmm_temp6 = xmm6; - const XMMRegister xmm_temp7 = xmm7; - - __ enter(); - handleSOERegisters(true); // Save registers - - __ movptr(state, state_param); - __ movptr(subkeyH, subkeyH_param); - __ movptr(data, data_param); - __ movptr(blocks, blocks_param); - - __ movdqu(xmm_temp0, Address(state, 0)); - __ pshufb(xmm_temp0, ExternalAddress(ghash_long_swap_mask_addr())); - - __ movdqu(xmm_temp1, Address(subkeyH, 0)); - __ pshufb(xmm_temp1, ExternalAddress(ghash_long_swap_mask_addr())); - - __ BIND(L_ghash_loop); - __ movdqu(xmm_temp2, Address(data, 0)); - __ pshufb(xmm_temp2, ExternalAddress(ghash_byte_swap_mask_addr())); - - __ pxor(xmm_temp0, xmm_temp2); - - // - // Multiply with the hash key - // - __ movdqu(xmm_temp3, xmm_temp0); - __ pclmulqdq(xmm_temp3, xmm_temp1, 0); // xmm3 holds a0*b0 - __ movdqu(xmm_temp4, xmm_temp0); - __ pclmulqdq(xmm_temp4, xmm_temp1, 16); // xmm4 holds a0*b1 - - __ movdqu(xmm_temp5, xmm_temp0); - __ pclmulqdq(xmm_temp5, xmm_temp1, 1); // xmm5 holds a1*b0 - __ movdqu(xmm_temp6, xmm_temp0); - __ pclmulqdq(xmm_temp6, xmm_temp1, 17); // xmm6 holds a1*b1 - - __ pxor(xmm_temp4, xmm_temp5); // xmm4 holds a0*b1 + a1*b0 - - __ movdqu(xmm_temp5, xmm_temp4); // move the contents of xmm4 to xmm5 - __ psrldq(xmm_temp4, 8); // shift by xmm4 64 bits to the right - __ pslldq(xmm_temp5, 8); // shift by xmm5 64 bits to the left - __ pxor(xmm_temp3, xmm_temp5); - __ pxor(xmm_temp6, xmm_temp4); // Register pair holds the result - // of the carry-less multiplication of - // xmm0 by xmm1. - - // We shift the result of the multiplication by one bit position - // to the left to cope for the fact that the bits are reversed. - __ movdqu(xmm_temp7, xmm_temp3); - __ movdqu(xmm_temp4, xmm_temp6); - __ pslld (xmm_temp3, 1); - __ pslld(xmm_temp6, 1); - __ psrld(xmm_temp7, 31); - __ psrld(xmm_temp4, 31); - __ movdqu(xmm_temp5, xmm_temp7); - __ pslldq(xmm_temp4, 4); - __ pslldq(xmm_temp7, 4); - __ psrldq(xmm_temp5, 12); - __ por(xmm_temp3, xmm_temp7); - __ por(xmm_temp6, xmm_temp4); - __ por(xmm_temp6, xmm_temp5); - - // - // First phase of the reduction - // - // Move xmm3 into xmm4, xmm5, xmm7 in order to perform the shifts - // independently. - __ movdqu(xmm_temp7, xmm_temp3); - __ movdqu(xmm_temp4, xmm_temp3); - __ movdqu(xmm_temp5, xmm_temp3); - __ pslld(xmm_temp7, 31); // packed right shift shifting << 31 - __ pslld(xmm_temp4, 30); // packed right shift shifting << 30 - __ pslld(xmm_temp5, 25); // packed right shift shifting << 25 - __ pxor(xmm_temp7, xmm_temp4); // xor the shifted versions - __ pxor(xmm_temp7, xmm_temp5); - __ movdqu(xmm_temp4, xmm_temp7); - __ pslldq(xmm_temp7, 12); - __ psrldq(xmm_temp4, 4); - __ pxor(xmm_temp3, xmm_temp7); // first phase of the reduction complete - - // - // Second phase of the reduction - // - // Make 3 copies of xmm3 in xmm2, xmm5, xmm7 for doing these - // shift operations. - __ movdqu(xmm_temp2, xmm_temp3); - __ movdqu(xmm_temp7, xmm_temp3); - __ movdqu(xmm_temp5, xmm_temp3); - __ psrld(xmm_temp2, 1); // packed left shifting >> 1 - __ psrld(xmm_temp7, 2); // packed left shifting >> 2 - __ psrld(xmm_temp5, 7); // packed left shifting >> 7 - __ pxor(xmm_temp2, xmm_temp7); // xor the shifted versions - __ pxor(xmm_temp2, xmm_temp5); - __ pxor(xmm_temp2, xmm_temp4); - __ pxor(xmm_temp3, xmm_temp2); - __ pxor(xmm_temp6, xmm_temp3); // the result is in xmm6 - - __ decrement(blocks); - __ jcc(Assembler::zero, L_exit); - __ movdqu(xmm_temp0, xmm_temp6); - __ addptr(data, 16); - __ jmp(L_ghash_loop); - - __ BIND(L_exit); - // Byte swap 16-byte result - __ pshufb(xmm_temp6, ExternalAddress(ghash_long_swap_mask_addr())); - __ movdqu(Address(state, 0), xmm_temp6); // store the result - - handleSOERegisters(false); // restore registers - __ leave(); - __ ret(0); - return start; - } - - /** - * Arguments: - * - * Inputs: - * rsp(4) - int crc - * rsp(8) - byte* buf - * rsp(12) - int length - * - * Output: - * rax - int crc result - */ - address generate_updateBytesCRC32() { - assert(UseCRC32Intrinsics, "need AVX and CLMUL instructions"); - - __ align(CodeEntryAlignment); - StubGenStubId stub_id = StubGenStubId::updateBytesCRC32_id; - StubCodeMark mark(this, stub_id); - - address start = __ pc(); - - const Register crc = rdx; // crc - const Register buf = rsi; // source java byte array address - const Register len = rcx; // length - const Register table = rdi; // crc_table address (reuse register) - const Register tmp = rbx; - assert_different_registers(crc, buf, len, table, tmp, rax); - - BLOCK_COMMENT("Entry:"); - __ enter(); // required for proper stackwalking of RuntimeStub frame - __ push(rsi); - __ push(rdi); - __ push(rbx); - - Address crc_arg(rbp, 8 + 0); - Address buf_arg(rbp, 8 + 4); - Address len_arg(rbp, 8 + 8); - - // Load up: - __ movl(crc, crc_arg); - __ movptr(buf, buf_arg); - __ movl(len, len_arg); - - __ kernel_crc32(crc, buf, len, table, tmp); - - __ movl(rax, crc); - __ pop(rbx); - __ pop(rdi); - __ pop(rsi); - __ vzeroupper(); - __ leave(); // required for proper stackwalking of RuntimeStub frame - __ ret(0); - - return start; - } - - /** - * Arguments: - * - * Inputs: - * rsp(4) - int crc - * rsp(8) - byte* buf - * rsp(12) - int length - * rsp(16) - table_start - optional (present only when doing a library_calll, - * not used by x86 algorithm) - * - * Output: - * rax - int crc result - */ - address generate_updateBytesCRC32C(bool is_pclmulqdq_supported) { - assert(UseCRC32CIntrinsics, "need SSE4_2"); - __ align(CodeEntryAlignment); - StubGenStubId stub_id = StubGenStubId::updateBytesCRC32C_id; - StubCodeMark mark(this, stub_id); - - address start = __ pc(); - const Register crc = rax; // crc - const Register buf = rcx; // source java byte array address - const Register len = rdx; // length - const Register d = rbx; - const Register g = rsi; - const Register h = rdi; - const Register empty = noreg; // will never be used, in order not - // to change a signature for crc32c_IPL_Alg2_Alt2 - // between 64/32 I'm just keeping it here - assert_different_registers(crc, buf, len, d, g, h); - - BLOCK_COMMENT("Entry:"); - __ enter(); // required for proper stackwalking of RuntimeStub frame - Address crc_arg(rsp, 4 + 4 + 0); // ESP+4 + - // we need to add additional 4 because __ enter - // have just pushed ebp on a stack - Address buf_arg(rsp, 4 + 4 + 4); - Address len_arg(rsp, 4 + 4 + 8); - // Load up: - __ movl(crc, crc_arg); - __ movl(buf, buf_arg); - __ movl(len, len_arg); - __ push(d); - __ push(g); - __ push(h); - __ crc32c_ipl_alg2_alt2(crc, buf, len, - d, g, h, - empty, empty, empty, - xmm0, xmm1, xmm2, - is_pclmulqdq_supported); - __ pop(h); - __ pop(g); - __ pop(d); - __ vzeroupper(); - __ leave(); // required for proper stackwalking of RuntimeStub frame - __ ret(0); - - return start; - } - - address generate_libmExp() { - StubGenStubId stub_id = StubGenStubId::dexp_id; - StubCodeMark mark(this, stub_id); - - address start = __ pc(); - - BLOCK_COMMENT("Entry:"); - __ enter(); // required for proper stackwalking of RuntimeStub frame - __ fast_exp(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, - rax, rcx, rdx, rbx); - __ leave(); // required for proper stackwalking of RuntimeStub frame - __ ret(0); - - return start; - - } - - address generate_libmLog() { - StubGenStubId stub_id = StubGenStubId::dlog_id; - StubCodeMark mark(this, stub_id); - - address start = __ pc(); - - BLOCK_COMMENT("Entry:"); - __ enter(); // required for proper stackwalking of RuntimeStub frame - __ fast_log(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, - rax, rcx, rdx, rbx); - __ leave(); // required for proper stackwalking of RuntimeStub frame - __ ret(0); - - return start; - - } - - address generate_libmLog10() { - StubGenStubId stub_id = StubGenStubId::dlog10_id; - StubCodeMark mark(this, stub_id); - - address start = __ pc(); - - BLOCK_COMMENT("Entry:"); - __ enter(); // required for proper stackwalking of RuntimeStub frame - __ fast_log10(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, - rax, rcx, rdx, rbx); - __ leave(); // required for proper stackwalking of RuntimeStub frame - __ ret(0); - - return start; - - } - - address generate_libmPow() { - StubGenStubId stub_id = StubGenStubId::dpow_id; - StubCodeMark mark(this, stub_id); - - address start = __ pc(); - - BLOCK_COMMENT("Entry:"); - __ enter(); // required for proper stackwalking of RuntimeStub frame - __ fast_pow(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, - rax, rcx, rdx, rbx); - __ leave(); // required for proper stackwalking of RuntimeStub frame - __ ret(0); - - return start; - - } - - address generate_libm_reduce_pi04l() { - StubGenStubId stub_id = StubGenStubId::dlibm_reduce_pi04l_id; - StubCodeMark mark(this, stub_id); - - address start = __ pc(); - - BLOCK_COMMENT("Entry:"); - __ libm_reduce_pi04l(rax, rcx, rdx, rbx, rsi, rdi, rbp, rsp); - - return start; - - } - - address generate_libm_sin_cos_huge() { - StubGenStubId stub_id = StubGenStubId::dlibm_sin_cos_huge_id; - StubCodeMark mark(this, stub_id); - - address start = __ pc(); - - BLOCK_COMMENT("Entry:"); - __ libm_sincos_huge(xmm0, xmm1, rax, rcx, rdx, rbx, rsi, rdi, rbp, rsp); - - return start; - - } - - address generate_libmSin() { - StubGenStubId stub_id = StubGenStubId::dsin_id; - StubCodeMark mark(this, stub_id); - - address start = __ pc(); - - BLOCK_COMMENT("Entry:"); - __ enter(); // required for proper stackwalking of RuntimeStub frame - __ fast_sin(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, - rax, rbx, rdx); - __ leave(); // required for proper stackwalking of RuntimeStub frame - __ ret(0); - - return start; - - } - - address generate_libmCos() { - StubGenStubId stub_id = StubGenStubId::dcos_id; - StubCodeMark mark(this, stub_id); - - address start = __ pc(); - - BLOCK_COMMENT("Entry:"); - __ enter(); // required for proper stackwalking of RuntimeStub frame - __ fast_cos(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, - rax, rcx, rdx, rbx); - __ leave(); // required for proper stackwalking of RuntimeStub frame - __ ret(0); - - return start; - - } - - address generate_libm_tan_cot_huge() { - StubGenStubId stub_id = StubGenStubId::dlibm_tan_cot_huge_id; - StubCodeMark mark(this, stub_id); - - address start = __ pc(); - - BLOCK_COMMENT("Entry:"); - __ libm_tancot_huge(xmm0, xmm1, rax, rcx, rdx, rbx, rsi, rdi, rbp, rsp); - - return start; - - } - - address generate_libmTan() { - StubGenStubId stub_id = StubGenStubId::dtan_id; - StubCodeMark mark(this, stub_id); - - address start = __ pc(); - - BLOCK_COMMENT("Entry:"); - __ enter(); // required for proper stackwalking of RuntimeStub frame - __ fast_tan(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, - rax, rcx, rdx, rbx); - __ leave(); // required for proper stackwalking of RuntimeStub frame - __ ret(0); - - return start; - - } - - address generate_method_entry_barrier() { - __ align(CodeEntryAlignment); - StubGenStubId stub_id = StubGenStubId::method_entry_barrier_id; - StubCodeMark mark(this, stub_id); - - Label deoptimize_label; - - address start = __ pc(); - - __ push(-1); // cookie, this is used for writing the new rsp when deoptimizing - - BLOCK_COMMENT("Entry:"); - __ enter(); // save rbp - - // save rbx, because we want to use that value. - // We could do without it but then we depend on the number of slots used by pusha - __ push(rbx); - - __ lea(rbx, Address(rsp, wordSize * 3)); // 1 for cookie, 1 for rbp, 1 for rbx - this should be the return address - - __ pusha(); - - // xmm0 and xmm1 may be used for passing float/double arguments - - if (UseSSE >= 2) { - const int xmm_size = wordSize * 4; - __ subptr(rsp, xmm_size * 2); - __ movdbl(Address(rsp, xmm_size * 1), xmm1); - __ movdbl(Address(rsp, xmm_size * 0), xmm0); - } else if (UseSSE >= 1) { - const int xmm_size = wordSize * 2; - __ subptr(rsp, xmm_size * 2); - __ movflt(Address(rsp, xmm_size * 1), xmm1); - __ movflt(Address(rsp, xmm_size * 0), xmm0); - } - - __ call_VM_leaf(CAST_FROM_FN_PTR(address, static_cast(BarrierSetNMethod::nmethod_stub_entry_barrier)), rbx); - - if (UseSSE >= 2) { - const int xmm_size = wordSize * 4; - __ movdbl(xmm0, Address(rsp, xmm_size * 0)); - __ movdbl(xmm1, Address(rsp, xmm_size * 1)); - __ addptr(rsp, xmm_size * 2); - } else if (UseSSE >= 1) { - const int xmm_size = wordSize * 2; - __ movflt(xmm0, Address(rsp, xmm_size * 0)); - __ movflt(xmm1, Address(rsp, xmm_size * 1)); - __ addptr(rsp, xmm_size * 2); - } - - __ cmpl(rax, 1); // 1 means deoptimize - __ jcc(Assembler::equal, deoptimize_label); - - __ popa(); - __ pop(rbx); - - __ leave(); - - __ addptr(rsp, 1 * wordSize); // cookie - __ ret(0); - - __ BIND(deoptimize_label); - - __ popa(); - __ pop(rbx); - - __ leave(); - - // this can be taken out, but is good for verification purposes. getting a SIGSEGV - // here while still having a correct stack is valuable - __ testptr(rsp, Address(rsp, 0)); - - __ movptr(rsp, Address(rsp, 0)); // new rsp was written in the barrier - __ jmp(Address(rsp, -1 * wordSize)); // jmp target should be callers verified_entry_point - - return start; - } - - private: - - void create_control_words() { - // Round to nearest, 53-bit mode, exceptions masked - StubRoutines::x86::_fpu_cntrl_wrd_std = 0x027F; - // Round to zero, 53-bit mode, exception mased - StubRoutines::x86::_fpu_cntrl_wrd_trunc = 0x0D7F; - // Round to nearest, 24-bit mode, exceptions masked - StubRoutines::x86::_fpu_cntrl_wrd_24 = 0x007F; - // Round to nearest, 64-bit mode, exceptions masked, flags specialized - StubRoutines::x86::_mxcsr_std = EnableX86ECoreOpts ? 0x1FBF : 0x1F80; - // Note: the following two constants are 80-bit values - // layout is critical for correct loading by FPU. - // Bias for strict fp multiply/divide - StubRoutines::x86::_fpu_subnormal_bias1[0]= 0x00000000; // 2^(-15360) == 0x03ff 8000 0000 0000 0000 - StubRoutines::x86::_fpu_subnormal_bias1[1]= 0x80000000; - StubRoutines::x86::_fpu_subnormal_bias1[2]= 0x03ff; - // Un-Bias for strict fp multiply/divide - StubRoutines::x86::_fpu_subnormal_bias2[0]= 0x00000000; // 2^(+15360) == 0x7bff 8000 0000 0000 0000 - StubRoutines::x86::_fpu_subnormal_bias2[1]= 0x80000000; - StubRoutines::x86::_fpu_subnormal_bias2[2]= 0x7bff; - } - - address generate_cont_thaw() { - if (!Continuations::enabled()) return nullptr; - Unimplemented(); - return nullptr; - } - - address generate_cont_returnBarrier() { - if (!Continuations::enabled()) return nullptr; - Unimplemented(); - return nullptr; - } - - address generate_cont_returnBarrier_exception() { - if (!Continuations::enabled()) return nullptr; - Unimplemented(); - return nullptr; - } - - //--------------------------------------------------------------------------- - // Initialization - - void generate_initial_stubs() { - // Generates all stubs and initializes the entry points - - //------------------------------------------------------------------------------------------------------------------------ - // entry points that exist in all platforms - // Note: This is code that could be shared among different platforms - however the benefit seems to be smaller than - // the disadvantage of having a much more complicated generator structure. See also comment in stubRoutines.hpp. - StubRoutines::_forward_exception_entry = generate_forward_exception(); - - StubRoutines::_call_stub_entry = - generate_call_stub(StubRoutines::_call_stub_return_address); - // is referenced by megamorphic call - StubRoutines::_catch_exception_entry = generate_catch_exception(); - - // platform dependent - create_control_words(); - - // Initialize table for copy memory (arraycopy) check. - if (UnsafeMemoryAccess::_table == nullptr) { - UnsafeMemoryAccess::create_table(16 + 4); // 16 for copyMemory; 4 for setMemory - } - - StubRoutines::x86::_verify_mxcsr_entry = generate_verify_mxcsr(); - StubRoutines::x86::_verify_fpu_cntrl_wrd_entry = generate_verify_fpu_cntrl_wrd(); - StubRoutines::x86::_d2i_wrapper = generate_d2i_wrapper(T_INT, CAST_FROM_FN_PTR(address, SharedRuntime::d2i)); - StubRoutines::x86::_d2l_wrapper = generate_d2i_wrapper(T_LONG, CAST_FROM_FN_PTR(address, SharedRuntime::d2l)); - - if (UseCRC32Intrinsics) { - // set table address before stub generation which use it - StubRoutines::_crc_table_adr = (address)StubRoutines::x86::_crc_table; - StubRoutines::_updateBytesCRC32 = generate_updateBytesCRC32(); - } - - if (UseCRC32CIntrinsics) { - bool supports_clmul = VM_Version::supports_clmul(); - StubRoutines::x86::generate_CRC32C_table(supports_clmul); - StubRoutines::_crc32c_table_addr = (address)StubRoutines::x86::_crc32c_table; - StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C(supports_clmul); - } - if (VM_Version::supports_sse2() && UseLibmIntrinsic && InlineIntrinsics) { - if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dexp)) { - StubRoutines::_dexp = generate_libmExp(); - } - if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dlog)) { - StubRoutines::_dlog = generate_libmLog(); - } - if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dlog10)) { - StubRoutines::_dlog10 = generate_libmLog10(); - } - if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dpow)) { - StubRoutines::_dpow = generate_libmPow(); - } - if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dsin) || - vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dcos) || - vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dtan)) { - StubRoutines::_dlibm_reduce_pi04l = generate_libm_reduce_pi04l(); - } - if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dsin) || - vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dcos)) { - StubRoutines::_dlibm_sin_cos_huge = generate_libm_sin_cos_huge(); - } - if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dsin)) { - StubRoutines::_dsin = generate_libmSin(); - } - if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dcos)) { - StubRoutines::_dcos = generate_libmCos(); - } - if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dtan)) { - StubRoutines::_dlibm_tan_cot_huge = generate_libm_tan_cot_huge(); - StubRoutines::_dtan = generate_libmTan(); - } - } - } - - void generate_continuation_stubs() { - // Continuation stubs: - StubRoutines::_cont_thaw = generate_cont_thaw(); - StubRoutines::_cont_returnBarrier = generate_cont_returnBarrier(); - StubRoutines::_cont_returnBarrierExc = generate_cont_returnBarrier_exception(); - } - - void generate_final_stubs() { - // Generates all stubs and initializes the entry points - - // support for verify_oop (must happen after universe_init) - StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop(); - - // arraycopy stubs used by compilers - generate_arraycopy_stubs(); - - StubRoutines::_method_entry_barrier = generate_method_entry_barrier(); - } - - void generate_compiler_stubs() { -#if COMPILER2_OR_JVMCI - - // entry points that are C2/JVMCI specific - - StubRoutines::x86::_vector_float_sign_mask = generate_vector_mask(StubGenStubId::vector_float_sign_mask_id, 0x7FFFFFFF); - StubRoutines::x86::_vector_float_sign_flip = generate_vector_mask(StubGenStubId::vector_float_sign_flip_id, 0x80000000); - StubRoutines::x86::_vector_double_sign_mask = generate_vector_mask_long_double(StubGenStubId::vector_double_sign_mask_id, 0x7FFFFFFF, 0xFFFFFFFF); - StubRoutines::x86::_vector_double_sign_flip = generate_vector_mask_long_double(StubGenStubId::vector_double_sign_flip_id, 0x80000000, 0x00000000); - StubRoutines::x86::_vector_short_to_byte_mask = generate_vector_mask(StubGenStubId::vector_short_to_byte_mask_id, 0x00ff00ff); - StubRoutines::x86::_vector_int_to_byte_mask = generate_vector_mask(StubGenStubId::vector_int_to_byte_mask_id, 0x000000ff); - StubRoutines::x86::_vector_int_to_short_mask = generate_vector_mask(StubGenStubId::vector_int_to_short_mask_id, 0x0000ffff); - StubRoutines::x86::_vector_32_bit_mask = generate_vector_custom_i32(StubGenStubId::vector_32_bit_mask_id, Assembler::AVX_512bit, - 0xFFFFFFFF, 0, 0, 0); - StubRoutines::x86::_vector_64_bit_mask = generate_vector_custom_i32(StubGenStubId::vector_64_bit_mask_id, Assembler::AVX_512bit, - 0xFFFFFFFF, 0xFFFFFFFF, 0, 0); - StubRoutines::x86::_vector_int_shuffle_mask = generate_vector_mask(StubGenStubId::vector_int_shuffle_mask_id, 0x03020100); - StubRoutines::x86::_vector_byte_shuffle_mask = generate_vector_byte_shuffle_mask(); - StubRoutines::x86::_vector_short_shuffle_mask = generate_vector_mask(StubGenStubId::vector_short_shuffle_mask_id, 0x01000100); - StubRoutines::x86::_vector_long_shuffle_mask = generate_vector_mask_long_double(StubGenStubId::vector_long_shuffle_mask_id, 0x00000001, 0x0); - StubRoutines::x86::_vector_byte_perm_mask = generate_vector_byte_perm_mask(); - StubRoutines::x86::_vector_long_sign_mask = generate_vector_mask_long_double(StubGenStubId::vector_long_sign_mask_id, 0x80000000, 0x00000000); - StubRoutines::x86::_vector_all_bits_set = generate_vector_mask(StubGenStubId::vector_all_bits_set_id, 0xFFFFFFFF); - StubRoutines::x86::_vector_int_mask_cmp_bits = generate_vector_mask(StubGenStubId::vector_int_mask_cmp_bits_id, 0x00000001); - StubRoutines::x86::_vector_iota_indices = generate_iota_indices(); - StubRoutines::x86::_vector_count_leading_zeros_lut = generate_count_leading_zeros_lut(); - StubRoutines::x86::_vector_reverse_bit_lut = generate_vector_reverse_bit_lut(); - StubRoutines::x86::_vector_reverse_byte_perm_mask_long = generate_vector_reverse_byte_perm_mask_long(); - StubRoutines::x86::_vector_reverse_byte_perm_mask_int = generate_vector_reverse_byte_perm_mask_int(); - StubRoutines::x86::_vector_reverse_byte_perm_mask_short = generate_vector_reverse_byte_perm_mask_short(); - - if (VM_Version::supports_avx2() && !VM_Version::supports_avx512_vpopcntdq()) { - // lut implementation influenced by counting 1s algorithm from section 5-1 of Hackers' Delight. - StubRoutines::x86::_vector_popcount_lut = generate_popcount_avx_lut(); - } - - // don't bother generating these AES intrinsic stubs unless global flag is set - if (UseAESIntrinsics) { - StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock(); - StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock(); - StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt(); - StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt_Parallel(); - } - - if (UseAESCTRIntrinsics) { - StubRoutines::_counterMode_AESCrypt = generate_counterMode_AESCrypt_Parallel(); - } - - if (UseMD5Intrinsics) { - StubRoutines::_md5_implCompress = generate_md5_implCompress(StubGenStubId::md5_implCompress_id); - StubRoutines::_md5_implCompressMB = generate_md5_implCompress(StubGenStubId::md5_implCompressMB_id); - } - if (UseSHA1Intrinsics) { - StubRoutines::x86::_upper_word_mask_addr = generate_upper_word_mask(); - StubRoutines::x86::_shuffle_byte_flip_mask_addr = generate_shuffle_byte_flip_mask(); - StubRoutines::_sha1_implCompress = generate_sha1_implCompress(StubGenStubId::sha1_implCompress_id); - StubRoutines::_sha1_implCompressMB = generate_sha1_implCompress(StubGenStubId::sha1_implCompressMB_id); - } - if (UseSHA256Intrinsics) { - StubRoutines::x86::_k256_adr = (address)StubRoutines::x86::_k256; - StubRoutines::x86::_pshuffle_byte_flip_mask_addr = generate_pshuffle_byte_flip_mask(); - StubRoutines::_sha256_implCompress = generate_sha256_implCompress(StubGenStubId::sha256_implCompress_id); - StubRoutines::_sha256_implCompressMB = generate_sha256_implCompress(StubGenStubId::sha256_implCompressMB_id); - } - - // Generate GHASH intrinsics code - if (UseGHASHIntrinsics) { - StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks(); - } -#endif // COMPILER2_OR_JVMCI - } - - - public: - StubGenerator(CodeBuffer* code, StubGenBlobId blob_id) : StubCodeGenerator(code, blob_id) { - switch(blob_id) { - case initial_id: - generate_initial_stubs(); - break; - case continuation_id: - generate_continuation_stubs(); - break; - case compiler_id: - generate_compiler_stubs(); - break; - case final_id: - generate_final_stubs(); - break; - default: - fatal("unexpected blob id: %d", blob_id); - break; - }; - } -}; // end class declaration - -void StubGenerator_generate(CodeBuffer* code, StubGenBlobId blob_id) { - StubGenerator g(code, blob_id); -} diff --git a/src/hotspot/cpu/x86/stubRoutines_x86_32.cpp b/src/hotspot/cpu/x86/stubRoutines_x86_32.cpp deleted file mode 100644 index 810f421f141..00000000000 --- a/src/hotspot/cpu/x86/stubRoutines_x86_32.cpp +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - * - */ - -#include "runtime/deoptimization.hpp" -#include "runtime/frame.inline.hpp" -#include "runtime/javaThread.hpp" -#include "runtime/stubRoutines.hpp" - -// Implementation of the platform-specific part of StubRoutines - for -// a description of how to extend it, see the stubRoutines.hpp file. - -jint StubRoutines::x86::_fpu_cntrl_wrd_std = 0; -jint StubRoutines::x86::_fpu_cntrl_wrd_24 = 0; -jint StubRoutines::x86::_fpu_cntrl_wrd_trunc = 0; - -jint StubRoutines::x86::_mxcsr_std = 0; - -jint StubRoutines::x86::_fpu_subnormal_bias1[3] = { 0, 0, 0 }; -jint StubRoutines::x86::_fpu_subnormal_bias2[3] = { 0, 0, 0 }; - diff --git a/src/hotspot/cpu/x86/templateInterpreterGenerator_x86_32.cpp b/src/hotspot/cpu/x86/templateInterpreterGenerator_x86_32.cpp deleted file mode 100644 index df8633bdd15..00000000000 --- a/src/hotspot/cpu/x86/templateInterpreterGenerator_x86_32.cpp +++ /dev/null @@ -1,509 +0,0 @@ -/* - * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - * - */ - -#include "asm/macroAssembler.hpp" -#include "compiler/disassembler.hpp" -#include "interpreter/interp_masm.hpp" -#include "interpreter/interpreter.hpp" -#include "interpreter/interpreterRuntime.hpp" -#include "interpreter/templateInterpreterGenerator.hpp" -#include "runtime/arguments.hpp" -#include "runtime/sharedRuntime.hpp" -#include "runtime/stubRoutines.hpp" - -#define __ Disassembler::hook(__FILE__, __LINE__, _masm)-> - - -address TemplateInterpreterGenerator::generate_slow_signature_handler() { - address entry = __ pc(); - // rbx,: method - // rcx: temporary - // rdi: pointer to locals - // rsp: end of copied parameters area - __ mov(rcx, rsp); - __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::slow_signature_handler), rbx, rdi, rcx); - __ ret(0); - return entry; -} - -/** - * Method entry for static native methods: - * int java.util.zip.CRC32.update(int crc, int b) - */ -address TemplateInterpreterGenerator::generate_CRC32_update_entry() { - assert(UseCRC32Intrinsics, "this intrinsic is not supported"); - address entry = __ pc(); - - // rbx: Method* - // rsi: senderSP must preserved for slow path, set SP to it on fast path - // rdx: scratch - // rdi: scratch - - Label slow_path; - // If we need a safepoint check, generate full interpreter entry. - __ get_thread(rdi); - __ safepoint_poll(slow_path, rdi, false /* at_return */, false /* in_nmethod */); - - // We don't generate local frame and don't align stack because - // we call stub code and there is no safepoint on this path. - - // Load parameters - const Register crc = rax; // crc - const Register val = rdx; // source java byte value - const Register tbl = rdi; // scratch - - // Arguments are reversed on java expression stack - __ movl(val, Address(rsp, wordSize)); // byte value - __ movl(crc, Address(rsp, 2*wordSize)); // Initial CRC - - __ lea(tbl, ExternalAddress(StubRoutines::crc_table_addr())); - __ notl(crc); // ~crc - __ update_byte_crc32(crc, val, tbl); - __ notl(crc); // ~crc - // result in rax - - // _areturn - __ pop(rdi); // get return address - __ mov(rsp, rsi); // set sp to sender sp - __ jmp(rdi); - - // generate a vanilla native entry as the slow path - __ bind(slow_path); - __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native)); - return entry; -} - -/** - * Method entry for static native methods: - * int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int len) - * int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len) - */ -address TemplateInterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) { - assert(UseCRC32Intrinsics, "this intrinsic is not supported"); - address entry = __ pc(); - - // rbx,: Method* - // rsi: senderSP must preserved for slow path, set SP to it on fast path - // rdx: scratch - // rdi: scratch - - Label slow_path; - // If we need a safepoint check, generate full interpreter entry. - __ get_thread(rdi); - __ safepoint_poll(slow_path, rdi, false /* at_return */, false /* in_nmethod */); - - // We don't generate local frame and don't align stack because - // we call stub code and there is no safepoint on this path. - - // Load parameters - const Register crc = rax; // crc - const Register buf = rdx; // source java byte array address - const Register len = rdi; // length - - // value x86_32 - // interp. arg ptr ESP + 4 - // int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int len) - // 3 2 1 0 - // int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len) - // 4 2,3 1 0 - - // Arguments are reversed on java expression stack - __ movl(len, Address(rsp, 4 + 0)); // Length - // Calculate address of start element - if (kind == Interpreter::java_util_zip_CRC32_updateByteBuffer) { - __ movptr(buf, Address(rsp, 4 + 2 * wordSize)); // long buf - __ addptr(buf, Address(rsp, 4 + 1 * wordSize)); // + offset - __ movl(crc, Address(rsp, 4 + 4 * wordSize)); // Initial CRC - } else { - __ movptr(buf, Address(rsp, 4 + 2 * wordSize)); // byte[] array - __ addptr(buf, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size - __ addptr(buf, Address(rsp, 4 + 1 * wordSize)); // + offset - __ movl(crc, Address(rsp, 4 + 3 * wordSize)); // Initial CRC - } - - __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, StubRoutines::updateBytesCRC32()), crc, buf, len); - // result in rax - - // _areturn - __ pop(rdi); // get return address - __ mov(rsp, rsi); // set sp to sender sp - __ jmp(rdi); - - // generate a vanilla native entry as the slow path - __ bind(slow_path); - __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native)); - return entry; -} - -/** -* Method entry for static native methods: -* int java.util.zip.CRC32C.updateBytes(int crc, byte[] b, int off, int end) -* int java.util.zip.CRC32C.updateByteBuffer(int crc, long address, int off, int end) -*/ -address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) { - assert(UseCRC32CIntrinsics, "this intrinsic is not supported"); - address entry = __ pc(); - // Load parameters - const Register crc = rax; // crc - const Register buf = rcx; // source java byte array address - const Register len = rdx; // length - const Register end = len; - - // value x86_32 - // interp. arg ptr ESP + 4 - // int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int end) - // 3 2 1 0 - // int java.util.zip.CRC32.updateByteBuffer(int crc, long address, int off, int end) - // 4 2,3 1 0 - - // Arguments are reversed on java expression stack - __ movl(end, Address(rsp, 4 + 0)); // end - __ subl(len, Address(rsp, 4 + 1 * wordSize)); // end - offset == length - // Calculate address of start element - if (kind == Interpreter::java_util_zip_CRC32C_updateDirectByteBuffer) { - __ movptr(buf, Address(rsp, 4 + 2 * wordSize)); // long address - __ addptr(buf, Address(rsp, 4 + 1 * wordSize)); // + offset - __ movl(crc, Address(rsp, 4 + 4 * wordSize)); // Initial CRC - } else { - __ movptr(buf, Address(rsp, 4 + 2 * wordSize)); // byte[] array - __ addptr(buf, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size - __ addptr(buf, Address(rsp, 4 + 1 * wordSize)); // + offset - __ movl(crc, Address(rsp, 4 + 3 * wordSize)); // Initial CRC - } - __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, StubRoutines::updateBytesCRC32C()), crc, buf, len); - // result in rax - // _areturn - __ pop(rdi); // get return address - __ mov(rsp, rsi); // set sp to sender sp - __ jmp(rdi); - - return entry; -} - -/** - * Method entry for static native method: - * java.lang.Float.intBitsToFloat(int bits) - */ -address TemplateInterpreterGenerator::generate_Float_intBitsToFloat_entry() { - if (UseSSE >= 1) { - address entry = __ pc(); - - // rsi: the sender's SP - - // Skip safepoint check (compiler intrinsic versions of this method - // do not perform safepoint checks either). - - // Load 'bits' into xmm0 (interpreter returns results in xmm0) - __ movflt(xmm0, Address(rsp, wordSize)); - - // Return - __ pop(rdi); // get return address - __ mov(rsp, rsi); // set rsp to the sender's SP - __ jmp(rdi); - return entry; - } - - return nullptr; -} - -/** - * Method entry for static native method: - * java.lang.Float.floatToRawIntBits(float value) - */ -address TemplateInterpreterGenerator::generate_Float_floatToRawIntBits_entry() { - if (UseSSE >= 1) { - address entry = __ pc(); - - // rsi: the sender's SP - - // Skip safepoint check (compiler intrinsic versions of this method - // do not perform safepoint checks either). - - // Load the parameter (a floating-point value) into rax. - __ movl(rax, Address(rsp, wordSize)); - - // Return - __ pop(rdi); // get return address - __ mov(rsp, rsi); // set rsp to the sender's SP - __ jmp(rdi); - return entry; - } - - return nullptr; -} - - -/** - * Method entry for static native method: - * java.lang.Double.longBitsToDouble(long bits) - */ -address TemplateInterpreterGenerator::generate_Double_longBitsToDouble_entry() { - if (UseSSE >= 2) { - address entry = __ pc(); - - // rsi: the sender's SP - - // Skip safepoint check (compiler intrinsic versions of this method - // do not perform safepoint checks either). - - // Load 'bits' into xmm0 (interpreter returns results in xmm0) - __ movdbl(xmm0, Address(rsp, wordSize)); - - // Return - __ pop(rdi); // get return address - __ mov(rsp, rsi); // set rsp to the sender's SP - __ jmp(rdi); - return entry; - } - - return nullptr; -} - -/** - * Method entry for static native method: - * java.lang.Double.doubleToRawLongBits(double value) - */ -address TemplateInterpreterGenerator::generate_Double_doubleToRawLongBits_entry() { - if (UseSSE >= 2) { - address entry = __ pc(); - - // rsi: the sender's SP - - // Skip safepoint check (compiler intrinsic versions of this method - // do not perform safepoint checks either). - - // Load the parameter (a floating-point value) into rax. - __ movl(rdx, Address(rsp, 2*wordSize)); - __ movl(rax, Address(rsp, wordSize)); - - // Return - __ pop(rdi); // get return address - __ mov(rsp, rsi); // set rsp to the sender's SP - __ jmp(rdi); - return entry; - } - - return nullptr; -} - -/** - * Method entry for static method: - * java.lang.Float.float16ToFloat(short floatBinary16) - */ -address TemplateInterpreterGenerator::generate_Float_float16ToFloat_entry() { - assert(VM_Version::supports_float16(), "this intrinsic is not supported"); - address entry = __ pc(); - - // rsi: the sender's SP - - // Load value into xmm0 and convert - __ movswl(rax, Address(rsp, wordSize)); - __ flt16_to_flt(xmm0, rax); - - // Return - __ pop(rdi); // get return address - __ mov(rsp, rsi); // set rsp to the sender's SP - __ jmp(rdi); - return entry; -} - -/** - * Method entry for static method: - * java.lang.Float.floatToFloat16(float value) - */ -address TemplateInterpreterGenerator::generate_Float_floatToFloat16_entry() { - assert(VM_Version::supports_float16(), "this intrinsic is not supported"); - address entry = __ pc(); - - // rsi: the sender's SP - - // Load value into xmm0, convert and put result into rax - __ movflt(xmm0, Address(rsp, wordSize)); - __ flt_to_flt16(rax, xmm0, xmm1); - - // Return - __ pop(rdi); // get return address - __ mov(rsp, rsi); // set rsp to the sender's SP - __ jmp(rdi); - return entry; -} - -address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKind kind) { - - // rbx,: Method* - // rcx: scratrch - // rsi: sender sp - - address entry_point = __ pc(); - - // These don't need a safepoint check because they aren't virtually - // callable. We won't enter these intrinsics from compiled code. - // If in the future we added an intrinsic which was virtually callable - // we'd have to worry about how to safepoint so that this code is used. - - // mathematical functions inlined by compiler - // (interpreter must provide identical implementation - // in order to avoid monotonicity bugs when switching - // from interpreter to compiler in the middle of some - // computation) - // - // stack: [ ret adr ] <-- rsp - // [ lo(arg) ] - // [ hi(arg) ] - // - if (kind == Interpreter::java_lang_math_tanh) { - return nullptr; - } - - if (kind == Interpreter::java_lang_math_fmaD) { - if (!UseFMA) { - return nullptr; // Generate a vanilla entry - } - __ movdbl(xmm2, Address(rsp, 5 * wordSize)); - __ movdbl(xmm1, Address(rsp, 3 * wordSize)); - __ movdbl(xmm0, Address(rsp, 1 * wordSize)); - __ fmad(xmm0, xmm1, xmm2, xmm0); - __ pop(rdi); // get return address - __ mov(rsp, rsi); // set sp to sender sp - __ jmp(rdi); - - return entry_point; - } else if (kind == Interpreter::java_lang_math_fmaF) { - if (!UseFMA) { - return nullptr; // Generate a vanilla entry - } - __ movflt(xmm2, Address(rsp, 3 * wordSize)); - __ movflt(xmm1, Address(rsp, 2 * wordSize)); - __ movflt(xmm0, Address(rsp, 1 * wordSize)); - __ fmaf(xmm0, xmm1, xmm2, xmm0); - __ pop(rdi); // get return address - __ mov(rsp, rsi); // set sp to sender sp - __ jmp(rdi); - - return entry_point; - } - - __ fld_d(Address(rsp, 1*wordSize)); - switch (kind) { - case Interpreter::java_lang_math_sin : - __ subptr(rsp, 2 * wordSize); - __ fstp_d(Address(rsp, 0)); - if (VM_Version::supports_sse2() && StubRoutines::dsin() != nullptr) { - __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dsin()))); - } else { - __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dsin)); - } - __ addptr(rsp, 2 * wordSize); - break; - case Interpreter::java_lang_math_cos : - __ subptr(rsp, 2 * wordSize); - __ fstp_d(Address(rsp, 0)); - if (VM_Version::supports_sse2() && StubRoutines::dcos() != nullptr) { - __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dcos()))); - } else { - __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dcos)); - } - __ addptr(rsp, 2 * wordSize); - break; - case Interpreter::java_lang_math_tan : - __ subptr(rsp, 2 * wordSize); - __ fstp_d(Address(rsp, 0)); - if (StubRoutines::dtan() != nullptr) { - __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dtan()))); - } else { - __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dtan)); - } - __ addptr(rsp, 2 * wordSize); - break; - case Interpreter::java_lang_math_sqrt: - __ fsqrt(); - break; - case Interpreter::java_lang_math_abs: - __ fabs(); - break; - case Interpreter::java_lang_math_log: - __ subptr(rsp, 2 * wordSize); - __ fstp_d(Address(rsp, 0)); - if (StubRoutines::dlog() != nullptr) { - __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlog()))); - } else { - __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dlog)); - } - __ addptr(rsp, 2 * wordSize); - break; - case Interpreter::java_lang_math_log10: - __ subptr(rsp, 2 * wordSize); - __ fstp_d(Address(rsp, 0)); - if (StubRoutines::dlog10() != nullptr) { - __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlog10()))); - } else { - __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dlog10)); - } - __ addptr(rsp, 2 * wordSize); - break; - case Interpreter::java_lang_math_pow: - __ fld_d(Address(rsp, 3*wordSize)); // second argument - __ subptr(rsp, 4 * wordSize); - __ fstp_d(Address(rsp, 0)); - __ fstp_d(Address(rsp, 2 * wordSize)); - if (StubRoutines::dpow() != nullptr) { - __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dpow()))); - } else { - __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dpow)); - } - __ addptr(rsp, 4 * wordSize); - break; - case Interpreter::java_lang_math_exp: - __ subptr(rsp, 2*wordSize); - __ fstp_d(Address(rsp, 0)); - if (StubRoutines::dexp() != nullptr) { - __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dexp()))); - } else { - __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dexp)); - } - __ addptr(rsp, 2*wordSize); - break; - default : - ShouldNotReachHere(); - } - - // return double result in xmm0 for interpreter and compilers. - if (UseSSE >= 2) { - __ subptr(rsp, 2*wordSize); - __ fstp_d(Address(rsp, 0)); - __ movdbl(xmm0, Address(rsp, 0)); - __ addptr(rsp, 2*wordSize); - } - - // done, result in FPU ST(0) or XMM0 - __ pop(rdi); // get return address - __ mov(rsp, rsi); // set sp to sender sp - __ jmp(rdi); - - return entry_point; -} - -// Not supported -address TemplateInterpreterGenerator::generate_currentThread() { return nullptr; } - diff --git a/src/hotspot/cpu/x86/upcallLinker_x86_32.cpp b/src/hotspot/cpu/x86/upcallLinker_x86_32.cpp deleted file mode 100644 index 6dd2ddd5874..00000000000 --- a/src/hotspot/cpu/x86/upcallLinker_x86_32.cpp +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright (c) 2020, 2025, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - */ - -#include "prims/upcallLinker.hpp" - -address UpcallLinker::make_upcall_stub(jobject receiver, Symbol* signature, - BasicType* out_sig_bt, int total_out_args, - BasicType ret_type, - jobject jabi, jobject jconv, - bool needs_return_buffer, int ret_buf_size) { - ShouldNotCallThis(); - return nullptr; -} diff --git a/src/hotspot/cpu/x86/vtableStubs_x86_32.cpp b/src/hotspot/cpu/x86/vtableStubs_x86_32.cpp deleted file mode 100644 index 3e70a45b58b..00000000000 --- a/src/hotspot/cpu/x86/vtableStubs_x86_32.cpp +++ /dev/null @@ -1,265 +0,0 @@ -/* - * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - * - */ - -#include "asm/macroAssembler.hpp" -#include "code/compiledIC.hpp" -#include "code/vtableStubs.hpp" -#include "interp_masm_x86.hpp" -#include "memory/resourceArea.hpp" -#include "oops/instanceKlass.hpp" -#include "oops/klassVtable.hpp" -#include "runtime/sharedRuntime.hpp" -#include "vmreg_x86.inline.hpp" -#ifdef COMPILER2 -#include "opto/runtime.hpp" -#endif - -// machine-dependent part of VtableStubs: create VtableStub of correct size and -// initialize its code - -#define __ masm-> - -#ifndef PRODUCT -extern "C" void bad_compiled_vtable_index(JavaThread* thread, oop receiver, int index); -#endif - -// These stubs are used by the compiler only. -// Argument registers, which must be preserved: -// rcx - receiver (always first argument) -// rdx - second argument (if any) -// Other registers that might be usable: -// rax - inline cache register (is interface for itable stub) -// rbx - method (used when calling out to interpreter) -// Available now, but may become callee-save at some point: -// rsi, rdi -// Note that rax and rdx are also used for return values. - -VtableStub* VtableStubs::create_vtable_stub(int vtable_index) { - // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing. - const int stub_code_length = code_size_limit(true); - VtableStub* s = new(stub_code_length) VtableStub(true, vtable_index); - // Can be null if there is no free space in the code cache. - if (s == nullptr) { - return nullptr; - } - - // Count unused bytes in instruction sequences of variable size. - // We add them to the computed buffer size in order to avoid - // overflow in subsequently generated stubs. - address start_pc; - int slop_bytes = 0; - int slop_delta = 0; - // No variance was detected in vtable stub sizes. Setting index_dependent_slop == 0 will unveil any deviation from this observation. - const int index_dependent_slop = 0; - - ResourceMark rm; - CodeBuffer cb(s->entry_point(), stub_code_length); - MacroAssembler* masm = new MacroAssembler(&cb); - -#if (!defined(PRODUCT) && defined(COMPILER2)) - if (CountCompiledCalls) { - __ incrementl(ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr())); - } -#endif - - // get receiver (need to skip return address on top of stack) - assert(VtableStub::receiver_location() == rcx->as_VMReg(), "receiver expected in rcx"); - - // get receiver klass - address npe_addr = __ pc(); - __ movptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes())); - -#ifndef PRODUCT - if (DebugVtables) { - Label L; - start_pc = __ pc(); - // check offset vs vtable length - __ cmpl(Address(rax, Klass::vtable_length_offset()), vtable_index*vtableEntry::size()); - slop_delta = 10 - (__ pc() - start_pc); // cmpl varies in length, depending on data - slop_bytes += slop_delta; - assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); - - __ jcc(Assembler::greater, L); - __ movl(rbx, vtable_index); - // VTABLE TODO: find upper bound for call_VM length. - start_pc = __ pc(); - __ call_VM(noreg, CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), rcx, rbx); - slop_delta = 500 - (__ pc() - start_pc); - slop_bytes += slop_delta; - assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); - __ bind(L); - } -#endif // PRODUCT - - const Register method = rbx; - - // load Method* and target address - start_pc = __ pc(); - __ lookup_virtual_method(rax, vtable_index, method); - slop_delta = 6 - (int)(__ pc() - start_pc); - slop_bytes += slop_delta; - assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); - -#ifndef PRODUCT - if (DebugVtables) { - Label L; - __ cmpptr(method, NULL_WORD); - __ jcc(Assembler::equal, L); - __ cmpptr(Address(method, Method::from_compiled_offset()), NULL_WORD); - __ jcc(Assembler::notZero, L); - __ stop("Vtable entry is null"); - __ bind(L); - } -#endif // PRODUCT - - // rax: receiver klass - // method (rbx): Method* - // rcx: receiver - address ame_addr = __ pc(); - __ jmp( Address(method, Method::from_compiled_offset())); - - masm->flush(); - slop_bytes += index_dependent_slop; // add'l slop for size variance due to large itable offsets - bookkeeping(masm, tty, s, npe_addr, ame_addr, true, vtable_index, slop_bytes, index_dependent_slop); - - return s; -} - - -VtableStub* VtableStubs::create_itable_stub(int itable_index) { - // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing. - const int stub_code_length = code_size_limit(false); - VtableStub* s = new(stub_code_length) VtableStub(false, itable_index); - // Can be null if there is no free space in the code cache. - if (s == nullptr) { - return nullptr; - } - // Count unused bytes in instruction sequences of variable size. - // We add them to the computed buffer size in order to avoid - // overflow in subsequently generated stubs. - address start_pc; - int slop_bytes = 0; - int slop_delta = 0; - const int index_dependent_slop = (itable_index == 0) ? 4 : // code size change with transition from 8-bit to 32-bit constant (@index == 32). - (itable_index < 32) ? 3 : 0; // index == 0 generates even shorter code. - - ResourceMark rm; - CodeBuffer cb(s->entry_point(), stub_code_length); - MacroAssembler* masm = new MacroAssembler(&cb); - -#if (!defined(PRODUCT) && defined(COMPILER2)) - if (CountCompiledCalls) { - __ incrementl(ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr())); - } -#endif /* PRODUCT */ - - // Entry arguments: - // rax: CompiledICData - // rcx: Receiver - - // Most registers are in use; we'll use rax, rbx, rcx, rdx, rsi, rdi - // (If we need to make rsi, rdi callee-save, do a push/pop here.) - const Register recv_klass_reg = rsi; - const Register holder_klass_reg = rax; // declaring interface klass (DEFC) - const Register resolved_klass_reg = rdi; // resolved interface klass (REFC) - const Register temp_reg = rdx; - const Register method = rbx; - const Register icdata_reg = rax; - const Register receiver = rcx; - - __ movptr(resolved_klass_reg, Address(icdata_reg, CompiledICData::itable_refc_klass_offset())); - __ movptr(holder_klass_reg, Address(icdata_reg, CompiledICData::itable_defc_klass_offset())); - - Label L_no_such_interface; - - // get receiver klass (also an implicit null-check) - assert(VtableStub::receiver_location() == rcx->as_VMReg(), "receiver expected in rcx"); - address npe_addr = __ pc(); - __ load_klass(recv_klass_reg, rcx, noreg); - - start_pc = __ pc(); - __ push(rdx); // temp_reg - - // Receiver subtype check against REFC. - // Get selected method from declaring class and itable index - __ lookup_interface_method_stub(recv_klass_reg, // input - holder_klass_reg, // input - resolved_klass_reg, // input - method, // output - temp_reg, - noreg, - receiver, // input (x86_32 only: to restore recv_klass value) - itable_index, - L_no_such_interface); - const ptrdiff_t lookupSize = __ pc() - start_pc; - - // We expect we need index_dependent_slop extra bytes. Reason: - // The emitted code in lookup_interface_method changes when itable_index exceeds 31. - // For windows, a narrow estimate was found to be 104. Other OSes not tested. - const ptrdiff_t estimate = 104; - const ptrdiff_t codesize = lookupSize + index_dependent_slop; - slop_delta = (int)(estimate - codesize); - slop_bytes += slop_delta; - assert(slop_delta >= 0, "itable #%d: Code size estimate (%d) for lookup_interface_method too small, required: %d", itable_index, (int)estimate, (int)codesize); - - // method (rbx): Method* - // rcx: receiver - -#ifdef ASSERT - if (DebugVtables) { - Label L1; - __ cmpptr(method, NULL_WORD); - __ jcc(Assembler::equal, L1); - __ cmpptr(Address(method, Method::from_compiled_offset()), NULL_WORD); - __ jcc(Assembler::notZero, L1); - __ stop("Method* is null"); - __ bind(L1); - } -#endif // ASSERT - - __ pop(rdx); - address ame_addr = __ pc(); - __ jmp(Address(method, Method::from_compiled_offset())); - - __ bind(L_no_such_interface); - // Handle IncompatibleClassChangeError in itable stubs. - // More detailed error message. - // We force resolving of the call site by jumping to the "handle - // wrong method" stub, and so let the interpreter runtime do all the - // dirty work. - __ pop(rdx); - __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); - - masm->flush(); - slop_bytes += index_dependent_slop; // add'l slop for size variance due to large itable offsets - bookkeeping(masm, tty, s, npe_addr, ame_addr, false, itable_index, slop_bytes, index_dependent_slop); - - return s; -} - -int VtableStub::pd_code_alignment() { - // x86 cache line size is 64 bytes, but we want to limit alignment loss. - const unsigned int icache_line_size = wordSize; - return icache_line_size; -} diff --git a/src/hotspot/cpu/x86/x86_32.ad b/src/hotspot/cpu/x86/x86_32.ad deleted file mode 100644 index 0b8dee7392a..00000000000 --- a/src/hotspot/cpu/x86/x86_32.ad +++ /dev/null @@ -1,13702 +0,0 @@ -// -// Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved. -// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -// -// This code is free software; you can redistribute it and/or modify it -// under the terms of the GNU General Public License version 2 only, as -// published by the Free Software Foundation. -// -// This code is distributed in the hope that it will be useful, but WITHOUT -// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -// version 2 for more details (a copy is included in the LICENSE file that -// accompanied this code). -// -// You should have received a copy of the GNU General Public License version -// 2 along with this work; if not, write to the Free Software Foundation, -// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -// -// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -// or visit www.oracle.com if you need additional information or have any -// questions. -// -// - -// X86 Architecture Description File - -//----------REGISTER DEFINITION BLOCK------------------------------------------ -// This information is used by the matcher and the register allocator to -// describe individual registers and classes of registers within the target -// architecture. - -register %{ -//----------Architecture Description Register Definitions---------------------- -// General Registers -// "reg_def" name ( register save type, C convention save type, -// ideal register type, encoding ); -// Register Save Types: -// -// NS = No-Save: The register allocator assumes that these registers -// can be used without saving upon entry to the method, & -// that they do not need to be saved at call sites. -// -// SOC = Save-On-Call: The register allocator assumes that these registers -// can be used without saving upon entry to the method, -// but that they must be saved at call sites. -// -// SOE = Save-On-Entry: The register allocator assumes that these registers -// must be saved before using them upon entry to the -// method, but they do not need to be saved at call -// sites. -// -// AS = Always-Save: The register allocator assumes that these registers -// must be saved before using them upon entry to the -// method, & that they must be saved at call sites. -// -// Ideal Register Type is used to determine how to save & restore a -// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get -// spilled with LoadP/StoreP. If the register supports both, use Op_RegI. -// -// The encoding number is the actual bit-pattern placed into the opcodes. - -// General Registers -// Previously set EBX, ESI, and EDI as save-on-entry for java code -// Turn off SOE in java-code due to frequent use of uncommon-traps. -// Now that allocator is better, turn on ESI and EDI as SOE registers. - -reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg()); -reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg()); -reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg()); -reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg()); -// now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code -reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg()); -reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg()); -reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg()); -reg_def ESP( NS, NS, Op_RegI, 4, rsp->as_VMReg()); - -// Float registers. We treat TOS/FPR0 special. It is invisible to the -// allocator, and only shows up in the encodings. -reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad()); -reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad()); -// Ok so here's the trick FPR1 is really st(0) except in the midst -// of emission of assembly for a machnode. During the emission the fpu stack -// is pushed making FPR1 == st(1) temporarily. However at any safepoint -// the stack will not have this element so FPR1 == st(0) from the -// oopMap viewpoint. This same weirdness with numbering causes -// instruction encoding to have to play games with the register -// encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation -// where it does flt->flt moves to see an example -// -reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()); -reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next()); -reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()); -reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next()); -reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()); -reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next()); -reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()); -reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next()); -reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()); -reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next()); -reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()); -reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next()); -reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()); -reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next()); -// -// Empty fill registers, which are never used, but supply alignment to xmm regs -// -reg_def FILL0( SOC, SOC, Op_RegF, 8, VMRegImpl::Bad()); -reg_def FILL1( SOC, SOC, Op_RegF, 9, VMRegImpl::Bad()); -reg_def FILL2( SOC, SOC, Op_RegF, 10, VMRegImpl::Bad()); -reg_def FILL3( SOC, SOC, Op_RegF, 11, VMRegImpl::Bad()); -reg_def FILL4( SOC, SOC, Op_RegF, 12, VMRegImpl::Bad()); -reg_def FILL5( SOC, SOC, Op_RegF, 13, VMRegImpl::Bad()); -reg_def FILL6( SOC, SOC, Op_RegF, 14, VMRegImpl::Bad()); -reg_def FILL7( SOC, SOC, Op_RegF, 15, VMRegImpl::Bad()); - -// Specify priority of register selection within phases of register -// allocation. Highest priority is first. A useful heuristic is to -// give registers a low priority when they are required by machine -// instructions, like EAX and EDX. Registers which are used as -// pairs must fall on an even boundary (witness the FPR#L's in this list). -// For the Intel integer registers, the equivalent Long pairs are -// EDX:EAX, EBX:ECX, and EDI:EBP. -alloc_class chunk0( ECX, EBX, EBP, EDI, EAX, EDX, ESI, ESP, - FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H, - FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H, - FPR6L, FPR6H, FPR7L, FPR7H, - FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7); - - -//----------Architecture Description Register Classes-------------------------- -// Several register classes are automatically defined based upon information in -// this architecture description. -// 1) reg_class inline_cache_reg ( /* as def'd in frame section */ ) -// 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ ) -// -// Class for no registers (empty set). -reg_class no_reg(); - -// Class for all registers -reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP); -// Class for all registers (excluding EBP) -reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP); -// Dynamic register class that selects at runtime between register classes -// any_reg and any_no_ebp_reg (depending on the value of the flag PreserveFramePointer). -// Equivalent to: return PreserveFramePointer ? any_no_ebp_reg : any_reg; -reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %}); - -// Class for general registers -reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX); -// Class for general registers (excluding EBP). -// It is also safe for use by tailjumps (we don't want to allocate in ebp). -// Used also if the PreserveFramePointer flag is true. -reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX); -// Dynamic register class that selects between int_reg and int_reg_no_ebp. -reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %}); - -// Class of "X" registers -reg_class int_x_reg(EBX, ECX, EDX, EAX); - -// Class of registers that can appear in an address with no offset. -// EBP and ESP require an extra instruction byte for zero offset. -// Used in fast-unlock -reg_class p_reg(EDX, EDI, ESI, EBX); - -// Class for general registers excluding ECX -reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX); -// Class for general registers excluding ECX (and EBP) -reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX); -// Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp. -reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %}); - -// Class for general registers excluding EAX -reg_class nax_reg(EDX, EDI, ESI, ECX, EBX); - -// Class for general registers excluding EAX and EBX. -reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP); -// Class for general registers excluding EAX and EBX (and EBP) -reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX); -// Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp. -reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %}); - -// Class of EAX (for multiply and divide operations) -reg_class eax_reg(EAX); - -// Class of EBX (for atomic add) -reg_class ebx_reg(EBX); - -// Class of ECX (for shift and JCXZ operations and cmpLTMask) -reg_class ecx_reg(ECX); - -// Class of EDX (for multiply and divide operations) -reg_class edx_reg(EDX); - -// Class of EDI (for synchronization) -reg_class edi_reg(EDI); - -// Class of ESI (for synchronization) -reg_class esi_reg(ESI); - -// Singleton class for stack pointer -reg_class sp_reg(ESP); - -// Singleton class for instruction pointer -// reg_class ip_reg(EIP); - -// Class of integer register pairs -reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI ); -// Class of integer register pairs (excluding EBP and EDI); -reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX ); -// Dynamic register class that selects between long_reg and long_reg_no_ebp. -reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %}); - -// Class of integer register pairs that aligns with calling convention -reg_class eadx_reg( EAX,EDX ); -reg_class ebcx_reg( ECX,EBX ); -reg_class ebpd_reg( EBP,EDI ); - -// Not AX or DX, used in divides -reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP); -// Not AX or DX (and neither EBP), used in divides -reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI); -// Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp. -reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %}); - -// Floating point registers. Notice FPR0 is not a choice. -// FPR0 is not ever allocated; we use clever encodings to fake -// a 2-address instructions out of Intels FP stack. -reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L ); - -reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H, - FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H, - FPR7L,FPR7H ); - -reg_class fp_flt_reg0( FPR1L ); -reg_class fp_dbl_reg0( FPR1L,FPR1H ); -reg_class fp_dbl_reg1( FPR2L,FPR2H ); -reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H, - FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H ); - -%} - - -//----------SOURCE BLOCK------------------------------------------------------- -// This is a block of C++ code which provides values, functions, and -// definitions necessary in the rest of the architecture description -source_hpp %{ -// Must be visible to the DFA in dfa_x86_32.cpp -extern bool is_operand_hi32_zero(Node* n); -%} - -source %{ -#define RELOC_IMM32 Assembler::imm_operand -#define RELOC_DISP32 Assembler::disp32_operand - -#define __ masm-> - -// How to find the high register of a Long pair, given the low register -#define HIGH_FROM_LOW(x) (as_Register((x)->encoding()+2)) -#define HIGH_FROM_LOW_ENC(x) ((x)+2) - -// These masks are used to provide 128-bit aligned bitmasks to the XMM -// instructions, to allow sign-masking or sign-bit flipping. They allow -// fast versions of NegF/NegD and AbsF/AbsD. - -void reg_mask_init() {} - -// Note: 'double' and 'long long' have 32-bits alignment on x86. -static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) { - // Use the expression (adr)&(~0xF) to provide 128-bits aligned address - // of 128-bits operands for SSE instructions. - jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF))); - // Store the value to a 128-bits operand. - operand[0] = lo; - operand[1] = hi; - return operand; -} - -// Buffer for 128-bits masks used by SSE instructions. -static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment) - -// Static initialization during VM startup. -static jlong *float_signmask_pool = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF)); -static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF)); -static jlong *float_signflip_pool = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000)); -static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000)); - -// Offset hacking within calls. -static int pre_call_resets_size() { - int size = 0; - Compile* C = Compile::current(); - if (C->in_24_bit_fp_mode()) { - size += 6; // fldcw - } - if (VM_Version::supports_vzeroupper()) { - size += 3; // vzeroupper - } - return size; -} - -// !!!!! Special hack to get all type of calls to specify the byte offset -// from the start of the call to the point where the return address -// will point. -int MachCallStaticJavaNode::ret_addr_offset() { - return 5 + pre_call_resets_size(); // 5 bytes from start of call to where return address points -} - -int MachCallDynamicJavaNode::ret_addr_offset() { - return 10 + pre_call_resets_size(); // 10 bytes from start of call to where return address points -} - -static int sizeof_FFree_Float_Stack_All = -1; - -int MachCallRuntimeNode::ret_addr_offset() { - assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already"); - return 5 + pre_call_resets_size() + (_leaf_no_fp ? 0 : sizeof_FFree_Float_Stack_All); -} - -// -// Compute padding required for nodes which need alignment -// - -// The address of the call instruction needs to be 4-byte aligned to -// ensure that it does not span a cache line so that it can be patched. -int CallStaticJavaDirectNode::compute_padding(int current_offset) const { - current_offset += pre_call_resets_size(); // skip fldcw, if any - current_offset += 1; // skip call opcode byte - return align_up(current_offset, alignment_required()) - current_offset; -} - -// The address of the call instruction needs to be 4-byte aligned to -// ensure that it does not span a cache line so that it can be patched. -int CallDynamicJavaDirectNode::compute_padding(int current_offset) const { - current_offset += pre_call_resets_size(); // skip fldcw, if any - current_offset += 5; // skip MOV instruction - current_offset += 1; // skip call opcode byte - return align_up(current_offset, alignment_required()) - current_offset; -} - -// EMIT_RM() -void emit_rm(C2_MacroAssembler *masm, int f1, int f2, int f3) { - unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3); - __ emit_int8(c); -} - -// EMIT_CC() -void emit_cc(C2_MacroAssembler *masm, int f1, int f2) { - unsigned char c = (unsigned char)( f1 | f2 ); - __ emit_int8(c); -} - -// EMIT_OPCODE() -void emit_opcode(C2_MacroAssembler *masm, int code) { - __ emit_int8((unsigned char) code); -} - -// EMIT_OPCODE() w/ relocation information -void emit_opcode(C2_MacroAssembler *masm, int code, relocInfo::relocType reloc, int offset = 0) { - __ relocate(__ inst_mark() + offset, reloc); - emit_opcode(masm, code); -} - -// EMIT_D8() -void emit_d8(C2_MacroAssembler *masm, int d8) { - __ emit_int8((unsigned char) d8); -} - -// EMIT_D16() -void emit_d16(C2_MacroAssembler *masm, int d16) { - __ emit_int16(d16); -} - -// EMIT_D32() -void emit_d32(C2_MacroAssembler *masm, int d32) { - __ emit_int32(d32); -} - -// emit 32 bit value and construct relocation entry from relocInfo::relocType -void emit_d32_reloc(C2_MacroAssembler *masm, int d32, relocInfo::relocType reloc, - int format) { - __ relocate(__ inst_mark(), reloc, format); - __ emit_int32(d32); -} - -// emit 32 bit value and construct relocation entry from RelocationHolder -void emit_d32_reloc(C2_MacroAssembler *masm, int d32, RelocationHolder const& rspec, - int format) { -#ifdef ASSERT - if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) { - assert(oopDesc::is_oop(cast_to_oop(d32)), "cannot embed broken oops in code"); - } -#endif - __ relocate(__ inst_mark(), rspec, format); - __ emit_int32(d32); -} - -// Access stack slot for load or store -void store_to_stackslot(C2_MacroAssembler *masm, int opcode, int rm_field, int disp) { - emit_opcode( masm, opcode ); // (e.g., FILD [ESP+src]) - if( -128 <= disp && disp <= 127 ) { - emit_rm( masm, 0x01, rm_field, ESP_enc ); // R/M byte - emit_rm( masm, 0x00, ESP_enc, ESP_enc); // SIB byte - emit_d8 (masm, disp); // Displacement // R/M byte - } else { - emit_rm( masm, 0x02, rm_field, ESP_enc ); // R/M byte - emit_rm( masm, 0x00, ESP_enc, ESP_enc); // SIB byte - emit_d32(masm, disp); // Displacement // R/M byte - } -} - - // rRegI ereg, memory mem) %{ // emit_reg_mem -void encode_RegMem( C2_MacroAssembler *masm, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) { - // There is no index & no scale, use form without SIB byte - if ((index == 0x4) && - (scale == 0) && (base != ESP_enc)) { - // If no displacement, mode is 0x0; unless base is [EBP] - if ( (displace == 0) && (base != EBP_enc) ) { - emit_rm(masm, 0x0, reg_encoding, base); - } - else { // If 8-bit displacement, mode 0x1 - if ((displace >= -128) && (displace <= 127) - && (disp_reloc == relocInfo::none) ) { - emit_rm(masm, 0x1, reg_encoding, base); - emit_d8(masm, displace); - } - else { // If 32-bit displacement - if (base == -1) { // Special flag for absolute address - emit_rm(masm, 0x0, reg_encoding, 0x5); - // (manual lies; no SIB needed here) - if ( disp_reloc != relocInfo::none ) { - emit_d32_reloc(masm, displace, disp_reloc, 1); - } else { - emit_d32 (masm, displace); - } - } - else { // Normal base + offset - emit_rm(masm, 0x2, reg_encoding, base); - if ( disp_reloc != relocInfo::none ) { - emit_d32_reloc(masm, displace, disp_reloc, 1); - } else { - emit_d32 (masm, displace); - } - } - } - } - } - else { // Else, encode with the SIB byte - // If no displacement, mode is 0x0; unless base is [EBP] - if (displace == 0 && (base != EBP_enc)) { // If no displacement - emit_rm(masm, 0x0, reg_encoding, 0x4); - emit_rm(masm, scale, index, base); - } - else { // If 8-bit displacement, mode 0x1 - if ((displace >= -128) && (displace <= 127) - && (disp_reloc == relocInfo::none) ) { - emit_rm(masm, 0x1, reg_encoding, 0x4); - emit_rm(masm, scale, index, base); - emit_d8(masm, displace); - } - else { // If 32-bit displacement - if (base == 0x04 ) { - emit_rm(masm, 0x2, reg_encoding, 0x4); - emit_rm(masm, scale, index, 0x04); - } else { - emit_rm(masm, 0x2, reg_encoding, 0x4); - emit_rm(masm, scale, index, base); - } - if ( disp_reloc != relocInfo::none ) { - emit_d32_reloc(masm, displace, disp_reloc, 1); - } else { - emit_d32 (masm, displace); - } - } - } - } -} - - -void encode_Copy( C2_MacroAssembler *masm, int dst_encoding, int src_encoding ) { - if( dst_encoding == src_encoding ) { - // reg-reg copy, use an empty encoding - } else { - emit_opcode( masm, 0x8B ); - emit_rm(masm, 0x3, dst_encoding, src_encoding ); - } -} - -void emit_cmpfp_fixup(MacroAssembler* masm) { - Label exit; - __ jccb(Assembler::noParity, exit); - __ pushf(); - // - // comiss/ucomiss instructions set ZF,PF,CF flags and - // zero OF,AF,SF for NaN values. - // Fixup flags by zeroing ZF,PF so that compare of NaN - // values returns 'less than' result (CF is set). - // Leave the rest of flags unchanged. - // - // 7 6 5 4 3 2 1 0 - // |S|Z|r|A|r|P|r|C| (r - reserved bit) - // 0 0 1 0 1 0 1 1 (0x2B) - // - __ andl(Address(rsp, 0), 0xffffff2b); - __ popf(); - __ bind(exit); -} - -static void emit_cmpfp3(MacroAssembler* masm, Register dst) { - Label done; - __ movl(dst, -1); - __ jcc(Assembler::parity, done); - __ jcc(Assembler::below, done); - __ setb(Assembler::notEqual, dst); - __ movzbl(dst, dst); - __ bind(done); -} - - -//============================================================================= -const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty; - -int ConstantTable::calculate_table_base_offset() const { - return 0; // absolute addressing, no offset -} - -bool MachConstantBaseNode::requires_postalloc_expand() const { return false; } -void MachConstantBaseNode::postalloc_expand(GrowableArray *nodes, PhaseRegAlloc *ra_) { - ShouldNotReachHere(); -} - -void MachConstantBaseNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const { - // Empty encoding -} - -uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const { - return 0; -} - -#ifndef PRODUCT -void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const { - st->print("# MachConstantBaseNode (empty encoding)"); -} -#endif - - -//============================================================================= -#ifndef PRODUCT -void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const { - Compile* C = ra_->C; - - int framesize = C->output()->frame_size_in_bytes(); - int bangsize = C->output()->bang_size_in_bytes(); - assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); - // Remove wordSize for return addr which is already pushed. - framesize -= wordSize; - - if (C->output()->need_stack_bang(bangsize)) { - framesize -= wordSize; - st->print("# stack bang (%d bytes)", bangsize); - st->print("\n\t"); - st->print("PUSH EBP\t# Save EBP"); - if (PreserveFramePointer) { - st->print("\n\t"); - st->print("MOV EBP, ESP\t# Save the caller's SP into EBP"); - } - if (framesize) { - st->print("\n\t"); - st->print("SUB ESP, #%d\t# Create frame",framesize); - } - } else { - st->print("SUB ESP, #%d\t# Create frame",framesize); - st->print("\n\t"); - framesize -= wordSize; - st->print("MOV [ESP + #%d], EBP\t# Save EBP",framesize); - if (PreserveFramePointer) { - st->print("\n\t"); - st->print("MOV EBP, ESP\t# Save the caller's SP into EBP"); - if (framesize > 0) { - st->print("\n\t"); - st->print("ADD EBP, #%d", framesize); - } - } - } - - if (VerifyStackAtCalls) { - st->print("\n\t"); - framesize -= wordSize; - st->print("MOV [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize); - } - - if( C->in_24_bit_fp_mode() ) { - st->print("\n\t"); - st->print("FLDCW \t# load 24 bit fpu control word"); - } - if (UseSSE >= 2 && VerifyFPU) { - st->print("\n\t"); - st->print("# verify FPU stack (must be clean on entry)"); - } - -#ifdef ASSERT - if (VerifyStackAtCalls) { - st->print("\n\t"); - st->print("# stack alignment check"); - } -#endif - st->cr(); -} -#endif - - -void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const { - Compile* C = ra_->C; - - int framesize = C->output()->frame_size_in_bytes(); - int bangsize = C->output()->bang_size_in_bytes(); - - __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode(), C->stub_function() != nullptr); - - C->output()->set_frame_complete(__ offset()); - - if (C->has_mach_constant_base_node()) { - // NOTE: We set the table base offset here because users might be - // emitted before MachConstantBaseNode. - ConstantTable& constant_table = C->output()->constant_table(); - constant_table.set_table_base_offset(constant_table.calculate_table_base_offset()); - } -} - -uint MachPrologNode::size(PhaseRegAlloc *ra_) const { - return MachNode::size(ra_); // too many variables; just compute it the hard way -} - -int MachPrologNode::reloc() const { - return 0; // a large enough number -} - -//============================================================================= -#ifndef PRODUCT -void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { - Compile *C = ra_->C; - int framesize = C->output()->frame_size_in_bytes(); - assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); - // Remove two words for return addr and rbp, - framesize -= 2*wordSize; - - if (C->max_vector_size() > 16) { - st->print("VZEROUPPER"); - st->cr(); st->print("\t"); - } - if (C->in_24_bit_fp_mode()) { - st->print("FLDCW standard control word"); - st->cr(); st->print("\t"); - } - if (framesize) { - st->print("ADD ESP,%d\t# Destroy frame",framesize); - st->cr(); st->print("\t"); - } - st->print_cr("POPL EBP"); st->print("\t"); - if (do_polling() && C->is_method_compilation()) { - st->print("CMPL rsp, poll_offset[thread] \n\t" - "JA #safepoint_stub\t" - "# Safepoint: poll for GC"); - } -} -#endif - -void MachEpilogNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const { - Compile *C = ra_->C; - - if (C->max_vector_size() > 16) { - // Clear upper bits of YMM registers when current compiled code uses - // wide vectors to avoid AVX <-> SSE transition penalty during call. - __ vzeroupper(); - } - // If method set FPU control word, restore to standard control word - if (C->in_24_bit_fp_mode()) { - __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std())); - } - - int framesize = C->output()->frame_size_in_bytes(); - assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); - // Remove two words for return addr and rbp, - framesize -= 2*wordSize; - - // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here - - if (framesize >= 128) { - emit_opcode(masm, 0x81); // add SP, #framesize - emit_rm(masm, 0x3, 0x00, ESP_enc); - emit_d32(masm, framesize); - } else if (framesize) { - emit_opcode(masm, 0x83); // add SP, #framesize - emit_rm(masm, 0x3, 0x00, ESP_enc); - emit_d8(masm, framesize); - } - - emit_opcode(masm, 0x58 | EBP_enc); - - if (StackReservedPages > 0 && C->has_reserved_stack_access()) { - __ reserved_stack_check(); - } - - if (do_polling() && C->is_method_compilation()) { - Register thread = as_Register(EBX_enc); - __ get_thread(thread); - Label dummy_label; - Label* code_stub = &dummy_label; - if (!C->output()->in_scratch_emit_size()) { - C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset()); - C->output()->add_stub(stub); - code_stub = &stub->entry(); - } - __ set_inst_mark(); - __ relocate(relocInfo::poll_return_type); - __ clear_inst_mark(); - __ safepoint_poll(*code_stub, thread, true /* at_return */, true /* in_nmethod */); - } -} - -uint MachEpilogNode::size(PhaseRegAlloc *ra_) const { - return MachNode::size(ra_); // too many variables; just compute it - // the hard way -} - -int MachEpilogNode::reloc() const { - return 0; // a large enough number -} - -const Pipeline * MachEpilogNode::pipeline() const { - return MachNode::pipeline_class(); -} - -//============================================================================= - -enum RC { rc_bad, rc_int, rc_kreg, rc_float, rc_xmm, rc_stack }; -static enum RC rc_class( OptoReg::Name reg ) { - - if( !OptoReg::is_valid(reg) ) return rc_bad; - if (OptoReg::is_stack(reg)) return rc_stack; - - VMReg r = OptoReg::as_VMReg(reg); - if (r->is_Register()) return rc_int; - if (r->is_FloatRegister()) { - assert(UseSSE < 2, "shouldn't be used in SSE2+ mode"); - return rc_float; - } - if (r->is_KRegister()) return rc_kreg; - assert(r->is_XMMRegister(), "must be"); - return rc_xmm; -} - -static int impl_helper( C2_MacroAssembler *masm, bool do_size, bool is_load, int offset, int reg, - int opcode, const char *op_str, int size, outputStream* st ) { - if( masm ) { - masm->set_inst_mark(); - emit_opcode (masm, opcode ); - encode_RegMem(masm, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none); - masm->clear_inst_mark(); -#ifndef PRODUCT - } else if( !do_size ) { - if( size != 0 ) st->print("\n\t"); - if( opcode == 0x8B || opcode == 0x89 ) { // MOV - if( is_load ) st->print("%s %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset); - else st->print("%s [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]); - } else { // FLD, FST, PUSH, POP - st->print("%s [ESP + #%d]",op_str,offset); - } -#endif - } - int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4); - return size+3+offset_size; -} - -// Helper for XMM registers. Extra opcode bits, limited syntax. -static int impl_x_helper( C2_MacroAssembler *masm, bool do_size, bool is_load, - int offset, int reg_lo, int reg_hi, int size, outputStream* st ) { - int in_size_in_bits = Assembler::EVEX_32bit; - int evex_encoding = 0; - if (reg_lo+1 == reg_hi) { - in_size_in_bits = Assembler::EVEX_64bit; - evex_encoding = Assembler::VEX_W; - } - if (masm) { - // EVEX spills remain EVEX: Compressed displacemement is better than AVX on spill mem operations, - // it maps more cases to single byte displacement - __ set_managed(); - if (reg_lo+1 == reg_hi) { // double move? - if (is_load) { - __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset)); - } else { - __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo])); - } - } else { - if (is_load) { - __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset)); - } else { - __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo])); - } - } -#ifndef PRODUCT - } else if (!do_size) { - if (size != 0) st->print("\n\t"); - if (reg_lo+1 == reg_hi) { // double move? - if (is_load) st->print("%s %s,[ESP + #%d]", - UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD", - Matcher::regName[reg_lo], offset); - else st->print("MOVSD [ESP + #%d],%s", - offset, Matcher::regName[reg_lo]); - } else { - if (is_load) st->print("MOVSS %s,[ESP + #%d]", - Matcher::regName[reg_lo], offset); - else st->print("MOVSS [ESP + #%d],%s", - offset, Matcher::regName[reg_lo]); - } -#endif - } - bool is_single_byte = false; - if ((UseAVX > 2) && (offset != 0)) { - is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding); - } - int offset_size = 0; - if (UseAVX > 2 ) { - offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4); - } else { - offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4); - } - size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX - // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. - return size+5+offset_size; -} - - -static int impl_movx_helper( C2_MacroAssembler *masm, bool do_size, int src_lo, int dst_lo, - int src_hi, int dst_hi, int size, outputStream* st ) { - if (masm) { - // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way. - __ set_managed(); - if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move? - __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]), - as_XMMRegister(Matcher::_regEncode[src_lo])); - } else { - __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]), - as_XMMRegister(Matcher::_regEncode[src_lo])); - } -#ifndef PRODUCT - } else if (!do_size) { - if (size != 0) st->print("\n\t"); - if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers - if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move? - st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]); - } else { - st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]); - } - } else { - if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move? - st->print("MOVSD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]); - } else { - st->print("MOVSS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]); - } - } -#endif - } - // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix. - // Only MOVAPS SSE prefix uses 1 byte. EVEX uses an additional 2 bytes. - int sz = (UseAVX > 2) ? 6 : 4; - if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) && - UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3; - return size + sz; -} - -static int impl_movgpr2x_helper( C2_MacroAssembler *masm, bool do_size, int src_lo, int dst_lo, - int src_hi, int dst_hi, int size, outputStream* st ) { - // 32-bit - if (masm) { - // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way. - __ set_managed(); - __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]), - as_Register(Matcher::_regEncode[src_lo])); -#ifndef PRODUCT - } else if (!do_size) { - st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]); -#endif - } - return (UseAVX> 2) ? 6 : 4; -} - - -static int impl_movx2gpr_helper( C2_MacroAssembler *masm, bool do_size, int src_lo, int dst_lo, - int src_hi, int dst_hi, int size, outputStream* st ) { - // 32-bit - if (masm) { - // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way. - __ set_managed(); - __ movdl(as_Register(Matcher::_regEncode[dst_lo]), - as_XMMRegister(Matcher::_regEncode[src_lo])); -#ifndef PRODUCT - } else if (!do_size) { - st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]); -#endif - } - return (UseAVX> 2) ? 6 : 4; -} - -static int impl_mov_helper( C2_MacroAssembler *masm, bool do_size, int src, int dst, int size, outputStream* st ) { - if( masm ) { - emit_opcode(masm, 0x8B ); - emit_rm (masm, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] ); -#ifndef PRODUCT - } else if( !do_size ) { - if( size != 0 ) st->print("\n\t"); - st->print("MOV %s,%s",Matcher::regName[dst],Matcher::regName[src]); -#endif - } - return size+2; -} - -static int impl_fp_store_helper( C2_MacroAssembler *masm, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi, - int offset, int size, outputStream* st ) { - if( src_lo != FPR1L_num ) { // Move value to top of FP stack, if not already there - if( masm ) { - emit_opcode( masm, 0xD9 ); // FLD (i.e., push it) - emit_d8( masm, 0xC0-1+Matcher::_regEncode[src_lo] ); -#ifndef PRODUCT - } else if( !do_size ) { - if( size != 0 ) st->print("\n\t"); - st->print("FLD %s",Matcher::regName[src_lo]); -#endif - } - size += 2; - } - - int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/; - const char *op_str; - int op; - if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store? - op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D "; - op = 0xDD; - } else { // 32-bit store - op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S "; - op = 0xD9; - assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" ); - } - - return impl_helper(masm,do_size,false,offset,st_op,op,op_str,size, st); -} - -// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad. -static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo, - int src_hi, int dst_hi, uint ireg, outputStream* st); - -void vec_spill_helper(C2_MacroAssembler *masm, bool is_load, - int stack_offset, int reg, uint ireg, outputStream* st); - -static void vec_stack_to_stack_helper(C2_MacroAssembler *masm, int src_offset, - int dst_offset, uint ireg, outputStream* st) { - if (masm) { - switch (ireg) { - case Op_VecS: - __ pushl(Address(rsp, src_offset)); - __ popl (Address(rsp, dst_offset)); - break; - case Op_VecD: - __ pushl(Address(rsp, src_offset)); - __ popl (Address(rsp, dst_offset)); - __ pushl(Address(rsp, src_offset+4)); - __ popl (Address(rsp, dst_offset+4)); - break; - case Op_VecX: - __ movdqu(Address(rsp, -16), xmm0); - __ movdqu(xmm0, Address(rsp, src_offset)); - __ movdqu(Address(rsp, dst_offset), xmm0); - __ movdqu(xmm0, Address(rsp, -16)); - break; - case Op_VecY: - __ vmovdqu(Address(rsp, -32), xmm0); - __ vmovdqu(xmm0, Address(rsp, src_offset)); - __ vmovdqu(Address(rsp, dst_offset), xmm0); - __ vmovdqu(xmm0, Address(rsp, -32)); - break; - case Op_VecZ: - __ evmovdquq(Address(rsp, -64), xmm0, 2); - __ evmovdquq(xmm0, Address(rsp, src_offset), 2); - __ evmovdquq(Address(rsp, dst_offset), xmm0, 2); - __ evmovdquq(xmm0, Address(rsp, -64), 2); - break; - default: - ShouldNotReachHere(); - } -#ifndef PRODUCT - } else { - switch (ireg) { - case Op_VecS: - st->print("pushl [rsp + #%d]\t# 32-bit mem-mem spill\n\t" - "popl [rsp + #%d]", - src_offset, dst_offset); - break; - case Op_VecD: - st->print("pushl [rsp + #%d]\t# 64-bit mem-mem spill\n\t" - "popq [rsp + #%d]\n\t" - "pushl [rsp + #%d]\n\t" - "popq [rsp + #%d]", - src_offset, dst_offset, src_offset+4, dst_offset+4); - break; - case Op_VecX: - st->print("movdqu [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t" - "movdqu xmm0, [rsp + #%d]\n\t" - "movdqu [rsp + #%d], xmm0\n\t" - "movdqu xmm0, [rsp - #16]", - src_offset, dst_offset); - break; - case Op_VecY: - st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t" - "vmovdqu xmm0, [rsp + #%d]\n\t" - "vmovdqu [rsp + #%d], xmm0\n\t" - "vmovdqu xmm0, [rsp - #32]", - src_offset, dst_offset); - break; - case Op_VecZ: - st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t" - "vmovdqu xmm0, [rsp + #%d]\n\t" - "vmovdqu [rsp + #%d], xmm0\n\t" - "vmovdqu xmm0, [rsp - #64]", - src_offset, dst_offset); - break; - default: - ShouldNotReachHere(); - } -#endif - } -} - -uint MachSpillCopyNode::implementation( C2_MacroAssembler *masm, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const { - // Get registers to move - OptoReg::Name src_second = ra_->get_reg_second(in(1)); - OptoReg::Name src_first = ra_->get_reg_first(in(1)); - OptoReg::Name dst_second = ra_->get_reg_second(this ); - OptoReg::Name dst_first = ra_->get_reg_first(this ); - - enum RC src_second_rc = rc_class(src_second); - enum RC src_first_rc = rc_class(src_first); - enum RC dst_second_rc = rc_class(dst_second); - enum RC dst_first_rc = rc_class(dst_first); - - assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" ); - - // Generate spill code! - int size = 0; - - if( src_first == dst_first && src_second == dst_second ) - return size; // Self copy, no move - - if (bottom_type()->isa_vect() != nullptr && bottom_type()->isa_vectmask() == nullptr) { - uint ireg = ideal_reg(); - assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity"); - assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity"); - assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity"); - if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) { - // mem -> mem - int src_offset = ra_->reg2offset(src_first); - int dst_offset = ra_->reg2offset(dst_first); - vec_stack_to_stack_helper(masm, src_offset, dst_offset, ireg, st); - } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) { - vec_mov_helper(masm, src_first, dst_first, src_second, dst_second, ireg, st); - } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) { - int stack_offset = ra_->reg2offset(dst_first); - vec_spill_helper(masm, false, stack_offset, src_first, ireg, st); - } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) { - int stack_offset = ra_->reg2offset(src_first); - vec_spill_helper(masm, true, stack_offset, dst_first, ireg, st); - } else { - ShouldNotReachHere(); - } - return 0; - } - - // -------------------------------------- - // Check for mem-mem move. push/pop to move. - if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) { - if( src_second == dst_first ) { // overlapping stack copy ranges - assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" ); - size = impl_helper(masm,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st); - size = impl_helper(masm,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st); - src_second_rc = dst_second_rc = rc_bad; // flag as already moved the second bits - } - // move low bits - size = impl_helper(masm,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH ",size, st); - size = impl_helper(masm,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP ",size, st); - if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits - size = impl_helper(masm,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st); - size = impl_helper(masm,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st); - } - return size; - } - - // -------------------------------------- - // Check for integer reg-reg copy - if( src_first_rc == rc_int && dst_first_rc == rc_int ) - size = impl_mov_helper(masm,do_size,src_first,dst_first,size, st); - - // Check for integer store - if( src_first_rc == rc_int && dst_first_rc == rc_stack ) - size = impl_helper(masm,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st); - - // Check for integer load - if( src_first_rc == rc_stack && dst_first_rc == rc_int ) - size = impl_helper(masm,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st); - - // Check for integer reg-xmm reg copy - if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) { - assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad), - "no 64 bit integer-float reg moves" ); - return impl_movgpr2x_helper(masm,do_size,src_first,dst_first,src_second, dst_second, size, st); - } - // -------------------------------------- - // Check for float reg-reg copy - if( src_first_rc == rc_float && dst_first_rc == rc_float ) { - assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) || - (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" ); - if( masm ) { - - // Note the mucking with the register encode to compensate for the 0/1 - // indexing issue mentioned in a comment in the reg_def sections - // for FPR registers many lines above here. - - if( src_first != FPR1L_num ) { - emit_opcode (masm, 0xD9 ); // FLD ST(i) - emit_d8 (masm, 0xC0+Matcher::_regEncode[src_first]-1 ); - emit_opcode (masm, 0xDD ); // FSTP ST(i) - emit_d8 (masm, 0xD8+Matcher::_regEncode[dst_first] ); - } else { - emit_opcode (masm, 0xDD ); // FST ST(i) - emit_d8 (masm, 0xD0+Matcher::_regEncode[dst_first]-1 ); - } -#ifndef PRODUCT - } else if( !do_size ) { - if( size != 0 ) st->print("\n\t"); - if( src_first != FPR1L_num ) st->print("FLD %s\n\tFSTP %s",Matcher::regName[src_first],Matcher::regName[dst_first]); - else st->print( "FST %s", Matcher::regName[dst_first]); -#endif - } - return size + ((src_first != FPR1L_num) ? 2+2 : 2); - } - - // Check for float store - if( src_first_rc == rc_float && dst_first_rc == rc_stack ) { - return impl_fp_store_helper(masm,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st); - } - - // Check for float load - if( dst_first_rc == rc_float && src_first_rc == rc_stack ) { - int offset = ra_->reg2offset(src_first); - const char *op_str; - int op; - if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load? - op_str = "FLD_D"; - op = 0xDD; - } else { // 32-bit load - op_str = "FLD_S"; - op = 0xD9; - assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" ); - } - if( masm ) { - masm->set_inst_mark(); - emit_opcode (masm, op ); - encode_RegMem(masm, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none); - emit_opcode (masm, 0xDD ); // FSTP ST(i) - emit_d8 (masm, 0xD8+Matcher::_regEncode[dst_first] ); - masm->clear_inst_mark(); -#ifndef PRODUCT - } else if( !do_size ) { - if( size != 0 ) st->print("\n\t"); - st->print("%s ST,[ESP + #%d]\n\tFSTP %s",op_str, offset,Matcher::regName[dst_first]); -#endif - } - int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4); - return size + 3+offset_size+2; - } - - // Check for xmm reg-reg copy - if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) { - assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) || - (src_first+1 == src_second && dst_first+1 == dst_second), - "no non-adjacent float-moves" ); - return impl_movx_helper(masm,do_size,src_first,dst_first,src_second, dst_second, size, st); - } - - // Check for xmm reg-integer reg copy - if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) { - assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad), - "no 64 bit float-integer reg moves" ); - return impl_movx2gpr_helper(masm,do_size,src_first,dst_first,src_second, dst_second, size, st); - } - - // Check for xmm store - if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) { - return impl_x_helper(masm,do_size,false,ra_->reg2offset(dst_first), src_first, src_second, size, st); - } - - // Check for float xmm load - if( src_first_rc == rc_stack && dst_first_rc == rc_xmm ) { - return impl_x_helper(masm,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st); - } - - // Copy from float reg to xmm reg - if( src_first_rc == rc_float && dst_first_rc == rc_xmm ) { - // copy to the top of stack from floating point reg - // and use LEA to preserve flags - if( masm ) { - emit_opcode(masm,0x8D); // LEA ESP,[ESP-8] - emit_rm(masm, 0x1, ESP_enc, 0x04); - emit_rm(masm, 0x0, 0x04, ESP_enc); - emit_d8(masm,0xF8); -#ifndef PRODUCT - } else if( !do_size ) { - if( size != 0 ) st->print("\n\t"); - st->print("LEA ESP,[ESP-8]"); -#endif - } - size += 4; - - size = impl_fp_store_helper(masm,do_size,src_first,src_second,dst_first,dst_second,0,size, st); - - // Copy from the temp memory to the xmm reg. - size = impl_x_helper(masm,do_size,true ,0,dst_first, dst_second, size, st); - - if( masm ) { - emit_opcode(masm,0x8D); // LEA ESP,[ESP+8] - emit_rm(masm, 0x1, ESP_enc, 0x04); - emit_rm(masm, 0x0, 0x04, ESP_enc); - emit_d8(masm,0x08); -#ifndef PRODUCT - } else if( !do_size ) { - if( size != 0 ) st->print("\n\t"); - st->print("LEA ESP,[ESP+8]"); -#endif - } - size += 4; - return size; - } - - // AVX-512 opmask specific spilling. - if (src_first_rc == rc_stack && dst_first_rc == rc_kreg) { - assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair"); - assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair"); - int offset = ra_->reg2offset(src_first); - if (masm != nullptr) { - __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset)); -#ifndef PRODUCT - } else { - st->print("KMOV %s, [ESP + %d]", Matcher::regName[dst_first], offset); -#endif - } - return 0; - } - - if (src_first_rc == rc_kreg && dst_first_rc == rc_stack) { - assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair"); - assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair"); - int offset = ra_->reg2offset(dst_first); - if (masm != nullptr) { - __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first])); -#ifndef PRODUCT - } else { - st->print("KMOV [ESP + %d], %s", offset, Matcher::regName[src_first]); -#endif - } - return 0; - } - - if (src_first_rc == rc_kreg && dst_first_rc == rc_int) { - Unimplemented(); - return 0; - } - - if (src_first_rc == rc_int && dst_first_rc == rc_kreg) { - Unimplemented(); - return 0; - } - - if (src_first_rc == rc_kreg && dst_first_rc == rc_kreg) { - assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair"); - assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair"); - if (masm != nullptr) { - __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first])); -#ifndef PRODUCT - } else { - st->print("KMOV %s, %s", Matcher::regName[dst_first], Matcher::regName[src_first]); -#endif - } - return 0; - } - - assert( size > 0, "missed a case" ); - - // -------------------------------------------------------------------- - // Check for second bits still needing moving. - if( src_second == dst_second ) - return size; // Self copy; no move - assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" ); - - // Check for second word int-int move - if( src_second_rc == rc_int && dst_second_rc == rc_int ) - return impl_mov_helper(masm,do_size,src_second,dst_second,size, st); - - // Check for second word integer store - if( src_second_rc == rc_int && dst_second_rc == rc_stack ) - return impl_helper(masm,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st); - - // Check for second word integer load - if( dst_second_rc == rc_int && src_second_rc == rc_stack ) - return impl_helper(masm,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st); - - Unimplemented(); - return 0; // Mute compiler -} - -#ifndef PRODUCT -void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const { - implementation( nullptr, ra_, false, st ); -} -#endif - -void MachSpillCopyNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const { - implementation( masm, ra_, false, nullptr ); -} - -uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const { - return MachNode::size(ra_); -} - - -//============================================================================= -#ifndef PRODUCT -void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { - int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); - int reg = ra_->get_reg_first(this); - st->print("LEA %s,[ESP + #%d]",Matcher::regName[reg],offset); -} -#endif - -void BoxLockNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const { - int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); - int reg = ra_->get_encode(this); - if( offset >= 128 ) { - emit_opcode(masm, 0x8D); // LEA reg,[SP+offset] - emit_rm(masm, 0x2, reg, 0x04); - emit_rm(masm, 0x0, 0x04, ESP_enc); - emit_d32(masm, offset); - } - else { - emit_opcode(masm, 0x8D); // LEA reg,[SP+offset] - emit_rm(masm, 0x1, reg, 0x04); - emit_rm(masm, 0x0, 0x04, ESP_enc); - emit_d8(masm, offset); - } -} - -uint BoxLockNode::size(PhaseRegAlloc *ra_) const { - int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); - if( offset >= 128 ) { - return 7; - } - else { - return 4; - } -} - -//============================================================================= -#ifndef PRODUCT -void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { - st->print_cr( "CMP EAX,[ECX+4]\t# Inline cache check"); - st->print_cr("\tJNE SharedRuntime::handle_ic_miss_stub"); - st->print_cr("\tNOP"); - st->print_cr("\tNOP"); - if( !OptoBreakpoint ) - st->print_cr("\tNOP"); -} -#endif - -void MachUEPNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const { - __ ic_check(CodeEntryAlignment); -} - -uint MachUEPNode::size(PhaseRegAlloc *ra_) const { - return MachNode::size(ra_); // too many variables; just compute it - // the hard way -} - - -//============================================================================= - -// Vector calling convention not supported. -bool Matcher::supports_vector_calling_convention() { - return false; -} - -OptoRegPair Matcher::vector_return_value(uint ideal_reg) { - Unimplemented(); - return OptoRegPair(0, 0); -} - -// Is this branch offset short enough that a short branch can be used? -// -// NOTE: If the platform does not provide any short branch variants, then -// this method should return false for offset 0. -bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) { - // The passed offset is relative to address of the branch. - // On 86 a branch displacement is calculated relative to address - // of a next instruction. - offset -= br_size; - - // the short version of jmpConUCF2 contains multiple branches, - // making the reach slightly less - if (rule == jmpConUCF2_rule) - return (-126 <= offset && offset <= 125); - return (-128 <= offset && offset <= 127); -} - -// Return whether or not this register is ever used as an argument. This -// function is used on startup to build the trampoline stubs in generateOptoStub. -// Registers not mentioned will be killed by the VM call in the trampoline, and -// arguments in those registers not be available to the callee. -bool Matcher::can_be_java_arg( int reg ) { - if( reg == ECX_num || reg == EDX_num ) return true; - if( (reg == XMM0_num || reg == XMM1_num ) && UseSSE>=1 ) return true; - if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true; - return false; -} - -bool Matcher::is_spillable_arg( int reg ) { - return can_be_java_arg(reg); -} - -uint Matcher::int_pressure_limit() -{ - return (INTPRESSURE == -1) ? 6 : INTPRESSURE; -} - -uint Matcher::float_pressure_limit() -{ - return (FLOATPRESSURE == -1) ? 6 : FLOATPRESSURE; -} - -bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) { - // Use hardware integer DIV instruction when - // it is faster than a code which use multiply. - // Only when constant divisor fits into 32 bit - // (min_jint is excluded to get only correct - // positive 32 bit values from negative). - return VM_Version::has_fast_idiv() && - (divisor == (int)divisor && divisor != min_jint); -} - -// Register for DIVI projection of divmodI -RegMask Matcher::divI_proj_mask() { - return EAX_REG_mask(); -} - -// Register for MODI projection of divmodI -RegMask Matcher::modI_proj_mask() { - return EDX_REG_mask(); -} - -// Register for DIVL projection of divmodL -RegMask Matcher::divL_proj_mask() { - ShouldNotReachHere(); - return RegMask(); -} - -// Register for MODL projection of divmodL -RegMask Matcher::modL_proj_mask() { - ShouldNotReachHere(); - return RegMask(); -} - -const RegMask Matcher::method_handle_invoke_SP_save_mask() { - return NO_REG_mask(); -} - -// Returns true if the high 32 bits of the value is known to be zero. -bool is_operand_hi32_zero(Node* n) { - int opc = n->Opcode(); - if (opc == Op_AndL) { - Node* o2 = n->in(2); - if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) { - return true; - } - } - if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) { - return true; - } - return false; -} - -%} - -//----------ENCODING BLOCK----------------------------------------------------- -// This block specifies the encoding classes used by the compiler to output -// byte streams. Encoding classes generate functions which are called by -// Machine Instruction Nodes in order to generate the bit encoding of the -// instruction. Operands specify their base encoding interface with the -// interface keyword. There are currently supported four interfaces, -// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER. REG_INTER causes an -// operand to generate a function which returns its register number when -// queried. CONST_INTER causes an operand to generate a function which -// returns the value of the constant when queried. MEMORY_INTER causes an -// operand to generate four functions which return the Base Register, the -// Index Register, the Scale Value, and the Offset Value of the operand when -// queried. COND_INTER causes an operand to generate six functions which -// return the encoding code (ie - encoding bits for the instruction) -// associated with each basic boolean condition for a conditional instruction. -// Instructions specify two basic values for encoding. They use the -// ins_encode keyword to specify their encoding class (which must be one of -// the class names specified in the encoding block), and they use the -// opcode keyword to specify, in order, their primary, secondary, and -// tertiary opcode. Only the opcode sections which a particular instruction -// needs for encoding need to be specified. -encode %{ - // Build emit functions for each basic byte or larger field in the intel - // encoding scheme (opcode, rm, sib, immediate), and call them from C++ - // code in the enc_class source block. Emit functions will live in the - // main source block for now. In future, we can generalize this by - // adding a syntax that specifies the sizes of fields in an order, - // so that the adlc can build the emit functions automagically - - // Set instruction mark in MacroAssembler. This is used only in - // instructions that emit bytes directly to the CodeBuffer wraped - // in the MacroAssembler. Should go away once all "instruct" are - // patched to emit bytes only using methods in MacroAssembler. - enc_class SetInstMark %{ - __ set_inst_mark(); - %} - - enc_class ClearInstMark %{ - __ clear_inst_mark(); - %} - - // Emit primary opcode - enc_class OpcP %{ - emit_opcode(masm, $primary); - %} - - // Emit secondary opcode - enc_class OpcS %{ - emit_opcode(masm, $secondary); - %} - - // Emit opcode directly - enc_class Opcode(immI d8) %{ - emit_opcode(masm, $d8$$constant); - %} - - enc_class SizePrefix %{ - emit_opcode(masm,0x66); - %} - - enc_class RegReg (rRegI dst, rRegI src) %{ // RegReg(Many) - emit_rm(masm, 0x3, $dst$$reg, $src$$reg); - %} - - enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{ // OpcRegReg(Many) - emit_opcode(masm,$opcode$$constant); - emit_rm(masm, 0x3, $dst$$reg, $src$$reg); - %} - - enc_class mov_r32_imm0( rRegI dst ) %{ - emit_opcode( masm, 0xB8 + $dst$$reg ); // 0xB8+ rd -- MOV r32 ,imm32 - emit_d32 ( masm, 0x0 ); // imm32==0x0 - %} - - enc_class cdq_enc %{ - // Full implementation of Java idiv and irem; checks for - // special case as described in JVM spec., p.243 & p.271. - // - // normal case special case - // - // input : rax,: dividend min_int - // reg: divisor -1 - // - // output: rax,: quotient (= rax, idiv reg) min_int - // rdx: remainder (= rax, irem reg) 0 - // - // Code sequnce: - // - // 81 F8 00 00 00 80 cmp rax,80000000h - // 0F 85 0B 00 00 00 jne normal_case - // 33 D2 xor rdx,edx - // 83 F9 FF cmp rcx,0FFh - // 0F 84 03 00 00 00 je done - // normal_case: - // 99 cdq - // F7 F9 idiv rax,ecx - // done: - // - emit_opcode(masm,0x81); emit_d8(masm,0xF8); - emit_opcode(masm,0x00); emit_d8(masm,0x00); - emit_opcode(masm,0x00); emit_d8(masm,0x80); // cmp rax,80000000h - emit_opcode(masm,0x0F); emit_d8(masm,0x85); - emit_opcode(masm,0x0B); emit_d8(masm,0x00); - emit_opcode(masm,0x00); emit_d8(masm,0x00); // jne normal_case - emit_opcode(masm,0x33); emit_d8(masm,0xD2); // xor rdx,edx - emit_opcode(masm,0x83); emit_d8(masm,0xF9); emit_d8(masm,0xFF); // cmp rcx,0FFh - emit_opcode(masm,0x0F); emit_d8(masm,0x84); - emit_opcode(masm,0x03); emit_d8(masm,0x00); - emit_opcode(masm,0x00); emit_d8(masm,0x00); // je done - // normal_case: - emit_opcode(masm,0x99); // cdq - // idiv (note: must be emitted by the user of this rule) - // normal: - %} - - // Dense encoding for older common ops - enc_class Opc_plus(immI opcode, rRegI reg) %{ - emit_opcode(masm, $opcode$$constant + $reg$$reg); - %} - - - // Opcde enc_class for 8/32 bit immediate instructions with sign-extension - enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit - // Check for 8-bit immediate, and set sign extend bit in opcode - if (($imm$$constant >= -128) && ($imm$$constant <= 127)) { - emit_opcode(masm, $primary | 0x02); - } - else { // If 32-bit immediate - emit_opcode(masm, $primary); - } - %} - - enc_class OpcSErm (rRegI dst, immI imm) %{ // OpcSEr/m - // Emit primary opcode and set sign-extend bit - // Check for 8-bit immediate, and set sign extend bit in opcode - if (($imm$$constant >= -128) && ($imm$$constant <= 127)) { - emit_opcode(masm, $primary | 0x02); } - else { // If 32-bit immediate - emit_opcode(masm, $primary); - } - // Emit r/m byte with secondary opcode, after primary opcode. - emit_rm(masm, 0x3, $secondary, $dst$$reg); - %} - - enc_class Con8or32 (immI imm) %{ // Con8or32(storeImmI), 8 or 32 bits - // Check for 8-bit immediate, and set sign extend bit in opcode - if (($imm$$constant >= -128) && ($imm$$constant <= 127)) { - $$$emit8$imm$$constant; - } - else { // If 32-bit immediate - // Output immediate - $$$emit32$imm$$constant; - } - %} - - enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{ - // Emit primary opcode and set sign-extend bit - // Check for 8-bit immediate, and set sign extend bit in opcode - int con = (int)$imm$$constant; // Throw away top bits - emit_opcode(masm, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary); - // Emit r/m byte with secondary opcode, after primary opcode. - emit_rm(masm, 0x3, $secondary, $dst$$reg); - if ((con >= -128) && (con <= 127)) emit_d8 (masm,con); - else emit_d32(masm,con); - %} - - enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{ - // Emit primary opcode and set sign-extend bit - // Check for 8-bit immediate, and set sign extend bit in opcode - int con = (int)($imm$$constant >> 32); // Throw away bottom bits - emit_opcode(masm, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary); - // Emit r/m byte with tertiary opcode, after primary opcode. - emit_rm(masm, 0x3, $tertiary, HIGH_FROM_LOW_ENC($dst$$reg)); - if ((con >= -128) && (con <= 127)) emit_d8 (masm,con); - else emit_d32(masm,con); - %} - - enc_class OpcSReg (rRegI dst) %{ // BSWAP - emit_cc(masm, $secondary, $dst$$reg ); - %} - - enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP - int destlo = $dst$$reg; - int desthi = HIGH_FROM_LOW_ENC(destlo); - // bswap lo - emit_opcode(masm, 0x0F); - emit_cc(masm, 0xC8, destlo); - // bswap hi - emit_opcode(masm, 0x0F); - emit_cc(masm, 0xC8, desthi); - // xchg lo and hi - emit_opcode(masm, 0x87); - emit_rm(masm, 0x3, destlo, desthi); - %} - - enc_class RegOpc (rRegI div) %{ // IDIV, IMOD, JMP indirect, ... - emit_rm(masm, 0x3, $secondary, $div$$reg ); - %} - - enc_class enc_cmov(cmpOp cop ) %{ // CMOV - $$$emit8$primary; - emit_cc(masm, $secondary, $cop$$cmpcode); - %} - - enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV - int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1); - emit_d8(masm, op >> 8 ); - emit_d8(masm, op & 255); - %} - - // emulate a CMOV with a conditional branch around a MOV - enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV - // Invert sense of branch from sense of CMOV - emit_cc( masm, 0x70, ($cop$$cmpcode^1) ); - emit_d8( masm, $brOffs$$constant ); - %} - - enc_class enc_PartialSubtypeCheck( ) %{ - Register Redi = as_Register(EDI_enc); // result register - Register Reax = as_Register(EAX_enc); // super class - Register Recx = as_Register(ECX_enc); // killed - Register Resi = as_Register(ESI_enc); // sub class - Label miss; - - // NB: Callers may assume that, when $result is a valid register, - // check_klass_subtype_slow_path sets it to a nonzero value. - __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi, - nullptr, &miss, - /*set_cond_codes:*/ true); - if ($primary) { - __ xorptr(Redi, Redi); - } - __ bind(miss); - %} - - enc_class FFree_Float_Stack_All %{ // Free_Float_Stack_All - int start = __ offset(); - if (UseSSE >= 2) { - if (VerifyFPU) { - __ verify_FPU(0, "must be empty in SSE2+ mode"); - } - } else { - // External c_calling_convention expects the FPU stack to be 'clean'. - // Compiled code leaves it dirty. Do cleanup now. - __ empty_FPU_stack(); - } - if (sizeof_FFree_Float_Stack_All == -1) { - sizeof_FFree_Float_Stack_All = __ offset() - start; - } else { - assert(__ offset() - start == sizeof_FFree_Float_Stack_All, "wrong size"); - } - %} - - enc_class Verify_FPU_For_Leaf %{ - if( VerifyFPU ) { - __ verify_FPU( -3, "Returning from Runtime Leaf call"); - } - %} - - enc_class Java_To_Runtime (method meth) %{ // CALL Java_To_Runtime, Java_To_Runtime_Leaf - // This is the instruction starting address for relocation info. - __ set_inst_mark(); - $$$emit8$primary; - // CALL directly to the runtime - emit_d32_reloc(masm, ($meth$$method - (int)(__ pc()) - 4), - runtime_call_Relocation::spec(), RELOC_IMM32 ); - __ clear_inst_mark(); - __ post_call_nop(); - - if (UseSSE >= 2) { - BasicType rt = tf()->return_type(); - - if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) { - // A C runtime call where the return value is unused. In SSE2+ - // mode the result needs to be removed from the FPU stack. It's - // likely that this function call could be removed by the - // optimizer if the C function is a pure function. - __ ffree(0); - } else if (rt == T_FLOAT) { - __ lea(rsp, Address(rsp, -4)); - __ fstp_s(Address(rsp, 0)); - __ movflt(xmm0, Address(rsp, 0)); - __ lea(rsp, Address(rsp, 4)); - } else if (rt == T_DOUBLE) { - __ lea(rsp, Address(rsp, -8)); - __ fstp_d(Address(rsp, 0)); - __ movdbl(xmm0, Address(rsp, 0)); - __ lea(rsp, Address(rsp, 8)); - } - } - %} - - enc_class pre_call_resets %{ - // If method sets FPU control word restore it here - debug_only(int off0 = __ offset()); - if (ra_->C->in_24_bit_fp_mode()) { - __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std())); - } - // Clear upper bits of YMM registers when current compiled code uses - // wide vectors to avoid AVX <-> SSE transition penalty during call. - __ vzeroupper(); - debug_only(int off1 = __ offset()); - assert(off1 - off0 == pre_call_resets_size(), "correct size prediction"); - %} - - enc_class post_call_FPU %{ - // If method sets FPU control word do it here also - if (Compile::current()->in_24_bit_fp_mode()) { - __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24())); - } - %} - - enc_class Java_Static_Call (method meth) %{ // JAVA STATIC CALL - // CALL to fixup routine. Fixup routine uses ScopeDesc info to determine - // who we intended to call. - __ set_inst_mark(); - $$$emit8$primary; - - if (!_method) { - emit_d32_reloc(masm, ($meth$$method - (int)(__ pc()) - 4), - runtime_call_Relocation::spec(), - RELOC_IMM32); - __ clear_inst_mark(); - __ post_call_nop(); - } else { - int method_index = resolved_method_index(masm); - RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index) - : static_call_Relocation::spec(method_index); - emit_d32_reloc(masm, ($meth$$method - (int)(__ pc()) - 4), - rspec, RELOC_DISP32); - __ post_call_nop(); - address mark = __ inst_mark(); - if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) { - // Calls of the same statically bound method can share - // a stub to the interpreter. - __ code()->shared_stub_to_interp_for(_method, __ code()->insts()->mark_off()); - __ clear_inst_mark(); - } else { - // Emit stubs for static call. - address stub = CompiledDirectCall::emit_to_interp_stub(masm, mark); - __ clear_inst_mark(); - if (stub == nullptr) { - ciEnv::current()->record_failure("CodeCache is full"); - return; - } - } - } - %} - - enc_class Java_Dynamic_Call (method meth) %{ // JAVA DYNAMIC CALL - __ ic_call((address)$meth$$method, resolved_method_index(masm)); - __ post_call_nop(); - %} - - enc_class Java_Compiled_Call (method meth) %{ // JAVA COMPILED CALL - int disp = in_bytes(Method::from_compiled_offset()); - assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small"); - - // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())] - __ set_inst_mark(); - $$$emit8$primary; - emit_rm(masm, 0x01, $secondary, EAX_enc ); // R/M byte - emit_d8(masm, disp); // Displacement - __ clear_inst_mark(); - __ post_call_nop(); - %} - - enc_class RegOpcImm (rRegI dst, immI8 shift) %{ // SHL, SAR, SHR - $$$emit8$primary; - emit_rm(masm, 0x3, $secondary, $dst$$reg); - $$$emit8$shift$$constant; - %} - - enc_class LdImmI (rRegI dst, immI src) %{ // Load Immediate - // Load immediate does not have a zero or sign extended version - // for 8-bit immediates - emit_opcode(masm, 0xB8 + $dst$$reg); - $$$emit32$src$$constant; - %} - - enc_class LdImmP (rRegI dst, immI src) %{ // Load Immediate - // Load immediate does not have a zero or sign extended version - // for 8-bit immediates - emit_opcode(masm, $primary + $dst$$reg); - $$$emit32$src$$constant; - %} - - enc_class LdImmL_Lo( eRegL dst, immL src) %{ // Load Immediate - // Load immediate does not have a zero or sign extended version - // for 8-bit immediates - int dst_enc = $dst$$reg; - int src_con = $src$$constant & 0x0FFFFFFFFL; - if (src_con == 0) { - // xor dst, dst - emit_opcode(masm, 0x33); - emit_rm(masm, 0x3, dst_enc, dst_enc); - } else { - emit_opcode(masm, $primary + dst_enc); - emit_d32(masm, src_con); - } - %} - - enc_class LdImmL_Hi( eRegL dst, immL src) %{ // Load Immediate - // Load immediate does not have a zero or sign extended version - // for 8-bit immediates - int dst_enc = $dst$$reg + 2; - int src_con = ((julong)($src$$constant)) >> 32; - if (src_con == 0) { - // xor dst, dst - emit_opcode(masm, 0x33); - emit_rm(masm, 0x3, dst_enc, dst_enc); - } else { - emit_opcode(masm, $primary + dst_enc); - emit_d32(masm, src_con); - } - %} - - - // Encode a reg-reg copy. If it is useless, then empty encoding. - enc_class enc_Copy( rRegI dst, rRegI src ) %{ - encode_Copy( masm, $dst$$reg, $src$$reg ); - %} - - enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{ - encode_Copy( masm, $dst$$reg, $src$$reg ); - %} - - enc_class RegReg (rRegI dst, rRegI src) %{ // RegReg(Many) - emit_rm(masm, 0x3, $dst$$reg, $src$$reg); - %} - - enc_class RegReg_Lo(eRegL dst, eRegL src) %{ // RegReg(Many) - $$$emit8$primary; - emit_rm(masm, 0x3, $dst$$reg, $src$$reg); - %} - - enc_class RegReg_Hi(eRegL dst, eRegL src) %{ // RegReg(Many) - $$$emit8$secondary; - emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg)); - %} - - enc_class RegReg_Lo2(eRegL dst, eRegL src) %{ // RegReg(Many) - emit_rm(masm, 0x3, $dst$$reg, $src$$reg); - %} - - enc_class RegReg_Hi2(eRegL dst, eRegL src) %{ // RegReg(Many) - emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg)); - %} - - enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{ - emit_rm(masm, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($src$$reg)); - %} - - enc_class Con32 (immI src) %{ // Con32(storeImmI) - // Output immediate - $$$emit32$src$$constant; - %} - - enc_class Con32FPR_as_bits(immFPR src) %{ // storeF_imm - // Output Float immediate bits - jfloat jf = $src$$constant; - int jf_as_bits = jint_cast( jf ); - emit_d32(masm, jf_as_bits); - %} - - enc_class Con32F_as_bits(immF src) %{ // storeX_imm - // Output Float immediate bits - jfloat jf = $src$$constant; - int jf_as_bits = jint_cast( jf ); - emit_d32(masm, jf_as_bits); - %} - - enc_class Con16 (immI src) %{ // Con16(storeImmI) - // Output immediate - $$$emit16$src$$constant; - %} - - enc_class Con_d32(immI src) %{ - emit_d32(masm,$src$$constant); - %} - - enc_class conmemref (eRegP t1) %{ // Con32(storeImmI) - // Output immediate memory reference - emit_rm(masm, 0x00, $t1$$reg, 0x05 ); - emit_d32(masm, 0x00); - %} - - enc_class lock_prefix( ) %{ - emit_opcode(masm,0xF0); // [Lock] - %} - - // Cmp-xchg long value. - // Note: we need to swap rbx, and rcx before and after the - // cmpxchg8 instruction because the instruction uses - // rcx as the high order word of the new value to store but - // our register encoding uses rbx,. - enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{ - - // XCHG rbx,ecx - emit_opcode(masm,0x87); - emit_opcode(masm,0xD9); - // [Lock] - emit_opcode(masm,0xF0); - // CMPXCHG8 [Eptr] - emit_opcode(masm,0x0F); - emit_opcode(masm,0xC7); - emit_rm( masm, 0x0, 1, $mem_ptr$$reg ); - // XCHG rbx,ecx - emit_opcode(masm,0x87); - emit_opcode(masm,0xD9); - %} - - enc_class enc_cmpxchg(eSIRegP mem_ptr) %{ - // [Lock] - emit_opcode(masm,0xF0); - - // CMPXCHG [Eptr] - emit_opcode(masm,0x0F); - emit_opcode(masm,0xB1); - emit_rm( masm, 0x0, 1, $mem_ptr$$reg ); - %} - - enc_class enc_cmpxchgb(eSIRegP mem_ptr) %{ - // [Lock] - emit_opcode(masm,0xF0); - - // CMPXCHGB [Eptr] - emit_opcode(masm,0x0F); - emit_opcode(masm,0xB0); - emit_rm( masm, 0x0, 1, $mem_ptr$$reg ); - %} - - enc_class enc_cmpxchgw(eSIRegP mem_ptr) %{ - // [Lock] - emit_opcode(masm,0xF0); - - // 16-bit mode - emit_opcode(masm, 0x66); - - // CMPXCHGW [Eptr] - emit_opcode(masm,0x0F); - emit_opcode(masm,0xB1); - emit_rm( masm, 0x0, 1, $mem_ptr$$reg ); - %} - - enc_class enc_flags_ne_to_boolean( iRegI res ) %{ - int res_encoding = $res$$reg; - - // MOV res,0 - emit_opcode( masm, 0xB8 + res_encoding); - emit_d32( masm, 0 ); - // JNE,s fail - emit_opcode(masm,0x75); - emit_d8(masm, 5 ); - // MOV res,1 - emit_opcode( masm, 0xB8 + res_encoding); - emit_d32( masm, 1 ); - // fail: - %} - - enc_class RegMem (rRegI ereg, memory mem) %{ // emit_reg_mem - int reg_encoding = $ereg$$reg; - int base = $mem$$base; - int index = $mem$$index; - int scale = $mem$$scale; - int displace = $mem$$disp; - relocInfo::relocType disp_reloc = $mem->disp_reloc(); - encode_RegMem(masm, reg_encoding, base, index, scale, displace, disp_reloc); - %} - - enc_class RegMem_Hi(eRegL ereg, memory mem) %{ // emit_reg_mem - int reg_encoding = HIGH_FROM_LOW_ENC($ereg$$reg); // Hi register of pair, computed from lo - int base = $mem$$base; - int index = $mem$$index; - int scale = $mem$$scale; - int displace = $mem$$disp + 4; // Offset is 4 further in memory - assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" ); - encode_RegMem(masm, reg_encoding, base, index, scale, displace, relocInfo::none); - %} - - enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{ - int r1, r2; - if( $tertiary == 0xA4 ) { r1 = $dst$$reg; r2 = HIGH_FROM_LOW_ENC($dst$$reg); } - else { r2 = $dst$$reg; r1 = HIGH_FROM_LOW_ENC($dst$$reg); } - emit_opcode(masm,0x0F); - emit_opcode(masm,$tertiary); - emit_rm(masm, 0x3, r1, r2); - emit_d8(masm,$cnt$$constant); - emit_d8(masm,$primary); - emit_rm(masm, 0x3, $secondary, r1); - emit_d8(masm,$cnt$$constant); - %} - - enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{ - emit_opcode( masm, 0x8B ); // Move - emit_rm(masm, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg)); - if( $cnt$$constant > 32 ) { // Shift, if not by zero - emit_d8(masm,$primary); - emit_rm(masm, 0x3, $secondary, $dst$$reg); - emit_d8(masm,$cnt$$constant-32); - } - emit_d8(masm,$primary); - emit_rm(masm, 0x3, $secondary, HIGH_FROM_LOW_ENC($dst$$reg)); - emit_d8(masm,31); - %} - - enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{ - int r1, r2; - if( $secondary == 0x5 ) { r1 = $dst$$reg; r2 = HIGH_FROM_LOW_ENC($dst$$reg); } - else { r2 = $dst$$reg; r1 = HIGH_FROM_LOW_ENC($dst$$reg); } - - emit_opcode( masm, 0x8B ); // Move r1,r2 - emit_rm(masm, 0x3, r1, r2); - if( $cnt$$constant > 32 ) { // Shift, if not by zero - emit_opcode(masm,$primary); - emit_rm(masm, 0x3, $secondary, r1); - emit_d8(masm,$cnt$$constant-32); - } - emit_opcode(masm,0x33); // XOR r2,r2 - emit_rm(masm, 0x3, r2, r2); - %} - - // Clone of RegMem but accepts an extra parameter to access each - // half of a double in memory; it never needs relocation info. - enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{ - emit_opcode(masm,$opcode$$constant); - int reg_encoding = $rm_reg$$reg; - int base = $mem$$base; - int index = $mem$$index; - int scale = $mem$$scale; - int displace = $mem$$disp + $disp_for_half$$constant; - relocInfo::relocType disp_reloc = relocInfo::none; - encode_RegMem(masm, reg_encoding, base, index, scale, displace, disp_reloc); - %} - - // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!! - // - // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant - // and it never needs relocation information. - // Frequently used to move data between FPU's Stack Top and memory. - enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{ - int rm_byte_opcode = $rm_opcode$$constant; - int base = $mem$$base; - int index = $mem$$index; - int scale = $mem$$scale; - int displace = $mem$$disp; - assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" ); - encode_RegMem(masm, rm_byte_opcode, base, index, scale, displace, relocInfo::none); - %} - - enc_class RMopc_Mem (immI rm_opcode, memory mem) %{ - int rm_byte_opcode = $rm_opcode$$constant; - int base = $mem$$base; - int index = $mem$$index; - int scale = $mem$$scale; - int displace = $mem$$disp; - relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals - encode_RegMem(masm, rm_byte_opcode, base, index, scale, displace, disp_reloc); - %} - - enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{ // emit_reg_lea - int reg_encoding = $dst$$reg; - int base = $src0$$reg; // 0xFFFFFFFF indicates no base - int index = 0x04; // 0x04 indicates no index - int scale = 0x00; // 0x00 indicates no scale - int displace = $src1$$constant; // 0x00 indicates no displacement - relocInfo::relocType disp_reloc = relocInfo::none; - encode_RegMem(masm, reg_encoding, base, index, scale, displace, disp_reloc); - %} - - enc_class min_enc (rRegI dst, rRegI src) %{ // MIN - // Compare dst,src - emit_opcode(masm,0x3B); - emit_rm(masm, 0x3, $dst$$reg, $src$$reg); - // jmp dst < src around move - emit_opcode(masm,0x7C); - emit_d8(masm,2); - // move dst,src - emit_opcode(masm,0x8B); - emit_rm(masm, 0x3, $dst$$reg, $src$$reg); - %} - - enc_class max_enc (rRegI dst, rRegI src) %{ // MAX - // Compare dst,src - emit_opcode(masm,0x3B); - emit_rm(masm, 0x3, $dst$$reg, $src$$reg); - // jmp dst > src around move - emit_opcode(masm,0x7F); - emit_d8(masm,2); - // move dst,src - emit_opcode(masm,0x8B); - emit_rm(masm, 0x3, $dst$$reg, $src$$reg); - %} - - enc_class enc_FPR_store(memory mem, regDPR src) %{ - // If src is FPR1, we can just FST to store it. - // Else we need to FLD it to FPR1, then FSTP to store/pop it. - int reg_encoding = 0x2; // Just store - int base = $mem$$base; - int index = $mem$$index; - int scale = $mem$$scale; - int displace = $mem$$disp; - relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals - if( $src$$reg != FPR1L_enc ) { - reg_encoding = 0x3; // Store & pop - emit_opcode( masm, 0xD9 ); // FLD (i.e., push it) - emit_d8( masm, 0xC0-1+$src$$reg ); - } - __ set_inst_mark(); // Mark start of opcode for reloc info in mem operand - emit_opcode(masm,$primary); - encode_RegMem(masm, reg_encoding, base, index, scale, displace, disp_reloc); - __ clear_inst_mark(); - %} - - enc_class neg_reg(rRegI dst) %{ - // NEG $dst - emit_opcode(masm,0xF7); - emit_rm(masm, 0x3, 0x03, $dst$$reg ); - %} - - enc_class setLT_reg(eCXRegI dst) %{ - // SETLT $dst - emit_opcode(masm,0x0F); - emit_opcode(masm,0x9C); - emit_rm( masm, 0x3, 0x4, $dst$$reg ); - %} - - enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{ // cadd_cmpLT - int tmpReg = $tmp$$reg; - - // SUB $p,$q - emit_opcode(masm,0x2B); - emit_rm(masm, 0x3, $p$$reg, $q$$reg); - // SBB $tmp,$tmp - emit_opcode(masm,0x1B); - emit_rm(masm, 0x3, tmpReg, tmpReg); - // AND $tmp,$y - emit_opcode(masm,0x23); - emit_rm(masm, 0x3, tmpReg, $y$$reg); - // ADD $p,$tmp - emit_opcode(masm,0x03); - emit_rm(masm, 0x3, $p$$reg, tmpReg); - %} - - enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{ - // TEST shift,32 - emit_opcode(masm,0xF7); - emit_rm(masm, 0x3, 0, ECX_enc); - emit_d32(masm,0x20); - // JEQ,s small - emit_opcode(masm, 0x74); - emit_d8(masm, 0x04); - // MOV $dst.hi,$dst.lo - emit_opcode( masm, 0x8B ); - emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg ); - // CLR $dst.lo - emit_opcode(masm, 0x33); - emit_rm(masm, 0x3, $dst$$reg, $dst$$reg); -// small: - // SHLD $dst.hi,$dst.lo,$shift - emit_opcode(masm,0x0F); - emit_opcode(masm,0xA5); - emit_rm(masm, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg)); - // SHL $dst.lo,$shift" - emit_opcode(masm,0xD3); - emit_rm(masm, 0x3, 0x4, $dst$$reg ); - %} - - enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{ - // TEST shift,32 - emit_opcode(masm,0xF7); - emit_rm(masm, 0x3, 0, ECX_enc); - emit_d32(masm,0x20); - // JEQ,s small - emit_opcode(masm, 0x74); - emit_d8(masm, 0x04); - // MOV $dst.lo,$dst.hi - emit_opcode( masm, 0x8B ); - emit_rm(masm, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg) ); - // CLR $dst.hi - emit_opcode(masm, 0x33); - emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($dst$$reg)); -// small: - // SHRD $dst.lo,$dst.hi,$shift - emit_opcode(masm,0x0F); - emit_opcode(masm,0xAD); - emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg); - // SHR $dst.hi,$shift" - emit_opcode(masm,0xD3); - emit_rm(masm, 0x3, 0x5, HIGH_FROM_LOW_ENC($dst$$reg) ); - %} - - enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{ - // TEST shift,32 - emit_opcode(masm,0xF7); - emit_rm(masm, 0x3, 0, ECX_enc); - emit_d32(masm,0x20); - // JEQ,s small - emit_opcode(masm, 0x74); - emit_d8(masm, 0x05); - // MOV $dst.lo,$dst.hi - emit_opcode( masm, 0x8B ); - emit_rm(masm, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg) ); - // SAR $dst.hi,31 - emit_opcode(masm, 0xC1); - emit_rm(masm, 0x3, 7, HIGH_FROM_LOW_ENC($dst$$reg) ); - emit_d8(masm, 0x1F ); -// small: - // SHRD $dst.lo,$dst.hi,$shift - emit_opcode(masm,0x0F); - emit_opcode(masm,0xAD); - emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg); - // SAR $dst.hi,$shift" - emit_opcode(masm,0xD3); - emit_rm(masm, 0x3, 0x7, HIGH_FROM_LOW_ENC($dst$$reg) ); - %} - - - // ----------------- Encodings for floating point unit ----------------- - // May leave result in FPU-TOS or FPU reg depending on opcodes - enc_class OpcReg_FPR(regFPR src) %{ // FMUL, FDIV - $$$emit8$primary; - emit_rm(masm, 0x3, $secondary, $src$$reg ); - %} - - // Pop argument in FPR0 with FSTP ST(0) - enc_class PopFPU() %{ - emit_opcode( masm, 0xDD ); - emit_d8( masm, 0xD8 ); - %} - - // !!!!! equivalent to Pop_Reg_F - enc_class Pop_Reg_DPR( regDPR dst ) %{ - emit_opcode( masm, 0xDD ); // FSTP ST(i) - emit_d8( masm, 0xD8+$dst$$reg ); - %} - - enc_class Push_Reg_DPR( regDPR dst ) %{ - emit_opcode( masm, 0xD9 ); - emit_d8( masm, 0xC0-1+$dst$$reg ); // FLD ST(i-1) - %} - - enc_class strictfp_bias1( regDPR dst ) %{ - emit_opcode( masm, 0xDB ); // FLD m80real - emit_opcode( masm, 0x2D ); - emit_d32( masm, (int)StubRoutines::x86::addr_fpu_subnormal_bias1() ); - emit_opcode( masm, 0xDE ); // FMULP ST(dst), ST0 - emit_opcode( masm, 0xC8+$dst$$reg ); - %} - - enc_class strictfp_bias2( regDPR dst ) %{ - emit_opcode( masm, 0xDB ); // FLD m80real - emit_opcode( masm, 0x2D ); - emit_d32( masm, (int)StubRoutines::x86::addr_fpu_subnormal_bias2() ); - emit_opcode( masm, 0xDE ); // FMULP ST(dst), ST0 - emit_opcode( masm, 0xC8+$dst$$reg ); - %} - - // Special case for moving an integer register to a stack slot. - enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS - store_to_stackslot( masm, $primary, $src$$reg, $dst$$disp ); - %} - - // Special case for moving a register to a stack slot. - enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS - // Opcode already emitted - emit_rm( masm, 0x02, $src$$reg, ESP_enc ); // R/M byte - emit_rm( masm, 0x00, ESP_enc, ESP_enc); // SIB byte - emit_d32(masm, $dst$$disp); // Displacement - %} - - // Push the integer in stackSlot 'src' onto FP-stack - enc_class Push_Mem_I( memory src ) %{ // FILD [ESP+src] - store_to_stackslot( masm, $primary, $secondary, $src$$disp ); - %} - - // Push FPU's TOS float to a stack-slot, and pop FPU-stack - enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst] - store_to_stackslot( masm, 0xD9, 0x03, $dst$$disp ); - %} - - // Same as Pop_Mem_F except for opcode - // Push FPU's TOS double to a stack-slot, and pop FPU-stack - enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst] - store_to_stackslot( masm, 0xDD, 0x03, $dst$$disp ); - %} - - enc_class Pop_Reg_FPR( regFPR dst ) %{ - emit_opcode( masm, 0xDD ); // FSTP ST(i) - emit_d8( masm, 0xD8+$dst$$reg ); - %} - - enc_class Push_Reg_FPR( regFPR dst ) %{ - emit_opcode( masm, 0xD9 ); // FLD ST(i-1) - emit_d8( masm, 0xC0-1+$dst$$reg ); - %} - - // Push FPU's float to a stack-slot, and pop FPU-stack - enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{ - int pop = 0x02; - if ($src$$reg != FPR1L_enc) { - emit_opcode( masm, 0xD9 ); // FLD ST(i-1) - emit_d8( masm, 0xC0-1+$src$$reg ); - pop = 0x03; - } - store_to_stackslot( masm, 0xD9, pop, $dst$$disp ); // FST

_S [ESP+dst] - %} - - // Push FPU's double to a stack-slot, and pop FPU-stack - enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{ - int pop = 0x02; - if ($src$$reg != FPR1L_enc) { - emit_opcode( masm, 0xD9 ); // FLD ST(i-1) - emit_d8( masm, 0xC0-1+$src$$reg ); - pop = 0x03; - } - store_to_stackslot( masm, 0xDD, pop, $dst$$disp ); // FST

_D [ESP+dst] - %} - - // Push FPU's double to a FPU-stack-slot, and pop FPU-stack - enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{ - int pop = 0xD0 - 1; // -1 since we skip FLD - if ($src$$reg != FPR1L_enc) { - emit_opcode( masm, 0xD9 ); // FLD ST(src-1) - emit_d8( masm, 0xC0-1+$src$$reg ); - pop = 0xD8; - } - emit_opcode( masm, 0xDD ); - emit_d8( masm, pop+$dst$$reg ); // FST

ST(i) - %} - - - enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{ - // load dst in FPR0 - emit_opcode( masm, 0xD9 ); - emit_d8( masm, 0xC0-1+$dst$$reg ); - if ($src$$reg != FPR1L_enc) { - // fincstp - emit_opcode (masm, 0xD9); - emit_opcode (masm, 0xF7); - // swap src with FPR1: - // FXCH FPR1 with src - emit_opcode(masm, 0xD9); - emit_d8(masm, 0xC8-1+$src$$reg ); - // fdecstp - emit_opcode (masm, 0xD9); - emit_opcode (masm, 0xF6); - } - %} - - enc_class Push_ResultD(regD dst) %{ - __ fstp_d(Address(rsp, 0)); - __ movdbl($dst$$XMMRegister, Address(rsp, 0)); - __ addptr(rsp, 8); - %} - - enc_class Push_ResultF(regF dst, immI d8) %{ - __ fstp_s(Address(rsp, 0)); - __ movflt($dst$$XMMRegister, Address(rsp, 0)); - __ addptr(rsp, $d8$$constant); - %} - - enc_class Push_SrcD(regD src) %{ - __ subptr(rsp, 8); - __ movdbl(Address(rsp, 0), $src$$XMMRegister); - __ fld_d(Address(rsp, 0)); - %} - - enc_class push_stack_temp_qword() %{ - __ subptr(rsp, 8); - %} - - enc_class pop_stack_temp_qword() %{ - __ addptr(rsp, 8); - %} - - enc_class push_xmm_to_fpr1(regD src) %{ - __ movdbl(Address(rsp, 0), $src$$XMMRegister); - __ fld_d(Address(rsp, 0)); - %} - - enc_class fnstsw_sahf_skip_parity() %{ - // fnstsw ax - emit_opcode( masm, 0xDF ); - emit_opcode( masm, 0xE0 ); - // sahf - emit_opcode( masm, 0x9E ); - // jnp ::skip - emit_opcode( masm, 0x7B ); - emit_opcode( masm, 0x05 ); - %} - - enc_class fpu_flags() %{ - // fnstsw_ax - emit_opcode( masm, 0xDF); - emit_opcode( masm, 0xE0); - // test ax,0x0400 - emit_opcode( masm, 0x66 ); // operand-size prefix for 16-bit immediate - emit_opcode( masm, 0xA9 ); - emit_d16 ( masm, 0x0400 ); - // // // This sequence works, but stalls for 12-16 cycles on PPro - // // test rax,0x0400 - // emit_opcode( masm, 0xA9 ); - // emit_d32 ( masm, 0x00000400 ); - // - // jz exit (no unordered comparison) - emit_opcode( masm, 0x74 ); - emit_d8 ( masm, 0x02 ); - // mov ah,1 - treat as LT case (set carry flag) - emit_opcode( masm, 0xB4 ); - emit_d8 ( masm, 0x01 ); - // sahf - emit_opcode( masm, 0x9E); - %} - - enc_class cmpF_P6_fixup() %{ - // Fixup the integer flags in case comparison involved a NaN - // - // JNP exit (no unordered comparison, P-flag is set by NaN) - emit_opcode( masm, 0x7B ); - emit_d8 ( masm, 0x03 ); - // MOV AH,1 - treat as LT case (set carry flag) - emit_opcode( masm, 0xB4 ); - emit_d8 ( masm, 0x01 ); - // SAHF - emit_opcode( masm, 0x9E); - // NOP // target for branch to avoid branch to branch - emit_opcode( masm, 0x90); - %} - -// fnstsw_ax(); -// sahf(); -// movl(dst, nan_result); -// jcc(Assembler::parity, exit); -// movl(dst, less_result); -// jcc(Assembler::below, exit); -// movl(dst, equal_result); -// jcc(Assembler::equal, exit); -// movl(dst, greater_result); - -// less_result = 1; -// greater_result = -1; -// equal_result = 0; -// nan_result = -1; - - enc_class CmpF_Result(rRegI dst) %{ - // fnstsw_ax(); - emit_opcode( masm, 0xDF); - emit_opcode( masm, 0xE0); - // sahf - emit_opcode( masm, 0x9E); - // movl(dst, nan_result); - emit_opcode( masm, 0xB8 + $dst$$reg); - emit_d32( masm, -1 ); - // jcc(Assembler::parity, exit); - emit_opcode( masm, 0x7A ); - emit_d8 ( masm, 0x13 ); - // movl(dst, less_result); - emit_opcode( masm, 0xB8 + $dst$$reg); - emit_d32( masm, -1 ); - // jcc(Assembler::below, exit); - emit_opcode( masm, 0x72 ); - emit_d8 ( masm, 0x0C ); - // movl(dst, equal_result); - emit_opcode( masm, 0xB8 + $dst$$reg); - emit_d32( masm, 0 ); - // jcc(Assembler::equal, exit); - emit_opcode( masm, 0x74 ); - emit_d8 ( masm, 0x05 ); - // movl(dst, greater_result); - emit_opcode( masm, 0xB8 + $dst$$reg); - emit_d32( masm, 1 ); - %} - - - // Compare the longs and set flags - // BROKEN! Do Not use as-is - enc_class cmpl_test( eRegL src1, eRegL src2 ) %{ - // CMP $src1.hi,$src2.hi - emit_opcode( masm, 0x3B ); - emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($src1$$reg), HIGH_FROM_LOW_ENC($src2$$reg) ); - // JNE,s done - emit_opcode(masm,0x75); - emit_d8(masm, 2 ); - // CMP $src1.lo,$src2.lo - emit_opcode( masm, 0x3B ); - emit_rm(masm, 0x3, $src1$$reg, $src2$$reg ); -// done: - %} - - enc_class convert_int_long( regL dst, rRegI src ) %{ - // mov $dst.lo,$src - int dst_encoding = $dst$$reg; - int src_encoding = $src$$reg; - encode_Copy( masm, dst_encoding , src_encoding ); - // mov $dst.hi,$src - encode_Copy( masm, HIGH_FROM_LOW_ENC(dst_encoding), src_encoding ); - // sar $dst.hi,31 - emit_opcode( masm, 0xC1 ); - emit_rm(masm, 0x3, 7, HIGH_FROM_LOW_ENC(dst_encoding) ); - emit_d8(masm, 0x1F ); - %} - - enc_class convert_long_double( eRegL src ) %{ - // push $src.hi - emit_opcode(masm, 0x50+HIGH_FROM_LOW_ENC($src$$reg)); - // push $src.lo - emit_opcode(masm, 0x50+$src$$reg ); - // fild 64-bits at [SP] - emit_opcode(masm,0xdf); - emit_d8(masm, 0x6C); - emit_d8(masm, 0x24); - emit_d8(masm, 0x00); - // pop stack - emit_opcode(masm, 0x83); // add SP, #8 - emit_rm(masm, 0x3, 0x00, ESP_enc); - emit_d8(masm, 0x8); - %} - - enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{ - // IMUL EDX:EAX,$src1 - emit_opcode( masm, 0xF7 ); - emit_rm( masm, 0x3, 0x5, $src1$$reg ); - // SAR EDX,$cnt-32 - int shift_count = ((int)$cnt$$constant) - 32; - if (shift_count > 0) { - emit_opcode(masm, 0xC1); - emit_rm(masm, 0x3, 7, $dst$$reg ); - emit_d8(masm, shift_count); - } - %} - - // this version doesn't have add sp, 8 - enc_class convert_long_double2( eRegL src ) %{ - // push $src.hi - emit_opcode(masm, 0x50+HIGH_FROM_LOW_ENC($src$$reg)); - // push $src.lo - emit_opcode(masm, 0x50+$src$$reg ); - // fild 64-bits at [SP] - emit_opcode(masm,0xdf); - emit_d8(masm, 0x6C); - emit_d8(masm, 0x24); - emit_d8(masm, 0x00); - %} - - enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{ - // Basic idea: long = (long)int * (long)int - // IMUL EDX:EAX, src - emit_opcode( masm, 0xF7 ); - emit_rm( masm, 0x3, 0x5, $src$$reg); - %} - - enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{ - // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL) - // MUL EDX:EAX, src - emit_opcode( masm, 0xF7 ); - emit_rm( masm, 0x3, 0x4, $src$$reg); - %} - - enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{ - // Basic idea: lo(result) = lo(x_lo * y_lo) - // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi) - // MOV $tmp,$src.lo - encode_Copy( masm, $tmp$$reg, $src$$reg ); - // IMUL $tmp,EDX - emit_opcode( masm, 0x0F ); - emit_opcode( masm, 0xAF ); - emit_rm( masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) ); - // MOV EDX,$src.hi - encode_Copy( masm, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg) ); - // IMUL EDX,EAX - emit_opcode( masm, 0x0F ); - emit_opcode( masm, 0xAF ); - emit_rm( masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg ); - // ADD $tmp,EDX - emit_opcode( masm, 0x03 ); - emit_rm( masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) ); - // MUL EDX:EAX,$src.lo - emit_opcode( masm, 0xF7 ); - emit_rm( masm, 0x3, 0x4, $src$$reg ); - // ADD EDX,ESI - emit_opcode( masm, 0x03 ); - emit_rm( masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $tmp$$reg ); - %} - - enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{ - // Basic idea: lo(result) = lo(src * y_lo) - // hi(result) = hi(src * y_lo) + lo(src * y_hi) - // IMUL $tmp,EDX,$src - emit_opcode( masm, 0x6B ); - emit_rm( masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) ); - emit_d8( masm, (int)$src$$constant ); - // MOV EDX,$src - emit_opcode(masm, 0xB8 + EDX_enc); - emit_d32( masm, (int)$src$$constant ); - // MUL EDX:EAX,EDX - emit_opcode( masm, 0xF7 ); - emit_rm( masm, 0x3, 0x4, EDX_enc ); - // ADD EDX,ESI - emit_opcode( masm, 0x03 ); - emit_rm( masm, 0x3, EDX_enc, $tmp$$reg ); - %} - - enc_class long_div( eRegL src1, eRegL src2 ) %{ - // PUSH src1.hi - emit_opcode(masm, HIGH_FROM_LOW_ENC(0x50+$src1$$reg) ); - // PUSH src1.lo - emit_opcode(masm, 0x50+$src1$$reg ); - // PUSH src2.hi - emit_opcode(masm, HIGH_FROM_LOW_ENC(0x50+$src2$$reg) ); - // PUSH src2.lo - emit_opcode(masm, 0x50+$src2$$reg ); - // CALL directly to the runtime - __ set_inst_mark(); - emit_opcode(masm,0xE8); // Call into runtime - emit_d32_reloc(masm, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - __ pc()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); - __ clear_inst_mark(); - __ post_call_nop(); - // Restore stack - emit_opcode(masm, 0x83); // add SP, #framesize - emit_rm(masm, 0x3, 0x00, ESP_enc); - emit_d8(masm, 4*4); - %} - - enc_class long_mod( eRegL src1, eRegL src2 ) %{ - // PUSH src1.hi - emit_opcode(masm, HIGH_FROM_LOW_ENC(0x50+$src1$$reg) ); - // PUSH src1.lo - emit_opcode(masm, 0x50+$src1$$reg ); - // PUSH src2.hi - emit_opcode(masm, HIGH_FROM_LOW_ENC(0x50+$src2$$reg) ); - // PUSH src2.lo - emit_opcode(masm, 0x50+$src2$$reg ); - // CALL directly to the runtime - __ set_inst_mark(); - emit_opcode(masm,0xE8); // Call into runtime - emit_d32_reloc(masm, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - __ pc()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); - __ clear_inst_mark(); - __ post_call_nop(); - // Restore stack - emit_opcode(masm, 0x83); // add SP, #framesize - emit_rm(masm, 0x3, 0x00, ESP_enc); - emit_d8(masm, 4*4); - %} - - enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{ - // MOV $tmp,$src.lo - emit_opcode(masm, 0x8B); - emit_rm(masm, 0x3, $tmp$$reg, $src$$reg); - // OR $tmp,$src.hi - emit_opcode(masm, 0x0B); - emit_rm(masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src$$reg)); - %} - - enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{ - // CMP $src1.lo,$src2.lo - emit_opcode( masm, 0x3B ); - emit_rm(masm, 0x3, $src1$$reg, $src2$$reg ); - // JNE,s skip - emit_cc(masm, 0x70, 0x5); - emit_d8(masm,2); - // CMP $src1.hi,$src2.hi - emit_opcode( masm, 0x3B ); - emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($src1$$reg), HIGH_FROM_LOW_ENC($src2$$reg) ); - %} - - enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{ - // CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits - emit_opcode( masm, 0x3B ); - emit_rm(masm, 0x3, $src1$$reg, $src2$$reg ); - // MOV $tmp,$src1.hi - emit_opcode( masm, 0x8B ); - emit_rm(masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src1$$reg) ); - // SBB $tmp,$src2.hi\t! Compute flags for long compare - emit_opcode( masm, 0x1B ); - emit_rm(masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src2$$reg) ); - %} - - enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{ - // XOR $tmp,$tmp - emit_opcode(masm,0x33); // XOR - emit_rm(masm,0x3, $tmp$$reg, $tmp$$reg); - // CMP $tmp,$src.lo - emit_opcode( masm, 0x3B ); - emit_rm(masm, 0x3, $tmp$$reg, $src$$reg ); - // SBB $tmp,$src.hi - emit_opcode( masm, 0x1B ); - emit_rm(masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src$$reg) ); - %} - - // Sniff, sniff... smells like Gnu Superoptimizer - enc_class neg_long( eRegL dst ) %{ - emit_opcode(masm,0xF7); // NEG hi - emit_rm (masm,0x3, 0x3, HIGH_FROM_LOW_ENC($dst$$reg)); - emit_opcode(masm,0xF7); // NEG lo - emit_rm (masm,0x3, 0x3, $dst$$reg ); - emit_opcode(masm,0x83); // SBB hi,0 - emit_rm (masm,0x3, 0x3, HIGH_FROM_LOW_ENC($dst$$reg)); - emit_d8 (masm,0 ); - %} - - enc_class enc_pop_rdx() %{ - emit_opcode(masm,0x5A); - %} - - enc_class enc_rethrow() %{ - __ set_inst_mark(); - emit_opcode(masm, 0xE9); // jmp entry - emit_d32_reloc(masm, (int)OptoRuntime::rethrow_stub() - ((int)__ pc())-4, - runtime_call_Relocation::spec(), RELOC_IMM32 ); - __ clear_inst_mark(); - __ post_call_nop(); - %} - - - // Convert a double to an int. Java semantics require we do complex - // manglelations in the corner cases. So we set the rounding mode to - // 'zero', store the darned double down as an int, and reset the - // rounding mode to 'nearest'. The hardware throws an exception which - // patches up the correct value directly to the stack. - enc_class DPR2I_encoding( regDPR src ) %{ - // Flip to round-to-zero mode. We attempted to allow invalid-op - // exceptions here, so that a NAN or other corner-case value will - // thrown an exception (but normal values get converted at full speed). - // However, I2C adapters and other float-stack manglers leave pending - // invalid-op exceptions hanging. We would have to clear them before - // enabling them and that is more expensive than just testing for the - // invalid value Intel stores down in the corner cases. - emit_opcode(masm,0xD9); // FLDCW trunc - emit_opcode(masm,0x2D); - emit_d32(masm,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()); - // Allocate a word - emit_opcode(masm,0x83); // SUB ESP,4 - emit_opcode(masm,0xEC); - emit_d8(masm,0x04); - // Encoding assumes a double has been pushed into FPR0. - // Store down the double as an int, popping the FPU stack - emit_opcode(masm,0xDB); // FISTP [ESP] - emit_opcode(masm,0x1C); - emit_d8(masm,0x24); - // Restore the rounding mode; mask the exception - emit_opcode(masm,0xD9); // FLDCW std/24-bit mode - emit_opcode(masm,0x2D); - emit_d32( masm, Compile::current()->in_24_bit_fp_mode() - ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24() - : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std()); - - // Load the converted int; adjust CPU stack - emit_opcode(masm,0x58); // POP EAX - emit_opcode(masm,0x3D); // CMP EAX,imm - emit_d32 (masm,0x80000000); // 0x80000000 - emit_opcode(masm,0x75); // JNE around_slow_call - emit_d8 (masm,0x07); // Size of slow_call - // Push src onto stack slow-path - emit_opcode(masm,0xD9 ); // FLD ST(i) - emit_d8 (masm,0xC0-1+$src$$reg ); - // CALL directly to the runtime - __ set_inst_mark(); - emit_opcode(masm,0xE8); // Call into runtime - emit_d32_reloc(masm, (StubRoutines::x86::d2i_wrapper() - __ pc()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); - __ clear_inst_mark(); - __ post_call_nop(); - // Carry on here... - %} - - enc_class DPR2L_encoding( regDPR src ) %{ - emit_opcode(masm,0xD9); // FLDCW trunc - emit_opcode(masm,0x2D); - emit_d32(masm,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()); - // Allocate a word - emit_opcode(masm,0x83); // SUB ESP,8 - emit_opcode(masm,0xEC); - emit_d8(masm,0x08); - // Encoding assumes a double has been pushed into FPR0. - // Store down the double as a long, popping the FPU stack - emit_opcode(masm,0xDF); // FISTP [ESP] - emit_opcode(masm,0x3C); - emit_d8(masm,0x24); - // Restore the rounding mode; mask the exception - emit_opcode(masm,0xD9); // FLDCW std/24-bit mode - emit_opcode(masm,0x2D); - emit_d32( masm, Compile::current()->in_24_bit_fp_mode() - ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24() - : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std()); - - // Load the converted int; adjust CPU stack - emit_opcode(masm,0x58); // POP EAX - emit_opcode(masm,0x5A); // POP EDX - emit_opcode(masm,0x81); // CMP EDX,imm - emit_d8 (masm,0xFA); // rdx - emit_d32 (masm,0x80000000); // 0x80000000 - emit_opcode(masm,0x75); // JNE around_slow_call - emit_d8 (masm,0x07+4); // Size of slow_call - emit_opcode(masm,0x85); // TEST EAX,EAX - emit_opcode(masm,0xC0); // 2/rax,/rax, - emit_opcode(masm,0x75); // JNE around_slow_call - emit_d8 (masm,0x07); // Size of slow_call - // Push src onto stack slow-path - emit_opcode(masm,0xD9 ); // FLD ST(i) - emit_d8 (masm,0xC0-1+$src$$reg ); - // CALL directly to the runtime - __ set_inst_mark(); - emit_opcode(masm,0xE8); // Call into runtime - emit_d32_reloc(masm, (StubRoutines::x86::d2l_wrapper() - __ pc()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); - __ clear_inst_mark(); - __ post_call_nop(); - // Carry on here... - %} - - enc_class FMul_ST_reg( eRegFPR src1 ) %{ - // Operand was loaded from memory into fp ST (stack top) - // FMUL ST,$src /* D8 C8+i */ - emit_opcode(masm, 0xD8); - emit_opcode(masm, 0xC8 + $src1$$reg); - %} - - enc_class FAdd_ST_reg( eRegFPR src2 ) %{ - // FADDP ST,src2 /* D8 C0+i */ - emit_opcode(masm, 0xD8); - emit_opcode(masm, 0xC0 + $src2$$reg); - //could use FADDP src2,fpST /* DE C0+i */ - %} - - enc_class FAddP_reg_ST( eRegFPR src2 ) %{ - // FADDP src2,ST /* DE C0+i */ - emit_opcode(masm, 0xDE); - emit_opcode(masm, 0xC0 + $src2$$reg); - %} - - enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{ - // Operand has been loaded into fp ST (stack top) - // FSUB ST,$src1 - emit_opcode(masm, 0xD8); - emit_opcode(masm, 0xE0 + $src1$$reg); - - // FDIV - emit_opcode(masm, 0xD8); - emit_opcode(masm, 0xF0 + $src2$$reg); - %} - - enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{ - // Operand was loaded from memory into fp ST (stack top) - // FADD ST,$src /* D8 C0+i */ - emit_opcode(masm, 0xD8); - emit_opcode(masm, 0xC0 + $src1$$reg); - - // FMUL ST,src2 /* D8 C*+i */ - emit_opcode(masm, 0xD8); - emit_opcode(masm, 0xC8 + $src2$$reg); - %} - - - enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{ - // Operand was loaded from memory into fp ST (stack top) - // FADD ST,$src /* D8 C0+i */ - emit_opcode(masm, 0xD8); - emit_opcode(masm, 0xC0 + $src1$$reg); - - // FMULP src2,ST /* DE C8+i */ - emit_opcode(masm, 0xDE); - emit_opcode(masm, 0xC8 + $src2$$reg); - %} - - // Atomically load the volatile long - enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{ - emit_opcode(masm,0xDF); - int rm_byte_opcode = 0x05; - int base = $mem$$base; - int index = $mem$$index; - int scale = $mem$$scale; - int displace = $mem$$disp; - relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals - encode_RegMem(masm, rm_byte_opcode, base, index, scale, displace, disp_reloc); - store_to_stackslot( masm, 0x0DF, 0x07, $dst$$disp ); - %} - - // Volatile Store Long. Must be atomic, so move it into - // the FP TOS and then do a 64-bit FIST. Has to probe the - // target address before the store (for null-ptr checks) - // so the memory operand is used twice in the encoding. - enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{ - store_to_stackslot( masm, 0x0DF, 0x05, $src$$disp ); - __ set_inst_mark(); // Mark start of FIST in case $mem has an oop - emit_opcode(masm,0xDF); - int rm_byte_opcode = 0x07; - int base = $mem$$base; - int index = $mem$$index; - int scale = $mem$$scale; - int displace = $mem$$disp; - relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals - encode_RegMem(masm, rm_byte_opcode, base, index, scale, displace, disp_reloc); - __ clear_inst_mark(); - %} - -%} - - -//----------FRAME-------------------------------------------------------------- -// Definition of frame structure and management information. -// -// S T A C K L A Y O U T Allocators stack-slot number -// | (to get allocators register number -// G Owned by | | v add OptoReg::stack0()) -// r CALLER | | -// o | +--------+ pad to even-align allocators stack-slot -// w V | pad0 | numbers; owned by CALLER -// t -----------+--------+----> Matcher::_in_arg_limit, unaligned -// h ^ | in | 5 -// | | args | 4 Holes in incoming args owned by SELF -// | | | | 3 -// | | +--------+ -// V | | old out| Empty on Intel, window on Sparc -// | old |preserve| Must be even aligned. -// | SP-+--------+----> Matcher::_old_SP, even aligned -// | | in | 3 area for Intel ret address -// Owned by |preserve| Empty on Sparc. -// SELF +--------+ -// | | pad2 | 2 pad to align old SP -// | +--------+ 1 -// | | locks | 0 -// | +--------+----> OptoReg::stack0(), even aligned -// | | pad1 | 11 pad to align new SP -// | +--------+ -// | | | 10 -// | | spills | 9 spills -// V | | 8 (pad0 slot for callee) -// -----------+--------+----> Matcher::_out_arg_limit, unaligned -// ^ | out | 7 -// | | args | 6 Holes in outgoing args owned by CALLEE -// Owned by +--------+ -// CALLEE | new out| 6 Empty on Intel, window on Sparc -// | new |preserve| Must be even-aligned. -// | SP-+--------+----> Matcher::_new_SP, even aligned -// | | | -// -// Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is -// known from SELF's arguments and the Java calling convention. -// Region 6-7 is determined per call site. -// Note 2: If the calling convention leaves holes in the incoming argument -// area, those holes are owned by SELF. Holes in the outgoing area -// are owned by the CALLEE. Holes should not be necessary in the -// incoming area, as the Java calling convention is completely under -// the control of the AD file. Doubles can be sorted and packed to -// avoid holes. Holes in the outgoing arguments may be necessary for -// varargs C calling conventions. -// Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is -// even aligned with pad0 as needed. -// Region 6 is even aligned. Region 6-7 is NOT even aligned; -// region 6-11 is even aligned; it may be padded out more so that -// the region from SP to FP meets the minimum stack alignment. - -frame %{ - // These three registers define part of the calling convention - // between compiled code and the interpreter. - inline_cache_reg(EAX); // Inline Cache Register - - // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset] - cisc_spilling_operand_name(indOffset32); - - // Number of stack slots consumed by locking an object - sync_stack_slots(1); - - // Compiled code's Frame Pointer - frame_pointer(ESP); - // Interpreter stores its frame pointer in a register which is - // stored to the stack by I2CAdaptors. - // I2CAdaptors convert from interpreted java to compiled java. - interpreter_frame_pointer(EBP); - - // Stack alignment requirement - // Alignment size in bytes (128-bit -> 16 bytes) - stack_alignment(StackAlignmentInBytes); - - // Number of outgoing stack slots killed above the out_preserve_stack_slots - // for calls to C. Supports the var-args backing area for register parms. - varargs_C_out_slots_killed(0); - - // The after-PROLOG location of the return address. Location of - // return address specifies a type (REG or STACK) and a number - // representing the register number (i.e. - use a register name) or - // stack slot. - // Ret Addr is on stack in slot 0 if no locks or verification or alignment. - // Otherwise, it is above the locks and verification slot and alignment word - return_addr(STACK - 1 + - align_up((Compile::current()->in_preserve_stack_slots() + - Compile::current()->fixed_slots()), - stack_alignment_in_slots())); - - // Location of C & interpreter return values - c_return_value %{ - assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" ); - static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num, EAX_num, FPR1L_num, FPR1L_num, EAX_num }; - static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num }; - - // in SSE2+ mode we want to keep the FPU stack clean so pretend - // that C functions return float and double results in XMM0. - if( ideal_reg == Op_RegD && UseSSE>=2 ) - return OptoRegPair(XMM0b_num,XMM0_num); - if( ideal_reg == Op_RegF && UseSSE>=2 ) - return OptoRegPair(OptoReg::Bad,XMM0_num); - - return OptoRegPair(hi[ideal_reg],lo[ideal_reg]); - %} - - // Location of return values - return_value %{ - assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" ); - static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num, EAX_num, FPR1L_num, FPR1L_num, EAX_num }; - static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num }; - if( ideal_reg == Op_RegD && UseSSE>=2 ) - return OptoRegPair(XMM0b_num,XMM0_num); - if( ideal_reg == Op_RegF && UseSSE>=1 ) - return OptoRegPair(OptoReg::Bad,XMM0_num); - return OptoRegPair(hi[ideal_reg],lo[ideal_reg]); - %} - -%} - -//----------ATTRIBUTES--------------------------------------------------------- -//----------Operand Attributes------------------------------------------------- -op_attrib op_cost(0); // Required cost attribute - -//----------Instruction Attributes--------------------------------------------- -ins_attrib ins_cost(100); // Required cost attribute -ins_attrib ins_size(8); // Required size attribute (in bits) -ins_attrib ins_short_branch(0); // Required flag: is this instruction a - // non-matching short branch variant of some - // long branch? -ins_attrib ins_alignment(1); // Required alignment attribute (must be a power of 2) - // specifies the alignment that some part of the instruction (not - // necessarily the start) requires. If > 1, a compute_padding() - // function must be provided for the instruction - -//----------OPERANDS----------------------------------------------------------- -// Operand definitions must precede instruction definitions for correct parsing -// in the ADLC because operands constitute user defined types which are used in -// instruction definitions. - -//----------Simple Operands---------------------------------------------------- -// Immediate Operands -// Integer Immediate -operand immI() %{ - match(ConI); - - op_cost(10); - format %{ %} - interface(CONST_INTER); -%} - -// Constant for test vs zero -operand immI_0() %{ - predicate(n->get_int() == 0); - match(ConI); - - op_cost(0); - format %{ %} - interface(CONST_INTER); -%} - -// Constant for increment -operand immI_1() %{ - predicate(n->get_int() == 1); - match(ConI); - - op_cost(0); - format %{ %} - interface(CONST_INTER); -%} - -// Constant for decrement -operand immI_M1() %{ - predicate(n->get_int() == -1); - match(ConI); - - op_cost(0); - format %{ %} - interface(CONST_INTER); -%} - -// Valid scale values for addressing modes -operand immI2() %{ - predicate(0 <= n->get_int() && (n->get_int() <= 3)); - match(ConI); - - format %{ %} - interface(CONST_INTER); -%} - -operand immI8() %{ - predicate((-128 <= n->get_int()) && (n->get_int() <= 127)); - match(ConI); - - op_cost(5); - format %{ %} - interface(CONST_INTER); -%} - -operand immU8() %{ - predicate((0 <= n->get_int()) && (n->get_int() <= 255)); - match(ConI); - - op_cost(5); - format %{ %} - interface(CONST_INTER); -%} - -operand immI16() %{ - predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767)); - match(ConI); - - op_cost(10); - format %{ %} - interface(CONST_INTER); -%} - -// Int Immediate non-negative -operand immU31() -%{ - predicate(n->get_int() >= 0); - match(ConI); - - op_cost(0); - format %{ %} - interface(CONST_INTER); -%} - -// Constant for long shifts -operand immI_32() %{ - predicate( n->get_int() == 32 ); - match(ConI); - - op_cost(0); - format %{ %} - interface(CONST_INTER); -%} - -operand immI_1_31() %{ - predicate( n->get_int() >= 1 && n->get_int() <= 31 ); - match(ConI); - - op_cost(0); - format %{ %} - interface(CONST_INTER); -%} - -operand immI_32_63() %{ - predicate( n->get_int() >= 32 && n->get_int() <= 63 ); - match(ConI); - op_cost(0); - - format %{ %} - interface(CONST_INTER); -%} - -operand immI_2() %{ - predicate( n->get_int() == 2 ); - match(ConI); - - op_cost(0); - format %{ %} - interface(CONST_INTER); -%} - -operand immI_3() %{ - predicate( n->get_int() == 3 ); - match(ConI); - - op_cost(0); - format %{ %} - interface(CONST_INTER); -%} - -operand immI_4() -%{ - predicate(n->get_int() == 4); - match(ConI); - - op_cost(0); - format %{ %} - interface(CONST_INTER); -%} - -operand immI_8() -%{ - predicate(n->get_int() == 8); - match(ConI); - - op_cost(0); - format %{ %} - interface(CONST_INTER); -%} - -// Pointer Immediate -operand immP() %{ - match(ConP); - - op_cost(10); - format %{ %} - interface(CONST_INTER); -%} - -// Null Pointer Immediate -operand immP0() %{ - predicate( n->get_ptr() == 0 ); - match(ConP); - op_cost(0); - - format %{ %} - interface(CONST_INTER); -%} - -// Long Immediate -operand immL() %{ - match(ConL); - - op_cost(20); - format %{ %} - interface(CONST_INTER); -%} - -// Long Immediate zero -operand immL0() %{ - predicate( n->get_long() == 0L ); - match(ConL); - op_cost(0); - - format %{ %} - interface(CONST_INTER); -%} - -// Long Immediate zero -operand immL_M1() %{ - predicate( n->get_long() == -1L ); - match(ConL); - op_cost(0); - - format %{ %} - interface(CONST_INTER); -%} - -// Long immediate from 0 to 127. -// Used for a shorter form of long mul by 10. -operand immL_127() %{ - predicate((0 <= n->get_long()) && (n->get_long() <= 127)); - match(ConL); - op_cost(0); - - format %{ %} - interface(CONST_INTER); -%} - -// Long Immediate: low 32-bit mask -operand immL_32bits() %{ - predicate(n->get_long() == 0xFFFFFFFFL); - match(ConL); - op_cost(0); - - format %{ %} - interface(CONST_INTER); -%} - -// Long Immediate: low 32-bit mask -operand immL32() %{ - predicate(n->get_long() == (int)(n->get_long())); - match(ConL); - op_cost(20); - - format %{ %} - interface(CONST_INTER); -%} - -//Double Immediate zero -operand immDPR0() %{ - // Do additional (and counter-intuitive) test against NaN to work around VC++ - // bug that generates code such that NaNs compare equal to 0.0 - predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) ); - match(ConD); - - op_cost(5); - format %{ %} - interface(CONST_INTER); -%} - -// Double Immediate one -operand immDPR1() %{ - predicate( UseSSE<=1 && n->getd() == 1.0 ); - match(ConD); - - op_cost(5); - format %{ %} - interface(CONST_INTER); -%} - -// Double Immediate -operand immDPR() %{ - predicate(UseSSE<=1); - match(ConD); - - op_cost(5); - format %{ %} - interface(CONST_INTER); -%} - -operand immD() %{ - predicate(UseSSE>=2); - match(ConD); - - op_cost(5); - format %{ %} - interface(CONST_INTER); -%} - -// Double Immediate zero -operand immD0() %{ - // Do additional (and counter-intuitive) test against NaN to work around VC++ - // bug that generates code such that NaNs compare equal to 0.0 AND do not - // compare equal to -0.0. - predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 ); - match(ConD); - - format %{ %} - interface(CONST_INTER); -%} - -// Float Immediate zero -operand immFPR0() %{ - predicate(UseSSE == 0 && n->getf() == 0.0F); - match(ConF); - - op_cost(5); - format %{ %} - interface(CONST_INTER); -%} - -// Float Immediate one -operand immFPR1() %{ - predicate(UseSSE == 0 && n->getf() == 1.0F); - match(ConF); - - op_cost(5); - format %{ %} - interface(CONST_INTER); -%} - -// Float Immediate -operand immFPR() %{ - predicate( UseSSE == 0 ); - match(ConF); - - op_cost(5); - format %{ %} - interface(CONST_INTER); -%} - -// Float Immediate -operand immF() %{ - predicate(UseSSE >= 1); - match(ConF); - - op_cost(5); - format %{ %} - interface(CONST_INTER); -%} - -// Float Immediate zero. Zero and not -0.0 -operand immF0() %{ - predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 ); - match(ConF); - - op_cost(5); - format %{ %} - interface(CONST_INTER); -%} - -// Immediates for special shifts (sign extend) - -// Constants for increment -operand immI_16() %{ - predicate( n->get_int() == 16 ); - match(ConI); - - format %{ %} - interface(CONST_INTER); -%} - -operand immI_24() %{ - predicate( n->get_int() == 24 ); - match(ConI); - - format %{ %} - interface(CONST_INTER); -%} - -// Constant for byte-wide masking -operand immI_255() %{ - predicate( n->get_int() == 255 ); - match(ConI); - - format %{ %} - interface(CONST_INTER); -%} - -// Constant for short-wide masking -operand immI_65535() %{ - predicate(n->get_int() == 65535); - match(ConI); - - format %{ %} - interface(CONST_INTER); -%} - -operand kReg() -%{ - constraint(ALLOC_IN_RC(vectmask_reg)); - match(RegVectMask); - format %{%} - interface(REG_INTER); -%} - -// Register Operands -// Integer Register -operand rRegI() %{ - constraint(ALLOC_IN_RC(int_reg)); - match(RegI); - match(xRegI); - match(eAXRegI); - match(eBXRegI); - match(eCXRegI); - match(eDXRegI); - match(eDIRegI); - match(eSIRegI); - - format %{ %} - interface(REG_INTER); -%} - -// Subset of Integer Register -operand xRegI(rRegI reg) %{ - constraint(ALLOC_IN_RC(int_x_reg)); - match(reg); - match(eAXRegI); - match(eBXRegI); - match(eCXRegI); - match(eDXRegI); - - format %{ %} - interface(REG_INTER); -%} - -// Special Registers -operand eAXRegI(xRegI reg) %{ - constraint(ALLOC_IN_RC(eax_reg)); - match(reg); - match(rRegI); - - format %{ "EAX" %} - interface(REG_INTER); -%} - -// Special Registers -operand eBXRegI(xRegI reg) %{ - constraint(ALLOC_IN_RC(ebx_reg)); - match(reg); - match(rRegI); - - format %{ "EBX" %} - interface(REG_INTER); -%} - -operand eCXRegI(xRegI reg) %{ - constraint(ALLOC_IN_RC(ecx_reg)); - match(reg); - match(rRegI); - - format %{ "ECX" %} - interface(REG_INTER); -%} - -operand eDXRegI(xRegI reg) %{ - constraint(ALLOC_IN_RC(edx_reg)); - match(reg); - match(rRegI); - - format %{ "EDX" %} - interface(REG_INTER); -%} - -operand eDIRegI(xRegI reg) %{ - constraint(ALLOC_IN_RC(edi_reg)); - match(reg); - match(rRegI); - - format %{ "EDI" %} - interface(REG_INTER); -%} - -operand nadxRegI() %{ - constraint(ALLOC_IN_RC(nadx_reg)); - match(RegI); - match(eBXRegI); - match(eCXRegI); - match(eSIRegI); - match(eDIRegI); - - format %{ %} - interface(REG_INTER); -%} - -operand ncxRegI() %{ - constraint(ALLOC_IN_RC(ncx_reg)); - match(RegI); - match(eAXRegI); - match(eDXRegI); - match(eSIRegI); - match(eDIRegI); - - format %{ %} - interface(REG_INTER); -%} - -// // This operand was used by cmpFastUnlock, but conflicted with 'object' reg -// // -operand eSIRegI(xRegI reg) %{ - constraint(ALLOC_IN_RC(esi_reg)); - match(reg); - match(rRegI); - - format %{ "ESI" %} - interface(REG_INTER); -%} - -// Pointer Register -operand anyRegP() %{ - constraint(ALLOC_IN_RC(any_reg)); - match(RegP); - match(eAXRegP); - match(eBXRegP); - match(eCXRegP); - match(eDIRegP); - match(eRegP); - - format %{ %} - interface(REG_INTER); -%} - -operand eRegP() %{ - constraint(ALLOC_IN_RC(int_reg)); - match(RegP); - match(eAXRegP); - match(eBXRegP); - match(eCXRegP); - match(eDIRegP); - - format %{ %} - interface(REG_INTER); -%} - -operand rRegP() %{ - constraint(ALLOC_IN_RC(int_reg)); - match(RegP); - match(eAXRegP); - match(eBXRegP); - match(eCXRegP); - match(eDIRegP); - - format %{ %} - interface(REG_INTER); -%} - -// On windows95, EBP is not safe to use for implicit null tests. -operand eRegP_no_EBP() %{ - constraint(ALLOC_IN_RC(int_reg_no_ebp)); - match(RegP); - match(eAXRegP); - match(eBXRegP); - match(eCXRegP); - match(eDIRegP); - - op_cost(100); - format %{ %} - interface(REG_INTER); -%} - -operand pRegP() %{ - constraint(ALLOC_IN_RC(p_reg)); - match(RegP); - match(eBXRegP); - match(eDXRegP); - match(eSIRegP); - match(eDIRegP); - - format %{ %} - interface(REG_INTER); -%} - -// Special Registers -// Return a pointer value -operand eAXRegP(eRegP reg) %{ - constraint(ALLOC_IN_RC(eax_reg)); - match(reg); - format %{ "EAX" %} - interface(REG_INTER); -%} - -// Used in AtomicAdd -operand eBXRegP(eRegP reg) %{ - constraint(ALLOC_IN_RC(ebx_reg)); - match(reg); - format %{ "EBX" %} - interface(REG_INTER); -%} - -// Tail-call (interprocedural jump) to interpreter -operand eCXRegP(eRegP reg) %{ - constraint(ALLOC_IN_RC(ecx_reg)); - match(reg); - format %{ "ECX" %} - interface(REG_INTER); -%} - -operand eDXRegP(eRegP reg) %{ - constraint(ALLOC_IN_RC(edx_reg)); - match(reg); - format %{ "EDX" %} - interface(REG_INTER); -%} - -operand eSIRegP(eRegP reg) %{ - constraint(ALLOC_IN_RC(esi_reg)); - match(reg); - format %{ "ESI" %} - interface(REG_INTER); -%} - -// Used in rep stosw -operand eDIRegP(eRegP reg) %{ - constraint(ALLOC_IN_RC(edi_reg)); - match(reg); - format %{ "EDI" %} - interface(REG_INTER); -%} - -operand eRegL() %{ - constraint(ALLOC_IN_RC(long_reg)); - match(RegL); - match(eADXRegL); - - format %{ %} - interface(REG_INTER); -%} - -operand eADXRegL( eRegL reg ) %{ - constraint(ALLOC_IN_RC(eadx_reg)); - match(reg); - - format %{ "EDX:EAX" %} - interface(REG_INTER); -%} - -operand eBCXRegL( eRegL reg ) %{ - constraint(ALLOC_IN_RC(ebcx_reg)); - match(reg); - - format %{ "EBX:ECX" %} - interface(REG_INTER); -%} - -operand eBDPRegL( eRegL reg ) %{ - constraint(ALLOC_IN_RC(ebpd_reg)); - match(reg); - - format %{ "EBP:EDI" %} - interface(REG_INTER); -%} -// Special case for integer high multiply -operand eADXRegL_low_only() %{ - constraint(ALLOC_IN_RC(eadx_reg)); - match(RegL); - - format %{ "EAX" %} - interface(REG_INTER); -%} - -// Flags register, used as output of compare instructions -operand rFlagsReg() %{ - constraint(ALLOC_IN_RC(int_flags)); - match(RegFlags); - - format %{ "EFLAGS" %} - interface(REG_INTER); -%} - -// Flags register, used as output of compare instructions -operand eFlagsReg() %{ - constraint(ALLOC_IN_RC(int_flags)); - match(RegFlags); - - format %{ "EFLAGS" %} - interface(REG_INTER); -%} - -// Flags register, used as output of FLOATING POINT compare instructions -operand eFlagsRegU() %{ - constraint(ALLOC_IN_RC(int_flags)); - match(RegFlags); - - format %{ "EFLAGS_U" %} - interface(REG_INTER); -%} - -operand eFlagsRegUCF() %{ - constraint(ALLOC_IN_RC(int_flags)); - match(RegFlags); - predicate(false); - - format %{ "EFLAGS_U_CF" %} - interface(REG_INTER); -%} - -// Condition Code Register used by long compare -operand flagsReg_long_LTGE() %{ - constraint(ALLOC_IN_RC(int_flags)); - match(RegFlags); - format %{ "FLAGS_LTGE" %} - interface(REG_INTER); -%} -operand flagsReg_long_EQNE() %{ - constraint(ALLOC_IN_RC(int_flags)); - match(RegFlags); - format %{ "FLAGS_EQNE" %} - interface(REG_INTER); -%} -operand flagsReg_long_LEGT() %{ - constraint(ALLOC_IN_RC(int_flags)); - match(RegFlags); - format %{ "FLAGS_LEGT" %} - interface(REG_INTER); -%} - -// Condition Code Register used by unsigned long compare -operand flagsReg_ulong_LTGE() %{ - constraint(ALLOC_IN_RC(int_flags)); - match(RegFlags); - format %{ "FLAGS_U_LTGE" %} - interface(REG_INTER); -%} -operand flagsReg_ulong_EQNE() %{ - constraint(ALLOC_IN_RC(int_flags)); - match(RegFlags); - format %{ "FLAGS_U_EQNE" %} - interface(REG_INTER); -%} -operand flagsReg_ulong_LEGT() %{ - constraint(ALLOC_IN_RC(int_flags)); - match(RegFlags); - format %{ "FLAGS_U_LEGT" %} - interface(REG_INTER); -%} - -// Float register operands -operand regDPR() %{ - predicate( UseSSE < 2 ); - constraint(ALLOC_IN_RC(fp_dbl_reg)); - match(RegD); - match(regDPR1); - match(regDPR2); - format %{ %} - interface(REG_INTER); -%} - -operand regDPR1(regDPR reg) %{ - predicate( UseSSE < 2 ); - constraint(ALLOC_IN_RC(fp_dbl_reg0)); - match(reg); - format %{ "FPR1" %} - interface(REG_INTER); -%} - -operand regDPR2(regDPR reg) %{ - predicate( UseSSE < 2 ); - constraint(ALLOC_IN_RC(fp_dbl_reg1)); - match(reg); - format %{ "FPR2" %} - interface(REG_INTER); -%} - -operand regnotDPR1(regDPR reg) %{ - predicate( UseSSE < 2 ); - constraint(ALLOC_IN_RC(fp_dbl_notreg0)); - match(reg); - format %{ %} - interface(REG_INTER); -%} - -// Float register operands -operand regFPR() %{ - predicate( UseSSE < 2 ); - constraint(ALLOC_IN_RC(fp_flt_reg)); - match(RegF); - match(regFPR1); - format %{ %} - interface(REG_INTER); -%} - -// Float register operands -operand regFPR1(regFPR reg) %{ - predicate( UseSSE < 2 ); - constraint(ALLOC_IN_RC(fp_flt_reg0)); - match(reg); - format %{ "FPR1" %} - interface(REG_INTER); -%} - -// XMM Float register operands -operand regF() %{ - predicate( UseSSE>=1 ); - constraint(ALLOC_IN_RC(float_reg_legacy)); - match(RegF); - format %{ %} - interface(REG_INTER); -%} - -operand legRegF() %{ - predicate( UseSSE>=1 ); - constraint(ALLOC_IN_RC(float_reg_legacy)); - match(RegF); - format %{ %} - interface(REG_INTER); -%} - -// Float register operands -operand vlRegF() %{ - constraint(ALLOC_IN_RC(float_reg_vl)); - match(RegF); - - format %{ %} - interface(REG_INTER); -%} - -// XMM Double register operands -operand regD() %{ - predicate( UseSSE>=2 ); - constraint(ALLOC_IN_RC(double_reg_legacy)); - match(RegD); - format %{ %} - interface(REG_INTER); -%} - -// Double register operands -operand legRegD() %{ - predicate( UseSSE>=2 ); - constraint(ALLOC_IN_RC(double_reg_legacy)); - match(RegD); - format %{ %} - interface(REG_INTER); -%} - -operand vlRegD() %{ - constraint(ALLOC_IN_RC(double_reg_vl)); - match(RegD); - - format %{ %} - interface(REG_INTER); -%} - -//----------Memory Operands---------------------------------------------------- -// Direct Memory Operand -operand direct(immP addr) %{ - match(addr); - - format %{ "[$addr]" %} - interface(MEMORY_INTER) %{ - base(0xFFFFFFFF); - index(0x4); - scale(0x0); - disp($addr); - %} -%} - -// Indirect Memory Operand -operand indirect(eRegP reg) %{ - constraint(ALLOC_IN_RC(int_reg)); - match(reg); - - format %{ "[$reg]" %} - interface(MEMORY_INTER) %{ - base($reg); - index(0x4); - scale(0x0); - disp(0x0); - %} -%} - -// Indirect Memory Plus Short Offset Operand -operand indOffset8(eRegP reg, immI8 off) %{ - match(AddP reg off); - - format %{ "[$reg + $off]" %} - interface(MEMORY_INTER) %{ - base($reg); - index(0x4); - scale(0x0); - disp($off); - %} -%} - -// Indirect Memory Plus Long Offset Operand -operand indOffset32(eRegP reg, immI off) %{ - match(AddP reg off); - - format %{ "[$reg + $off]" %} - interface(MEMORY_INTER) %{ - base($reg); - index(0x4); - scale(0x0); - disp($off); - %} -%} - -// Indirect Memory Plus Long Offset Operand -operand indOffset32X(rRegI reg, immP off) %{ - match(AddP off reg); - - format %{ "[$reg + $off]" %} - interface(MEMORY_INTER) %{ - base($reg); - index(0x4); - scale(0x0); - disp($off); - %} -%} - -// Indirect Memory Plus Index Register Plus Offset Operand -operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{ - match(AddP (AddP reg ireg) off); - - op_cost(10); - format %{"[$reg + $off + $ireg]" %} - interface(MEMORY_INTER) %{ - base($reg); - index($ireg); - scale(0x0); - disp($off); - %} -%} - -// Indirect Memory Plus Index Register Plus Offset Operand -operand indIndex(eRegP reg, rRegI ireg) %{ - match(AddP reg ireg); - - op_cost(10); - format %{"[$reg + $ireg]" %} - interface(MEMORY_INTER) %{ - base($reg); - index($ireg); - scale(0x0); - disp(0x0); - %} -%} - -// // ------------------------------------------------------------------------- -// // 486 architecture doesn't support "scale * index + offset" with out a base -// // ------------------------------------------------------------------------- -// // Scaled Memory Operands -// // Indirect Memory Times Scale Plus Offset Operand -// operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{ -// match(AddP off (LShiftI ireg scale)); -// -// op_cost(10); -// format %{"[$off + $ireg << $scale]" %} -// interface(MEMORY_INTER) %{ -// base(0x4); -// index($ireg); -// scale($scale); -// disp($off); -// %} -// %} - -// Indirect Memory Times Scale Plus Index Register -operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{ - match(AddP reg (LShiftI ireg scale)); - - op_cost(10); - format %{"[$reg + $ireg << $scale]" %} - interface(MEMORY_INTER) %{ - base($reg); - index($ireg); - scale($scale); - disp(0x0); - %} -%} - -// Indirect Memory Times Scale Plus Index Register Plus Offset Operand -operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{ - match(AddP (AddP reg (LShiftI ireg scale)) off); - - op_cost(10); - format %{"[$reg + $off + $ireg << $scale]" %} - interface(MEMORY_INTER) %{ - base($reg); - index($ireg); - scale($scale); - disp($off); - %} -%} - -//----------Load Long Memory Operands------------------------------------------ -// The load-long idiom will use it's address expression again after loading -// the first word of the long. If the load-long destination overlaps with -// registers used in the addressing expression, the 2nd half will be loaded -// from a clobbered address. Fix this by requiring that load-long use -// address registers that do not overlap with the load-long target. - -// load-long support -operand load_long_RegP() %{ - constraint(ALLOC_IN_RC(esi_reg)); - match(RegP); - match(eSIRegP); - op_cost(100); - format %{ %} - interface(REG_INTER); -%} - -// Indirect Memory Operand Long -operand load_long_indirect(load_long_RegP reg) %{ - constraint(ALLOC_IN_RC(esi_reg)); - match(reg); - - format %{ "[$reg]" %} - interface(MEMORY_INTER) %{ - base($reg); - index(0x4); - scale(0x0); - disp(0x0); - %} -%} - -// Indirect Memory Plus Long Offset Operand -operand load_long_indOffset32(load_long_RegP reg, immI off) %{ - match(AddP reg off); - - format %{ "[$reg + $off]" %} - interface(MEMORY_INTER) %{ - base($reg); - index(0x4); - scale(0x0); - disp($off); - %} -%} - -opclass load_long_memory(load_long_indirect, load_long_indOffset32); - - -//----------Special Memory Operands-------------------------------------------- -// Stack Slot Operand - This operand is used for loading and storing temporary -// values on the stack where a match requires a value to -// flow through memory. -operand stackSlotP(sRegP reg) %{ - constraint(ALLOC_IN_RC(stack_slots)); - // No match rule because this operand is only generated in matching - format %{ "[$reg]" %} - interface(MEMORY_INTER) %{ - base(0x4); // ESP - index(0x4); // No Index - scale(0x0); // No Scale - disp($reg); // Stack Offset - %} -%} - -operand stackSlotI(sRegI reg) %{ - constraint(ALLOC_IN_RC(stack_slots)); - // No match rule because this operand is only generated in matching - format %{ "[$reg]" %} - interface(MEMORY_INTER) %{ - base(0x4); // ESP - index(0x4); // No Index - scale(0x0); // No Scale - disp($reg); // Stack Offset - %} -%} - -operand stackSlotF(sRegF reg) %{ - constraint(ALLOC_IN_RC(stack_slots)); - // No match rule because this operand is only generated in matching - format %{ "[$reg]" %} - interface(MEMORY_INTER) %{ - base(0x4); // ESP - index(0x4); // No Index - scale(0x0); // No Scale - disp($reg); // Stack Offset - %} -%} - -operand stackSlotD(sRegD reg) %{ - constraint(ALLOC_IN_RC(stack_slots)); - // No match rule because this operand is only generated in matching - format %{ "[$reg]" %} - interface(MEMORY_INTER) %{ - base(0x4); // ESP - index(0x4); // No Index - scale(0x0); // No Scale - disp($reg); // Stack Offset - %} -%} - -operand stackSlotL(sRegL reg) %{ - constraint(ALLOC_IN_RC(stack_slots)); - // No match rule because this operand is only generated in matching - format %{ "[$reg]" %} - interface(MEMORY_INTER) %{ - base(0x4); // ESP - index(0x4); // No Index - scale(0x0); // No Scale - disp($reg); // Stack Offset - %} -%} - -//----------Conditional Branch Operands---------------------------------------- -// Comparison Op - This is the operation of the comparison, and is limited to -// the following set of codes: -// L (<), LE (<=), G (>), GE (>=), E (==), NE (!=) -// -// Other attributes of the comparison, such as unsignedness, are specified -// by the comparison instruction that sets a condition code flags register. -// That result is represented by a flags operand whose subtype is appropriate -// to the unsignedness (etc.) of the comparison. -// -// Later, the instruction which matches both the Comparison Op (a Bool) and -// the flags (produced by the Cmp) specifies the coding of the comparison op -// by matching a specific subtype of Bool operand below, such as cmpOpU. - -// Comparison Code -operand cmpOp() %{ - match(Bool); - - format %{ "" %} - interface(COND_INTER) %{ - equal(0x4, "e"); - not_equal(0x5, "ne"); - less(0xC, "l"); - greater_equal(0xD, "ge"); - less_equal(0xE, "le"); - greater(0xF, "g"); - overflow(0x0, "o"); - no_overflow(0x1, "no"); - %} -%} - -// Comparison Code, unsigned compare. Used by FP also, with -// C2 (unordered) turned into GT or LT already. The other bits -// C0 and C3 are turned into Carry & Zero flags. -operand cmpOpU() %{ - match(Bool); - - format %{ "" %} - interface(COND_INTER) %{ - equal(0x4, "e"); - not_equal(0x5, "ne"); - less(0x2, "b"); - greater_equal(0x3, "nb"); - less_equal(0x6, "be"); - greater(0x7, "nbe"); - overflow(0x0, "o"); - no_overflow(0x1, "no"); - %} -%} - -// Floating comparisons that don't require any fixup for the unordered case -operand cmpOpUCF() %{ - match(Bool); - predicate(n->as_Bool()->_test._test == BoolTest::lt || - n->as_Bool()->_test._test == BoolTest::ge || - n->as_Bool()->_test._test == BoolTest::le || - n->as_Bool()->_test._test == BoolTest::gt); - format %{ "" %} - interface(COND_INTER) %{ - equal(0x4, "e"); - not_equal(0x5, "ne"); - less(0x2, "b"); - greater_equal(0x3, "nb"); - less_equal(0x6, "be"); - greater(0x7, "nbe"); - overflow(0x0, "o"); - no_overflow(0x1, "no"); - %} -%} - - -// Floating comparisons that can be fixed up with extra conditional jumps -operand cmpOpUCF2() %{ - match(Bool); - predicate(n->as_Bool()->_test._test == BoolTest::ne || - n->as_Bool()->_test._test == BoolTest::eq); - format %{ "" %} - interface(COND_INTER) %{ - equal(0x4, "e"); - not_equal(0x5, "ne"); - less(0x2, "b"); - greater_equal(0x3, "nb"); - less_equal(0x6, "be"); - greater(0x7, "nbe"); - overflow(0x0, "o"); - no_overflow(0x1, "no"); - %} -%} - -// Comparison Code for FP conditional move -operand cmpOp_fcmov() %{ - match(Bool); - - predicate(n->as_Bool()->_test._test != BoolTest::overflow && - n->as_Bool()->_test._test != BoolTest::no_overflow); - format %{ "" %} - interface(COND_INTER) %{ - equal (0x0C8); - not_equal (0x1C8); - less (0x0C0); - greater_equal(0x1C0); - less_equal (0x0D0); - greater (0x1D0); - overflow(0x0, "o"); // not really supported by the instruction - no_overflow(0x1, "no"); // not really supported by the instruction - %} -%} - -// Comparison Code used in long compares -operand cmpOp_commute() %{ - match(Bool); - - format %{ "" %} - interface(COND_INTER) %{ - equal(0x4, "e"); - not_equal(0x5, "ne"); - less(0xF, "g"); - greater_equal(0xE, "le"); - less_equal(0xD, "ge"); - greater(0xC, "l"); - overflow(0x0, "o"); - no_overflow(0x1, "no"); - %} -%} - -// Comparison Code used in unsigned long compares -operand cmpOpU_commute() %{ - match(Bool); - - format %{ "" %} - interface(COND_INTER) %{ - equal(0x4, "e"); - not_equal(0x5, "ne"); - less(0x7, "nbe"); - greater_equal(0x6, "be"); - less_equal(0x3, "nb"); - greater(0x2, "b"); - overflow(0x0, "o"); - no_overflow(0x1, "no"); - %} -%} - -//----------OPERAND CLASSES---------------------------------------------------- -// Operand Classes are groups of operands that are used as to simplify -// instruction definitions by not requiring the AD writer to specify separate -// instructions for every form of operand when the instruction accepts -// multiple operand types with the same basic encoding and format. The classic -// case of this is memory operands. - -opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset, - indIndex, indIndexScale, indIndexScaleOffset); - -// Long memory operations are encoded in 2 instructions and a +4 offset. -// This means some kind of offset is always required and you cannot use -// an oop as the offset (done when working on static globals). -opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset, - indIndex, indIndexScale, indIndexScaleOffset); - - -//----------PIPELINE----------------------------------------------------------- -// Rules which define the behavior of the target architectures pipeline. -pipeline %{ - -//----------ATTRIBUTES--------------------------------------------------------- -attributes %{ - variable_size_instructions; // Fixed size instructions - max_instructions_per_bundle = 3; // Up to 3 instructions per bundle - instruction_unit_size = 1; // An instruction is 1 bytes long - instruction_fetch_unit_size = 16; // The processor fetches one line - instruction_fetch_units = 1; // of 16 bytes - - // List of nop instructions - nops( MachNop ); -%} - -//----------RESOURCES---------------------------------------------------------- -// Resources are the functional units available to the machine - -// Generic P2/P3 pipeline -// 3 decoders, only D0 handles big operands; a "bundle" is the limit of -// 3 instructions decoded per cycle. -// 2 load/store ops per cycle, 1 branch, 1 FPU, -// 2 ALU op, only ALU0 handles mul/div instructions. -resources( D0, D1, D2, DECODE = D0 | D1 | D2, - MS0, MS1, MEM = MS0 | MS1, - BR, FPU, - ALU0, ALU1, ALU = ALU0 | ALU1 ); - -//----------PIPELINE DESCRIPTION----------------------------------------------- -// Pipeline Description specifies the stages in the machine's pipeline - -// Generic P2/P3 pipeline -pipe_desc(S0, S1, S2, S3, S4, S5); - -//----------PIPELINE CLASSES--------------------------------------------------- -// Pipeline Classes describe the stages in which input and output are -// referenced by the hardware pipeline. - -// Naming convention: ialu or fpu -// Then: _reg -// Then: _reg if there is a 2nd register -// Then: _long if it's a pair of instructions implementing a long -// Then: _fat if it requires the big decoder -// Or: _mem if it requires the big decoder and a memory unit. - -// Integer ALU reg operation -pipe_class ialu_reg(rRegI dst) %{ - single_instruction; - dst : S4(write); - dst : S3(read); - DECODE : S0; // any decoder - ALU : S3; // any alu -%} - -// Long ALU reg operation -pipe_class ialu_reg_long(eRegL dst) %{ - instruction_count(2); - dst : S4(write); - dst : S3(read); - DECODE : S0(2); // any 2 decoders - ALU : S3(2); // both alus -%} - -// Integer ALU reg operation using big decoder -pipe_class ialu_reg_fat(rRegI dst) %{ - single_instruction; - dst : S4(write); - dst : S3(read); - D0 : S0; // big decoder only - ALU : S3; // any alu -%} - -// Long ALU reg operation using big decoder -pipe_class ialu_reg_long_fat(eRegL dst) %{ - instruction_count(2); - dst : S4(write); - dst : S3(read); - D0 : S0(2); // big decoder only; twice - ALU : S3(2); // any 2 alus -%} - -// Integer ALU reg-reg operation -pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{ - single_instruction; - dst : S4(write); - src : S3(read); - DECODE : S0; // any decoder - ALU : S3; // any alu -%} - -// Long ALU reg-reg operation -pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{ - instruction_count(2); - dst : S4(write); - src : S3(read); - DECODE : S0(2); // any 2 decoders - ALU : S3(2); // both alus -%} - -// Integer ALU reg-reg operation -pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{ - single_instruction; - dst : S4(write); - src : S3(read); - D0 : S0; // big decoder only - ALU : S3; // any alu -%} - -// Long ALU reg-reg operation -pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{ - instruction_count(2); - dst : S4(write); - src : S3(read); - D0 : S0(2); // big decoder only; twice - ALU : S3(2); // both alus -%} - -// Integer ALU reg-mem operation -pipe_class ialu_reg_mem(rRegI dst, memory mem) %{ - single_instruction; - dst : S5(write); - mem : S3(read); - D0 : S0; // big decoder only - ALU : S4; // any alu - MEM : S3; // any mem -%} - -// Long ALU reg-mem operation -pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{ - instruction_count(2); - dst : S5(write); - mem : S3(read); - D0 : S0(2); // big decoder only; twice - ALU : S4(2); // any 2 alus - MEM : S3(2); // both mems -%} - -// Integer mem operation (prefetch) -pipe_class ialu_mem(memory mem) -%{ - single_instruction; - mem : S3(read); - D0 : S0; // big decoder only - MEM : S3; // any mem -%} - -// Integer Store to Memory -pipe_class ialu_mem_reg(memory mem, rRegI src) %{ - single_instruction; - mem : S3(read); - src : S5(read); - D0 : S0; // big decoder only - ALU : S4; // any alu - MEM : S3; -%} - -// Long Store to Memory -pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{ - instruction_count(2); - mem : S3(read); - src : S5(read); - D0 : S0(2); // big decoder only; twice - ALU : S4(2); // any 2 alus - MEM : S3(2); // Both mems -%} - -// Integer Store to Memory -pipe_class ialu_mem_imm(memory mem) %{ - single_instruction; - mem : S3(read); - D0 : S0; // big decoder only - ALU : S4; // any alu - MEM : S3; -%} - -// Integer ALU0 reg-reg operation -pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{ - single_instruction; - dst : S4(write); - src : S3(read); - D0 : S0; // Big decoder only - ALU0 : S3; // only alu0 -%} - -// Integer ALU0 reg-mem operation -pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{ - single_instruction; - dst : S5(write); - mem : S3(read); - D0 : S0; // big decoder only - ALU0 : S4; // ALU0 only - MEM : S3; // any mem -%} - -// Integer ALU reg-reg operation -pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{ - single_instruction; - cr : S4(write); - src1 : S3(read); - src2 : S3(read); - DECODE : S0; // any decoder - ALU : S3; // any alu -%} - -// Integer ALU reg-imm operation -pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{ - single_instruction; - cr : S4(write); - src1 : S3(read); - DECODE : S0; // any decoder - ALU : S3; // any alu -%} - -// Integer ALU reg-mem operation -pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{ - single_instruction; - cr : S4(write); - src1 : S3(read); - src2 : S3(read); - D0 : S0; // big decoder only - ALU : S4; // any alu - MEM : S3; -%} - -// Conditional move reg-reg -pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{ - instruction_count(4); - y : S4(read); - q : S3(read); - p : S3(read); - DECODE : S0(4); // any decoder -%} - -// Conditional move reg-reg -pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{ - single_instruction; - dst : S4(write); - src : S3(read); - cr : S3(read); - DECODE : S0; // any decoder -%} - -// Conditional move reg-mem -pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{ - single_instruction; - dst : S4(write); - src : S3(read); - cr : S3(read); - DECODE : S0; // any decoder - MEM : S3; -%} - -// Conditional move reg-reg long -pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{ - single_instruction; - dst : S4(write); - src : S3(read); - cr : S3(read); - DECODE : S0(2); // any 2 decoders -%} - -// Conditional move double reg-reg -pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{ - single_instruction; - dst : S4(write); - src : S3(read); - cr : S3(read); - DECODE : S0; // any decoder -%} - -// Float reg-reg operation -pipe_class fpu_reg(regDPR dst) %{ - instruction_count(2); - dst : S3(read); - DECODE : S0(2); // any 2 decoders - FPU : S3; -%} - -// Float reg-reg operation -pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{ - instruction_count(2); - dst : S4(write); - src : S3(read); - DECODE : S0(2); // any 2 decoders - FPU : S3; -%} - -// Float reg-reg operation -pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{ - instruction_count(3); - dst : S4(write); - src1 : S3(read); - src2 : S3(read); - DECODE : S0(3); // any 3 decoders - FPU : S3(2); -%} - -// Float reg-reg operation -pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{ - instruction_count(4); - dst : S4(write); - src1 : S3(read); - src2 : S3(read); - src3 : S3(read); - DECODE : S0(4); // any 3 decoders - FPU : S3(2); -%} - -// Float reg-reg operation -pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{ - instruction_count(4); - dst : S4(write); - src1 : S3(read); - src2 : S3(read); - src3 : S3(read); - DECODE : S1(3); // any 3 decoders - D0 : S0; // Big decoder only - FPU : S3(2); - MEM : S3; -%} - -// Float reg-mem operation -pipe_class fpu_reg_mem(regDPR dst, memory mem) %{ - instruction_count(2); - dst : S5(write); - mem : S3(read); - D0 : S0; // big decoder only - DECODE : S1; // any decoder for FPU POP - FPU : S4; - MEM : S3; // any mem -%} - -// Float reg-mem operation -pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{ - instruction_count(3); - dst : S5(write); - src1 : S3(read); - mem : S3(read); - D0 : S0; // big decoder only - DECODE : S1(2); // any decoder for FPU POP - FPU : S4; - MEM : S3; // any mem -%} - -// Float mem-reg operation -pipe_class fpu_mem_reg(memory mem, regDPR src) %{ - instruction_count(2); - src : S5(read); - mem : S3(read); - DECODE : S0; // any decoder for FPU PUSH - D0 : S1; // big decoder only - FPU : S4; - MEM : S3; // any mem -%} - -pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{ - instruction_count(3); - src1 : S3(read); - src2 : S3(read); - mem : S3(read); - DECODE : S0(2); // any decoder for FPU PUSH - D0 : S1; // big decoder only - FPU : S4; - MEM : S3; // any mem -%} - -pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{ - instruction_count(3); - src1 : S3(read); - src2 : S3(read); - mem : S4(read); - DECODE : S0; // any decoder for FPU PUSH - D0 : S0(2); // big decoder only - FPU : S4; - MEM : S3(2); // any mem -%} - -pipe_class fpu_mem_mem(memory dst, memory src1) %{ - instruction_count(2); - src1 : S3(read); - dst : S4(read); - D0 : S0(2); // big decoder only - MEM : S3(2); // any mem -%} - -pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{ - instruction_count(3); - src1 : S3(read); - src2 : S3(read); - dst : S4(read); - D0 : S0(3); // big decoder only - FPU : S4; - MEM : S3(3); // any mem -%} - -pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{ - instruction_count(3); - src1 : S4(read); - mem : S4(read); - DECODE : S0; // any decoder for FPU PUSH - D0 : S0(2); // big decoder only - FPU : S4; - MEM : S3(2); // any mem -%} - -// Float load constant -pipe_class fpu_reg_con(regDPR dst) %{ - instruction_count(2); - dst : S5(write); - D0 : S0; // big decoder only for the load - DECODE : S1; // any decoder for FPU POP - FPU : S4; - MEM : S3; // any mem -%} - -// Float load constant -pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{ - instruction_count(3); - dst : S5(write); - src : S3(read); - D0 : S0; // big decoder only for the load - DECODE : S1(2); // any decoder for FPU POP - FPU : S4; - MEM : S3; // any mem -%} - -// UnConditional branch -pipe_class pipe_jmp( label labl ) %{ - single_instruction; - BR : S3; -%} - -// Conditional branch -pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{ - single_instruction; - cr : S1(read); - BR : S3; -%} - -// Allocation idiom -pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{ - instruction_count(1); force_serialization; - fixed_latency(6); - heap_ptr : S3(read); - DECODE : S0(3); - D0 : S2; - MEM : S3; - ALU : S3(2); - dst : S5(write); - BR : S5; -%} - -// Generic big/slow expanded idiom -pipe_class pipe_slow( ) %{ - instruction_count(10); multiple_bundles; force_serialization; - fixed_latency(100); - D0 : S0(2); - MEM : S3(2); -%} - -// The real do-nothing guy -pipe_class empty( ) %{ - instruction_count(0); -%} - -// Define the class for the Nop node -define %{ - MachNop = empty; -%} - -%} - -//----------INSTRUCTIONS------------------------------------------------------- -// -// match -- States which machine-independent subtree may be replaced -// by this instruction. -// ins_cost -- The estimated cost of this instruction is used by instruction -// selection to identify a minimum cost tree of machine -// instructions that matches a tree of machine-independent -// instructions. -// format -- A string providing the disassembly for this instruction. -// The value of an instruction's operand may be inserted -// by referring to it with a '$' prefix. -// opcode -- Three instruction opcodes may be provided. These are referred -// to within an encode class as $primary, $secondary, and $tertiary -// respectively. The primary opcode is commonly used to -// indicate the type of machine instruction, while secondary -// and tertiary are often used for prefix options or addressing -// modes. -// ins_encode -- A list of encode classes with parameters. The encode class -// name must have been defined in an 'enc_class' specification -// in the encode section of the architecture description. - -// Dummy reg-to-reg vector moves. Removed during post-selection cleanup. -// Load Float -instruct MoveF2LEG(legRegF dst, regF src) %{ - match(Set dst src); - format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %} - ins_encode %{ - ShouldNotReachHere(); - %} - ins_pipe( fpu_reg_reg ); -%} - -// Load Float -instruct MoveLEG2F(regF dst, legRegF src) %{ - match(Set dst src); - format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %} - ins_encode %{ - ShouldNotReachHere(); - %} - ins_pipe( fpu_reg_reg ); -%} - -// Load Float -instruct MoveF2VL(vlRegF dst, regF src) %{ - match(Set dst src); - format %{ "movss $dst,$src\t! load float (4 bytes)" %} - ins_encode %{ - ShouldNotReachHere(); - %} - ins_pipe( fpu_reg_reg ); -%} - -// Load Float -instruct MoveVL2F(regF dst, vlRegF src) %{ - match(Set dst src); - format %{ "movss $dst,$src\t! load float (4 bytes)" %} - ins_encode %{ - ShouldNotReachHere(); - %} - ins_pipe( fpu_reg_reg ); -%} - - - -// Load Double -instruct MoveD2LEG(legRegD dst, regD src) %{ - match(Set dst src); - format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %} - ins_encode %{ - ShouldNotReachHere(); - %} - ins_pipe( fpu_reg_reg ); -%} - -// Load Double -instruct MoveLEG2D(regD dst, legRegD src) %{ - match(Set dst src); - format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %} - ins_encode %{ - ShouldNotReachHere(); - %} - ins_pipe( fpu_reg_reg ); -%} - -// Load Double -instruct MoveD2VL(vlRegD dst, regD src) %{ - match(Set dst src); - format %{ "movsd $dst,$src\t! load double (8 bytes)" %} - ins_encode %{ - ShouldNotReachHere(); - %} - ins_pipe( fpu_reg_reg ); -%} - -// Load Double -instruct MoveVL2D(regD dst, vlRegD src) %{ - match(Set dst src); - format %{ "movsd $dst,$src\t! load double (8 bytes)" %} - ins_encode %{ - ShouldNotReachHere(); - %} - ins_pipe( fpu_reg_reg ); -%} - -//----------BSWAP-Instruction-------------------------------------------------- -instruct bytes_reverse_int(rRegI dst) %{ - match(Set dst (ReverseBytesI dst)); - - format %{ "BSWAP $dst" %} - opcode(0x0F, 0xC8); - ins_encode( OpcP, OpcSReg(dst) ); - ins_pipe( ialu_reg ); -%} - -instruct bytes_reverse_long(eRegL dst) %{ - match(Set dst (ReverseBytesL dst)); - - format %{ "BSWAP $dst.lo\n\t" - "BSWAP $dst.hi\n\t" - "XCHG $dst.lo $dst.hi" %} - - ins_cost(125); - ins_encode( bswap_long_bytes(dst) ); - ins_pipe( ialu_reg_reg); -%} - -instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{ - match(Set dst (ReverseBytesUS dst)); - effect(KILL cr); - - format %{ "BSWAP $dst\n\t" - "SHR $dst,16\n\t" %} - ins_encode %{ - __ bswapl($dst$$Register); - __ shrl($dst$$Register, 16); - %} - ins_pipe( ialu_reg ); -%} - -instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{ - match(Set dst (ReverseBytesS dst)); - effect(KILL cr); - - format %{ "BSWAP $dst\n\t" - "SAR $dst,16\n\t" %} - ins_encode %{ - __ bswapl($dst$$Register); - __ sarl($dst$$Register, 16); - %} - ins_pipe( ialu_reg ); -%} - - -//---------- Zeros Count Instructions ------------------------------------------ - -instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{ - predicate(UseCountLeadingZerosInstruction); - match(Set dst (CountLeadingZerosI src)); - effect(KILL cr); - - format %{ "LZCNT $dst, $src\t# count leading zeros (int)" %} - ins_encode %{ - __ lzcntl($dst$$Register, $src$$Register); - %} - ins_pipe(ialu_reg); -%} - -instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{ - predicate(!UseCountLeadingZerosInstruction); - match(Set dst (CountLeadingZerosI src)); - effect(KILL cr); - - format %{ "BSR $dst, $src\t# count leading zeros (int)\n\t" - "JNZ skip\n\t" - "MOV $dst, -1\n" - "skip:\n\t" - "NEG $dst\n\t" - "ADD $dst, 31" %} - ins_encode %{ - Register Rdst = $dst$$Register; - Register Rsrc = $src$$Register; - Label skip; - __ bsrl(Rdst, Rsrc); - __ jccb(Assembler::notZero, skip); - __ movl(Rdst, -1); - __ bind(skip); - __ negl(Rdst); - __ addl(Rdst, BitsPerInt - 1); - %} - ins_pipe(ialu_reg); -%} - -instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{ - predicate(UseCountLeadingZerosInstruction); - match(Set dst (CountLeadingZerosL src)); - effect(TEMP dst, KILL cr); - - format %{ "LZCNT $dst, $src.hi\t# count leading zeros (long)\n\t" - "JNC done\n\t" - "LZCNT $dst, $src.lo\n\t" - "ADD $dst, 32\n" - "done:" %} - ins_encode %{ - Register Rdst = $dst$$Register; - Register Rsrc = $src$$Register; - Label done; - __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc)); - __ jccb(Assembler::carryClear, done); - __ lzcntl(Rdst, Rsrc); - __ addl(Rdst, BitsPerInt); - __ bind(done); - %} - ins_pipe(ialu_reg); -%} - -instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{ - predicate(!UseCountLeadingZerosInstruction); - match(Set dst (CountLeadingZerosL src)); - effect(TEMP dst, KILL cr); - - format %{ "BSR $dst, $src.hi\t# count leading zeros (long)\n\t" - "JZ msw_is_zero\n\t" - "ADD $dst, 32\n\t" - "JMP not_zero\n" - "msw_is_zero:\n\t" - "BSR $dst, $src.lo\n\t" - "JNZ not_zero\n\t" - "MOV $dst, -1\n" - "not_zero:\n\t" - "NEG $dst\n\t" - "ADD $dst, 63\n" %} - ins_encode %{ - Register Rdst = $dst$$Register; - Register Rsrc = $src$$Register; - Label msw_is_zero; - Label not_zero; - __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc)); - __ jccb(Assembler::zero, msw_is_zero); - __ addl(Rdst, BitsPerInt); - __ jmpb(not_zero); - __ bind(msw_is_zero); - __ bsrl(Rdst, Rsrc); - __ jccb(Assembler::notZero, not_zero); - __ movl(Rdst, -1); - __ bind(not_zero); - __ negl(Rdst); - __ addl(Rdst, BitsPerLong - 1); - %} - ins_pipe(ialu_reg); -%} - -instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{ - predicate(UseCountTrailingZerosInstruction); - match(Set dst (CountTrailingZerosI src)); - effect(KILL cr); - - format %{ "TZCNT $dst, $src\t# count trailing zeros (int)" %} - ins_encode %{ - __ tzcntl($dst$$Register, $src$$Register); - %} - ins_pipe(ialu_reg); -%} - -instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{ - predicate(!UseCountTrailingZerosInstruction); - match(Set dst (CountTrailingZerosI src)); - effect(KILL cr); - - format %{ "BSF $dst, $src\t# count trailing zeros (int)\n\t" - "JNZ done\n\t" - "MOV $dst, 32\n" - "done:" %} - ins_encode %{ - Register Rdst = $dst$$Register; - Label done; - __ bsfl(Rdst, $src$$Register); - __ jccb(Assembler::notZero, done); - __ movl(Rdst, BitsPerInt); - __ bind(done); - %} - ins_pipe(ialu_reg); -%} - -instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{ - predicate(UseCountTrailingZerosInstruction); - match(Set dst (CountTrailingZerosL src)); - effect(TEMP dst, KILL cr); - - format %{ "TZCNT $dst, $src.lo\t# count trailing zeros (long) \n\t" - "JNC done\n\t" - "TZCNT $dst, $src.hi\n\t" - "ADD $dst, 32\n" - "done:" %} - ins_encode %{ - Register Rdst = $dst$$Register; - Register Rsrc = $src$$Register; - Label done; - __ tzcntl(Rdst, Rsrc); - __ jccb(Assembler::carryClear, done); - __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc)); - __ addl(Rdst, BitsPerInt); - __ bind(done); - %} - ins_pipe(ialu_reg); -%} - -instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{ - predicate(!UseCountTrailingZerosInstruction); - match(Set dst (CountTrailingZerosL src)); - effect(TEMP dst, KILL cr); - - format %{ "BSF $dst, $src.lo\t# count trailing zeros (long)\n\t" - "JNZ done\n\t" - "BSF $dst, $src.hi\n\t" - "JNZ msw_not_zero\n\t" - "MOV $dst, 32\n" - "msw_not_zero:\n\t" - "ADD $dst, 32\n" - "done:" %} - ins_encode %{ - Register Rdst = $dst$$Register; - Register Rsrc = $src$$Register; - Label msw_not_zero; - Label done; - __ bsfl(Rdst, Rsrc); - __ jccb(Assembler::notZero, done); - __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc)); - __ jccb(Assembler::notZero, msw_not_zero); - __ movl(Rdst, BitsPerInt); - __ bind(msw_not_zero); - __ addl(Rdst, BitsPerInt); - __ bind(done); - %} - ins_pipe(ialu_reg); -%} - - -//---------- Population Count Instructions ------------------------------------- - -instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{ - predicate(UsePopCountInstruction); - match(Set dst (PopCountI src)); - effect(KILL cr); - - format %{ "POPCNT $dst, $src" %} - ins_encode %{ - __ popcntl($dst$$Register, $src$$Register); - %} - ins_pipe(ialu_reg); -%} - -instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{ - predicate(UsePopCountInstruction); - match(Set dst (PopCountI (LoadI mem))); - effect(KILL cr); - - format %{ "POPCNT $dst, $mem" %} - ins_encode %{ - __ popcntl($dst$$Register, $mem$$Address); - %} - ins_pipe(ialu_reg); -%} - -// Note: Long.bitCount(long) returns an int. -instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{ - predicate(UsePopCountInstruction); - match(Set dst (PopCountL src)); - effect(KILL cr, TEMP tmp, TEMP dst); - - format %{ "POPCNT $dst, $src.lo\n\t" - "POPCNT $tmp, $src.hi\n\t" - "ADD $dst, $tmp" %} - ins_encode %{ - __ popcntl($dst$$Register, $src$$Register); - __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register)); - __ addl($dst$$Register, $tmp$$Register); - %} - ins_pipe(ialu_reg); -%} - -// Note: Long.bitCount(long) returns an int. -instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{ - predicate(UsePopCountInstruction); - match(Set dst (PopCountL (LoadL mem))); - effect(KILL cr, TEMP tmp, TEMP dst); - - format %{ "POPCNT $dst, $mem\n\t" - "POPCNT $tmp, $mem+4\n\t" - "ADD $dst, $tmp" %} - ins_encode %{ - //__ popcntl($dst$$Register, $mem$$Address$$first); - //__ popcntl($tmp$$Register, $mem$$Address$$second); - __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none)); - __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none)); - __ addl($dst$$Register, $tmp$$Register); - %} - ins_pipe(ialu_reg); -%} - - -//----------Load/Store/Move Instructions--------------------------------------- -//----------Load Instructions-------------------------------------------------- -// Load Byte (8bit signed) -instruct loadB(xRegI dst, memory mem) %{ - match(Set dst (LoadB mem)); - - ins_cost(125); - format %{ "MOVSX8 $dst,$mem\t# byte" %} - - ins_encode %{ - __ movsbl($dst$$Register, $mem$$Address); - %} - - ins_pipe(ialu_reg_mem); -%} - -// Load Byte (8bit signed) into Long Register -instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{ - match(Set dst (ConvI2L (LoadB mem))); - effect(KILL cr); - - ins_cost(375); - format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t" - "MOV $dst.hi,$dst.lo\n\t" - "SAR $dst.hi,7" %} - - ins_encode %{ - __ movsbl($dst$$Register, $mem$$Address); - __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register. - __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already signed extended. - %} - - ins_pipe(ialu_reg_mem); -%} - -// Load Unsigned Byte (8bit UNsigned) -instruct loadUB(xRegI dst, memory mem) %{ - match(Set dst (LoadUB mem)); - - ins_cost(125); - format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %} - - ins_encode %{ - __ movzbl($dst$$Register, $mem$$Address); - %} - - ins_pipe(ialu_reg_mem); -%} - -// Load Unsigned Byte (8 bit UNsigned) into Long Register -instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{ - match(Set dst (ConvI2L (LoadUB mem))); - effect(KILL cr); - - ins_cost(250); - format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t" - "XOR $dst.hi,$dst.hi" %} - - ins_encode %{ - Register Rdst = $dst$$Register; - __ movzbl(Rdst, $mem$$Address); - __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); - %} - - ins_pipe(ialu_reg_mem); -%} - -// Load Unsigned Byte (8 bit UNsigned) with mask into Long Register -instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{ - match(Set dst (ConvI2L (AndI (LoadUB mem) mask))); - effect(KILL cr); - - format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t" - "XOR $dst.hi,$dst.hi\n\t" - "AND $dst.lo,right_n_bits($mask, 8)" %} - ins_encode %{ - Register Rdst = $dst$$Register; - __ movzbl(Rdst, $mem$$Address); - __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); - __ andl(Rdst, $mask$$constant & right_n_bits(8)); - %} - ins_pipe(ialu_reg_mem); -%} - -// Load Short (16bit signed) -instruct loadS(rRegI dst, memory mem) %{ - match(Set dst (LoadS mem)); - - ins_cost(125); - format %{ "MOVSX $dst,$mem\t# short" %} - - ins_encode %{ - __ movswl($dst$$Register, $mem$$Address); - %} - - ins_pipe(ialu_reg_mem); -%} - -// Load Short (16 bit signed) to Byte (8 bit signed) -instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{ - match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour)); - - ins_cost(125); - format %{ "MOVSX $dst, $mem\t# short -> byte" %} - ins_encode %{ - __ movsbl($dst$$Register, $mem$$Address); - %} - ins_pipe(ialu_reg_mem); -%} - -// Load Short (16bit signed) into Long Register -instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{ - match(Set dst (ConvI2L (LoadS mem))); - effect(KILL cr); - - ins_cost(375); - format %{ "MOVSX $dst.lo,$mem\t# short -> long\n\t" - "MOV $dst.hi,$dst.lo\n\t" - "SAR $dst.hi,15" %} - - ins_encode %{ - __ movswl($dst$$Register, $mem$$Address); - __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register. - __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already signed extended. - %} - - ins_pipe(ialu_reg_mem); -%} - -// Load Unsigned Short/Char (16bit unsigned) -instruct loadUS(rRegI dst, memory mem) %{ - match(Set dst (LoadUS mem)); - - ins_cost(125); - format %{ "MOVZX $dst,$mem\t# ushort/char -> int" %} - - ins_encode %{ - __ movzwl($dst$$Register, $mem$$Address); - %} - - ins_pipe(ialu_reg_mem); -%} - -// Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed) -instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{ - match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour)); - - ins_cost(125); - format %{ "MOVSX $dst, $mem\t# ushort -> byte" %} - ins_encode %{ - __ movsbl($dst$$Register, $mem$$Address); - %} - ins_pipe(ialu_reg_mem); -%} - -// Load Unsigned Short/Char (16 bit UNsigned) into Long Register -instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{ - match(Set dst (ConvI2L (LoadUS mem))); - effect(KILL cr); - - ins_cost(250); - format %{ "MOVZX $dst.lo,$mem\t# ushort/char -> long\n\t" - "XOR $dst.hi,$dst.hi" %} - - ins_encode %{ - __ movzwl($dst$$Register, $mem$$Address); - __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register)); - %} - - ins_pipe(ialu_reg_mem); -%} - -// Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register -instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{ - match(Set dst (ConvI2L (AndI (LoadUS mem) mask))); - effect(KILL cr); - - format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t" - "XOR $dst.hi,$dst.hi" %} - ins_encode %{ - Register Rdst = $dst$$Register; - __ movzbl(Rdst, $mem$$Address); - __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); - %} - ins_pipe(ialu_reg_mem); -%} - -// Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register -instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{ - match(Set dst (ConvI2L (AndI (LoadUS mem) mask))); - effect(KILL cr); - - format %{ "MOVZX $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t" - "XOR $dst.hi,$dst.hi\n\t" - "AND $dst.lo,right_n_bits($mask, 16)" %} - ins_encode %{ - Register Rdst = $dst$$Register; - __ movzwl(Rdst, $mem$$Address); - __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); - __ andl(Rdst, $mask$$constant & right_n_bits(16)); - %} - ins_pipe(ialu_reg_mem); -%} - -// Load Integer -instruct loadI(rRegI dst, memory mem) %{ - match(Set dst (LoadI mem)); - - ins_cost(125); - format %{ "MOV $dst,$mem\t# int" %} - - ins_encode %{ - __ movl($dst$$Register, $mem$$Address); - %} - - ins_pipe(ialu_reg_mem); -%} - -// Load Integer (32 bit signed) to Byte (8 bit signed) -instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{ - match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour)); - - ins_cost(125); - format %{ "MOVSX $dst, $mem\t# int -> byte" %} - ins_encode %{ - __ movsbl($dst$$Register, $mem$$Address); - %} - ins_pipe(ialu_reg_mem); -%} - -// Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned) -instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{ - match(Set dst (AndI (LoadI mem) mask)); - - ins_cost(125); - format %{ "MOVZX $dst, $mem\t# int -> ubyte" %} - ins_encode %{ - __ movzbl($dst$$Register, $mem$$Address); - %} - ins_pipe(ialu_reg_mem); -%} - -// Load Integer (32 bit signed) to Short (16 bit signed) -instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{ - match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen)); - - ins_cost(125); - format %{ "MOVSX $dst, $mem\t# int -> short" %} - ins_encode %{ - __ movswl($dst$$Register, $mem$$Address); - %} - ins_pipe(ialu_reg_mem); -%} - -// Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned) -instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{ - match(Set dst (AndI (LoadI mem) mask)); - - ins_cost(125); - format %{ "MOVZX $dst, $mem\t# int -> ushort/char" %} - ins_encode %{ - __ movzwl($dst$$Register, $mem$$Address); - %} - ins_pipe(ialu_reg_mem); -%} - -// Load Integer into Long Register -instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{ - match(Set dst (ConvI2L (LoadI mem))); - effect(KILL cr); - - ins_cost(375); - format %{ "MOV $dst.lo,$mem\t# int -> long\n\t" - "MOV $dst.hi,$dst.lo\n\t" - "SAR $dst.hi,31" %} - - ins_encode %{ - __ movl($dst$$Register, $mem$$Address); - __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register. - __ sarl(HIGH_FROM_LOW($dst$$Register), 31); - %} - - ins_pipe(ialu_reg_mem); -%} - -// Load Integer with mask 0xFF into Long Register -instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{ - match(Set dst (ConvI2L (AndI (LoadI mem) mask))); - effect(KILL cr); - - format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t" - "XOR $dst.hi,$dst.hi" %} - ins_encode %{ - Register Rdst = $dst$$Register; - __ movzbl(Rdst, $mem$$Address); - __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); - %} - ins_pipe(ialu_reg_mem); -%} - -// Load Integer with mask 0xFFFF into Long Register -instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{ - match(Set dst (ConvI2L (AndI (LoadI mem) mask))); - effect(KILL cr); - - format %{ "MOVZX $dst.lo,$mem\t# int & 0xFFFF -> long\n\t" - "XOR $dst.hi,$dst.hi" %} - ins_encode %{ - Register Rdst = $dst$$Register; - __ movzwl(Rdst, $mem$$Address); - __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); - %} - ins_pipe(ialu_reg_mem); -%} - -// Load Integer with 31-bit mask into Long Register -instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{ - match(Set dst (ConvI2L (AndI (LoadI mem) mask))); - effect(KILL cr); - - format %{ "MOV $dst.lo,$mem\t# int & 31-bit mask -> long\n\t" - "XOR $dst.hi,$dst.hi\n\t" - "AND $dst.lo,$mask" %} - ins_encode %{ - Register Rdst = $dst$$Register; - __ movl(Rdst, $mem$$Address); - __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); - __ andl(Rdst, $mask$$constant); - %} - ins_pipe(ialu_reg_mem); -%} - -// Load Unsigned Integer into Long Register -instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{ - match(Set dst (AndL (ConvI2L (LoadI mem)) mask)); - effect(KILL cr); - - ins_cost(250); - format %{ "MOV $dst.lo,$mem\t# uint -> long\n\t" - "XOR $dst.hi,$dst.hi" %} - - ins_encode %{ - __ movl($dst$$Register, $mem$$Address); - __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register)); - %} - - ins_pipe(ialu_reg_mem); -%} - -// Load Long. Cannot clobber address while loading, so restrict address -// register to ESI -instruct loadL(eRegL dst, load_long_memory mem) %{ - predicate(!((LoadLNode*)n)->require_atomic_access()); - match(Set dst (LoadL mem)); - - ins_cost(250); - format %{ "MOV $dst.lo,$mem\t# long\n\t" - "MOV $dst.hi,$mem+4" %} - - ins_encode %{ - Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none); - Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none); - __ movl($dst$$Register, Amemlo); - __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi); - %} - - ins_pipe(ialu_reg_long_mem); -%} - -// Volatile Load Long. Must be atomic, so do 64-bit FILD -// then store it down to the stack and reload on the int -// side. -instruct loadL_volatile(stackSlotL dst, memory mem) %{ - predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access()); - match(Set dst (LoadL mem)); - - ins_cost(200); - format %{ "FILD $mem\t# Atomic volatile long load\n\t" - "FISTp $dst" %} - ins_encode(enc_loadL_volatile(mem,dst)); - ins_pipe( fpu_reg_mem ); -%} - -instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{ - predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access()); - match(Set dst (LoadL mem)); - effect(TEMP tmp); - ins_cost(180); - format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t" - "MOVSD $dst,$tmp" %} - ins_encode %{ - __ movdbl($tmp$$XMMRegister, $mem$$Address); - __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{ - predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access()); - match(Set dst (LoadL mem)); - effect(TEMP tmp); - ins_cost(160); - format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t" - "MOVD $dst.lo,$tmp\n\t" - "PSRLQ $tmp,32\n\t" - "MOVD $dst.hi,$tmp" %} - ins_encode %{ - __ movdbl($tmp$$XMMRegister, $mem$$Address); - __ movdl($dst$$Register, $tmp$$XMMRegister); - __ psrlq($tmp$$XMMRegister, 32); - __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -// Load Range -instruct loadRange(rRegI dst, memory mem) %{ - match(Set dst (LoadRange mem)); - - ins_cost(125); - format %{ "MOV $dst,$mem" %} - opcode(0x8B); - ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark); - ins_pipe( ialu_reg_mem ); -%} - - -// Load Pointer -instruct loadP(eRegP dst, memory mem) %{ - match(Set dst (LoadP mem)); - - ins_cost(125); - format %{ "MOV $dst,$mem" %} - opcode(0x8B); - ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark); - ins_pipe( ialu_reg_mem ); -%} - -// Load Klass Pointer -instruct loadKlass(eRegP dst, memory mem) %{ - match(Set dst (LoadKlass mem)); - - ins_cost(125); - format %{ "MOV $dst,$mem" %} - opcode(0x8B); - ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark); - ins_pipe( ialu_reg_mem ); -%} - -// Load Double -instruct loadDPR(regDPR dst, memory mem) %{ - predicate(UseSSE<=1); - match(Set dst (LoadD mem)); - - ins_cost(150); - format %{ "FLD_D ST,$mem\n\t" - "FSTP $dst" %} - opcode(0xDD); /* DD /0 */ - ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem), - Pop_Reg_DPR(dst), ClearInstMark ); - ins_pipe( fpu_reg_mem ); -%} - -// Load Double to XMM -instruct loadD(regD dst, memory mem) %{ - predicate(UseSSE>=2 && UseXmmLoadAndClearUpper); - match(Set dst (LoadD mem)); - ins_cost(145); - format %{ "MOVSD $dst,$mem" %} - ins_encode %{ - __ movdbl ($dst$$XMMRegister, $mem$$Address); - %} - ins_pipe( pipe_slow ); -%} - -instruct loadD_partial(regD dst, memory mem) %{ - predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper); - match(Set dst (LoadD mem)); - ins_cost(145); - format %{ "MOVLPD $dst,$mem" %} - ins_encode %{ - __ movdbl ($dst$$XMMRegister, $mem$$Address); - %} - ins_pipe( pipe_slow ); -%} - -// Load to XMM register (single-precision floating point) -// MOVSS instruction -instruct loadF(regF dst, memory mem) %{ - predicate(UseSSE>=1); - match(Set dst (LoadF mem)); - ins_cost(145); - format %{ "MOVSS $dst,$mem" %} - ins_encode %{ - __ movflt ($dst$$XMMRegister, $mem$$Address); - %} - ins_pipe( pipe_slow ); -%} - -// Load Float -instruct loadFPR(regFPR dst, memory mem) %{ - predicate(UseSSE==0); - match(Set dst (LoadF mem)); - - ins_cost(150); - format %{ "FLD_S ST,$mem\n\t" - "FSTP $dst" %} - opcode(0xD9); /* D9 /0 */ - ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem), - Pop_Reg_FPR(dst), ClearInstMark ); - ins_pipe( fpu_reg_mem ); -%} - -// Load Effective Address -instruct leaP8(eRegP dst, indOffset8 mem) %{ - match(Set dst mem); - - ins_cost(110); - format %{ "LEA $dst,$mem" %} - opcode(0x8D); - ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark); - ins_pipe( ialu_reg_reg_fat ); -%} - -instruct leaP32(eRegP dst, indOffset32 mem) %{ - match(Set dst mem); - - ins_cost(110); - format %{ "LEA $dst,$mem" %} - opcode(0x8D); - ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark); - ins_pipe( ialu_reg_reg_fat ); -%} - -instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{ - match(Set dst mem); - - ins_cost(110); - format %{ "LEA $dst,$mem" %} - opcode(0x8D); - ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark); - ins_pipe( ialu_reg_reg_fat ); -%} - -instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{ - match(Set dst mem); - - ins_cost(110); - format %{ "LEA $dst,$mem" %} - opcode(0x8D); - ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark); - ins_pipe( ialu_reg_reg_fat ); -%} - -instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{ - match(Set dst mem); - - ins_cost(110); - format %{ "LEA $dst,$mem" %} - opcode(0x8D); - ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark); - ins_pipe( ialu_reg_reg_fat ); -%} - -// Load Constant -instruct loadConI(rRegI dst, immI src) %{ - match(Set dst src); - - format %{ "MOV $dst,$src" %} - ins_encode( SetInstMark, LdImmI(dst, src), ClearInstMark ); - ins_pipe( ialu_reg_fat ); -%} - -// Load Constant zero -instruct loadConI0(rRegI dst, immI_0 src, eFlagsReg cr) %{ - match(Set dst src); - effect(KILL cr); - - ins_cost(50); - format %{ "XOR $dst,$dst" %} - opcode(0x33); /* + rd */ - ins_encode( OpcP, RegReg( dst, dst ) ); - ins_pipe( ialu_reg ); -%} - -instruct loadConP(eRegP dst, immP src) %{ - match(Set dst src); - - format %{ "MOV $dst,$src" %} - opcode(0xB8); /* + rd */ - ins_encode( SetInstMark, LdImmP(dst, src), ClearInstMark ); - ins_pipe( ialu_reg_fat ); -%} - -instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{ - match(Set dst src); - effect(KILL cr); - ins_cost(200); - format %{ "MOV $dst.lo,$src.lo\n\t" - "MOV $dst.hi,$src.hi" %} - opcode(0xB8); - ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) ); - ins_pipe( ialu_reg_long_fat ); -%} - -instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{ - match(Set dst src); - effect(KILL cr); - ins_cost(150); - format %{ "XOR $dst.lo,$dst.lo\n\t" - "XOR $dst.hi,$dst.hi" %} - opcode(0x33,0x33); - ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) ); - ins_pipe( ialu_reg_long ); -%} - -// The instruction usage is guarded by predicate in operand immFPR(). -instruct loadConFPR(regFPR dst, immFPR con) %{ - match(Set dst con); - ins_cost(125); - format %{ "FLD_S ST,[$constantaddress]\t# load from constant table: float=$con\n\t" - "FSTP $dst" %} - ins_encode %{ - __ fld_s($constantaddress($con)); - __ fstp_d($dst$$reg); - %} - ins_pipe(fpu_reg_con); -%} - -// The instruction usage is guarded by predicate in operand immFPR0(). -instruct loadConFPR0(regFPR dst, immFPR0 con) %{ - match(Set dst con); - ins_cost(125); - format %{ "FLDZ ST\n\t" - "FSTP $dst" %} - ins_encode %{ - __ fldz(); - __ fstp_d($dst$$reg); - %} - ins_pipe(fpu_reg_con); -%} - -// The instruction usage is guarded by predicate in operand immFPR1(). -instruct loadConFPR1(regFPR dst, immFPR1 con) %{ - match(Set dst con); - ins_cost(125); - format %{ "FLD1 ST\n\t" - "FSTP $dst" %} - ins_encode %{ - __ fld1(); - __ fstp_d($dst$$reg); - %} - ins_pipe(fpu_reg_con); -%} - -// The instruction usage is guarded by predicate in operand immF(). -instruct loadConF(regF dst, immF con) %{ - match(Set dst con); - ins_cost(125); - format %{ "MOVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %} - ins_encode %{ - __ movflt($dst$$XMMRegister, $constantaddress($con)); - %} - ins_pipe(pipe_slow); -%} - -// The instruction usage is guarded by predicate in operand immF0(). -instruct loadConF0(regF dst, immF0 src) %{ - match(Set dst src); - ins_cost(100); - format %{ "XORPS $dst,$dst\t# float 0.0" %} - ins_encode %{ - __ xorps($dst$$XMMRegister, $dst$$XMMRegister); - %} - ins_pipe(pipe_slow); -%} - -// The instruction usage is guarded by predicate in operand immDPR(). -instruct loadConDPR(regDPR dst, immDPR con) %{ - match(Set dst con); - ins_cost(125); - - format %{ "FLD_D ST,[$constantaddress]\t# load from constant table: double=$con\n\t" - "FSTP $dst" %} - ins_encode %{ - __ fld_d($constantaddress($con)); - __ fstp_d($dst$$reg); - %} - ins_pipe(fpu_reg_con); -%} - -// The instruction usage is guarded by predicate in operand immDPR0(). -instruct loadConDPR0(regDPR dst, immDPR0 con) %{ - match(Set dst con); - ins_cost(125); - - format %{ "FLDZ ST\n\t" - "FSTP $dst" %} - ins_encode %{ - __ fldz(); - __ fstp_d($dst$$reg); - %} - ins_pipe(fpu_reg_con); -%} - -// The instruction usage is guarded by predicate in operand immDPR1(). -instruct loadConDPR1(regDPR dst, immDPR1 con) %{ - match(Set dst con); - ins_cost(125); - - format %{ "FLD1 ST\n\t" - "FSTP $dst" %} - ins_encode %{ - __ fld1(); - __ fstp_d($dst$$reg); - %} - ins_pipe(fpu_reg_con); -%} - -// The instruction usage is guarded by predicate in operand immD(). -instruct loadConD(regD dst, immD con) %{ - match(Set dst con); - ins_cost(125); - format %{ "MOVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %} - ins_encode %{ - __ movdbl($dst$$XMMRegister, $constantaddress($con)); - %} - ins_pipe(pipe_slow); -%} - -// The instruction usage is guarded by predicate in operand immD0(). -instruct loadConD0(regD dst, immD0 src) %{ - match(Set dst src); - ins_cost(100); - format %{ "XORPD $dst,$dst\t# double 0.0" %} - ins_encode %{ - __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -// Load Stack Slot -instruct loadSSI(rRegI dst, stackSlotI src) %{ - match(Set dst src); - ins_cost(125); - - format %{ "MOV $dst,$src" %} - opcode(0x8B); - ins_encode( SetInstMark, OpcP, RegMem(dst,src), ClearInstMark); - ins_pipe( ialu_reg_mem ); -%} - -instruct loadSSL(eRegL dst, stackSlotL src) %{ - match(Set dst src); - - ins_cost(200); - format %{ "MOV $dst,$src.lo\n\t" - "MOV $dst+4,$src.hi" %} - opcode(0x8B, 0x8B); - ins_encode( SetInstMark, OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ), ClearInstMark ); - ins_pipe( ialu_mem_long_reg ); -%} - -// Load Stack Slot -instruct loadSSP(eRegP dst, stackSlotP src) %{ - match(Set dst src); - ins_cost(125); - - format %{ "MOV $dst,$src" %} - opcode(0x8B); - ins_encode( SetInstMark, OpcP, RegMem(dst,src), ClearInstMark); - ins_pipe( ialu_reg_mem ); -%} - -// Load Stack Slot -instruct loadSSF(regFPR dst, stackSlotF src) %{ - match(Set dst src); - ins_cost(125); - - format %{ "FLD_S $src\n\t" - "FSTP $dst" %} - opcode(0xD9); /* D9 /0, FLD m32real */ - ins_encode( SetInstMark, OpcP, RMopc_Mem_no_oop(0x00,src), - Pop_Reg_FPR(dst), ClearInstMark ); - ins_pipe( fpu_reg_mem ); -%} - -// Load Stack Slot -instruct loadSSD(regDPR dst, stackSlotD src) %{ - match(Set dst src); - ins_cost(125); - - format %{ "FLD_D $src\n\t" - "FSTP $dst" %} - opcode(0xDD); /* DD /0, FLD m64real */ - ins_encode( SetInstMark, OpcP, RMopc_Mem_no_oop(0x00,src), - Pop_Reg_DPR(dst), ClearInstMark ); - ins_pipe( fpu_reg_mem ); -%} - -// Prefetch instructions for allocation. -// Must be safe to execute with invalid address (cannot fault). - -instruct prefetchAlloc0( memory mem ) %{ - predicate(UseSSE==0 && AllocatePrefetchInstr!=3); - match(PrefetchAllocation mem); - ins_cost(0); - size(0); - format %{ "Prefetch allocation (non-SSE is empty encoding)" %} - ins_encode(); - ins_pipe(empty); -%} - -instruct prefetchAlloc( memory mem ) %{ - predicate(AllocatePrefetchInstr==3); - match( PrefetchAllocation mem ); - ins_cost(100); - - format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %} - ins_encode %{ - __ prefetchw($mem$$Address); - %} - ins_pipe(ialu_mem); -%} - -instruct prefetchAllocNTA( memory mem ) %{ - predicate(UseSSE>=1 && AllocatePrefetchInstr==0); - match(PrefetchAllocation mem); - ins_cost(100); - - format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %} - ins_encode %{ - __ prefetchnta($mem$$Address); - %} - ins_pipe(ialu_mem); -%} - -instruct prefetchAllocT0( memory mem ) %{ - predicate(UseSSE>=1 && AllocatePrefetchInstr==1); - match(PrefetchAllocation mem); - ins_cost(100); - - format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %} - ins_encode %{ - __ prefetcht0($mem$$Address); - %} - ins_pipe(ialu_mem); -%} - -instruct prefetchAllocT2( memory mem ) %{ - predicate(UseSSE>=1 && AllocatePrefetchInstr==2); - match(PrefetchAllocation mem); - ins_cost(100); - - format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %} - ins_encode %{ - __ prefetcht2($mem$$Address); - %} - ins_pipe(ialu_mem); -%} - -//----------Store Instructions------------------------------------------------- - -// Store Byte -instruct storeB(memory mem, xRegI src) %{ - match(Set mem (StoreB mem src)); - - ins_cost(125); - format %{ "MOV8 $mem,$src" %} - opcode(0x88); - ins_encode( SetInstMark, OpcP, RegMem( src, mem ), ClearInstMark ); - ins_pipe( ialu_mem_reg ); -%} - -// Store Char/Short -instruct storeC(memory mem, rRegI src) %{ - match(Set mem (StoreC mem src)); - - ins_cost(125); - format %{ "MOV16 $mem,$src" %} - opcode(0x89, 0x66); - ins_encode( SetInstMark, OpcS, OpcP, RegMem( src, mem ), ClearInstMark ); - ins_pipe( ialu_mem_reg ); -%} - -// Store Integer -instruct storeI(memory mem, rRegI src) %{ - match(Set mem (StoreI mem src)); - - ins_cost(125); - format %{ "MOV $mem,$src" %} - opcode(0x89); - ins_encode( SetInstMark, OpcP, RegMem( src, mem ), ClearInstMark ); - ins_pipe( ialu_mem_reg ); -%} - -// Store Long -instruct storeL(long_memory mem, eRegL src) %{ - predicate(!((StoreLNode*)n)->require_atomic_access()); - match(Set mem (StoreL mem src)); - - ins_cost(200); - format %{ "MOV $mem,$src.lo\n\t" - "MOV $mem+4,$src.hi" %} - opcode(0x89, 0x89); - ins_encode( SetInstMark, OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ), ClearInstMark ); - ins_pipe( ialu_mem_long_reg ); -%} - -// Store Long to Integer -instruct storeL2I(memory mem, eRegL src) %{ - match(Set mem (StoreI mem (ConvL2I src))); - - format %{ "MOV $mem,$src.lo\t# long -> int" %} - ins_encode %{ - __ movl($mem$$Address, $src$$Register); - %} - ins_pipe(ialu_mem_reg); -%} - -// Volatile Store Long. Must be atomic, so move it into -// the FP TOS and then do a 64-bit FIST. Has to probe the -// target address before the store (for null-ptr checks) -// so the memory operand is used twice in the encoding. -instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{ - predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access()); - match(Set mem (StoreL mem src)); - effect( KILL cr ); - ins_cost(400); - format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t" - "FILD $src\n\t" - "FISTp $mem\t # 64-bit atomic volatile long store" %} - opcode(0x3B); - ins_encode( SetInstMark, OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src), ClearInstMark); - ins_pipe( fpu_reg_mem ); -%} - -instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{ - predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access()); - match(Set mem (StoreL mem src)); - effect( TEMP tmp, KILL cr ); - ins_cost(380); - format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t" - "MOVSD $tmp,$src\n\t" - "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %} - ins_encode %{ - __ cmpl(rax, $mem$$Address); - __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp)); - __ movdbl($mem$$Address, $tmp$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{ - predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access()); - match(Set mem (StoreL mem src)); - effect( TEMP tmp2 , TEMP tmp, KILL cr ); - ins_cost(360); - format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t" - "MOVD $tmp,$src.lo\n\t" - "MOVD $tmp2,$src.hi\n\t" - "PUNPCKLDQ $tmp,$tmp2\n\t" - "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %} - ins_encode %{ - __ cmpl(rax, $mem$$Address); - __ movdl($tmp$$XMMRegister, $src$$Register); - __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register)); - __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister); - __ movdbl($mem$$Address, $tmp$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -// Store Pointer; for storing unknown oops and raw pointers -instruct storeP(memory mem, anyRegP src) %{ - match(Set mem (StoreP mem src)); - - ins_cost(125); - format %{ "MOV $mem,$src" %} - opcode(0x89); - ins_encode( SetInstMark, OpcP, RegMem( src, mem ), ClearInstMark ); - ins_pipe( ialu_mem_reg ); -%} - -// Store Integer Immediate -instruct storeImmI(memory mem, immI src) %{ - match(Set mem (StoreI mem src)); - - ins_cost(150); - format %{ "MOV $mem,$src" %} - opcode(0xC7); /* C7 /0 */ - ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem), Con32(src), ClearInstMark); - ins_pipe( ialu_mem_imm ); -%} - -// Store Short/Char Immediate -instruct storeImmI16(memory mem, immI16 src) %{ - predicate(UseStoreImmI16); - match(Set mem (StoreC mem src)); - - ins_cost(150); - format %{ "MOV16 $mem,$src" %} - opcode(0xC7); /* C7 /0 Same as 32 store immediate with prefix */ - ins_encode( SetInstMark, SizePrefix, OpcP, RMopc_Mem(0x00,mem), Con16(src), ClearInstMark); - ins_pipe( ialu_mem_imm ); -%} - -// Store Pointer Immediate; null pointers or constant oops that do not -// need card-mark barriers. -instruct storeImmP(memory mem, immP src) %{ - match(Set mem (StoreP mem src)); - - ins_cost(150); - format %{ "MOV $mem,$src" %} - opcode(0xC7); /* C7 /0 */ - ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem), Con32( src ), ClearInstMark); - ins_pipe( ialu_mem_imm ); -%} - -// Store Byte Immediate -instruct storeImmB(memory mem, immI8 src) %{ - match(Set mem (StoreB mem src)); - - ins_cost(150); - format %{ "MOV8 $mem,$src" %} - opcode(0xC6); /* C6 /0 */ - ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem), Con8or32(src), ClearInstMark); - ins_pipe( ialu_mem_imm ); -%} - -// Store Double -instruct storeDPR( memory mem, regDPR1 src) %{ - predicate(UseSSE<=1); - match(Set mem (StoreD mem src)); - - ins_cost(100); - format %{ "FST_D $mem,$src" %} - opcode(0xDD); /* DD /2 */ - ins_encode( enc_FPR_store(mem,src) ); - ins_pipe( fpu_mem_reg ); -%} - -// Store double does rounding on x86 -instruct storeDPR_rounded( memory mem, regDPR1 src) %{ - predicate(UseSSE<=1); - match(Set mem (StoreD mem (RoundDouble src))); - - ins_cost(100); - format %{ "FST_D $mem,$src\t# round" %} - opcode(0xDD); /* DD /2 */ - ins_encode( enc_FPR_store(mem,src) ); - ins_pipe( fpu_mem_reg ); -%} - -// Store XMM register to memory (double-precision floating points) -// MOVSD instruction -instruct storeD(memory mem, regD src) %{ - predicate(UseSSE>=2); - match(Set mem (StoreD mem src)); - ins_cost(95); - format %{ "MOVSD $mem,$src" %} - ins_encode %{ - __ movdbl($mem$$Address, $src$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -// Store XMM register to memory (single-precision floating point) -// MOVSS instruction -instruct storeF(memory mem, regF src) %{ - predicate(UseSSE>=1); - match(Set mem (StoreF mem src)); - ins_cost(95); - format %{ "MOVSS $mem,$src" %} - ins_encode %{ - __ movflt($mem$$Address, $src$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - - -// Store Float -instruct storeFPR( memory mem, regFPR1 src) %{ - predicate(UseSSE==0); - match(Set mem (StoreF mem src)); - - ins_cost(100); - format %{ "FST_S $mem,$src" %} - opcode(0xD9); /* D9 /2 */ - ins_encode( enc_FPR_store(mem,src) ); - ins_pipe( fpu_mem_reg ); -%} - -// Store Float does rounding on x86 -instruct storeFPR_rounded( memory mem, regFPR1 src) %{ - predicate(UseSSE==0); - match(Set mem (StoreF mem (RoundFloat src))); - - ins_cost(100); - format %{ "FST_S $mem,$src\t# round" %} - opcode(0xD9); /* D9 /2 */ - ins_encode( enc_FPR_store(mem,src) ); - ins_pipe( fpu_mem_reg ); -%} - -// Store Float does rounding on x86 -instruct storeFPR_Drounded( memory mem, regDPR1 src) %{ - predicate(UseSSE<=1); - match(Set mem (StoreF mem (ConvD2F src))); - - ins_cost(100); - format %{ "FST_S $mem,$src\t# D-round" %} - opcode(0xD9); /* D9 /2 */ - ins_encode( enc_FPR_store(mem,src) ); - ins_pipe( fpu_mem_reg ); -%} - -// Store immediate Float value (it is faster than store from FPU register) -// The instruction usage is guarded by predicate in operand immFPR(). -instruct storeFPR_imm( memory mem, immFPR src) %{ - match(Set mem (StoreF mem src)); - - ins_cost(50); - format %{ "MOV $mem,$src\t# store float" %} - opcode(0xC7); /* C7 /0 */ - ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem), Con32FPR_as_bits(src), ClearInstMark); - ins_pipe( ialu_mem_imm ); -%} - -// Store immediate Float value (it is faster than store from XMM register) -// The instruction usage is guarded by predicate in operand immF(). -instruct storeF_imm( memory mem, immF src) %{ - match(Set mem (StoreF mem src)); - - ins_cost(50); - format %{ "MOV $mem,$src\t# store float" %} - opcode(0xC7); /* C7 /0 */ - ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem), Con32F_as_bits(src), ClearInstMark); - ins_pipe( ialu_mem_imm ); -%} - -// Store Integer to stack slot -instruct storeSSI(stackSlotI dst, rRegI src) %{ - match(Set dst src); - - ins_cost(100); - format %{ "MOV $dst,$src" %} - opcode(0x89); - ins_encode( OpcPRegSS( dst, src ) ); - ins_pipe( ialu_mem_reg ); -%} - -// Store Integer to stack slot -instruct storeSSP(stackSlotP dst, eRegP src) %{ - match(Set dst src); - - ins_cost(100); - format %{ "MOV $dst,$src" %} - opcode(0x89); - ins_encode( OpcPRegSS( dst, src ) ); - ins_pipe( ialu_mem_reg ); -%} - -// Store Long to stack slot -instruct storeSSL(stackSlotL dst, eRegL src) %{ - match(Set dst src); - - ins_cost(200); - format %{ "MOV $dst,$src.lo\n\t" - "MOV $dst+4,$src.hi" %} - opcode(0x89, 0x89); - ins_encode( SetInstMark, OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ), ClearInstMark ); - ins_pipe( ialu_mem_long_reg ); -%} - -//----------MemBar Instructions----------------------------------------------- -// Memory barrier flavors - -instruct membar_acquire() %{ - match(MemBarAcquire); - match(LoadFence); - ins_cost(400); - - size(0); - format %{ "MEMBAR-acquire ! (empty encoding)" %} - ins_encode(); - ins_pipe(empty); -%} - -instruct membar_acquire_lock() %{ - match(MemBarAcquireLock); - ins_cost(0); - - size(0); - format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %} - ins_encode( ); - ins_pipe(empty); -%} - -instruct membar_release() %{ - match(MemBarRelease); - match(StoreFence); - ins_cost(400); - - size(0); - format %{ "MEMBAR-release ! (empty encoding)" %} - ins_encode( ); - ins_pipe(empty); -%} - -instruct membar_release_lock() %{ - match(MemBarReleaseLock); - ins_cost(0); - - size(0); - format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %} - ins_encode( ); - ins_pipe(empty); -%} - -instruct membar_volatile(eFlagsReg cr) %{ - match(MemBarVolatile); - effect(KILL cr); - ins_cost(400); - - format %{ - $$template - $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile" - %} - ins_encode %{ - __ membar(Assembler::StoreLoad); - %} - ins_pipe(pipe_slow); -%} - -instruct unnecessary_membar_volatile() %{ - match(MemBarVolatile); - predicate(Matcher::post_store_load_barrier(n)); - ins_cost(0); - - size(0); - format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %} - ins_encode( ); - ins_pipe(empty); -%} - -instruct membar_storestore() %{ - match(MemBarStoreStore); - match(StoreStoreFence); - ins_cost(0); - - size(0); - format %{ "MEMBAR-storestore (empty encoding)" %} - ins_encode( ); - ins_pipe(empty); -%} - -//----------Move Instructions-------------------------------------------------- -instruct castX2P(eAXRegP dst, eAXRegI src) %{ - match(Set dst (CastX2P src)); - format %{ "# X2P $dst, $src" %} - ins_encode( /*empty encoding*/ ); - ins_cost(0); - ins_pipe(empty); -%} - -instruct castP2X(rRegI dst, eRegP src ) %{ - match(Set dst (CastP2X src)); - ins_cost(50); - format %{ "MOV $dst, $src\t# CastP2X" %} - ins_encode( enc_Copy( dst, src) ); - ins_pipe( ialu_reg_reg ); -%} - -//----------Conditional Move--------------------------------------------------- -// Conditional move -instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{ - predicate(!VM_Version::supports_cmov() ); - match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); - ins_cost(200); - format %{ "J$cop,us skip\t# signed cmove\n\t" - "MOV $dst,$src\n" - "skip:" %} - ins_encode %{ - Label Lskip; - // Invert sense of branch from sense of CMOV - __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip); - __ movl($dst$$Register, $src$$Register); - __ bind(Lskip); - %} - ins_pipe( pipe_cmov_reg ); -%} - -instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{ - predicate(!VM_Version::supports_cmov() ); - match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); - ins_cost(200); - format %{ "J$cop,us skip\t# unsigned cmove\n\t" - "MOV $dst,$src\n" - "skip:" %} - ins_encode %{ - Label Lskip; - // Invert sense of branch from sense of CMOV - __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip); - __ movl($dst$$Register, $src$$Register); - __ bind(Lskip); - %} - ins_pipe( pipe_cmov_reg ); -%} - -instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{ - predicate(VM_Version::supports_cmov() ); - match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); - ins_cost(200); - format %{ "CMOV$cop $dst,$src" %} - opcode(0x0F,0x40); - ins_encode( enc_cmov(cop), RegReg( dst, src ) ); - ins_pipe( pipe_cmov_reg ); -%} - -instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{ - predicate(VM_Version::supports_cmov() ); - match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); - ins_cost(200); - format %{ "CMOV$cop $dst,$src" %} - opcode(0x0F,0x40); - ins_encode( enc_cmov(cop), RegReg( dst, src ) ); - ins_pipe( pipe_cmov_reg ); -%} - -instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{ - predicate(VM_Version::supports_cmov() ); - match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); - ins_cost(200); - expand %{ - cmovI_regU(cop, cr, dst, src); - %} -%} - -// Conditional move -instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{ - predicate(VM_Version::supports_cmov() ); - match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src)))); - ins_cost(250); - format %{ "CMOV$cop $dst,$src" %} - opcode(0x0F,0x40); - ins_encode( SetInstMark, enc_cmov(cop), RegMem( dst, src ), ClearInstMark ); - ins_pipe( pipe_cmov_mem ); -%} - -// Conditional move -instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{ - predicate(VM_Version::supports_cmov() ); - match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src)))); - ins_cost(250); - format %{ "CMOV$cop $dst,$src" %} - opcode(0x0F,0x40); - ins_encode( SetInstMark, enc_cmov(cop), RegMem( dst, src ), ClearInstMark ); - ins_pipe( pipe_cmov_mem ); -%} - -instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{ - predicate(VM_Version::supports_cmov() ); - match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src)))); - ins_cost(250); - expand %{ - cmovI_memU(cop, cr, dst, src); - %} -%} - -// Conditional move -instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{ - predicate(VM_Version::supports_cmov() ); - match(Set dst (CMoveP (Binary cop cr) (Binary dst src))); - ins_cost(200); - format %{ "CMOV$cop $dst,$src\t# ptr" %} - opcode(0x0F,0x40); - ins_encode( enc_cmov(cop), RegReg( dst, src ) ); - ins_pipe( pipe_cmov_reg ); -%} - -// Conditional move (non-P6 version) -// Note: a CMoveP is generated for stubs and native wrappers -// regardless of whether we are on a P6, so we -// emulate a cmov here -instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{ - match(Set dst (CMoveP (Binary cop cr) (Binary dst src))); - ins_cost(300); - format %{ "Jn$cop skip\n\t" - "MOV $dst,$src\t# pointer\n" - "skip:" %} - opcode(0x8b); - ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src)); - ins_pipe( pipe_cmov_reg ); -%} - -// Conditional move -instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{ - predicate(VM_Version::supports_cmov() ); - match(Set dst (CMoveP (Binary cop cr) (Binary dst src))); - ins_cost(200); - format %{ "CMOV$cop $dst,$src\t# ptr" %} - opcode(0x0F,0x40); - ins_encode( enc_cmov(cop), RegReg( dst, src ) ); - ins_pipe( pipe_cmov_reg ); -%} - -instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{ - predicate(VM_Version::supports_cmov() ); - match(Set dst (CMoveP (Binary cop cr) (Binary dst src))); - ins_cost(200); - expand %{ - cmovP_regU(cop, cr, dst, src); - %} -%} - -// DISABLED: Requires the ADLC to emit a bottom_type call that -// correctly meets the two pointer arguments; one is an incoming -// register but the other is a memory operand. ALSO appears to -// be buggy with implicit null checks. -// -//// Conditional move -//instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{ -// predicate(VM_Version::supports_cmov() ); -// match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src)))); -// ins_cost(250); -// format %{ "CMOV$cop $dst,$src\t# ptr" %} -// opcode(0x0F,0x40); -// ins_encode( enc_cmov(cop), RegMem( dst, src ) ); -// ins_pipe( pipe_cmov_mem ); -//%} -// -//// Conditional move -//instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{ -// predicate(VM_Version::supports_cmov() ); -// match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src)))); -// ins_cost(250); -// format %{ "CMOV$cop $dst,$src\t# ptr" %} -// opcode(0x0F,0x40); -// ins_encode( enc_cmov(cop), RegMem( dst, src ) ); -// ins_pipe( pipe_cmov_mem ); -//%} - -// Conditional move -instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{ - predicate(UseSSE<=1); - match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); - ins_cost(200); - format %{ "FCMOV$cop $dst,$src\t# double" %} - opcode(0xDA); - ins_encode( enc_cmov_dpr(cop,src) ); - ins_pipe( pipe_cmovDPR_reg ); -%} - -// Conditional move -instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{ - predicate(UseSSE==0); - match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); - ins_cost(200); - format %{ "FCMOV$cop $dst,$src\t# float" %} - opcode(0xDA); - ins_encode( enc_cmov_dpr(cop,src) ); - ins_pipe( pipe_cmovDPR_reg ); -%} - -// Float CMOV on Intel doesn't handle *signed* compares, only unsigned. -instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{ - predicate(UseSSE<=1); - match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); - ins_cost(200); - format %{ "Jn$cop skip\n\t" - "MOV $dst,$src\t# double\n" - "skip:" %} - opcode (0xdd, 0x3); /* DD D8+i or DD /3 */ - ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) ); - ins_pipe( pipe_cmovDPR_reg ); -%} - -// Float CMOV on Intel doesn't handle *signed* compares, only unsigned. -instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{ - predicate(UseSSE==0); - match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); - ins_cost(200); - format %{ "Jn$cop skip\n\t" - "MOV $dst,$src\t# float\n" - "skip:" %} - opcode (0xdd, 0x3); /* DD D8+i or DD /3 */ - ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) ); - ins_pipe( pipe_cmovDPR_reg ); -%} - -// No CMOVE with SSE/SSE2 -instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{ - predicate (UseSSE>=1); - match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); - ins_cost(200); - format %{ "Jn$cop skip\n\t" - "MOVSS $dst,$src\t# float\n" - "skip:" %} - ins_encode %{ - Label skip; - // Invert sense of branch from sense of CMOV - __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip); - __ movflt($dst$$XMMRegister, $src$$XMMRegister); - __ bind(skip); - %} - ins_pipe( pipe_slow ); -%} - -// No CMOVE with SSE/SSE2 -instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{ - predicate (UseSSE>=2); - match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); - ins_cost(200); - format %{ "Jn$cop skip\n\t" - "MOVSD $dst,$src\t# float\n" - "skip:" %} - ins_encode %{ - Label skip; - // Invert sense of branch from sense of CMOV - __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip); - __ movdbl($dst$$XMMRegister, $src$$XMMRegister); - __ bind(skip); - %} - ins_pipe( pipe_slow ); -%} - -// unsigned version -instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{ - predicate (UseSSE>=1); - match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); - ins_cost(200); - format %{ "Jn$cop skip\n\t" - "MOVSS $dst,$src\t# float\n" - "skip:" %} - ins_encode %{ - Label skip; - // Invert sense of branch from sense of CMOV - __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip); - __ movflt($dst$$XMMRegister, $src$$XMMRegister); - __ bind(skip); - %} - ins_pipe( pipe_slow ); -%} - -instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{ - predicate (UseSSE>=1); - match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); - ins_cost(200); - expand %{ - fcmovF_regU(cop, cr, dst, src); - %} -%} - -// unsigned version -instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{ - predicate (UseSSE>=2); - match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); - ins_cost(200); - format %{ "Jn$cop skip\n\t" - "MOVSD $dst,$src\t# float\n" - "skip:" %} - ins_encode %{ - Label skip; - // Invert sense of branch from sense of CMOV - __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip); - __ movdbl($dst$$XMMRegister, $src$$XMMRegister); - __ bind(skip); - %} - ins_pipe( pipe_slow ); -%} - -instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{ - predicate (UseSSE>=2); - match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); - ins_cost(200); - expand %{ - fcmovD_regU(cop, cr, dst, src); - %} -%} - -instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{ - predicate(VM_Version::supports_cmov() ); - match(Set dst (CMoveL (Binary cop cr) (Binary dst src))); - ins_cost(200); - format %{ "CMOV$cop $dst.lo,$src.lo\n\t" - "CMOV$cop $dst.hi,$src.hi" %} - opcode(0x0F,0x40); - ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) ); - ins_pipe( pipe_cmov_reg_long ); -%} - -instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{ - predicate(VM_Version::supports_cmov() ); - match(Set dst (CMoveL (Binary cop cr) (Binary dst src))); - ins_cost(200); - format %{ "CMOV$cop $dst.lo,$src.lo\n\t" - "CMOV$cop $dst.hi,$src.hi" %} - opcode(0x0F,0x40); - ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) ); - ins_pipe( pipe_cmov_reg_long ); -%} - -instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{ - predicate(VM_Version::supports_cmov() ); - match(Set dst (CMoveL (Binary cop cr) (Binary dst src))); - ins_cost(200); - expand %{ - cmovL_regU(cop, cr, dst, src); - %} -%} - -//----------Arithmetic Instructions-------------------------------------------- -//----------Addition Instructions---------------------------------------------- - -// Integer Addition Instructions -instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ - match(Set dst (AddI dst src)); - effect(KILL cr); - - size(2); - format %{ "ADD $dst,$src" %} - opcode(0x03); - ins_encode( OpcP, RegReg( dst, src) ); - ins_pipe( ialu_reg_reg ); -%} - -instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ - match(Set dst (AddI dst src)); - effect(KILL cr); - - format %{ "ADD $dst,$src" %} - opcode(0x81, 0x00); /* /0 id */ - ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); - ins_pipe( ialu_reg ); -%} - -instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{ - predicate(UseIncDec); - match(Set dst (AddI dst src)); - effect(KILL cr); - - size(1); - format %{ "INC $dst" %} - opcode(0x40); /* */ - ins_encode( Opc_plus( primary, dst ) ); - ins_pipe( ialu_reg ); -%} - -instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{ - match(Set dst (AddI src0 src1)); - ins_cost(110); - - format %{ "LEA $dst,[$src0 + $src1]" %} - opcode(0x8D); /* 0x8D /r */ - ins_encode( SetInstMark, OpcP, RegLea( dst, src0, src1 ), ClearInstMark ); - ins_pipe( ialu_reg_reg ); -%} - -instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{ - match(Set dst (AddP src0 src1)); - ins_cost(110); - - format %{ "LEA $dst,[$src0 + $src1]\t# ptr" %} - opcode(0x8D); /* 0x8D /r */ - ins_encode( SetInstMark, OpcP, RegLea( dst, src0, src1 ), ClearInstMark ); - ins_pipe( ialu_reg_reg ); -%} - -instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{ - predicate(UseIncDec); - match(Set dst (AddI dst src)); - effect(KILL cr); - - size(1); - format %{ "DEC $dst" %} - opcode(0x48); /* */ - ins_encode( Opc_plus( primary, dst ) ); - ins_pipe( ialu_reg ); -%} - -instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{ - match(Set dst (AddP dst src)); - effect(KILL cr); - - size(2); - format %{ "ADD $dst,$src" %} - opcode(0x03); - ins_encode( OpcP, RegReg( dst, src) ); - ins_pipe( ialu_reg_reg ); -%} - -instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{ - match(Set dst (AddP dst src)); - effect(KILL cr); - - format %{ "ADD $dst,$src" %} - opcode(0x81,0x00); /* Opcode 81 /0 id */ - // ins_encode( RegImm( dst, src) ); - ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); - ins_pipe( ialu_reg ); -%} - -instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ - match(Set dst (AddI dst (LoadI src))); - effect(KILL cr); - - ins_cost(150); - format %{ "ADD $dst,$src" %} - opcode(0x03); - ins_encode( SetInstMark, OpcP, RegMem( dst, src), ClearInstMark ); - ins_pipe( ialu_reg_mem ); -%} - -instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ - match(Set dst (StoreI dst (AddI (LoadI dst) src))); - effect(KILL cr); - - ins_cost(150); - format %{ "ADD $dst,$src" %} - opcode(0x01); /* Opcode 01 /r */ - ins_encode( SetInstMark, OpcP, RegMem( src, dst ), ClearInstMark ); - ins_pipe( ialu_mem_reg ); -%} - -// Add Memory with Immediate -instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ - match(Set dst (StoreI dst (AddI (LoadI dst) src))); - effect(KILL cr); - - ins_cost(125); - format %{ "ADD $dst,$src" %} - opcode(0x81); /* Opcode 81 /0 id */ - ins_encode( SetInstMark, OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32(src), ClearInstMark ); - ins_pipe( ialu_mem_imm ); -%} - -instruct incI_mem(memory dst, immI_1 src, eFlagsReg cr) %{ - match(Set dst (StoreI dst (AddI (LoadI dst) src))); - effect(KILL cr); - - ins_cost(125); - format %{ "INC $dst" %} - opcode(0xFF); /* Opcode FF /0 */ - ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,dst), ClearInstMark); - ins_pipe( ialu_mem_imm ); -%} - -instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{ - match(Set dst (StoreI dst (AddI (LoadI dst) src))); - effect(KILL cr); - - ins_cost(125); - format %{ "DEC $dst" %} - opcode(0xFF); /* Opcode FF /1 */ - ins_encode( SetInstMark, OpcP, RMopc_Mem(0x01,dst), ClearInstMark); - ins_pipe( ialu_mem_imm ); -%} - - -instruct checkCastPP( eRegP dst ) %{ - match(Set dst (CheckCastPP dst)); - - size(0); - format %{ "#checkcastPP of $dst" %} - ins_encode( /*empty encoding*/ ); - ins_pipe( empty ); -%} - -instruct castPP( eRegP dst ) %{ - match(Set dst (CastPP dst)); - format %{ "#castPP of $dst" %} - ins_encode( /*empty encoding*/ ); - ins_pipe( empty ); -%} - -instruct castII( rRegI dst ) %{ - match(Set dst (CastII dst)); - format %{ "#castII of $dst" %} - ins_encode( /*empty encoding*/ ); - ins_cost(0); - ins_pipe( empty ); -%} - -instruct castLL( eRegL dst ) %{ - match(Set dst (CastLL dst)); - format %{ "#castLL of $dst" %} - ins_encode( /*empty encoding*/ ); - ins_cost(0); - ins_pipe( empty ); -%} - -instruct castFF( regF dst ) %{ - predicate(UseSSE >= 1); - match(Set dst (CastFF dst)); - format %{ "#castFF of $dst" %} - ins_encode( /*empty encoding*/ ); - ins_cost(0); - ins_pipe( empty ); -%} - -instruct castDD( regD dst ) %{ - predicate(UseSSE >= 2); - match(Set dst (CastDD dst)); - format %{ "#castDD of $dst" %} - ins_encode( /*empty encoding*/ ); - ins_cost(0); - ins_pipe( empty ); -%} - -instruct castFF_PR( regFPR dst ) %{ - predicate(UseSSE < 1); - match(Set dst (CastFF dst)); - format %{ "#castFF of $dst" %} - ins_encode( /*empty encoding*/ ); - ins_cost(0); - ins_pipe( empty ); -%} - -instruct castDD_PR( regDPR dst ) %{ - predicate(UseSSE < 2); - match(Set dst (CastDD dst)); - format %{ "#castDD of $dst" %} - ins_encode( /*empty encoding*/ ); - ins_cost(0); - ins_pipe( empty ); -%} - -// No flag versions for CompareAndSwap{P,I,L} because matcher can't match them - -instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{ - match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval))); - match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval))); - effect(KILL cr, KILL oldval); - format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" - "MOV $res,0\n\t" - "JNE,s fail\n\t" - "MOV $res,1\n" - "fail:" %} - ins_encode( enc_cmpxchg8(mem_ptr), - enc_flags_ne_to_boolean(res) ); - ins_pipe( pipe_cmpxchg ); -%} - -instruct compareAndSwapP( rRegI res, pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{ - match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval))); - match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval))); - effect(KILL cr, KILL oldval); - format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" - "MOV $res,0\n\t" - "JNE,s fail\n\t" - "MOV $res,1\n" - "fail:" %} - ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) ); - ins_pipe( pipe_cmpxchg ); -%} - -instruct compareAndSwapB( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{ - match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval))); - match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval))); - effect(KILL cr, KILL oldval); - format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" - "MOV $res,0\n\t" - "JNE,s fail\n\t" - "MOV $res,1\n" - "fail:" %} - ins_encode( enc_cmpxchgb(mem_ptr), - enc_flags_ne_to_boolean(res) ); - ins_pipe( pipe_cmpxchg ); -%} - -instruct compareAndSwapS( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{ - match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval))); - match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval))); - effect(KILL cr, KILL oldval); - format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" - "MOV $res,0\n\t" - "JNE,s fail\n\t" - "MOV $res,1\n" - "fail:" %} - ins_encode( enc_cmpxchgw(mem_ptr), - enc_flags_ne_to_boolean(res) ); - ins_pipe( pipe_cmpxchg ); -%} - -instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{ - match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval))); - match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval))); - effect(KILL cr, KILL oldval); - format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" - "MOV $res,0\n\t" - "JNE,s fail\n\t" - "MOV $res,1\n" - "fail:" %} - ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) ); - ins_pipe( pipe_cmpxchg ); -%} - -instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{ - match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval))); - effect(KILL cr); - format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %} - ins_encode( enc_cmpxchg8(mem_ptr) ); - ins_pipe( pipe_cmpxchg ); -%} - -instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{ - match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval))); - effect(KILL cr); - format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %} - ins_encode( enc_cmpxchg(mem_ptr) ); - ins_pipe( pipe_cmpxchg ); -%} - -instruct compareAndExchangeB( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{ - match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval))); - effect(KILL cr); - format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %} - ins_encode( enc_cmpxchgb(mem_ptr) ); - ins_pipe( pipe_cmpxchg ); -%} - -instruct compareAndExchangeS( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{ - match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval))); - effect(KILL cr); - format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %} - ins_encode( enc_cmpxchgw(mem_ptr) ); - ins_pipe( pipe_cmpxchg ); -%} - -instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{ - match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval))); - effect(KILL cr); - format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %} - ins_encode( enc_cmpxchg(mem_ptr) ); - ins_pipe( pipe_cmpxchg ); -%} - -instruct xaddB_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{ - predicate(n->as_LoadStore()->result_not_used()); - match(Set dummy (GetAndAddB mem add)); - effect(KILL cr); - format %{ "ADDB [$mem],$add" %} - ins_encode %{ - __ lock(); - __ addb($mem$$Address, $add$$constant); - %} - ins_pipe( pipe_cmpxchg ); -%} - -// Important to match to xRegI: only 8-bit regs. -instruct xaddB( memory mem, xRegI newval, eFlagsReg cr) %{ - match(Set newval (GetAndAddB mem newval)); - effect(KILL cr); - format %{ "XADDB [$mem],$newval" %} - ins_encode %{ - __ lock(); - __ xaddb($mem$$Address, $newval$$Register); - %} - ins_pipe( pipe_cmpxchg ); -%} - -instruct xaddS_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{ - predicate(n->as_LoadStore()->result_not_used()); - match(Set dummy (GetAndAddS mem add)); - effect(KILL cr); - format %{ "ADDS [$mem],$add" %} - ins_encode %{ - __ lock(); - __ addw($mem$$Address, $add$$constant); - %} - ins_pipe( pipe_cmpxchg ); -%} - -instruct xaddS( memory mem, rRegI newval, eFlagsReg cr) %{ - match(Set newval (GetAndAddS mem newval)); - effect(KILL cr); - format %{ "XADDS [$mem],$newval" %} - ins_encode %{ - __ lock(); - __ xaddw($mem$$Address, $newval$$Register); - %} - ins_pipe( pipe_cmpxchg ); -%} - -instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{ - predicate(n->as_LoadStore()->result_not_used()); - match(Set dummy (GetAndAddI mem add)); - effect(KILL cr); - format %{ "ADDL [$mem],$add" %} - ins_encode %{ - __ lock(); - __ addl($mem$$Address, $add$$constant); - %} - ins_pipe( pipe_cmpxchg ); -%} - -instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{ - match(Set newval (GetAndAddI mem newval)); - effect(KILL cr); - format %{ "XADDL [$mem],$newval" %} - ins_encode %{ - __ lock(); - __ xaddl($mem$$Address, $newval$$Register); - %} - ins_pipe( pipe_cmpxchg ); -%} - -// Important to match to xRegI: only 8-bit regs. -instruct xchgB( memory mem, xRegI newval) %{ - match(Set newval (GetAndSetB mem newval)); - format %{ "XCHGB $newval,[$mem]" %} - ins_encode %{ - __ xchgb($newval$$Register, $mem$$Address); - %} - ins_pipe( pipe_cmpxchg ); -%} - -instruct xchgS( memory mem, rRegI newval) %{ - match(Set newval (GetAndSetS mem newval)); - format %{ "XCHGW $newval,[$mem]" %} - ins_encode %{ - __ xchgw($newval$$Register, $mem$$Address); - %} - ins_pipe( pipe_cmpxchg ); -%} - -instruct xchgI( memory mem, rRegI newval) %{ - match(Set newval (GetAndSetI mem newval)); - format %{ "XCHGL $newval,[$mem]" %} - ins_encode %{ - __ xchgl($newval$$Register, $mem$$Address); - %} - ins_pipe( pipe_cmpxchg ); -%} - -instruct xchgP( memory mem, pRegP newval) %{ - match(Set newval (GetAndSetP mem newval)); - format %{ "XCHGL $newval,[$mem]" %} - ins_encode %{ - __ xchgl($newval$$Register, $mem$$Address); - %} - ins_pipe( pipe_cmpxchg ); -%} - -//----------Subtraction Instructions------------------------------------------- - -// Integer Subtraction Instructions -instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ - match(Set dst (SubI dst src)); - effect(KILL cr); - - size(2); - format %{ "SUB $dst,$src" %} - opcode(0x2B); - ins_encode( OpcP, RegReg( dst, src) ); - ins_pipe( ialu_reg_reg ); -%} - -instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ - match(Set dst (SubI dst src)); - effect(KILL cr); - - format %{ "SUB $dst,$src" %} - opcode(0x81,0x05); /* Opcode 81 /5 */ - // ins_encode( RegImm( dst, src) ); - ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); - ins_pipe( ialu_reg ); -%} - -instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ - match(Set dst (SubI dst (LoadI src))); - effect(KILL cr); - - ins_cost(150); - format %{ "SUB $dst,$src" %} - opcode(0x2B); - ins_encode( SetInstMark, OpcP, RegMem( dst, src), ClearInstMark ); - ins_pipe( ialu_reg_mem ); -%} - -instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ - match(Set dst (StoreI dst (SubI (LoadI dst) src))); - effect(KILL cr); - - ins_cost(150); - format %{ "SUB $dst,$src" %} - opcode(0x29); /* Opcode 29 /r */ - ins_encode( SetInstMark, OpcP, RegMem( src, dst ), ClearInstMark ); - ins_pipe( ialu_mem_reg ); -%} - -// Subtract from a pointer -instruct subP_eReg(eRegP dst, rRegI src, immI_0 zero, eFlagsReg cr) %{ - match(Set dst (AddP dst (SubI zero src))); - effect(KILL cr); - - size(2); - format %{ "SUB $dst,$src" %} - opcode(0x2B); - ins_encode( OpcP, RegReg( dst, src) ); - ins_pipe( ialu_reg_reg ); -%} - -instruct negI_eReg(rRegI dst, immI_0 zero, eFlagsReg cr) %{ - match(Set dst (SubI zero dst)); - effect(KILL cr); - - size(2); - format %{ "NEG $dst" %} - opcode(0xF7,0x03); // Opcode F7 /3 - ins_encode( OpcP, RegOpc( dst ) ); - ins_pipe( ialu_reg ); -%} - -//----------Multiplication/Division Instructions------------------------------- -// Integer Multiplication Instructions -// Multiply Register -instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ - match(Set dst (MulI dst src)); - effect(KILL cr); - - size(3); - ins_cost(300); - format %{ "IMUL $dst,$src" %} - opcode(0xAF, 0x0F); - ins_encode( OpcS, OpcP, RegReg( dst, src) ); - ins_pipe( ialu_reg_reg_alu0 ); -%} - -// Multiply 32-bit Immediate -instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{ - match(Set dst (MulI src imm)); - effect(KILL cr); - - ins_cost(300); - format %{ "IMUL $dst,$src,$imm" %} - opcode(0x69); /* 69 /r id */ - ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) ); - ins_pipe( ialu_reg_reg_alu0 ); -%} - -instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{ - match(Set dst src); - effect(KILL cr); - - // Note that this is artificially increased to make it more expensive than loadConL - ins_cost(250); - format %{ "MOV EAX,$src\t// low word only" %} - opcode(0xB8); - ins_encode( LdImmL_Lo(dst, src) ); - ins_pipe( ialu_reg_fat ); -%} - -// Multiply by 32-bit Immediate, taking the shifted high order results -// (special case for shift by 32) -instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{ - match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt))); - predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL && - _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint && - _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint ); - effect(USE src1, KILL cr); - - // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only - ins_cost(0*100 + 1*400 - 150); - format %{ "IMUL EDX:EAX,$src1" %} - ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) ); - ins_pipe( pipe_slow ); -%} - -// Multiply by 32-bit Immediate, taking the shifted high order results -instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{ - match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt))); - predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL && - _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint && - _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint ); - effect(USE src1, KILL cr); - - // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only - ins_cost(1*100 + 1*400 - 150); - format %{ "IMUL EDX:EAX,$src1\n\t" - "SAR EDX,$cnt-32" %} - ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) ); - ins_pipe( pipe_slow ); -%} - -// Multiply Memory 32-bit Immediate -instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{ - match(Set dst (MulI (LoadI src) imm)); - effect(KILL cr); - - ins_cost(300); - format %{ "IMUL $dst,$src,$imm" %} - opcode(0x69); /* 69 /r id */ - ins_encode( SetInstMark, OpcSE(imm), RegMem( dst, src ), Con8or32( imm ), ClearInstMark ); - ins_pipe( ialu_reg_mem_alu0 ); -%} - -// Multiply Memory -instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{ - match(Set dst (MulI dst (LoadI src))); - effect(KILL cr); - - ins_cost(350); - format %{ "IMUL $dst,$src" %} - opcode(0xAF, 0x0F); - ins_encode( SetInstMark, OpcS, OpcP, RegMem( dst, src), ClearInstMark ); - ins_pipe( ialu_reg_mem_alu0 ); -%} - -instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, eFlagsReg cr) -%{ - match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3))); - effect(KILL cr, KILL src2); - - expand %{ mulI_eReg(dst, src1, cr); - mulI_eReg(src2, src3, cr); - addI_eReg(dst, src2, cr); %} -%} - -// Multiply Register Int to Long -instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{ - // Basic Idea: long = (long)int * (long)int - match(Set dst (MulL (ConvI2L src) (ConvI2L src1))); - effect(DEF dst, USE src, USE src1, KILL flags); - - ins_cost(300); - format %{ "IMUL $dst,$src1" %} - - ins_encode( long_int_multiply( dst, src1 ) ); - ins_pipe( ialu_reg_reg_alu0 ); -%} - -instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{ - // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL) - match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask))); - effect(KILL flags); - - ins_cost(300); - format %{ "MUL $dst,$src1" %} - - ins_encode( long_uint_multiply(dst, src1) ); - ins_pipe( ialu_reg_reg_alu0 ); -%} - -// Multiply Register Long -instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{ - match(Set dst (MulL dst src)); - effect(KILL cr, TEMP tmp); - ins_cost(4*100+3*400); -// Basic idea: lo(result) = lo(x_lo * y_lo) -// hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi) - format %{ "MOV $tmp,$src.lo\n\t" - "IMUL $tmp,EDX\n\t" - "MOV EDX,$src.hi\n\t" - "IMUL EDX,EAX\n\t" - "ADD $tmp,EDX\n\t" - "MUL EDX:EAX,$src.lo\n\t" - "ADD EDX,$tmp" %} - ins_encode( long_multiply( dst, src, tmp ) ); - ins_pipe( pipe_slow ); -%} - -// Multiply Register Long where the left operand's high 32 bits are zero -instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{ - predicate(is_operand_hi32_zero(n->in(1))); - match(Set dst (MulL dst src)); - effect(KILL cr, TEMP tmp); - ins_cost(2*100+2*400); -// Basic idea: lo(result) = lo(x_lo * y_lo) -// hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0 - format %{ "MOV $tmp,$src.hi\n\t" - "IMUL $tmp,EAX\n\t" - "MUL EDX:EAX,$src.lo\n\t" - "ADD EDX,$tmp" %} - ins_encode %{ - __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register)); - __ imull($tmp$$Register, rax); - __ mull($src$$Register); - __ addl(rdx, $tmp$$Register); - %} - ins_pipe( pipe_slow ); -%} - -// Multiply Register Long where the right operand's high 32 bits are zero -instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{ - predicate(is_operand_hi32_zero(n->in(2))); - match(Set dst (MulL dst src)); - effect(KILL cr, TEMP tmp); - ins_cost(2*100+2*400); -// Basic idea: lo(result) = lo(x_lo * y_lo) -// hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0 - format %{ "MOV $tmp,$src.lo\n\t" - "IMUL $tmp,EDX\n\t" - "MUL EDX:EAX,$src.lo\n\t" - "ADD EDX,$tmp" %} - ins_encode %{ - __ movl($tmp$$Register, $src$$Register); - __ imull($tmp$$Register, rdx); - __ mull($src$$Register); - __ addl(rdx, $tmp$$Register); - %} - ins_pipe( pipe_slow ); -%} - -// Multiply Register Long where the left and the right operands' high 32 bits are zero -instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{ - predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2))); - match(Set dst (MulL dst src)); - effect(KILL cr); - ins_cost(1*400); -// Basic idea: lo(result) = lo(x_lo * y_lo) -// hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0 - format %{ "MUL EDX:EAX,$src.lo\n\t" %} - ins_encode %{ - __ mull($src$$Register); - %} - ins_pipe( pipe_slow ); -%} - -// Multiply Register Long by small constant -instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{ - match(Set dst (MulL dst src)); - effect(KILL cr, TEMP tmp); - ins_cost(2*100+2*400); - size(12); -// Basic idea: lo(result) = lo(src * EAX) -// hi(result) = hi(src * EAX) + lo(src * EDX) - format %{ "IMUL $tmp,EDX,$src\n\t" - "MOV EDX,$src\n\t" - "MUL EDX\t# EDX*EAX -> EDX:EAX\n\t" - "ADD EDX,$tmp" %} - ins_encode( long_multiply_con( dst, src, tmp ) ); - ins_pipe( pipe_slow ); -%} - -// Integer DIV with Register -instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{ - match(Set rax (DivI rax div)); - effect(KILL rdx, KILL cr); - size(26); - ins_cost(30*100+10*100); - format %{ "CMP EAX,0x80000000\n\t" - "JNE,s normal\n\t" - "XOR EDX,EDX\n\t" - "CMP ECX,-1\n\t" - "JE,s done\n" - "normal: CDQ\n\t" - "IDIV $div\n\t" - "done:" %} - opcode(0xF7, 0x7); /* Opcode F7 /7 */ - ins_encode( cdq_enc, OpcP, RegOpc(div) ); - ins_pipe( ialu_reg_reg_alu0 ); -%} - -// Divide Register Long -instruct divL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{ - match(Set dst (DivL src1 src2)); - effect(CALL); - ins_cost(10000); - format %{ "PUSH $src1.hi\n\t" - "PUSH $src1.lo\n\t" - "PUSH $src2.hi\n\t" - "PUSH $src2.lo\n\t" - "CALL SharedRuntime::ldiv\n\t" - "ADD ESP,16" %} - ins_encode( long_div(src1,src2) ); - ins_pipe( pipe_slow ); -%} - -// Integer DIVMOD with Register, both quotient and mod results -instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{ - match(DivModI rax div); - effect(KILL cr); - size(26); - ins_cost(30*100+10*100); - format %{ "CMP EAX,0x80000000\n\t" - "JNE,s normal\n\t" - "XOR EDX,EDX\n\t" - "CMP ECX,-1\n\t" - "JE,s done\n" - "normal: CDQ\n\t" - "IDIV $div\n\t" - "done:" %} - opcode(0xF7, 0x7); /* Opcode F7 /7 */ - ins_encode( cdq_enc, OpcP, RegOpc(div) ); - ins_pipe( pipe_slow ); -%} - -// Integer MOD with Register -instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{ - match(Set rdx (ModI rax div)); - effect(KILL rax, KILL cr); - - size(26); - ins_cost(300); - format %{ "CDQ\n\t" - "IDIV $div" %} - opcode(0xF7, 0x7); /* Opcode F7 /7 */ - ins_encode( cdq_enc, OpcP, RegOpc(div) ); - ins_pipe( ialu_reg_reg_alu0 ); -%} - -// Remainder Register Long -instruct modL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{ - match(Set dst (ModL src1 src2)); - effect(CALL); - ins_cost(10000); - format %{ "PUSH $src1.hi\n\t" - "PUSH $src1.lo\n\t" - "PUSH $src2.hi\n\t" - "PUSH $src2.lo\n\t" - "CALL SharedRuntime::lrem\n\t" - "ADD ESP,16" %} - ins_encode( long_mod(src1,src2) ); - ins_pipe( pipe_slow ); -%} - -// Divide Register Long (no special case since divisor != -1) -instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{ - match(Set dst (DivL dst imm)); - effect( TEMP tmp, TEMP tmp2, KILL cr ); - ins_cost(1000); - format %{ "MOV $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t" - "XOR $tmp2,$tmp2\n\t" - "CMP $tmp,EDX\n\t" - "JA,s fast\n\t" - "MOV $tmp2,EAX\n\t" - "MOV EAX,EDX\n\t" - "MOV EDX,0\n\t" - "JLE,s pos\n\t" - "LNEG EAX : $tmp2\n\t" - "DIV $tmp # unsigned division\n\t" - "XCHG EAX,$tmp2\n\t" - "DIV $tmp\n\t" - "LNEG $tmp2 : EAX\n\t" - "JMP,s done\n" - "pos:\n\t" - "DIV $tmp\n\t" - "XCHG EAX,$tmp2\n" - "fast:\n\t" - "DIV $tmp\n" - "done:\n\t" - "MOV EDX,$tmp2\n\t" - "NEG EDX:EAX # if $imm < 0" %} - ins_encode %{ - int con = (int)$imm$$constant; - assert(con != 0 && con != -1 && con != min_jint, "wrong divisor"); - int pcon = (con > 0) ? con : -con; - Label Lfast, Lpos, Ldone; - - __ movl($tmp$$Register, pcon); - __ xorl($tmp2$$Register,$tmp2$$Register); - __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register)); - __ jccb(Assembler::above, Lfast); // result fits into 32 bit - - __ movl($tmp2$$Register, $dst$$Register); // save - __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register)); - __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags - __ jccb(Assembler::lessEqual, Lpos); // result is positive - - // Negative dividend. - // convert value to positive to use unsigned division - __ lneg($dst$$Register, $tmp2$$Register); - __ divl($tmp$$Register); - __ xchgl($dst$$Register, $tmp2$$Register); - __ divl($tmp$$Register); - // revert result back to negative - __ lneg($tmp2$$Register, $dst$$Register); - __ jmpb(Ldone); - - __ bind(Lpos); - __ divl($tmp$$Register); // Use unsigned division - __ xchgl($dst$$Register, $tmp2$$Register); - // Fallthrow for final divide, tmp2 has 32 bit hi result - - __ bind(Lfast); - // fast path: src is positive - __ divl($tmp$$Register); // Use unsigned division - - __ bind(Ldone); - __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register); - if (con < 0) { - __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register); - } - %} - ins_pipe( pipe_slow ); -%} - -// Remainder Register Long (remainder fit into 32 bits) -instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{ - match(Set dst (ModL dst imm)); - effect( TEMP tmp, TEMP tmp2, KILL cr ); - ins_cost(1000); - format %{ "MOV $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t" - "CMP $tmp,EDX\n\t" - "JA,s fast\n\t" - "MOV $tmp2,EAX\n\t" - "MOV EAX,EDX\n\t" - "MOV EDX,0\n\t" - "JLE,s pos\n\t" - "LNEG EAX : $tmp2\n\t" - "DIV $tmp # unsigned division\n\t" - "MOV EAX,$tmp2\n\t" - "DIV $tmp\n\t" - "NEG EDX\n\t" - "JMP,s done\n" - "pos:\n\t" - "DIV $tmp\n\t" - "MOV EAX,$tmp2\n" - "fast:\n\t" - "DIV $tmp\n" - "done:\n\t" - "MOV EAX,EDX\n\t" - "SAR EDX,31\n\t" %} - ins_encode %{ - int con = (int)$imm$$constant; - assert(con != 0 && con != -1 && con != min_jint, "wrong divisor"); - int pcon = (con > 0) ? con : -con; - Label Lfast, Lpos, Ldone; - - __ movl($tmp$$Register, pcon); - __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register)); - __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit - - __ movl($tmp2$$Register, $dst$$Register); // save - __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register)); - __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags - __ jccb(Assembler::lessEqual, Lpos); // result is positive - - // Negative dividend. - // convert value to positive to use unsigned division - __ lneg($dst$$Register, $tmp2$$Register); - __ divl($tmp$$Register); - __ movl($dst$$Register, $tmp2$$Register); - __ divl($tmp$$Register); - // revert remainder back to negative - __ negl(HIGH_FROM_LOW($dst$$Register)); - __ jmpb(Ldone); - - __ bind(Lpos); - __ divl($tmp$$Register); - __ movl($dst$$Register, $tmp2$$Register); - - __ bind(Lfast); - // fast path: src is positive - __ divl($tmp$$Register); - - __ bind(Ldone); - __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register)); - __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign - - %} - ins_pipe( pipe_slow ); -%} - -// Integer Shift Instructions -// Shift Left by one -instruct shlI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{ - match(Set dst (LShiftI dst shift)); - effect(KILL cr); - - size(2); - format %{ "SHL $dst,$shift" %} - opcode(0xD1, 0x4); /* D1 /4 */ - ins_encode( OpcP, RegOpc( dst ) ); - ins_pipe( ialu_reg ); -%} - -// Shift Left by 8-bit immediate -instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{ - match(Set dst (LShiftI dst shift)); - effect(KILL cr); - - size(3); - format %{ "SHL $dst,$shift" %} - opcode(0xC1, 0x4); /* C1 /4 ib */ - ins_encode( RegOpcImm( dst, shift) ); - ins_pipe( ialu_reg ); -%} - -// Shift Left by variable -instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{ - match(Set dst (LShiftI dst shift)); - effect(KILL cr); - - size(2); - format %{ "SHL $dst,$shift" %} - opcode(0xD3, 0x4); /* D3 /4 */ - ins_encode( OpcP, RegOpc( dst ) ); - ins_pipe( ialu_reg_reg ); -%} - -// Arithmetic shift right by one -instruct sarI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{ - match(Set dst (RShiftI dst shift)); - effect(KILL cr); - - size(2); - format %{ "SAR $dst,$shift" %} - opcode(0xD1, 0x7); /* D1 /7 */ - ins_encode( OpcP, RegOpc( dst ) ); - ins_pipe( ialu_reg ); -%} - -// Arithmetic shift right by one -instruct sarI_mem_1(memory dst, immI_1 shift, eFlagsReg cr) %{ - match(Set dst (StoreI dst (RShiftI (LoadI dst) shift))); - effect(KILL cr); - format %{ "SAR $dst,$shift" %} - opcode(0xD1, 0x7); /* D1 /7 */ - ins_encode( SetInstMark, OpcP, RMopc_Mem(secondary,dst), ClearInstMark ); - ins_pipe( ialu_mem_imm ); -%} - -// Arithmetic Shift Right by 8-bit immediate -instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{ - match(Set dst (RShiftI dst shift)); - effect(KILL cr); - - size(3); - format %{ "SAR $dst,$shift" %} - opcode(0xC1, 0x7); /* C1 /7 ib */ - ins_encode( RegOpcImm( dst, shift ) ); - ins_pipe( ialu_mem_imm ); -%} - -// Arithmetic Shift Right by 8-bit immediate -instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{ - match(Set dst (StoreI dst (RShiftI (LoadI dst) shift))); - effect(KILL cr); - - format %{ "SAR $dst,$shift" %} - opcode(0xC1, 0x7); /* C1 /7 ib */ - ins_encode( SetInstMark, OpcP, RMopc_Mem(secondary, dst ), Con8or32(shift), ClearInstMark ); - ins_pipe( ialu_mem_imm ); -%} - -// Arithmetic Shift Right by variable -instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{ - match(Set dst (RShiftI dst shift)); - effect(KILL cr); - - size(2); - format %{ "SAR $dst,$shift" %} - opcode(0xD3, 0x7); /* D3 /7 */ - ins_encode( OpcP, RegOpc( dst ) ); - ins_pipe( ialu_reg_reg ); -%} - -// Logical shift right by one -instruct shrI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{ - match(Set dst (URShiftI dst shift)); - effect(KILL cr); - - size(2); - format %{ "SHR $dst,$shift" %} - opcode(0xD1, 0x5); /* D1 /5 */ - ins_encode( OpcP, RegOpc( dst ) ); - ins_pipe( ialu_reg ); -%} - -// Logical Shift Right by 8-bit immediate -instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{ - match(Set dst (URShiftI dst shift)); - effect(KILL cr); - - size(3); - format %{ "SHR $dst,$shift" %} - opcode(0xC1, 0x5); /* C1 /5 ib */ - ins_encode( RegOpcImm( dst, shift) ); - ins_pipe( ialu_reg ); -%} - - -// Logical Shift Right by 24, followed by Arithmetic Shift Left by 24. -// This idiom is used by the compiler for the i2b bytecode. -instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{ - match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour)); - - size(3); - format %{ "MOVSX $dst,$src :8" %} - ins_encode %{ - __ movsbl($dst$$Register, $src$$Register); - %} - ins_pipe(ialu_reg_reg); -%} - -// Logical Shift Right by 16, followed by Arithmetic Shift Left by 16. -// This idiom is used by the compiler the i2s bytecode. -instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{ - match(Set dst (RShiftI (LShiftI src sixteen) sixteen)); - - size(3); - format %{ "MOVSX $dst,$src :16" %} - ins_encode %{ - __ movswl($dst$$Register, $src$$Register); - %} - ins_pipe(ialu_reg_reg); -%} - - -// Logical Shift Right by variable -instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{ - match(Set dst (URShiftI dst shift)); - effect(KILL cr); - - size(2); - format %{ "SHR $dst,$shift" %} - opcode(0xD3, 0x5); /* D3 /5 */ - ins_encode( OpcP, RegOpc( dst ) ); - ins_pipe( ialu_reg_reg ); -%} - - -//----------Logical Instructions----------------------------------------------- -//----------Integer Logical Instructions--------------------------------------- -// And Instructions -// And Register with Register -instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ - match(Set dst (AndI dst src)); - effect(KILL cr); - - size(2); - format %{ "AND $dst,$src" %} - opcode(0x23); - ins_encode( OpcP, RegReg( dst, src) ); - ins_pipe( ialu_reg_reg ); -%} - -// And Register with Immediate -instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ - match(Set dst (AndI dst src)); - effect(KILL cr); - - format %{ "AND $dst,$src" %} - opcode(0x81,0x04); /* Opcode 81 /4 */ - // ins_encode( RegImm( dst, src) ); - ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); - ins_pipe( ialu_reg ); -%} - -// And Register with Memory -instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ - match(Set dst (AndI dst (LoadI src))); - effect(KILL cr); - - ins_cost(150); - format %{ "AND $dst,$src" %} - opcode(0x23); - ins_encode( SetInstMark, OpcP, RegMem( dst, src), ClearInstMark ); - ins_pipe( ialu_reg_mem ); -%} - -// And Memory with Register -instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ - match(Set dst (StoreI dst (AndI (LoadI dst) src))); - effect(KILL cr); - - ins_cost(150); - format %{ "AND $dst,$src" %} - opcode(0x21); /* Opcode 21 /r */ - ins_encode( SetInstMark, OpcP, RegMem( src, dst ), ClearInstMark ); - ins_pipe( ialu_mem_reg ); -%} - -// And Memory with Immediate -instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ - match(Set dst (StoreI dst (AndI (LoadI dst) src))); - effect(KILL cr); - - ins_cost(125); - format %{ "AND $dst,$src" %} - opcode(0x81, 0x4); /* Opcode 81 /4 id */ - // ins_encode( MemImm( dst, src) ); - ins_encode( SetInstMark, OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32(src), ClearInstMark ); - ins_pipe( ialu_mem_imm ); -%} - -// BMI1 instructions -instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{ - match(Set dst (AndI (XorI src1 minus_1) src2)); - predicate(UseBMI1Instructions); - effect(KILL cr); - - format %{ "ANDNL $dst, $src1, $src2" %} - - ins_encode %{ - __ andnl($dst$$Register, $src1$$Register, $src2$$Register); - %} - ins_pipe(ialu_reg); -%} - -instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{ - match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) )); - predicate(UseBMI1Instructions); - effect(KILL cr); - - ins_cost(125); - format %{ "ANDNL $dst, $src1, $src2" %} - - ins_encode %{ - __ andnl($dst$$Register, $src1$$Register, $src2$$Address); - %} - ins_pipe(ialu_reg_mem); -%} - -instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, eFlagsReg cr) %{ - match(Set dst (AndI (SubI imm_zero src) src)); - predicate(UseBMI1Instructions); - effect(KILL cr); - - format %{ "BLSIL $dst, $src" %} - - ins_encode %{ - __ blsil($dst$$Register, $src$$Register); - %} - ins_pipe(ialu_reg); -%} - -instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, eFlagsReg cr) %{ - match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) )); - predicate(UseBMI1Instructions); - effect(KILL cr); - - ins_cost(125); - format %{ "BLSIL $dst, $src" %} - - ins_encode %{ - __ blsil($dst$$Register, $src$$Address); - %} - ins_pipe(ialu_reg_mem); -%} - -instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr) -%{ - match(Set dst (XorI (AddI src minus_1) src)); - predicate(UseBMI1Instructions); - effect(KILL cr); - - format %{ "BLSMSKL $dst, $src" %} - - ins_encode %{ - __ blsmskl($dst$$Register, $src$$Register); - %} - - ins_pipe(ialu_reg); -%} - -instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr) -%{ - match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) )); - predicate(UseBMI1Instructions); - effect(KILL cr); - - ins_cost(125); - format %{ "BLSMSKL $dst, $src" %} - - ins_encode %{ - __ blsmskl($dst$$Register, $src$$Address); - %} - - ins_pipe(ialu_reg_mem); -%} - -instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr) -%{ - match(Set dst (AndI (AddI src minus_1) src) ); - predicate(UseBMI1Instructions); - effect(KILL cr); - - format %{ "BLSRL $dst, $src" %} - - ins_encode %{ - __ blsrl($dst$$Register, $src$$Register); - %} - - ins_pipe(ialu_reg); -%} - -instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr) -%{ - match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) )); - predicate(UseBMI1Instructions); - effect(KILL cr); - - ins_cost(125); - format %{ "BLSRL $dst, $src" %} - - ins_encode %{ - __ blsrl($dst$$Register, $src$$Address); - %} - - ins_pipe(ialu_reg_mem); -%} - -// Or Instructions -// Or Register with Register -instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ - match(Set dst (OrI dst src)); - effect(KILL cr); - - size(2); - format %{ "OR $dst,$src" %} - opcode(0x0B); - ins_encode( OpcP, RegReg( dst, src) ); - ins_pipe( ialu_reg_reg ); -%} - -instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{ - match(Set dst (OrI dst (CastP2X src))); - effect(KILL cr); - - size(2); - format %{ "OR $dst,$src" %} - opcode(0x0B); - ins_encode( OpcP, RegReg( dst, src) ); - ins_pipe( ialu_reg_reg ); -%} - - -// Or Register with Immediate -instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ - match(Set dst (OrI dst src)); - effect(KILL cr); - - format %{ "OR $dst,$src" %} - opcode(0x81,0x01); /* Opcode 81 /1 id */ - // ins_encode( RegImm( dst, src) ); - ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); - ins_pipe( ialu_reg ); -%} - -// Or Register with Memory -instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ - match(Set dst (OrI dst (LoadI src))); - effect(KILL cr); - - ins_cost(150); - format %{ "OR $dst,$src" %} - opcode(0x0B); - ins_encode( SetInstMark, OpcP, RegMem( dst, src), ClearInstMark ); - ins_pipe( ialu_reg_mem ); -%} - -// Or Memory with Register -instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ - match(Set dst (StoreI dst (OrI (LoadI dst) src))); - effect(KILL cr); - - ins_cost(150); - format %{ "OR $dst,$src" %} - opcode(0x09); /* Opcode 09 /r */ - ins_encode( SetInstMark, OpcP, RegMem( src, dst ), ClearInstMark ); - ins_pipe( ialu_mem_reg ); -%} - -// Or Memory with Immediate -instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ - match(Set dst (StoreI dst (OrI (LoadI dst) src))); - effect(KILL cr); - - ins_cost(125); - format %{ "OR $dst,$src" %} - opcode(0x81,0x1); /* Opcode 81 /1 id */ - // ins_encode( MemImm( dst, src) ); - ins_encode( SetInstMark, OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32(src), ClearInstMark ); - ins_pipe( ialu_mem_imm ); -%} - -// ROL/ROR -// ROL expand -instruct rolI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{ - effect(USE_DEF dst, USE shift, KILL cr); - - format %{ "ROL $dst, $shift" %} - opcode(0xD1, 0x0); /* Opcode D1 /0 */ - ins_encode( OpcP, RegOpc( dst )); - ins_pipe( ialu_reg ); -%} - -instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{ - effect(USE_DEF dst, USE shift, KILL cr); - - format %{ "ROL $dst, $shift" %} - opcode(0xC1, 0x0); /*Opcode /C1 /0 */ - ins_encode( RegOpcImm(dst, shift) ); - ins_pipe(ialu_reg); -%} - -instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{ - effect(USE_DEF dst, USE shift, KILL cr); - - format %{ "ROL $dst, $shift" %} - opcode(0xD3, 0x0); /* Opcode D3 /0 */ - ins_encode(OpcP, RegOpc(dst)); - ins_pipe( ialu_reg_reg ); -%} -// end of ROL expand - -// ROL 32bit by one once -instruct rolI_eReg_i1(rRegI dst, immI_1 lshift, immI_M1 rshift, eFlagsReg cr) %{ - match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift))); - - expand %{ - rolI_eReg_imm1(dst, lshift, cr); - %} -%} - -// ROL 32bit var by imm8 once -instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{ - predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); - match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift))); - - expand %{ - rolI_eReg_imm8(dst, lshift, cr); - %} -%} - -// ROL 32bit var by var once -instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{ - match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift)))); - - expand %{ - rolI_eReg_CL(dst, shift, cr); - %} -%} - -// ROL 32bit var by var once -instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{ - match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift)))); - - expand %{ - rolI_eReg_CL(dst, shift, cr); - %} -%} - -// ROR expand -instruct rorI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{ - effect(USE_DEF dst, USE shift, KILL cr); - - format %{ "ROR $dst, $shift" %} - opcode(0xD1,0x1); /* Opcode D1 /1 */ - ins_encode( OpcP, RegOpc( dst ) ); - ins_pipe( ialu_reg ); -%} - -instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{ - effect (USE_DEF dst, USE shift, KILL cr); - - format %{ "ROR $dst, $shift" %} - opcode(0xC1, 0x1); /* Opcode /C1 /1 ib */ - ins_encode( RegOpcImm(dst, shift) ); - ins_pipe( ialu_reg ); -%} - -instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{ - effect(USE_DEF dst, USE shift, KILL cr); - - format %{ "ROR $dst, $shift" %} - opcode(0xD3, 0x1); /* Opcode D3 /1 */ - ins_encode(OpcP, RegOpc(dst)); - ins_pipe( ialu_reg_reg ); -%} -// end of ROR expand - -// ROR right once -instruct rorI_eReg_i1(rRegI dst, immI_1 rshift, immI_M1 lshift, eFlagsReg cr) %{ - match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift))); - - expand %{ - rorI_eReg_imm1(dst, rshift, cr); - %} -%} - -// ROR 32bit by immI8 once -instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{ - predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); - match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift))); - - expand %{ - rorI_eReg_imm8(dst, rshift, cr); - %} -%} - -// ROR 32bit var by var once -instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{ - match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift)))); - - expand %{ - rorI_eReg_CL(dst, shift, cr); - %} -%} - -// ROR 32bit var by var once -instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{ - match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift)))); - - expand %{ - rorI_eReg_CL(dst, shift, cr); - %} -%} - -// Xor Instructions -// Xor Register with Register -instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ - match(Set dst (XorI dst src)); - effect(KILL cr); - - size(2); - format %{ "XOR $dst,$src" %} - opcode(0x33); - ins_encode( OpcP, RegReg( dst, src) ); - ins_pipe( ialu_reg_reg ); -%} - -// Xor Register with Immediate -1 -instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{ - match(Set dst (XorI dst imm)); - - size(2); - format %{ "NOT $dst" %} - ins_encode %{ - __ notl($dst$$Register); - %} - ins_pipe( ialu_reg ); -%} - -// Xor Register with Immediate -instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ - match(Set dst (XorI dst src)); - effect(KILL cr); - - format %{ "XOR $dst,$src" %} - opcode(0x81,0x06); /* Opcode 81 /6 id */ - // ins_encode( RegImm( dst, src) ); - ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); - ins_pipe( ialu_reg ); -%} - -// Xor Register with Memory -instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ - match(Set dst (XorI dst (LoadI src))); - effect(KILL cr); - - ins_cost(150); - format %{ "XOR $dst,$src" %} - opcode(0x33); - ins_encode( SetInstMark, OpcP, RegMem(dst, src), ClearInstMark ); - ins_pipe( ialu_reg_mem ); -%} - -// Xor Memory with Register -instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ - match(Set dst (StoreI dst (XorI (LoadI dst) src))); - effect(KILL cr); - - ins_cost(150); - format %{ "XOR $dst,$src" %} - opcode(0x31); /* Opcode 31 /r */ - ins_encode( SetInstMark, OpcP, RegMem( src, dst ), ClearInstMark ); - ins_pipe( ialu_mem_reg ); -%} - -// Xor Memory with Immediate -instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ - match(Set dst (StoreI dst (XorI (LoadI dst) src))); - effect(KILL cr); - - ins_cost(125); - format %{ "XOR $dst,$src" %} - opcode(0x81,0x6); /* Opcode 81 /6 id */ - ins_encode( SetInstMark, OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32(src), ClearInstMark ); - ins_pipe( ialu_mem_imm ); -%} - -//----------Convert Int to Boolean--------------------------------------------- - -instruct movI_nocopy(rRegI dst, rRegI src) %{ - effect( DEF dst, USE src ); - format %{ "MOV $dst,$src" %} - ins_encode( enc_Copy( dst, src) ); - ins_pipe( ialu_reg_reg ); -%} - -instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{ - effect( USE_DEF dst, USE src, KILL cr ); - - size(4); - format %{ "NEG $dst\n\t" - "ADC $dst,$src" %} - ins_encode( neg_reg(dst), - OpcRegReg(0x13,dst,src) ); - ins_pipe( ialu_reg_reg_long ); -%} - -instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{ - match(Set dst (Conv2B src)); - - expand %{ - movI_nocopy(dst,src); - ci2b(dst,src,cr); - %} -%} - -instruct movP_nocopy(rRegI dst, eRegP src) %{ - effect( DEF dst, USE src ); - format %{ "MOV $dst,$src" %} - ins_encode( enc_Copy( dst, src) ); - ins_pipe( ialu_reg_reg ); -%} - -instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{ - effect( USE_DEF dst, USE src, KILL cr ); - format %{ "NEG $dst\n\t" - "ADC $dst,$src" %} - ins_encode( neg_reg(dst), - OpcRegReg(0x13,dst,src) ); - ins_pipe( ialu_reg_reg_long ); -%} - -instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{ - match(Set dst (Conv2B src)); - - expand %{ - movP_nocopy(dst,src); - cp2b(dst,src,cr); - %} -%} - -instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{ - match(Set dst (CmpLTMask p q)); - effect(KILL cr); - ins_cost(400); - - // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination - format %{ "XOR $dst,$dst\n\t" - "CMP $p,$q\n\t" - "SETlt $dst\n\t" - "NEG $dst" %} - ins_encode %{ - Register Rp = $p$$Register; - Register Rq = $q$$Register; - Register Rd = $dst$$Register; - Label done; - __ xorl(Rd, Rd); - __ cmpl(Rp, Rq); - __ setb(Assembler::less, Rd); - __ negl(Rd); - %} - - ins_pipe(pipe_slow); -%} - -instruct cmpLTMask0(rRegI dst, immI_0 zero, eFlagsReg cr) %{ - match(Set dst (CmpLTMask dst zero)); - effect(DEF dst, KILL cr); - ins_cost(100); - - format %{ "SAR $dst,31\t# cmpLTMask0" %} - ins_encode %{ - __ sarl($dst$$Register, 31); - %} - ins_pipe(ialu_reg); -%} - -/* better to save a register than avoid a branch */ -instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{ - match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q))); - effect(KILL cr); - ins_cost(400); - format %{ "SUB $p,$q\t# cadd_cmpLTMask\n\t" - "JGE done\n\t" - "ADD $p,$y\n" - "done: " %} - ins_encode %{ - Register Rp = $p$$Register; - Register Rq = $q$$Register; - Register Ry = $y$$Register; - Label done; - __ subl(Rp, Rq); - __ jccb(Assembler::greaterEqual, done); - __ addl(Rp, Ry); - __ bind(done); - %} - - ins_pipe(pipe_cmplt); -%} - -/* better to save a register than avoid a branch */ -instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{ - match(Set y (AndI (CmpLTMask p q) y)); - effect(KILL cr); - - ins_cost(300); - - format %{ "CMPL $p, $q\t# and_cmpLTMask\n\t" - "JLT done\n\t" - "XORL $y, $y\n" - "done: " %} - ins_encode %{ - Register Rp = $p$$Register; - Register Rq = $q$$Register; - Register Ry = $y$$Register; - Label done; - __ cmpl(Rp, Rq); - __ jccb(Assembler::less, done); - __ xorl(Ry, Ry); - __ bind(done); - %} - - ins_pipe(pipe_cmplt); -%} - -/* If I enable this, I encourage spilling in the inner loop of compress. -instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{ - match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q))); -*/ -//----------Overflow Math Instructions----------------------------------------- - -instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2) -%{ - match(Set cr (OverflowAddI op1 op2)); - effect(DEF cr, USE_KILL op1, USE op2); - - format %{ "ADD $op1, $op2\t# overflow check int" %} - - ins_encode %{ - __ addl($op1$$Register, $op2$$Register); - %} - ins_pipe(ialu_reg_reg); -%} - -instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2) -%{ - match(Set cr (OverflowAddI op1 op2)); - effect(DEF cr, USE_KILL op1, USE op2); - - format %{ "ADD $op1, $op2\t# overflow check int" %} - - ins_encode %{ - __ addl($op1$$Register, $op2$$constant); - %} - ins_pipe(ialu_reg_reg); -%} - -instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2) -%{ - match(Set cr (OverflowSubI op1 op2)); - - format %{ "CMP $op1, $op2\t# overflow check int" %} - ins_encode %{ - __ cmpl($op1$$Register, $op2$$Register); - %} - ins_pipe(ialu_reg_reg); -%} - -instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2) -%{ - match(Set cr (OverflowSubI op1 op2)); - - format %{ "CMP $op1, $op2\t# overflow check int" %} - ins_encode %{ - __ cmpl($op1$$Register, $op2$$constant); - %} - ins_pipe(ialu_reg_reg); -%} - -instruct overflowNegI_rReg(eFlagsReg cr, immI_0 zero, eAXRegI op2) -%{ - match(Set cr (OverflowSubI zero op2)); - effect(DEF cr, USE_KILL op2); - - format %{ "NEG $op2\t# overflow check int" %} - ins_encode %{ - __ negl($op2$$Register); - %} - ins_pipe(ialu_reg_reg); -%} - -instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2) -%{ - match(Set cr (OverflowMulI op1 op2)); - effect(DEF cr, USE_KILL op1, USE op2); - - format %{ "IMUL $op1, $op2\t# overflow check int" %} - ins_encode %{ - __ imull($op1$$Register, $op2$$Register); - %} - ins_pipe(ialu_reg_reg_alu0); -%} - -instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp) -%{ - match(Set cr (OverflowMulI op1 op2)); - effect(DEF cr, TEMP tmp, USE op1, USE op2); - - format %{ "IMUL $tmp, $op1, $op2\t# overflow check int" %} - ins_encode %{ - __ imull($tmp$$Register, $op1$$Register, $op2$$constant); - %} - ins_pipe(ialu_reg_reg_alu0); -%} - -// Integer Absolute Instructions -instruct absI_rReg(rRegI dst, rRegI src, rRegI tmp, eFlagsReg cr) -%{ - match(Set dst (AbsI src)); - effect(TEMP dst, TEMP tmp, KILL cr); - format %{ "movl $tmp, $src\n\t" - "sarl $tmp, 31\n\t" - "movl $dst, $src\n\t" - "xorl $dst, $tmp\n\t" - "subl $dst, $tmp\n" - %} - ins_encode %{ - __ movl($tmp$$Register, $src$$Register); - __ sarl($tmp$$Register, 31); - __ movl($dst$$Register, $src$$Register); - __ xorl($dst$$Register, $tmp$$Register); - __ subl($dst$$Register, $tmp$$Register); - %} - - ins_pipe(ialu_reg_reg); -%} - -//----------Long Instructions------------------------------------------------ -// Add Long Register with Register -instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ - match(Set dst (AddL dst src)); - effect(KILL cr); - ins_cost(200); - format %{ "ADD $dst.lo,$src.lo\n\t" - "ADC $dst.hi,$src.hi" %} - opcode(0x03, 0x13); - ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) ); - ins_pipe( ialu_reg_reg_long ); -%} - -// Add Long Register with Immediate -instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ - match(Set dst (AddL dst src)); - effect(KILL cr); - format %{ "ADD $dst.lo,$src.lo\n\t" - "ADC $dst.hi,$src.hi" %} - opcode(0x81,0x00,0x02); /* Opcode 81 /0, 81 /2 */ - ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); - ins_pipe( ialu_reg_long ); -%} - -// Add Long Register with Memory -instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ - match(Set dst (AddL dst (LoadL mem))); - effect(KILL cr); - ins_cost(125); - format %{ "ADD $dst.lo,$mem\n\t" - "ADC $dst.hi,$mem+4" %} - opcode(0x03, 0x13); - ins_encode( SetInstMark, OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem), ClearInstMark ); - ins_pipe( ialu_reg_long_mem ); -%} - -// Subtract Long Register with Register. -instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ - match(Set dst (SubL dst src)); - effect(KILL cr); - ins_cost(200); - format %{ "SUB $dst.lo,$src.lo\n\t" - "SBB $dst.hi,$src.hi" %} - opcode(0x2B, 0x1B); - ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) ); - ins_pipe( ialu_reg_reg_long ); -%} - -// Subtract Long Register with Immediate -instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ - match(Set dst (SubL dst src)); - effect(KILL cr); - format %{ "SUB $dst.lo,$src.lo\n\t" - "SBB $dst.hi,$src.hi" %} - opcode(0x81,0x05,0x03); /* Opcode 81 /5, 81 /3 */ - ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); - ins_pipe( ialu_reg_long ); -%} - -// Subtract Long Register with Memory -instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ - match(Set dst (SubL dst (LoadL mem))); - effect(KILL cr); - ins_cost(125); - format %{ "SUB $dst.lo,$mem\n\t" - "SBB $dst.hi,$mem+4" %} - opcode(0x2B, 0x1B); - ins_encode( SetInstMark, OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem), ClearInstMark ); - ins_pipe( ialu_reg_long_mem ); -%} - -instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{ - match(Set dst (SubL zero dst)); - effect(KILL cr); - ins_cost(300); - format %{ "NEG $dst.hi\n\tNEG $dst.lo\n\tSBB $dst.hi,0" %} - ins_encode( neg_long(dst) ); - ins_pipe( ialu_reg_reg_long ); -%} - -// And Long Register with Register -instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ - match(Set dst (AndL dst src)); - effect(KILL cr); - format %{ "AND $dst.lo,$src.lo\n\t" - "AND $dst.hi,$src.hi" %} - opcode(0x23,0x23); - ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); - ins_pipe( ialu_reg_reg_long ); -%} - -// And Long Register with Immediate -instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ - match(Set dst (AndL dst src)); - effect(KILL cr); - format %{ "AND $dst.lo,$src.lo\n\t" - "AND $dst.hi,$src.hi" %} - opcode(0x81,0x04,0x04); /* Opcode 81 /4, 81 /4 */ - ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); - ins_pipe( ialu_reg_long ); -%} - -// And Long Register with Memory -instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ - match(Set dst (AndL dst (LoadL mem))); - effect(KILL cr); - ins_cost(125); - format %{ "AND $dst.lo,$mem\n\t" - "AND $dst.hi,$mem+4" %} - opcode(0x23, 0x23); - ins_encode( SetInstMark, OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem), ClearInstMark ); - ins_pipe( ialu_reg_long_mem ); -%} - -// BMI1 instructions -instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{ - match(Set dst (AndL (XorL src1 minus_1) src2)); - predicate(UseBMI1Instructions); - effect(KILL cr, TEMP dst); - - format %{ "ANDNL $dst.lo, $src1.lo, $src2.lo\n\t" - "ANDNL $dst.hi, $src1.hi, $src2.hi" - %} - - ins_encode %{ - Register Rdst = $dst$$Register; - Register Rsrc1 = $src1$$Register; - Register Rsrc2 = $src2$$Register; - __ andnl(Rdst, Rsrc1, Rsrc2); - __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2)); - %} - ins_pipe(ialu_reg_reg_long); -%} - -instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{ - match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) )); - predicate(UseBMI1Instructions); - effect(KILL cr, TEMP dst); - - ins_cost(125); - format %{ "ANDNL $dst.lo, $src1.lo, $src2\n\t" - "ANDNL $dst.hi, $src1.hi, $src2+4" - %} - - ins_encode %{ - Register Rdst = $dst$$Register; - Register Rsrc1 = $src1$$Register; - Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none); - - __ andnl(Rdst, Rsrc1, $src2$$Address); - __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi); - %} - ins_pipe(ialu_reg_mem); -%} - -instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{ - match(Set dst (AndL (SubL imm_zero src) src)); - predicate(UseBMI1Instructions); - effect(KILL cr, TEMP dst); - - format %{ "MOVL $dst.hi, 0\n\t" - "BLSIL $dst.lo, $src.lo\n\t" - "JNZ done\n\t" - "BLSIL $dst.hi, $src.hi\n" - "done:" - %} - - ins_encode %{ - Label done; - Register Rdst = $dst$$Register; - Register Rsrc = $src$$Register; - __ movl(HIGH_FROM_LOW(Rdst), 0); - __ blsil(Rdst, Rsrc); - __ jccb(Assembler::notZero, done); - __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); - __ bind(done); - %} - ins_pipe(ialu_reg); -%} - -instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{ - match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) )); - predicate(UseBMI1Instructions); - effect(KILL cr, TEMP dst); - - ins_cost(125); - format %{ "MOVL $dst.hi, 0\n\t" - "BLSIL $dst.lo, $src\n\t" - "JNZ done\n\t" - "BLSIL $dst.hi, $src+4\n" - "done:" - %} - - ins_encode %{ - Label done; - Register Rdst = $dst$$Register; - Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); - - __ movl(HIGH_FROM_LOW(Rdst), 0); - __ blsil(Rdst, $src$$Address); - __ jccb(Assembler::notZero, done); - __ blsil(HIGH_FROM_LOW(Rdst), src_hi); - __ bind(done); - %} - ins_pipe(ialu_reg_mem); -%} - -instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr) -%{ - match(Set dst (XorL (AddL src minus_1) src)); - predicate(UseBMI1Instructions); - effect(KILL cr, TEMP dst); - - format %{ "MOVL $dst.hi, 0\n\t" - "BLSMSKL $dst.lo, $src.lo\n\t" - "JNC done\n\t" - "BLSMSKL $dst.hi, $src.hi\n" - "done:" - %} - - ins_encode %{ - Label done; - Register Rdst = $dst$$Register; - Register Rsrc = $src$$Register; - __ movl(HIGH_FROM_LOW(Rdst), 0); - __ blsmskl(Rdst, Rsrc); - __ jccb(Assembler::carryClear, done); - __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); - __ bind(done); - %} - - ins_pipe(ialu_reg); -%} - -instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr) -%{ - match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) )); - predicate(UseBMI1Instructions); - effect(KILL cr, TEMP dst); - - ins_cost(125); - format %{ "MOVL $dst.hi, 0\n\t" - "BLSMSKL $dst.lo, $src\n\t" - "JNC done\n\t" - "BLSMSKL $dst.hi, $src+4\n" - "done:" - %} - - ins_encode %{ - Label done; - Register Rdst = $dst$$Register; - Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); - - __ movl(HIGH_FROM_LOW(Rdst), 0); - __ blsmskl(Rdst, $src$$Address); - __ jccb(Assembler::carryClear, done); - __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi); - __ bind(done); - %} - - ins_pipe(ialu_reg_mem); -%} - -instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr) -%{ - match(Set dst (AndL (AddL src minus_1) src) ); - predicate(UseBMI1Instructions); - effect(KILL cr, TEMP dst); - - format %{ "MOVL $dst.hi, $src.hi\n\t" - "BLSRL $dst.lo, $src.lo\n\t" - "JNC done\n\t" - "BLSRL $dst.hi, $src.hi\n" - "done:" - %} - - ins_encode %{ - Label done; - Register Rdst = $dst$$Register; - Register Rsrc = $src$$Register; - __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); - __ blsrl(Rdst, Rsrc); - __ jccb(Assembler::carryClear, done); - __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); - __ bind(done); - %} - - ins_pipe(ialu_reg); -%} - -instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr) -%{ - match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) )); - predicate(UseBMI1Instructions); - effect(KILL cr, TEMP dst); - - ins_cost(125); - format %{ "MOVL $dst.hi, $src+4\n\t" - "BLSRL $dst.lo, $src\n\t" - "JNC done\n\t" - "BLSRL $dst.hi, $src+4\n" - "done:" - %} - - ins_encode %{ - Label done; - Register Rdst = $dst$$Register; - Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); - __ movl(HIGH_FROM_LOW(Rdst), src_hi); - __ blsrl(Rdst, $src$$Address); - __ jccb(Assembler::carryClear, done); - __ blsrl(HIGH_FROM_LOW(Rdst), src_hi); - __ bind(done); - %} - - ins_pipe(ialu_reg_mem); -%} - -// Or Long Register with Register -instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ - match(Set dst (OrL dst src)); - effect(KILL cr); - format %{ "OR $dst.lo,$src.lo\n\t" - "OR $dst.hi,$src.hi" %} - opcode(0x0B,0x0B); - ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); - ins_pipe( ialu_reg_reg_long ); -%} - -// Or Long Register with Immediate -instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ - match(Set dst (OrL dst src)); - effect(KILL cr); - format %{ "OR $dst.lo,$src.lo\n\t" - "OR $dst.hi,$src.hi" %} - opcode(0x81,0x01,0x01); /* Opcode 81 /1, 81 /1 */ - ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); - ins_pipe( ialu_reg_long ); -%} - -// Or Long Register with Memory -instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ - match(Set dst (OrL dst (LoadL mem))); - effect(KILL cr); - ins_cost(125); - format %{ "OR $dst.lo,$mem\n\t" - "OR $dst.hi,$mem+4" %} - opcode(0x0B,0x0B); - ins_encode( SetInstMark, OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem), ClearInstMark ); - ins_pipe( ialu_reg_long_mem ); -%} - -// Xor Long Register with Register -instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ - match(Set dst (XorL dst src)); - effect(KILL cr); - format %{ "XOR $dst.lo,$src.lo\n\t" - "XOR $dst.hi,$src.hi" %} - opcode(0x33,0x33); - ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); - ins_pipe( ialu_reg_reg_long ); -%} - -// Xor Long Register with Immediate -1 -instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{ - match(Set dst (XorL dst imm)); - format %{ "NOT $dst.lo\n\t" - "NOT $dst.hi" %} - ins_encode %{ - __ notl($dst$$Register); - __ notl(HIGH_FROM_LOW($dst$$Register)); - %} - ins_pipe( ialu_reg_long ); -%} - -// Xor Long Register with Immediate -instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ - match(Set dst (XorL dst src)); - effect(KILL cr); - format %{ "XOR $dst.lo,$src.lo\n\t" - "XOR $dst.hi,$src.hi" %} - opcode(0x81,0x06,0x06); /* Opcode 81 /6, 81 /6 */ - ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); - ins_pipe( ialu_reg_long ); -%} - -// Xor Long Register with Memory -instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ - match(Set dst (XorL dst (LoadL mem))); - effect(KILL cr); - ins_cost(125); - format %{ "XOR $dst.lo,$mem\n\t" - "XOR $dst.hi,$mem+4" %} - opcode(0x33,0x33); - ins_encode( SetInstMark, OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem), ClearInstMark ); - ins_pipe( ialu_reg_long_mem ); -%} - -// Shift Left Long by 1 -instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{ - predicate(UseNewLongLShift); - match(Set dst (LShiftL dst cnt)); - effect(KILL cr); - ins_cost(100); - format %{ "ADD $dst.lo,$dst.lo\n\t" - "ADC $dst.hi,$dst.hi" %} - ins_encode %{ - __ addl($dst$$Register,$dst$$Register); - __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); - %} - ins_pipe( ialu_reg_long ); -%} - -// Shift Left Long by 2 -instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{ - predicate(UseNewLongLShift); - match(Set dst (LShiftL dst cnt)); - effect(KILL cr); - ins_cost(100); - format %{ "ADD $dst.lo,$dst.lo\n\t" - "ADC $dst.hi,$dst.hi\n\t" - "ADD $dst.lo,$dst.lo\n\t" - "ADC $dst.hi,$dst.hi" %} - ins_encode %{ - __ addl($dst$$Register,$dst$$Register); - __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); - __ addl($dst$$Register,$dst$$Register); - __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); - %} - ins_pipe( ialu_reg_long ); -%} - -// Shift Left Long by 3 -instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{ - predicate(UseNewLongLShift); - match(Set dst (LShiftL dst cnt)); - effect(KILL cr); - ins_cost(100); - format %{ "ADD $dst.lo,$dst.lo\n\t" - "ADC $dst.hi,$dst.hi\n\t" - "ADD $dst.lo,$dst.lo\n\t" - "ADC $dst.hi,$dst.hi\n\t" - "ADD $dst.lo,$dst.lo\n\t" - "ADC $dst.hi,$dst.hi" %} - ins_encode %{ - __ addl($dst$$Register,$dst$$Register); - __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); - __ addl($dst$$Register,$dst$$Register); - __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); - __ addl($dst$$Register,$dst$$Register); - __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); - %} - ins_pipe( ialu_reg_long ); -%} - -// Shift Left Long by 1-31 -instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ - match(Set dst (LShiftL dst cnt)); - effect(KILL cr); - ins_cost(200); - format %{ "SHLD $dst.hi,$dst.lo,$cnt\n\t" - "SHL $dst.lo,$cnt" %} - opcode(0xC1, 0x4, 0xA4); /* 0F/A4, then C1 /4 ib */ - ins_encode( move_long_small_shift(dst,cnt) ); - ins_pipe( ialu_reg_long ); -%} - -// Shift Left Long by 32-63 -instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ - match(Set dst (LShiftL dst cnt)); - effect(KILL cr); - ins_cost(300); - format %{ "MOV $dst.hi,$dst.lo\n" - "\tSHL $dst.hi,$cnt-32\n" - "\tXOR $dst.lo,$dst.lo" %} - opcode(0xC1, 0x4); /* C1 /4 ib */ - ins_encode( move_long_big_shift_clr(dst,cnt) ); - ins_pipe( ialu_reg_long ); -%} - -// Shift Left Long by variable -instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ - match(Set dst (LShiftL dst shift)); - effect(KILL cr); - ins_cost(500+200); - size(17); - format %{ "TEST $shift,32\n\t" - "JEQ,s small\n\t" - "MOV $dst.hi,$dst.lo\n\t" - "XOR $dst.lo,$dst.lo\n" - "small:\tSHLD $dst.hi,$dst.lo,$shift\n\t" - "SHL $dst.lo,$shift" %} - ins_encode( shift_left_long( dst, shift ) ); - ins_pipe( pipe_slow ); -%} - -// Shift Right Long by 1-31 -instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ - match(Set dst (URShiftL dst cnt)); - effect(KILL cr); - ins_cost(200); - format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t" - "SHR $dst.hi,$cnt" %} - opcode(0xC1, 0x5, 0xAC); /* 0F/AC, then C1 /5 ib */ - ins_encode( move_long_small_shift(dst,cnt) ); - ins_pipe( ialu_reg_long ); -%} - -// Shift Right Long by 32-63 -instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ - match(Set dst (URShiftL dst cnt)); - effect(KILL cr); - ins_cost(300); - format %{ "MOV $dst.lo,$dst.hi\n" - "\tSHR $dst.lo,$cnt-32\n" - "\tXOR $dst.hi,$dst.hi" %} - opcode(0xC1, 0x5); /* C1 /5 ib */ - ins_encode( move_long_big_shift_clr(dst,cnt) ); - ins_pipe( ialu_reg_long ); -%} - -// Shift Right Long by variable -instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ - match(Set dst (URShiftL dst shift)); - effect(KILL cr); - ins_cost(600); - size(17); - format %{ "TEST $shift,32\n\t" - "JEQ,s small\n\t" - "MOV $dst.lo,$dst.hi\n\t" - "XOR $dst.hi,$dst.hi\n" - "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t" - "SHR $dst.hi,$shift" %} - ins_encode( shift_right_long( dst, shift ) ); - ins_pipe( pipe_slow ); -%} - -// Shift Right Long by 1-31 -instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ - match(Set dst (RShiftL dst cnt)); - effect(KILL cr); - ins_cost(200); - format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t" - "SAR $dst.hi,$cnt" %} - opcode(0xC1, 0x7, 0xAC); /* 0F/AC, then C1 /7 ib */ - ins_encode( move_long_small_shift(dst,cnt) ); - ins_pipe( ialu_reg_long ); -%} - -// Shift Right Long by 32-63 -instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ - match(Set dst (RShiftL dst cnt)); - effect(KILL cr); - ins_cost(300); - format %{ "MOV $dst.lo,$dst.hi\n" - "\tSAR $dst.lo,$cnt-32\n" - "\tSAR $dst.hi,31" %} - opcode(0xC1, 0x7); /* C1 /7 ib */ - ins_encode( move_long_big_shift_sign(dst,cnt) ); - ins_pipe( ialu_reg_long ); -%} - -// Shift Right arithmetic Long by variable -instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ - match(Set dst (RShiftL dst shift)); - effect(KILL cr); - ins_cost(600); - size(18); - format %{ "TEST $shift,32\n\t" - "JEQ,s small\n\t" - "MOV $dst.lo,$dst.hi\n\t" - "SAR $dst.hi,31\n" - "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t" - "SAR $dst.hi,$shift" %} - ins_encode( shift_right_arith_long( dst, shift ) ); - ins_pipe( pipe_slow ); -%} - - -//----------Double Instructions------------------------------------------------ -// Double Math - -// Compare & branch - -// P6 version of float compare, sets condition codes in EFLAGS -instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{ - predicate(VM_Version::supports_cmov() && UseSSE <=1); - match(Set cr (CmpD src1 src2)); - effect(KILL rax); - ins_cost(150); - format %{ "FLD $src1\n\t" - "FUCOMIP ST,$src2 // P6 instruction\n\t" - "JNP exit\n\t" - "MOV ah,1 // saw a NaN, set CF\n\t" - "SAHF\n" - "exit:\tNOP // avoid branch to branch" %} - opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ - ins_encode( Push_Reg_DPR(src1), - OpcP, RegOpc(src2), - cmpF_P6_fixup ); - ins_pipe( pipe_slow ); -%} - -instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{ - predicate(VM_Version::supports_cmov() && UseSSE <=1); - match(Set cr (CmpD src1 src2)); - ins_cost(150); - format %{ "FLD $src1\n\t" - "FUCOMIP ST,$src2 // P6 instruction" %} - opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ - ins_encode( Push_Reg_DPR(src1), - OpcP, RegOpc(src2)); - ins_pipe( pipe_slow ); -%} - -// Compare & branch -instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{ - predicate(UseSSE<=1); - match(Set cr (CmpD src1 src2)); - effect(KILL rax); - ins_cost(200); - format %{ "FLD $src1\n\t" - "FCOMp $src2\n\t" - "FNSTSW AX\n\t" - "TEST AX,0x400\n\t" - "JZ,s flags\n\t" - "MOV AH,1\t# unordered treat as LT\n" - "flags:\tSAHF" %} - opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ - ins_encode( Push_Reg_DPR(src1), - OpcP, RegOpc(src2), - fpu_flags); - ins_pipe( pipe_slow ); -%} - -// Compare vs zero into -1,0,1 -instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{ - predicate(UseSSE<=1); - match(Set dst (CmpD3 src1 zero)); - effect(KILL cr, KILL rax); - ins_cost(280); - format %{ "FTSTD $dst,$src1" %} - opcode(0xE4, 0xD9); - ins_encode( Push_Reg_DPR(src1), - OpcS, OpcP, PopFPU, - CmpF_Result(dst)); - ins_pipe( pipe_slow ); -%} - -// Compare into -1,0,1 -instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{ - predicate(UseSSE<=1); - match(Set dst (CmpD3 src1 src2)); - effect(KILL cr, KILL rax); - ins_cost(300); - format %{ "FCMPD $dst,$src1,$src2" %} - opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ - ins_encode( Push_Reg_DPR(src1), - OpcP, RegOpc(src2), - CmpF_Result(dst)); - ins_pipe( pipe_slow ); -%} - -// float compare and set condition codes in EFLAGS by XMM regs -instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{ - predicate(UseSSE>=2); - match(Set cr (CmpD src1 src2)); - ins_cost(145); - format %{ "UCOMISD $src1,$src2\n\t" - "JNP,s exit\n\t" - "PUSHF\t# saw NaN, set CF\n\t" - "AND [rsp], #0xffffff2b\n\t" - "POPF\n" - "exit:" %} - ins_encode %{ - __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); - emit_cmpfp_fixup(masm); - %} - ins_pipe( pipe_slow ); -%} - -instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{ - predicate(UseSSE>=2); - match(Set cr (CmpD src1 src2)); - ins_cost(100); - format %{ "UCOMISD $src1,$src2" %} - ins_encode %{ - __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -// float compare and set condition codes in EFLAGS by XMM regs -instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{ - predicate(UseSSE>=2); - match(Set cr (CmpD src1 (LoadD src2))); - ins_cost(145); - format %{ "UCOMISD $src1,$src2\n\t" - "JNP,s exit\n\t" - "PUSHF\t# saw NaN, set CF\n\t" - "AND [rsp], #0xffffff2b\n\t" - "POPF\n" - "exit:" %} - ins_encode %{ - __ ucomisd($src1$$XMMRegister, $src2$$Address); - emit_cmpfp_fixup(masm); - %} - ins_pipe( pipe_slow ); -%} - -instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{ - predicate(UseSSE>=2); - match(Set cr (CmpD src1 (LoadD src2))); - ins_cost(100); - format %{ "UCOMISD $src1,$src2" %} - ins_encode %{ - __ ucomisd($src1$$XMMRegister, $src2$$Address); - %} - ins_pipe( pipe_slow ); -%} - -// Compare into -1,0,1 in XMM -instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{ - predicate(UseSSE>=2); - match(Set dst (CmpD3 src1 src2)); - effect(KILL cr); - ins_cost(255); - format %{ "UCOMISD $src1, $src2\n\t" - "MOV $dst, #-1\n\t" - "JP,s done\n\t" - "JB,s done\n\t" - "SETNE $dst\n\t" - "MOVZB $dst, $dst\n" - "done:" %} - ins_encode %{ - __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); - emit_cmpfp3(masm, $dst$$Register); - %} - ins_pipe( pipe_slow ); -%} - -// Compare into -1,0,1 in XMM and memory -instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{ - predicate(UseSSE>=2); - match(Set dst (CmpD3 src1 (LoadD src2))); - effect(KILL cr); - ins_cost(275); - format %{ "UCOMISD $src1, $src2\n\t" - "MOV $dst, #-1\n\t" - "JP,s done\n\t" - "JB,s done\n\t" - "SETNE $dst\n\t" - "MOVZB $dst, $dst\n" - "done:" %} - ins_encode %{ - __ ucomisd($src1$$XMMRegister, $src2$$Address); - emit_cmpfp3(masm, $dst$$Register); - %} - ins_pipe( pipe_slow ); -%} - - -instruct subDPR_reg(regDPR dst, regDPR src) %{ - predicate (UseSSE <=1); - match(Set dst (SubD dst src)); - - format %{ "FLD $src\n\t" - "DSUBp $dst,ST" %} - opcode(0xDE, 0x5); /* DE E8+i or DE /5 */ - ins_cost(150); - ins_encode( Push_Reg_DPR(src), - OpcP, RegOpc(dst) ); - ins_pipe( fpu_reg_reg ); -%} - -instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ - predicate (UseSSE <=1); - match(Set dst (RoundDouble (SubD src1 src2))); - ins_cost(250); - - format %{ "FLD $src2\n\t" - "DSUB ST,$src1\n\t" - "FSTP_D $dst\t# D-round" %} - opcode(0xD8, 0x5); - ins_encode( Push_Reg_DPR(src2), - OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); - ins_pipe( fpu_mem_reg_reg ); -%} - - -instruct subDPR_reg_mem(regDPR dst, memory src) %{ - predicate (UseSSE <=1); - match(Set dst (SubD dst (LoadD src))); - ins_cost(150); - - format %{ "FLD $src\n\t" - "DSUBp $dst,ST" %} - opcode(0xDE, 0x5, 0xDD); /* DE C0+i */ /* LoadD DD /0 */ - ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src), - OpcP, RegOpc(dst), ClearInstMark ); - ins_pipe( fpu_reg_mem ); -%} - -instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{ - predicate (UseSSE<=1); - match(Set dst (AbsD src)); - ins_cost(100); - format %{ "FABS" %} - opcode(0xE1, 0xD9); - ins_encode( OpcS, OpcP ); - ins_pipe( fpu_reg_reg ); -%} - -instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{ - predicate(UseSSE<=1); - match(Set dst (NegD src)); - ins_cost(100); - format %{ "FCHS" %} - opcode(0xE0, 0xD9); - ins_encode( OpcS, OpcP ); - ins_pipe( fpu_reg_reg ); -%} - -instruct addDPR_reg(regDPR dst, regDPR src) %{ - predicate(UseSSE<=1); - match(Set dst (AddD dst src)); - format %{ "FLD $src\n\t" - "DADD $dst,ST" %} - size(4); - ins_cost(150); - opcode(0xDE, 0x0); /* DE C0+i or DE /0*/ - ins_encode( Push_Reg_DPR(src), - OpcP, RegOpc(dst) ); - ins_pipe( fpu_reg_reg ); -%} - - -instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ - predicate(UseSSE<=1); - match(Set dst (RoundDouble (AddD src1 src2))); - ins_cost(250); - - format %{ "FLD $src2\n\t" - "DADD ST,$src1\n\t" - "FSTP_D $dst\t# D-round" %} - opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/ - ins_encode( Push_Reg_DPR(src2), - OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); - ins_pipe( fpu_mem_reg_reg ); -%} - - -instruct addDPR_reg_mem(regDPR dst, memory src) %{ - predicate(UseSSE<=1); - match(Set dst (AddD dst (LoadD src))); - ins_cost(150); - - format %{ "FLD $src\n\t" - "DADDp $dst,ST" %} - opcode(0xDE, 0x0, 0xDD); /* DE C0+i */ /* LoadD DD /0 */ - ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src), - OpcP, RegOpc(dst), ClearInstMark ); - ins_pipe( fpu_reg_mem ); -%} - -// add-to-memory -instruct addDPR_mem_reg(memory dst, regDPR src) %{ - predicate(UseSSE<=1); - match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src)))); - ins_cost(150); - - format %{ "FLD_D $dst\n\t" - "DADD ST,$src\n\t" - "FST_D $dst" %} - opcode(0xDD, 0x0); - ins_encode( SetInstMark, Opcode(0xDD), RMopc_Mem(0x00,dst), - Opcode(0xD8), RegOpc(src), ClearInstMark, - SetInstMark, - Opcode(0xDD), RMopc_Mem(0x03,dst), - ClearInstMark); - ins_pipe( fpu_reg_mem ); -%} - -instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{ - predicate(UseSSE<=1); - match(Set dst (AddD dst con)); - ins_cost(125); - format %{ "FLD1\n\t" - "DADDp $dst,ST" %} - ins_encode %{ - __ fld1(); - __ faddp($dst$$reg); - %} - ins_pipe(fpu_reg); -%} - -instruct addDPR_reg_imm(regDPR dst, immDPR con) %{ - predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); - match(Set dst (AddD dst con)); - ins_cost(200); - format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" - "DADDp $dst,ST" %} - ins_encode %{ - __ fld_d($constantaddress($con)); - __ faddp($dst$$reg); - %} - ins_pipe(fpu_reg_mem); -%} - -instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{ - predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 ); - match(Set dst (RoundDouble (AddD src con))); - ins_cost(200); - format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" - "DADD ST,$src\n\t" - "FSTP_D $dst\t# D-round" %} - ins_encode %{ - __ fld_d($constantaddress($con)); - __ fadd($src$$reg); - __ fstp_d(Address(rsp, $dst$$disp)); - %} - ins_pipe(fpu_mem_reg_con); -%} - -instruct mulDPR_reg(regDPR dst, regDPR src) %{ - predicate(UseSSE<=1); - match(Set dst (MulD dst src)); - format %{ "FLD $src\n\t" - "DMULp $dst,ST" %} - opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ - ins_cost(150); - ins_encode( Push_Reg_DPR(src), - OpcP, RegOpc(dst) ); - ins_pipe( fpu_reg_reg ); -%} - -// Strict FP instruction biases argument before multiply then -// biases result to avoid double rounding of subnormals. -// -// scale arg1 by multiplying arg1 by 2^(-15360) -// load arg2 -// multiply scaled arg1 by arg2 -// rescale product by 2^(15360) -// -instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{ - predicate( UseSSE<=1 && Compile::current()->has_method() ); - match(Set dst (MulD dst src)); - ins_cost(1); // Select this instruction for all FP double multiplies - - format %{ "FLD StubRoutines::x86::_fpu_subnormal_bias1\n\t" - "DMULp $dst,ST\n\t" - "FLD $src\n\t" - "DMULp $dst,ST\n\t" - "FLD StubRoutines::x86::_fpu_subnormal_bias2\n\t" - "DMULp $dst,ST\n\t" %} - opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ - ins_encode( strictfp_bias1(dst), - Push_Reg_DPR(src), - OpcP, RegOpc(dst), - strictfp_bias2(dst) ); - ins_pipe( fpu_reg_reg ); -%} - -instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{ - predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); - match(Set dst (MulD dst con)); - ins_cost(200); - format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" - "DMULp $dst,ST" %} - ins_encode %{ - __ fld_d($constantaddress($con)); - __ fmulp($dst$$reg); - %} - ins_pipe(fpu_reg_mem); -%} - - -instruct mulDPR_reg_mem(regDPR dst, memory src) %{ - predicate( UseSSE<=1 ); - match(Set dst (MulD dst (LoadD src))); - ins_cost(200); - format %{ "FLD_D $src\n\t" - "DMULp $dst,ST" %} - opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/ /* LoadD DD /0 */ - ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src), - OpcP, RegOpc(dst), ClearInstMark ); - ins_pipe( fpu_reg_mem ); -%} - -// -// Cisc-alternate to reg-reg multiply -instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{ - predicate( UseSSE<=1 ); - match(Set dst (MulD src (LoadD mem))); - ins_cost(250); - format %{ "FLD_D $mem\n\t" - "DMUL ST,$src\n\t" - "FSTP_D $dst" %} - opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadD D9 /0 */ - ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,mem), - OpcReg_FPR(src), - Pop_Reg_DPR(dst), ClearInstMark ); - ins_pipe( fpu_reg_reg_mem ); -%} - - -// MACRO3 -- addDPR a mulDPR -// This instruction is a '2-address' instruction in that the result goes -// back to src2. This eliminates a move from the macro; possibly the -// register allocator will have to add it back (and maybe not). -instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{ - predicate( UseSSE<=1 ); - match(Set src2 (AddD (MulD src0 src1) src2)); - format %{ "FLD $src0\t# ===MACRO3d===\n\t" - "DMUL ST,$src1\n\t" - "DADDp $src2,ST" %} - ins_cost(250); - opcode(0xDD); /* LoadD DD /0 */ - ins_encode( Push_Reg_FPR(src0), - FMul_ST_reg(src1), - FAddP_reg_ST(src2) ); - ins_pipe( fpu_reg_reg_reg ); -%} - - -// MACRO3 -- subDPR a mulDPR -instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{ - predicate( UseSSE<=1 ); - match(Set src2 (SubD (MulD src0 src1) src2)); - format %{ "FLD $src0\t# ===MACRO3d===\n\t" - "DMUL ST,$src1\n\t" - "DSUBRp $src2,ST" %} - ins_cost(250); - ins_encode( Push_Reg_FPR(src0), - FMul_ST_reg(src1), - Opcode(0xDE), Opc_plus(0xE0,src2)); - ins_pipe( fpu_reg_reg_reg ); -%} - - -instruct divDPR_reg(regDPR dst, regDPR src) %{ - predicate( UseSSE<=1 ); - match(Set dst (DivD dst src)); - - format %{ "FLD $src\n\t" - "FDIVp $dst,ST" %} - opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ - ins_cost(150); - ins_encode( Push_Reg_DPR(src), - OpcP, RegOpc(dst) ); - ins_pipe( fpu_reg_reg ); -%} - -// Strict FP instruction biases argument before division then -// biases result, to avoid double rounding of subnormals. -// -// scale dividend by multiplying dividend by 2^(-15360) -// load divisor -// divide scaled dividend by divisor -// rescale quotient by 2^(15360) -// -instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{ - predicate (UseSSE<=1); - match(Set dst (DivD dst src)); - predicate( UseSSE<=1 && Compile::current()->has_method() ); - ins_cost(01); - - format %{ "FLD StubRoutines::x86::_fpu_subnormal_bias1\n\t" - "DMULp $dst,ST\n\t" - "FLD $src\n\t" - "FDIVp $dst,ST\n\t" - "FLD StubRoutines::x86::_fpu_subnormal_bias2\n\t" - "DMULp $dst,ST\n\t" %} - opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ - ins_encode( strictfp_bias1(dst), - Push_Reg_DPR(src), - OpcP, RegOpc(dst), - strictfp_bias2(dst) ); - ins_pipe( fpu_reg_reg ); -%} - -instruct atanDPR_reg(regDPR dst, regDPR src) %{ - predicate (UseSSE<=1); - match(Set dst(AtanD dst src)); - format %{ "DATA $dst,$src" %} - opcode(0xD9, 0xF3); - ins_encode( Push_Reg_DPR(src), - OpcP, OpcS, RegOpc(dst) ); - ins_pipe( pipe_slow ); -%} - -instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{ - predicate (UseSSE>=2); - match(Set dst(AtanD dst src)); - effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" - format %{ "DATA $dst,$src" %} - opcode(0xD9, 0xF3); - ins_encode( Push_SrcD(src), - OpcP, OpcS, Push_ResultD(dst) ); - ins_pipe( pipe_slow ); -%} - -instruct sqrtDPR_reg(regDPR dst, regDPR src) %{ - predicate (UseSSE<=1); - match(Set dst (SqrtD src)); - format %{ "DSQRT $dst,$src" %} - opcode(0xFA, 0xD9); - ins_encode( Push_Reg_DPR(src), - OpcS, OpcP, Pop_Reg_DPR(dst) ); - ins_pipe( pipe_slow ); -%} - -//-------------Float Instructions------------------------------- -// Float Math - -// Code for float compare: -// fcompp(); -// fwait(); fnstsw_ax(); -// sahf(); -// movl(dst, unordered_result); -// jcc(Assembler::parity, exit); -// movl(dst, less_result); -// jcc(Assembler::below, exit); -// movl(dst, equal_result); -// jcc(Assembler::equal, exit); -// movl(dst, greater_result); -// exit: - -// P6 version of float compare, sets condition codes in EFLAGS -instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{ - predicate(VM_Version::supports_cmov() && UseSSE == 0); - match(Set cr (CmpF src1 src2)); - effect(KILL rax); - ins_cost(150); - format %{ "FLD $src1\n\t" - "FUCOMIP ST,$src2 // P6 instruction\n\t" - "JNP exit\n\t" - "MOV ah,1 // saw a NaN, set CF (treat as LT)\n\t" - "SAHF\n" - "exit:\tNOP // avoid branch to branch" %} - opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ - ins_encode( Push_Reg_DPR(src1), - OpcP, RegOpc(src2), - cmpF_P6_fixup ); - ins_pipe( pipe_slow ); -%} - -instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{ - predicate(VM_Version::supports_cmov() && UseSSE == 0); - match(Set cr (CmpF src1 src2)); - ins_cost(100); - format %{ "FLD $src1\n\t" - "FUCOMIP ST,$src2 // P6 instruction" %} - opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ - ins_encode( Push_Reg_DPR(src1), - OpcP, RegOpc(src2)); - ins_pipe( pipe_slow ); -%} - - -// Compare & branch -instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{ - predicate(UseSSE == 0); - match(Set cr (CmpF src1 src2)); - effect(KILL rax); - ins_cost(200); - format %{ "FLD $src1\n\t" - "FCOMp $src2\n\t" - "FNSTSW AX\n\t" - "TEST AX,0x400\n\t" - "JZ,s flags\n\t" - "MOV AH,1\t# unordered treat as LT\n" - "flags:\tSAHF" %} - opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ - ins_encode( Push_Reg_DPR(src1), - OpcP, RegOpc(src2), - fpu_flags); - ins_pipe( pipe_slow ); -%} - -// Compare vs zero into -1,0,1 -instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{ - predicate(UseSSE == 0); - match(Set dst (CmpF3 src1 zero)); - effect(KILL cr, KILL rax); - ins_cost(280); - format %{ "FTSTF $dst,$src1" %} - opcode(0xE4, 0xD9); - ins_encode( Push_Reg_DPR(src1), - OpcS, OpcP, PopFPU, - CmpF_Result(dst)); - ins_pipe( pipe_slow ); -%} - -// Compare into -1,0,1 -instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{ - predicate(UseSSE == 0); - match(Set dst (CmpF3 src1 src2)); - effect(KILL cr, KILL rax); - ins_cost(300); - format %{ "FCMPF $dst,$src1,$src2" %} - opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ - ins_encode( Push_Reg_DPR(src1), - OpcP, RegOpc(src2), - CmpF_Result(dst)); - ins_pipe( pipe_slow ); -%} - -// float compare and set condition codes in EFLAGS by XMM regs -instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{ - predicate(UseSSE>=1); - match(Set cr (CmpF src1 src2)); - ins_cost(145); - format %{ "UCOMISS $src1,$src2\n\t" - "JNP,s exit\n\t" - "PUSHF\t# saw NaN, set CF\n\t" - "AND [rsp], #0xffffff2b\n\t" - "POPF\n" - "exit:" %} - ins_encode %{ - __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); - emit_cmpfp_fixup(masm); - %} - ins_pipe( pipe_slow ); -%} - -instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{ - predicate(UseSSE>=1); - match(Set cr (CmpF src1 src2)); - ins_cost(100); - format %{ "UCOMISS $src1,$src2" %} - ins_encode %{ - __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -// float compare and set condition codes in EFLAGS by XMM regs -instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{ - predicate(UseSSE>=1); - match(Set cr (CmpF src1 (LoadF src2))); - ins_cost(165); - format %{ "UCOMISS $src1,$src2\n\t" - "JNP,s exit\n\t" - "PUSHF\t# saw NaN, set CF\n\t" - "AND [rsp], #0xffffff2b\n\t" - "POPF\n" - "exit:" %} - ins_encode %{ - __ ucomiss($src1$$XMMRegister, $src2$$Address); - emit_cmpfp_fixup(masm); - %} - ins_pipe( pipe_slow ); -%} - -instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{ - predicate(UseSSE>=1); - match(Set cr (CmpF src1 (LoadF src2))); - ins_cost(100); - format %{ "UCOMISS $src1,$src2" %} - ins_encode %{ - __ ucomiss($src1$$XMMRegister, $src2$$Address); - %} - ins_pipe( pipe_slow ); -%} - -// Compare into -1,0,1 in XMM -instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{ - predicate(UseSSE>=1); - match(Set dst (CmpF3 src1 src2)); - effect(KILL cr); - ins_cost(255); - format %{ "UCOMISS $src1, $src2\n\t" - "MOV $dst, #-1\n\t" - "JP,s done\n\t" - "JB,s done\n\t" - "SETNE $dst\n\t" - "MOVZB $dst, $dst\n" - "done:" %} - ins_encode %{ - __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); - emit_cmpfp3(masm, $dst$$Register); - %} - ins_pipe( pipe_slow ); -%} - -// Compare into -1,0,1 in XMM and memory -instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{ - predicate(UseSSE>=1); - match(Set dst (CmpF3 src1 (LoadF src2))); - effect(KILL cr); - ins_cost(275); - format %{ "UCOMISS $src1, $src2\n\t" - "MOV $dst, #-1\n\t" - "JP,s done\n\t" - "JB,s done\n\t" - "SETNE $dst\n\t" - "MOVZB $dst, $dst\n" - "done:" %} - ins_encode %{ - __ ucomiss($src1$$XMMRegister, $src2$$Address); - emit_cmpfp3(masm, $dst$$Register); - %} - ins_pipe( pipe_slow ); -%} - -// Spill to obtain 24-bit precision -instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ - predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); - match(Set dst (SubF src1 src2)); - - format %{ "FSUB $dst,$src1 - $src2" %} - opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */ - ins_encode( Push_Reg_FPR(src1), - OpcReg_FPR(src2), - Pop_Mem_FPR(dst) ); - ins_pipe( fpu_mem_reg_reg ); -%} -// -// This instruction does not round to 24-bits -instruct subFPR_reg(regFPR dst, regFPR src) %{ - predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); - match(Set dst (SubF dst src)); - - format %{ "FSUB $dst,$src" %} - opcode(0xDE, 0x5); /* DE E8+i or DE /5 */ - ins_encode( Push_Reg_FPR(src), - OpcP, RegOpc(dst) ); - ins_pipe( fpu_reg_reg ); -%} - -// Spill to obtain 24-bit precision -instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ - predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); - match(Set dst (AddF src1 src2)); - - format %{ "FADD $dst,$src1,$src2" %} - opcode(0xD8, 0x0); /* D8 C0+i */ - ins_encode( Push_Reg_FPR(src2), - OpcReg_FPR(src1), - Pop_Mem_FPR(dst) ); - ins_pipe( fpu_mem_reg_reg ); -%} -// -// This instruction does not round to 24-bits -instruct addFPR_reg(regFPR dst, regFPR src) %{ - predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); - match(Set dst (AddF dst src)); - - format %{ "FLD $src\n\t" - "FADDp $dst,ST" %} - opcode(0xDE, 0x0); /* DE C0+i or DE /0*/ - ins_encode( Push_Reg_FPR(src), - OpcP, RegOpc(dst) ); - ins_pipe( fpu_reg_reg ); -%} - -instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{ - predicate(UseSSE==0); - match(Set dst (AbsF src)); - ins_cost(100); - format %{ "FABS" %} - opcode(0xE1, 0xD9); - ins_encode( OpcS, OpcP ); - ins_pipe( fpu_reg_reg ); -%} - -instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{ - predicate(UseSSE==0); - match(Set dst (NegF src)); - ins_cost(100); - format %{ "FCHS" %} - opcode(0xE0, 0xD9); - ins_encode( OpcS, OpcP ); - ins_pipe( fpu_reg_reg ); -%} - -// Cisc-alternate to addFPR_reg -// Spill to obtain 24-bit precision -instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{ - predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); - match(Set dst (AddF src1 (LoadF src2))); - - format %{ "FLD $src2\n\t" - "FADD ST,$src1\n\t" - "FSTP_S $dst" %} - opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */ - ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src2), - OpcReg_FPR(src1), - Pop_Mem_FPR(dst), ClearInstMark ); - ins_pipe( fpu_mem_reg_mem ); -%} -// -// Cisc-alternate to addFPR_reg -// This instruction does not round to 24-bits -instruct addFPR_reg_mem(regFPR dst, memory src) %{ - predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); - match(Set dst (AddF dst (LoadF src))); - - format %{ "FADD $dst,$src" %} - opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/ /* LoadF D9 /0 */ - ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src), - OpcP, RegOpc(dst), ClearInstMark ); - ins_pipe( fpu_reg_mem ); -%} - -// // Following two instructions for _222_mpegaudio -// Spill to obtain 24-bit precision -instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{ - predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); - match(Set dst (AddF src1 src2)); - - format %{ "FADD $dst,$src1,$src2" %} - opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */ - ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src1), - OpcReg_FPR(src2), - Pop_Mem_FPR(dst), ClearInstMark ); - ins_pipe( fpu_mem_reg_mem ); -%} - -// Cisc-spill variant -// Spill to obtain 24-bit precision -instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{ - predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); - match(Set dst (AddF src1 (LoadF src2))); - - format %{ "FADD $dst,$src1,$src2 cisc" %} - opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */ - ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src2), - OpcP, RMopc_Mem(secondary,src1), - Pop_Mem_FPR(dst), - ClearInstMark); - ins_pipe( fpu_mem_mem_mem ); -%} - -// Spill to obtain 24-bit precision -instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{ - predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); - match(Set dst (AddF src1 src2)); - - format %{ "FADD $dst,$src1,$src2" %} - opcode(0xD8, 0x0, 0xD9); /* D8 /0 */ /* LoadF D9 /0 */ - ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src2), - OpcP, RMopc_Mem(secondary,src1), - Pop_Mem_FPR(dst), - ClearInstMark); - ins_pipe( fpu_mem_mem_mem ); -%} - - -// Spill to obtain 24-bit precision -instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{ - predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); - match(Set dst (AddF src con)); - format %{ "FLD $src\n\t" - "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t" - "FSTP_S $dst" %} - ins_encode %{ - __ fld_s($src$$reg - 1); // FLD ST(i-1) - __ fadd_s($constantaddress($con)); - __ fstp_s(Address(rsp, $dst$$disp)); - %} - ins_pipe(fpu_mem_reg_con); -%} -// -// This instruction does not round to 24-bits -instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{ - predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); - match(Set dst (AddF src con)); - format %{ "FLD $src\n\t" - "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t" - "FSTP $dst" %} - ins_encode %{ - __ fld_s($src$$reg - 1); // FLD ST(i-1) - __ fadd_s($constantaddress($con)); - __ fstp_d($dst$$reg); - %} - ins_pipe(fpu_reg_reg_con); -%} - -// Spill to obtain 24-bit precision -instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ - predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); - match(Set dst (MulF src1 src2)); - - format %{ "FLD $src1\n\t" - "FMUL $src2\n\t" - "FSTP_S $dst" %} - opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */ - ins_encode( Push_Reg_FPR(src1), - OpcReg_FPR(src2), - Pop_Mem_FPR(dst) ); - ins_pipe( fpu_mem_reg_reg ); -%} -// -// This instruction does not round to 24-bits -instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{ - predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); - match(Set dst (MulF src1 src2)); - - format %{ "FLD $src1\n\t" - "FMUL $src2\n\t" - "FSTP_S $dst" %} - opcode(0xD8, 0x1); /* D8 C8+i */ - ins_encode( Push_Reg_FPR(src2), - OpcReg_FPR(src1), - Pop_Reg_FPR(dst) ); - ins_pipe( fpu_reg_reg_reg ); -%} - - -// Spill to obtain 24-bit precision -// Cisc-alternate to reg-reg multiply -instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{ - predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); - match(Set dst (MulF src1 (LoadF src2))); - - format %{ "FLD_S $src2\n\t" - "FMUL $src1\n\t" - "FSTP_S $dst" %} - opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/ /* LoadF D9 /0 */ - ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src2), - OpcReg_FPR(src1), - Pop_Mem_FPR(dst), ClearInstMark ); - ins_pipe( fpu_mem_reg_mem ); -%} -// -// This instruction does not round to 24-bits -// Cisc-alternate to reg-reg multiply -instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{ - predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); - match(Set dst (MulF src1 (LoadF src2))); - - format %{ "FMUL $dst,$src1,$src2" %} - opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadF D9 /0 */ - ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src2), - OpcReg_FPR(src1), - Pop_Reg_FPR(dst), ClearInstMark ); - ins_pipe( fpu_reg_reg_mem ); -%} - -// Spill to obtain 24-bit precision -instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{ - predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); - match(Set dst (MulF src1 src2)); - - format %{ "FMUL $dst,$src1,$src2" %} - opcode(0xD8, 0x1, 0xD9); /* D8 /1 */ /* LoadF D9 /0 */ - ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src2), - OpcP, RMopc_Mem(secondary,src1), - Pop_Mem_FPR(dst), - ClearInstMark ); - ins_pipe( fpu_mem_mem_mem ); -%} - -// Spill to obtain 24-bit precision -instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{ - predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); - match(Set dst (MulF src con)); - - format %{ "FLD $src\n\t" - "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t" - "FSTP_S $dst" %} - ins_encode %{ - __ fld_s($src$$reg - 1); // FLD ST(i-1) - __ fmul_s($constantaddress($con)); - __ fstp_s(Address(rsp, $dst$$disp)); - %} - ins_pipe(fpu_mem_reg_con); -%} -// -// This instruction does not round to 24-bits -instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{ - predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); - match(Set dst (MulF src con)); - - format %{ "FLD $src\n\t" - "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t" - "FSTP $dst" %} - ins_encode %{ - __ fld_s($src$$reg - 1); // FLD ST(i-1) - __ fmul_s($constantaddress($con)); - __ fstp_d($dst$$reg); - %} - ins_pipe(fpu_reg_reg_con); -%} - - -// -// MACRO1 -- subsume unshared load into mulFPR -// This instruction does not round to 24-bits -instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{ - predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); - match(Set dst (MulF (LoadF mem1) src)); - - format %{ "FLD $mem1 ===MACRO1===\n\t" - "FMUL ST,$src\n\t" - "FSTP $dst" %} - opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */ /* LoadF D9 /0 */ - ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,mem1), - OpcReg_FPR(src), - Pop_Reg_FPR(dst), ClearInstMark ); - ins_pipe( fpu_reg_reg_mem ); -%} -// -// MACRO2 -- addFPR a mulFPR which subsumed an unshared load -// This instruction does not round to 24-bits -instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{ - predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); - match(Set dst (AddF (MulF (LoadF mem1) src1) src2)); - ins_cost(95); - - format %{ "FLD $mem1 ===MACRO2===\n\t" - "FMUL ST,$src1 subsume mulFPR left load\n\t" - "FADD ST,$src2\n\t" - "FSTP $dst" %} - opcode(0xD9); /* LoadF D9 /0 */ - ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem1), - FMul_ST_reg(src1), - FAdd_ST_reg(src2), - Pop_Reg_FPR(dst), ClearInstMark ); - ins_pipe( fpu_reg_mem_reg_reg ); -%} - -// MACRO3 -- addFPR a mulFPR -// This instruction does not round to 24-bits. It is a '2-address' -// instruction in that the result goes back to src2. This eliminates -// a move from the macro; possibly the register allocator will have -// to add it back (and maybe not). -instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{ - predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); - match(Set src2 (AddF (MulF src0 src1) src2)); - - format %{ "FLD $src0 ===MACRO3===\n\t" - "FMUL ST,$src1\n\t" - "FADDP $src2,ST" %} - opcode(0xD9); /* LoadF D9 /0 */ - ins_encode( Push_Reg_FPR(src0), - FMul_ST_reg(src1), - FAddP_reg_ST(src2) ); - ins_pipe( fpu_reg_reg_reg ); -%} - -// MACRO4 -- divFPR subFPR -// This instruction does not round to 24-bits -instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{ - predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); - match(Set dst (DivF (SubF src2 src1) src3)); - - format %{ "FLD $src2 ===MACRO4===\n\t" - "FSUB ST,$src1\n\t" - "FDIV ST,$src3\n\t" - "FSTP $dst" %} - opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ - ins_encode( Push_Reg_FPR(src2), - subFPR_divFPR_encode(src1,src3), - Pop_Reg_FPR(dst) ); - ins_pipe( fpu_reg_reg_reg_reg ); -%} - -// Spill to obtain 24-bit precision -instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ - predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); - match(Set dst (DivF src1 src2)); - - format %{ "FDIV $dst,$src1,$src2" %} - opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/ - ins_encode( Push_Reg_FPR(src1), - OpcReg_FPR(src2), - Pop_Mem_FPR(dst) ); - ins_pipe( fpu_mem_reg_reg ); -%} -// -// This instruction does not round to 24-bits -instruct divFPR_reg(regFPR dst, regFPR src) %{ - predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); - match(Set dst (DivF dst src)); - - format %{ "FDIV $dst,$src" %} - opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ - ins_encode( Push_Reg_FPR(src), - OpcP, RegOpc(dst) ); - ins_pipe( fpu_reg_reg ); -%} - - -//----------Arithmetic Conversion Instructions--------------------------------- -// The conversions operations are all Alpha sorted. Please keep it that way! - -instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{ - predicate(UseSSE==0); - match(Set dst (RoundFloat src)); - ins_cost(125); - format %{ "FST_S $dst,$src\t# F-round" %} - ins_encode( Pop_Mem_Reg_FPR(dst, src) ); - ins_pipe( fpu_mem_reg ); -%} - -instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{ - predicate(UseSSE<=1); - match(Set dst (RoundDouble src)); - ins_cost(125); - format %{ "FST_D $dst,$src\t# D-round" %} - ins_encode( Pop_Mem_Reg_DPR(dst, src) ); - ins_pipe( fpu_mem_reg ); -%} - -// Force rounding to 24-bit precision and 6-bit exponent -instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{ - predicate(UseSSE==0); - match(Set dst (ConvD2F src)); - format %{ "FST_S $dst,$src\t# F-round" %} - expand %{ - roundFloat_mem_reg(dst,src); - %} -%} - -// Force rounding to 24-bit precision and 6-bit exponent -instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{ - predicate(UseSSE==1); - match(Set dst (ConvD2F src)); - effect( KILL cr ); - format %{ "SUB ESP,4\n\t" - "FST_S [ESP],$src\t# F-round\n\t" - "MOVSS $dst,[ESP]\n\t" - "ADD ESP,4" %} - ins_encode %{ - __ subptr(rsp, 4); - if ($src$$reg != FPR1L_enc) { - __ fld_s($src$$reg-1); - __ fstp_s(Address(rsp, 0)); - } else { - __ fst_s(Address(rsp, 0)); - } - __ movflt($dst$$XMMRegister, Address(rsp, 0)); - __ addptr(rsp, 4); - %} - ins_pipe( pipe_slow ); -%} - -// Force rounding double precision to single precision -instruct convD2F_reg(regF dst, regD src) %{ - predicate(UseSSE>=2); - match(Set dst (ConvD2F src)); - format %{ "CVTSD2SS $dst,$src\t# F-round" %} - ins_encode %{ - __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{ - predicate(UseSSE==0); - match(Set dst (ConvF2D src)); - format %{ "FST_S $dst,$src\t# D-round" %} - ins_encode( Pop_Reg_Reg_DPR(dst, src)); - ins_pipe( fpu_reg_reg ); -%} - -instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{ - predicate(UseSSE==1); - match(Set dst (ConvF2D src)); - format %{ "FST_D $dst,$src\t# D-round" %} - expand %{ - roundDouble_mem_reg(dst,src); - %} -%} - -instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{ - predicate(UseSSE==1); - match(Set dst (ConvF2D src)); - effect( KILL cr ); - format %{ "SUB ESP,4\n\t" - "MOVSS [ESP] $src\n\t" - "FLD_S [ESP]\n\t" - "ADD ESP,4\n\t" - "FSTP $dst\t# D-round" %} - ins_encode %{ - __ subptr(rsp, 4); - __ movflt(Address(rsp, 0), $src$$XMMRegister); - __ fld_s(Address(rsp, 0)); - __ addptr(rsp, 4); - __ fstp_d($dst$$reg); - %} - ins_pipe( pipe_slow ); -%} - -instruct convF2D_reg(regD dst, regF src) %{ - predicate(UseSSE>=2); - match(Set dst (ConvF2D src)); - format %{ "CVTSS2SD $dst,$src\t# D-round" %} - ins_encode %{ - __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -// Convert a double to an int. If the double is a NAN, stuff a zero in instead. -instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{ - predicate(UseSSE<=1); - match(Set dst (ConvD2I src)); - effect( KILL tmp, KILL cr ); - format %{ "FLD $src\t# Convert double to int \n\t" - "FLDCW trunc mode\n\t" - "SUB ESP,4\n\t" - "FISTp [ESP + #0]\n\t" - "FLDCW std/24-bit mode\n\t" - "POP EAX\n\t" - "CMP EAX,0x80000000\n\t" - "JNE,s fast\n\t" - "FLD_D $src\n\t" - "CALL d2i_wrapper\n" - "fast:" %} - ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) ); - ins_pipe( pipe_slow ); -%} - -// Convert a double to an int. If the double is a NAN, stuff a zero in instead. -instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{ - predicate(UseSSE>=2); - match(Set dst (ConvD2I src)); - effect( KILL tmp, KILL cr ); - format %{ "CVTTSD2SI $dst, $src\n\t" - "CMP $dst,0x80000000\n\t" - "JNE,s fast\n\t" - "SUB ESP, 8\n\t" - "MOVSD [ESP], $src\n\t" - "FLD_D [ESP]\n\t" - "ADD ESP, 8\n\t" - "CALL d2i_wrapper\n" - "fast:" %} - ins_encode %{ - Label fast; - __ cvttsd2sil($dst$$Register, $src$$XMMRegister); - __ cmpl($dst$$Register, 0x80000000); - __ jccb(Assembler::notEqual, fast); - __ subptr(rsp, 8); - __ movdbl(Address(rsp, 0), $src$$XMMRegister); - __ fld_d(Address(rsp, 0)); - __ addptr(rsp, 8); - __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper()))); - __ post_call_nop(); - __ bind(fast); - %} - ins_pipe( pipe_slow ); -%} - -instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{ - predicate(UseSSE<=1); - match(Set dst (ConvD2L src)); - effect( KILL cr ); - format %{ "FLD $src\t# Convert double to long\n\t" - "FLDCW trunc mode\n\t" - "SUB ESP,8\n\t" - "FISTp [ESP + #0]\n\t" - "FLDCW std/24-bit mode\n\t" - "POP EAX\n\t" - "POP EDX\n\t" - "CMP EDX,0x80000000\n\t" - "JNE,s fast\n\t" - "TEST EAX,EAX\n\t" - "JNE,s fast\n\t" - "FLD $src\n\t" - "CALL d2l_wrapper\n" - "fast:" %} - ins_encode( Push_Reg_DPR(src), DPR2L_encoding(src) ); - ins_pipe( pipe_slow ); -%} - -// XMM lacks a float/double->long conversion, so use the old FPU stack. -instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{ - predicate (UseSSE>=2); - match(Set dst (ConvD2L src)); - effect( KILL cr ); - format %{ "SUB ESP,8\t# Convert double to long\n\t" - "MOVSD [ESP],$src\n\t" - "FLD_D [ESP]\n\t" - "FLDCW trunc mode\n\t" - "FISTp [ESP + #0]\n\t" - "FLDCW std/24-bit mode\n\t" - "POP EAX\n\t" - "POP EDX\n\t" - "CMP EDX,0x80000000\n\t" - "JNE,s fast\n\t" - "TEST EAX,EAX\n\t" - "JNE,s fast\n\t" - "SUB ESP,8\n\t" - "MOVSD [ESP],$src\n\t" - "FLD_D [ESP]\n\t" - "ADD ESP,8\n\t" - "CALL d2l_wrapper\n" - "fast:" %} - ins_encode %{ - Label fast; - __ subptr(rsp, 8); - __ movdbl(Address(rsp, 0), $src$$XMMRegister); - __ fld_d(Address(rsp, 0)); - __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc())); - __ fistp_d(Address(rsp, 0)); - // Restore the rounding mode, mask the exception - if (Compile::current()->in_24_bit_fp_mode()) { - __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24())); - } else { - __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std())); - } - // Load the converted long, adjust CPU stack - __ pop(rax); - __ pop(rdx); - __ cmpl(rdx, 0x80000000); - __ jccb(Assembler::notEqual, fast); - __ testl(rax, rax); - __ jccb(Assembler::notEqual, fast); - __ subptr(rsp, 8); - __ movdbl(Address(rsp, 0), $src$$XMMRegister); - __ fld_d(Address(rsp, 0)); - __ addptr(rsp, 8); - __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper()))); - __ post_call_nop(); - __ bind(fast); - %} - ins_pipe( pipe_slow ); -%} - -// Convert a double to an int. Java semantics require we do complex -// manglations in the corner cases. So we set the rounding mode to -// 'zero', store the darned double down as an int, and reset the -// rounding mode to 'nearest'. The hardware stores a flag value down -// if we would overflow or converted a NAN; we check for this and -// and go the slow path if needed. -instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{ - predicate(UseSSE==0); - match(Set dst (ConvF2I src)); - effect( KILL tmp, KILL cr ); - format %{ "FLD $src\t# Convert float to int \n\t" - "FLDCW trunc mode\n\t" - "SUB ESP,4\n\t" - "FISTp [ESP + #0]\n\t" - "FLDCW std/24-bit mode\n\t" - "POP EAX\n\t" - "CMP EAX,0x80000000\n\t" - "JNE,s fast\n\t" - "FLD $src\n\t" - "CALL d2i_wrapper\n" - "fast:" %} - // DPR2I_encoding works for FPR2I - ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) ); - ins_pipe( pipe_slow ); -%} - -// Convert a float in xmm to an int reg. -instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{ - predicate(UseSSE>=1); - match(Set dst (ConvF2I src)); - effect( KILL tmp, KILL cr ); - format %{ "CVTTSS2SI $dst, $src\n\t" - "CMP $dst,0x80000000\n\t" - "JNE,s fast\n\t" - "SUB ESP, 4\n\t" - "MOVSS [ESP], $src\n\t" - "FLD [ESP]\n\t" - "ADD ESP, 4\n\t" - "CALL d2i_wrapper\n" - "fast:" %} - ins_encode %{ - Label fast; - __ cvttss2sil($dst$$Register, $src$$XMMRegister); - __ cmpl($dst$$Register, 0x80000000); - __ jccb(Assembler::notEqual, fast); - __ subptr(rsp, 4); - __ movflt(Address(rsp, 0), $src$$XMMRegister); - __ fld_s(Address(rsp, 0)); - __ addptr(rsp, 4); - __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper()))); - __ post_call_nop(); - __ bind(fast); - %} - ins_pipe( pipe_slow ); -%} - -instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{ - predicate(UseSSE==0); - match(Set dst (ConvF2L src)); - effect( KILL cr ); - format %{ "FLD $src\t# Convert float to long\n\t" - "FLDCW trunc mode\n\t" - "SUB ESP,8\n\t" - "FISTp [ESP + #0]\n\t" - "FLDCW std/24-bit mode\n\t" - "POP EAX\n\t" - "POP EDX\n\t" - "CMP EDX,0x80000000\n\t" - "JNE,s fast\n\t" - "TEST EAX,EAX\n\t" - "JNE,s fast\n\t" - "FLD $src\n\t" - "CALL d2l_wrapper\n" - "fast:" %} - // DPR2L_encoding works for FPR2L - ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) ); - ins_pipe( pipe_slow ); -%} - -// XMM lacks a float/double->long conversion, so use the old FPU stack. -instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{ - predicate (UseSSE>=1); - match(Set dst (ConvF2L src)); - effect( KILL cr ); - format %{ "SUB ESP,8\t# Convert float to long\n\t" - "MOVSS [ESP],$src\n\t" - "FLD_S [ESP]\n\t" - "FLDCW trunc mode\n\t" - "FISTp [ESP + #0]\n\t" - "FLDCW std/24-bit mode\n\t" - "POP EAX\n\t" - "POP EDX\n\t" - "CMP EDX,0x80000000\n\t" - "JNE,s fast\n\t" - "TEST EAX,EAX\n\t" - "JNE,s fast\n\t" - "SUB ESP,4\t# Convert float to long\n\t" - "MOVSS [ESP],$src\n\t" - "FLD_S [ESP]\n\t" - "ADD ESP,4\n\t" - "CALL d2l_wrapper\n" - "fast:" %} - ins_encode %{ - Label fast; - __ subptr(rsp, 8); - __ movflt(Address(rsp, 0), $src$$XMMRegister); - __ fld_s(Address(rsp, 0)); - __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc())); - __ fistp_d(Address(rsp, 0)); - // Restore the rounding mode, mask the exception - if (Compile::current()->in_24_bit_fp_mode()) { - __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24())); - } else { - __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std())); - } - // Load the converted long, adjust CPU stack - __ pop(rax); - __ pop(rdx); - __ cmpl(rdx, 0x80000000); - __ jccb(Assembler::notEqual, fast); - __ testl(rax, rax); - __ jccb(Assembler::notEqual, fast); - __ subptr(rsp, 4); - __ movflt(Address(rsp, 0), $src$$XMMRegister); - __ fld_s(Address(rsp, 0)); - __ addptr(rsp, 4); - __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper()))); - __ post_call_nop(); - __ bind(fast); - %} - ins_pipe( pipe_slow ); -%} - -instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{ - predicate( UseSSE<=1 ); - match(Set dst (ConvI2D src)); - format %{ "FILD $src\n\t" - "FSTP $dst" %} - opcode(0xDB, 0x0); /* DB /0 */ - ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst)); - ins_pipe( fpu_reg_mem ); -%} - -instruct convI2D_reg(regD dst, rRegI src) %{ - predicate( UseSSE>=2 && !UseXmmI2D ); - match(Set dst (ConvI2D src)); - format %{ "CVTSI2SD $dst,$src" %} - ins_encode %{ - __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register); - %} - ins_pipe( pipe_slow ); -%} - -instruct convI2D_mem(regD dst, memory mem) %{ - predicate( UseSSE>=2 ); - match(Set dst (ConvI2D (LoadI mem))); - format %{ "CVTSI2SD $dst,$mem" %} - ins_encode %{ - __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address); - %} - ins_pipe( pipe_slow ); -%} - -instruct convXI2D_reg(regD dst, rRegI src) -%{ - predicate( UseSSE>=2 && UseXmmI2D ); - match(Set dst (ConvI2D src)); - - format %{ "MOVD $dst,$src\n\t" - "CVTDQ2PD $dst,$dst\t# i2d" %} - ins_encode %{ - __ movdl($dst$$XMMRegister, $src$$Register); - __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister); - %} - ins_pipe(pipe_slow); // XXX -%} - -instruct convI2DPR_mem(regDPR dst, memory mem) %{ - predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr()); - match(Set dst (ConvI2D (LoadI mem))); - format %{ "FILD $mem\n\t" - "FSTP $dst" %} - opcode(0xDB); /* DB /0 */ - ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem), - Pop_Reg_DPR(dst), ClearInstMark); - ins_pipe( fpu_reg_mem ); -%} - -// Convert a byte to a float; no rounding step needed. -instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{ - predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 ); - match(Set dst (ConvI2F src)); - format %{ "FILD $src\n\t" - "FSTP $dst" %} - - opcode(0xDB, 0x0); /* DB /0 */ - ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst)); - ins_pipe( fpu_reg_mem ); -%} - -// In 24-bit mode, force exponent rounding by storing back out -instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{ - predicate( UseSSE==0 && Compile::current()->select_24_bit_instr()); - match(Set dst (ConvI2F src)); - ins_cost(200); - format %{ "FILD $src\n\t" - "FSTP_S $dst" %} - opcode(0xDB, 0x0); /* DB /0 */ - ins_encode( Push_Mem_I(src), - Pop_Mem_FPR(dst)); - ins_pipe( fpu_mem_mem ); -%} - -// In 24-bit mode, force exponent rounding by storing back out -instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{ - predicate( UseSSE==0 && Compile::current()->select_24_bit_instr()); - match(Set dst (ConvI2F (LoadI mem))); - ins_cost(200); - format %{ "FILD $mem\n\t" - "FSTP_S $dst" %} - opcode(0xDB); /* DB /0 */ - ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem), - Pop_Mem_FPR(dst), ClearInstMark); - ins_pipe( fpu_mem_mem ); -%} - -// This instruction does not round to 24-bits -instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{ - predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr()); - match(Set dst (ConvI2F src)); - format %{ "FILD $src\n\t" - "FSTP $dst" %} - opcode(0xDB, 0x0); /* DB /0 */ - ins_encode( Push_Mem_I(src), - Pop_Reg_FPR(dst)); - ins_pipe( fpu_reg_mem ); -%} - -// This instruction does not round to 24-bits -instruct convI2FPR_mem(regFPR dst, memory mem) %{ - predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr()); - match(Set dst (ConvI2F (LoadI mem))); - format %{ "FILD $mem\n\t" - "FSTP $dst" %} - opcode(0xDB); /* DB /0 */ - ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem), - Pop_Reg_FPR(dst), ClearInstMark); - ins_pipe( fpu_reg_mem ); -%} - -// Convert an int to a float in xmm; no rounding step needed. -instruct convI2F_reg(regF dst, rRegI src) %{ - predicate( UseSSE==1 || ( UseSSE>=2 && !UseXmmI2F )); - match(Set dst (ConvI2F src)); - format %{ "CVTSI2SS $dst, $src" %} - ins_encode %{ - __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register); - %} - ins_pipe( pipe_slow ); -%} - - instruct convXI2F_reg(regF dst, rRegI src) -%{ - predicate( UseSSE>=2 && UseXmmI2F ); - match(Set dst (ConvI2F src)); - - format %{ "MOVD $dst,$src\n\t" - "CVTDQ2PS $dst,$dst\t# i2f" %} - ins_encode %{ - __ movdl($dst$$XMMRegister, $src$$Register); - __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister); - %} - ins_pipe(pipe_slow); // XXX -%} - -instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{ - match(Set dst (ConvI2L src)); - effect(KILL cr); - ins_cost(375); - format %{ "MOV $dst.lo,$src\n\t" - "MOV $dst.hi,$src\n\t" - "SAR $dst.hi,31" %} - ins_encode(convert_int_long(dst,src)); - ins_pipe( ialu_reg_reg_long ); -%} - -// Zero-extend convert int to long -instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{ - match(Set dst (AndL (ConvI2L src) mask) ); - effect( KILL flags ); - ins_cost(250); - format %{ "MOV $dst.lo,$src\n\t" - "XOR $dst.hi,$dst.hi" %} - opcode(0x33); // XOR - ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) ); - ins_pipe( ialu_reg_reg_long ); -%} - -// Zero-extend long -instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{ - match(Set dst (AndL src mask) ); - effect( KILL flags ); - ins_cost(250); - format %{ "MOV $dst.lo,$src.lo\n\t" - "XOR $dst.hi,$dst.hi\n\t" %} - opcode(0x33); // XOR - ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) ); - ins_pipe( ialu_reg_reg_long ); -%} - -instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{ - predicate (UseSSE<=1); - match(Set dst (ConvL2D src)); - effect( KILL cr ); - format %{ "PUSH $src.hi\t# Convert long to double\n\t" - "PUSH $src.lo\n\t" - "FILD ST,[ESP + #0]\n\t" - "ADD ESP,8\n\t" - "FSTP_D $dst\t# D-round" %} - opcode(0xDF, 0x5); /* DF /5 */ - ins_encode(convert_long_double(src), Pop_Mem_DPR(dst)); - ins_pipe( pipe_slow ); -%} - -instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{ - predicate (UseSSE>=2); - match(Set dst (ConvL2D src)); - effect( KILL cr ); - format %{ "PUSH $src.hi\t# Convert long to double\n\t" - "PUSH $src.lo\n\t" - "FILD_D [ESP]\n\t" - "FSTP_D [ESP]\n\t" - "MOVSD $dst,[ESP]\n\t" - "ADD ESP,8" %} - opcode(0xDF, 0x5); /* DF /5 */ - ins_encode(convert_long_double2(src), Push_ResultD(dst)); - ins_pipe( pipe_slow ); -%} - -instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{ - predicate (UseSSE>=1); - match(Set dst (ConvL2F src)); - effect( KILL cr ); - format %{ "PUSH $src.hi\t# Convert long to single float\n\t" - "PUSH $src.lo\n\t" - "FILD_D [ESP]\n\t" - "FSTP_S [ESP]\n\t" - "MOVSS $dst,[ESP]\n\t" - "ADD ESP,8" %} - opcode(0xDF, 0x5); /* DF /5 */ - ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8)); - ins_pipe( pipe_slow ); -%} - -instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{ - match(Set dst (ConvL2F src)); - effect( KILL cr ); - format %{ "PUSH $src.hi\t# Convert long to single float\n\t" - "PUSH $src.lo\n\t" - "FILD ST,[ESP + #0]\n\t" - "ADD ESP,8\n\t" - "FSTP_S $dst\t# F-round" %} - opcode(0xDF, 0x5); /* DF /5 */ - ins_encode(convert_long_double(src), Pop_Mem_FPR(dst)); - ins_pipe( pipe_slow ); -%} - -instruct convL2I_reg( rRegI dst, eRegL src ) %{ - match(Set dst (ConvL2I src)); - effect( DEF dst, USE src ); - format %{ "MOV $dst,$src.lo" %} - ins_encode(enc_CopyL_Lo(dst,src)); - ins_pipe( ialu_reg_reg ); -%} - -instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{ - match(Set dst (MoveF2I src)); - effect( DEF dst, USE src ); - ins_cost(100); - format %{ "MOV $dst,$src\t# MoveF2I_stack_reg" %} - ins_encode %{ - __ movl($dst$$Register, Address(rsp, $src$$disp)); - %} - ins_pipe( ialu_reg_mem ); -%} - -instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{ - predicate(UseSSE==0); - match(Set dst (MoveF2I src)); - effect( DEF dst, USE src ); - - ins_cost(125); - format %{ "FST_S $dst,$src\t# MoveF2I_reg_stack" %} - ins_encode( Pop_Mem_Reg_FPR(dst, src) ); - ins_pipe( fpu_mem_reg ); -%} - -instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{ - predicate(UseSSE>=1); - match(Set dst (MoveF2I src)); - effect( DEF dst, USE src ); - - ins_cost(95); - format %{ "MOVSS $dst,$src\t# MoveF2I_reg_stack_sse" %} - ins_encode %{ - __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{ - predicate(UseSSE>=2); - match(Set dst (MoveF2I src)); - effect( DEF dst, USE src ); - ins_cost(85); - format %{ "MOVD $dst,$src\t# MoveF2I_reg_reg_sse" %} - ins_encode %{ - __ movdl($dst$$Register, $src$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{ - match(Set dst (MoveI2F src)); - effect( DEF dst, USE src ); - - ins_cost(100); - format %{ "MOV $dst,$src\t# MoveI2F_reg_stack" %} - ins_encode %{ - __ movl(Address(rsp, $dst$$disp), $src$$Register); - %} - ins_pipe( ialu_mem_reg ); -%} - - -instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{ - predicate(UseSSE==0); - match(Set dst (MoveI2F src)); - effect(DEF dst, USE src); - - ins_cost(125); - format %{ "FLD_S $src\n\t" - "FSTP $dst\t# MoveI2F_stack_reg" %} - opcode(0xD9); /* D9 /0, FLD m32real */ - ins_encode( SetInstMark, OpcP, RMopc_Mem_no_oop(0x00,src), - Pop_Reg_FPR(dst), ClearInstMark ); - ins_pipe( fpu_reg_mem ); -%} - -instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{ - predicate(UseSSE>=1); - match(Set dst (MoveI2F src)); - effect( DEF dst, USE src ); - - ins_cost(95); - format %{ "MOVSS $dst,$src\t# MoveI2F_stack_reg_sse" %} - ins_encode %{ - __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp)); - %} - ins_pipe( pipe_slow ); -%} - -instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{ - predicate(UseSSE>=2); - match(Set dst (MoveI2F src)); - effect( DEF dst, USE src ); - - ins_cost(85); - format %{ "MOVD $dst,$src\t# MoveI2F_reg_reg_sse" %} - ins_encode %{ - __ movdl($dst$$XMMRegister, $src$$Register); - %} - ins_pipe( pipe_slow ); -%} - -instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{ - match(Set dst (MoveD2L src)); - effect(DEF dst, USE src); - - ins_cost(250); - format %{ "MOV $dst.lo,$src\n\t" - "MOV $dst.hi,$src+4\t# MoveD2L_stack_reg" %} - opcode(0x8B, 0x8B); - ins_encode( SetInstMark, OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src), ClearInstMark); - ins_pipe( ialu_mem_long_reg ); -%} - -instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{ - predicate(UseSSE<=1); - match(Set dst (MoveD2L src)); - effect(DEF dst, USE src); - - ins_cost(125); - format %{ "FST_D $dst,$src\t# MoveD2L_reg_stack" %} - ins_encode( Pop_Mem_Reg_DPR(dst, src) ); - ins_pipe( fpu_mem_reg ); -%} - -instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{ - predicate(UseSSE>=2); - match(Set dst (MoveD2L src)); - effect(DEF dst, USE src); - ins_cost(95); - format %{ "MOVSD $dst,$src\t# MoveD2L_reg_stack_sse" %} - ins_encode %{ - __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{ - predicate(UseSSE>=2); - match(Set dst (MoveD2L src)); - effect(DEF dst, USE src, TEMP tmp); - ins_cost(85); - format %{ "MOVD $dst.lo,$src\n\t" - "PSHUFLW $tmp,$src,0x4E\n\t" - "MOVD $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %} - ins_encode %{ - __ movdl($dst$$Register, $src$$XMMRegister); - __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e); - __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{ - match(Set dst (MoveL2D src)); - effect(DEF dst, USE src); - - ins_cost(200); - format %{ "MOV $dst,$src.lo\n\t" - "MOV $dst+4,$src.hi\t# MoveL2D_reg_stack" %} - opcode(0x89, 0x89); - ins_encode( SetInstMark, OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ), ClearInstMark ); - ins_pipe( ialu_mem_long_reg ); -%} - - -instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{ - predicate(UseSSE<=1); - match(Set dst (MoveL2D src)); - effect(DEF dst, USE src); - ins_cost(125); - - format %{ "FLD_D $src\n\t" - "FSTP $dst\t# MoveL2D_stack_reg" %} - opcode(0xDD); /* DD /0, FLD m64real */ - ins_encode( SetInstMark, OpcP, RMopc_Mem_no_oop(0x00,src), - Pop_Reg_DPR(dst), ClearInstMark ); - ins_pipe( fpu_reg_mem ); -%} - - -instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{ - predicate(UseSSE>=2 && UseXmmLoadAndClearUpper); - match(Set dst (MoveL2D src)); - effect(DEF dst, USE src); - - ins_cost(95); - format %{ "MOVSD $dst,$src\t# MoveL2D_stack_reg_sse" %} - ins_encode %{ - __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp)); - %} - ins_pipe( pipe_slow ); -%} - -instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{ - predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper); - match(Set dst (MoveL2D src)); - effect(DEF dst, USE src); - - ins_cost(95); - format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %} - ins_encode %{ - __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp)); - %} - ins_pipe( pipe_slow ); -%} - -instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{ - predicate(UseSSE>=2); - match(Set dst (MoveL2D src)); - effect(TEMP dst, USE src, TEMP tmp); - ins_cost(85); - format %{ "MOVD $dst,$src.lo\n\t" - "MOVD $tmp,$src.hi\n\t" - "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %} - ins_encode %{ - __ movdl($dst$$XMMRegister, $src$$Register); - __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); - __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -//----------------------------- CompressBits/ExpandBits ------------------------ - -instruct compressBitsL_reg(eADXRegL dst, eBCXRegL src, eBDPRegL mask, eSIRegI rtmp, regF xtmp, eFlagsReg cr) %{ - predicate(n->bottom_type()->isa_long()); - match(Set dst (CompressBits src mask)); - effect(TEMP rtmp, TEMP xtmp, KILL cr); - format %{ "compress_bits $dst, $src, $mask\t! using $rtmp and $xtmp as TEMP" %} - ins_encode %{ - Label exit, partail_result; - // Parallely extract both upper and lower 32 bits of source into destination register pair. - // Merge the results of upper and lower destination registers such that upper destination - // results are contiguously laid out after the lower destination result. - __ pextl($dst$$Register, $src$$Register, $mask$$Register); - __ pextl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($src$$Register), HIGH_FROM_LOW($mask$$Register)); - __ popcntl($rtmp$$Register, $mask$$Register); - // Skip merging if bit count of lower mask register is equal to 32 (register size). - __ cmpl($rtmp$$Register, 32); - __ jccb(Assembler::equal, exit); - // Due to constraint on number of GPRs on 32 bit target, using XMM register as potential spill slot. - __ movdl($xtmp$$XMMRegister, $rtmp$$Register); - // Shift left the contents of upper destination register by true bit count of lower mask register - // and merge with lower destination register. - __ shlxl($rtmp$$Register, HIGH_FROM_LOW($dst$$Register), $rtmp$$Register); - __ orl($dst$$Register, $rtmp$$Register); - __ movdl($rtmp$$Register, $xtmp$$XMMRegister); - // Zero out upper destination register if true bit count of lower 32 bit mask is zero - // since contents of upper destination have already been copied to lower destination - // register. - __ cmpl($rtmp$$Register, 0); - __ jccb(Assembler::greater, partail_result); - __ movl(HIGH_FROM_LOW($dst$$Register), 0); - __ jmp(exit); - __ bind(partail_result); - // Perform right shift over upper destination register to move out bits already copied - // to lower destination register. - __ subl($rtmp$$Register, 32); - __ negl($rtmp$$Register); - __ shrxl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register), $rtmp$$Register); - __ bind(exit); - %} - ins_pipe( pipe_slow ); -%} - -instruct expandBitsL_reg(eADXRegL dst, eBCXRegL src, eBDPRegL mask, eSIRegI rtmp, regF xtmp, eFlagsReg cr) %{ - predicate(n->bottom_type()->isa_long()); - match(Set dst (ExpandBits src mask)); - effect(TEMP rtmp, TEMP xtmp, KILL cr); - format %{ "expand_bits $dst, $src, $mask\t! using $rtmp and $xtmp as TEMP" %} - ins_encode %{ - // Extraction operation sequentially reads the bits from source register starting from LSB - // and lays them out into destination register at bit locations corresponding to true bits - // in mask register. Thus number of source bits read are equal to combined true bit count - // of mask register pair. - Label exit, mask_clipping; - __ pdepl($dst$$Register, $src$$Register, $mask$$Register); - __ pdepl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($src$$Register), HIGH_FROM_LOW($mask$$Register)); - __ popcntl($rtmp$$Register, $mask$$Register); - // If true bit count of lower mask register is 32 then none of bit of lower source register - // will feed to upper destination register. - __ cmpl($rtmp$$Register, 32); - __ jccb(Assembler::equal, exit); - // Due to constraint on number of GPRs on 32 bit target, using XMM register as potential spill slot. - __ movdl($xtmp$$XMMRegister, $rtmp$$Register); - // Shift right the contents of lower source register to remove already consumed bits. - __ shrxl($rtmp$$Register, $src$$Register, $rtmp$$Register); - // Extract the bits from lower source register starting from LSB under the influence - // of upper mask register. - __ pdepl(HIGH_FROM_LOW($dst$$Register), $rtmp$$Register, HIGH_FROM_LOW($mask$$Register)); - __ movdl($rtmp$$Register, $xtmp$$XMMRegister); - __ subl($rtmp$$Register, 32); - __ negl($rtmp$$Register); - __ movdl($xtmp$$XMMRegister, $mask$$Register); - __ movl($mask$$Register, HIGH_FROM_LOW($mask$$Register)); - // Clear the set bits in upper mask register which have been used to extract the contents - // from lower source register. - __ bind(mask_clipping); - __ blsrl($mask$$Register, $mask$$Register); - __ decrementl($rtmp$$Register, 1); - __ jccb(Assembler::greater, mask_clipping); - // Starting from LSB extract the bits from upper source register under the influence of - // remaining set bits in upper mask register. - __ pdepl($rtmp$$Register, HIGH_FROM_LOW($src$$Register), $mask$$Register); - // Merge the partial results extracted from lower and upper source register bits. - __ orl(HIGH_FROM_LOW($dst$$Register), $rtmp$$Register); - __ movdl($mask$$Register, $xtmp$$XMMRegister); - __ bind(exit); - %} - ins_pipe( pipe_slow ); -%} - -// ======================================================================= -// Fast clearing of an array -// Small non-constant length ClearArray for non-AVX512 targets. -instruct rep_stos(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{ - predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2)); - match(Set dummy (ClearArray cnt base)); - effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr); - - format %{ $$template - $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t" - $$emit$$"CMP InitArrayShortSize,rcx\n\t" - $$emit$$"JG LARGE\n\t" - $$emit$$"SHL ECX, 1\n\t" - $$emit$$"DEC ECX\n\t" - $$emit$$"JS DONE\t# Zero length\n\t" - $$emit$$"MOV EAX,(EDI,ECX,4)\t# LOOP\n\t" - $$emit$$"DEC ECX\n\t" - $$emit$$"JGE LOOP\n\t" - $$emit$$"JMP DONE\n\t" - $$emit$$"# LARGE:\n\t" - if (UseFastStosb) { - $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t" - $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t" - } else if (UseXMMForObjInit) { - $$emit$$"MOV RDI,RAX\n\t" - $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t" - $$emit$$"JMPQ L_zero_64_bytes\n\t" - $$emit$$"# L_loop:\t# 64-byte LOOP\n\t" - $$emit$$"VMOVDQU YMM0,(RAX)\n\t" - $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t" - $$emit$$"ADD 0x40,RAX\n\t" - $$emit$$"# L_zero_64_bytes:\n\t" - $$emit$$"SUB 0x8,RCX\n\t" - $$emit$$"JGE L_loop\n\t" - $$emit$$"ADD 0x4,RCX\n\t" - $$emit$$"JL L_tail\n\t" - $$emit$$"VMOVDQU YMM0,(RAX)\n\t" - $$emit$$"ADD 0x20,RAX\n\t" - $$emit$$"SUB 0x4,RCX\n\t" - $$emit$$"# L_tail:\t# Clearing tail bytes\n\t" - $$emit$$"ADD 0x4,RCX\n\t" - $$emit$$"JLE L_end\n\t" - $$emit$$"DEC RCX\n\t" - $$emit$$"# L_sloop:\t# 8-byte short loop\n\t" - $$emit$$"VMOVQ XMM0,(RAX)\n\t" - $$emit$$"ADD 0x8,RAX\n\t" - $$emit$$"DEC RCX\n\t" - $$emit$$"JGE L_sloop\n\t" - $$emit$$"# L_end:\n\t" - } else { - $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t" - $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t" - } - $$emit$$"# DONE" - %} - ins_encode %{ - __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, - $tmp$$XMMRegister, false, knoreg); - %} - ins_pipe( pipe_slow ); -%} - -// Small non-constant length ClearArray for AVX512 targets. -instruct rep_stos_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{ - predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2)); - match(Set dummy (ClearArray cnt base)); - ins_cost(125); - effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr); - - format %{ $$template - $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t" - $$emit$$"CMP InitArrayShortSize,rcx\n\t" - $$emit$$"JG LARGE\n\t" - $$emit$$"SHL ECX, 1\n\t" - $$emit$$"DEC ECX\n\t" - $$emit$$"JS DONE\t# Zero length\n\t" - $$emit$$"MOV EAX,(EDI,ECX,4)\t# LOOP\n\t" - $$emit$$"DEC ECX\n\t" - $$emit$$"JGE LOOP\n\t" - $$emit$$"JMP DONE\n\t" - $$emit$$"# LARGE:\n\t" - if (UseFastStosb) { - $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t" - $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t" - } else if (UseXMMForObjInit) { - $$emit$$"MOV RDI,RAX\n\t" - $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t" - $$emit$$"JMPQ L_zero_64_bytes\n\t" - $$emit$$"# L_loop:\t# 64-byte LOOP\n\t" - $$emit$$"VMOVDQU YMM0,(RAX)\n\t" - $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t" - $$emit$$"ADD 0x40,RAX\n\t" - $$emit$$"# L_zero_64_bytes:\n\t" - $$emit$$"SUB 0x8,RCX\n\t" - $$emit$$"JGE L_loop\n\t" - $$emit$$"ADD 0x4,RCX\n\t" - $$emit$$"JL L_tail\n\t" - $$emit$$"VMOVDQU YMM0,(RAX)\n\t" - $$emit$$"ADD 0x20,RAX\n\t" - $$emit$$"SUB 0x4,RCX\n\t" - $$emit$$"# L_tail:\t# Clearing tail bytes\n\t" - $$emit$$"ADD 0x4,RCX\n\t" - $$emit$$"JLE L_end\n\t" - $$emit$$"DEC RCX\n\t" - $$emit$$"# L_sloop:\t# 8-byte short loop\n\t" - $$emit$$"VMOVQ XMM0,(RAX)\n\t" - $$emit$$"ADD 0x8,RAX\n\t" - $$emit$$"DEC RCX\n\t" - $$emit$$"JGE L_sloop\n\t" - $$emit$$"# L_end:\n\t" - } else { - $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t" - $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t" - } - $$emit$$"# DONE" - %} - ins_encode %{ - __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, - $tmp$$XMMRegister, false, $ktmp$$KRegister); - %} - ins_pipe( pipe_slow ); -%} - -// Large non-constant length ClearArray for non-AVX512 targets. -instruct rep_stos_large(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{ - predicate((UseAVX <= 2) && ((ClearArrayNode*)n)->is_large()); - match(Set dummy (ClearArray cnt base)); - effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr); - format %{ $$template - if (UseFastStosb) { - $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t" - $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t" - $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t" - } else if (UseXMMForObjInit) { - $$emit$$"MOV RDI,RAX\t# ClearArray:\n\t" - $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t" - $$emit$$"JMPQ L_zero_64_bytes\n\t" - $$emit$$"# L_loop:\t# 64-byte LOOP\n\t" - $$emit$$"VMOVDQU YMM0,(RAX)\n\t" - $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t" - $$emit$$"ADD 0x40,RAX\n\t" - $$emit$$"# L_zero_64_bytes:\n\t" - $$emit$$"SUB 0x8,RCX\n\t" - $$emit$$"JGE L_loop\n\t" - $$emit$$"ADD 0x4,RCX\n\t" - $$emit$$"JL L_tail\n\t" - $$emit$$"VMOVDQU YMM0,(RAX)\n\t" - $$emit$$"ADD 0x20,RAX\n\t" - $$emit$$"SUB 0x4,RCX\n\t" - $$emit$$"# L_tail:\t# Clearing tail bytes\n\t" - $$emit$$"ADD 0x4,RCX\n\t" - $$emit$$"JLE L_end\n\t" - $$emit$$"DEC RCX\n\t" - $$emit$$"# L_sloop:\t# 8-byte short loop\n\t" - $$emit$$"VMOVQ XMM0,(RAX)\n\t" - $$emit$$"ADD 0x8,RAX\n\t" - $$emit$$"DEC RCX\n\t" - $$emit$$"JGE L_sloop\n\t" - $$emit$$"# L_end:\n\t" - } else { - $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t" - $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t" - $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t" - } - $$emit$$"# DONE" - %} - ins_encode %{ - __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, - $tmp$$XMMRegister, true, knoreg); - %} - ins_pipe( pipe_slow ); -%} - -// Large non-constant length ClearArray for AVX512 targets. -instruct rep_stos_large_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{ - predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large()); - match(Set dummy (ClearArray cnt base)); - effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr); - format %{ $$template - if (UseFastStosb) { - $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t" - $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t" - $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t" - } else if (UseXMMForObjInit) { - $$emit$$"MOV RDI,RAX\t# ClearArray:\n\t" - $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t" - $$emit$$"JMPQ L_zero_64_bytes\n\t" - $$emit$$"# L_loop:\t# 64-byte LOOP\n\t" - $$emit$$"VMOVDQU YMM0,(RAX)\n\t" - $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t" - $$emit$$"ADD 0x40,RAX\n\t" - $$emit$$"# L_zero_64_bytes:\n\t" - $$emit$$"SUB 0x8,RCX\n\t" - $$emit$$"JGE L_loop\n\t" - $$emit$$"ADD 0x4,RCX\n\t" - $$emit$$"JL L_tail\n\t" - $$emit$$"VMOVDQU YMM0,(RAX)\n\t" - $$emit$$"ADD 0x20,RAX\n\t" - $$emit$$"SUB 0x4,RCX\n\t" - $$emit$$"# L_tail:\t# Clearing tail bytes\n\t" - $$emit$$"ADD 0x4,RCX\n\t" - $$emit$$"JLE L_end\n\t" - $$emit$$"DEC RCX\n\t" - $$emit$$"# L_sloop:\t# 8-byte short loop\n\t" - $$emit$$"VMOVQ XMM0,(RAX)\n\t" - $$emit$$"ADD 0x8,RAX\n\t" - $$emit$$"DEC RCX\n\t" - $$emit$$"JGE L_sloop\n\t" - $$emit$$"# L_end:\n\t" - } else { - $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t" - $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t" - $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t" - } - $$emit$$"# DONE" - %} - ins_encode %{ - __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, - $tmp$$XMMRegister, true, $ktmp$$KRegister); - %} - ins_pipe( pipe_slow ); -%} - -// Small constant length ClearArray for AVX512 targets. -instruct rep_stos_im(immI cnt, kReg ktmp, eRegP base, regD tmp, rRegI zero, Universe dummy, eFlagsReg cr) -%{ - predicate(!((ClearArrayNode*)n)->is_large() && (MaxVectorSize >= 32) && VM_Version::supports_avx512vl()); - match(Set dummy (ClearArray cnt base)); - ins_cost(100); - effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr); - format %{ "clear_mem_imm $base , $cnt \n\t" %} - ins_encode %{ - __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister); - %} - ins_pipe(pipe_slow); -%} - -instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, - eAXRegI result, regD tmp1, eFlagsReg cr) %{ - predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL); - match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); - effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); - - format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} - ins_encode %{ - __ string_compare($str1$$Register, $str2$$Register, - $cnt1$$Register, $cnt2$$Register, $result$$Register, - $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg); - %} - ins_pipe( pipe_slow ); -%} - -instruct string_compareL_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, - eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{ - predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL); - match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); - effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); - - format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} - ins_encode %{ - __ string_compare($str1$$Register, $str2$$Register, - $cnt1$$Register, $cnt2$$Register, $result$$Register, - $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister); - %} - ins_pipe( pipe_slow ); -%} - -instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, - eAXRegI result, regD tmp1, eFlagsReg cr) %{ - predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU); - match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); - effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); - - format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} - ins_encode %{ - __ string_compare($str1$$Register, $str2$$Register, - $cnt1$$Register, $cnt2$$Register, $result$$Register, - $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg); - %} - ins_pipe( pipe_slow ); -%} - -instruct string_compareU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, - eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{ - predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU); - match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); - effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); - - format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} - ins_encode %{ - __ string_compare($str1$$Register, $str2$$Register, - $cnt1$$Register, $cnt2$$Register, $result$$Register, - $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister); - %} - ins_pipe( pipe_slow ); -%} - -instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, - eAXRegI result, regD tmp1, eFlagsReg cr) %{ - predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU); - match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); - effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); - - format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} - ins_encode %{ - __ string_compare($str1$$Register, $str2$$Register, - $cnt1$$Register, $cnt2$$Register, $result$$Register, - $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg); - %} - ins_pipe( pipe_slow ); -%} - -instruct string_compareLU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, - eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{ - predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU); - match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); - effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); - - format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} - ins_encode %{ - __ string_compare($str1$$Register, $str2$$Register, - $cnt1$$Register, $cnt2$$Register, $result$$Register, - $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister); - %} - ins_pipe( pipe_slow ); -%} - -instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2, - eAXRegI result, regD tmp1, eFlagsReg cr) %{ - predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL); - match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); - effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); - - format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} - ins_encode %{ - __ string_compare($str2$$Register, $str1$$Register, - $cnt2$$Register, $cnt1$$Register, $result$$Register, - $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg); - %} - ins_pipe( pipe_slow ); -%} - -instruct string_compareUL_evex(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2, - eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{ - predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL); - match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); - effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); - - format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} - ins_encode %{ - __ string_compare($str2$$Register, $str1$$Register, - $cnt2$$Register, $cnt1$$Register, $result$$Register, - $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister); - %} - ins_pipe( pipe_slow ); -%} - -// fast string equals -instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result, - regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{ - predicate(!VM_Version::supports_avx512vlbw()); - match(Set result (StrEquals (Binary str1 str2) cnt)); - effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr); - - format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %} - ins_encode %{ - __ arrays_equals(false, $str1$$Register, $str2$$Register, - $cnt$$Register, $result$$Register, $tmp3$$Register, - $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg); - %} - - ins_pipe( pipe_slow ); -%} - -instruct string_equals_evex(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result, - regD tmp1, regD tmp2, kReg ktmp, eBXRegI tmp3, eFlagsReg cr) %{ - predicate(VM_Version::supports_avx512vlbw()); - match(Set result (StrEquals (Binary str1 str2) cnt)); - effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr); - - format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %} - ins_encode %{ - __ arrays_equals(false, $str1$$Register, $str2$$Register, - $cnt$$Register, $result$$Register, $tmp3$$Register, - $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister); - %} - - ins_pipe( pipe_slow ); -%} - - -// fast search of substring with known size. -instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2, - eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{ - predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL)); - match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); - effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr); - - format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec1, $cnt1, $cnt2, $tmp" %} - ins_encode %{ - int icnt2 = (int)$int_cnt2$$constant; - if (icnt2 >= 16) { - // IndexOf for constant substrings with size >= 16 elements - // which don't need to be loaded through stack. - __ string_indexofC8($str1$$Register, $str2$$Register, - $cnt1$$Register, $cnt2$$Register, - icnt2, $result$$Register, - $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL); - } else { - // Small strings are loaded through stack if they cross page boundary. - __ string_indexof($str1$$Register, $str2$$Register, - $cnt1$$Register, $cnt2$$Register, - icnt2, $result$$Register, - $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL); - } - %} - ins_pipe( pipe_slow ); -%} - -// fast search of substring with known size. -instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2, - eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{ - predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU)); - match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); - effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr); - - format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec1, $cnt1, $cnt2, $tmp" %} - ins_encode %{ - int icnt2 = (int)$int_cnt2$$constant; - if (icnt2 >= 8) { - // IndexOf for constant substrings with size >= 8 elements - // which don't need to be loaded through stack. - __ string_indexofC8($str1$$Register, $str2$$Register, - $cnt1$$Register, $cnt2$$Register, - icnt2, $result$$Register, - $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU); - } else { - // Small strings are loaded through stack if they cross page boundary. - __ string_indexof($str1$$Register, $str2$$Register, - $cnt1$$Register, $cnt2$$Register, - icnt2, $result$$Register, - $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU); - } - %} - ins_pipe( pipe_slow ); -%} - -// fast search of substring with known size. -instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2, - eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{ - predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL)); - match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); - effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr); - - format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec1, $cnt1, $cnt2, $tmp" %} - ins_encode %{ - int icnt2 = (int)$int_cnt2$$constant; - if (icnt2 >= 8) { - // IndexOf for constant substrings with size >= 8 elements - // which don't need to be loaded through stack. - __ string_indexofC8($str1$$Register, $str2$$Register, - $cnt1$$Register, $cnt2$$Register, - icnt2, $result$$Register, - $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL); - } else { - // Small strings are loaded through stack if they cross page boundary. - __ string_indexof($str1$$Register, $str2$$Register, - $cnt1$$Register, $cnt2$$Register, - icnt2, $result$$Register, - $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL); - } - %} - ins_pipe( pipe_slow ); -%} - -instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2, - eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{ - predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL)); - match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); - effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr); - - format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %} - ins_encode %{ - __ string_indexof($str1$$Register, $str2$$Register, - $cnt1$$Register, $cnt2$$Register, - (-1), $result$$Register, - $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL); - %} - ins_pipe( pipe_slow ); -%} - -instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2, - eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{ - predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU)); - match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); - effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr); - - format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %} - ins_encode %{ - __ string_indexof($str1$$Register, $str2$$Register, - $cnt1$$Register, $cnt2$$Register, - (-1), $result$$Register, - $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU); - %} - ins_pipe( pipe_slow ); -%} - -instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2, - eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{ - predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL)); - match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); - effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr); - - format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %} - ins_encode %{ - __ string_indexof($str1$$Register, $str2$$Register, - $cnt1$$Register, $cnt2$$Register, - (-1), $result$$Register, - $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL); - %} - ins_pipe( pipe_slow ); -%} - -instruct string_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch, - eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{ - predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U)); - match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); - effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr); - format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %} - ins_encode %{ - __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register, - $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register); - %} - ins_pipe( pipe_slow ); -%} - -instruct stringL_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch, - eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{ - predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L)); - match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); - effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr); - format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %} - ins_encode %{ - __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register, - $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register); - %} - ins_pipe( pipe_slow ); -%} - - -// fast array equals -instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result, - regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr) -%{ - predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL); - match(Set result (AryEq ary1 ary2)); - effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr); - //ins_cost(300); - - format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %} - ins_encode %{ - __ arrays_equals(true, $ary1$$Register, $ary2$$Register, - $tmp3$$Register, $result$$Register, $tmp4$$Register, - $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg); - %} - ins_pipe( pipe_slow ); -%} - -instruct array_equalsB_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result, - regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr) -%{ - predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL); - match(Set result (AryEq ary1 ary2)); - effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr); - //ins_cost(300); - - format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %} - ins_encode %{ - __ arrays_equals(true, $ary1$$Register, $ary2$$Register, - $tmp3$$Register, $result$$Register, $tmp4$$Register, - $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister); - %} - ins_pipe( pipe_slow ); -%} - -instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result, - regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr) -%{ - predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU); - match(Set result (AryEq ary1 ary2)); - effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr); - //ins_cost(300); - - format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %} - ins_encode %{ - __ arrays_equals(true, $ary1$$Register, $ary2$$Register, - $tmp3$$Register, $result$$Register, $tmp4$$Register, - $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg); - %} - ins_pipe( pipe_slow ); -%} - -instruct array_equalsC_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result, - regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr) -%{ - predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU); - match(Set result (AryEq ary1 ary2)); - effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr); - //ins_cost(300); - - format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %} - ins_encode %{ - __ arrays_equals(true, $ary1$$Register, $ary2$$Register, - $tmp3$$Register, $result$$Register, $tmp4$$Register, - $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister); - %} - ins_pipe( pipe_slow ); -%} - -instruct count_positives(eSIRegP ary1, eCXRegI len, eAXRegI result, - regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) -%{ - predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2()); - match(Set result (CountPositives ary1 len)); - effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr); - - format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %} - ins_encode %{ - __ count_positives($ary1$$Register, $len$$Register, - $result$$Register, $tmp3$$Register, - $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg); - %} - ins_pipe( pipe_slow ); -%} - -instruct count_positives_evex(eSIRegP ary1, eCXRegI len, eAXRegI result, - regD tmp1, regD tmp2, kReg ktmp1, kReg ktmp2, eBXRegI tmp3, eFlagsReg cr) -%{ - predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2()); - match(Set result (CountPositives ary1 len)); - effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr); - - format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %} - ins_encode %{ - __ count_positives($ary1$$Register, $len$$Register, - $result$$Register, $tmp3$$Register, - $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister); - %} - ins_pipe( pipe_slow ); -%} - - -// fast char[] to byte[] compression -instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2, - regD tmp3, regD tmp4, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{ - predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2()); - match(Set result (StrCompressedCopy src (Binary dst len))); - effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr); - - format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %} - ins_encode %{ - __ char_array_compress($src$$Register, $dst$$Register, $len$$Register, - $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, - $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, - knoreg, knoreg); - %} - ins_pipe( pipe_slow ); -%} - -instruct string_compress_evex(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2, - regD tmp3, regD tmp4, kReg ktmp1, kReg ktmp2, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{ - predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2()); - match(Set result (StrCompressedCopy src (Binary dst len))); - effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr); - - format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %} - ins_encode %{ - __ char_array_compress($src$$Register, $dst$$Register, $len$$Register, - $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, - $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, - $ktmp1$$KRegister, $ktmp2$$KRegister); - %} - ins_pipe( pipe_slow ); -%} - -// fast byte[] to char[] inflation -instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len, - regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{ - predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2()); - match(Set dummy (StrInflatedCopy src (Binary dst len))); - effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr); - - format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %} - ins_encode %{ - __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register, - $tmp1$$XMMRegister, $tmp2$$Register, knoreg); - %} - ins_pipe( pipe_slow ); -%} - -instruct string_inflate_evex(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len, - regD tmp1, kReg ktmp, eCXRegI tmp2, eFlagsReg cr) %{ - predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2()); - match(Set dummy (StrInflatedCopy src (Binary dst len))); - effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr); - - format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %} - ins_encode %{ - __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register, - $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister); - %} - ins_pipe( pipe_slow ); -%} - -// encode char[] to byte[] in ISO_8859_1 -instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len, - regD tmp1, regD tmp2, regD tmp3, regD tmp4, - eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{ - predicate(!((EncodeISOArrayNode*)n)->is_ascii()); - match(Set result (EncodeISOArray src (Binary dst len))); - effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr); - - format %{ "Encode iso array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %} - ins_encode %{ - __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register, - $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, - $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false); - %} - ins_pipe( pipe_slow ); -%} - -// encode char[] to byte[] in ASCII -instruct encode_ascii_array(eSIRegP src, eDIRegP dst, eDXRegI len, - regD tmp1, regD tmp2, regD tmp3, regD tmp4, - eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{ - predicate(((EncodeISOArrayNode*)n)->is_ascii()); - match(Set result (EncodeISOArray src (Binary dst len))); - effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr); - - format %{ "Encode ascii array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %} - ins_encode %{ - __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register, - $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, - $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true); - %} - ins_pipe( pipe_slow ); -%} - -//----------Control Flow Instructions------------------------------------------ -// Signed compare Instructions -instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{ - match(Set cr (CmpI op1 op2)); - effect( DEF cr, USE op1, USE op2 ); - format %{ "CMP $op1,$op2" %} - opcode(0x3B); /* Opcode 3B /r */ - ins_encode( OpcP, RegReg( op1, op2) ); - ins_pipe( ialu_cr_reg_reg ); -%} - -instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{ - match(Set cr (CmpI op1 op2)); - effect( DEF cr, USE op1 ); - format %{ "CMP $op1,$op2" %} - opcode(0x81,0x07); /* Opcode 81 /7 */ - // ins_encode( RegImm( op1, op2) ); /* Was CmpImm */ - ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) ); - ins_pipe( ialu_cr_reg_imm ); -%} - -// Cisc-spilled version of cmpI_eReg -instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{ - match(Set cr (CmpI op1 (LoadI op2))); - - format %{ "CMP $op1,$op2" %} - ins_cost(500); - opcode(0x3B); /* Opcode 3B /r */ - ins_encode( SetInstMark, OpcP, RegMem( op1, op2), ClearInstMark ); - ins_pipe( ialu_cr_reg_mem ); -%} - -instruct testI_reg( eFlagsReg cr, rRegI src, immI_0 zero ) %{ - match(Set cr (CmpI src zero)); - effect( DEF cr, USE src ); - - format %{ "TEST $src,$src" %} - opcode(0x85); - ins_encode( OpcP, RegReg( src, src ) ); - ins_pipe( ialu_cr_reg_imm ); -%} - -instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI_0 zero ) %{ - match(Set cr (CmpI (AndI src con) zero)); - - format %{ "TEST $src,$con" %} - opcode(0xF7,0x00); - ins_encode( OpcP, RegOpc(src), Con32(con) ); - ins_pipe( ialu_cr_reg_imm ); -%} - -instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI_0 zero ) %{ - match(Set cr (CmpI (AndI src mem) zero)); - - format %{ "TEST $src,$mem" %} - opcode(0x85); - ins_encode( SetInstMark, OpcP, RegMem( src, mem ), ClearInstMark ); - ins_pipe( ialu_cr_reg_mem ); -%} - -// Unsigned compare Instructions; really, same as signed except they -// produce an eFlagsRegU instead of eFlagsReg. -instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{ - match(Set cr (CmpU op1 op2)); - - format %{ "CMPu $op1,$op2" %} - opcode(0x3B); /* Opcode 3B /r */ - ins_encode( OpcP, RegReg( op1, op2) ); - ins_pipe( ialu_cr_reg_reg ); -%} - -instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{ - match(Set cr (CmpU op1 op2)); - - format %{ "CMPu $op1,$op2" %} - opcode(0x81,0x07); /* Opcode 81 /7 */ - ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) ); - ins_pipe( ialu_cr_reg_imm ); -%} - -// // Cisc-spilled version of cmpU_eReg -instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{ - match(Set cr (CmpU op1 (LoadI op2))); - - format %{ "CMPu $op1,$op2" %} - ins_cost(500); - opcode(0x3B); /* Opcode 3B /r */ - ins_encode( SetInstMark, OpcP, RegMem( op1, op2), ClearInstMark ); - ins_pipe( ialu_cr_reg_mem ); -%} - -// // Cisc-spilled version of cmpU_eReg -//instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{ -// match(Set cr (CmpU (LoadI op1) op2)); -// -// format %{ "CMPu $op1,$op2" %} -// ins_cost(500); -// opcode(0x39); /* Opcode 39 /r */ -// ins_encode( OpcP, RegMem( op1, op2) ); -//%} - -instruct testU_reg( eFlagsRegU cr, rRegI src, immI_0 zero ) %{ - match(Set cr (CmpU src zero)); - - format %{ "TESTu $src,$src" %} - opcode(0x85); - ins_encode( OpcP, RegReg( src, src ) ); - ins_pipe( ialu_cr_reg_imm ); -%} - -// Unsigned pointer compare Instructions -instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{ - match(Set cr (CmpP op1 op2)); - - format %{ "CMPu $op1,$op2" %} - opcode(0x3B); /* Opcode 3B /r */ - ins_encode( OpcP, RegReg( op1, op2) ); - ins_pipe( ialu_cr_reg_reg ); -%} - -instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{ - match(Set cr (CmpP op1 op2)); - - format %{ "CMPu $op1,$op2" %} - opcode(0x81,0x07); /* Opcode 81 /7 */ - ins_encode( SetInstMark, OpcSErm( op1, op2 ), Con8or32( op2 ), ClearInstMark ); - ins_pipe( ialu_cr_reg_imm ); -%} - -// // Cisc-spilled version of cmpP_eReg -instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{ - match(Set cr (CmpP op1 (LoadP op2))); - - format %{ "CMPu $op1,$op2" %} - ins_cost(500); - opcode(0x3B); /* Opcode 3B /r */ - ins_encode( SetInstMark, OpcP, RegMem( op1, op2), ClearInstMark ); - ins_pipe( ialu_cr_reg_mem ); -%} - -// // Cisc-spilled version of cmpP_eReg -//instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{ -// match(Set cr (CmpP (LoadP op1) op2)); -// -// format %{ "CMPu $op1,$op2" %} -// ins_cost(500); -// opcode(0x39); /* Opcode 39 /r */ -// ins_encode( OpcP, RegMem( op1, op2) ); -//%} - -// Compare raw pointer (used in out-of-heap check). -// Only works because non-oop pointers must be raw pointers -// and raw pointers have no anti-dependencies. -instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{ - predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none ); - match(Set cr (CmpP op1 (LoadP op2))); - - format %{ "CMPu $op1,$op2" %} - opcode(0x3B); /* Opcode 3B /r */ - ins_encode( SetInstMark, OpcP, RegMem( op1, op2), ClearInstMark ); - ins_pipe( ialu_cr_reg_mem ); -%} - -// -// This will generate a signed flags result. This should be ok -// since any compare to a zero should be eq/neq. -instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{ - match(Set cr (CmpP src zero)); - - format %{ "TEST $src,$src" %} - opcode(0x85); - ins_encode( OpcP, RegReg( src, src ) ); - ins_pipe( ialu_cr_reg_imm ); -%} - -// Cisc-spilled version of testP_reg -// This will generate a signed flags result. This should be ok -// since any compare to a zero should be eq/neq. -instruct testP_Reg_mem( eFlagsReg cr, memory op, immI_0 zero ) %{ - match(Set cr (CmpP (LoadP op) zero)); - - format %{ "TEST $op,0xFFFFFFFF" %} - ins_cost(500); - opcode(0xF7); /* Opcode F7 /0 */ - ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF), ClearInstMark ); - ins_pipe( ialu_cr_reg_imm ); -%} - -// Yanked all unsigned pointer compare operations. -// Pointer compares are done with CmpP which is already unsigned. - -//----------Max and Min-------------------------------------------------------- -// Min Instructions -//// -// *** Min and Max using the conditional move are slower than the -// *** branch version on a Pentium III. -// // Conditional move for min -//instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{ -// effect( USE_DEF op2, USE op1, USE cr ); -// format %{ "CMOVlt $op2,$op1\t! min" %} -// opcode(0x4C,0x0F); -// ins_encode( OpcS, OpcP, RegReg( op2, op1 ) ); -// ins_pipe( pipe_cmov_reg ); -//%} -// -//// Min Register with Register (P6 version) -//instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{ -// predicate(VM_Version::supports_cmov() ); -// match(Set op2 (MinI op1 op2)); -// ins_cost(200); -// expand %{ -// eFlagsReg cr; -// compI_eReg(cr,op1,op2); -// cmovI_reg_lt(op2,op1,cr); -// %} -//%} - -// Min Register with Register (generic version) -instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{ - match(Set dst (MinI dst src)); - effect(KILL flags); - ins_cost(300); - - format %{ "MIN $dst,$src" %} - opcode(0xCC); - ins_encode( min_enc(dst,src) ); - ins_pipe( pipe_slow ); -%} - -// Max Register with Register -// *** Min and Max using the conditional move are slower than the -// *** branch version on a Pentium III. -// // Conditional move for max -//instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{ -// effect( USE_DEF op2, USE op1, USE cr ); -// format %{ "CMOVgt $op2,$op1\t! max" %} -// opcode(0x4F,0x0F); -// ins_encode( OpcS, OpcP, RegReg( op2, op1 ) ); -// ins_pipe( pipe_cmov_reg ); -//%} -// -// // Max Register with Register (P6 version) -//instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{ -// predicate(VM_Version::supports_cmov() ); -// match(Set op2 (MaxI op1 op2)); -// ins_cost(200); -// expand %{ -// eFlagsReg cr; -// compI_eReg(cr,op1,op2); -// cmovI_reg_gt(op2,op1,cr); -// %} -//%} - -// Max Register with Register (generic version) -instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{ - match(Set dst (MaxI dst src)); - effect(KILL flags); - ins_cost(300); - - format %{ "MAX $dst,$src" %} - opcode(0xCC); - ins_encode( max_enc(dst,src) ); - ins_pipe( pipe_slow ); -%} - -// ============================================================================ -// Counted Loop limit node which represents exact final iterator value. -// Note: the resulting value should fit into integer range since -// counted loops have limit check on overflow. -instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{ - match(Set limit (LoopLimit (Binary init limit) stride)); - effect(TEMP limit_hi, TEMP tmp, KILL flags); - ins_cost(300); - - format %{ "loopLimit $init,$limit,$stride # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %} - ins_encode %{ - int strd = (int)$stride$$constant; - assert(strd != 1 && strd != -1, "sanity"); - int m1 = (strd > 0) ? 1 : -1; - // Convert limit to long (EAX:EDX) - __ cdql(); - // Convert init to long (init:tmp) - __ movl($tmp$$Register, $init$$Register); - __ sarl($tmp$$Register, 31); - // $limit - $init - __ subl($limit$$Register, $init$$Register); - __ sbbl($limit_hi$$Register, $tmp$$Register); - // + ($stride - 1) - if (strd > 0) { - __ addl($limit$$Register, (strd - 1)); - __ adcl($limit_hi$$Register, 0); - __ movl($tmp$$Register, strd); - } else { - __ addl($limit$$Register, (strd + 1)); - __ adcl($limit_hi$$Register, -1); - __ lneg($limit_hi$$Register, $limit$$Register); - __ movl($tmp$$Register, -strd); - } - // signed division: (EAX:EDX) / pos_stride - __ idivl($tmp$$Register); - if (strd < 0) { - // restore sign - __ negl($tmp$$Register); - } - // (EAX) * stride - __ mull($tmp$$Register); - // + init (ignore upper bits) - __ addl($limit$$Register, $init$$Register); - %} - ins_pipe( pipe_slow ); -%} - -// ============================================================================ -// Branch Instructions -// Jump Table -instruct jumpXtnd(rRegI switch_val) %{ - match(Jump switch_val); - ins_cost(350); - format %{ "JMP [$constantaddress](,$switch_val,1)\n\t" %} - ins_encode %{ - // Jump to Address(table_base + switch_reg) - Address index(noreg, $switch_val$$Register, Address::times_1); - __ jump(ArrayAddress($constantaddress, index), noreg); - %} - ins_pipe(pipe_jmp); -%} - -// Jump Direct - Label defines a relative address from JMP+1 -instruct jmpDir(label labl) %{ - match(Goto); - effect(USE labl); - - ins_cost(300); - format %{ "JMP $labl" %} - size(5); - ins_encode %{ - Label* L = $labl$$label; - __ jmp(*L, false); // Always long jump - %} - ins_pipe( pipe_jmp ); -%} - -// Jump Direct Conditional - Label defines a relative address from Jcc+1 -instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{ - match(If cop cr); - effect(USE labl); - - ins_cost(300); - format %{ "J$cop $labl" %} - size(6); - ins_encode %{ - Label* L = $labl$$label; - __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump - %} - ins_pipe( pipe_jcc ); -%} - -// Jump Direct Conditional - Label defines a relative address from Jcc+1 -instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{ - match(CountedLoopEnd cop cr); - effect(USE labl); - - ins_cost(300); - format %{ "J$cop $labl\t# Loop end" %} - size(6); - ins_encode %{ - Label* L = $labl$$label; - __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump - %} - ins_pipe( pipe_jcc ); -%} - -// Jump Direct Conditional - using unsigned comparison -instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{ - match(If cop cmp); - effect(USE labl); - - ins_cost(300); - format %{ "J$cop,u $labl" %} - size(6); - ins_encode %{ - Label* L = $labl$$label; - __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump - %} - ins_pipe(pipe_jcc); -%} - -instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{ - match(If cop cmp); - effect(USE labl); - - ins_cost(200); - format %{ "J$cop,u $labl" %} - size(6); - ins_encode %{ - Label* L = $labl$$label; - __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump - %} - ins_pipe(pipe_jcc); -%} - -instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{ - match(If cop cmp); - effect(USE labl); - - ins_cost(200); - format %{ $$template - if ($cop$$cmpcode == Assembler::notEqual) { - $$emit$$"JP,u $labl\n\t" - $$emit$$"J$cop,u $labl" - } else { - $$emit$$"JP,u done\n\t" - $$emit$$"J$cop,u $labl\n\t" - $$emit$$"done:" - } - %} - ins_encode %{ - Label* l = $labl$$label; - if ($cop$$cmpcode == Assembler::notEqual) { - __ jcc(Assembler::parity, *l, false); - __ jcc(Assembler::notEqual, *l, false); - } else if ($cop$$cmpcode == Assembler::equal) { - Label done; - __ jccb(Assembler::parity, done); - __ jcc(Assembler::equal, *l, false); - __ bind(done); - } else { - ShouldNotReachHere(); - } - %} - ins_pipe(pipe_jcc); -%} - -// ============================================================================ -// The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass -// array for an instance of the superklass. Set a hidden internal cache on a -// hit (cache is checked with exposed code in gen_subtype_check()). Return -// NZ for a miss or zero for a hit. The encoding ALSO sets flags. -instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{ - match(Set result (PartialSubtypeCheck sub super)); - effect( KILL rcx, KILL cr ); - - ins_cost(1100); // slightly larger than the next version - format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t" - "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t" - "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t" - "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t" - "JNE,s miss\t\t# Missed: EDI not-zero\n\t" - "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t" - "XOR $result,$result\t\t Hit: EDI zero\n\t" - "miss:\t" %} - - opcode(0x1); // Force a XOR of EDI - ins_encode( enc_PartialSubtypeCheck() ); - ins_pipe( pipe_slow ); -%} - -instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{ - match(Set cr (CmpP (PartialSubtypeCheck sub super) zero)); - effect( KILL rcx, KILL result ); - - ins_cost(1000); - format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t" - "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t" - "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t" - "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t" - "JNE,s miss\t\t# Missed: flags NZ\n\t" - "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t" - "miss:\t" %} - - opcode(0x0); // No need to XOR EDI - ins_encode( enc_PartialSubtypeCheck() ); - ins_pipe( pipe_slow ); -%} - -// ============================================================================ -// Branch Instructions -- short offset versions -// -// These instructions are used to replace jumps of a long offset (the default -// match) with jumps of a shorter offset. These instructions are all tagged -// with the ins_short_branch attribute, which causes the ADLC to suppress the -// match rules in general matching. Instead, the ADLC generates a conversion -// method in the MachNode which can be used to do in-place replacement of the -// long variant with the shorter variant. The compiler will determine if a -// branch can be taken by the is_short_branch_offset() predicate in the machine -// specific code section of the file. - -// Jump Direct - Label defines a relative address from JMP+1 -instruct jmpDir_short(label labl) %{ - match(Goto); - effect(USE labl); - - ins_cost(300); - format %{ "JMP,s $labl" %} - size(2); - ins_encode %{ - Label* L = $labl$$label; - __ jmpb(*L); - %} - ins_pipe( pipe_jmp ); - ins_short_branch(1); -%} - -// Jump Direct Conditional - Label defines a relative address from Jcc+1 -instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{ - match(If cop cr); - effect(USE labl); - - ins_cost(300); - format %{ "J$cop,s $labl" %} - size(2); - ins_encode %{ - Label* L = $labl$$label; - __ jccb((Assembler::Condition)($cop$$cmpcode), *L); - %} - ins_pipe( pipe_jcc ); - ins_short_branch(1); -%} - -// Jump Direct Conditional - Label defines a relative address from Jcc+1 -instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{ - match(CountedLoopEnd cop cr); - effect(USE labl); - - ins_cost(300); - format %{ "J$cop,s $labl\t# Loop end" %} - size(2); - ins_encode %{ - Label* L = $labl$$label; - __ jccb((Assembler::Condition)($cop$$cmpcode), *L); - %} - ins_pipe( pipe_jcc ); - ins_short_branch(1); -%} - -// Jump Direct Conditional - using unsigned comparison -instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{ - match(If cop cmp); - effect(USE labl); - - ins_cost(300); - format %{ "J$cop,us $labl" %} - size(2); - ins_encode %{ - Label* L = $labl$$label; - __ jccb((Assembler::Condition)($cop$$cmpcode), *L); - %} - ins_pipe( pipe_jcc ); - ins_short_branch(1); -%} - -instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{ - match(If cop cmp); - effect(USE labl); - - ins_cost(300); - format %{ "J$cop,us $labl" %} - size(2); - ins_encode %{ - Label* L = $labl$$label; - __ jccb((Assembler::Condition)($cop$$cmpcode), *L); - %} - ins_pipe( pipe_jcc ); - ins_short_branch(1); -%} - -instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{ - match(If cop cmp); - effect(USE labl); - - ins_cost(300); - format %{ $$template - if ($cop$$cmpcode == Assembler::notEqual) { - $$emit$$"JP,u,s $labl\n\t" - $$emit$$"J$cop,u,s $labl" - } else { - $$emit$$"JP,u,s done\n\t" - $$emit$$"J$cop,u,s $labl\n\t" - $$emit$$"done:" - } - %} - size(4); - ins_encode %{ - Label* l = $labl$$label; - if ($cop$$cmpcode == Assembler::notEqual) { - __ jccb(Assembler::parity, *l); - __ jccb(Assembler::notEqual, *l); - } else if ($cop$$cmpcode == Assembler::equal) { - Label done; - __ jccb(Assembler::parity, done); - __ jccb(Assembler::equal, *l); - __ bind(done); - } else { - ShouldNotReachHere(); - } - %} - ins_pipe(pipe_jcc); - ins_short_branch(1); -%} - -// ============================================================================ -// Long Compare -// -// Currently we hold longs in 2 registers. Comparing such values efficiently -// is tricky. The flavor of compare used depends on whether we are testing -// for LT, LE, or EQ. For a simple LT test we can check just the sign bit. -// The GE test is the negated LT test. The LE test can be had by commuting -// the operands (yielding a GE test) and then negating; negate again for the -// GT test. The EQ test is done by ORcc'ing the high and low halves, and the -// NE test is negated from that. - -// Due to a shortcoming in the ADLC, it mixes up expressions like: -// (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)). Note the -// difference between 'Y' and '0L'. The tree-matches for the CmpI sections -// are collapsed internally in the ADLC's dfa-gen code. The match for -// (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the -// foo match ends up with the wrong leaf. One fix is to not match both -// reg-reg and reg-zero forms of long-compare. This is unfortunate because -// both forms beat the trinary form of long-compare and both are very useful -// on Intel which has so few registers. - -// Manifest a CmpL result in an integer register. Very painful. -// This is the test to avoid. -instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{ - match(Set dst (CmpL3 src1 src2)); - effect( KILL flags ); - ins_cost(1000); - format %{ "XOR $dst,$dst\n\t" - "CMP $src1.hi,$src2.hi\n\t" - "JLT,s m_one\n\t" - "JGT,s p_one\n\t" - "CMP $src1.lo,$src2.lo\n\t" - "JB,s m_one\n\t" - "JEQ,s done\n" - "p_one:\tINC $dst\n\t" - "JMP,s done\n" - "m_one:\tDEC $dst\n" - "done:" %} - ins_encode %{ - Label p_one, m_one, done; - __ xorptr($dst$$Register, $dst$$Register); - __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register)); - __ jccb(Assembler::less, m_one); - __ jccb(Assembler::greater, p_one); - __ cmpl($src1$$Register, $src2$$Register); - __ jccb(Assembler::below, m_one); - __ jccb(Assembler::equal, done); - __ bind(p_one); - __ incrementl($dst$$Register); - __ jmpb(done); - __ bind(m_one); - __ decrementl($dst$$Register); - __ bind(done); - %} - ins_pipe( pipe_slow ); -%} - -//====== -// Manifest a CmpL result in the normal flags. Only good for LT or GE -// compares. Can be used for LE or GT compares by reversing arguments. -// NOT GOOD FOR EQ/NE tests. -instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{ - match( Set flags (CmpL src zero )); - ins_cost(100); - format %{ "TEST $src.hi,$src.hi" %} - opcode(0x85); - ins_encode( OpcP, RegReg_Hi2( src, src ) ); - ins_pipe( ialu_cr_reg_reg ); -%} - -// Manifest a CmpL result in the normal flags. Only good for LT or GE -// compares. Can be used for LE or GT compares by reversing arguments. -// NOT GOOD FOR EQ/NE tests. -instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{ - match( Set flags (CmpL src1 src2 )); - effect( TEMP tmp ); - ins_cost(300); - format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t" - "MOV $tmp,$src1.hi\n\t" - "SBB $tmp,$src2.hi\t! Compute flags for long compare" %} - ins_encode( long_cmp_flags2( src1, src2, tmp ) ); - ins_pipe( ialu_cr_reg_reg ); -%} - -// Long compares reg < zero/req OR reg >= zero/req. -// Just a wrapper for a normal branch, plus the predicate test. -instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{ - match(If cmp flags); - effect(USE labl); - predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ); - expand %{ - jmpCon(cmp,flags,labl); // JLT or JGE... - %} -%} - -//====== -// Manifest a CmpUL result in the normal flags. Only good for LT or GE -// compares. Can be used for LE or GT compares by reversing arguments. -// NOT GOOD FOR EQ/NE tests. -instruct cmpUL_zero_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src, immL0 zero) %{ - match(Set flags (CmpUL src zero)); - ins_cost(100); - format %{ "TEST $src.hi,$src.hi" %} - opcode(0x85); - ins_encode(OpcP, RegReg_Hi2(src, src)); - ins_pipe(ialu_cr_reg_reg); -%} - -// Manifest a CmpUL result in the normal flags. Only good for LT or GE -// compares. Can be used for LE or GT compares by reversing arguments. -// NOT GOOD FOR EQ/NE tests. -instruct cmpUL_reg_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src1, eRegL src2, rRegI tmp) %{ - match(Set flags (CmpUL src1 src2)); - effect(TEMP tmp); - ins_cost(300); - format %{ "CMP $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t" - "MOV $tmp,$src1.hi\n\t" - "SBB $tmp,$src2.hi\t! Compute flags for unsigned long compare" %} - ins_encode(long_cmp_flags2(src1, src2, tmp)); - ins_pipe(ialu_cr_reg_reg); -%} - -// Unsigned long compares reg < zero/req OR reg >= zero/req. -// Just a wrapper for a normal branch, plus the predicate test. -instruct cmpUL_LTGE(cmpOpU cmp, flagsReg_ulong_LTGE flags, label labl) %{ - match(If cmp flags); - effect(USE labl); - predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge); - expand %{ - jmpCon(cmp, flags, labl); // JLT or JGE... - %} -%} - -// Compare 2 longs and CMOVE longs. -instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{ - match(Set dst (CMoveL (Binary cmp flags) (Binary dst src))); - predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge )); - ins_cost(400); - format %{ "CMOV$cmp $dst.lo,$src.lo\n\t" - "CMOV$cmp $dst.hi,$src.hi" %} - opcode(0x0F,0x40); - ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) ); - ins_pipe( pipe_cmov_reg_long ); -%} - -instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{ - match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src)))); - predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge )); - ins_cost(500); - format %{ "CMOV$cmp $dst.lo,$src.lo\n\t" - "CMOV$cmp $dst.hi,$src.hi" %} - opcode(0x0F,0x40); - ins_encode( SetInstMark, enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src), ClearInstMark ); - ins_pipe( pipe_cmov_reg_long ); -%} - -instruct cmovLL_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegL dst, eRegL src) %{ - match(Set dst (CMoveL (Binary cmp flags) (Binary dst src))); - predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge )); - ins_cost(400); - expand %{ - cmovLL_reg_LTGE(cmp, flags, dst, src); - %} -%} - -instruct cmovLL_mem_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegL dst, load_long_memory src) %{ - match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src)))); - predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge )); - ins_cost(500); - expand %{ - cmovLL_mem_LTGE(cmp, flags, dst, src); - %} -%} - -// Compare 2 longs and CMOVE ints. -instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{ - predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge )); - match(Set dst (CMoveI (Binary cmp flags) (Binary dst src))); - ins_cost(200); - format %{ "CMOV$cmp $dst,$src" %} - opcode(0x0F,0x40); - ins_encode( enc_cmov(cmp), RegReg( dst, src ) ); - ins_pipe( pipe_cmov_reg ); -%} - -instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{ - predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge )); - match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src)))); - ins_cost(250); - format %{ "CMOV$cmp $dst,$src" %} - opcode(0x0F,0x40); - ins_encode( SetInstMark, enc_cmov(cmp), RegMem( dst, src ), ClearInstMark ); - ins_pipe( pipe_cmov_mem ); -%} - -instruct cmovII_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, rRegI dst, rRegI src) %{ - predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge )); - match(Set dst (CMoveI (Binary cmp flags) (Binary dst src))); - ins_cost(200); - expand %{ - cmovII_reg_LTGE(cmp, flags, dst, src); - %} -%} - -instruct cmovII_mem_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, rRegI dst, memory src) %{ - predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge )); - match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src)))); - ins_cost(250); - expand %{ - cmovII_mem_LTGE(cmp, flags, dst, src); - %} -%} - -// Compare 2 longs and CMOVE ptrs. -instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{ - predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge )); - match(Set dst (CMoveP (Binary cmp flags) (Binary dst src))); - ins_cost(200); - format %{ "CMOV$cmp $dst,$src" %} - opcode(0x0F,0x40); - ins_encode( enc_cmov(cmp), RegReg( dst, src ) ); - ins_pipe( pipe_cmov_reg ); -%} - -// Compare 2 unsigned longs and CMOVE ptrs. -instruct cmovPP_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegP dst, eRegP src) %{ - predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge )); - match(Set dst (CMoveP (Binary cmp flags) (Binary dst src))); - ins_cost(200); - expand %{ - cmovPP_reg_LTGE(cmp,flags,dst,src); - %} -%} - -// Compare 2 longs and CMOVE doubles -instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{ - predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge )); - match(Set dst (CMoveD (Binary cmp flags) (Binary dst src))); - ins_cost(200); - expand %{ - fcmovDPR_regS(cmp,flags,dst,src); - %} -%} - -// Compare 2 longs and CMOVE doubles -instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{ - predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge )); - match(Set dst (CMoveD (Binary cmp flags) (Binary dst src))); - ins_cost(200); - expand %{ - fcmovD_regS(cmp,flags,dst,src); - %} -%} - -instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{ - predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge )); - match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); - ins_cost(200); - expand %{ - fcmovFPR_regS(cmp,flags,dst,src); - %} -%} - -instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{ - predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge )); - match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); - ins_cost(200); - expand %{ - fcmovF_regS(cmp,flags,dst,src); - %} -%} - -//====== -// Manifest a CmpL result in the normal flags. Only good for EQ/NE compares. -instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{ - match( Set flags (CmpL src zero )); - effect(TEMP tmp); - ins_cost(200); - format %{ "MOV $tmp,$src.lo\n\t" - "OR $tmp,$src.hi\t! Long is EQ/NE 0?" %} - ins_encode( long_cmp_flags0( src, tmp ) ); - ins_pipe( ialu_reg_reg_long ); -%} - -// Manifest a CmpL result in the normal flags. Only good for EQ/NE compares. -instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{ - match( Set flags (CmpL src1 src2 )); - ins_cost(200+300); - format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t" - "JNE,s skip\n\t" - "CMP $src1.hi,$src2.hi\n\t" - "skip:\t" %} - ins_encode( long_cmp_flags1( src1, src2 ) ); - ins_pipe( ialu_cr_reg_reg ); -%} - -// Long compare reg == zero/reg OR reg != zero/reg -// Just a wrapper for a normal branch, plus the predicate test. -instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{ - match(If cmp flags); - effect(USE labl); - predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ); - expand %{ - jmpCon(cmp,flags,labl); // JEQ or JNE... - %} -%} - -//====== -// Manifest a CmpUL result in the normal flags. Only good for EQ/NE compares. -instruct cmpUL_zero_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src, immL0 zero, rRegI tmp) %{ - match(Set flags (CmpUL src zero)); - effect(TEMP tmp); - ins_cost(200); - format %{ "MOV $tmp,$src.lo\n\t" - "OR $tmp,$src.hi\t! Unsigned long is EQ/NE 0?" %} - ins_encode(long_cmp_flags0(src, tmp)); - ins_pipe(ialu_reg_reg_long); -%} - -// Manifest a CmpUL result in the normal flags. Only good for EQ/NE compares. -instruct cmpUL_reg_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src1, eRegL src2) %{ - match(Set flags (CmpUL src1 src2)); - ins_cost(200+300); - format %{ "CMP $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t" - "JNE,s skip\n\t" - "CMP $src1.hi,$src2.hi\n\t" - "skip:\t" %} - ins_encode(long_cmp_flags1(src1, src2)); - ins_pipe(ialu_cr_reg_reg); -%} - -// Unsigned long compare reg == zero/reg OR reg != zero/reg -// Just a wrapper for a normal branch, plus the predicate test. -instruct cmpUL_EQNE(cmpOpU cmp, flagsReg_ulong_EQNE flags, label labl) %{ - match(If cmp flags); - effect(USE labl); - predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne); - expand %{ - jmpCon(cmp, flags, labl); // JEQ or JNE... - %} -%} - -// Compare 2 longs and CMOVE longs. -instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{ - match(Set dst (CMoveL (Binary cmp flags) (Binary dst src))); - predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne )); - ins_cost(400); - format %{ "CMOV$cmp $dst.lo,$src.lo\n\t" - "CMOV$cmp $dst.hi,$src.hi" %} - opcode(0x0F,0x40); - ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) ); - ins_pipe( pipe_cmov_reg_long ); -%} - -instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{ - match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src)))); - predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne )); - ins_cost(500); - format %{ "CMOV$cmp $dst.lo,$src.lo\n\t" - "CMOV$cmp $dst.hi,$src.hi" %} - opcode(0x0F,0x40); - ins_encode( SetInstMark, enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src), ClearInstMark ); - ins_pipe( pipe_cmov_reg_long ); -%} - -// Compare 2 longs and CMOVE ints. -instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{ - predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne )); - match(Set dst (CMoveI (Binary cmp flags) (Binary dst src))); - ins_cost(200); - format %{ "CMOV$cmp $dst,$src" %} - opcode(0x0F,0x40); - ins_encode( enc_cmov(cmp), RegReg( dst, src ) ); - ins_pipe( pipe_cmov_reg ); -%} - -instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{ - predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne )); - match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src)))); - ins_cost(250); - format %{ "CMOV$cmp $dst,$src" %} - opcode(0x0F,0x40); - ins_encode( SetInstMark, enc_cmov(cmp), RegMem( dst, src ), ClearInstMark ); - ins_pipe( pipe_cmov_mem ); -%} - -instruct cmovII_reg_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, rRegI dst, rRegI src) %{ - predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne )); - match(Set dst (CMoveI (Binary cmp flags) (Binary dst src))); - ins_cost(200); - expand %{ - cmovII_reg_EQNE(cmp, flags, dst, src); - %} -%} - -instruct cmovII_mem_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, rRegI dst, memory src) %{ - predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne )); - match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src)))); - ins_cost(250); - expand %{ - cmovII_mem_EQNE(cmp, flags, dst, src); - %} -%} - -// Compare 2 longs and CMOVE ptrs. -instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{ - predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne )); - match(Set dst (CMoveP (Binary cmp flags) (Binary dst src))); - ins_cost(200); - format %{ "CMOV$cmp $dst,$src" %} - opcode(0x0F,0x40); - ins_encode( enc_cmov(cmp), RegReg( dst, src ) ); - ins_pipe( pipe_cmov_reg ); -%} - -// Compare 2 unsigned longs and CMOVE ptrs. -instruct cmovPP_reg_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, eRegP dst, eRegP src) %{ - predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne )); - match(Set dst (CMoveP (Binary cmp flags) (Binary dst src))); - ins_cost(200); - expand %{ - cmovPP_reg_EQNE(cmp,flags,dst,src); - %} -%} - -// Compare 2 longs and CMOVE doubles -instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{ - predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne )); - match(Set dst (CMoveD (Binary cmp flags) (Binary dst src))); - ins_cost(200); - expand %{ - fcmovDPR_regS(cmp,flags,dst,src); - %} -%} - -// Compare 2 longs and CMOVE doubles -instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{ - predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne )); - match(Set dst (CMoveD (Binary cmp flags) (Binary dst src))); - ins_cost(200); - expand %{ - fcmovD_regS(cmp,flags,dst,src); - %} -%} - -instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{ - predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne )); - match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); - ins_cost(200); - expand %{ - fcmovFPR_regS(cmp,flags,dst,src); - %} -%} - -instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{ - predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne )); - match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); - ins_cost(200); - expand %{ - fcmovF_regS(cmp,flags,dst,src); - %} -%} - -//====== -// Manifest a CmpL result in the normal flags. Only good for LE or GT compares. -// Same as cmpL_reg_flags_LEGT except must negate src -instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{ - match( Set flags (CmpL src zero )); - effect( TEMP tmp ); - ins_cost(300); - format %{ "XOR $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t" - "CMP $tmp,$src.lo\n\t" - "SBB $tmp,$src.hi\n\t" %} - ins_encode( long_cmp_flags3(src, tmp) ); - ins_pipe( ialu_reg_reg_long ); -%} - -// Manifest a CmpL result in the normal flags. Only good for LE or GT compares. -// Same as cmpL_reg_flags_LTGE except operands swapped. Swapping operands -// requires a commuted test to get the same result. -instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{ - match( Set flags (CmpL src1 src2 )); - effect( TEMP tmp ); - ins_cost(300); - format %{ "CMP $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t" - "MOV $tmp,$src2.hi\n\t" - "SBB $tmp,$src1.hi\t! Compute flags for long compare" %} - ins_encode( long_cmp_flags2( src2, src1, tmp ) ); - ins_pipe( ialu_cr_reg_reg ); -%} - -// Long compares reg < zero/req OR reg >= zero/req. -// Just a wrapper for a normal branch, plus the predicate test -instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{ - match(If cmp flags); - effect(USE labl); - predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le ); - ins_cost(300); - expand %{ - jmpCon(cmp,flags,labl); // JGT or JLE... - %} -%} - -//====== -// Manifest a CmpUL result in the normal flags. Only good for LE or GT compares. -// Same as cmpUL_reg_flags_LEGT except must negate src -instruct cmpUL_zero_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src, immL0 zero, rRegI tmp) %{ - match(Set flags (CmpUL src zero)); - effect(TEMP tmp); - ins_cost(300); - format %{ "XOR $tmp,$tmp\t# Unsigned long compare for -$src < 0, use commuted test\n\t" - "CMP $tmp,$src.lo\n\t" - "SBB $tmp,$src.hi\n\t" %} - ins_encode(long_cmp_flags3(src, tmp)); - ins_pipe(ialu_reg_reg_long); -%} - -// Manifest a CmpUL result in the normal flags. Only good for LE or GT compares. -// Same as cmpUL_reg_flags_LTGE except operands swapped. Swapping operands -// requires a commuted test to get the same result. -instruct cmpUL_reg_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src1, eRegL src2, rRegI tmp) %{ - match(Set flags (CmpUL src1 src2)); - effect(TEMP tmp); - ins_cost(300); - format %{ "CMP $src2.lo,$src1.lo\t! Unsigned long compare, swapped operands, use with commuted test\n\t" - "MOV $tmp,$src2.hi\n\t" - "SBB $tmp,$src1.hi\t! Compute flags for unsigned long compare" %} - ins_encode(long_cmp_flags2( src2, src1, tmp)); - ins_pipe(ialu_cr_reg_reg); -%} - -// Unsigned long compares reg < zero/req OR reg >= zero/req. -// Just a wrapper for a normal branch, plus the predicate test -instruct cmpUL_LEGT(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, label labl) %{ - match(If cmp flags); - effect(USE labl); - predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le); - ins_cost(300); - expand %{ - jmpCon(cmp, flags, labl); // JGT or JLE... - %} -%} - -// Compare 2 longs and CMOVE longs. -instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{ - match(Set dst (CMoveL (Binary cmp flags) (Binary dst src))); - predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt )); - ins_cost(400); - format %{ "CMOV$cmp $dst.lo,$src.lo\n\t" - "CMOV$cmp $dst.hi,$src.hi" %} - opcode(0x0F,0x40); - ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) ); - ins_pipe( pipe_cmov_reg_long ); -%} - -instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{ - match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src)))); - predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt )); - ins_cost(500); - format %{ "CMOV$cmp $dst.lo,$src.lo\n\t" - "CMOV$cmp $dst.hi,$src.hi+4" %} - opcode(0x0F,0x40); - ins_encode( SetInstMark, enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src), ClearInstMark ); - ins_pipe( pipe_cmov_reg_long ); -%} - -instruct cmovLL_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, eRegL src) %{ - match(Set dst (CMoveL (Binary cmp flags) (Binary dst src))); - predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt )); - ins_cost(400); - expand %{ - cmovLL_reg_LEGT(cmp, flags, dst, src); - %} -%} - -instruct cmovLL_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, load_long_memory src) %{ - match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src)))); - predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt )); - ins_cost(500); - expand %{ - cmovLL_mem_LEGT(cmp, flags, dst, src); - %} -%} - -// Compare 2 longs and CMOVE ints. -instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{ - predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt )); - match(Set dst (CMoveI (Binary cmp flags) (Binary dst src))); - ins_cost(200); - format %{ "CMOV$cmp $dst,$src" %} - opcode(0x0F,0x40); - ins_encode( enc_cmov(cmp), RegReg( dst, src ) ); - ins_pipe( pipe_cmov_reg ); -%} - -instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{ - predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt )); - match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src)))); - ins_cost(250); - format %{ "CMOV$cmp $dst,$src" %} - opcode(0x0F,0x40); - ins_encode( SetInstMark, enc_cmov(cmp), RegMem( dst, src ), ClearInstMark ); - ins_pipe( pipe_cmov_mem ); -%} - -instruct cmovII_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, rRegI dst, rRegI src) %{ - predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt )); - match(Set dst (CMoveI (Binary cmp flags) (Binary dst src))); - ins_cost(200); - expand %{ - cmovII_reg_LEGT(cmp, flags, dst, src); - %} -%} - -instruct cmovII_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, rRegI dst, memory src) %{ - predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt )); - match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src)))); - ins_cost(250); - expand %{ - cmovII_mem_LEGT(cmp, flags, dst, src); - %} -%} - -// Compare 2 longs and CMOVE ptrs. -instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{ - predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt )); - match(Set dst (CMoveP (Binary cmp flags) (Binary dst src))); - ins_cost(200); - format %{ "CMOV$cmp $dst,$src" %} - opcode(0x0F,0x40); - ins_encode( enc_cmov(cmp), RegReg( dst, src ) ); - ins_pipe( pipe_cmov_reg ); -%} - -// Compare 2 unsigned longs and CMOVE ptrs. -instruct cmovPP_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegP dst, eRegP src) %{ - predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt )); - match(Set dst (CMoveP (Binary cmp flags) (Binary dst src))); - ins_cost(200); - expand %{ - cmovPP_reg_LEGT(cmp,flags,dst,src); - %} -%} - -// Compare 2 longs and CMOVE doubles -instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{ - predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt )); - match(Set dst (CMoveD (Binary cmp flags) (Binary dst src))); - ins_cost(200); - expand %{ - fcmovDPR_regS(cmp,flags,dst,src); - %} -%} - -// Compare 2 longs and CMOVE doubles -instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{ - predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt )); - match(Set dst (CMoveD (Binary cmp flags) (Binary dst src))); - ins_cost(200); - expand %{ - fcmovD_regS(cmp,flags,dst,src); - %} -%} - -instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{ - predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt )); - match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); - ins_cost(200); - expand %{ - fcmovFPR_regS(cmp,flags,dst,src); - %} -%} - - -instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{ - predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt )); - match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); - ins_cost(200); - expand %{ - fcmovF_regS(cmp,flags,dst,src); - %} -%} - - -// ============================================================================ -// Procedure Call/Return Instructions -// Call Java Static Instruction -// Note: If this code changes, the corresponding ret_addr_offset() and -// compute_padding() functions will have to be adjusted. -instruct CallStaticJavaDirect(method meth) %{ - match(CallStaticJava); - effect(USE meth); - - ins_cost(300); - format %{ "CALL,static " %} - opcode(0xE8); /* E8 cd */ - ins_encode( pre_call_resets, - Java_Static_Call( meth ), - call_epilog, - post_call_FPU ); - ins_pipe( pipe_slow ); - ins_alignment(4); -%} - -// Call Java Dynamic Instruction -// Note: If this code changes, the corresponding ret_addr_offset() and -// compute_padding() functions will have to be adjusted. -instruct CallDynamicJavaDirect(method meth) %{ - match(CallDynamicJava); - effect(USE meth); - - ins_cost(300); - format %{ "MOV EAX,(oop)-1\n\t" - "CALL,dynamic" %} - opcode(0xE8); /* E8 cd */ - ins_encode( pre_call_resets, - Java_Dynamic_Call( meth ), - call_epilog, - post_call_FPU ); - ins_pipe( pipe_slow ); - ins_alignment(4); -%} - -// Call Runtime Instruction -instruct CallRuntimeDirect(method meth) %{ - match(CallRuntime ); - effect(USE meth); - - ins_cost(300); - format %{ "CALL,runtime " %} - opcode(0xE8); /* E8 cd */ - // Use FFREEs to clear entries in float stack - ins_encode( pre_call_resets, - FFree_Float_Stack_All, - Java_To_Runtime( meth ), - post_call_FPU ); - ins_pipe( pipe_slow ); -%} - -// Call runtime without safepoint -instruct CallLeafDirect(method meth) %{ - match(CallLeaf); - effect(USE meth); - - ins_cost(300); - format %{ "CALL_LEAF,runtime " %} - opcode(0xE8); /* E8 cd */ - ins_encode( pre_call_resets, - FFree_Float_Stack_All, - Java_To_Runtime( meth ), - Verify_FPU_For_Leaf, post_call_FPU ); - ins_pipe( pipe_slow ); -%} - -instruct CallLeafNoFPDirect(method meth) %{ - match(CallLeafNoFP); - effect(USE meth); - - ins_cost(300); - format %{ "CALL_LEAF_NOFP,runtime " %} - opcode(0xE8); /* E8 cd */ - ins_encode(pre_call_resets, Java_To_Runtime(meth)); - ins_pipe( pipe_slow ); -%} - - -// Return Instruction -// Remove the return address & jump to it. -instruct Ret() %{ - match(Return); - format %{ "RET" %} - opcode(0xC3); - ins_encode(OpcP); - ins_pipe( pipe_jmp ); -%} - -// Tail Call; Jump from runtime stub to Java code. -// Also known as an 'interprocedural jump'. -// Target of jump will eventually return to caller. -// TailJump below removes the return address. -// Don't use ebp for 'jump_target' because a MachEpilogNode has already been -// emitted just above the TailCall which has reset ebp to the caller state. -instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_ptr) %{ - match(TailCall jump_target method_ptr); - ins_cost(300); - format %{ "JMP $jump_target \t# EBX holds method" %} - opcode(0xFF, 0x4); /* Opcode FF /4 */ - ins_encode( OpcP, RegOpc(jump_target) ); - ins_pipe( pipe_jmp ); -%} - - -// Tail Jump; remove the return address; jump to target. -// TailCall above leaves the return address around. -instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{ - match( TailJump jump_target ex_oop ); - ins_cost(300); - format %{ "POP EDX\t# pop return address into dummy\n\t" - "JMP $jump_target " %} - opcode(0xFF, 0x4); /* Opcode FF /4 */ - ins_encode( enc_pop_rdx, - OpcP, RegOpc(jump_target) ); - ins_pipe( pipe_jmp ); -%} - -// Forward exception. -instruct ForwardExceptionjmp() -%{ - match(ForwardException); - - format %{ "JMP forward_exception_stub" %} - ins_encode %{ - __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()), noreg); - %} - ins_pipe(pipe_jmp); -%} - -// Create exception oop: created by stack-crawling runtime code. -// Created exception is now available to this handler, and is setup -// just prior to jumping to this handler. No code emitted. -instruct CreateException( eAXRegP ex_oop ) -%{ - match(Set ex_oop (CreateEx)); - - size(0); - // use the following format syntax - format %{ "# exception oop is in EAX; no code emitted" %} - ins_encode(); - ins_pipe( empty ); -%} - - -// Rethrow exception: -// The exception oop will come in the first argument position. -// Then JUMP (not call) to the rethrow stub code. -instruct RethrowException() -%{ - match(Rethrow); - - // use the following format syntax - format %{ "JMP rethrow_stub" %} - ins_encode(enc_rethrow); - ins_pipe( pipe_jmp ); -%} - -// inlined locking and unlocking - -instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr, eRegP thread) %{ - predicate(LockingMode != LM_LIGHTWEIGHT); - match(Set cr (FastLock object box)); - effect(TEMP tmp, TEMP scr, USE_KILL box, TEMP thread); - ins_cost(300); - format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %} - ins_encode %{ - __ get_thread($thread$$Register); - __ fast_lock($object$$Register, $box$$Register, $tmp$$Register, - $scr$$Register, noreg, noreg, $thread$$Register, nullptr); - %} - ins_pipe(pipe_slow); -%} - -instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{ - predicate(LockingMode != LM_LIGHTWEIGHT); - match(Set cr (FastUnlock object box)); - effect(TEMP tmp, USE_KILL box); - ins_cost(300); - format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %} - ins_encode %{ - __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register); - %} - ins_pipe(pipe_slow); -%} - -instruct cmpFastLockLightweight(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI eax_reg, eRegP tmp, eRegP thread) %{ - predicate(LockingMode == LM_LIGHTWEIGHT); - match(Set cr (FastLock object box)); - effect(TEMP eax_reg, TEMP tmp, USE_KILL box, TEMP thread); - ins_cost(300); - format %{ "FASTLOCK $object,$box\t! kills $box,$eax_reg,$tmp" %} - ins_encode %{ - __ get_thread($thread$$Register); - __ fast_lock_lightweight($object$$Register, $box$$Register, $eax_reg$$Register, $tmp$$Register, $thread$$Register); - %} - ins_pipe(pipe_slow); -%} - -instruct cmpFastUnlockLightweight(eFlagsReg cr, eRegP object, eAXRegP eax_reg, eRegP tmp, eRegP thread) %{ - predicate(LockingMode == LM_LIGHTWEIGHT); - match(Set cr (FastUnlock object eax_reg)); - effect(TEMP tmp, USE_KILL eax_reg, TEMP thread); - ins_cost(300); - format %{ "FASTUNLOCK $object,$eax_reg\t! kills $eax_reg,$tmp" %} - ins_encode %{ - __ get_thread($thread$$Register); - __ fast_unlock_lightweight($object$$Register, $eax_reg$$Register, $tmp$$Register, $thread$$Register); - %} - ins_pipe(pipe_slow); -%} - -instruct mask_all_evexL_LT32(kReg dst, eRegL src) %{ - predicate(Matcher::vector_length(n) <= 32); - match(Set dst (MaskAll src)); - format %{ "mask_all_evexL_LE32 $dst, $src \t" %} - ins_encode %{ - int mask_len = Matcher::vector_length(this); - __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len); - %} - ins_pipe( pipe_slow ); -%} - -instruct mask_all_evexL_GT32(kReg dst, eRegL src, kReg ktmp) %{ - predicate(Matcher::vector_length(n) > 32); - match(Set dst (MaskAll src)); - effect(TEMP ktmp); - format %{ "mask_all_evexL_GT32 $dst, $src \t! using $ktmp as TEMP " %} - ins_encode %{ - int mask_len = Matcher::vector_length(this); - __ vector_maskall_operation32($dst$$KRegister, $src$$Register, $ktmp$$KRegister, mask_len); - %} - ins_pipe( pipe_slow ); -%} - -instruct mask_all_evexI_GT32(kReg dst, rRegI src, kReg ktmp) %{ - predicate(Matcher::vector_length(n) > 32); - match(Set dst (MaskAll src)); - effect(TEMP ktmp); - format %{ "mask_all_evexI_GT32 $dst, $src \t! using $ktmp as TEMP" %} - ins_encode %{ - int mask_len = Matcher::vector_length(this); - __ vector_maskall_operation32($dst$$KRegister, $src$$Register, $ktmp$$KRegister, mask_len); - %} - ins_pipe( pipe_slow ); -%} - -// ============================================================================ -// Safepoint Instruction -instruct safePoint_poll_tls(eFlagsReg cr, eRegP_no_EBP poll) %{ - match(SafePoint poll); - effect(KILL cr, USE poll); - - format %{ "TSTL #EAX,[$poll]\t! Safepoint: poll for GC" %} - ins_cost(125); - // EBP would need size(3) - size(2); /* setting an explicit size will cause debug builds to assert if size is incorrect */ - ins_encode %{ - __ set_inst_mark(); - __ relocate(relocInfo::poll_type); - __ clear_inst_mark(); - address pre_pc = __ pc(); - __ testl(rax, Address($poll$$Register, 0)); - address post_pc = __ pc(); - guarantee(pre_pc[0] == 0x85, "must emit test-ax [reg]"); - %} - ins_pipe(ialu_reg_mem); -%} - - -// ============================================================================ -// This name is KNOWN by the ADLC and cannot be changed. -// The ADLC forces a 'TypeRawPtr::BOTTOM' output type -// for this guy. -instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{ - match(Set dst (ThreadLocal)); - effect(DEF dst, KILL cr); - - format %{ "MOV $dst, Thread::current()" %} - ins_encode %{ - Register dstReg = as_Register($dst$$reg); - __ get_thread(dstReg); - %} - ins_pipe( ialu_reg_fat ); -%} - - - -//----------PEEPHOLE RULES----------------------------------------------------- -// These must follow all instruction definitions as they use the names -// defined in the instructions definitions. -// -// peepmatch ( root_instr_name [preceding_instruction]* ); -// -// peepconstraint %{ -// (instruction_number.operand_name relational_op instruction_number.operand_name -// [, ...] ); -// // instruction numbers are zero-based using left to right order in peepmatch -// -// peepreplace ( instr_name ( [instruction_number.operand_name]* ) ); -// // provide an instruction_number.operand_name for each operand that appears -// // in the replacement instruction's match rule -// -// ---------VM FLAGS--------------------------------------------------------- -// -// All peephole optimizations can be turned off using -XX:-OptoPeephole -// -// Each peephole rule is given an identifying number starting with zero and -// increasing by one in the order seen by the parser. An individual peephole -// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=# -// on the command-line. -// -// ---------CURRENT LIMITATIONS---------------------------------------------- -// -// Only match adjacent instructions in same basic block -// Only equality constraints -// Only constraints between operands, not (0.dest_reg == EAX_enc) -// Only one replacement instruction -// -// ---------EXAMPLE---------------------------------------------------------- -// -// // pertinent parts of existing instructions in architecture description -// instruct movI(rRegI dst, rRegI src) %{ -// match(Set dst (CopyI src)); -// %} -// -// instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{ -// match(Set dst (AddI dst src)); -// effect(KILL cr); -// %} -// -// // Change (inc mov) to lea -// peephole %{ -// // increment preceded by register-register move -// peepmatch ( incI_eReg movI ); -// // require that the destination register of the increment -// // match the destination register of the move -// peepconstraint ( 0.dst == 1.dst ); -// // construct a replacement instruction that sets -// // the destination to ( move's source register + one ) -// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); -// %} -// -// Implementation no longer uses movX instructions since -// machine-independent system no longer uses CopyX nodes. -// -// peephole %{ -// peepmatch ( incI_eReg movI ); -// peepconstraint ( 0.dst == 1.dst ); -// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); -// %} -// -// peephole %{ -// peepmatch ( decI_eReg movI ); -// peepconstraint ( 0.dst == 1.dst ); -// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); -// %} -// -// peephole %{ -// peepmatch ( addI_eReg_imm movI ); -// peepconstraint ( 0.dst == 1.dst ); -// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); -// %} -// -// peephole %{ -// peepmatch ( addP_eReg_imm movP ); -// peepconstraint ( 0.dst == 1.dst ); -// peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) ); -// %} - -// // Change load of spilled value to only a spill -// instruct storeI(memory mem, rRegI src) %{ -// match(Set mem (StoreI mem src)); -// %} -// -// instruct loadI(rRegI dst, memory mem) %{ -// match(Set dst (LoadI mem)); -// %} -// -peephole %{ - peepmatch ( loadI storeI ); - peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem ); - peepreplace ( storeI( 1.mem 1.mem 1.src ) ); -%} - -//----------SMARTSPILL RULES--------------------------------------------------- -// These must follow all instruction definitions as they use the names -// defined in the instructions definitions. diff --git a/src/hotspot/os_cpu/bsd_x86/bsd_x86_32.S b/src/hotspot/os_cpu/bsd_x86/bsd_x86_32.S deleted file mode 100644 index 7d8892bcd87..00000000000 --- a/src/hotspot/os_cpu/bsd_x86/bsd_x86_32.S +++ /dev/null @@ -1,525 +0,0 @@ -# -# Copyright (c) 2004, 2024, Oracle and/or its affiliates. All rights reserved. -# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -# -# This code is free software; you can redistribute it and/or modify it -# under the terms of the GNU General Public License version 2 only, as -# published by the Free Software Foundation. -# -# This code is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -# version 2 for more details (a copy is included in the LICENSE file that -# accompanied this code). -# -# You should have received a copy of the GNU General Public License version -# 2 along with this work; if not, write to the Free Software Foundation, -# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -# -# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -# or visit www.oracle.com if you need additional information or have any -# questions. -# - -#include "defs.S.inc" - - # NOTE WELL! The _Copy functions are called directly - # from server-compiler-generated code via CallLeafNoFP, - # which means that they *must* either not use floating - # point or use it in the same manner as does the server - # compiler. - - .text - -# Set fpu to 53 bit precision. This happens too early to use a stub. - .p2align 4,,15 -DECLARE_FUNC(fixcw): - pushl $0x27f - fldcw 0(%esp) - popl %eax - ret - - .p2align 4,,15 -DECLARE_FUNC(SpinPause): - rep - nop - movl $1, %eax - ret - - # Support for void Copy::arrayof_conjoint_bytes(void* from, - # void* to, - # size_t count) - # - .p2align 4,,15 -DECLARE_FUNC(_Copy_arrayof_conjoint_bytes): - pushl %esi - movl 4+12(%esp),%ecx # count - pushl %edi - movl 8+ 4(%esp),%esi # from - movl 8+ 8(%esp),%edi # to - cmpl %esi,%edi - leal -1(%esi,%ecx),%eax # from + count - 1 - jbe acb_CopyRight - cmpl %eax,%edi - jbe acb_CopyLeft - # copy from low to high -acb_CopyRight: - cmpl $3,%ecx - jbe 5f -1: movl %ecx,%eax - shrl $2,%ecx - jz 4f - cmpl $32,%ecx - ja 3f - # copy aligned dwords - subl %esi,%edi - .p2align 4,,15 -2: movl (%esi),%edx - movl %edx,(%edi,%esi,1) - addl $4,%esi - subl $1,%ecx - jnz 2b - addl %esi,%edi - jmp 4f - # copy aligned dwords -3: rep; smovl -4: movl %eax,%ecx -5: andl $3,%ecx - jz 7f - # copy suffix - xorl %eax,%eax -6: movb (%esi,%eax,1),%dl - movb %dl,(%edi,%eax,1) - addl $1,%eax - subl $1,%ecx - jnz 6b -7: popl %edi - popl %esi - ret -acb_CopyLeft: - std - leal -4(%edi,%ecx),%edi # to + count - 4 - movl %eax,%esi # from + count - 1 - movl %ecx,%eax - subl $3,%esi # from + count - 4 - cmpl $3,%ecx - jbe 5f -1: shrl $2,%ecx - jz 4f - cmpl $32,%ecx - jbe 2f # <= 32 dwords - rep; smovl - jmp 4f - .space 8 -2: subl %esi,%edi - .p2align 4,,15 -3: movl (%esi),%edx - movl %edx,(%edi,%esi,1) - subl $4,%esi - subl $1,%ecx - jnz 3b - addl %esi,%edi -4: movl %eax,%ecx -5: andl $3,%ecx - jz 7f - subl %esi,%edi - addl $3,%esi -6: movb (%esi),%dl - movb %dl,(%edi,%esi,1) - subl $1,%esi - subl $1,%ecx - jnz 6b -7: cld - popl %edi - popl %esi - ret - - # Support for void Copy::conjoint_jshorts_atomic(void* from, - # void* to, - # size_t count) - .p2align 4,,15 -DECLARE_FUNC(_Copy_conjoint_jshorts_atomic): - pushl %esi - movl 4+12(%esp),%ecx # count - pushl %edi - movl 8+ 4(%esp),%esi # from - movl 8+ 8(%esp),%edi # to - cmpl %esi,%edi - leal -2(%esi,%ecx,2),%eax # from + count*2 - 2 - jbe cs_CopyRight - cmpl %eax,%edi - jbe cs_CopyLeft - # copy from low to high -cs_CopyRight: - # align source address at dword address boundary - movl %esi,%eax # original from - andl $3,%eax # either 0 or 2 - jz 1f # no prefix - # copy prefix - subl $1,%ecx - jl 5f # zero count - movw (%esi),%dx - movw %dx,(%edi) - addl %eax,%esi # %eax == 2 - addl %eax,%edi -1: movl %ecx,%eax # word count less prefix - sarl %ecx # dword count - jz 4f # no dwords to move - cmpl $32,%ecx - jbe 2f # <= 32 dwords - # copy aligned dwords - rep; smovl - jmp 4f - # copy aligned dwords -2: subl %esi,%edi - .p2align 4,,15 -3: movl (%esi),%edx - movl %edx,(%edi,%esi,1) - addl $4,%esi - subl $1,%ecx - jnz 3b - addl %esi,%edi -4: andl $1,%eax # suffix count - jz 5f # no suffix - # copy suffix - movw (%esi),%dx - movw %dx,(%edi) -5: popl %edi - popl %esi - ret - # copy from high to low -cs_CopyLeft: - std - leal -4(%edi,%ecx,2),%edi # to + count*2 - 4 - movl %eax,%esi # from + count*2 - 2 - movl %ecx,%eax - subl $2,%esi # from + count*2 - 4 -1: sarl %ecx # dword count - jz 4f # no dwords to move - cmpl $32,%ecx - ja 3f # > 32 dwords - subl %esi,%edi - .p2align 4,,15 -2: movl (%esi),%edx - movl %edx,(%edi,%esi,1) - subl $4,%esi - subl $1,%ecx - jnz 2b - addl %esi,%edi - jmp 4f -3: rep; smovl -4: andl $1,%eax # suffix count - jz 5f # no suffix - # copy suffix - addl $2,%esi - addl $2,%edi - movw (%esi),%dx - movw %dx,(%edi) -5: cld - popl %edi - popl %esi - ret - - # Support for void Copy::arrayof_conjoint_jshorts(void* from, - # void* to, - # size_t count) - .p2align 4,,15 -DECLARE_FUNC(_Copy_arrayof_conjoint_jshorts): - pushl %esi - movl 4+12(%esp),%ecx # count - pushl %edi - movl 8+ 4(%esp),%esi # from - movl 8+ 8(%esp),%edi # to - cmpl %esi,%edi - leal -2(%esi,%ecx,2),%eax # from + count*2 - 2 - jbe acs_CopyRight - cmpl %eax,%edi - jbe acs_CopyLeft -acs_CopyRight: - movl %ecx,%eax # word count - sarl %ecx # dword count - jz 4f # no dwords to move - cmpl $32,%ecx - jbe 2f # <= 32 dwords - # copy aligned dwords - rep; smovl - jmp 4f - # copy aligned dwords - .space 5 -2: subl %esi,%edi - .p2align 4,,15 -3: movl (%esi),%edx - movl %edx,(%edi,%esi,1) - addl $4,%esi - subl $1,%ecx - jnz 3b - addl %esi,%edi -4: andl $1,%eax # suffix count - jz 5f # no suffix - # copy suffix - movw (%esi),%dx - movw %dx,(%edi) -5: popl %edi - popl %esi - ret -acs_CopyLeft: - std - leal -4(%edi,%ecx,2),%edi # to + count*2 - 4 - movl %eax,%esi # from + count*2 - 2 - movl %ecx,%eax - subl $2,%esi # from + count*2 - 4 - sarl %ecx # dword count - jz 4f # no dwords to move - cmpl $32,%ecx - ja 3f # > 32 dwords - subl %esi,%edi - .p2align 4,,15 -2: movl (%esi),%edx - movl %edx,(%edi,%esi,1) - subl $4,%esi - subl $1,%ecx - jnz 2b - addl %esi,%edi - jmp 4f -3: rep; smovl -4: andl $1,%eax # suffix count - jz 5f # no suffix - # copy suffix - addl $2,%esi - addl $2,%edi - movw (%esi),%dx - movw %dx,(%edi) -5: cld - popl %edi - popl %esi - ret - - # Support for void Copy::conjoint_jints_atomic(void* from, - # void* to, - # size_t count) - # Equivalent to - # arrayof_conjoint_jints - .p2align 4,,15 -DECLARE_FUNC(_Copy_conjoint_jints_atomic): -DECLARE_FUNC(_Copy_arrayof_conjoint_jints): - pushl %esi - movl 4+12(%esp),%ecx # count - pushl %edi - movl 8+ 4(%esp),%esi # from - movl 8+ 8(%esp),%edi # to - cmpl %esi,%edi - leal -4(%esi,%ecx,4),%eax # from + count*4 - 4 - jbe ci_CopyRight - cmpl %eax,%edi - jbe ci_CopyLeft -ci_CopyRight: - cmpl $32,%ecx - jbe 2f # <= 32 dwords - rep; smovl - popl %edi - popl %esi - ret - .space 10 -2: subl %esi,%edi - jmp 4f - .p2align 4,,15 -3: movl (%esi),%edx - movl %edx,(%edi,%esi,1) - addl $4,%esi -4: subl $1,%ecx - jge 3b - popl %edi - popl %esi - ret -ci_CopyLeft: - std - leal -4(%edi,%ecx,4),%edi # to + count*4 - 4 - cmpl $32,%ecx - ja 4f # > 32 dwords - subl %eax,%edi # eax == from + count*4 - 4 - jmp 3f - .p2align 4,,15 -2: movl (%eax),%edx - movl %edx,(%edi,%eax,1) - subl $4,%eax -3: subl $1,%ecx - jge 2b - cld - popl %edi - popl %esi - ret -4: movl %eax,%esi # from + count*4 - 4 - rep; smovl - cld - popl %edi - popl %esi - ret - - # Support for void Copy::conjoint_jlongs_atomic(jlong* from, - # jlong* to, - # size_t count) - # - # 32-bit - # - # count treated as signed - # - # // if (from > to) { - # while (--count >= 0) { - # *to++ = *from++; - # } - # } else { - # while (--count >= 0) { - # to[count] = from[count]; - # } - # } - .p2align 4,,15 -DECLARE_FUNC(_Copy_conjoint_jlongs_atomic): - movl 4+8(%esp),%ecx # count - movl 4+0(%esp),%eax # from - movl 4+4(%esp),%edx # to - cmpl %eax,%edx - jae cla_CopyLeft -cla_CopyRight: - subl %eax,%edx - jmp 2f - .p2align 4,,15 -1: fildll (%eax) - fistpll (%edx,%eax,1) - addl $8,%eax -2: subl $1,%ecx - jge 1b - ret - .p2align 4,,15 -3: fildll (%eax,%ecx,8) - fistpll (%edx,%ecx,8) -cla_CopyLeft: - subl $1,%ecx - jge 3b - ret - - # Support for void Copy::arrayof_conjoint_jshorts(void* from, - # void* to, - # size_t count) - .p2align 4,,15 -DECLARE_FUNC(_mmx_Copy_arrayof_conjoint_jshorts): - pushl %esi - movl 4+12(%esp),%ecx - pushl %edi - movl 8+ 4(%esp),%esi - movl 8+ 8(%esp),%edi - cmpl %esi,%edi - leal -2(%esi,%ecx,2),%eax - jbe mmx_acs_CopyRight - cmpl %eax,%edi - jbe mmx_acs_CopyLeft -mmx_acs_CopyRight: - movl %ecx,%eax - sarl %ecx - je 5f - cmpl $33,%ecx - jae 3f -1: subl %esi,%edi - .p2align 4,,15 -2: movl (%esi),%edx - movl %edx,(%edi,%esi,1) - addl $4,%esi - subl $1,%ecx - jnz 2b - addl %esi,%edi - jmp 5f -3: smovl # align to 8 bytes, we know we are 4 byte aligned to start - subl $1,%ecx -4: .p2align 4,,15 - movq 0(%esi),%mm0 - addl $64,%edi - movq 8(%esi),%mm1 - subl $16,%ecx - movq 16(%esi),%mm2 - movq %mm0,-64(%edi) - movq 24(%esi),%mm0 - movq %mm1,-56(%edi) - movq 32(%esi),%mm1 - movq %mm2,-48(%edi) - movq 40(%esi),%mm2 - movq %mm0,-40(%edi) - movq 48(%esi),%mm0 - movq %mm1,-32(%edi) - movq 56(%esi),%mm1 - movq %mm2,-24(%edi) - movq %mm0,-16(%edi) - addl $64,%esi - movq %mm1,-8(%edi) - cmpl $16,%ecx - jge 4b - emms - testl %ecx,%ecx - ja 1b -5: andl $1,%eax - je 7f -6: movw (%esi),%dx - movw %dx,(%edi) -7: popl %edi - popl %esi - ret -mmx_acs_CopyLeft: - std - leal -4(%edi,%ecx,2),%edi - movl %eax,%esi - movl %ecx,%eax - subl $2,%esi - sarl %ecx - je 4f - cmpl $32,%ecx - ja 3f - subl %esi,%edi - .p2align 4,,15 -2: movl (%esi),%edx - movl %edx,(%edi,%esi,1) - subl $4,%esi - subl $1,%ecx - jnz 2b - addl %esi,%edi - jmp 4f -3: rep; smovl -4: andl $1,%eax - je 6f - addl $2,%esi - addl $2,%edi -5: movw (%esi),%dx - movw %dx,(%edi) -6: cld - popl %edi - popl %esi - ret - - - # Support for int64_t Atomic::cmpxchg(int64_t compare_value, - # volatile int64_t* dest, - # int64_t exchange_value) - # - .p2align 4,,15 -DECLARE_FUNC(_Atomic_cmpxchg_long): - # 8(%esp) : return PC - pushl %ebx # 4(%esp) : old %ebx - pushl %edi # 0(%esp) : old %edi - movl 12(%esp), %ebx # 12(%esp) : exchange_value (low) - movl 16(%esp), %ecx # 16(%esp) : exchange_value (high) - movl 24(%esp), %eax # 24(%esp) : compare_value (low) - movl 28(%esp), %edx # 28(%esp) : compare_value (high) - movl 20(%esp), %edi # 20(%esp) : dest - lock - cmpxchg8b (%edi) - popl %edi - popl %ebx - ret - - - # Support for int64_t Atomic::load and Atomic::store. - # void _Atomic_move_long(const volatile int64_t* src, volatile int64_t* dst) - .p2align 4,,15 -DECLARE_FUNC(_Atomic_move_long): - movl 4(%esp), %eax # src - fildll (%eax) - movl 8(%esp), %eax # dest - fistpll (%eax) - ret diff --git a/src/hotspot/os_cpu/linux_x86/linux_x86_32.S b/src/hotspot/os_cpu/linux_x86/linux_x86_32.S deleted file mode 100644 index 43a9a38e57f..00000000000 --- a/src/hotspot/os_cpu/linux_x86/linux_x86_32.S +++ /dev/null @@ -1,518 +0,0 @@ -# -# Copyright (c) 2004, 2024, Oracle and/or its affiliates. All rights reserved. -# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -# -# This code is free software; you can redistribute it and/or modify it -# under the terms of the GNU General Public License version 2 only, as -# published by the Free Software Foundation. -# -# This code is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -# version 2 for more details (a copy is included in the LICENSE file that -# accompanied this code). -# -# You should have received a copy of the GNU General Public License version -# 2 along with this work; if not, write to the Free Software Foundation, -# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -# -# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -# or visit www.oracle.com if you need additional information or have any -# questions. -# - -#include "defs.S.inc" - - # NOTE WELL! The _Copy functions are called directly - # from server-compiler-generated code via CallLeafNoFP, - # which means that they *must* either not use floating - # point or use it in the same manner as does the server - # compiler. - - .text - - .p2align 4,,15 -DECLARE_FUNC(SpinPause): - rep - nop - movl $1, %eax - ret - - # Support for void Copy::arrayof_conjoint_bytes(void* from, - # void* to, - # size_t count) - # - .p2align 4,,15 -DECLARE_FUNC(_Copy_arrayof_conjoint_bytes): - pushl %esi - movl 4+12(%esp),%ecx # count - pushl %edi - movl 8+ 4(%esp),%esi # from - movl 8+ 8(%esp),%edi # to - cmpl %esi,%edi - leal -1(%esi,%ecx),%eax # from + count - 1 - jbe acb_CopyRight - cmpl %eax,%edi - jbe acb_CopyLeft - # copy from low to high -acb_CopyRight: - cmpl $3,%ecx - jbe 5f -1: movl %ecx,%eax - shrl $2,%ecx - jz 4f - cmpl $32,%ecx - ja 3f - # copy aligned dwords - subl %esi,%edi - .p2align 4,,15 -2: movl (%esi),%edx - movl %edx,(%edi,%esi,1) - addl $4,%esi - subl $1,%ecx - jnz 2b - addl %esi,%edi - jmp 4f - # copy aligned dwords -3: rep; smovl -4: movl %eax,%ecx -5: andl $3,%ecx - jz 7f - # copy suffix - xorl %eax,%eax -6: movb (%esi,%eax,1),%dl - movb %dl,(%edi,%eax,1) - addl $1,%eax - subl $1,%ecx - jnz 6b -7: popl %edi - popl %esi - ret -acb_CopyLeft: - std - leal -4(%edi,%ecx),%edi # to + count - 4 - movl %eax,%esi # from + count - 1 - movl %ecx,%eax - subl $3,%esi # from + count - 4 - cmpl $3,%ecx - jbe 5f -1: shrl $2,%ecx - jz 4f - cmpl $32,%ecx - jbe 2f # <= 32 dwords - rep; smovl - jmp 4f - .space 8 -2: subl %esi,%edi - .p2align 4,,15 -3: movl (%esi),%edx - movl %edx,(%edi,%esi,1) - subl $4,%esi - subl $1,%ecx - jnz 3b - addl %esi,%edi -4: movl %eax,%ecx -5: andl $3,%ecx - jz 7f - subl %esi,%edi - addl $3,%esi -6: movb (%esi),%dl - movb %dl,(%edi,%esi,1) - subl $1,%esi - subl $1,%ecx - jnz 6b -7: cld - popl %edi - popl %esi - ret - - # Support for void Copy::conjoint_jshorts_atomic(void* from, - # void* to, - # size_t count) - .p2align 4,,15 -DECLARE_FUNC(_Copy_conjoint_jshorts_atomic): - pushl %esi - movl 4+12(%esp),%ecx # count - pushl %edi - movl 8+ 4(%esp),%esi # from - movl 8+ 8(%esp),%edi # to - cmpl %esi,%edi - leal -2(%esi,%ecx,2),%eax # from + count*2 - 2 - jbe cs_CopyRight - cmpl %eax,%edi - jbe cs_CopyLeft - # copy from low to high -cs_CopyRight: - # align source address at dword address boundary - movl %esi,%eax # original from - andl $3,%eax # either 0 or 2 - jz 1f # no prefix - # copy prefix - subl $1,%ecx - jl 5f # zero count - movw (%esi),%dx - movw %dx,(%edi) - addl %eax,%esi # %eax == 2 - addl %eax,%edi -1: movl %ecx,%eax # word count less prefix - sarl %ecx # dword count - jz 4f # no dwords to move - cmpl $32,%ecx - jbe 2f # <= 32 dwords - # copy aligned dwords - rep; smovl - jmp 4f - # copy aligned dwords -2: subl %esi,%edi - .p2align 4,,15 -3: movl (%esi),%edx - movl %edx,(%edi,%esi,1) - addl $4,%esi - subl $1,%ecx - jnz 3b - addl %esi,%edi -4: andl $1,%eax # suffix count - jz 5f # no suffix - # copy suffix - movw (%esi),%dx - movw %dx,(%edi) -5: popl %edi - popl %esi - ret - # copy from high to low -cs_CopyLeft: - std - leal -4(%edi,%ecx,2),%edi # to + count*2 - 4 - movl %eax,%esi # from + count*2 - 2 - movl %ecx,%eax - subl $2,%esi # from + count*2 - 4 -1: sarl %ecx # dword count - jz 4f # no dwords to move - cmpl $32,%ecx - ja 3f # > 32 dwords - subl %esi,%edi - .p2align 4,,15 -2: movl (%esi),%edx - movl %edx,(%edi,%esi,1) - subl $4,%esi - subl $1,%ecx - jnz 2b - addl %esi,%edi - jmp 4f -3: rep; smovl -4: andl $1,%eax # suffix count - jz 5f # no suffix - # copy suffix - addl $2,%esi - addl $2,%edi - movw (%esi),%dx - movw %dx,(%edi) -5: cld - popl %edi - popl %esi - ret - - # Support for void Copy::arrayof_conjoint_jshorts(void* from, - # void* to, - # size_t count) - .p2align 4,,15 -DECLARE_FUNC(_Copy_arrayof_conjoint_jshorts): - pushl %esi - movl 4+12(%esp),%ecx # count - pushl %edi - movl 8+ 4(%esp),%esi # from - movl 8+ 8(%esp),%edi # to - cmpl %esi,%edi - leal -2(%esi,%ecx,2),%eax # from + count*2 - 2 - jbe acs_CopyRight - cmpl %eax,%edi - jbe acs_CopyLeft -acs_CopyRight: - movl %ecx,%eax # word count - sarl %ecx # dword count - jz 4f # no dwords to move - cmpl $32,%ecx - jbe 2f # <= 32 dwords - # copy aligned dwords - rep; smovl - jmp 4f - # copy aligned dwords - .space 5 -2: subl %esi,%edi - .p2align 4,,15 -3: movl (%esi),%edx - movl %edx,(%edi,%esi,1) - addl $4,%esi - subl $1,%ecx - jnz 3b - addl %esi,%edi -4: andl $1,%eax # suffix count - jz 5f # no suffix - # copy suffix - movw (%esi),%dx - movw %dx,(%edi) -5: popl %edi - popl %esi - ret -acs_CopyLeft: - std - leal -4(%edi,%ecx,2),%edi # to + count*2 - 4 - movl %eax,%esi # from + count*2 - 2 - movl %ecx,%eax - subl $2,%esi # from + count*2 - 4 - sarl %ecx # dword count - jz 4f # no dwords to move - cmpl $32,%ecx - ja 3f # > 32 dwords - subl %esi,%edi - .p2align 4,,15 -2: movl (%esi),%edx - movl %edx,(%edi,%esi,1) - subl $4,%esi - subl $1,%ecx - jnz 2b - addl %esi,%edi - jmp 4f -3: rep; smovl -4: andl $1,%eax # suffix count - jz 5f # no suffix - # copy suffix - addl $2,%esi - addl $2,%edi - movw (%esi),%dx - movw %dx,(%edi) -5: cld - popl %edi - popl %esi - ret - - # Support for void Copy::conjoint_jints_atomic(void* from, - # void* to, - # size_t count) - # Equivalent to - # arrayof_conjoint_jints - .p2align 4,,15 -DECLARE_FUNC(_Copy_conjoint_jints_atomic): -DECLARE_FUNC(_Copy_arrayof_conjoint_jints): - pushl %esi - movl 4+12(%esp),%ecx # count - pushl %edi - movl 8+ 4(%esp),%esi # from - movl 8+ 8(%esp),%edi # to - cmpl %esi,%edi - leal -4(%esi,%ecx,4),%eax # from + count*4 - 4 - jbe ci_CopyRight - cmpl %eax,%edi - jbe ci_CopyLeft -ci_CopyRight: - cmpl $32,%ecx - jbe 2f # <= 32 dwords - rep; smovl - popl %edi - popl %esi - ret - .space 10 -2: subl %esi,%edi - jmp 4f - .p2align 4,,15 -3: movl (%esi),%edx - movl %edx,(%edi,%esi,1) - addl $4,%esi -4: subl $1,%ecx - jge 3b - popl %edi - popl %esi - ret -ci_CopyLeft: - std - leal -4(%edi,%ecx,4),%edi # to + count*4 - 4 - cmpl $32,%ecx - ja 4f # > 32 dwords - subl %eax,%edi # eax == from + count*4 - 4 - jmp 3f - .p2align 4,,15 -2: movl (%eax),%edx - movl %edx,(%edi,%eax,1) - subl $4,%eax -3: subl $1,%ecx - jge 2b - cld - popl %edi - popl %esi - ret -4: movl %eax,%esi # from + count*4 - 4 - rep; smovl - cld - popl %edi - popl %esi - ret - - # Support for void Copy::conjoint_jlongs_atomic(jlong* from, - # jlong* to, - # size_t count) - # - # 32-bit - # - # count treated as signed - /* - # - # if (from > to) { - # while (--count >= 0) { - # *to++ = *from++; - # } - # } else { - # while (--count >= 0) { - # to[count] = from[count]; - # } - # } - */ - .p2align 4,,15 -DECLARE_FUNC(_Copy_conjoint_jlongs_atomic): - movl 4+8(%esp),%ecx # count - movl 4+0(%esp),%eax # from - movl 4+4(%esp),%edx # to - cmpl %eax,%edx - jae cla_CopyLeft -cla_CopyRight: - subl %eax,%edx - jmp 2f - .p2align 4,,15 -1: fildll (%eax) - fistpll (%edx,%eax,1) - addl $8,%eax -2: subl $1,%ecx - jge 1b - ret - .p2align 4,,15 -3: fildll (%eax,%ecx,8) - fistpll (%edx,%ecx,8) -cla_CopyLeft: - subl $1,%ecx - jge 3b - ret - - # Support for void Copy::arrayof_conjoint_jshorts(void* from, - # void* to, - # size_t count) - .p2align 4,,15 -DECLARE_FUNC(_mmx_Copy_arrayof_conjoint_jshorts): - pushl %esi - movl 4+12(%esp),%ecx - pushl %edi - movl 8+ 4(%esp),%esi - movl 8+ 8(%esp),%edi - cmpl %esi,%edi - leal -2(%esi,%ecx,2),%eax - jbe mmx_acs_CopyRight - cmpl %eax,%edi - jbe mmx_acs_CopyLeft -mmx_acs_CopyRight: - movl %ecx,%eax - sarl %ecx - je 5f - cmpl $33,%ecx - jae 3f -1: subl %esi,%edi - .p2align 4,,15 -2: movl (%esi),%edx - movl %edx,(%edi,%esi,1) - addl $4,%esi - subl $1,%ecx - jnz 2b - addl %esi,%edi - jmp 5f -3: smovl # align to 8 bytes, we know we are 4 byte aligned to start - subl $1,%ecx -4: .p2align 4,,15 - movq 0(%esi),%mm0 - addl $64,%edi - movq 8(%esi),%mm1 - subl $16,%ecx - movq 16(%esi),%mm2 - movq %mm0,-64(%edi) - movq 24(%esi),%mm0 - movq %mm1,-56(%edi) - movq 32(%esi),%mm1 - movq %mm2,-48(%edi) - movq 40(%esi),%mm2 - movq %mm0,-40(%edi) - movq 48(%esi),%mm0 - movq %mm1,-32(%edi) - movq 56(%esi),%mm1 - movq %mm2,-24(%edi) - movq %mm0,-16(%edi) - addl $64,%esi - movq %mm1,-8(%edi) - cmpl $16,%ecx - jge 4b - emms - testl %ecx,%ecx - ja 1b -5: andl $1,%eax - je 7f -6: movw (%esi),%dx - movw %dx,(%edi) -7: popl %edi - popl %esi - ret -mmx_acs_CopyLeft: - std - leal -4(%edi,%ecx,2),%edi - movl %eax,%esi - movl %ecx,%eax - subl $2,%esi - sarl %ecx - je 4f - cmpl $32,%ecx - ja 3f - subl %esi,%edi - .p2align 4,,15 -2: movl (%esi),%edx - movl %edx,(%edi,%esi,1) - subl $4,%esi - subl $1,%ecx - jnz 2b - addl %esi,%edi - jmp 4f -3: rep; smovl -4: andl $1,%eax - je 6f - addl $2,%esi - addl $2,%edi -5: movw (%esi),%dx - movw %dx,(%edi) -6: cld - popl %edi - popl %esi - ret - - - # Support for jlong Atomic::cmpxchg(volatile jlong* dest, - # jlong compare_value, - # jlong exchange_value) - # - .p2align 4,,15 -DECLARE_FUNC(_Atomic_cmpxchg_long): - # 8(%esp) : return PC - pushl %ebx # 4(%esp) : old %ebx - pushl %edi # 0(%esp) : old %edi - movl 12(%esp), %ebx # 12(%esp) : exchange_value (low) - movl 16(%esp), %ecx # 16(%esp) : exchange_value (high) - movl 24(%esp), %eax # 24(%esp) : compare_value (low) - movl 28(%esp), %edx # 28(%esp) : compare_value (high) - movl 20(%esp), %edi # 20(%esp) : dest - lock cmpxchg8b (%edi) - popl %edi - popl %ebx - ret - - - # Support for jlong Atomic::load and Atomic::store. - # void _Atomic_move_long(const volatile jlong* src, volatile jlong* dst) - .p2align 4,,15 -DECLARE_FUNC(_Atomic_move_long): - movl 4(%esp), %eax # src - fildll (%eax) - movl 8(%esp), %eax # dest - fistpll (%eax) - ret diff --git a/src/hotspot/os_cpu/linux_x86/safefetch_linux_x86_32.S b/src/hotspot/os_cpu/linux_x86/safefetch_linux_x86_32.S deleted file mode 100644 index 73f6cdf38c9..00000000000 --- a/src/hotspot/os_cpu/linux_x86/safefetch_linux_x86_32.S +++ /dev/null @@ -1,41 +0,0 @@ -# -# Copyright (c) 2022 SAP SE. All rights reserved. -# Copyright (c) 2022, 2024, Oracle and/or its affiliates. All rights reserved. -# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -# -# This code is free software; you can redistribute it and/or modify it -# under the terms of the GNU General Public License version 2 only, as -# published by the Free Software Foundation. -# -# This code is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -# version 2 for more details (a copy is included in the LICENSE file that -# accompanied this code). -# -# You should have received a copy of the GNU General Public License version -# 2 along with this work; if not, write to the Free Software Foundation, -# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -# -# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -# or visit www.oracle.com if you need additional information or have any -# questions. -# - -#include "defs.S.inc" - - .text - - # Support for int SafeFetch32(int* address, int defaultval); - # - # 8(%esp) : default value - # 4(%esp) : crash address - # 0(%esp) : return pc -DECLARE_FUNC(SafeFetch32_impl): - movl 4(%esp),%ecx # load address from stack -DECLARE_FUNC(_SafeFetch32_fault): - movl (%ecx), %eax # load target value, may fault - ret -DECLARE_FUNC(_SafeFetch32_continuation): - movl 8(%esp),%eax # load default value from stack - ret