diff --git a/make/autoconf/basic.m4 b/make/autoconf/basic.m4
index 19e6641b94d..6daba35547b 100644
--- a/make/autoconf/basic.m4
+++ b/make/autoconf/basic.m4
@@ -75,10 +75,11 @@ AC_DEFUN_ONCE([BASIC_SETUP_PATHS],
     AC_MSG_NOTICE([Rewriting ORIGINAL_PATH to $REWRITTEN_PATH])
   fi
 
+  if test "x$OPENJDK_TARGET_CPU" = xx86 && test "x$with_jvm_variants" != xzero; then
+    AC_MSG_ERROR([32-bit x86 builds are not supported])
+  fi
+
   if test "x$OPENJDK_TARGET_OS" = "xwindows"; then
-    if test "x$OPENJDK_TARGET_CPU_BITS" = "x32"; then
-      AC_MSG_ERROR([32-bit Windows builds are not supported])
-    fi
     BASIC_SETUP_PATHS_WINDOWS
   fi
 
diff --git a/make/autoconf/platform.m4 b/make/autoconf/platform.m4
index 937d8c37927..31451d0c37f 100644
--- a/make/autoconf/platform.m4
+++ b/make/autoconf/platform.m4
@@ -666,17 +666,7 @@ AC_DEFUN([PLATFORM_CHECK_DEPRECATION],
 [
   AC_ARG_ENABLE(deprecated-ports, [AS_HELP_STRING([--enable-deprecated-ports@<:@=yes/no@:>@],
       [Suppress the error when configuring for a deprecated port @<:@no@:>@])])
-  # Unfortunately, variants have not been parsed yet, so we have to check the configure option
-  # directly. Allow only the directly specified Zero variant, treat any other mix as containing
-  # something non-Zero.
-  if test "x$OPENJDK_TARGET_CPU" = xx86 && test "x$with_jvm_variants" != xzero; then
-    if test "x$enable_deprecated_ports" = "xyes"; then
-      AC_MSG_WARN([The 32-bit x86 port is deprecated and may be removed in a future release.])
-    else
-      AC_MSG_ERROR(m4_normalize([The 32-bit x86 port is deprecated and may be removed in a future release.
-        Use --enable-deprecated-ports=yes to suppress this error.]))
-    fi
-  fi
+  # There are currently no deprecated ports. When a port becomes deprecated, add its warning/error check here.
 ])
 
 AC_DEFUN_ONCE([PLATFORM_SETUP_OPENJDK_BUILD_OS_VERSION],
diff --git a/src/hotspot/cpu/x86/downcallLinker_x86_32.cpp b/src/hotspot/cpu/x86/downcallLinker_x86_32.cpp
deleted file mode 100644
index 3c7d93fc79e..00000000000
--- a/src/hotspot/cpu/x86/downcallLinker_x86_32.cpp
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Copyright (c) 2020, 2025, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-
-#include "prims/downcallLinker.hpp"
-
-RuntimeStub* DowncallLinker::make_downcall_stub(BasicType* signature,
-                                                int num_args,
-                                                BasicType ret_bt,
-                                                const ABIDescriptor& abi,
-                                                const GrowableArray<VMStorage>& input_registers,
-                                                const GrowableArray<VMStorage>& output_registers,
-                                                bool needs_return_buffer,
-                                                int captured_state_mask,
-                                                bool needs_transition) {
-  Unimplemented();
-  return nullptr;
-}
-
-void DowncallLinker::StubGenerator::pd_add_offset_to_oop(VMStorage reg_oop, VMStorage reg_offset,
-                                                         VMStorage tmp1, VMStorage tmp2) const {
-  Unimplemented();
-}
diff --git a/src/hotspot/cpu/x86/foreignGlobals_x86_32.cpp b/src/hotspot/cpu/x86/foreignGlobals_x86_32.cpp
deleted file mode 100644
index 18aa454e61c..00000000000
--- a/src/hotspot/cpu/x86/foreignGlobals_x86_32.cpp
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Copyright (c) 2022, 2025, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-
-#include "code/vmreg.hpp"
-#include "prims/foreignGlobals.hpp"
-#include "utilities/debug.hpp"
-
-class MacroAssembler;
-
-bool ForeignGlobals::is_foreign_linker_supported() {
-  return false;
-}
-
-const ABIDescriptor ForeignGlobals::parse_abi_descriptor(jobject jabi) {
-  Unimplemented();
-  return {};
-}
-
-int RegSpiller::pd_reg_size(VMStorage reg) {
-  Unimplemented();
-  return -1;
-}
-
-void RegSpiller::pd_store_reg(MacroAssembler* masm, int offset, VMStorage reg) {
-  Unimplemented();
-}
-
-void RegSpiller::pd_load_reg(MacroAssembler* masm, int offset, VMStorage reg) {
-  Unimplemented();
-}
-
-void ArgumentShuffle::pd_generate(MacroAssembler* masm, VMStorage tmp, int in_stk_bias, int out_stk_bias) const {
-  Unimplemented();
-}
diff --git a/src/hotspot/cpu/x86/gc/shenandoah/shenandoah_x86_32.ad b/src/hotspot/cpu/x86/gc/shenandoah/shenandoah_x86_32.ad
deleted file mode 100644
index 3cf82bf9fb1..00000000000
--- a/src/hotspot/cpu/x86/gc/shenandoah/shenandoah_x86_32.ad
+++ /dev/null
@@ -1,71 +0,0 @@
-//
-// Copyright (c) 2018, Red Hat, Inc. All rights reserved.
-// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
-//
-// This code is free software; you can redistribute it and/or modify it
-// under the terms of the GNU General Public License version 2 only, as
-// published by the Free Software Foundation.
-//
-// This code is distributed in the hope that it will be useful, but WITHOUT
-// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-// version 2 for more details (a copy is included in the LICENSE file that
-// accompanied this code).
-//
-// You should have received a copy of the GNU General Public License version
-// 2 along with this work; if not, write to the Free Software Foundation,
-// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
-//
-// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
-// or visit www.oracle.com if you need additional information or have any
-// questions.
-//
-//
-
-source_hpp %{
-#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp"
-#include "gc/shenandoah/c2/shenandoahSupport.hpp"
-%}
-
-instruct compareAndSwapP_shenandoah(rRegI res,
-                                    memory mem_ptr,
-                                    eRegP tmp1, eRegP tmp2,
-                                    eAXRegP oldval, eRegP newval,
-                                    eFlagsReg cr)
-%{
-  match(Set res (ShenandoahCompareAndSwapP mem_ptr (Binary oldval newval)));
-  match(Set res (ShenandoahWeakCompareAndSwapP mem_ptr (Binary oldval newval)));
-  effect(TEMP tmp1, TEMP tmp2, KILL cr, KILL oldval);
-
-  format %{ "shenandoah_cas_oop $mem_ptr,$newval" %}
-
-  ins_encode %{
-    ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm,
-                                                   $res$$Register, $mem_ptr$$Address, $oldval$$Register, $newval$$Register,
-                                                   false, // swap
-                                                   $tmp1$$Register, $tmp2$$Register
-                                                   );
-  %}
-  ins_pipe( pipe_cmpxchg );
-%}
-
-instruct compareAndExchangeP_shenandoah(memory mem_ptr,
-                                        eAXRegP oldval, eRegP newval,
-                                        eRegP tmp1, eRegP tmp2,
-                                        eFlagsReg cr)
-%{
-  match(Set oldval (ShenandoahCompareAndExchangeP mem_ptr (Binary oldval newval)));
-  effect(KILL cr, TEMP tmp1, TEMP tmp2);
-  ins_cost(1000);
-
-  format %{ "shenandoah_cas_oop $mem_ptr,$newval" %}
-
-  ins_encode %{
-    ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm,
-                                                   noreg, $mem_ptr$$Address, $oldval$$Register, $newval$$Register,
-                                                   true,  // exchange
-                                                   $tmp1$$Register, $tmp2$$Register
-                                                   );
-  %}
-  ins_pipe( pipe_cmpxchg );
-%}
diff --git a/src/hotspot/cpu/x86/interpreterRT_x86_32.cpp b/src/hotspot/cpu/x86/interpreterRT_x86_32.cpp
deleted file mode 100644
index 14f11596924..00000000000
--- a/src/hotspot/cpu/x86/interpreterRT_x86_32.cpp
+++ /dev/null
@@ -1,145 +0,0 @@
-/*
- * Copyright (c) 1998, 2025, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-#include "interpreter/interp_masm.hpp"
-#include "interpreter/interpreter.hpp"
-#include "interpreter/interpreterRuntime.hpp"
-#include "memory/allocation.inline.hpp"
-#include "oops/method.hpp"
-#include "oops/oop.inline.hpp"
-#include "runtime/handles.inline.hpp"
-#include "runtime/icache.hpp"
-#include "runtime/interfaceSupport.inline.hpp"
-#include "runtime/signature.hpp"
-
-
-#define __ _masm->
-
-
-// Implementation of SignatureHandlerGenerator
-InterpreterRuntime::SignatureHandlerGenerator::SignatureHandlerGenerator(const methodHandle& method, CodeBuffer* buffer) :
-    NativeSignatureIterator(method) {
-  _masm = new MacroAssembler(buffer);
-}
-
-void InterpreterRuntime::SignatureHandlerGenerator::pass_int() {
-  move(offset(), jni_offset() + 1);
-}
-
-void InterpreterRuntime::SignatureHandlerGenerator::pass_float() {
-  move(offset(), jni_offset() + 1);
-}
-
-void InterpreterRuntime::SignatureHandlerGenerator::pass_long() {
-   move(offset(), jni_offset() + 2);
-   move(offset() + 1, jni_offset() + 1);
-}
-
-void InterpreterRuntime::SignatureHandlerGenerator::pass_object() {
-  box (offset(), jni_offset() + 1);
-}
-
-void InterpreterRuntime::SignatureHandlerGenerator::move(int from_offset, int to_offset) {
-  __ movl(temp(), Address(from(), Interpreter::local_offset_in_bytes(from_offset)));
-  __ movl(Address(to(), to_offset * wordSize), temp());
-}
-
-
-void InterpreterRuntime::SignatureHandlerGenerator::box(int from_offset, int to_offset) {
-  __ lea(temp(), Address(from(), Interpreter::local_offset_in_bytes(from_offset)));
-  __ cmpptr(Address(from(), Interpreter::local_offset_in_bytes(from_offset)), NULL_WORD); // do not use temp() to avoid AGI
-  Label L;
-  __ jcc(Assembler::notZero, L);
-  __ movptr(temp(), NULL_WORD);
-  __ bind(L);
-  __ movptr(Address(to(), to_offset * wordSize), temp());
-}
-
-
-void InterpreterRuntime::SignatureHandlerGenerator::generate( uint64_t fingerprint) {
-  // generate code to handle arguments
-  iterate(fingerprint);
-  // return result handler
-  __ lea(rax,
-         ExternalAddress((address)Interpreter::result_handler(method()->result_type())));
-  // return
-  __ ret(0);
-  __ flush();
-}
-
-
-Register InterpreterRuntime::SignatureHandlerGenerator::from()       { return rdi; }
-Register InterpreterRuntime::SignatureHandlerGenerator::to()         { return rsp; }
-Register InterpreterRuntime::SignatureHandlerGenerator::temp()       { return rcx; }
-
-
-// Implementation of SignatureHandlerLibrary
-
-void SignatureHandlerLibrary::pd_set_handler(address handler) {}
-
-class SlowSignatureHandler: public NativeSignatureIterator {
- private:
-  address   _from;
-  intptr_t* _to;
-
-  virtual void pass_int() {
-    *_to++ = *(jint *)(_from+Interpreter::local_offset_in_bytes(0));
-    _from -= Interpreter::stackElementSize;
-  }
-
-  virtual void pass_float() {
-    *_to++ = *(jint *)(_from+Interpreter::local_offset_in_bytes(0));
-    _from -= Interpreter::stackElementSize;
-  }
-
-  virtual void pass_long() {
-    _to[0] = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(1));
-    _to[1] = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(0));
-    _to += 2;
-    _from -= 2*Interpreter::stackElementSize;
-  }
-
-  virtual void pass_object() {
-    // pass address of from
-    intptr_t from_addr = (intptr_t)(_from + Interpreter::local_offset_in_bytes(0));
-    *_to++ = (*(intptr_t*)from_addr == 0) ? NULL_WORD : from_addr;
-    _from -= Interpreter::stackElementSize;
-   }
-
- public:
-  SlowSignatureHandler(const methodHandle& method, address from, intptr_t* to) :
-    NativeSignatureIterator(method) {
-    _from = from;
-    _to   = to + (is_static() ? 2 : 1);
-  }
-};
-
-JRT_ENTRY(address, InterpreterRuntime::slow_signature_handler(JavaThread* current, Method* method, intptr_t* from, intptr_t* to))
-  methodHandle m(current, (Method*)method);
-  assert(m->is_native(), "sanity check");
-  // handle arguments
-  SlowSignatureHandler(m, (address)from, to + 1).iterate((uint64_t)CONST64(-1));
-  // return result handler
-  return Interpreter::result_handler(m->result_type());
-JRT_END
diff --git a/src/hotspot/cpu/x86/jniFastGetField_x86_32.cpp b/src/hotspot/cpu/x86/jniFastGetField_x86_32.cpp
deleted file mode 100644
index eee82a5c682..00000000000
--- a/src/hotspot/cpu/x86/jniFastGetField_x86_32.cpp
+++ /dev/null
@@ -1,323 +0,0 @@
-/*
- * Copyright (c) 2004, 2025, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-#include "asm/macroAssembler.hpp"
-#include "memory/resourceArea.hpp"
-#include "prims/jniFastGetField.hpp"
-#include "prims/jvm_misc.hpp"
-#include "prims/jvmtiExport.hpp"
-#include "runtime/os.inline.hpp"
-#include "runtime/safepoint.hpp"
-#include "runtime/stubRoutines.hpp"
-
-#define __ masm->
-
-#define BUFFER_SIZE 30
-
-// Instead of issuing lfence for LoadLoad barrier, we create data dependency
-// between loads, which is much more efficient than lfence.
-
-address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) {
-  const char *name = nullptr;
-  switch (type) {
-    case T_BOOLEAN: name = "jni_fast_GetBooleanField"; break;
-    case T_BYTE:    name = "jni_fast_GetByteField";    break;
-    case T_CHAR:    name = "jni_fast_GetCharField";    break;
-    case T_SHORT:   name = "jni_fast_GetShortField";   break;
-    case T_INT:     name = "jni_fast_GetIntField";     break;
-    default:        ShouldNotReachHere();
-  }
-  ResourceMark rm;
-  BufferBlob* blob = BufferBlob::create(name, BUFFER_SIZE*wordSize);
-  CodeBuffer cbuf(blob);
-  MacroAssembler* masm = new MacroAssembler(&cbuf);
-  address fast_entry = __ pc();
-
-  Label slow;
-
-  // stack layout:    offset from rsp (in words):
-  //  return pc        0
-  //  jni env          1
-  //  obj              2
-  //  jfieldID         3
-
-  ExternalAddress counter(SafepointSynchronize::safepoint_counter_addr());
-  __ mov32 (rcx, counter);
-  __ testb (rcx, 1);
-  __ jcc (Assembler::notZero, slow);
-
-  if (JvmtiExport::can_post_field_access()) {
-    // Check to see if a field access watch has been set before we
-    // take the fast path.
-    __ cmp32(ExternalAddress((address) JvmtiExport::get_field_access_count_addr()), 0);
-    __ jcc(Assembler::notZero, slow);
-  }
-
-  __ mov(rax, rcx);
-  __ andptr(rax, 1);                         // rax, must end up 0
-  __ movptr(rdx, Address(rsp, rax, Address::times_1, 2*wordSize));
-                                            // obj, notice rax, is 0.
-                                            // rdx is data dependent on rcx.
-  __ movptr(rax, Address(rsp, 3*wordSize));  // jfieldID
-
-  __ clear_jobject_tag(rdx);
-
-  __ movptr(rdx, Address(rdx, 0));           // *obj
-  __ shrptr (rax, 2);                         // offset
-
-  assert(count < LIST_CAPACITY, "LIST_CAPACITY too small");
-  speculative_load_pclist[count] = __ pc();
-  switch (type) {
-    case T_BOOLEAN: __ movzbl (rax, Address(rdx, rax, Address::times_1)); break;
-    case T_BYTE:    __ movsbl (rax, Address(rdx, rax, Address::times_1)); break;
-    case T_CHAR:    __ movzwl (rax, Address(rdx, rax, Address::times_1)); break;
-    case T_SHORT:   __ movswl (rax, Address(rdx, rax, Address::times_1)); break;
-    case T_INT:     __ movl   (rax, Address(rdx, rax, Address::times_1)); break;
-    default:        ShouldNotReachHere();
-  }
-
-  Address ca1;
-  __ lea(rdx, counter);
-  __ xorptr(rdx, rax);
-  __ xorptr(rdx, rax);
-  __ cmp32(rcx, Address(rdx, 0));
-  // ca1 is the same as ca because
-  // rax, ^ counter_addr ^ rax, = address
-  // ca1 is data dependent on rax,.
-  __ jcc (Assembler::notEqual, slow);
-
-  __ ret (0);
-
-  slowcase_entry_pclist[count++] = __ pc();
-  __ bind (slow);
-  address slow_case_addr = nullptr;
-  switch (type) {
-    case T_BOOLEAN: slow_case_addr = jni_GetBooleanField_addr(); break;
-    case T_BYTE:    slow_case_addr = jni_GetByteField_addr();    break;
-    case T_CHAR:    slow_case_addr = jni_GetCharField_addr();    break;
-    case T_SHORT:   slow_case_addr = jni_GetShortField_addr();   break;
-    case T_INT:     slow_case_addr = jni_GetIntField_addr();     break;
-    default:        ShouldNotReachHere();
-  }
-  // tail call
-  __ jump (RuntimeAddress(slow_case_addr));
-
-  __ flush ();
-
-  return fast_entry;
-}
-
-address JNI_FastGetField::generate_fast_get_boolean_field() {
-  return generate_fast_get_int_field0(T_BOOLEAN);
-}
-
-address JNI_FastGetField::generate_fast_get_byte_field() {
-  return generate_fast_get_int_field0(T_BYTE);
-}
-
-address JNI_FastGetField::generate_fast_get_char_field() {
-  return generate_fast_get_int_field0(T_CHAR);
-}
-
-address JNI_FastGetField::generate_fast_get_short_field() {
-  return generate_fast_get_int_field0(T_SHORT);
-}
-
-address JNI_FastGetField::generate_fast_get_int_field() {
-  return generate_fast_get_int_field0(T_INT);
-}
-
-address JNI_FastGetField::generate_fast_get_long_field() {
-  const char *name = "jni_fast_GetLongField";
-  ResourceMark rm;
-  BufferBlob* blob = BufferBlob::create(name, BUFFER_SIZE*wordSize);
-  CodeBuffer cbuf(blob);
-  MacroAssembler* masm = new MacroAssembler(&cbuf);
-  address fast_entry = __ pc();
-
-  Label slow;
-
-  // stack layout:    offset from rsp (in words):
-  //  old rsi          0
-  //  return pc        1
-  //  jni env          2
-  //  obj              3
-  //  jfieldID         4
-
-  ExternalAddress counter(SafepointSynchronize::safepoint_counter_addr());
-
-  __ push  (rsi);
-  __ mov32 (rcx, counter);
-  __ testb (rcx, 1);
-  __ jcc (Assembler::notZero, slow);
-
-  if (JvmtiExport::can_post_field_access()) {
-    // Check to see if a field access watch has been set before we
-    // take the fast path.
-    __ cmp32(ExternalAddress((address) JvmtiExport::get_field_access_count_addr()), 0);
-    __ jcc(Assembler::notZero, slow);
-  }
-
-  __ mov(rax, rcx);
-  __ andptr(rax, 1);                         // rax, must end up 0
-  __ movptr(rdx, Address(rsp, rax, Address::times_1, 3*wordSize));
-                                            // obj, notice rax, is 0.
-                                            // rdx is data dependent on rcx.
-  __ movptr(rsi, Address(rsp, 4*wordSize));  // jfieldID
-
-  __ clear_jobject_tag(rdx);
-
-  __ movptr(rdx, Address(rdx, 0));           // *obj
-  __ shrptr(rsi, 2);                         // offset
-
-  assert(count < LIST_CAPACITY-1, "LIST_CAPACITY too small");
-  speculative_load_pclist[count++] = __ pc();
-  __ movptr(rax, Address(rdx, rsi, Address::times_1));
-  speculative_load_pclist[count] = __ pc();
-  __ movl(rdx, Address(rdx, rsi, Address::times_1, 4));
-
-  __ lea(rsi, counter);
-  __ xorptr(rsi, rdx);
-  __ xorptr(rsi, rax);
-  __ xorptr(rsi, rdx);
-  __ xorptr(rsi, rax);
-  __ cmp32(rcx, Address(rsi, 0));
-  // ca1 is the same as ca because
-  // rax, ^ rdx ^ counter_addr ^ rax, ^ rdx = address
-  // ca1 is data dependent on both rax, and rdx.
-  __ jcc (Assembler::notEqual, slow);
-
-  __ pop (rsi);
-
-  __ ret (0);
-
-  slowcase_entry_pclist[count-1] = __ pc();
-  slowcase_entry_pclist[count++] = __ pc();
-  __ bind (slow);
-  __ pop  (rsi);
-  address slow_case_addr = jni_GetLongField_addr();;
-  // tail call
-  __ jump (RuntimeAddress(slow_case_addr));
-
-  __ flush ();
-
-  return fast_entry;
-}
-
-address JNI_FastGetField::generate_fast_get_float_field0(BasicType type) {
-  const char *name = nullptr;
-  switch (type) {
-    case T_FLOAT:  name = "jni_fast_GetFloatField";  break;
-    case T_DOUBLE: name = "jni_fast_GetDoubleField"; break;
-    default:       ShouldNotReachHere();
-  }
-  ResourceMark rm;
-  BufferBlob* blob = BufferBlob::create(name, BUFFER_SIZE*wordSize);
-  CodeBuffer cbuf(blob);
-  MacroAssembler* masm = new MacroAssembler(&cbuf);
-  address fast_entry = __ pc();
-
-  Label slow_with_pop, slow;
-
-  // stack layout:    offset from rsp (in words):
-  //  return pc        0
-  //  jni env          1
-  //  obj              2
-  //  jfieldID         3
-
-  ExternalAddress counter(SafepointSynchronize::safepoint_counter_addr());
-
-  __ mov32 (rcx, counter);
-  __ testb (rcx, 1);
-  __ jcc (Assembler::notZero, slow);
-
-  if (JvmtiExport::can_post_field_access()) {
-    // Check to see if a field access watch has been set before we
-    // take the fast path.
-    __ cmp32(ExternalAddress((address) JvmtiExport::get_field_access_count_addr()), 0);
-    __ jcc(Assembler::notZero, slow);
-  }
-
-  __ mov(rax, rcx);
-  __ andptr(rax, 1);                         // rax, must end up 0
-  __ movptr(rdx, Address(rsp, rax, Address::times_1, 2*wordSize));
-                                            // obj, notice rax, is 0.
-                                            // rdx is data dependent on rcx.
-  __ movptr(rax, Address(rsp, 3*wordSize));  // jfieldID
-
-  __ clear_jobject_tag(rdx);
-
-  __ movptr(rdx, Address(rdx, 0));           // *obj
-  __ shrptr(rax, 2);                         // offset
-
-  assert(count < LIST_CAPACITY, "LIST_CAPACITY too small");
-  speculative_load_pclist[count] = __ pc();
-  switch (type) {
-    case T_FLOAT:  __ fld_s (Address(rdx, rax, Address::times_1)); break;
-    case T_DOUBLE: __ fld_d (Address(rdx, rax, Address::times_1)); break;
-    default:       ShouldNotReachHere();
-  }
-
-  Address ca1;
-  __ fst_s (Address(rsp, -4));
-  __ lea(rdx, counter);
-  __ movl (rax, Address(rsp, -4));
-  // garbage hi-order bits on 64bit are harmless.
-  __ xorptr(rdx, rax);
-  __ xorptr(rdx, rax);
-  __ cmp32(rcx, Address(rdx, 0));
-  // rax, ^ counter_addr ^ rax, = address
-  // ca1 is data dependent on the field
-  // access.
-  __ jcc (Assembler::notEqual, slow_with_pop);
-
-  __ ret (0);
-
-  __ bind (slow_with_pop);
-  // invalid load. pop FPU stack.
-  __ fstp_d (0);
-
-  slowcase_entry_pclist[count++] = __ pc();
-  __ bind (slow);
-  address slow_case_addr = nullptr;
-  switch (type) {
-    case T_FLOAT:  slow_case_addr = jni_GetFloatField_addr();  break;
-    case T_DOUBLE: slow_case_addr = jni_GetDoubleField_addr(); break;
-    default:       ShouldNotReachHere();
-  }
-  // tail call
-  __ jump (RuntimeAddress(slow_case_addr));
-
-  __ flush ();
-
-  return fast_entry;
-}
-
-address JNI_FastGetField::generate_fast_get_float_field() {
-  return generate_fast_get_float_field0(T_FLOAT);
-}
-
-address JNI_FastGetField::generate_fast_get_double_field() {
-  return generate_fast_get_float_field0(T_DOUBLE);
-}
diff --git a/src/hotspot/cpu/x86/macroAssembler_x86_32_constants.cpp b/src/hotspot/cpu/x86/macroAssembler_x86_32_constants.cpp
deleted file mode 100644
index 6fdda4c2f71..00000000000
--- a/src/hotspot/cpu/x86/macroAssembler_x86_32_constants.cpp
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright (c) 2022, 2025, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-#include "macroAssembler_x86.hpp"
-
-ATTRIBUTE_ALIGNED(16) static const juint _ONES[] = {
-    0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0xbff00000UL
-};
-address MacroAssembler::ONES = (address)_ONES;
-
-ATTRIBUTE_ALIGNED(16) static const juint _PI4_INV[] = {
-    0x6dc9c883UL, 0x3ff45f30UL
-};
-address MacroAssembler::PI4_INV = (address)_PI4_INV;
-
-ATTRIBUTE_ALIGNED(16) static const juint _PI4X3[] = {
-    0x54443000UL, 0xbfe921fbUL, 0x3b39a000UL, 0x3d373dcbUL, 0xe0e68948UL,
-    0xba845c06UL
-};
-address MacroAssembler::PI4X3 = (address)_PI4X3;
-
-ATTRIBUTE_ALIGNED(16) static const juint _PI4X4[] = {
-    0x54400000UL, 0xbfe921fbUL, 0x1a600000UL, 0xbdc0b461UL, 0x2e000000UL,
-    0xbb93198aUL, 0x252049c1UL, 0xb96b839aUL
-};
-address MacroAssembler::PI4X4 = (address)_PI4X4;
-
-ATTRIBUTE_ALIGNED(16) static const juint _L_2IL0FLOATPACKET_0[] = {
-    0xffffffffUL, 0x7fffffffUL, 0x00000000UL, 0x00000000UL
-};
-address MacroAssembler::L_2IL0FLOATPACKET_0 = (address)_L_2IL0FLOATPACKET_0;
diff --git a/src/hotspot/cpu/x86/macroAssembler_x86_32_cos.cpp b/src/hotspot/cpu/x86/macroAssembler_x86_32_cos.cpp
deleted file mode 100644
index dce16756a66..00000000000
--- a/src/hotspot/cpu/x86/macroAssembler_x86_32_cos.cpp
+++ /dev/null
@@ -1,427 +0,0 @@
-/*
-* Copyright (c) 2016, 2021, Intel Corporation. All rights reserved.
-* Intel Math Library (LIBM) Source Code
-*
-* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
-*
-* This code is free software; you can redistribute it and/or modify it
-* under the terms of the GNU General Public License version 2 only, as
-* published by the Free Software Foundation.
-*
-* This code is distributed in the hope that it will be useful, but WITHOUT
-* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-* FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-* version 2 for more details (a copy is included in the LICENSE file that
-* accompanied this code).
-*
-* You should have received a copy of the GNU General Public License version
-* 2 along with this work; if not, write to the Free Software Foundation,
-* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
-*
-* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
-* or visit www.oracle.com if you need additional information or have any
-* questions.
-*
-*/
-
-#include "asm/assembler.hpp"
-#include "asm/assembler.inline.hpp"
-#include "macroAssembler_x86.hpp"
-#include "runtime/stubRoutines.hpp"
-#include "utilities/globalDefinitions.hpp"
-
-/******************************************************************************/
-//                     ALGORITHM DESCRIPTION - COS()
-//                     ---------------------
-//
-//     1. RANGE REDUCTION
-//
-//     We perform an initial range reduction from X to r with
-//
-//          X =~= N * pi/32 + r
-//
-//     so that |r| <= pi/64 + epsilon. We restrict inputs to those
-//     where |N| <= 932560. Beyond this, the range reduction is
-//     insufficiently accurate. For extremely small inputs,
-//     denormalization can occur internally, impacting performance.
-//     This means that the main path is actually only taken for
-//     2^-252 <= |X| < 90112.
-//
-//     To avoid branches, we perform the range reduction to full
-//     accuracy each time.
-//
-//          X - N * (P_1 + P_2 + P_3)
-//
-//     where P_1 and P_2 are 32-bit numbers (so multiplication by N
-//     is exact) and P_3 is a 53-bit number. Together, these
-//     approximate pi well enough for all cases in the restricted
-//     range.
-//
-//     The main reduction sequence is:
-//
-//             y = 32/pi * x
-//             N = integer(y)
-//     (computed by adding and subtracting off SHIFTER)
-//
-//             m_1 = N * P_1
-//             m_2 = N * P_2
-//             r_1 = x - m_1
-//             r = r_1 - m_2
-//     (this r can be used for most of the calculation)
-//
-//             c_1 = r_1 - r
-//             m_3 = N * P_3
-//             c_2 = c_1 - m_2
-//             c = c_2 - m_3
-//
-//     2. MAIN ALGORITHM
-//
-//     The algorithm uses a table lookup based on B = M * pi / 32
-//     where M = N mod 64. The stored values are:
-//       sigma             closest power of 2 to cos(B)
-//       C_hl              53-bit cos(B) - sigma
-//       S_hi + S_lo       2 * 53-bit sin(B)
-//
-//     The computation is organized as follows:
-//
-//          sin(B + r + c) = [sin(B) + sigma * r] +
-//                           r * (cos(B) - sigma) +
-//                           sin(B) * [cos(r + c) - 1] +
-//                           cos(B) * [sin(r + c) - r]
-//
-//     which is approximately:
-//
-//          [S_hi + sigma * r] +
-//          C_hl * r +
-//          S_lo + S_hi * [(cos(r) - 1) - r * c] +
-//          (C_hl + sigma) * [(sin(r) - r) + c]
-//
-//     and this is what is actually computed. We separate this sum
-//     into four parts:
-//
-//          hi + med + pols + corr
-//
-//     where
-//
-//          hi       = S_hi + sigma r
-//          med      = C_hl * r
-//          pols     = S_hi * (cos(r) - 1) + (C_hl + sigma) * (sin(r) - r)
-//          corr     = S_lo + c * ((C_hl + sigma) - S_hi * r)
-//
-//     3. POLYNOMIAL
-//
-//     The polynomial S_hi * (cos(r) - 1) + (C_hl + sigma) *
-//     (sin(r) - r) can be rearranged freely, since it is quite
-//     small, so we exploit parallelism to the fullest.
-//
-//          psc4       =   SC_4 * r_1
-//          msc4       =   psc4 * r
-//          r2         =   r * r
-//          msc2       =   SC_2 * r2
-//          r4         =   r2 * r2
-//          psc3       =   SC_3 + msc4
-//          psc1       =   SC_1 + msc2
-//          msc3       =   r4 * psc3
-//          sincospols =   psc1 + msc3
-//          pols       =   sincospols *
-//                         <S_hi * r^2 | (C_hl + sigma) * r^3>
-//
-//     4. CORRECTION TERM
-//
-//     This is where the "c" component of the range reduction is
-//     taken into account; recall that just "r" is used for most of
-//     the calculation.
-//
-//          -c   = m_3 - c_2
-//          -d   = S_hi * r - (C_hl + sigma)
-//          corr = -c * -d + S_lo
-//
-//     5. COMPENSATED SUMMATIONS
-//
-//     The two successive compensated summations add up the high
-//     and medium parts, leaving just the low parts to add up at
-//     the end.
-//
-//          rs        =  sigma * r
-//          res_int   =  S_hi + rs
-//          k_0       =  S_hi - res_int
-//          k_2       =  k_0 + rs
-//          med       =  C_hl * r
-//          res_hi    =  res_int + med
-//          k_1       =  res_int - res_hi
-//          k_3       =  k_1 + med
-//
-//     6. FINAL SUMMATION
-//
-//     We now add up all the small parts:
-//
-//          res_lo = pols(hi) + pols(lo) + corr + k_1 + k_3
-//
-//     Now the overall result is just:
-//
-//          res_hi + res_lo
-//
-//     7. SMALL ARGUMENTS
-//
-//     Inputs with |X| < 2^-252 are treated specially as
-//     1 - |x|.
-//
-// Special cases:
-//  cos(NaN) = quiet NaN, and raise invalid exception
-//  cos(INF) = NaN and raise invalid exception
-//  cos(0) = 1
-//
-/******************************************************************************/
-
-// The 32 bit code is at most SSE2 compliant
-
-ATTRIBUTE_ALIGNED(16) static const juint _static_const_table_cos[] =
-{
-    0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
-    0x00000000UL, 0x00000000UL, 0x3ff00000UL, 0x176d6d31UL, 0xbf73b92eUL,
-    0xbc29b42cUL, 0x3fb917a6UL, 0xe0000000UL, 0xbc3e2718UL, 0x00000000UL,
-    0x3ff00000UL, 0x011469fbUL, 0xbf93ad06UL, 0x3c69a60bUL, 0x3fc8f8b8UL,
-    0xc0000000UL, 0xbc626d19UL, 0x00000000UL, 0x3ff00000UL, 0x939d225aUL,
-    0xbfa60beaUL, 0x2ed59f06UL, 0x3fd29406UL, 0xa0000000UL, 0xbc75d28dUL,
-    0x00000000UL, 0x3ff00000UL, 0x866b95cfUL, 0xbfb37ca1UL, 0xa6aea963UL,
-    0x3fd87de2UL, 0xe0000000UL, 0xbc672cedUL, 0x00000000UL, 0x3ff00000UL,
-    0x73fa1279UL, 0xbfbe3a68UL, 0x3806f63bUL, 0x3fde2b5dUL, 0x20000000UL,
-    0x3c5e0d89UL, 0x00000000UL, 0x3ff00000UL, 0x5bc57974UL, 0xbfc59267UL,
-    0x39ae68c8UL, 0x3fe1c73bUL, 0x20000000UL, 0x3c8b25ddUL, 0x00000000UL,
-    0x3ff00000UL, 0x53aba2fdUL, 0xbfcd0dfeUL, 0x25091dd6UL, 0x3fe44cf3UL,
-    0x20000000UL, 0x3c68076aUL, 0x00000000UL, 0x3ff00000UL, 0x99fcef32UL,
-    0x3fca8279UL, 0x667f3bcdUL, 0x3fe6a09eUL, 0x20000000UL, 0xbc8bdd34UL,
-    0x00000000UL, 0x3fe00000UL, 0x94247758UL, 0x3fc133ccUL, 0x6b151741UL,
-    0x3fe8bc80UL, 0x20000000UL, 0xbc82c5e1UL, 0x00000000UL, 0x3fe00000UL,
-    0x9ae68c87UL, 0x3fac73b3UL, 0x290ea1a3UL, 0x3fea9b66UL, 0xe0000000UL,
-    0x3c39f630UL, 0x00000000UL, 0x3fe00000UL, 0x7f909c4eUL, 0xbf9d4a2cUL,
-    0xf180bdb1UL, 0x3fec38b2UL, 0x80000000UL, 0xbc76e0b1UL, 0x00000000UL,
-    0x3fe00000UL, 0x65455a75UL, 0xbfbe0875UL, 0xcf328d46UL, 0x3fed906bUL,
-    0x20000000UL, 0x3c7457e6UL, 0x00000000UL, 0x3fe00000UL, 0x76acf82dUL,
-    0x3fa4a031UL, 0x56c62ddaUL, 0x3fee9f41UL, 0xe0000000UL, 0x3c8760b1UL,
-    0x00000000UL, 0x3fd00000UL, 0x0e5967d5UL, 0xbfac1d1fUL, 0xcff75cb0UL,
-    0x3fef6297UL, 0x20000000UL, 0x3c756217UL, 0x00000000UL, 0x3fd00000UL,
-    0x0f592f50UL, 0xbf9ba165UL, 0xa3d12526UL, 0x3fefd88dUL, 0x40000000UL,
-    0xbc887df6UL, 0x00000000UL, 0x3fc00000UL, 0x00000000UL, 0x00000000UL,
-    0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
-    0x00000000UL, 0x0f592f50UL, 0x3f9ba165UL, 0xa3d12526UL, 0x3fefd88dUL,
-    0x40000000UL, 0xbc887df6UL, 0x00000000UL, 0xbfc00000UL, 0x0e5967d5UL,
-    0x3fac1d1fUL, 0xcff75cb0UL, 0x3fef6297UL, 0x20000000UL, 0x3c756217UL,
-    0x00000000UL, 0xbfd00000UL, 0x76acf82dUL, 0xbfa4a031UL, 0x56c62ddaUL,
-    0x3fee9f41UL, 0xe0000000UL, 0x3c8760b1UL, 0x00000000UL, 0xbfd00000UL,
-    0x65455a75UL, 0x3fbe0875UL, 0xcf328d46UL, 0x3fed906bUL, 0x20000000UL,
-    0x3c7457e6UL, 0x00000000UL, 0xbfe00000UL, 0x7f909c4eUL, 0x3f9d4a2cUL,
-    0xf180bdb1UL, 0x3fec38b2UL, 0x80000000UL, 0xbc76e0b1UL, 0x00000000UL,
-    0xbfe00000UL, 0x9ae68c87UL, 0xbfac73b3UL, 0x290ea1a3UL, 0x3fea9b66UL,
-    0xe0000000UL, 0x3c39f630UL, 0x00000000UL, 0xbfe00000UL, 0x94247758UL,
-    0xbfc133ccUL, 0x6b151741UL, 0x3fe8bc80UL, 0x20000000UL, 0xbc82c5e1UL,
-    0x00000000UL, 0xbfe00000UL, 0x99fcef32UL, 0xbfca8279UL, 0x667f3bcdUL,
-    0x3fe6a09eUL, 0x20000000UL, 0xbc8bdd34UL, 0x00000000UL, 0xbfe00000UL,
-    0x53aba2fdUL, 0x3fcd0dfeUL, 0x25091dd6UL, 0x3fe44cf3UL, 0x20000000UL,
-    0x3c68076aUL, 0x00000000UL, 0xbff00000UL, 0x5bc57974UL, 0x3fc59267UL,
-    0x39ae68c8UL, 0x3fe1c73bUL, 0x20000000UL, 0x3c8b25ddUL, 0x00000000UL,
-    0xbff00000UL, 0x73fa1279UL, 0x3fbe3a68UL, 0x3806f63bUL, 0x3fde2b5dUL,
-    0x20000000UL, 0x3c5e0d89UL, 0x00000000UL, 0xbff00000UL, 0x866b95cfUL,
-    0x3fb37ca1UL, 0xa6aea963UL, 0x3fd87de2UL, 0xe0000000UL, 0xbc672cedUL,
-    0x00000000UL, 0xbff00000UL, 0x939d225aUL, 0x3fa60beaUL, 0x2ed59f06UL,
-    0x3fd29406UL, 0xa0000000UL, 0xbc75d28dUL, 0x00000000UL, 0xbff00000UL,
-    0x011469fbUL, 0x3f93ad06UL, 0x3c69a60bUL, 0x3fc8f8b8UL, 0xc0000000UL,
-    0xbc626d19UL, 0x00000000UL, 0xbff00000UL, 0x176d6d31UL, 0x3f73b92eUL,
-    0xbc29b42cUL, 0x3fb917a6UL, 0xe0000000UL, 0xbc3e2718UL, 0x00000000UL,
-    0xbff00000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
-    0x00000000UL, 0x00000000UL, 0x00000000UL, 0xbff00000UL, 0x176d6d31UL,
-    0x3f73b92eUL, 0xbc29b42cUL, 0xbfb917a6UL, 0xe0000000UL, 0x3c3e2718UL,
-    0x00000000UL, 0xbff00000UL, 0x011469fbUL, 0x3f93ad06UL, 0x3c69a60bUL,
-    0xbfc8f8b8UL, 0xc0000000UL, 0x3c626d19UL, 0x00000000UL, 0xbff00000UL,
-    0x939d225aUL, 0x3fa60beaUL, 0x2ed59f06UL, 0xbfd29406UL, 0xa0000000UL,
-    0x3c75d28dUL, 0x00000000UL, 0xbff00000UL, 0x866b95cfUL, 0x3fb37ca1UL,
-    0xa6aea963UL, 0xbfd87de2UL, 0xe0000000UL, 0x3c672cedUL, 0x00000000UL,
-    0xbff00000UL, 0x73fa1279UL, 0x3fbe3a68UL, 0x3806f63bUL, 0xbfde2b5dUL,
-    0x20000000UL, 0xbc5e0d89UL, 0x00000000UL, 0xbff00000UL, 0x5bc57974UL,
-    0x3fc59267UL, 0x39ae68c8UL, 0xbfe1c73bUL, 0x20000000UL, 0xbc8b25ddUL,
-    0x00000000UL, 0xbff00000UL, 0x53aba2fdUL, 0x3fcd0dfeUL, 0x25091dd6UL,
-    0xbfe44cf3UL, 0x20000000UL, 0xbc68076aUL, 0x00000000UL, 0xbff00000UL,
-    0x99fcef32UL, 0xbfca8279UL, 0x667f3bcdUL, 0xbfe6a09eUL, 0x20000000UL,
-    0x3c8bdd34UL, 0x00000000UL, 0xbfe00000UL, 0x94247758UL, 0xbfc133ccUL,
-    0x6b151741UL, 0xbfe8bc80UL, 0x20000000UL, 0x3c82c5e1UL, 0x00000000UL,
-    0xbfe00000UL, 0x9ae68c87UL, 0xbfac73b3UL, 0x290ea1a3UL, 0xbfea9b66UL,
-    0xe0000000UL, 0xbc39f630UL, 0x00000000UL, 0xbfe00000UL, 0x7f909c4eUL,
-    0x3f9d4a2cUL, 0xf180bdb1UL, 0xbfec38b2UL, 0x80000000UL, 0x3c76e0b1UL,
-    0x00000000UL, 0xbfe00000UL, 0x65455a75UL, 0x3fbe0875UL, 0xcf328d46UL,
-    0xbfed906bUL, 0x20000000UL, 0xbc7457e6UL, 0x00000000UL, 0xbfe00000UL,
-    0x76acf82dUL, 0xbfa4a031UL, 0x56c62ddaUL, 0xbfee9f41UL, 0xe0000000UL,
-    0xbc8760b1UL, 0x00000000UL, 0xbfd00000UL, 0x0e5967d5UL, 0x3fac1d1fUL,
-    0xcff75cb0UL, 0xbfef6297UL, 0x20000000UL, 0xbc756217UL, 0x00000000UL,
-    0xbfd00000UL, 0x0f592f50UL, 0x3f9ba165UL, 0xa3d12526UL, 0xbfefd88dUL,
-    0x40000000UL, 0x3c887df6UL, 0x00000000UL, 0xbfc00000UL, 0x00000000UL,
-    0x00000000UL, 0x00000000UL, 0xbff00000UL, 0x00000000UL, 0x00000000UL,
-    0x00000000UL, 0x00000000UL, 0x0f592f50UL, 0xbf9ba165UL, 0xa3d12526UL,
-    0xbfefd88dUL, 0x40000000UL, 0x3c887df6UL, 0x00000000UL, 0x3fc00000UL,
-    0x0e5967d5UL, 0xbfac1d1fUL, 0xcff75cb0UL, 0xbfef6297UL, 0x20000000UL,
-    0xbc756217UL, 0x00000000UL, 0x3fd00000UL, 0x76acf82dUL, 0x3fa4a031UL,
-    0x56c62ddaUL, 0xbfee9f41UL, 0xe0000000UL, 0xbc8760b1UL, 0x00000000UL,
-    0x3fd00000UL, 0x65455a75UL, 0xbfbe0875UL, 0xcf328d46UL, 0xbfed906bUL,
-    0x20000000UL, 0xbc7457e6UL, 0x00000000UL, 0x3fe00000UL, 0x7f909c4eUL,
-    0xbf9d4a2cUL, 0xf180bdb1UL, 0xbfec38b2UL, 0x80000000UL, 0x3c76e0b1UL,
-    0x00000000UL, 0x3fe00000UL, 0x9ae68c87UL, 0x3fac73b3UL, 0x290ea1a3UL,
-    0xbfea9b66UL, 0xe0000000UL, 0xbc39f630UL, 0x00000000UL, 0x3fe00000UL,
-    0x94247758UL, 0x3fc133ccUL, 0x6b151741UL, 0xbfe8bc80UL, 0x20000000UL,
-    0x3c82c5e1UL, 0x00000000UL, 0x3fe00000UL, 0x99fcef32UL, 0x3fca8279UL,
-    0x667f3bcdUL, 0xbfe6a09eUL, 0x20000000UL, 0x3c8bdd34UL, 0x00000000UL,
-    0x3fe00000UL, 0x53aba2fdUL, 0xbfcd0dfeUL, 0x25091dd6UL, 0xbfe44cf3UL,
-    0x20000000UL, 0xbc68076aUL, 0x00000000UL, 0x3ff00000UL, 0x5bc57974UL,
-    0xbfc59267UL, 0x39ae68c8UL, 0xbfe1c73bUL, 0x20000000UL, 0xbc8b25ddUL,
-    0x00000000UL, 0x3ff00000UL, 0x73fa1279UL, 0xbfbe3a68UL, 0x3806f63bUL,
-    0xbfde2b5dUL, 0x20000000UL, 0xbc5e0d89UL, 0x00000000UL, 0x3ff00000UL,
-    0x866b95cfUL, 0xbfb37ca1UL, 0xa6aea963UL, 0xbfd87de2UL, 0xe0000000UL,
-    0x3c672cedUL, 0x00000000UL, 0x3ff00000UL, 0x939d225aUL, 0xbfa60beaUL,
-    0x2ed59f06UL, 0xbfd29406UL, 0xa0000000UL, 0x3c75d28dUL, 0x00000000UL,
-    0x3ff00000UL, 0x011469fbUL, 0xbf93ad06UL, 0x3c69a60bUL, 0xbfc8f8b8UL,
-    0xc0000000UL, 0x3c626d19UL, 0x00000000UL, 0x3ff00000UL, 0x176d6d31UL,
-    0xbf73b92eUL, 0xbc29b42cUL, 0xbfb917a6UL, 0xe0000000UL, 0x3c3e2718UL,
-    0x00000000UL, 0x3ff00000UL, 0x55555555UL, 0xbfc55555UL, 0x00000000UL,
-    0xbfe00000UL, 0x11111111UL, 0x3f811111UL, 0x55555555UL, 0x3fa55555UL,
-    0x1a01a01aUL, 0xbf2a01a0UL, 0x16c16c17UL, 0xbf56c16cUL, 0xa556c734UL,
-    0x3ec71de3UL, 0x1a01a01aUL, 0x3efa01a0UL, 0x1a600000UL, 0x3d90b461UL,
-    0x1a600000UL, 0x3d90b461UL, 0x54400000UL, 0x3fb921fbUL, 0x00000000UL,
-    0x00000000UL, 0x2e037073UL, 0x3b63198aUL, 0x00000000UL, 0x00000000UL,
-    0x6dc9c883UL, 0x40245f30UL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
-    0x43380000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x3ff00000UL,
-    0x00000000UL, 0x00000000UL, 0x00000000UL, 0x80000000UL, 0x00000000UL,
-    0x00000000UL, 0x00000000UL, 0x80000000UL, 0x00000000UL, 0x00000000UL,
-    0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL
-};
-//registers,
-// input: (rbp + 8)
-// scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
-//          eax, ecx, edx, ebx (tmp)
-
-// Code generated by Intel C compiler for LIBM library
-
-void MacroAssembler::fast_cos(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
-                              XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
-                              Register eax, Register ecx, Register edx, Register tmp) {
-  Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
-  Label start;
-
-  assert_different_registers(tmp, eax, ecx, edx);
-
-  address static_const_table_cos = (address)_static_const_table_cos;
-
-  bind(start);
-  subl(rsp, 120);
-  movl(Address(rsp, 56), tmp);
-  lea(tmp, ExternalAddress(static_const_table_cos));
-  movsd(xmm0, Address(rsp, 128));
-  pextrw(eax, xmm0, 3);
-  andl(eax, 32767);
-  subl(eax, 12336);
-  cmpl(eax, 4293);
-  jcc(Assembler::above, L_2TAG_PACKET_0_0_2);
-  movsd(xmm1, Address(tmp, 2160));
-  mulsd(xmm1, xmm0);
-  movdqu(xmm5, Address(tmp, 2240));
-  movsd(xmm4, Address(tmp, 2224));
-  pand(xmm4, xmm0);
-  por(xmm5, xmm4);
-  movsd(xmm3, Address(tmp, 2128));
-  movdqu(xmm2, Address(tmp, 2112));
-  addpd(xmm1, xmm5);
-  cvttsd2sil(edx, xmm1);
-  cvtsi2sdl(xmm1, edx);
-  mulsd(xmm3, xmm1);
-  unpcklpd(xmm1, xmm1);
-  addl(edx, 1865232);
-  movdqu(xmm4, xmm0);
-  andl(edx, 63);
-  movdqu(xmm5, Address(tmp, 2096));
-  lea(eax, Address(tmp, 0));
-  shll(edx, 5);
-  addl(eax, edx);
-  mulpd(xmm2, xmm1);
-  subsd(xmm0, xmm3);
-  mulsd(xmm1, Address(tmp, 2144));
-  subsd(xmm4, xmm3);
-  movsd(xmm7, Address(eax, 8));
-  unpcklpd(xmm0, xmm0);
-  movapd(xmm3, xmm4);
-  subsd(xmm4, xmm2);
-  mulpd(xmm5, xmm0);
-  subpd(xmm0, xmm2);
-  movdqu(xmm6, Address(tmp, 2064));
-  mulsd(xmm7, xmm4);
-  subsd(xmm3, xmm4);
-  mulpd(xmm5, xmm0);
-  mulpd(xmm0, xmm0);
-  subsd(xmm3, xmm2);
-  movdqu(xmm2, Address(eax, 0));
-  subsd(xmm1, xmm3);
-  movsd(xmm3, Address(eax, 24));
-  addsd(xmm2, xmm3);
-  subsd(xmm7, xmm2);
-  mulsd(xmm2, xmm4);
-  mulpd(xmm6, xmm0);
-  mulsd(xmm3, xmm4);
-  mulpd(xmm2, xmm0);
-  mulpd(xmm0, xmm0);
-  addpd(xmm5, Address(tmp, 2080));
-  mulsd(xmm4, Address(eax, 0));
-  addpd(xmm6, Address(tmp, 2048));
-  mulpd(xmm5, xmm0);
-  movapd(xmm0, xmm3);
-  addsd(xmm3, Address(eax, 8));
-  mulpd(xmm1, xmm7);
-  movapd(xmm7, xmm4);
-  addsd(xmm4, xmm3);
-  addpd(xmm6, xmm5);
-  movsd(xmm5, Address(eax, 8));
-  subsd(xmm5, xmm3);
-  subsd(xmm3, xmm4);
-  addsd(xmm1, Address(eax, 16));
-  mulpd(xmm6, xmm2);
-  addsd(xmm5, xmm0);
-  addsd(xmm3, xmm7);
-  addsd(xmm1, xmm5);
-  addsd(xmm1, xmm3);
-  addsd(xmm1, xmm6);
-  unpckhpd(xmm6, xmm6);
-  addsd(xmm1, xmm6);
-  addsd(xmm4, xmm1);
-  movsd(Address(rsp, 0), xmm4);
-  fld_d(Address(rsp, 0));
-  jmp(L_2TAG_PACKET_1_0_2);
-
-  bind(L_2TAG_PACKET_0_0_2);
-  jcc(Assembler::greater, L_2TAG_PACKET_2_0_2);
-  pextrw(eax, xmm0, 3);
-  andl(eax, 32767);
-  pinsrw(xmm0, eax, 3);
-  movsd(xmm1, Address(tmp, 2192));
-  subsd(xmm1, xmm0);
-  movsd(Address(rsp, 0), xmm1);
-  fld_d(Address(rsp, 0));
-  jmp(L_2TAG_PACKET_1_0_2);
-
-  bind(L_2TAG_PACKET_2_0_2);
-  movl(eax, Address(rsp, 132));
-  andl(eax, 2146435072);
-  cmpl(eax, 2146435072);
-  jcc(Assembler::equal, L_2TAG_PACKET_3_0_2);
-  subl(rsp, 32);
-  movsd(Address(rsp, 0), xmm0);
-  lea(eax, Address(rsp, 40));
-  movl(Address(rsp, 8), eax);
-  movl(eax, 1);
-  movl(Address(rsp, 12), eax);
-  call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlibm_sin_cos_huge())));
-  addl(rsp, 32);
-  fld_d(Address(rsp, 8));
-  jmp(L_2TAG_PACKET_1_0_2);
-
-  bind(L_2TAG_PACKET_3_0_2);
-  fld_d(Address(rsp, 128));
-  fmul_d(Address(tmp, 2208));
-
-  bind(L_2TAG_PACKET_1_0_2);
-  movl(tmp, Address(rsp, 56));
-}
diff --git a/src/hotspot/cpu/x86/macroAssembler_x86_32_exp.cpp b/src/hotspot/cpu/x86/macroAssembler_x86_32_exp.cpp
deleted file mode 100644
index 2e6c1a617bb..00000000000
--- a/src/hotspot/cpu/x86/macroAssembler_x86_32_exp.cpp
+++ /dev/null
@@ -1,329 +0,0 @@
-/*
-* Copyright (c) 2016, 2021, Intel Corporation. All rights reserved.
-* Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved.
-* Intel Math Library (LIBM) Source Code
-*
-* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
-*
-* This code is free software; you can redistribute it and/or modify it
-* under the terms of the GNU General Public License version 2 only, as
-* published by the Free Software Foundation.
-*
-* This code is distributed in the hope that it will be useful, but WITHOUT
-* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-* FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-* version 2 for more details (a copy is included in the LICENSE file that
-* accompanied this code).
-*
-* You should have received a copy of the GNU General Public License version
-* 2 along with this work; if not, write to the Free Software Foundation,
-* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
-*
-* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
-* or visit www.oracle.com if you need additional information or have any
-* questions.
-*
-*/
-
-#include "asm/assembler.hpp"
-#include "asm/assembler.inline.hpp"
-#include "macroAssembler_x86.hpp"
-#include "runtime/stubRoutines.hpp"
-#include "utilities/globalDefinitions.hpp"
-
-/******************************************************************************/
-//                     ALGORITHM DESCRIPTION - EXP()
-//                     ---------------------
-//
-// Description:
-//  Let K = 64 (table size).
-//        x    x/log(2)     n
-//       e  = 2          = 2 * T[j] * (1 + P(y))
-//  where
-//       x = m*log(2)/K + y,    y in [-log(2)/K..log(2)/K]
-//       m = n*K + j,           m,n,j - signed integer, j in [-K/2..K/2]
-//                  j/K
-//       values of 2   are tabulated as T[j] = T_hi[j] ( 1 + T_lo[j]).
-//
-//       P(y) is a minimax polynomial approximation of exp(x)-1
-//       on small interval [-log(2)/K..log(2)/K] (were calculated by Maple V).
-//
-//  To avoid problems with arithmetic overflow and underflow,
-//            n                        n1  n2
-//  value of 2  is safely computed as 2 * 2 where n1 in [-BIAS/2..BIAS/2]
-//  where BIAS is a value of exponent bias.
-//
-// Special cases:
-//  exp(NaN) = NaN
-//  exp(+INF) = +INF
-//  exp(-INF) = 0
-//  exp(x) = 1 for subnormals
-//  for finite argument, only exp(0)=1 is exact
-//  For IEEE double
-//    if x >  709.782712893383973096 then exp(x) overflow
-//    if x < -745.133219101941108420 then exp(x) underflow
-//
-/******************************************************************************/
-
-// The 32 bit code is at most SSE2 compliant
-
-ATTRIBUTE_ALIGNED(16) static const juint _static_const_table[] =
-{
-    0x00000000UL, 0xfff00000UL, 0x00000000UL, 0xfff00000UL, 0xffffffc0UL,
-    0x00000000UL, 0xffffffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL,
-    0x0000ffc0UL, 0x00000000UL, 0x00000000UL, 0x43380000UL, 0x00000000UL,
-    0x43380000UL, 0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL,
-    0xfefa0000UL, 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL, 0xbc9e3b3aUL,
-    0x3d1cf79aUL, 0xbc9e3b3aUL, 0x3d1cf79aUL, 0xfffffffeUL, 0x3fdfffffUL,
-    0xfffffffeUL, 0x3fdfffffUL, 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL,
-    0x3fa55555UL, 0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL,
-    0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x0e03754dUL,
-    0x3cad7bbfUL, 0x3e778060UL, 0x00002c9aUL, 0x3567f613UL, 0x3c8cd252UL,
-    0xd3158574UL, 0x000059b0UL, 0x61e6c861UL, 0x3c60f74eUL, 0x18759bc8UL,
-    0x00008745UL, 0x5d837b6cUL, 0x3c979aa6UL, 0x6cf9890fUL, 0x0000b558UL,
-    0x702f9cd1UL, 0x3c3ebe3dUL, 0x32d3d1a2UL, 0x0000e3ecUL, 0x1e63bcd8UL,
-    0x3ca3516eUL, 0xd0125b50UL, 0x00011301UL, 0x26f0387bUL, 0x3ca4c554UL,
-    0xaea92ddfUL, 0x0001429aUL, 0x62523fb6UL, 0x3ca95153UL, 0x3c7d517aUL,
-    0x000172b8UL, 0x3f1353bfUL, 0x3c8b898cUL, 0xeb6fcb75UL, 0x0001a35bUL,
-    0x3e3a2f5fUL, 0x3c9aecf7UL, 0x3168b9aaUL, 0x0001d487UL, 0x44a6c38dUL,
-    0x3c8a6f41UL, 0x88628cd6UL, 0x0002063bUL, 0xe3a8a894UL, 0x3c968efdUL,
-    0x6e756238UL, 0x0002387aUL, 0x981fe7f2UL, 0x3c80472bUL, 0x65e27cddUL,
-    0x00026b45UL, 0x6d09ab31UL, 0x3c82f7e1UL, 0xf51fdee1UL, 0x00029e9dUL,
-    0x720c0ab3UL, 0x3c8b3782UL, 0xa6e4030bUL, 0x0002d285UL, 0x4db0abb6UL,
-    0x3c834d75UL, 0x0a31b715UL, 0x000306feUL, 0x5dd3f84aUL, 0x3c8fdd39UL,
-    0xb26416ffUL, 0x00033c08UL, 0xcc187d29UL, 0x3ca12f8cUL, 0x373aa9caUL,
-    0x000371a7UL, 0x738b5e8bUL, 0x3ca7d229UL, 0x34e59ff6UL, 0x0003a7dbUL,
-    0xa72a4c6dUL, 0x3c859f48UL, 0x4c123422UL, 0x0003dea6UL, 0x259d9205UL,
-    0x3ca8b846UL, 0x21f72e29UL, 0x0004160aUL, 0x60c2ac12UL, 0x3c4363edUL,
-    0x6061892dUL, 0x00044e08UL, 0xdaa10379UL, 0x3c6ecce1UL, 0xb5c13cd0UL,
-    0x000486a2UL, 0xbb7aafb0UL, 0x3c7690ceUL, 0xd5362a27UL, 0x0004bfdaUL,
-    0x9b282a09UL, 0x3ca083ccUL, 0x769d2ca6UL, 0x0004f9b2UL, 0xc1aae707UL,
-    0x3ca509b0UL, 0x569d4f81UL, 0x0005342bUL, 0x18fdd78eUL, 0x3c933505UL,
-    0x36b527daUL, 0x00056f47UL, 0xe21c5409UL, 0x3c9063e1UL, 0xdd485429UL,
-    0x0005ab07UL, 0x2b64c035UL, 0x3c9432e6UL, 0x15ad2148UL, 0x0005e76fUL,
-    0x99f08c0aUL, 0x3ca01284UL, 0xb03a5584UL, 0x0006247eUL, 0x0073dc06UL,
-    0x3c99f087UL, 0x82552224UL, 0x00066238UL, 0x0da05571UL, 0x3c998d4dUL,
-    0x667f3bccUL, 0x0006a09eUL, 0x86ce4786UL, 0x3ca52bb9UL, 0x3c651a2eUL,
-    0x0006dfb2UL, 0x206f0dabUL, 0x3ca32092UL, 0xe8ec5f73UL, 0x00071f75UL,
-    0x8e17a7a6UL, 0x3ca06122UL, 0x564267c8UL, 0x00075febUL, 0x461e9f86UL,
-    0x3ca244acUL, 0x73eb0186UL, 0x0007a114UL, 0xabd66c55UL, 0x3c65ebe1UL,
-    0x36cf4e62UL, 0x0007e2f3UL, 0xbbff67d0UL, 0x3c96fe9fUL, 0x994cce12UL,
-    0x00082589UL, 0x14c801dfUL, 0x3c951f14UL, 0x9b4492ecUL, 0x000868d9UL,
-    0xc1f0eab4UL, 0x3c8db72fUL, 0x422aa0dbUL, 0x0008ace5UL, 0x59f35f44UL,
-    0x3c7bf683UL, 0x99157736UL, 0x0008f1aeUL, 0x9c06283cUL, 0x3ca360baUL,
-    0xb0cdc5e4UL, 0x00093737UL, 0x20f962aaUL, 0x3c95e8d1UL, 0x9fde4e4fUL,
-    0x00097d82UL, 0x2b91ce27UL, 0x3c71affcUL, 0x82a3f090UL, 0x0009c491UL,
-    0x589a2ebdUL, 0x3c9b6d34UL, 0x7b5de564UL, 0x000a0c66UL, 0x9ab89880UL,
-    0x3c95277cUL, 0xb23e255cUL, 0x000a5503UL, 0x6e735ab3UL, 0x3c846984UL,
-    0x5579fdbfUL, 0x000a9e6bUL, 0x92cb3387UL, 0x3c8c1a77UL, 0x995ad3adUL,
-    0x000ae89fUL, 0xdc2d1d96UL, 0x3ca22466UL, 0xb84f15faUL, 0x000b33a2UL,
-    0xb19505aeUL, 0x3ca1112eUL, 0xf2fb5e46UL, 0x000b7f76UL, 0x0a5fddcdUL,
-    0x3c74ffd7UL, 0x904bc1d2UL, 0x000bcc1eUL, 0x30af0cb3UL, 0x3c736eaeUL,
-    0xdd85529cUL, 0x000c199bUL, 0xd10959acUL, 0x3c84e08fUL, 0x2e57d14bUL,
-    0x000c67f1UL, 0x6c921968UL, 0x3c676b2cUL, 0xdcef9069UL, 0x000cb720UL,
-    0x36df99b3UL, 0x3c937009UL, 0x4a07897bUL, 0x000d072dUL, 0xa63d07a7UL,
-    0x3c74a385UL, 0xdcfba487UL, 0x000d5818UL, 0xd5c192acUL, 0x3c8e5a50UL,
-    0x03db3285UL, 0x000da9e6UL, 0x1c4a9792UL, 0x3c98bb73UL, 0x337b9b5eUL,
-    0x000dfc97UL, 0x603a88d3UL, 0x3c74b604UL, 0xe78b3ff6UL, 0x000e502eUL,
-    0x92094926UL, 0x3c916f27UL, 0xa2a490d9UL, 0x000ea4afUL, 0x41aa2008UL,
-    0x3c8ec3bcUL, 0xee615a27UL, 0x000efa1bUL, 0x31d185eeUL, 0x3c8a64a9UL,
-    0x5b6e4540UL, 0x000f5076UL, 0x4d91cd9dUL, 0x3c77893bUL, 0x819e90d8UL,
-    0x000fa7c1UL, 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x7ff00000UL,
-    0x00000000UL, 0x00000000UL, 0xffffffffUL, 0x7fefffffUL, 0x00000000UL,
-    0x00100000UL
-};
-
-//registers,
-// input: (rbp + 8)
-// scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
-//          rax, rdx, rcx, rbx (tmp)
-
-// Code generated by Intel C compiler for LIBM library
-
-void MacroAssembler::fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
-                              XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
-                              Register eax, Register ecx, Register edx, Register tmp) {
-  Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
-  Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
-  Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, L_2TAG_PACKET_10_0_2, L_2TAG_PACKET_11_0_2;
-  Label L_2TAG_PACKET_12_0_2;
-
-  assert_different_registers(tmp, eax, ecx, edx);
-  address static_const_table = (address)_static_const_table;
-
-  subl(rsp, 120);
-  movl(Address(rsp, 64), tmp);
-  lea(tmp, ExternalAddress(static_const_table));
-  movsd(xmm0, Address(rsp, 128));
-  unpcklpd(xmm0, xmm0);
-  movdqu(xmm1, Address(tmp, 64));          // 0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL
-  movdqu(xmm6, Address(tmp, 48));          // 0x00000000UL, 0x43380000UL, 0x00000000UL, 0x43380000UL
-  movdqu(xmm2, Address(tmp, 80));          // 0xfefa0000UL, 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL
-  movdqu(xmm3, Address(tmp, 96));          // 0xbc9e3b3aUL, 0x3d1cf79aUL, 0xbc9e3b3aUL, 0x3d1cf79aUL
-  pextrw(eax, xmm0, 3);
-  andl(eax, 32767);
-  movl(edx, 16527);
-  subl(edx, eax);
-  subl(eax, 15504);
-  orl(edx, eax);
-  cmpl(edx, INT_MIN);
-  jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2);
-  mulpd(xmm1, xmm0);
-  addpd(xmm1, xmm6);
-  movapd(xmm7, xmm1);
-  subpd(xmm1, xmm6);
-  mulpd(xmm2, xmm1);
-  movdqu(xmm4, Address(tmp, 128));         // 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL, 0x3fa55555UL
-  mulpd(xmm3, xmm1);
-  movdqu(xmm5, Address(tmp, 144));         // 0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL
-  subpd(xmm0, xmm2);
-  movdl(eax, xmm7);
-  movl(ecx, eax);
-  andl(ecx, 63);
-  shll(ecx, 4);
-  sarl(eax, 6);
-  movl(edx, eax);
-  movdqu(xmm6, Address(tmp, 16));          // 0xffffffc0UL, 0x00000000UL, 0xffffffc0UL, 0x00000000UL
-  pand(xmm7, xmm6);
-  movdqu(xmm6, Address(tmp, 32));          // 0x0000ffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL
-  paddq(xmm7, xmm6);
-  psllq(xmm7, 46);
-  subpd(xmm0, xmm3);
-  movdqu(xmm2, Address(tmp, ecx, Address::times_1, 160));
-  mulpd(xmm4, xmm0);
-  movapd(xmm6, xmm0);
-  movapd(xmm1, xmm0);
-  mulpd(xmm6, xmm6);
-  mulpd(xmm0, xmm6);
-  addpd(xmm5, xmm4);
-  mulsd(xmm0, xmm6);
-  mulpd(xmm6, Address(tmp, 112));          // 0xfffffffeUL, 0x3fdfffffUL, 0xfffffffeUL, 0x3fdfffffUL
-  addsd(xmm1, xmm2);
-  unpckhpd(xmm2, xmm2);
-  mulpd(xmm0, xmm5);
-  addsd(xmm1, xmm0);
-  por(xmm2, xmm7);
-  unpckhpd(xmm0, xmm0);
-  addsd(xmm0, xmm1);
-  addsd(xmm0, xmm6);
-  addl(edx, 894);
-  cmpl(edx, 1916);
-  jcc(Assembler::above, L_2TAG_PACKET_1_0_2);
-  mulsd(xmm0, xmm2);
-  addsd(xmm0, xmm2);
-  jmp(L_2TAG_PACKET_2_0_2);
-
-  bind(L_2TAG_PACKET_1_0_2);
-  fnstcw(Address(rsp, 24));
-  movzwl(edx, Address(rsp, 24));
-  orl(edx, 768);
-  movw(Address(rsp, 28), edx);
-  fldcw(Address(rsp, 28));
-  movl(edx, eax);
-  sarl(eax, 1);
-  subl(edx, eax);
-  movdqu(xmm6, Address(tmp, 0));           // 0x00000000UL, 0xfff00000UL, 0x00000000UL, 0xfff00000UL
-  pandn(xmm6, xmm2);
-  addl(eax, 1023);
-  movdl(xmm3, eax);
-  psllq(xmm3, 52);
-  por(xmm6, xmm3);
-  addl(edx, 1023);
-  movdl(xmm4, edx);
-  psllq(xmm4, 52);
-  movsd(Address(rsp, 8), xmm0);
-  fld_d(Address(rsp, 8));
-  movsd(Address(rsp, 16), xmm6);
-  fld_d(Address(rsp, 16));
-  fmula(1);
-  faddp(1);
-  movsd(Address(rsp, 8), xmm4);
-  fld_d(Address(rsp, 8));
-  fmulp(1);
-  fstp_d(Address(rsp, 8));
-  movsd(xmm0, Address(rsp, 8));
-  fldcw(Address(rsp, 24));
-  pextrw(ecx, xmm0, 3);
-  andl(ecx, 32752);
-  cmpl(ecx, 32752);
-  jcc(Assembler::aboveEqual, L_2TAG_PACKET_3_0_2);
-  cmpl(ecx, 0);
-  jcc(Assembler::equal, L_2TAG_PACKET_4_0_2);
-  jmp(L_2TAG_PACKET_2_0_2);
-  cmpl(ecx, INT_MIN);
-  jcc(Assembler::below, L_2TAG_PACKET_3_0_2);
-  cmpl(ecx, -1064950997);
-  jcc(Assembler::below, L_2TAG_PACKET_2_0_2);
-  jcc(Assembler::above, L_2TAG_PACKET_4_0_2);
-  movl(edx, Address(rsp, 128));
-  cmpl(edx, -17155601);
-  jcc(Assembler::below, L_2TAG_PACKET_2_0_2);
-  jmp(L_2TAG_PACKET_4_0_2);
-
-  bind(L_2TAG_PACKET_3_0_2);
-  movl(edx, 14);
-  jmp(L_2TAG_PACKET_5_0_2);
-
-  bind(L_2TAG_PACKET_4_0_2);
-  movl(edx, 15);
-
-  bind(L_2TAG_PACKET_5_0_2);
-  movsd(Address(rsp, 0), xmm0);
-  movsd(xmm0, Address(rsp, 128));
-  fld_d(Address(rsp, 0));
-  jmp(L_2TAG_PACKET_6_0_2);
-
-  bind(L_2TAG_PACKET_7_0_2);
-  cmpl(eax, 2146435072);
-  jcc(Assembler::aboveEqual, L_2TAG_PACKET_8_0_2);
-  movl(eax, Address(rsp, 132));
-  cmpl(eax, INT_MIN);
-  jcc(Assembler::aboveEqual, L_2TAG_PACKET_9_0_2);
-  movsd(xmm0, Address(tmp, 1208));         // 0xffffffffUL, 0x7fefffffUL
-  mulsd(xmm0, xmm0);
-  movl(edx, 14);
-  jmp(L_2TAG_PACKET_5_0_2);
-
-  bind(L_2TAG_PACKET_9_0_2);
-  movsd(xmm0, Address(tmp, 1216));
-  mulsd(xmm0, xmm0);
-  movl(edx, 15);
-  jmp(L_2TAG_PACKET_5_0_2);
-
-  bind(L_2TAG_PACKET_8_0_2);
-  movl(edx, Address(rsp, 128));
-  cmpl(eax, 2146435072);
-  jcc(Assembler::above, L_2TAG_PACKET_10_0_2);
-  cmpl(edx, 0);
-  jcc(Assembler::notEqual, L_2TAG_PACKET_10_0_2);
-  movl(eax, Address(rsp, 132));
-  cmpl(eax, 2146435072);
-  jcc(Assembler::notEqual, L_2TAG_PACKET_11_0_2);
-  movsd(xmm0, Address(tmp, 1192));         // 0x00000000UL, 0x7ff00000UL
-  jmp(L_2TAG_PACKET_2_0_2);
-
-  bind(L_2TAG_PACKET_11_0_2);
-  movsd(xmm0, Address(tmp, 1200));         // 0x00000000UL, 0x00000000UL
-  jmp(L_2TAG_PACKET_2_0_2);
-
-  bind(L_2TAG_PACKET_10_0_2);
-  movsd(xmm0, Address(rsp, 128));
-  addsd(xmm0, xmm0);
-  jmp(L_2TAG_PACKET_2_0_2);
-
-  bind(L_2TAG_PACKET_0_0_2);
-  movl(eax, Address(rsp, 132));
-  andl(eax, 2147483647);
-  cmpl(eax, 1083179008);
-  jcc(Assembler::aboveEqual, L_2TAG_PACKET_7_0_2);
-  movsd(xmm0, Address(rsp, 128));
-  addsd(xmm0, Address(tmp, 1184));         // 0x00000000UL, 0x3ff00000UL
-  jmp(L_2TAG_PACKET_2_0_2);
-
-  bind(L_2TAG_PACKET_2_0_2);
-  movsd(Address(rsp, 48), xmm0);
-  fld_d(Address(rsp, 48));
-
-  bind(L_2TAG_PACKET_6_0_2);
-  movl(tmp, Address(rsp, 64));
-}
diff --git a/src/hotspot/cpu/x86/macroAssembler_x86_32_log.cpp b/src/hotspot/cpu/x86/macroAssembler_x86_32_log.cpp
deleted file mode 100644
index abaabef6741..00000000000
--- a/src/hotspot/cpu/x86/macroAssembler_x86_32_log.cpp
+++ /dev/null
@@ -1,344 +0,0 @@
-/*
-* Copyright (c) 2016, 2021, Intel Corporation. All rights reserved.
-* Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved.
-* Intel Math Library (LIBM) Source Code
-*
-* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
-*
-* This code is free software; you can redistribute it and/or modify it
-* under the terms of the GNU General Public License version 2 only, as
-* published by the Free Software Foundation.
-*
-* This code is distributed in the hope that it will be useful, but WITHOUT
-* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-* FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-* version 2 for more details (a copy is included in the LICENSE file that
-* accompanied this code).
-*
-* You should have received a copy of the GNU General Public License version
-* 2 along with this work; if not, write to the Free Software Foundation,
-* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
-*
-* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
-* or visit www.oracle.com if you need additional information or have any
-* questions.
-*
-*/
-
-#include "asm/assembler.hpp"
-#include "asm/assembler.inline.hpp"
-#include "macroAssembler_x86.hpp"
-#include "utilities/globalDefinitions.hpp"
-
-/******************************************************************************/
-//                     ALGORITHM DESCRIPTION - LOG()
-//                     ---------------------
-//
-//    x=2^k * mx, mx in [1,2)
-//
-//    Get B~1/mx based on the output of rcpss instruction (B0)
-//    B = int((B0*2^7+0.5))/2^7
-//
-//    Reduced argument: r=B*mx-1.0 (computed accurately in high and low parts)
-//
-//    Result:  k*log(2) - log(B) + p(r) if |x-1| >= small value (2^-6)  and
-//             p(r) is a degree 7 polynomial
-//             -log(B) read from data table (high, low parts)
-//             Result is formed from high and low parts
-//
-// Special cases:
-//  log(NaN) = quiet NaN, and raise invalid exception
-//  log(+INF) = that INF
-//  log(0) = -INF with divide-by-zero exception raised
-//  log(1) = +0
-//  log(x) = NaN with invalid exception raised if x < -0, including -INF
-//
-/******************************************************************************/
-
-// The 32 bit code is at most SSE2 compliant
-//
-ATTRIBUTE_ALIGNED(16) static const juint _static_const_table_log[] =
-{
-    0xfefa3800UL, 0x3fe62e42UL, 0x93c76730UL, 0x3d2ef357UL, 0xaa241800UL,
-    0x3fe5ee82UL, 0x0cda46beUL, 0x3d220238UL, 0x5c364800UL, 0x3fe5af40UL,
-    0xac10c9fbUL, 0x3d2dfa63UL, 0x26bb8c00UL, 0x3fe5707aUL, 0xff3303ddUL,
-    0x3d09980bUL, 0x26867800UL, 0x3fe5322eUL, 0x5d257531UL, 0x3d05ccc4UL,
-    0x835a5000UL, 0x3fe4f45aUL, 0x6d93b8fbUL, 0xbd2e6c51UL, 0x6f970c00UL,
-    0x3fe4b6fdUL, 0xed4c541cUL, 0x3cef7115UL, 0x27e8a400UL, 0x3fe47a15UL,
-    0xf94d60aaUL, 0xbd22cb6aUL, 0xf2f92400UL, 0x3fe43d9fUL, 0x481051f7UL,
-    0xbcfd984fUL, 0x2125cc00UL, 0x3fe4019cUL, 0x30f0c74cUL, 0xbd26ce79UL,
-    0x0c36c000UL, 0x3fe3c608UL, 0x7cfe13c2UL, 0xbd02b736UL, 0x17197800UL,
-    0x3fe38ae2UL, 0xbb5569a4UL, 0xbd218b7aUL, 0xad9d8c00UL, 0x3fe35028UL,
-    0x9527e6acUL, 0x3d10b83fUL, 0x44340800UL, 0x3fe315daUL, 0xc5a0ed9cUL,
-    0xbd274e93UL, 0x57b0e000UL, 0x3fe2dbf5UL, 0x07b9dc11UL, 0xbd17a6e5UL,
-    0x6d0ec000UL, 0x3fe2a278UL, 0xe797882dUL, 0x3d206d2bUL, 0x1134dc00UL,
-    0x3fe26962UL, 0x05226250UL, 0xbd0b61f1UL, 0xd8bebc00UL, 0x3fe230b0UL,
-    0x6e48667bUL, 0x3d12fc06UL, 0x5fc61800UL, 0x3fe1f863UL, 0xc9fe81d3UL,
-    0xbd2a7242UL, 0x49ae6000UL, 0x3fe1c078UL, 0xed70e667UL, 0x3cccacdeUL,
-    0x40f23c00UL, 0x3fe188eeUL, 0xf8ab4650UL, 0x3d14cc4eUL, 0xf6f29800UL,
-    0x3fe151c3UL, 0xa293ae49UL, 0xbd2edd97UL, 0x23c75c00UL, 0x3fe11af8UL,
-    0xbb9ddcb2UL, 0xbd258647UL, 0x8611cc00UL, 0x3fe0e489UL, 0x07801742UL,
-    0x3d1c2998UL, 0xe2d05400UL, 0x3fe0ae76UL, 0x887e7e27UL, 0x3d1f486bUL,
-    0x0533c400UL, 0x3fe078bfUL, 0x41edf5fdUL, 0x3d268122UL, 0xbe760400UL,
-    0x3fe04360UL, 0xe79539e0UL, 0xbd04c45fUL, 0xe5b20800UL, 0x3fe00e5aUL,
-    0xb1727b1cUL, 0xbd053ba3UL, 0xaf7a4800UL, 0x3fdfb358UL, 0x3c164935UL,
-    0x3d0085faUL, 0xee031800UL, 0x3fdf4aa7UL, 0x6f014a8bUL, 0x3d12cde5UL,
-    0x56b41000UL, 0x3fdee2a1UL, 0x5a470251UL, 0x3d2f27f4UL, 0xc3ddb000UL,
-    0x3fde7b42UL, 0x5372bd08UL, 0xbd246550UL, 0x1a272800UL, 0x3fde148aUL,
-    0x07322938UL, 0xbd1326b2UL, 0x484c9800UL, 0x3fddae75UL, 0x60dc616aUL,
-    0xbd1ea42dUL, 0x46def800UL, 0x3fdd4902UL, 0xe9a767a8UL, 0x3d235bafUL,
-    0x18064800UL, 0x3fdce42fUL, 0x3ec7a6b0UL, 0xbd0797c3UL, 0xc7455800UL,
-    0x3fdc7ff9UL, 0xc15249aeUL, 0xbd29b6ddUL, 0x693fa000UL, 0x3fdc1c60UL,
-    0x7fe8e180UL, 0x3d2cec80UL, 0x1b80e000UL, 0x3fdbb961UL, 0xf40a666dUL,
-    0x3d27d85bUL, 0x04462800UL, 0x3fdb56faUL, 0x2d841995UL, 0x3d109525UL,
-    0x5248d000UL, 0x3fdaf529UL, 0x52774458UL, 0xbd217cc5UL, 0x3c8ad800UL,
-    0x3fda93edUL, 0xbea77a5dUL, 0x3d1e36f2UL, 0x0224f800UL, 0x3fda3344UL,
-    0x7f9d79f5UL, 0x3d23c645UL, 0xea15f000UL, 0x3fd9d32bUL, 0x10d0c0b0UL,
-    0xbd26279eUL, 0x43135800UL, 0x3fd973a3UL, 0xa502d9f0UL, 0xbd152313UL,
-    0x635bf800UL, 0x3fd914a8UL, 0x2ee6307dUL, 0xbd1766b5UL, 0xa88b3000UL,
-    0x3fd8b639UL, 0xe5e70470UL, 0xbd205ae1UL, 0x776dc800UL, 0x3fd85855UL,
-    0x3333778aUL, 0x3d2fd56fUL, 0x3bd81800UL, 0x3fd7fafaUL, 0xc812566aUL,
-    0xbd272090UL, 0x687cf800UL, 0x3fd79e26UL, 0x2efd1778UL, 0x3d29ec7dUL,
-    0x76c67800UL, 0x3fd741d8UL, 0x49dc60b3UL, 0x3d2d8b09UL, 0xe6af1800UL,
-    0x3fd6e60eUL, 0x7c222d87UL, 0x3d172165UL, 0x3e9c6800UL, 0x3fd68ac8UL,
-    0x2756eba0UL, 0x3d20a0d3UL, 0x0b3ab000UL, 0x3fd63003UL, 0xe731ae00UL,
-    0xbd2db623UL, 0xdf596000UL, 0x3fd5d5bdUL, 0x08a465dcUL, 0xbd0a0b2aUL,
-    0x53c8d000UL, 0x3fd57bf7UL, 0xee5d40efUL, 0x3d1fadedUL, 0x0738a000UL,
-    0x3fd522aeUL, 0x8164c759UL, 0x3d2ebe70UL, 0x9e173000UL, 0x3fd4c9e0UL,
-    0x1b0ad8a4UL, 0xbd2e2089UL, 0xc271c800UL, 0x3fd4718dUL, 0x0967d675UL,
-    0xbd2f27ceUL, 0x23d5e800UL, 0x3fd419b4UL, 0xec90e09dUL, 0x3d08e436UL,
-    0x77333000UL, 0x3fd3c252UL, 0xb606bd5cUL, 0x3d183b54UL, 0x76be1000UL,
-    0x3fd36b67UL, 0xb0f177c8UL, 0x3d116ecdUL, 0xe1d36000UL, 0x3fd314f1UL,
-    0xd3213cb8UL, 0xbd28e27aUL, 0x7cdc9000UL, 0x3fd2bef0UL, 0x4a5004f4UL,
-    0x3d2a9cfaUL, 0x1134d800UL, 0x3fd26962UL, 0xdf5bb3b6UL, 0x3d2c93c1UL,
-    0x6d0eb800UL, 0x3fd21445UL, 0xba46baeaUL, 0x3d0a87deUL, 0x635a6800UL,
-    0x3fd1bf99UL, 0x5147bdb7UL, 0x3d2ca6edUL, 0xcbacf800UL, 0x3fd16b5cUL,
-    0xf7a51681UL, 0x3d2b9acdUL, 0x8227e800UL, 0x3fd1178eUL, 0x63a5f01cUL,
-    0xbd2c210eUL, 0x67616000UL, 0x3fd0c42dUL, 0x163ceae9UL, 0x3d27188bUL,
-    0x604d5800UL, 0x3fd07138UL, 0x16ed4e91UL, 0x3cf89cdbUL, 0x5626c800UL,
-    0x3fd01eaeUL, 0x1485e94aUL, 0xbd16f08cUL, 0x6cb3b000UL, 0x3fcf991cUL,
-    0xca0cdf30UL, 0x3d1bcbecUL, 0xe4dd0000UL, 0x3fcef5adUL, 0x65bb8e11UL,
-    0xbcca2115UL, 0xffe71000UL, 0x3fce530eUL, 0x6041f430UL, 0x3cc21227UL,
-    0xb0d49000UL, 0x3fcdb13dUL, 0xf715b035UL, 0xbd2aff2aUL, 0xf2656000UL,
-    0x3fcd1037UL, 0x75b6f6e4UL, 0xbd084a7eUL, 0xc6f01000UL, 0x3fcc6ffbUL,
-    0xc5962bd2UL, 0xbcf1ec72UL, 0x383be000UL, 0x3fcbd087UL, 0x595412b6UL,
-    0xbd2d4bc4UL, 0x575bd000UL, 0x3fcb31d8UL, 0x4eace1aaUL, 0xbd0c358dUL,
-    0x3c8ae000UL, 0x3fca93edUL, 0x50562169UL, 0xbd287243UL, 0x07089000UL,
-    0x3fc9f6c4UL, 0x6865817aUL, 0x3d29904dUL, 0xdcf70000UL, 0x3fc95a5aUL,
-    0x58a0ff6fUL, 0x3d07f228UL, 0xeb390000UL, 0x3fc8beafUL, 0xaae92cd1UL,
-    0xbd073d54UL, 0x6551a000UL, 0x3fc823c1UL, 0x9a631e83UL, 0x3d1e0ddbUL,
-    0x85445000UL, 0x3fc7898dUL, 0x70914305UL, 0xbd1c6610UL, 0x8b757000UL,
-    0x3fc6f012UL, 0xe59c21e1UL, 0xbd25118dUL, 0xbe8c1000UL, 0x3fc6574eUL,
-    0x2c3c2e78UL, 0x3d19cf8bUL, 0x6b544000UL, 0x3fc5bf40UL, 0xeb68981cUL,
-    0xbd127023UL, 0xe4a1b000UL, 0x3fc527e5UL, 0xe5697dc7UL, 0x3d2633e8UL,
-    0x8333b000UL, 0x3fc4913dUL, 0x54fdb678UL, 0x3d258379UL, 0xa5993000UL,
-    0x3fc3fb45UL, 0x7e6a354dUL, 0xbd2cd1d8UL, 0xb0159000UL, 0x3fc365fcUL,
-    0x234b7289UL, 0x3cc62fa8UL, 0x0c868000UL, 0x3fc2d161UL, 0xcb81b4a1UL,
-    0x3d039d6cUL, 0x2a49c000UL, 0x3fc23d71UL, 0x8fd3df5cUL, 0x3d100d23UL,
-    0x7e23f000UL, 0x3fc1aa2bUL, 0x44389934UL, 0x3d2ca78eUL, 0x8227e000UL,
-    0x3fc1178eUL, 0xce2d07f2UL, 0x3d21ef78UL, 0xb59e4000UL, 0x3fc08598UL,
-    0x7009902cUL, 0xbd27e5ddUL, 0x39dbe000UL, 0x3fbfe891UL, 0x4fa10afdUL,
-    0xbd2534d6UL, 0x830a2000UL, 0x3fbec739UL, 0xafe645e0UL, 0xbd2dc068UL,
-    0x63844000UL, 0x3fbda727UL, 0x1fa71733UL, 0x3d1a8940UL, 0x01bc4000UL,
-    0x3fbc8858UL, 0xc65aacd3UL, 0x3d2646d1UL, 0x8dad6000UL, 0x3fbb6ac8UL,
-    0x2bf768e5UL, 0xbd139080UL, 0x40b1c000UL, 0x3fba4e76UL, 0xb94407c8UL,
-    0xbd0e42b6UL, 0x5d594000UL, 0x3fb9335eUL, 0x3abd47daUL, 0x3d23115cUL,
-    0x2f40e000UL, 0x3fb8197eUL, 0xf96ffdf7UL, 0x3d0f80dcUL, 0x0aeac000UL,
-    0x3fb700d3UL, 0xa99ded32UL, 0x3cec1e8dUL, 0x4d97a000UL, 0x3fb5e95aUL,
-    0x3c5d1d1eUL, 0xbd2c6906UL, 0x5d208000UL, 0x3fb4d311UL, 0x82f4e1efUL,
-    0xbcf53a25UL, 0xa7d1e000UL, 0x3fb3bdf5UL, 0xa5db4ed7UL, 0x3d2cc85eUL,
-    0xa4472000UL, 0x3fb2aa04UL, 0xae9c697dUL, 0xbd20b6e8UL, 0xd1466000UL,
-    0x3fb1973bUL, 0x560d9e9bUL, 0xbd25325dUL, 0xb59e4000UL, 0x3fb08598UL,
-    0x7009902cUL, 0xbd17e5ddUL, 0xc006c000UL, 0x3faeea31UL, 0x4fc93b7bUL,
-    0xbd0e113eUL, 0xcdddc000UL, 0x3faccb73UL, 0x47d82807UL, 0xbd1a68f2UL,
-    0xd0fb0000UL, 0x3faaaef2UL, 0x353bb42eUL, 0x3d20fc1aUL, 0x149fc000UL,
-    0x3fa894aaUL, 0xd05a267dUL, 0xbd197995UL, 0xf2d4c000UL, 0x3fa67c94UL,
-    0xec19afa2UL, 0xbd029efbUL, 0xd42e0000UL, 0x3fa466aeUL, 0x75bdfd28UL,
-    0xbd2c1673UL, 0x2f8d0000UL, 0x3fa252f3UL, 0xe021b67bUL, 0x3d283e9aUL,
-    0x89e74000UL, 0x3fa0415dUL, 0x5cf1d753UL, 0x3d0111c0UL, 0xec148000UL,
-    0x3f9c63d2UL, 0x3f9eb2f3UL, 0x3d2578c6UL, 0x28c90000UL, 0x3f984925UL,
-    0x325a0c34UL, 0xbd2aa0baUL, 0x25980000UL, 0x3f9432a9UL, 0x928637feUL,
-    0x3d098139UL, 0x58938000UL, 0x3f902056UL, 0x06e2f7d2UL, 0xbd23dc5bUL,
-    0xa3890000UL, 0x3f882448UL, 0xda74f640UL, 0xbd275577UL, 0x75890000UL,
-    0x3f801015UL, 0x999d2be8UL, 0xbd10c76bUL, 0x59580000UL, 0x3f700805UL,
-    0xcb31c67bUL, 0x3d2166afUL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
-    0x80000000UL, 0xfefa3800UL, 0x3fa62e42UL, 0x93c76730UL, 0x3ceef357UL,
-    0x92492492UL, 0x3fc24924UL, 0x00000000UL, 0xbfd00000UL, 0x3d6fb175UL,
-    0xbfc5555eUL, 0x55555555UL, 0x3fd55555UL, 0x9999999aUL, 0x3fc99999UL,
-    0x00000000UL, 0xbfe00000UL, 0x00000000UL, 0xffffe000UL, 0x00000000UL,
-    0xffffe000UL
-};
-
-//registers,
-// input: xmm0
-// scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
-//          rax, rdx, rcx, rbx (tmp)
-void MacroAssembler::fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
-                              XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
-                              Register eax, Register ecx, Register edx, Register tmp) {
-  Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
-  Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
-  Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2;
-  Label L_2TAG_PACKET_10_0_2;
-
-  assert_different_registers(tmp, eax, ecx, edx);
-  address static_const_table = (address)_static_const_table_log;
-
-  subl(rsp, 104);
-  movl(Address(rsp, 40), tmp);
-  lea(tmp, ExternalAddress(static_const_table));
-  xorpd(xmm2, xmm2);
-  movl(eax, 16368);
-  pinsrw(xmm2, eax, 3);
-  xorpd(xmm3, xmm3);
-  movl(edx, 30704);
-  pinsrw(xmm3, edx, 3);
-  movsd(xmm0, Address(rsp, 112));
-  movapd(xmm1, xmm0);
-  movl(ecx, 32768);
-  movdl(xmm4, ecx);
-  movsd(xmm5, Address(tmp, 2128));         // 0x00000000UL, 0xffffe000UL
-  pextrw(eax, xmm0, 3);
-  por(xmm0, xmm2);
-  psllq(xmm0, 5);
-  movl(ecx, 16352);
-  psrlq(xmm0, 34);
-  rcpss(xmm0, xmm0);
-  psllq(xmm1, 12);
-  pshufd(xmm6, xmm5, 228);
-  psrlq(xmm1, 12);
-  subl(eax, 16);
-  cmpl(eax, 32736);
-  jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2);
-
-  bind(L_2TAG_PACKET_1_0_2);
-  paddd(xmm0, xmm4);
-  por(xmm1, xmm3);
-  movdl(edx, xmm0);
-  psllq(xmm0, 29);
-  pand(xmm5, xmm1);
-  pand(xmm0, xmm6);
-  subsd(xmm1, xmm5);
-  mulpd(xmm5, xmm0);
-  andl(eax, 32752);
-  subl(eax, ecx);
-  cvtsi2sdl(xmm7, eax);
-  mulsd(xmm1, xmm0);
-  movsd(xmm6, Address(tmp, 2064));         // 0xfefa3800UL, 0x3fa62e42UL
-  movdqu(xmm3, Address(tmp, 2080));        // 0x92492492UL, 0x3fc24924UL, 0x00000000UL, 0xbfd00000UL
-  subsd(xmm5, xmm2);
-  andl(edx, 16711680);
-  shrl(edx, 12);
-  movdqu(xmm0, Address(tmp, edx));
-  movdqu(xmm4, Address(tmp, 2096));        // 0x3d6fb175UL, 0xbfc5555eUL, 0x55555555UL, 0x3fd55555UL
-  addsd(xmm1, xmm5);
-  movdqu(xmm2, Address(tmp, 2112));        // 0x9999999aUL, 0x3fc99999UL, 0x00000000UL, 0xbfe00000UL
-  mulsd(xmm6, xmm7);
-  pshufd(xmm5, xmm1, 68);
-  mulsd(xmm7, Address(tmp, 2072));         // 0x93c76730UL, 0x3ceef357UL, 0x92492492UL, 0x3fc24924UL
-  mulsd(xmm3, xmm1);
-  addsd(xmm0, xmm6);
-  mulpd(xmm4, xmm5);
-  mulpd(xmm5, xmm5);
-  pshufd(xmm6, xmm0, 228);
-  addsd(xmm0, xmm1);
-  addpd(xmm4, xmm2);
-  mulpd(xmm3, xmm5);
-  subsd(xmm6, xmm0);
-  mulsd(xmm4, xmm1);
-  pshufd(xmm2, xmm0, 238);
-  addsd(xmm1, xmm6);
-  mulsd(xmm5, xmm5);
-  addsd(xmm7, xmm2);
-  addpd(xmm4, xmm3);
-  addsd(xmm1, xmm7);
-  mulpd(xmm4, xmm5);
-  addsd(xmm1, xmm4);
-  pshufd(xmm5, xmm4, 238);
-  addsd(xmm1, xmm5);
-  addsd(xmm0, xmm1);
-  jmp(L_2TAG_PACKET_2_0_2);
-
-  bind(L_2TAG_PACKET_0_0_2);
-  movsd(xmm0, Address(rsp, 112));
-  movdqu(xmm1, xmm0);
-  addl(eax, 16);
-  cmpl(eax, 32768);
-  jcc(Assembler::aboveEqual, L_2TAG_PACKET_3_0_2);
-  cmpl(eax, 16);
-  jcc(Assembler::below, L_2TAG_PACKET_4_0_2);
-
-  bind(L_2TAG_PACKET_5_0_2);
-  addsd(xmm0, xmm0);
-  jmp(L_2TAG_PACKET_2_0_2);
-
-  bind(L_2TAG_PACKET_6_0_2);
-  jcc(Assembler::above, L_2TAG_PACKET_5_0_2);
-  cmpl(edx, 0);
-  jcc(Assembler::above, L_2TAG_PACKET_5_0_2);
-  jmp(L_2TAG_PACKET_7_0_2);
-
-  bind(L_2TAG_PACKET_3_0_2);
-  movdl(edx, xmm1);
-  psrlq(xmm1, 32);
-  movdl(ecx, xmm1);
-  addl(ecx, ecx);
-  cmpl(ecx, -2097152);
-  jcc(Assembler::aboveEqual, L_2TAG_PACKET_6_0_2);
-  orl(edx, ecx);
-  cmpl(edx, 0);
-  jcc(Assembler::equal, L_2TAG_PACKET_8_0_2);
-
-  bind(L_2TAG_PACKET_7_0_2);
-  xorpd(xmm1, xmm1);
-  xorpd(xmm0, xmm0);
-  movl(eax, 32752);
-  pinsrw(xmm1, eax, 3);
-  movl(edx, 3);
-  mulsd(xmm0, xmm1);
-
-  bind(L_2TAG_PACKET_9_0_2);
-  movsd(Address(rsp, 0), xmm0);
-  movsd(xmm0, Address(rsp, 112));
-  fld_d(Address(rsp, 0));
-  jmp(L_2TAG_PACKET_10_0_2);
-
-  bind(L_2TAG_PACKET_8_0_2);
-  xorpd(xmm1, xmm1);
-  xorpd(xmm0, xmm0);
-  movl(eax, 49136);
-  pinsrw(xmm0, eax, 3);
-  divsd(xmm0, xmm1);
-  movl(edx, 2);
-  jmp(L_2TAG_PACKET_9_0_2);
-
-  bind(L_2TAG_PACKET_4_0_2);
-  movdl(edx, xmm1);
-  psrlq(xmm1, 32);
-  movdl(ecx, xmm1);
-  orl(edx, ecx);
-  cmpl(edx, 0);
-  jcc(Assembler::equal, L_2TAG_PACKET_8_0_2);
-  xorpd(xmm1, xmm1);
-  movl(eax, 18416);
-  pinsrw(xmm1, eax, 3);
-  mulsd(xmm0, xmm1);
-  movapd(xmm1, xmm0);
-  pextrw(eax, xmm0, 3);
-  por(xmm0, xmm2);
-  psllq(xmm0, 5);
-  movl(ecx, 18416);
-  psrlq(xmm0, 34);
-  rcpss(xmm0, xmm0);
-  psllq(xmm1, 12);
-  pshufd(xmm6, xmm5, 228);
-  psrlq(xmm1, 12);
-  jmp(L_2TAG_PACKET_1_0_2);
-
-  bind(L_2TAG_PACKET_2_0_2);
-  movsd(Address(rsp, 24), xmm0);
-  fld_d(Address(rsp, 24));
-
-  bind(L_2TAG_PACKET_10_0_2);
-  movl(tmp, Address(rsp, 40));
-}
diff --git a/src/hotspot/cpu/x86/macroAssembler_x86_32_log10.cpp b/src/hotspot/cpu/x86/macroAssembler_x86_32_log10.cpp
deleted file mode 100644
index 1fc5f49cf75..00000000000
--- a/src/hotspot/cpu/x86/macroAssembler_x86_32_log10.cpp
+++ /dev/null
@@ -1,357 +0,0 @@
-/*
-* Copyright (c) 2016, 2021, Intel Corporation. All rights reserved.
-* Intel Math Library (LIBM) Source Code
-*
-* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
-*
-* This code is free software; you can redistribute it and/or modify it
-* under the terms of the GNU General Public License version 2 only, as
-* published by the Free Software Foundation.
-*
-* This code is distributed in the hope that it will be useful, but WITHOUT
-* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-* FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-* version 2 for more details (a copy is included in the LICENSE file that
-* accompanied this code).
-*
-* You should have received a copy of the GNU General Public License version
-* 2 along with this work; if not, write to the Free Software Foundation,
-* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
-*
-* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
-* or visit www.oracle.com if you need additional information or have any
-* questions.
-*
-*/
-
-#include "asm/assembler.hpp"
-#include "asm/assembler.inline.hpp"
-#include "macroAssembler_x86.hpp"
-#include "runtime/stubRoutines.hpp"
-#include "utilities/globalDefinitions.hpp"
-
-/******************************************************************************/
-//                     ALGORITHM DESCRIPTION - LOG10()
-//                     ---------------------
-//
-//    Let x=2^k * mx, mx in [1,2)
-//
-//    Get B~1/mx based on the output of rcpss instruction (B0)
-//    B = int((B0*LH*2^7+0.5))/2^7
-//    LH is a short approximation for log10(e)
-//
-//    Reduced argument: r=B*mx-LH (computed accurately in high and low parts)
-//
-//    Result:  k*log10(2) - log(B) + p(r)
-//             p(r) is a degree 7 polynomial
-//             -log(B) read from data table (high, low parts)
-//             Result is formed from high and low parts
-//
-// Special cases:
-//  log10(0) = -INF with divide-by-zero exception raised
-//  log10(1) = +0
-//  log10(x) = NaN with invalid exception raised if x < -0, including -INF
-//  log10(+INF) = +INF
-//
-/******************************************************************************/
-
-// The 32 bit code is at most SSE2 compliant
-
-ATTRIBUTE_ALIGNED(16) static const juint _static_const_table_log10[] =
-{
-    0x509f7800UL, 0x3fd34413UL, 0x1f12b358UL, 0x3d1fef31UL, 0x80333400UL,
-    0x3fd32418UL, 0xc671d9d0UL, 0xbcf542bfUL, 0x51195000UL, 0x3fd30442UL,
-    0x78a4b0c3UL, 0x3d18216aUL, 0x6fc79400UL, 0x3fd2e490UL, 0x80fa389dUL,
-    0xbc902869UL, 0x89d04000UL, 0x3fd2c502UL, 0x75c2f564UL, 0x3d040754UL,
-    0x4ddd1c00UL, 0x3fd2a598UL, 0xd219b2c3UL, 0xbcfa1d84UL, 0x6baa7c00UL,
-    0x3fd28651UL, 0xfd9abec1UL, 0x3d1be6d3UL, 0x94028800UL, 0x3fd2672dUL,
-    0xe289a455UL, 0xbd1ede5eUL, 0x78b86400UL, 0x3fd2482cUL, 0x6734d179UL,
-    0x3d1fe79bUL, 0xcca3c800UL, 0x3fd2294dUL, 0x981a40b8UL, 0xbced34eaUL,
-    0x439c5000UL, 0x3fd20a91UL, 0xcc392737UL, 0xbd1a9cc3UL, 0x92752c00UL,
-    0x3fd1ebf6UL, 0x03c9afe7UL, 0x3d1e98f8UL, 0x6ef8dc00UL, 0x3fd1cd7dUL,
-    0x71dae7f4UL, 0x3d08a86cUL, 0x8fe4dc00UL, 0x3fd1af25UL, 0xee9185a1UL,
-    0xbcff3412UL, 0xace59400UL, 0x3fd190eeUL, 0xc2cab353UL, 0x3cf17ed9UL,
-    0x7e925000UL, 0x3fd172d8UL, 0x6952c1b2UL, 0x3cf1521cUL, 0xbe694400UL,
-    0x3fd154e2UL, 0xcacb79caUL, 0xbd0bdc78UL, 0x26cbac00UL, 0x3fd1370dUL,
-    0xf71f4de1UL, 0xbd01f8beUL, 0x72fa0800UL, 0x3fd11957UL, 0x55bf910bUL,
-    0x3c946e2bUL, 0x5f106000UL, 0x3fd0fbc1UL, 0x39e639c1UL, 0x3d14a84bUL,
-    0xa802a800UL, 0x3fd0de4aUL, 0xd3f31d5dUL, 0xbd178385UL, 0x0b992000UL,
-    0x3fd0c0f3UL, 0x3843106fUL, 0xbd1f602fUL, 0x486ce800UL, 0x3fd0a3baUL,
-    0x8819497cUL, 0x3cef987aUL, 0x1de49400UL, 0x3fd086a0UL, 0x1caa0467UL,
-    0x3d0faec7UL, 0x4c30cc00UL, 0x3fd069a4UL, 0xa4424372UL, 0xbd1618fcUL,
-    0x94490000UL, 0x3fd04cc6UL, 0x946517d2UL, 0xbd18384bUL, 0xb7e84000UL,
-    0x3fd03006UL, 0xe0109c37UL, 0xbd19a6acUL, 0x798a0c00UL, 0x3fd01364UL,
-    0x5121e864UL, 0xbd164cf7UL, 0x38ce8000UL, 0x3fcfedbfUL, 0x46214d1aUL,
-    0xbcbbc402UL, 0xc8e62000UL, 0x3fcfb4efUL, 0xdab93203UL, 0x3d1e0176UL,
-    0x2cb02800UL, 0x3fcf7c5aUL, 0x2a2ea8e4UL, 0xbcfec86aUL, 0xeeeaa000UL,
-    0x3fcf43fdUL, 0xc18e49a4UL, 0x3cf110a8UL, 0x9bb6e800UL, 0x3fcf0bdaUL,
-    0x923cc9c0UL, 0xbd15ce99UL, 0xc093f000UL, 0x3fced3efUL, 0x4d4b51e9UL,
-    0x3d1a04c7UL, 0xec58f800UL, 0x3fce9c3cUL, 0x163cad59UL, 0x3cac8260UL,
-    0x9a907000UL, 0x3fce2d7dUL, 0x3fa93646UL, 0x3ce4a1c0UL, 0x37311000UL,
-    0x3fcdbf99UL, 0x32abd1fdUL, 0x3d07ea9dUL, 0x6744b800UL, 0x3fcd528cUL,
-    0x4dcbdfd4UL, 0xbd1b08e2UL, 0xe36de800UL, 0x3fcce653UL, 0x0b7b7f7fUL,
-    0xbd1b8f03UL, 0x77506800UL, 0x3fcc7aecUL, 0xa821c9fbUL, 0x3d13c163UL,
-    0x00ff8800UL, 0x3fcc1053UL, 0x536bca76UL, 0xbd074ee5UL, 0x70719800UL,
-    0x3fcba684UL, 0xd7da9b6bUL, 0xbd1fbf16UL, 0xc6f8d800UL, 0x3fcb3d7dUL,
-    0xe2220bb3UL, 0x3d1a295dUL, 0x16c15800UL, 0x3fcad53cUL, 0xe724911eUL,
-    0xbcf55822UL, 0x82533800UL, 0x3fca6dbcUL, 0x6d982371UL, 0x3cac567cUL,
-    0x3c19e800UL, 0x3fca06fcUL, 0x84d17d80UL, 0x3d1da204UL, 0x85ef8000UL,
-    0x3fc9a0f8UL, 0x54466a6aUL, 0xbd002204UL, 0xb0ac2000UL, 0x3fc93baeUL,
-    0xd601fd65UL, 0x3d18840cUL, 0x1bb9b000UL, 0x3fc8d71cUL, 0x7bf58766UL,
-    0xbd14f897UL, 0x34aae800UL, 0x3fc8733eUL, 0x3af6ac24UL, 0xbd0f5c45UL,
-    0x76d68000UL, 0x3fc81012UL, 0x4303e1a1UL, 0xbd1f9a80UL, 0x6af57800UL,
-    0x3fc7ad96UL, 0x43fbcb46UL, 0x3cf4c33eUL, 0xa6c51000UL, 0x3fc74bc7UL,
-    0x70f0eac5UL, 0xbd192e3bUL, 0xccab9800UL, 0x3fc6eaa3UL, 0xc0093dfeUL,
-    0xbd0faf15UL, 0x8b60b800UL, 0x3fc68a28UL, 0xde78d5fdUL, 0xbc9ea4eeUL,
-    0x9d987000UL, 0x3fc62a53UL, 0x962bea6eUL, 0xbd194084UL, 0xc9b0e800UL,
-    0x3fc5cb22UL, 0x888dd999UL, 0x3d1fe201UL, 0xe1634800UL, 0x3fc56c93UL,
-    0x16ada7adUL, 0x3d1b1188UL, 0xc176c000UL, 0x3fc50ea4UL, 0x4159b5b5UL,
-    0xbcf09c08UL, 0x51766000UL, 0x3fc4b153UL, 0x84393d23UL, 0xbcf6a89cUL,
-    0x83695000UL, 0x3fc4549dUL, 0x9f0b8bbbUL, 0x3d1c4b8cUL, 0x538d5800UL,
-    0x3fc3f881UL, 0xf49df747UL, 0x3cf89b99UL, 0xc8138000UL, 0x3fc39cfcUL,
-    0xd503b834UL, 0xbd13b99fUL, 0xf0df0800UL, 0x3fc3420dUL, 0xf011b386UL,
-    0xbd05d8beUL, 0xe7466800UL, 0x3fc2e7b2UL, 0xf39c7bc2UL, 0xbd1bb94eUL,
-    0xcdd62800UL, 0x3fc28de9UL, 0x05e6d69bUL, 0xbd10ed05UL, 0xd015d800UL,
-    0x3fc234b0UL, 0xe29b6c9dUL, 0xbd1ff967UL, 0x224ea800UL, 0x3fc1dc06UL,
-    0x727711fcUL, 0xbcffb30dUL, 0x01540000UL, 0x3fc183e8UL, 0x39786c5aUL,
-    0x3cc23f57UL, 0xb24d9800UL, 0x3fc12c54UL, 0xc905a342UL, 0x3d003a1dUL,
-    0x82835800UL, 0x3fc0d54aUL, 0x9b9920c0UL, 0x3d03b25aUL, 0xc72ac000UL,
-    0x3fc07ec7UL, 0x46f26a24UL, 0x3cf0fa41UL, 0xdd35d800UL, 0x3fc028caUL,
-    0x41d9d6dcUL, 0x3d034a65UL, 0x52474000UL, 0x3fbfa6a4UL, 0x44f66449UL,
-    0x3d19cad3UL, 0x2da3d000UL, 0x3fbefcb8UL, 0x67832999UL, 0x3d18400fUL,
-    0x32a10000UL, 0x3fbe53ceUL, 0x9c0e3b1aUL, 0xbcff62fdUL, 0x556b7000UL,
-    0x3fbdabe3UL, 0x02976913UL, 0xbcf8243bUL, 0x97e88000UL, 0x3fbd04f4UL,
-    0xec793797UL, 0x3d1c0578UL, 0x09647000UL, 0x3fbc5effUL, 0x05fc0565UL,
-    0xbd1d799eUL, 0xc6426000UL, 0x3fbbb9ffUL, 0x4625f5edUL, 0x3d1f5723UL,
-    0xf7afd000UL, 0x3fbb15f3UL, 0xdd5aae61UL, 0xbd1a7e1eUL, 0xd358b000UL,
-    0x3fba72d8UL, 0x3314e4d3UL, 0x3d17bc91UL, 0x9b1f5000UL, 0x3fb9d0abUL,
-    0x9a4d514bUL, 0x3cf18c9bUL, 0x9cd4e000UL, 0x3fb92f69UL, 0x7e4496abUL,
-    0x3cf1f96dUL, 0x31f4f000UL, 0x3fb88f10UL, 0xf56479e7UL, 0x3d165818UL,
-    0xbf628000UL, 0x3fb7ef9cUL, 0x26bf486dUL, 0xbd1113a6UL, 0xb526b000UL,
-    0x3fb7510cUL, 0x1a1c3384UL, 0x3ca9898dUL, 0x8e31e000UL, 0x3fb6b35dUL,
-    0xb3875361UL, 0xbd0661acUL, 0xd01de000UL, 0x3fb6168cUL, 0x2a7cacfaUL,
-    0xbd1bdf10UL, 0x0af23000UL, 0x3fb57a98UL, 0xff868816UL, 0x3cf046d0UL,
-    0xd8ea0000UL, 0x3fb4df7cUL, 0x1515fbe7UL, 0xbd1fd529UL, 0xde3b2000UL,
-    0x3fb44538UL, 0x6e59a132UL, 0x3d1faeeeUL, 0xc8df9000UL, 0x3fb3abc9UL,
-    0xf1322361UL, 0xbd198807UL, 0x505f1000UL, 0x3fb3132dUL, 0x0888e6abUL,
-    0x3d1e5380UL, 0x359bd000UL, 0x3fb27b61UL, 0xdfbcbb22UL, 0xbcfe2724UL,
-    0x429ee000UL, 0x3fb1e463UL, 0x6eb4c58cUL, 0xbcfe4dd6UL, 0x4a673000UL,
-    0x3fb14e31UL, 0x4ce1ac9bUL, 0x3d1ba691UL, 0x28b96000UL, 0x3fb0b8c9UL,
-    0x8c7813b8UL, 0xbd0b3872UL, 0xc1f08000UL, 0x3fb02428UL, 0xc2bc8c2cUL,
-    0x3cb5ea6bUL, 0x05a1a000UL, 0x3faf209cUL, 0x72e8f18eUL, 0xbce8df84UL,
-    0xc0b5e000UL, 0x3fadfa6dUL, 0x9fdef436UL, 0x3d087364UL, 0xaf416000UL,
-    0x3facd5c2UL, 0x1068c3a9UL, 0x3d0827e7UL, 0xdb356000UL, 0x3fabb296UL,
-    0x120a34d3UL, 0x3d101a9fUL, 0x5dfea000UL, 0x3faa90e6UL, 0xdaded264UL,
-    0xbd14c392UL, 0x6034c000UL, 0x3fa970adUL, 0x1c9d06a9UL, 0xbd1b705eUL,
-    0x194c6000UL, 0x3fa851e8UL, 0x83996ad9UL, 0xbd0117bcUL, 0xcf4ac000UL,
-    0x3fa73492UL, 0xb1a94a62UL, 0xbca5ea42UL, 0xd67b4000UL, 0x3fa618a9UL,
-    0x75aed8caUL, 0xbd07119bUL, 0x9126c000UL, 0x3fa4fe29UL, 0x5291d533UL,
-    0x3d12658fUL, 0x6f4d4000UL, 0x3fa3e50eUL, 0xcd2c5cd9UL, 0x3d1d5c70UL,
-    0xee608000UL, 0x3fa2cd54UL, 0xd1008489UL, 0x3d1a4802UL, 0x9900e000UL,
-    0x3fa1b6f9UL, 0x54fb5598UL, 0xbd16593fUL, 0x06bb6000UL, 0x3fa0a1f9UL,
-    0x64ef57b4UL, 0xbd17636bUL, 0xb7940000UL, 0x3f9f1c9fUL, 0xee6a4737UL,
-    0x3cb5d479UL, 0x91aa0000UL, 0x3f9cf7f5UL, 0x3a16373cUL, 0x3d087114UL,
-    0x156b8000UL, 0x3f9ad5edUL, 0x836c554aUL, 0x3c6900b0UL, 0xd4764000UL,
-    0x3f98b67fUL, 0xed12f17bUL, 0xbcffc974UL, 0x77dec000UL, 0x3f9699a7UL,
-    0x232ce7eaUL, 0x3d1e35bbUL, 0xbfbf4000UL, 0x3f947f5dUL, 0xd84ffa6eUL,
-    0x3d0e0a49UL, 0x82c7c000UL, 0x3f92679cUL, 0x8d170e90UL, 0xbd14d9f2UL,
-    0xadd20000UL, 0x3f90525dUL, 0x86d9f88eUL, 0x3cdeb986UL, 0x86f10000UL,
-    0x3f8c7f36UL, 0xb9e0a517UL, 0x3ce29faaUL, 0xb75c8000UL, 0x3f885e9eUL,
-    0x542568cbUL, 0xbd1f7bdbUL, 0x46b30000UL, 0x3f8442e8UL, 0xb954e7d9UL,
-    0x3d1e5287UL, 0xb7e60000UL, 0x3f802c07UL, 0x22da0b17UL, 0xbd19fb27UL,
-    0x6c8b0000UL, 0x3f7833e3UL, 0x821271efUL, 0xbd190f96UL, 0x29910000UL,
-    0x3f701936UL, 0xbc3491a5UL, 0xbd1bcf45UL, 0x354a0000UL, 0x3f600fe3UL,
-    0xc0ff520aUL, 0xbd19d71cUL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
-    0x00000000UL, 0x509f7800UL, 0x3f934413UL, 0x1f12b358UL, 0x3cdfef31UL,
-    0xc1a5f12eUL, 0x40358874UL, 0x64d4ef0dUL, 0xc0089309UL, 0x385593b1UL,
-    0xc025c917UL, 0xdc963467UL, 0x3ffc6a02UL, 0x7f9d3aa1UL, 0x4016ab9fUL,
-    0xdc77b115UL, 0xbff27af2UL, 0xf8000000UL, 0xffffffffUL, 0x00000000UL,
-    0xffffe000UL, 0x00000000UL, 0x3fdbc000UL, 0xbf2e4108UL, 0x3f5a7a6cUL
-};
-//registers,
-// input: xmm0
-// scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
-//          rax, rdx, rcx, rbx (tmp)
-
-void MacroAssembler::fast_log10(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
-                                XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
-                                Register eax, Register ecx, Register edx, Register tmp) {
-
-  Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
-  Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
-  Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, L_2TAG_PACKET_10_0_2;
-
-  assert_different_registers(tmp, eax, ecx, edx);
-
-  address static_const_table_log10 = (address)_static_const_table_log10;
-
-  subl(rsp, 104);
-  movl(Address(rsp, 40), tmp);
-  lea(tmp, ExternalAddress(static_const_table_log10));
-  xorpd(xmm2, xmm2);
-  movl(eax, 16368);
-  pinsrw(xmm2, eax, 3);
-  movl(ecx, 1054736384);
-  movdl(xmm7, ecx);
-  xorpd(xmm3, xmm3);
-  movl(edx, 30704);
-  pinsrw(xmm3, edx, 3);
-  movsd(xmm0, Address(rsp, 112));
-  movdqu(xmm1, xmm0);
-  movl(edx, 32768);
-  movdl(xmm4, edx);
-  movdqu(xmm5, Address(tmp, 2128));    //0x3ffc6a02UL, 0x7f9d3aa1UL, 0x4016ab9fUL, 0xdc77b115UL
-  pextrw(eax, xmm0, 3);
-  por(xmm0, xmm2);
-  movl(ecx, 16352);
-  psllq(xmm0, 5);
-  movsd(xmm2, Address(tmp, 2144));    //0xbff27af2UL, 0xf8000000UL, 0xffffffffUL, 0x00000000UL
-  psrlq(xmm0, 34);
-  rcpss(xmm0, xmm0);
-  psllq(xmm1, 12);
-  pshufd(xmm6, xmm5, 78);
-  psrlq(xmm1, 12);
-  subl(eax, 16);
-  cmpl(eax, 32736);
-  jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2);
-
-  bind(L_2TAG_PACKET_1_0_2);
-  mulss(xmm0, xmm7);
-  por(xmm1, xmm3);
-  andpd(xmm5, xmm1);
-  paddd(xmm0, xmm4);
-  subsd(xmm1, xmm5);
-  movdl(edx, xmm0);
-  psllq(xmm0, 29);
-  andpd(xmm0, xmm6);
-  andl(eax, 32752);
-  subl(eax, ecx);
-  cvtsi2sdl(xmm7, eax);
-  mulpd(xmm5, xmm0);
-  mulsd(xmm1, xmm0);
-  movsd(xmm6, Address(tmp, 2064));    //0xbd19d71cUL, 0x00000000UL, 0x00000000UL, 0x00000000UL
-  movdqu(xmm3, Address(tmp, 2080));    //0x00000000UL, 0x509f7800UL, 0x3f934413UL, 0x1f12b358UL
-  subsd(xmm5, xmm2);
-  andl(edx, 16711680);
-  shrl(edx, 12);
-  movdqu(xmm0, Address(tmp, edx, Address::times_1, -1504));
-  movdqu(xmm4, Address(tmp, 2096));    //0x3cdfef31UL, 0xc1a5f12eUL, 0x40358874UL, 0x64d4ef0dUL
-  addsd(xmm1, xmm5);
-  movdqu(xmm2, Address(tmp, 2112));    //0xc0089309UL, 0x385593b1UL, 0xc025c917UL, 0xdc963467UL
-  mulsd(xmm6, xmm7);
-  pshufd(xmm5, xmm1, 68);
-  mulsd(xmm7, Address(tmp, 2072));    //0x00000000UL, 0x00000000UL, 0x00000000UL, 0x509f7800UL
-  mulsd(xmm3, xmm1);
-  addsd(xmm0, xmm6);
-  mulpd(xmm4, xmm5);
-  movsd(xmm6, Address(tmp, 2152));    //0xffffffffUL, 0x00000000UL, 0xffffe000UL, 0x00000000UL
-  mulpd(xmm5, xmm5);
-  addpd(xmm4, xmm2);
-  mulpd(xmm3, xmm5);
-  pshufd(xmm2, xmm0, 228);
-  addsd(xmm0, xmm1);
-  mulsd(xmm4, xmm1);
-  subsd(xmm2, xmm0);
-  mulsd(xmm6, xmm1);
-  addsd(xmm1, xmm2);
-  pshufd(xmm2, xmm0, 238);
-  mulsd(xmm5, xmm5);
-  addsd(xmm7, xmm2);
-  addsd(xmm1, xmm6);
-  addpd(xmm4, xmm3);
-  addsd(xmm1, xmm7);
-  mulpd(xmm4, xmm5);
-  addsd(xmm1, xmm4);
-  pshufd(xmm5, xmm4, 238);
-  addsd(xmm1, xmm5);
-  addsd(xmm0, xmm1);
-  jmp(L_2TAG_PACKET_2_0_2);
-
-  bind(L_2TAG_PACKET_0_0_2);
-  movsd(xmm0, Address(rsp, 112));    //0xbcfa1d84UL, 0x6baa7c00UL, 0x3fd28651UL, 0xfd9abec1UL
-  movdqu(xmm1, xmm0);
-  addl(eax, 16);
-  cmpl(eax, 32768);
-  jcc(Assembler::aboveEqual, L_2TAG_PACKET_3_0_2);
-  cmpl(eax, 16);
-  jcc(Assembler::below, L_2TAG_PACKET_4_0_2);
-
-  bind(L_2TAG_PACKET_5_0_2);
-  addsd(xmm0, xmm0);
-  jmp(L_2TAG_PACKET_2_0_2);
-
-  bind(L_2TAG_PACKET_6_0_2);
-  jcc(Assembler::above, L_2TAG_PACKET_5_0_2);
-  cmpl(edx, 0);
-  jcc(Assembler::above, L_2TAG_PACKET_5_0_2);
-  jmp(L_2TAG_PACKET_7_0_2);
-
-  bind(L_2TAG_PACKET_3_0_2);
-  movdl(edx, xmm1);
-  psrlq(xmm1, 32);
-  movdl(ecx, xmm1);
-  addl(ecx, ecx);
-  cmpl(ecx, -2097152);
-  jcc(Assembler::aboveEqual, L_2TAG_PACKET_6_0_2);
-  orl(edx, ecx);
-  cmpl(edx, 0);
-  jcc(Assembler::equal, L_2TAG_PACKET_8_0_2);
-
-  bind(L_2TAG_PACKET_7_0_2);
-  xorpd(xmm1, xmm1);
-  xorpd(xmm0, xmm0);
-  movl(eax, 32752);
-  pinsrw(xmm1, eax, 3);
-  movl(edx, 9);
-  mulsd(xmm0, xmm1);
-
-  bind(L_2TAG_PACKET_9_0_2);
-  movsd(Address(rsp, 0), xmm0);
-  movsd(xmm0, Address(rsp, 112));    //0xbcfa1d84UL, 0x6baa7c00UL, 0x3fd28651UL, 0xfd9abec1UL
-  fld_d(Address(rsp, 0));
-  jmp(L_2TAG_PACKET_10_0_2);
-
-  bind(L_2TAG_PACKET_8_0_2);
-  xorpd(xmm1, xmm1);
-  xorpd(xmm0, xmm0);
-  movl(eax, 49136);
-  pinsrw(xmm0, eax, 3);
-  divsd(xmm0, xmm1);
-  movl(edx, 8);
-  jmp(L_2TAG_PACKET_9_0_2);
-
-  bind(L_2TAG_PACKET_4_0_2);
-  movdl(edx, xmm1);
-  psrlq(xmm1, 32);
-  movdl(ecx, xmm1);
-  orl(edx, ecx);
-  cmpl(edx, 0);
-  jcc(Assembler::equal, L_2TAG_PACKET_8_0_2);
-  xorpd(xmm1, xmm1);
-  movl(eax, 18416);
-  pinsrw(xmm1, eax, 3);
-  mulsd(xmm0, xmm1);
-  xorpd(xmm2, xmm2);
-  movl(eax, 16368);
-  pinsrw(xmm2, eax, 3);
-  movdqu(xmm1, xmm0);
-  pextrw(eax, xmm0, 3);
-  por(xmm0, xmm2);
-  movl(ecx, 18416);
-  psllq(xmm0, 5);
-  movsd(xmm2, Address(tmp, 2144));    //0xbff27af2UL, 0xf8000000UL, 0xffffffffUL, 0x00000000UL
-  psrlq(xmm0, 34);
-  rcpss(xmm0, xmm0);
-  psllq(xmm1, 12);
-  pshufd(xmm6, xmm5, 78);
-  psrlq(xmm1, 12);
-  jmp(L_2TAG_PACKET_1_0_2);
-
-  bind(L_2TAG_PACKET_2_0_2);
-  movsd(Address(rsp, 24), xmm0);
-  fld_d(Address(rsp, 24));
-
-  bind(L_2TAG_PACKET_10_0_2);
-  movl(tmp, Address(rsp, 40));
-
-}
diff --git a/src/hotspot/cpu/x86/macroAssembler_x86_32_pow.cpp b/src/hotspot/cpu/x86/macroAssembler_x86_32_pow.cpp
deleted file mode 100644
index 2d8a8ef91ac..00000000000
--- a/src/hotspot/cpu/x86/macroAssembler_x86_32_pow.cpp
+++ /dev/null
@@ -1,1855 +0,0 @@
-/*
-* Copyright (c) 2016, 2021, Intel Corporation. All rights reserved.
-* Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved.
-* Intel Math Library (LIBM) Source Code
-*
-* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
-*
-* This code is free software; you can redistribute it and/or modify it
-* under the terms of the GNU General Public License version 2 only, as
-* published by the Free Software Foundation.
-*
-* This code is distributed in the hope that it will be useful, but WITHOUT
-* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-* FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-* version 2 for more details (a copy is included in the LICENSE file that
-* accompanied this code).
-*
-* You should have received a copy of the GNU General Public License version
-* 2 along with this work; if not, write to the Free Software Foundation,
-* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
-*
-* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
-* or visit www.oracle.com if you need additional information or have any
-* questions.
-*
-*/
-
-#include "asm/assembler.hpp"
-#include "asm/assembler.inline.hpp"
-#include "macroAssembler_x86.hpp"
-#include "runtime/stubRoutines.hpp"
-#include "utilities/globalDefinitions.hpp"
-
-/******************************************************************************/
-//                     ALGORITHM DESCRIPTION  - POW()
-//                     ---------------------
-//
-//    Let x=2^k * mx, mx in [1,2)
-//
-//    log2(x) calculation:
-//
-//    Get B~1/mx based on the output of rcpps instruction (B0)
-//    B = int((B0*LH*2^9+0.5))/2^9
-//    LH is a short approximation for log2(e)
-//
-//    Reduced argument, scaled by LH:
-//                r=B*mx-LH (computed accurately in high and low parts)
-//
-//    log2(x) result:  k - log2(B) + p(r)
-//             p(r) is a degree 8 polynomial
-//             -log2(B) read from data table (high, low parts)
-//             log2(x) is formed from high and low parts
-//    For |x| in [1-1/32, 1+1/16), a slower but more accurate computation
-//    based om the same table design is performed.
-//
-//   Main path is taken if | floor(log2(|log2(|x|)|) + floor(log2|y|) | < 8,
-//   to filter out all potential OF/UF cases.
-//   exp2(y*log2(x)) is computed using an 8-bit index table and a degree 5
-//   polynomial
-//
-// Special cases:
-//  pow(-0,y) = -INF and raises the divide-by-zero exception for y an odd
-//  integer < 0.
-//  pow(-0,y) = +INF and raises the divide-by-zero exception for y < 0 and
-//  not an odd integer.
-//  pow(-0,y) = -0 for y an odd integer > 0.
-//  pow(-0,y) = +0 for y > 0 and not an odd integer.
-//  pow(-1,-INF) = NaN.
-//  pow(+1,y) = NaN for any y, even a NaN.
-//  pow(x,-0) = 1 for any x, even a NaN.
-//  pow(x,y) = a NaN and raises the invalid exception for finite x < 0 and
-//  finite non-integer y.
-//  pow(x,-INF) = +INF for |x|<1.
-//  pow(x,-INF) = +0 for |x|>1.
-//  pow(x,+INF) = +0 for |x|<1.
-//  pow(x,+INF) = +INF for |x|>1.
-//  pow(-INF,y) = -0 for y an odd integer < 0.
-//  pow(-INF,y) = +0 for y < 0 and not an odd integer.
-//  pow(-INF,y) = -INF for y an odd integer > 0.
-//  pow(-INF,y) = +INF for y > 0 and not an odd integer.
-//  pow(+INF,y) = +0 for y <0.
-//  pow(+INF,y) = +INF for y >0.
-//
-/******************************************************************************/
-
-// The 32 bit code is at most SSE2 compliant
-ATTRIBUTE_ALIGNED(16) static const juint _static_const_table_pow[] =
-{
-    0x00000000UL, 0xbfd61a00UL, 0x00000000UL, 0xbf5dabe1UL, 0xf8000000UL,
-    0xffffffffUL, 0x00000000UL, 0xfffff800UL, 0x00000000UL, 0x3ff00000UL,
-    0x00000000UL, 0x00000000UL, 0x20000000UL, 0x3feff00aUL, 0x96621f95UL,
-    0x3e5b1856UL, 0xe0000000UL, 0x3fefe019UL, 0xe5916f9eUL, 0xbe325278UL,
-    0x00000000UL, 0x3fefd02fUL, 0x859a1062UL, 0x3e595fb7UL, 0xc0000000UL,
-    0x3fefc049UL, 0xb245f18fUL, 0xbe529c38UL, 0xe0000000UL, 0x3fefb069UL,
-    0xad2880a7UL, 0xbe501230UL, 0x60000000UL, 0x3fefa08fUL, 0xc8e72420UL,
-    0x3e597bd1UL, 0x80000000UL, 0x3fef90baUL, 0xc30c4500UL, 0xbe5d6c75UL,
-    0xe0000000UL, 0x3fef80eaUL, 0x02c63f43UL, 0x3e2e1318UL, 0xc0000000UL,
-    0x3fef7120UL, 0xb3d4ccccUL, 0xbe44c52aUL, 0x00000000UL, 0x3fef615cUL,
-    0xdbd91397UL, 0xbe4e7d6cUL, 0xa0000000UL, 0x3fef519cUL, 0x65c5cd68UL,
-    0xbe522dc8UL, 0xa0000000UL, 0x3fef41e2UL, 0x46d1306cUL, 0xbe5a840eUL,
-    0xe0000000UL, 0x3fef322dUL, 0xd2980e94UL, 0x3e5071afUL, 0xa0000000UL,
-    0x3fef227eUL, 0x773abadeUL, 0xbe5891e5UL, 0xa0000000UL, 0x3fef12d4UL,
-    0xdc6bf46bUL, 0xbe5cccbeUL, 0xe0000000UL, 0x3fef032fUL, 0xbc7247faUL,
-    0xbe2bab83UL, 0x80000000UL, 0x3feef390UL, 0xbcaa1e46UL, 0xbe53bb3bUL,
-    0x60000000UL, 0x3feee3f6UL, 0x5f6c682dUL, 0xbe54c619UL, 0x80000000UL,
-    0x3feed461UL, 0x5141e368UL, 0xbe4b6d86UL, 0xe0000000UL, 0x3feec4d1UL,
-    0xec678f76UL, 0xbe369af6UL, 0x80000000UL, 0x3feeb547UL, 0x41301f55UL,
-    0xbe2d4312UL, 0x60000000UL, 0x3feea5c2UL, 0x676da6bdUL, 0xbe4d8dd0UL,
-    0x60000000UL, 0x3fee9642UL, 0x57a891c4UL, 0x3e51f991UL, 0xa0000000UL,
-    0x3fee86c7UL, 0xe4eb491eUL, 0x3e579bf9UL, 0x20000000UL, 0x3fee7752UL,
-    0xfddc4a2cUL, 0xbe3356e6UL, 0xc0000000UL, 0x3fee67e1UL, 0xd75b5bf1UL,
-    0xbe449531UL, 0x80000000UL, 0x3fee5876UL, 0xbd423b8eUL, 0x3df54fe4UL,
-    0x60000000UL, 0x3fee4910UL, 0x330e51b9UL, 0x3e54289cUL, 0x80000000UL,
-    0x3fee39afUL, 0x8651a95fUL, 0xbe55aad6UL, 0xa0000000UL, 0x3fee2a53UL,
-    0x5e98c708UL, 0xbe2fc4a9UL, 0xe0000000UL, 0x3fee1afcUL, 0x0989328dUL,
-    0x3e23958cUL, 0x40000000UL, 0x3fee0babUL, 0xee642abdUL, 0xbe425dd8UL,
-    0xa0000000UL, 0x3fedfc5eUL, 0xc394d236UL, 0x3e526362UL, 0x20000000UL,
-    0x3feded17UL, 0xe104aa8eUL, 0x3e4ce247UL, 0xc0000000UL, 0x3fedddd4UL,
-    0x265a9be4UL, 0xbe5bb77aUL, 0x40000000UL, 0x3fedce97UL, 0x0ecac52fUL,
-    0x3e4a7cb1UL, 0xe0000000UL, 0x3fedbf5eUL, 0x124cb3b8UL, 0x3e257024UL,
-    0x80000000UL, 0x3fedb02bUL, 0xe6d4febeUL, 0xbe2033eeUL, 0x20000000UL,
-    0x3feda0fdUL, 0x39cca00eUL, 0xbe3ddabcUL, 0xc0000000UL, 0x3fed91d3UL,
-    0xef8a552aUL, 0xbe543390UL, 0x40000000UL, 0x3fed82afUL, 0xb8e85204UL,
-    0x3e513850UL, 0xe0000000UL, 0x3fed738fUL, 0x3d59fe08UL, 0xbe5db728UL,
-    0x40000000UL, 0x3fed6475UL, 0x3aa7ead1UL, 0x3e58804bUL, 0xc0000000UL,
-    0x3fed555fUL, 0xf8a35ba9UL, 0xbe5298b0UL, 0x00000000UL, 0x3fed464fUL,
-    0x9a88dd15UL, 0x3e5a8cdbUL, 0x40000000UL, 0x3fed3743UL, 0xb0b0a190UL,
-    0x3e598635UL, 0x80000000UL, 0x3fed283cUL, 0xe2113295UL, 0xbe5c1119UL,
-    0x80000000UL, 0x3fed193aUL, 0xafbf1728UL, 0xbe492e9cUL, 0x60000000UL,
-    0x3fed0a3dUL, 0xe4a4ccf3UL, 0x3e19b90eUL, 0x20000000UL, 0x3fecfb45UL,
-    0xba3cbeb8UL, 0x3e406b50UL, 0xc0000000UL, 0x3fecec51UL, 0x110f7dddUL,
-    0x3e0d6806UL, 0x40000000UL, 0x3fecdd63UL, 0x7dd7d508UL, 0xbe5a8943UL,
-    0x80000000UL, 0x3fecce79UL, 0x9b60f271UL, 0xbe50676aUL, 0x80000000UL,
-    0x3fecbf94UL, 0x0b9ad660UL, 0x3e59174fUL, 0x60000000UL, 0x3fecb0b4UL,
-    0x00823d9cUL, 0x3e5bbf72UL, 0x20000000UL, 0x3feca1d9UL, 0x38a6ec89UL,
-    0xbe4d38f9UL, 0x80000000UL, 0x3fec9302UL, 0x3a0b7d8eUL, 0x3e53dbfdUL,
-    0xc0000000UL, 0x3fec8430UL, 0xc6826b34UL, 0xbe27c5c9UL, 0xc0000000UL,
-    0x3fec7563UL, 0x0c706381UL, 0xbe593653UL, 0x60000000UL, 0x3fec669bUL,
-    0x7df34ec7UL, 0x3e461ab5UL, 0xe0000000UL, 0x3fec57d7UL, 0x40e5e7e8UL,
-    0xbe5c3daeUL, 0x00000000UL, 0x3fec4919UL, 0x5602770fUL, 0xbe55219dUL,
-    0xc0000000UL, 0x3fec3a5eUL, 0xec7911ebUL, 0x3e5a5d25UL, 0x60000000UL,
-    0x3fec2ba9UL, 0xb39ea225UL, 0xbe53c00bUL, 0x80000000UL, 0x3fec1cf8UL,
-    0x967a212eUL, 0x3e5a8ddfUL, 0x60000000UL, 0x3fec0e4cUL, 0x580798bdUL,
-    0x3e5f53abUL, 0x00000000UL, 0x3febffa5UL, 0xb8282df6UL, 0xbe46b874UL,
-    0x20000000UL, 0x3febf102UL, 0xe33a6729UL, 0x3e54963fUL, 0x00000000UL,
-    0x3febe264UL, 0x3b53e88aUL, 0xbe3adce1UL, 0x60000000UL, 0x3febd3caUL,
-    0xc2585084UL, 0x3e5cde9fUL, 0x80000000UL, 0x3febc535UL, 0xa335c5eeUL,
-    0xbe39fd9cUL, 0x20000000UL, 0x3febb6a5UL, 0x7325b04dUL, 0x3e42ba15UL,
-    0x60000000UL, 0x3feba819UL, 0x1564540fUL, 0x3e3a9f35UL, 0x40000000UL,
-    0x3feb9992UL, 0x83fff592UL, 0xbe5465ceUL, 0xa0000000UL, 0x3feb8b0fUL,
-    0xb9da63d3UL, 0xbe4b1a0aUL, 0x80000000UL, 0x3feb7c91UL, 0x6d6f1ea4UL,
-    0x3e557657UL, 0x00000000UL, 0x3feb6e18UL, 0x5e80a1bfUL, 0x3e4ddbb6UL,
-    0x00000000UL, 0x3feb5fa3UL, 0x1c9eacb5UL, 0x3e592877UL, 0xa0000000UL,
-    0x3feb5132UL, 0x6d40beb3UL, 0xbe51858cUL, 0xa0000000UL, 0x3feb42c6UL,
-    0xd740c67bUL, 0x3e427ad2UL, 0x40000000UL, 0x3feb345fUL, 0xa3e0cceeUL,
-    0xbe5c2fc4UL, 0x40000000UL, 0x3feb25fcUL, 0x8e752b50UL, 0xbe3da3c2UL,
-    0xc0000000UL, 0x3feb179dUL, 0xa892e7deUL, 0x3e1fb481UL, 0xc0000000UL,
-    0x3feb0943UL, 0x21ed71e9UL, 0xbe365206UL, 0x20000000UL, 0x3feafaeeUL,
-    0x0e1380a3UL, 0x3e5c5b7bUL, 0x20000000UL, 0x3feaec9dUL, 0x3c3d640eUL,
-    0xbe5dbbd0UL, 0x60000000UL, 0x3feade50UL, 0x8f97a715UL, 0x3e3a8ec5UL,
-    0x20000000UL, 0x3fead008UL, 0x23ab2839UL, 0x3e2fe98aUL, 0x40000000UL,
-    0x3feac1c4UL, 0xf4bbd50fUL, 0x3e54d8f6UL, 0xe0000000UL, 0x3feab384UL,
-    0x14757c4dUL, 0xbe48774cUL, 0xc0000000UL, 0x3feaa549UL, 0x7c7b0eeaUL,
-    0x3e5b51bbUL, 0x20000000UL, 0x3fea9713UL, 0xf56f7013UL, 0x3e386200UL,
-    0xe0000000UL, 0x3fea88e0UL, 0xbe428ebeUL, 0xbe514af5UL, 0xe0000000UL,
-    0x3fea7ab2UL, 0x8d0e4496UL, 0x3e4f9165UL, 0x60000000UL, 0x3fea6c89UL,
-    0xdbacc5d5UL, 0xbe5c063bUL, 0x20000000UL, 0x3fea5e64UL, 0x3f19d970UL,
-    0xbe5a0c8cUL, 0x20000000UL, 0x3fea5043UL, 0x09ea3e6bUL, 0x3e5065dcUL,
-    0x80000000UL, 0x3fea4226UL, 0x78df246cUL, 0x3e5e05f6UL, 0x40000000UL,
-    0x3fea340eUL, 0x4057d4a0UL, 0x3e431b2bUL, 0x40000000UL, 0x3fea25faUL,
-    0x82867bb5UL, 0x3e4b76beUL, 0xa0000000UL, 0x3fea17eaUL, 0x9436f40aUL,
-    0xbe5aad39UL, 0x20000000UL, 0x3fea09dfUL, 0x4b5253b3UL, 0x3e46380bUL,
-    0x00000000UL, 0x3fe9fbd8UL, 0x8fc52466UL, 0xbe386f9bUL, 0x20000000UL,
-    0x3fe9edd5UL, 0x22d3f344UL, 0xbe538347UL, 0x60000000UL, 0x3fe9dfd6UL,
-    0x1ac33522UL, 0x3e5dbc53UL, 0x00000000UL, 0x3fe9d1dcUL, 0xeabdff1dUL,
-    0x3e40fc0cUL, 0xe0000000UL, 0x3fe9c3e5UL, 0xafd30e73UL, 0xbe585e63UL,
-    0xe0000000UL, 0x3fe9b5f3UL, 0xa52f226aUL, 0xbe43e8f9UL, 0x20000000UL,
-    0x3fe9a806UL, 0xecb8698dUL, 0xbe515b36UL, 0x80000000UL, 0x3fe99a1cUL,
-    0xf2b4e89dUL, 0x3e48b62bUL, 0x20000000UL, 0x3fe98c37UL, 0x7c9a88fbUL,
-    0x3e44414cUL, 0x00000000UL, 0x3fe97e56UL, 0xda015741UL, 0xbe5d13baUL,
-    0xe0000000UL, 0x3fe97078UL, 0x5fdace06UL, 0x3e51b947UL, 0x00000000UL,
-    0x3fe962a0UL, 0x956ca094UL, 0x3e518785UL, 0x40000000UL, 0x3fe954cbUL,
-    0x01164c1dUL, 0x3e5d5b57UL, 0xc0000000UL, 0x3fe946faUL, 0xe63b3767UL,
-    0xbe4f84e7UL, 0x40000000UL, 0x3fe9392eUL, 0xe57cc2a9UL, 0x3e34eda3UL,
-    0xe0000000UL, 0x3fe92b65UL, 0x8c75b544UL, 0x3e5766a0UL, 0xc0000000UL,
-    0x3fe91da1UL, 0x37d1d087UL, 0xbe5e2ab1UL, 0x80000000UL, 0x3fe90fe1UL,
-    0xa953dc20UL, 0x3e5fa1f3UL, 0x80000000UL, 0x3fe90225UL, 0xdbd3f369UL,
-    0x3e47d6dbUL, 0xa0000000UL, 0x3fe8f46dUL, 0x1c9be989UL, 0xbe5e2b0aUL,
-    0xa0000000UL, 0x3fe8e6b9UL, 0x3c93d76aUL, 0x3e5c8618UL, 0xe0000000UL,
-    0x3fe8d909UL, 0x2182fc9aUL, 0xbe41aa9eUL, 0x20000000UL, 0x3fe8cb5eUL,
-    0xe6b3539dUL, 0xbe530d19UL, 0x60000000UL, 0x3fe8bdb6UL, 0x49e58cc3UL,
-    0xbe3bb374UL, 0xa0000000UL, 0x3fe8b012UL, 0xa7cfeb8fUL, 0x3e56c412UL,
-    0x00000000UL, 0x3fe8a273UL, 0x8d52bc19UL, 0x3e1429b8UL, 0x60000000UL,
-    0x3fe894d7UL, 0x4dc32c6cUL, 0xbe48604cUL, 0xc0000000UL, 0x3fe8873fUL,
-    0x0c868e56UL, 0xbe564ee5UL, 0x00000000UL, 0x3fe879acUL, 0x56aee828UL,
-    0x3e5e2fd8UL, 0x60000000UL, 0x3fe86c1cUL, 0x7ceab8ecUL, 0x3e493365UL,
-    0xc0000000UL, 0x3fe85e90UL, 0x78d4dadcUL, 0xbe4f7f25UL, 0x00000000UL,
-    0x3fe85109UL, 0x0ccd8280UL, 0x3e31e7a2UL, 0x40000000UL, 0x3fe84385UL,
-    0x34ba4e15UL, 0x3e328077UL, 0x80000000UL, 0x3fe83605UL, 0xa670975aUL,
-    0xbe53eee5UL, 0xa0000000UL, 0x3fe82889UL, 0xf61b77b2UL, 0xbe43a20aUL,
-    0xa0000000UL, 0x3fe81b11UL, 0x13e6643bUL, 0x3e5e5fe5UL, 0xc0000000UL,
-    0x3fe80d9dUL, 0x82cc94e8UL, 0xbe5ff1f9UL, 0xa0000000UL, 0x3fe8002dUL,
-    0x8a0c9c5dUL, 0xbe42b0e7UL, 0x60000000UL, 0x3fe7f2c1UL, 0x22a16f01UL,
-    0x3e5d9ea0UL, 0x20000000UL, 0x3fe7e559UL, 0xc38cd451UL, 0x3e506963UL,
-    0xc0000000UL, 0x3fe7d7f4UL, 0x9902bc71UL, 0x3e4503d7UL, 0x40000000UL,
-    0x3fe7ca94UL, 0xdef2a3c0UL, 0x3e3d98edUL, 0xa0000000UL, 0x3fe7bd37UL,
-    0xed49abb0UL, 0x3e24c1ffUL, 0xe0000000UL, 0x3fe7afdeUL, 0xe3b0be70UL,
-    0xbe40c467UL, 0x00000000UL, 0x3fe7a28aUL, 0xaf9f193cUL, 0xbe5dff6cUL,
-    0xe0000000UL, 0x3fe79538UL, 0xb74cf6b6UL, 0xbe258ed0UL, 0xa0000000UL,
-    0x3fe787ebUL, 0x1d9127c7UL, 0x3e345fb0UL, 0x40000000UL, 0x3fe77aa2UL,
-    0x1028c21dUL, 0xbe4619bdUL, 0xa0000000UL, 0x3fe76d5cUL, 0x7cb0b5e4UL,
-    0x3e40f1a2UL, 0xe0000000UL, 0x3fe7601aUL, 0x2b1bc4adUL, 0xbe32e8bbUL,
-    0xe0000000UL, 0x3fe752dcUL, 0x6839f64eUL, 0x3e41f57bUL, 0xc0000000UL,
-    0x3fe745a2UL, 0xc4121f7eUL, 0xbe52c40aUL, 0x60000000UL, 0x3fe7386cUL,
-    0xd6852d72UL, 0xbe5c4e6bUL, 0xc0000000UL, 0x3fe72b39UL, 0x91d690f7UL,
-    0xbe57f88fUL, 0xe0000000UL, 0x3fe71e0aUL, 0x627a2159UL, 0xbe4425d5UL,
-    0xc0000000UL, 0x3fe710dfUL, 0x50a54033UL, 0x3e422b7eUL, 0x60000000UL,
-    0x3fe703b8UL, 0x3b0b5f91UL, 0x3e5d3857UL, 0xe0000000UL, 0x3fe6f694UL,
-    0x84d628a2UL, 0xbe51f090UL, 0x00000000UL, 0x3fe6e975UL, 0x306d8894UL,
-    0xbe414d83UL, 0xe0000000UL, 0x3fe6dc58UL, 0x30bf24aaUL, 0xbe4650caUL,
-    0x80000000UL, 0x3fe6cf40UL, 0xd4628d69UL, 0xbe5db007UL, 0xc0000000UL,
-    0x3fe6c22bUL, 0xa2aae57bUL, 0xbe31d279UL, 0xc0000000UL, 0x3fe6b51aUL,
-    0x860edf7eUL, 0xbe2d4c4aUL, 0x80000000UL, 0x3fe6a80dUL, 0xf3559341UL,
-    0xbe5f7e98UL, 0xe0000000UL, 0x3fe69b03UL, 0xa885899eUL, 0xbe5c2011UL,
-    0xe0000000UL, 0x3fe68dfdUL, 0x2bdc6d37UL, 0x3e224a82UL, 0xa0000000UL,
-    0x3fe680fbUL, 0xc12ad1b9UL, 0xbe40cf56UL, 0x00000000UL, 0x3fe673fdUL,
-    0x1bcdf659UL, 0xbdf52f2dUL, 0x00000000UL, 0x3fe66702UL, 0x5df10408UL,
-    0x3e5663e0UL, 0xc0000000UL, 0x3fe65a0aUL, 0xa4070568UL, 0xbe40b12fUL,
-    0x00000000UL, 0x3fe64d17UL, 0x71c54c47UL, 0x3e5f5e8bUL, 0x00000000UL,
-    0x3fe64027UL, 0xbd4b7e83UL, 0x3e42ead6UL, 0xa0000000UL, 0x3fe6333aUL,
-    0x61598bd2UL, 0xbe4c48d4UL, 0xc0000000UL, 0x3fe62651UL, 0x6f538d61UL,
-    0x3e548401UL, 0xa0000000UL, 0x3fe6196cUL, 0x14344120UL, 0xbe529af6UL,
-    0x00000000UL, 0x3fe60c8bUL, 0x5982c587UL, 0xbe3e1e4fUL, 0x00000000UL,
-    0x3fe5ffadUL, 0xfe51d4eaUL, 0xbe4c897aUL, 0x80000000UL, 0x3fe5f2d2UL,
-    0xfd46ebe1UL, 0x3e552e00UL, 0xa0000000UL, 0x3fe5e5fbUL, 0xa4695699UL,
-    0x3e5ed471UL, 0x60000000UL, 0x3fe5d928UL, 0x80d118aeUL, 0x3e456b61UL,
-    0xa0000000UL, 0x3fe5cc58UL, 0x304c330bUL, 0x3e54dc29UL, 0x80000000UL,
-    0x3fe5bf8cUL, 0x0af2dedfUL, 0xbe3aa9bdUL, 0xe0000000UL, 0x3fe5b2c3UL,
-    0x15fc9258UL, 0xbe479a37UL, 0xc0000000UL, 0x3fe5a5feUL, 0x9292c7eaUL,
-    0x3e188650UL, 0x20000000UL, 0x3fe5993dUL, 0x33b4d380UL, 0x3e5d6d93UL,
-    0x20000000UL, 0x3fe58c7fUL, 0x02fd16c7UL, 0x3e2fe961UL, 0xa0000000UL,
-    0x3fe57fc4UL, 0x4a05edb6UL, 0xbe4d55b4UL, 0xa0000000UL, 0x3fe5730dUL,
-    0x3d443abbUL, 0xbe5e6954UL, 0x00000000UL, 0x3fe5665aUL, 0x024acfeaUL,
-    0x3e50e61bUL, 0x00000000UL, 0x3fe559aaUL, 0xcc9edd09UL, 0xbe325403UL,
-    0x60000000UL, 0x3fe54cfdUL, 0x1fe26950UL, 0x3e5d500eUL, 0x60000000UL,
-    0x3fe54054UL, 0x6c5ae164UL, 0xbe4a79b4UL, 0xc0000000UL, 0x3fe533aeUL,
-    0x154b0287UL, 0xbe401571UL, 0xa0000000UL, 0x3fe5270cUL, 0x0673f401UL,
-    0xbe56e56bUL, 0xe0000000UL, 0x3fe51a6dUL, 0x751b639cUL, 0x3e235269UL,
-    0xa0000000UL, 0x3fe50dd2UL, 0x7c7b2bedUL, 0x3ddec887UL, 0xc0000000UL,
-    0x3fe5013aUL, 0xafab4e17UL, 0x3e5e7575UL, 0x60000000UL, 0x3fe4f4a6UL,
-    0x2e308668UL, 0x3e59aed6UL, 0x80000000UL, 0x3fe4e815UL, 0xf33e2a76UL,
-    0xbe51f184UL, 0xe0000000UL, 0x3fe4db87UL, 0x839f3e3eUL, 0x3e57db01UL,
-    0xc0000000UL, 0x3fe4cefdUL, 0xa9eda7bbUL, 0x3e535e0fUL, 0x00000000UL,
-    0x3fe4c277UL, 0x2a8f66a5UL, 0x3e5ce451UL, 0xc0000000UL, 0x3fe4b5f3UL,
-    0x05192456UL, 0xbe4e8518UL, 0xc0000000UL, 0x3fe4a973UL, 0x4aa7cd1dUL,
-    0x3e46784aUL, 0x40000000UL, 0x3fe49cf7UL, 0x8e23025eUL, 0xbe5749f2UL,
-    0x00000000UL, 0x3fe4907eUL, 0x18d30215UL, 0x3e360f39UL, 0x20000000UL,
-    0x3fe48408UL, 0x63dcf2f3UL, 0x3e5e00feUL, 0xc0000000UL, 0x3fe47795UL,
-    0x46182d09UL, 0xbe5173d9UL, 0xa0000000UL, 0x3fe46b26UL, 0x8f0e62aaUL,
-    0xbe48f281UL, 0xe0000000UL, 0x3fe45ebaUL, 0x5775c40cUL, 0xbe56aad4UL,
-    0x60000000UL, 0x3fe45252UL, 0x0fe25f69UL, 0x3e48bd71UL, 0x40000000UL,
-    0x3fe445edUL, 0xe9989ec5UL, 0x3e590d97UL, 0x80000000UL, 0x3fe4398bUL,
-    0xb3d9ffe3UL, 0x3e479dbcUL, 0x20000000UL, 0x3fe42d2dUL, 0x388e4d2eUL,
-    0xbe5eed80UL, 0xe0000000UL, 0x3fe420d1UL, 0x6f797c18UL, 0x3e554b4cUL,
-    0x20000000UL, 0x3fe4147aUL, 0x31048bb4UL, 0xbe5b1112UL, 0x80000000UL,
-    0x3fe40825UL, 0x2efba4f9UL, 0x3e48ebc7UL, 0x40000000UL, 0x3fe3fbd4UL,
-    0x50201119UL, 0x3e40b701UL, 0x40000000UL, 0x3fe3ef86UL, 0x0a4db32cUL,
-    0x3e551de8UL, 0xa0000000UL, 0x3fe3e33bUL, 0x0c9c148bUL, 0xbe50c1f6UL,
-    0x20000000UL, 0x3fe3d6f4UL, 0xc9129447UL, 0x3e533fa0UL, 0x00000000UL,
-    0x3fe3cab0UL, 0xaae5b5a0UL, 0xbe22b68eUL, 0x20000000UL, 0x3fe3be6fUL,
-    0x02305e8aUL, 0xbe54fc08UL, 0x60000000UL, 0x3fe3b231UL, 0x7f908258UL,
-    0x3e57dc05UL, 0x00000000UL, 0x3fe3a5f7UL, 0x1a09af78UL, 0x3e08038bUL,
-    0xe0000000UL, 0x3fe399bfUL, 0x490643c1UL, 0xbe5dbe42UL, 0xe0000000UL,
-    0x3fe38d8bUL, 0x5e8ad724UL, 0xbe3c2b72UL, 0x20000000UL, 0x3fe3815bUL,
-    0xc67196b6UL, 0x3e1713cfUL, 0xa0000000UL, 0x3fe3752dUL, 0x6182e429UL,
-    0xbe3ec14cUL, 0x40000000UL, 0x3fe36903UL, 0xab6eb1aeUL, 0x3e5a2cc5UL,
-    0x40000000UL, 0x3fe35cdcUL, 0xfe5dc064UL, 0xbe5c5878UL, 0x40000000UL,
-    0x3fe350b8UL, 0x0ba6b9e4UL, 0x3e51619bUL, 0x80000000UL, 0x3fe34497UL,
-    0x857761aaUL, 0x3e5fff53UL, 0x00000000UL, 0x3fe3387aUL, 0xf872d68cUL,
-    0x3e484f4dUL, 0xa0000000UL, 0x3fe32c5fUL, 0x087e97c2UL, 0x3e52842eUL,
-    0x80000000UL, 0x3fe32048UL, 0x73d6d0c0UL, 0xbe503edfUL, 0x80000000UL,
-    0x3fe31434UL, 0x0c1456a1UL, 0xbe5f72adUL, 0xa0000000UL, 0x3fe30823UL,
-    0x83a1a4d5UL, 0xbe5e65ccUL, 0xe0000000UL, 0x3fe2fc15UL, 0x855a7390UL,
-    0xbe506438UL, 0x40000000UL, 0x3fe2f00bUL, 0xa2898287UL, 0x3e3d22a2UL,
-    0xe0000000UL, 0x3fe2e403UL, 0x8b56f66fUL, 0xbe5aa5fdUL, 0x80000000UL,
-    0x3fe2d7ffUL, 0x52db119aUL, 0x3e3a2e3dUL, 0x60000000UL, 0x3fe2cbfeUL,
-    0xe2ddd4c0UL, 0xbe586469UL, 0x40000000UL, 0x3fe2c000UL, 0x6b01bf10UL,
-    0x3e352b9dUL, 0x40000000UL, 0x3fe2b405UL, 0xb07a1cdfUL, 0x3e5c5cdaUL,
-    0x80000000UL, 0x3fe2a80dUL, 0xc7b5f868UL, 0xbe5668b3UL, 0xc0000000UL,
-    0x3fe29c18UL, 0x185edf62UL, 0xbe563d66UL, 0x00000000UL, 0x3fe29027UL,
-    0xf729e1ccUL, 0x3e59a9a0UL, 0x80000000UL, 0x3fe28438UL, 0x6433c727UL,
-    0xbe43cc89UL, 0x00000000UL, 0x3fe2784dUL, 0x41782631UL, 0xbe30750cUL,
-    0xa0000000UL, 0x3fe26c64UL, 0x914911b7UL, 0xbe58290eUL, 0x40000000UL,
-    0x3fe2607fUL, 0x3dcc73e1UL, 0xbe4269cdUL, 0x00000000UL, 0x3fe2549dUL,
-    0x2751bf70UL, 0xbe5a6998UL, 0xc0000000UL, 0x3fe248bdUL, 0x4248b9fbUL,
-    0xbe4ddb00UL, 0x80000000UL, 0x3fe23ce1UL, 0xf35cf82fUL, 0x3e561b71UL,
-    0x60000000UL, 0x3fe23108UL, 0x8e481a2dUL, 0x3e518fb9UL, 0x60000000UL,
-    0x3fe22532UL, 0x5ab96edcUL, 0xbe5fafc5UL, 0x40000000UL, 0x3fe2195fUL,
-    0x80943911UL, 0xbe07f819UL, 0x40000000UL, 0x3fe20d8fUL, 0x386f2d6cUL,
-    0xbe54ba8bUL, 0x40000000UL, 0x3fe201c2UL, 0xf29664acUL, 0xbe5eb815UL,
-    0x20000000UL, 0x3fe1f5f8UL, 0x64f03390UL, 0x3e5e320cUL, 0x20000000UL,
-    0x3fe1ea31UL, 0x747ff696UL, 0x3e5ef0a5UL, 0x40000000UL, 0x3fe1de6dUL,
-    0x3e9ceb51UL, 0xbe5f8d27UL, 0x20000000UL, 0x3fe1d2acUL, 0x4ae0b55eUL,
-    0x3e5faa21UL, 0x20000000UL, 0x3fe1c6eeUL, 0x28569a5eUL, 0x3e598a4fUL,
-    0x20000000UL, 0x3fe1bb33UL, 0x54b33e07UL, 0x3e46130aUL, 0x20000000UL,
-    0x3fe1af7bUL, 0x024f1078UL, 0xbe4dbf93UL, 0x00000000UL, 0x3fe1a3c6UL,
-    0xb0783bfaUL, 0x3e419248UL, 0xe0000000UL, 0x3fe19813UL, 0x2f02b836UL,
-    0x3e4e02b7UL, 0xc0000000UL, 0x3fe18c64UL, 0x28dec9d4UL, 0x3e09064fUL,
-    0x80000000UL, 0x3fe180b8UL, 0x45cbf406UL, 0x3e5b1f46UL, 0x40000000UL,
-    0x3fe1750fUL, 0x03d9964cUL, 0x3e5b0a79UL, 0x00000000UL, 0x3fe16969UL,
-    0x8b5b882bUL, 0xbe238086UL, 0xa0000000UL, 0x3fe15dc5UL, 0x73bad6f8UL,
-    0xbdf1fca4UL, 0x20000000UL, 0x3fe15225UL, 0x5385769cUL, 0x3e5e8d76UL,
-    0xa0000000UL, 0x3fe14687UL, 0x1676dc6bUL, 0x3e571d08UL, 0x20000000UL,
-    0x3fe13aedUL, 0xa8c41c7fUL, 0xbe598a25UL, 0x60000000UL, 0x3fe12f55UL,
-    0xc4e1aaf0UL, 0x3e435277UL, 0xa0000000UL, 0x3fe123c0UL, 0x403638e1UL,
-    0xbe21aa7cUL, 0xc0000000UL, 0x3fe1182eUL, 0x557a092bUL, 0xbdd0116bUL,
-    0xc0000000UL, 0x3fe10c9fUL, 0x7d779f66UL, 0x3e4a61baUL, 0xc0000000UL,
-    0x3fe10113UL, 0x2b09c645UL, 0xbe5d586eUL, 0x20000000UL, 0x3fe0ea04UL,
-    0xea2cad46UL, 0x3e5aa97cUL, 0x20000000UL, 0x3fe0d300UL, 0x23190e54UL,
-    0x3e50f1a7UL, 0xa0000000UL, 0x3fe0bc07UL, 0x1379a5a6UL, 0xbe51619dUL,
-    0x60000000UL, 0x3fe0a51aUL, 0x926a3d4aUL, 0x3e5cf019UL, 0xa0000000UL,
-    0x3fe08e38UL, 0xa8c24358UL, 0x3e35241eUL, 0x20000000UL, 0x3fe07762UL,
-    0x24317e7aUL, 0x3e512cfaUL, 0x00000000UL, 0x3fe06097UL, 0xfd9cf274UL,
-    0xbe55bef3UL, 0x00000000UL, 0x3fe049d7UL, 0x3689b49dUL, 0xbe36d26dUL,
-    0x40000000UL, 0x3fe03322UL, 0xf72ef6c4UL, 0xbe54cd08UL, 0xa0000000UL,
-    0x3fe01c78UL, 0x23702d2dUL, 0xbe5900bfUL, 0x00000000UL, 0x3fe005daUL,
-    0x3f59c14cUL, 0x3e57d80bUL, 0x40000000UL, 0x3fdfde8dUL, 0xad67766dUL,
-    0xbe57fad4UL, 0x40000000UL, 0x3fdfb17cUL, 0x644f4ae7UL, 0x3e1ee43bUL,
-    0x40000000UL, 0x3fdf8481UL, 0x903234d2UL, 0x3e501a86UL, 0x40000000UL,
-    0x3fdf579cUL, 0xafe9e509UL, 0xbe267c3eUL, 0x00000000UL, 0x3fdf2acdUL,
-    0xb7dfda0bUL, 0xbe48149bUL, 0x40000000UL, 0x3fdefe13UL, 0x3b94305eUL,
-    0x3e5f4ea7UL, 0x80000000UL, 0x3fded16fUL, 0x5d95da61UL, 0xbe55c198UL,
-    0x00000000UL, 0x3fdea4e1UL, 0x406960c9UL, 0xbdd99a19UL, 0x00000000UL,
-    0x3fde7868UL, 0xd22f3539UL, 0x3e470c78UL, 0x80000000UL, 0x3fde4c04UL,
-    0x83eec535UL, 0xbe3e1232UL, 0x40000000UL, 0x3fde1fb6UL, 0x3dfbffcbUL,
-    0xbe4b7d71UL, 0x40000000UL, 0x3fddf37dUL, 0x7e1be4e0UL, 0xbe5b8f8fUL,
-    0x40000000UL, 0x3fddc759UL, 0x46dae887UL, 0xbe350458UL, 0x80000000UL,
-    0x3fdd9b4aUL, 0xed6ecc49UL, 0xbe5f0045UL, 0x80000000UL, 0x3fdd6f50UL,
-    0x2e9e883cUL, 0x3e2915daUL, 0x80000000UL, 0x3fdd436bUL, 0xf0bccb32UL,
-    0x3e4a68c9UL, 0x80000000UL, 0x3fdd179bUL, 0x9bbfc779UL, 0xbe54a26aUL,
-    0x00000000UL, 0x3fdcebe0UL, 0x7cea33abUL, 0x3e43c6b7UL, 0x40000000UL,
-    0x3fdcc039UL, 0xe740fd06UL, 0x3e5526c2UL, 0x40000000UL, 0x3fdc94a7UL,
-    0x9eadeb1aUL, 0xbe396d8dUL, 0xc0000000UL, 0x3fdc6929UL, 0xf0a8f95aUL,
-    0xbe5c0ab2UL, 0x80000000UL, 0x3fdc3dc0UL, 0x6ee2693bUL, 0x3e0992e6UL,
-    0xc0000000UL, 0x3fdc126bUL, 0x5ac6b581UL, 0xbe2834b6UL, 0x40000000UL,
-    0x3fdbe72bUL, 0x8cc226ffUL, 0x3e3596a6UL, 0x00000000UL, 0x3fdbbbffUL,
-    0xf92a74bbUL, 0x3e3c5813UL, 0x00000000UL, 0x3fdb90e7UL, 0x479664c0UL,
-    0xbe50d644UL, 0x00000000UL, 0x3fdb65e3UL, 0x5004975bUL, 0xbe55258fUL,
-    0x00000000UL, 0x3fdb3af3UL, 0xe4b23194UL, 0xbe588407UL, 0xc0000000UL,
-    0x3fdb1016UL, 0xe65d4d0aUL, 0x3e527c26UL, 0x80000000UL, 0x3fdae54eUL,
-    0x814fddd6UL, 0x3e5962a2UL, 0x40000000UL, 0x3fdaba9aUL, 0xe19d0913UL,
-    0xbe562f4eUL, 0x80000000UL, 0x3fda8ff9UL, 0x43cfd006UL, 0xbe4cfdebUL,
-    0x40000000UL, 0x3fda656cUL, 0x686f0a4eUL, 0x3e5e47a8UL, 0xc0000000UL,
-    0x3fda3af2UL, 0x7200d410UL, 0x3e5e1199UL, 0xc0000000UL, 0x3fda108cUL,
-    0xabd2266eUL, 0x3e5ee4d1UL, 0x40000000UL, 0x3fd9e63aUL, 0x396f8f2cUL,
-    0x3e4dbffbUL, 0x00000000UL, 0x3fd9bbfbUL, 0xe32b25ddUL, 0x3e5c3a54UL,
-    0x40000000UL, 0x3fd991cfUL, 0x431e4035UL, 0xbe457925UL, 0x80000000UL,
-    0x3fd967b6UL, 0x7bed3dd3UL, 0x3e40c61dUL, 0x00000000UL, 0x3fd93db1UL,
-    0xd7449365UL, 0x3e306419UL, 0x80000000UL, 0x3fd913beUL, 0x1746e791UL,
-    0x3e56fcfcUL, 0x40000000UL, 0x3fd8e9dfUL, 0xf3a9028bUL, 0xbe5041b9UL,
-    0xc0000000UL, 0x3fd8c012UL, 0x56840c50UL, 0xbe26e20aUL, 0x40000000UL,
-    0x3fd89659UL, 0x19763102UL, 0xbe51f466UL, 0x80000000UL, 0x3fd86cb2UL,
-    0x7032de7cUL, 0xbe4d298aUL, 0x80000000UL, 0x3fd8431eUL, 0xdeb39fabUL,
-    0xbe4361ebUL, 0x40000000UL, 0x3fd8199dUL, 0x5d01cbe0UL, 0xbe5425b3UL,
-    0x80000000UL, 0x3fd7f02eUL, 0x3ce99aa9UL, 0x3e146fa8UL, 0x80000000UL,
-    0x3fd7c6d2UL, 0xd1a262b9UL, 0xbe5a1a69UL, 0xc0000000UL, 0x3fd79d88UL,
-    0x8606c236UL, 0x3e423a08UL, 0x80000000UL, 0x3fd77451UL, 0x8fd1e1b7UL,
-    0x3e5a6a63UL, 0xc0000000UL, 0x3fd74b2cUL, 0xe491456aUL, 0x3e42c1caUL,
-    0x40000000UL, 0x3fd7221aUL, 0x4499a6d7UL, 0x3e36a69aUL, 0x00000000UL,
-    0x3fd6f91aUL, 0x5237df94UL, 0xbe0f8f02UL, 0x00000000UL, 0x3fd6d02cUL,
-    0xb6482c6eUL, 0xbe5abcf7UL, 0x00000000UL, 0x3fd6a750UL, 0x1919fd61UL,
-    0xbe57ade2UL, 0x00000000UL, 0x3fd67e86UL, 0xaa7a994dUL, 0xbe3f3fbdUL,
-    0x00000000UL, 0x3fd655ceUL, 0x67db014cUL, 0x3e33c550UL, 0x00000000UL,
-    0x3fd62d28UL, 0xa82856b7UL, 0xbe1409d1UL, 0xc0000000UL, 0x3fd60493UL,
-    0x1e6a300dUL, 0x3e55d899UL, 0x80000000UL, 0x3fd5dc11UL, 0x1222bd5cUL,
-    0xbe35bfc0UL, 0xc0000000UL, 0x3fd5b3a0UL, 0x6e8dc2d3UL, 0x3e5d4d79UL,
-    0x00000000UL, 0x3fd58b42UL, 0xe0e4ace6UL, 0xbe517303UL, 0x80000000UL,
-    0x3fd562f4UL, 0xb306e0a8UL, 0x3e5edf0fUL, 0xc0000000UL, 0x3fd53ab8UL,
-    0x6574bc54UL, 0x3e5ee859UL, 0x80000000UL, 0x3fd5128eUL, 0xea902207UL,
-    0x3e5f6188UL, 0xc0000000UL, 0x3fd4ea75UL, 0x9f911d79UL, 0x3e511735UL,
-    0x80000000UL, 0x3fd4c26eUL, 0xf9c77397UL, 0xbe5b1643UL, 0x40000000UL,
-    0x3fd49a78UL, 0x15fc9258UL, 0x3e479a37UL, 0x80000000UL, 0x3fd47293UL,
-    0xd5a04dd9UL, 0xbe426e56UL, 0xc0000000UL, 0x3fd44abfUL, 0xe04042f5UL,
-    0x3e56f7c6UL, 0x40000000UL, 0x3fd422fdUL, 0x1d8bf2c8UL, 0x3e5d8810UL,
-    0x00000000UL, 0x3fd3fb4cUL, 0x88a8ddeeUL, 0xbe311454UL, 0xc0000000UL,
-    0x3fd3d3abUL, 0x3e3b5e47UL, 0xbe5d1b72UL, 0x40000000UL, 0x3fd3ac1cUL,
-    0xc2ab5d59UL, 0x3e31b02bUL, 0xc0000000UL, 0x3fd3849dUL, 0xd4e34b9eUL,
-    0x3e51cb2fUL, 0x40000000UL, 0x3fd35d30UL, 0x177204fbUL, 0xbe2b8cd7UL,
-    0x80000000UL, 0x3fd335d3UL, 0xfcd38c82UL, 0xbe4356e1UL, 0x80000000UL,
-    0x3fd30e87UL, 0x64f54accUL, 0xbe4e6224UL, 0x00000000UL, 0x3fd2e74cUL,
-    0xaa7975d9UL, 0x3e5dc0feUL, 0x80000000UL, 0x3fd2c021UL, 0x516dab3fUL,
-    0xbe50ffa3UL, 0x40000000UL, 0x3fd29907UL, 0x2bfb7313UL, 0x3e5674a2UL,
-    0xc0000000UL, 0x3fd271fdUL, 0x0549fc99UL, 0x3e385d29UL, 0xc0000000UL,
-    0x3fd24b04UL, 0x55b63073UL, 0xbe500c6dUL, 0x00000000UL, 0x3fd2241cUL,
-    0x3f91953aUL, 0x3e389977UL, 0xc0000000UL, 0x3fd1fd43UL, 0xa1543f71UL,
-    0xbe3487abUL, 0xc0000000UL, 0x3fd1d67bUL, 0x4ec8867cUL, 0x3df6a2dcUL,
-    0x00000000UL, 0x3fd1afc4UL, 0x4328e3bbUL, 0x3e41d9c0UL, 0x80000000UL,
-    0x3fd1891cUL, 0x2e1cda84UL, 0x3e3bdd87UL, 0x40000000UL, 0x3fd16285UL,
-    0x4b5331aeUL, 0xbe53128eUL, 0x00000000UL, 0x3fd13bfeUL, 0xb9aec164UL,
-    0xbe52ac98UL, 0xc0000000UL, 0x3fd11586UL, 0xd91e1316UL, 0xbe350630UL,
-    0x80000000UL, 0x3fd0ef1fUL, 0x7cacc12cUL, 0x3e3f5219UL, 0x40000000UL,
-    0x3fd0c8c8UL, 0xbce277b7UL, 0x3e3d30c0UL, 0x00000000UL, 0x3fd0a281UL,
-    0x2a63447dUL, 0xbe541377UL, 0x80000000UL, 0x3fd07c49UL, 0xfac483b5UL,
-    0xbe5772ecUL, 0xc0000000UL, 0x3fd05621UL, 0x36b8a570UL, 0xbe4fd4bdUL,
-    0xc0000000UL, 0x3fd03009UL, 0xbae505f7UL, 0xbe450388UL, 0x80000000UL,
-    0x3fd00a01UL, 0x3e35aeadUL, 0xbe5430fcUL, 0x80000000UL, 0x3fcfc811UL,
-    0x707475acUL, 0x3e38806eUL, 0x80000000UL, 0x3fcf7c3fUL, 0xc91817fcUL,
-    0xbe40cceaUL, 0x80000000UL, 0x3fcf308cUL, 0xae05d5e9UL, 0xbe4919b8UL,
-    0x80000000UL, 0x3fcee4f8UL, 0xae6cc9e6UL, 0xbe530b94UL, 0x00000000UL,
-    0x3fce9983UL, 0x1efe3e8eUL, 0x3e57747eUL, 0x00000000UL, 0x3fce4e2dUL,
-    0xda78d9bfUL, 0xbe59a608UL, 0x00000000UL, 0x3fce02f5UL, 0x8abe2c2eUL,
-    0x3e4a35adUL, 0x00000000UL, 0x3fcdb7dcUL, 0x1495450dUL, 0xbe0872ccUL,
-    0x80000000UL, 0x3fcd6ce1UL, 0x86ee0ba0UL, 0xbe4f59a0UL, 0x00000000UL,
-    0x3fcd2205UL, 0xe81ca888UL, 0x3e5402c3UL, 0x00000000UL, 0x3fccd747UL,
-    0x3b4424b9UL, 0x3e5dfdc3UL, 0x80000000UL, 0x3fcc8ca7UL, 0xd305b56cUL,
-    0x3e202da6UL, 0x00000000UL, 0x3fcc4226UL, 0x399a6910UL, 0xbe482a1cUL,
-    0x80000000UL, 0x3fcbf7c2UL, 0x747f7938UL, 0xbe587372UL, 0x80000000UL,
-    0x3fcbad7cUL, 0x6fc246a0UL, 0x3e50d83dUL, 0x00000000UL, 0x3fcb6355UL,
-    0xee9e9be5UL, 0xbe5c35bdUL, 0x80000000UL, 0x3fcb194aUL, 0x8416c0bcUL,
-    0x3e546d4fUL, 0x00000000UL, 0x3fcacf5eUL, 0x49f7f08fUL, 0x3e56da76UL,
-    0x00000000UL, 0x3fca858fUL, 0x5dc30de2UL, 0x3e5f390cUL, 0x00000000UL,
-    0x3fca3bdeUL, 0x950583b6UL, 0xbe5e4169UL, 0x80000000UL, 0x3fc9f249UL,
-    0x33631553UL, 0x3e52aeb1UL, 0x00000000UL, 0x3fc9a8d3UL, 0xde8795a6UL,
-    0xbe59a504UL, 0x00000000UL, 0x3fc95f79UL, 0x076bf41eUL, 0x3e5122feUL,
-    0x80000000UL, 0x3fc9163cUL, 0x2914c8e7UL, 0x3e3dd064UL, 0x00000000UL,
-    0x3fc8cd1dUL, 0x3a30eca3UL, 0xbe21b4aaUL, 0x80000000UL, 0x3fc8841aUL,
-    0xb2a96650UL, 0xbe575444UL, 0x80000000UL, 0x3fc83b34UL, 0x2376c0cbUL,
-    0xbe2a74c7UL, 0x80000000UL, 0x3fc7f26bUL, 0xd8a0b653UL, 0xbe5181b6UL,
-    0x00000000UL, 0x3fc7a9bfUL, 0x32257882UL, 0xbe4a78b4UL, 0x00000000UL,
-    0x3fc7612fUL, 0x1eee8bd9UL, 0xbe1bfe9dUL, 0x80000000UL, 0x3fc718bbUL,
-    0x0c603cc4UL, 0x3e36fdc9UL, 0x80000000UL, 0x3fc6d064UL, 0x3728b8cfUL,
-    0xbe1e542eUL, 0x80000000UL, 0x3fc68829UL, 0xc79a4067UL, 0x3e5c380fUL,
-    0x00000000UL, 0x3fc6400bUL, 0xf69eac69UL, 0x3e550a84UL, 0x80000000UL,
-    0x3fc5f808UL, 0xb7a780a4UL, 0x3e5d9224UL, 0x80000000UL, 0x3fc5b022UL,
-    0xad9dfb1eUL, 0xbe55242fUL, 0x00000000UL, 0x3fc56858UL, 0x659b18beUL,
-    0xbe4bfda3UL, 0x80000000UL, 0x3fc520a9UL, 0x66ee3631UL, 0xbe57d769UL,
-    0x80000000UL, 0x3fc4d916UL, 0x1ec62819UL, 0x3e2427f7UL, 0x80000000UL,
-    0x3fc4919fUL, 0xdec25369UL, 0xbe435431UL, 0x00000000UL, 0x3fc44a44UL,
-    0xa8acfc4bUL, 0xbe3c62e8UL, 0x00000000UL, 0x3fc40304UL, 0xcf1d3eabUL,
-    0xbdfba29fUL, 0x80000000UL, 0x3fc3bbdfUL, 0x79aba3eaUL, 0xbdf1b7c8UL,
-    0x80000000UL, 0x3fc374d6UL, 0xb8d186daUL, 0xbe5130cfUL, 0x80000000UL,
-    0x3fc32de8UL, 0x9d74f152UL, 0x3e2285b6UL, 0x00000000UL, 0x3fc2e716UL,
-    0x50ae7ca9UL, 0xbe503920UL, 0x80000000UL, 0x3fc2a05eUL, 0x6caed92eUL,
-    0xbe533924UL, 0x00000000UL, 0x3fc259c2UL, 0x9cb5034eUL, 0xbe510e31UL,
-    0x80000000UL, 0x3fc21340UL, 0x12c4d378UL, 0xbe540b43UL, 0x80000000UL,
-    0x3fc1ccd9UL, 0xcc418706UL, 0x3e59887aUL, 0x00000000UL, 0x3fc1868eUL,
-    0x921f4106UL, 0xbe528e67UL, 0x80000000UL, 0x3fc1405cUL, 0x3969441eUL,
-    0x3e5d8051UL, 0x00000000UL, 0x3fc0fa46UL, 0xd941ef5bUL, 0x3e5f9079UL,
-    0x80000000UL, 0x3fc0b44aUL, 0x5a3e81b2UL, 0xbe567691UL, 0x00000000UL,
-    0x3fc06e69UL, 0x9d66afe7UL, 0xbe4d43fbUL, 0x00000000UL, 0x3fc028a2UL,
-    0x0a92a162UL, 0xbe52f394UL, 0x00000000UL, 0x3fbfc5eaUL, 0x209897e5UL,
-    0x3e529e37UL, 0x00000000UL, 0x3fbf3ac5UL, 0x8458bd7bUL, 0x3e582831UL,
-    0x00000000UL, 0x3fbeafd5UL, 0xb8d8b4b8UL, 0xbe486b4aUL, 0x00000000UL,
-    0x3fbe2518UL, 0xe0a3b7b6UL, 0x3e5bafd2UL, 0x00000000UL, 0x3fbd9a90UL,
-    0x2bf2710eUL, 0x3e383b2bUL, 0x00000000UL, 0x3fbd103cUL, 0x73eb6ab7UL,
-    0xbe56d78dUL, 0x00000000UL, 0x3fbc861bUL, 0x32ceaff5UL, 0xbe32dc5aUL,
-    0x00000000UL, 0x3fbbfc2eUL, 0xbee04cb7UL, 0xbe4a71a4UL, 0x00000000UL,
-    0x3fbb7274UL, 0x35ae9577UL, 0x3e38142fUL, 0x00000000UL, 0x3fbae8eeUL,
-    0xcbaddab4UL, 0xbe5490f0UL, 0x00000000UL, 0x3fba5f9aUL, 0x95ce1114UL,
-    0x3e597c71UL, 0x00000000UL, 0x3fb9d67aUL, 0x6d7c0f78UL, 0x3e3abc2dUL,
-    0x00000000UL, 0x3fb94d8dUL, 0x2841a782UL, 0xbe566cbcUL, 0x00000000UL,
-    0x3fb8c4d2UL, 0x6ed429c6UL, 0xbe3cfff9UL, 0x00000000UL, 0x3fb83c4aUL,
-    0xe4a49fbbUL, 0xbe552964UL, 0x00000000UL, 0x3fb7b3f4UL, 0x2193d81eUL,
-    0xbe42fa72UL, 0x00000000UL, 0x3fb72bd0UL, 0xdd70c122UL, 0x3e527a8cUL,
-    0x00000000UL, 0x3fb6a3dfUL, 0x03108a54UL, 0xbe450393UL, 0x00000000UL,
-    0x3fb61c1fUL, 0x30ff7954UL, 0x3e565840UL, 0x00000000UL, 0x3fb59492UL,
-    0xdedd460cUL, 0xbe5422b5UL, 0x00000000UL, 0x3fb50d36UL, 0x950f9f45UL,
-    0xbe5313f6UL, 0x00000000UL, 0x3fb4860bUL, 0x582cdcb1UL, 0x3e506d39UL,
-    0x00000000UL, 0x3fb3ff12UL, 0x7216d3a6UL, 0x3e4aa719UL, 0x00000000UL,
-    0x3fb3784aUL, 0x57a423fdUL, 0x3e5a9b9fUL, 0x00000000UL, 0x3fb2f1b4UL,
-    0x7a138b41UL, 0xbe50b418UL, 0x00000000UL, 0x3fb26b4eUL, 0x2fbfd7eaUL,
-    0x3e23a53eUL, 0x00000000UL, 0x3fb1e519UL, 0x18913ccbUL, 0x3e465fc1UL,
-    0x00000000UL, 0x3fb15f15UL, 0x7ea24e21UL, 0x3e042843UL, 0x00000000UL,
-    0x3fb0d941UL, 0x7c6d9c77UL, 0x3e59f61eUL, 0x00000000UL, 0x3fb0539eUL,
-    0x114efd44UL, 0x3e4ccab7UL, 0x00000000UL, 0x3faf9c56UL, 0x1777f657UL,
-    0x3e552f65UL, 0x00000000UL, 0x3fae91d2UL, 0xc317b86aUL, 0xbe5a61e0UL,
-    0x00000000UL, 0x3fad87acUL, 0xb7664efbUL, 0xbe41f64eUL, 0x00000000UL,
-    0x3fac7de6UL, 0x5d3d03a9UL, 0x3e0807a0UL, 0x00000000UL, 0x3fab7480UL,
-    0x743c38ebUL, 0xbe3726e1UL, 0x00000000UL, 0x3faa6b78UL, 0x06a253f1UL,
-    0x3e5ad636UL, 0x00000000UL, 0x3fa962d0UL, 0xa35f541bUL, 0x3e5a187aUL,
-    0x00000000UL, 0x3fa85a88UL, 0x4b86e446UL, 0xbe508150UL, 0x00000000UL,
-    0x3fa7529cUL, 0x2589cacfUL, 0x3e52938aUL, 0x00000000UL, 0x3fa64b10UL,
-    0xaf6b11f2UL, 0xbe3454cdUL, 0x00000000UL, 0x3fa543e2UL, 0x97506fefUL,
-    0xbe5fdec5UL, 0x00000000UL, 0x3fa43d10UL, 0xe75f7dd9UL, 0xbe388dd3UL,
-    0x00000000UL, 0x3fa3369cUL, 0xa4139632UL, 0xbdea5177UL, 0x00000000UL,
-    0x3fa23086UL, 0x352d6f1eUL, 0xbe565ad6UL, 0x00000000UL, 0x3fa12accUL,
-    0x77449eb7UL, 0xbe50d5c7UL, 0x00000000UL, 0x3fa0256eUL, 0x7478da78UL,
-    0x3e404724UL, 0x00000000UL, 0x3f9e40dcUL, 0xf59cef7fUL, 0xbe539d0aUL,
-    0x00000000UL, 0x3f9c3790UL, 0x1511d43cUL, 0x3e53c2c8UL, 0x00000000UL,
-    0x3f9a2f00UL, 0x9b8bff3cUL, 0xbe43b3e1UL, 0x00000000UL, 0x3f982724UL,
-    0xad1e22a5UL, 0x3e46f0bdUL, 0x00000000UL, 0x3f962000UL, 0x130d9356UL,
-    0x3e475ba0UL, 0x00000000UL, 0x3f941994UL, 0x8f86f883UL, 0xbe513d0bUL,
-    0x00000000UL, 0x3f9213dcUL, 0x914d0dc8UL, 0xbe534335UL, 0x00000000UL,
-    0x3f900ed8UL, 0x2d73e5e7UL, 0xbe22ba75UL, 0x00000000UL, 0x3f8c1510UL,
-    0xc5b7d70eUL, 0x3e599c5dUL, 0x00000000UL, 0x3f880de0UL, 0x8a27857eUL,
-    0xbe3d28c8UL, 0x00000000UL, 0x3f840810UL, 0xda767328UL, 0x3e531b3dUL,
-    0x00000000UL, 0x3f8003b0UL, 0x77bacaf3UL, 0xbe5f04e3UL, 0x00000000UL,
-    0x3f780150UL, 0xdf4b0720UL, 0x3e5a8bffUL, 0x00000000UL, 0x3f6ffc40UL,
-    0x34c48e71UL, 0xbe3fcd99UL, 0x00000000UL, 0x3f5ff6c0UL, 0x1ad218afUL,
-    0xbe4c78a7UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x80000000UL,
-    0x00000000UL, 0xfffff800UL, 0x00000000UL, 0xfffff800UL, 0x00000000UL,
-    0x3ff72000UL, 0x161bb241UL, 0xbf5dabe1UL, 0x6dc96112UL, 0xbf836578UL,
-    0xee241472UL, 0xbf9b0301UL, 0x9f95985aUL, 0xbfb528dbUL, 0xb3841d2aUL,
-    0xbfd619b6UL, 0x518775e3UL, 0x3f9004f2UL, 0xac8349bbUL, 0x3fa76c9bUL,
-    0x486ececcUL, 0x3fc4635eUL, 0x161bb241UL, 0xbf5dabe1UL, 0x9f95985aUL,
-    0xbfb528dbUL, 0xf8b5787dUL, 0x3ef2531eUL, 0x486ececbUL, 0x3fc4635eUL,
-    0x412055ccUL, 0xbdd61bb2UL, 0x00000000UL, 0xfffffff8UL, 0x00000000UL,
-    0xffffffffUL, 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x3b700000UL,
-    0xfa5abcbfUL, 0x3ff00b1aUL, 0xa7609f71UL, 0xbc84f6b2UL, 0xa9fb3335UL,
-    0x3ff0163dUL, 0x9ab8cdb7UL, 0x3c9b6129UL, 0x143b0281UL, 0x3ff02168UL,
-    0x0fc54eb6UL, 0xbc82bf31UL, 0x3e778061UL, 0x3ff02c9aUL, 0x535b085dUL,
-    0xbc719083UL, 0x2e11bbccUL, 0x3ff037d4UL, 0xeeade11aUL, 0x3c656811UL,
-    0xe86e7f85UL, 0x3ff04315UL, 0x1977c96eUL, 0xbc90a31cUL, 0x72f654b1UL,
-    0x3ff04e5fUL, 0x3aa0d08cUL, 0x3c84c379UL, 0xd3158574UL, 0x3ff059b0UL,
-    0xa475b465UL, 0x3c8d73e2UL, 0x0e3c1f89UL, 0x3ff0650aUL, 0x5799c397UL,
-    0xbc95cb7bUL, 0x29ddf6deUL, 0x3ff0706bUL, 0xe2b13c27UL, 0xbc8c91dfUL,
-    0x2b72a836UL, 0x3ff07bd4UL, 0x54458700UL, 0x3c832334UL, 0x18759bc8UL,
-    0x3ff08745UL, 0x4bb284ffUL, 0x3c6186beUL, 0xf66607e0UL, 0x3ff092bdUL,
-    0x800a3fd1UL, 0xbc968063UL, 0xcac6f383UL, 0x3ff09e3eUL, 0x18316136UL,
-    0x3c914878UL, 0x9b1f3919UL, 0x3ff0a9c7UL, 0x873d1d38UL, 0x3c85d16cUL,
-    0x6cf9890fUL, 0x3ff0b558UL, 0x4adc610bUL, 0x3c98a62eUL, 0x45e46c85UL,
-    0x3ff0c0f1UL, 0x06d21cefUL, 0x3c94f989UL, 0x2b7247f7UL, 0x3ff0cc92UL,
-    0x16e24f71UL, 0x3c901edcUL, 0x23395decUL, 0x3ff0d83bUL, 0xe43f316aUL,
-    0xbc9bc14dUL, 0x32d3d1a2UL, 0x3ff0e3ecUL, 0x27c57b52UL, 0x3c403a17UL,
-    0x5fdfa9c5UL, 0x3ff0efa5UL, 0xbc54021bUL, 0xbc949db9UL, 0xaffed31bUL,
-    0x3ff0fb66UL, 0xc44ebd7bUL, 0xbc6b9bedUL, 0x28d7233eUL, 0x3ff10730UL,
-    0x1692fdd5UL, 0x3c8d46ebUL, 0xd0125b51UL, 0x3ff11301UL, 0x39449b3aUL,
-    0xbc96c510UL, 0xab5e2ab6UL, 0x3ff11edbUL, 0xf703fb72UL, 0xbc9ca454UL,
-    0xc06c31ccUL, 0x3ff12abdUL, 0xb36ca5c7UL, 0xbc51b514UL, 0x14f204abUL,
-    0x3ff136a8UL, 0xba48dcf0UL, 0xbc67108fUL, 0xaea92de0UL, 0x3ff1429aUL,
-    0x9af1369eUL, 0xbc932fbfUL, 0x934f312eUL, 0x3ff14e95UL, 0x39bf44abUL,
-    0xbc8b91e8UL, 0xc8a58e51UL, 0x3ff15a98UL, 0xb9eeab0aUL, 0x3c82406aUL,
-    0x5471c3c2UL, 0x3ff166a4UL, 0x82ea1a32UL, 0x3c58f23bUL, 0x3c7d517bUL,
-    0x3ff172b8UL, 0xb9d78a76UL, 0xbc819041UL, 0x8695bbc0UL, 0x3ff17ed4UL,
-    0xe2ac5a64UL, 0x3c709e3fUL, 0x388c8deaUL, 0x3ff18af9UL, 0xd1970f6cUL,
-    0xbc911023UL, 0x58375d2fUL, 0x3ff19726UL, 0x85f17e08UL, 0x3c94aaddUL,
-    0xeb6fcb75UL, 0x3ff1a35bUL, 0x7b4968e4UL, 0x3c8e5b4cUL, 0xf8138a1cUL,
-    0x3ff1af99UL, 0xa4b69280UL, 0x3c97bf85UL, 0x84045cd4UL, 0x3ff1bbe0UL,
-    0x352ef607UL, 0xbc995386UL, 0x95281c6bUL, 0x3ff1c82fUL, 0x8010f8c9UL,
-    0x3c900977UL, 0x3168b9aaUL, 0x3ff1d487UL, 0x00a2643cUL, 0x3c9e016eUL,
-    0x5eb44027UL, 0x3ff1e0e7UL, 0x088cb6deUL, 0xbc96fdd8UL, 0x22fcd91dUL,
-    0x3ff1ed50UL, 0x027bb78cUL, 0xbc91df98UL, 0x8438ce4dUL, 0x3ff1f9c1UL,
-    0xa097af5cUL, 0xbc9bf524UL, 0x88628cd6UL, 0x3ff2063bUL, 0x814a8495UL,
-    0x3c8dc775UL, 0x3578a819UL, 0x3ff212beUL, 0x2cfcaac9UL, 0x3c93592dUL,
-    0x917ddc96UL, 0x3ff21f49UL, 0x9494a5eeUL, 0x3c82a97eUL, 0xa27912d1UL,
-    0x3ff22bddUL, 0x5577d69fUL, 0x3c8d34fbUL, 0x6e756238UL, 0x3ff2387aUL,
-    0xb6c70573UL, 0x3c99b07eUL, 0xfb82140aUL, 0x3ff2451fUL, 0x911ca996UL,
-    0x3c8acfccUL, 0x4fb2a63fUL, 0x3ff251ceUL, 0xbef4f4a4UL, 0x3c8ac155UL,
-    0x711ece75UL, 0x3ff25e85UL, 0x4ac31b2cUL, 0x3c93e1a2UL, 0x65e27cddUL,
-    0x3ff26b45UL, 0x9940e9d9UL, 0x3c82bd33UL, 0x341ddf29UL, 0x3ff2780eUL,
-    0x05f9e76cUL, 0x3c9e067cUL, 0xe1f56381UL, 0x3ff284dfUL, 0x8c3f0d7eUL,
-    0xbc9a4c3aUL, 0x7591bb70UL, 0x3ff291baUL, 0x28401cbdUL, 0xbc82cc72UL,
-    0xf51fdee1UL, 0x3ff29e9dUL, 0xafad1255UL, 0x3c8612e8UL, 0x66d10f13UL,
-    0x3ff2ab8aUL, 0x191690a7UL, 0xbc995743UL, 0xd0dad990UL, 0x3ff2b87fUL,
-    0xd6381aa4UL, 0xbc410adcUL, 0x39771b2fUL, 0x3ff2c57eUL, 0xa6eb5124UL,
-    0xbc950145UL, 0xa6e4030bUL, 0x3ff2d285UL, 0x54db41d5UL, 0x3c900247UL,
-    0x1f641589UL, 0x3ff2df96UL, 0xfbbce198UL, 0x3c9d16cfUL, 0xa93e2f56UL,
-    0x3ff2ecafUL, 0x45d52383UL, 0x3c71ca0fUL, 0x4abd886bUL, 0x3ff2f9d2UL,
-    0x532bda93UL, 0xbc653c55UL, 0x0a31b715UL, 0x3ff306feUL, 0xd23182e4UL,
-    0x3c86f46aUL, 0xedeeb2fdUL, 0x3ff31432UL, 0xf3f3fcd1UL, 0x3c8959a3UL,
-    0xfc4cd831UL, 0x3ff32170UL, 0x8e18047cUL, 0x3c8a9ce7UL, 0x3ba8ea32UL,
-    0x3ff32eb8UL, 0x3cb4f318UL, 0xbc9c45e8UL, 0xb26416ffUL, 0x3ff33c08UL,
-    0x843659a6UL, 0x3c932721UL, 0x66e3fa2dUL, 0x3ff34962UL, 0x930881a4UL,
-    0xbc835a75UL, 0x5f929ff1UL, 0x3ff356c5UL, 0x5c4e4628UL, 0xbc8b5ceeUL,
-    0xa2de883bUL, 0x3ff36431UL, 0xa06cb85eUL, 0xbc8c3144UL, 0x373aa9cbUL,
-    0x3ff371a7UL, 0xbf42eae2UL, 0xbc963aeaUL, 0x231e754aUL, 0x3ff37f26UL,
-    0x9eceb23cUL, 0xbc99f5caUL, 0x6d05d866UL, 0x3ff38caeUL, 0x3c9904bdUL,
-    0xbc9e958dUL, 0x1b7140efUL, 0x3ff39a40UL, 0xfc8e2934UL, 0xbc99a9a5UL,
-    0x34e59ff7UL, 0x3ff3a7dbUL, 0xd661f5e3UL, 0xbc75e436UL, 0xbfec6cf4UL,
-    0x3ff3b57fUL, 0xe26fff18UL, 0x3c954c66UL, 0xc313a8e5UL, 0x3ff3c32dUL,
-    0x375d29c3UL, 0xbc9efff8UL, 0x44ede173UL, 0x3ff3d0e5UL, 0x8c284c71UL,
-    0x3c7fe8d0UL, 0x4c123422UL, 0x3ff3dea6UL, 0x11f09ebcUL, 0x3c8ada09UL,
-    0xdf1c5175UL, 0x3ff3ec70UL, 0x7b8c9bcaUL, 0xbc8af663UL, 0x04ac801cUL,
-    0x3ff3fa45UL, 0xf956f9f3UL, 0xbc97d023UL, 0xc367a024UL, 0x3ff40822UL,
-    0xb6f4d048UL, 0x3c8bddf8UL, 0x21f72e2aUL, 0x3ff4160aUL, 0x1c309278UL,
-    0xbc5ef369UL, 0x2709468aUL, 0x3ff423fbUL, 0xc0b314ddUL, 0xbc98462dUL,
-    0xd950a897UL, 0x3ff431f5UL, 0xe35f7999UL, 0xbc81c7ddUL, 0x3f84b9d4UL,
-    0x3ff43ffaUL, 0x9704c003UL, 0x3c8880beUL, 0x6061892dUL, 0x3ff44e08UL,
-    0x04ef80d0UL, 0x3c489b7aUL, 0x42a7d232UL, 0x3ff45c20UL, 0x82fb1f8eUL,
-    0xbc686419UL, 0xed1d0057UL, 0x3ff46a41UL, 0xd1648a76UL, 0x3c9c944bUL,
-    0x668b3237UL, 0x3ff4786dUL, 0xed445733UL, 0xbc9c20f0UL, 0xb5c13cd0UL,
-    0x3ff486a2UL, 0xb69062f0UL, 0x3c73c1a3UL, 0xe192aed2UL, 0x3ff494e1UL,
-    0x5e499ea0UL, 0xbc83b289UL, 0xf0d7d3deUL, 0x3ff4a32aUL, 0xf3d1be56UL,
-    0x3c99cb62UL, 0xea6db7d7UL, 0x3ff4b17dUL, 0x7f2897f0UL, 0xbc8125b8UL,
-    0xd5362a27UL, 0x3ff4bfdaUL, 0xafec42e2UL, 0x3c7d4397UL, 0xb817c114UL,
-    0x3ff4ce41UL, 0x690abd5dUL, 0x3c905e29UL, 0x99fddd0dUL, 0x3ff4dcb2UL,
-    0xbc6a7833UL, 0x3c98ecdbUL, 0x81d8abffUL, 0x3ff4eb2dUL, 0x2e5d7a52UL,
-    0xbc95257dUL, 0x769d2ca7UL, 0x3ff4f9b2UL, 0xd25957e3UL, 0xbc94b309UL,
-    0x7f4531eeUL, 0x3ff50841UL, 0x49b7465fUL, 0x3c7a249bUL, 0xa2cf6642UL,
-    0x3ff516daUL, 0x69bd93efUL, 0xbc8f7685UL, 0xe83f4eefUL, 0x3ff5257dUL,
-    0x43efef71UL, 0xbc7c998dUL, 0x569d4f82UL, 0x3ff5342bUL, 0x1db13cadUL,
-    0xbc807abeUL, 0xf4f6ad27UL, 0x3ff542e2UL, 0x192d5f7eUL, 0x3c87926dUL,
-    0xca5d920fUL, 0x3ff551a4UL, 0xefede59bUL, 0xbc8d689cUL, 0xdde910d2UL,
-    0x3ff56070UL, 0x168eebf0UL, 0xbc90fb6eUL, 0x36b527daUL, 0x3ff56f47UL,
-    0x011d93adUL, 0x3c99bb2cUL, 0xdbe2c4cfUL, 0x3ff57e27UL, 0x8a57b9c4UL,
-    0xbc90b98cUL, 0xd497c7fdUL, 0x3ff58d12UL, 0x5b9a1de8UL, 0x3c8295e1UL,
-    0x27ff07ccUL, 0x3ff59c08UL, 0xe467e60fUL, 0xbc97e2ceUL, 0xdd485429UL,
-    0x3ff5ab07UL, 0x054647adUL, 0x3c96324cUL, 0xfba87a03UL, 0x3ff5ba11UL,
-    0x4c233e1aUL, 0xbc9b77a1UL, 0x8a5946b7UL, 0x3ff5c926UL, 0x816986a2UL,
-    0x3c3c4b1bUL, 0x90998b93UL, 0x3ff5d845UL, 0xa8b45643UL, 0xbc9cd6a7UL,
-    0x15ad2148UL, 0x3ff5e76fUL, 0x3080e65eUL, 0x3c9ba6f9UL, 0x20dceb71UL,
-    0x3ff5f6a3UL, 0xe3cdcf92UL, 0xbc89eaddUL, 0xb976dc09UL, 0x3ff605e1UL,
-    0x9b56de47UL, 0xbc93e242UL, 0xe6cdf6f4UL, 0x3ff6152aUL, 0x4ab84c27UL,
-    0x3c9e4b3eUL, 0xb03a5585UL, 0x3ff6247eUL, 0x7e40b497UL, 0xbc9383c1UL,
-    0x1d1929fdUL, 0x3ff633ddUL, 0xbeb964e5UL, 0x3c984710UL, 0x34ccc320UL,
-    0x3ff64346UL, 0x759d8933UL, 0xbc8c483cUL, 0xfebc8fb7UL, 0x3ff652b9UL,
-    0xc9a73e09UL, 0xbc9ae3d5UL, 0x82552225UL, 0x3ff66238UL, 0x87591c34UL,
-    0xbc9bb609UL, 0xc70833f6UL, 0x3ff671c1UL, 0x586c6134UL, 0xbc8e8732UL,
-    0xd44ca973UL, 0x3ff68155UL, 0x44f73e65UL, 0x3c6038aeUL, 0xb19e9538UL,
-    0x3ff690f4UL, 0x9aeb445dUL, 0x3c8804bdUL, 0x667f3bcdUL, 0x3ff6a09eUL,
-    0x13b26456UL, 0xbc9bdd34UL, 0xfa75173eUL, 0x3ff6b052UL, 0x2c9a9d0eUL,
-    0x3c7a38f5UL, 0x750bdabfUL, 0x3ff6c012UL, 0x67ff0b0dUL, 0xbc728956UL,
-    0xddd47645UL, 0x3ff6cfdcUL, 0xb6f17309UL, 0x3c9c7aa9UL, 0x3c651a2fUL,
-    0x3ff6dfb2UL, 0x683c88abUL, 0xbc6bbe3aUL, 0x98593ae5UL, 0x3ff6ef92UL,
-    0x9e1ac8b2UL, 0xbc90b974UL, 0xf9519484UL, 0x3ff6ff7dUL, 0x25860ef6UL,
-    0xbc883c0fUL, 0x66f42e87UL, 0x3ff70f74UL, 0xd45aa65fUL, 0x3c59d644UL,
-    0xe8ec5f74UL, 0x3ff71f75UL, 0x86887a99UL, 0xbc816e47UL, 0x86ead08aUL,
-    0x3ff72f82UL, 0x2cd62c72UL, 0xbc920aa0UL, 0x48a58174UL, 0x3ff73f9aUL,
-    0x6c65d53cUL, 0xbc90a8d9UL, 0x35d7cbfdUL, 0x3ff74fbdUL, 0x618a6e1cUL,
-    0x3c9047fdUL, 0x564267c9UL, 0x3ff75febUL, 0x57316dd3UL, 0xbc902459UL,
-    0xb1ab6e09UL, 0x3ff77024UL, 0x169147f8UL, 0x3c9b7877UL, 0x4fde5d3fUL,
-    0x3ff78069UL, 0x0a02162dUL, 0x3c9866b8UL, 0x38ac1cf6UL, 0x3ff790b9UL,
-    0x62aadd3eUL, 0x3c9349a8UL, 0x73eb0187UL, 0x3ff7a114UL, 0xee04992fUL,
-    0xbc841577UL, 0x0976cfdbUL, 0x3ff7b17bUL, 0x8468dc88UL, 0xbc9bebb5UL,
-    0x0130c132UL, 0x3ff7c1edUL, 0xd1164dd6UL, 0x3c9f124cUL, 0x62ff86f0UL,
-    0x3ff7d26aUL, 0xfb72b8b4UL, 0x3c91bddbUL, 0x36cf4e62UL, 0x3ff7e2f3UL,
-    0xba15797eUL, 0x3c705d02UL, 0x8491c491UL, 0x3ff7f387UL, 0xcf9311aeUL,
-    0xbc807f11UL, 0x543e1a12UL, 0x3ff80427UL, 0x626d972bUL, 0xbc927c86UL,
-    0xadd106d9UL, 0x3ff814d2UL, 0x0d151d4dUL, 0x3c946437UL, 0x994cce13UL,
-    0x3ff82589UL, 0xd41532d8UL, 0xbc9d4c1dUL, 0x1eb941f7UL, 0x3ff8364cUL,
-    0x31df2bd5UL, 0x3c999b9aUL, 0x4623c7adUL, 0x3ff8471aUL, 0xa341cdfbUL,
-    0xbc88d684UL, 0x179f5b21UL, 0x3ff857f4UL, 0xf8b216d0UL, 0xbc5ba748UL,
-    0x9b4492edUL, 0x3ff868d9UL, 0x9bd4f6baUL, 0xbc9fc6f8UL, 0xd931a436UL,
-    0x3ff879caUL, 0xd2db47bdUL, 0x3c85d2d7UL, 0xd98a6699UL, 0x3ff88ac7UL,
-    0xf37cb53aUL, 0x3c9994c2UL, 0xa478580fUL, 0x3ff89bd0UL, 0x4475202aUL,
-    0x3c9d5395UL, 0x422aa0dbUL, 0x3ff8ace5UL, 0x56864b27UL, 0x3c96e9f1UL,
-    0xbad61778UL, 0x3ff8be05UL, 0xfc43446eUL, 0x3c9ecb5eUL, 0x16b5448cUL,
-    0x3ff8cf32UL, 0x32e9e3aaUL, 0xbc70d55eUL, 0x5e0866d9UL, 0x3ff8e06aUL,
-    0x6fc9b2e6UL, 0xbc97114aUL, 0x99157736UL, 0x3ff8f1aeUL, 0xa2e3976cUL,
-    0x3c85cc13UL, 0xd0282c8aUL, 0x3ff902feUL, 0x85fe3fd2UL, 0x3c9592caUL,
-    0x0b91ffc6UL, 0x3ff9145bUL, 0x2e582524UL, 0xbc9dd679UL, 0x53aa2fe2UL,
-    0x3ff925c3UL, 0xa639db7fUL, 0xbc83455fUL, 0xb0cdc5e5UL, 0x3ff93737UL,
-    0x81b57ebcUL, 0xbc675fc7UL, 0x2b5f98e5UL, 0x3ff948b8UL, 0x797d2d99UL,
-    0xbc8dc3d6UL, 0xcbc8520fUL, 0x3ff95a44UL, 0x96a5f039UL, 0xbc764b7cUL,
-    0x9a7670b3UL, 0x3ff96bddUL, 0x7f19c896UL, 0xbc5ba596UL, 0x9fde4e50UL,
-    0x3ff97d82UL, 0x7c1b85d1UL, 0xbc9d185bUL, 0xe47a22a2UL, 0x3ff98f33UL,
-    0xa24c78ecUL, 0x3c7cabdaUL, 0x70ca07baUL, 0x3ff9a0f1UL, 0x91cee632UL,
-    0xbc9173bdUL, 0x4d53fe0dUL, 0x3ff9b2bbUL, 0x4df6d518UL, 0xbc9dd84eUL,
-    0x82a3f090UL, 0x3ff9c491UL, 0xb071f2beUL, 0x3c7c7c46UL, 0x194bb8d5UL,
-    0x3ff9d674UL, 0xa3dd8233UL, 0xbc9516beUL, 0x19e32323UL, 0x3ff9e863UL,
-    0x78e64c6eUL, 0x3c7824caUL, 0x8d07f29eUL, 0x3ff9fa5eUL, 0xaaf1faceUL,
-    0xbc84a9ceUL, 0x7b5de565UL, 0x3ffa0c66UL, 0x5d1cd533UL, 0xbc935949UL,
-    0xed8eb8bbUL, 0x3ffa1e7aUL, 0xee8be70eUL, 0x3c9c6618UL, 0xec4a2d33UL,
-    0x3ffa309bUL, 0x7ddc36abUL, 0x3c96305cUL, 0x80460ad8UL, 0x3ffa42c9UL,
-    0x589fb120UL, 0xbc9aa780UL, 0xb23e255dUL, 0x3ffa5503UL, 0xdb8d41e1UL,
-    0xbc9d2f6eUL, 0x8af46052UL, 0x3ffa674aUL, 0x30670366UL, 0x3c650f56UL,
-    0x1330b358UL, 0x3ffa799eUL, 0xcac563c7UL, 0x3c9bcb7eUL, 0x53c12e59UL,
-    0x3ffa8bfeUL, 0xb2ba15a9UL, 0xbc94f867UL, 0x5579fdbfUL, 0x3ffa9e6bUL,
-    0x0ef7fd31UL, 0x3c90fac9UL, 0x21356ebaUL, 0x3ffab0e5UL, 0xdae94545UL,
-    0x3c889c31UL, 0xbfd3f37aUL, 0x3ffac36bUL, 0xcae76cd0UL, 0xbc8f9234UL,
-    0x3a3c2774UL, 0x3ffad5ffUL, 0xb6b1b8e5UL, 0x3c97ef3bUL, 0x995ad3adUL,
-    0x3ffae89fUL, 0x345dcc81UL, 0x3c97a1cdUL, 0xe622f2ffUL, 0x3ffafb4cUL,
-    0x0f315ecdUL, 0xbc94b2fcUL, 0x298db666UL, 0x3ffb0e07UL, 0x4c80e425UL,
-    0xbc9bdef5UL, 0x6c9a8952UL, 0x3ffb20ceUL, 0x4a0756ccUL, 0x3c94dd02UL,
-    0xb84f15fbUL, 0x3ffb33a2UL, 0x3084d708UL, 0xbc62805eUL, 0x15b749b1UL,
-    0x3ffb4684UL, 0xe9df7c90UL, 0xbc7f763dUL, 0x8de5593aUL, 0x3ffb5972UL,
-    0xbbba6de3UL, 0xbc9c71dfUL, 0x29f1c52aUL, 0x3ffb6c6eUL, 0x52883f6eUL,
-    0x3c92a8f3UL, 0xf2fb5e47UL, 0x3ffb7f76UL, 0x7e54ac3bUL, 0xbc75584fUL,
-    0xf22749e4UL, 0x3ffb928cUL, 0x54cb65c6UL, 0xbc9b7216UL, 0x30a1064aUL,
-    0x3ffba5b0UL, 0x0e54292eUL, 0xbc9efcd3UL, 0xb79a6f1fUL, 0x3ffbb8e0UL,
-    0xc9696205UL, 0xbc3f52d1UL, 0x904bc1d2UL, 0x3ffbcc1eUL, 0x7a2d9e84UL,
-    0x3c823dd0UL, 0xc3f3a207UL, 0x3ffbdf69UL, 0x60ea5b53UL, 0xbc3c2623UL,
-    0x5bd71e09UL, 0x3ffbf2c2UL, 0x3f6b9c73UL, 0xbc9efdcaUL, 0x6141b33dUL,
-    0x3ffc0628UL, 0xa1fbca34UL, 0xbc8d8a5aUL, 0xdd85529cUL, 0x3ffc199bUL,
-    0x895048ddUL, 0x3c811065UL, 0xd9fa652cUL, 0x3ffc2d1cUL, 0x17c8a5d7UL,
-    0xbc96e516UL, 0x5fffd07aUL, 0x3ffc40abUL, 0xe083c60aUL, 0x3c9b4537UL,
-    0x78fafb22UL, 0x3ffc5447UL, 0x2493b5afUL, 0x3c912f07UL, 0x2e57d14bUL,
-    0x3ffc67f1UL, 0xff483cadUL, 0x3c92884dUL, 0x8988c933UL, 0x3ffc7ba8UL,
-    0xbe255559UL, 0xbc8e76bbUL, 0x9406e7b5UL, 0x3ffc8f6dUL, 0x48805c44UL,
-    0x3c71acbcUL, 0x5751c4dbUL, 0x3ffca340UL, 0xd10d08f5UL, 0xbc87f2beUL,
-    0xdcef9069UL, 0x3ffcb720UL, 0xd1e949dbUL, 0x3c7503cbUL, 0x2e6d1675UL,
-    0x3ffccb0fUL, 0x86009092UL, 0xbc7d220fUL, 0x555dc3faUL, 0x3ffcdf0bUL,
-    0x53829d72UL, 0xbc8dd83bUL, 0x5b5bab74UL, 0x3ffcf315UL, 0xb86dff57UL,
-    0xbc9a08e9UL, 0x4a07897cUL, 0x3ffd072dUL, 0x43797a9cUL, 0xbc9cbc37UL,
-    0x2b08c968UL, 0x3ffd1b53UL, 0x219a36eeUL, 0x3c955636UL, 0x080d89f2UL,
-    0x3ffd2f87UL, 0x719d8578UL, 0xbc9d487bUL, 0xeacaa1d6UL, 0x3ffd43c8UL,
-    0xbf5a1614UL, 0x3c93db53UL, 0xdcfba487UL, 0x3ffd5818UL, 0xd75b3707UL,
-    0x3c82ed02UL, 0xe862e6d3UL, 0x3ffd6c76UL, 0x4a8165a0UL, 0x3c5fe87aUL,
-    0x16c98398UL, 0x3ffd80e3UL, 0x8beddfe8UL, 0xbc911ec1UL, 0x71ff6075UL,
-    0x3ffd955dUL, 0xbb9af6beUL, 0x3c9a052dUL, 0x03db3285UL, 0x3ffda9e6UL,
-    0x696db532UL, 0x3c9c2300UL, 0xd63a8315UL, 0x3ffdbe7cUL, 0x926b8be4UL,
-    0xbc9b76f1UL, 0xf301b460UL, 0x3ffdd321UL, 0x78f018c3UL, 0x3c92da57UL,
-    0x641c0658UL, 0x3ffde7d5UL, 0x8e79ba8fUL, 0xbc9ca552UL, 0x337b9b5fUL,
-    0x3ffdfc97UL, 0x4f184b5cUL, 0xbc91a5cdUL, 0x6b197d17UL, 0x3ffe1167UL,
-    0xbd5c7f44UL, 0xbc72b529UL, 0x14f5a129UL, 0x3ffe2646UL, 0x817a1496UL,
-    0xbc97b627UL, 0x3b16ee12UL, 0x3ffe3b33UL, 0x31fdc68bUL, 0xbc99f4a4UL,
-    0xe78b3ff6UL, 0x3ffe502eUL, 0x80a9cc8fUL, 0x3c839e89UL, 0x24676d76UL,
-    0x3ffe6539UL, 0x7522b735UL, 0xbc863ff8UL, 0xfbc74c83UL, 0x3ffe7a51UL,
-    0xca0c8de2UL, 0x3c92d522UL, 0x77cdb740UL, 0x3ffe8f79UL, 0x80b054b1UL,
-    0xbc910894UL, 0xa2a490daUL, 0x3ffea4afUL, 0x179c2893UL, 0xbc9e9c23UL,
-    0x867cca6eUL, 0x3ffeb9f4UL, 0x2293e4f2UL, 0x3c94832fUL, 0x2d8e67f1UL,
-    0x3ffecf48UL, 0xb411ad8cUL, 0xbc9c93f3UL, 0xa2188510UL, 0x3ffee4aaUL,
-    0xa487568dUL, 0x3c91c68dUL, 0xee615a27UL, 0x3ffefa1bUL, 0x86a4b6b0UL,
-    0x3c9dc7f4UL, 0x1cb6412aUL, 0x3fff0f9cUL, 0x65181d45UL, 0xbc932200UL,
-    0x376bba97UL, 0x3fff252bUL, 0xbf0d8e43UL, 0x3c93a1a5UL, 0x48dd7274UL,
-    0x3fff3ac9UL, 0x3ed837deUL, 0xbc795a5aUL, 0x5b6e4540UL, 0x3fff5076UL,
-    0x2dd8a18bUL, 0x3c99d3e1UL, 0x798844f8UL, 0x3fff6632UL, 0x3539343eUL,
-    0x3c9fa37bUL, 0xad9cbe14UL, 0x3fff7bfdUL, 0xd006350aUL, 0xbc9dbb12UL,
-    0x02243c89UL, 0x3fff91d8UL, 0xa779f689UL, 0xbc612ea8UL, 0x819e90d8UL,
-    0x3fffa7c1UL, 0xf3a5931eUL, 0x3c874853UL, 0x3692d514UL, 0x3fffbdbaUL,
-    0x15098eb6UL, 0xbc796773UL, 0x2b8f71f1UL, 0x3fffd3c2UL, 0x966579e7UL,
-    0x3c62eb74UL, 0x6b2a23d9UL, 0x3fffe9d9UL, 0x7442fde3UL, 0x3c74a603UL,
-    0xe78a6731UL, 0x3f55d87fUL, 0xd704a0c0UL, 0x3fac6b08UL, 0x6fba4e77UL,
-    0x3f83b2abUL, 0xff82c58fUL, 0x3fcebfbdUL, 0xfefa39efUL, 0x3fe62e42UL,
-    0x00000000UL, 0x00000000UL, 0xfefa39efUL, 0x3fe62e42UL, 0xfefa39efUL,
-    0xbfe62e42UL, 0xf8000000UL, 0xffffffffUL, 0xf8000000UL, 0xffffffffUL,
-    0x00000000UL, 0x80000000UL, 0x00000000UL, 0x00000000UL
-
-};
-
-ATTRIBUTE_ALIGNED(8) static const double _DOUBLE2 = 2.0;
-ATTRIBUTE_ALIGNED(8) static const double _DOUBLE0 = 0.0;
-ATTRIBUTE_ALIGNED(8) static const double _DOUBLE0DOT5 = 0.5;
-
-//registers,
-// input: xmm0, xmm1
-// scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
-//          eax, edx, ecx, ebx
-
-// Code generated by Intel C compiler for LIBM library
-
-void MacroAssembler::fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) {
-  Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
-  Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
-  Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, L_2TAG_PACKET_10_0_2, L_2TAG_PACKET_11_0_2;
-  Label L_2TAG_PACKET_12_0_2, L_2TAG_PACKET_13_0_2, L_2TAG_PACKET_14_0_2, L_2TAG_PACKET_15_0_2;
-  Label L_2TAG_PACKET_16_0_2, L_2TAG_PACKET_17_0_2, L_2TAG_PACKET_18_0_2, L_2TAG_PACKET_19_0_2;
-  Label L_2TAG_PACKET_20_0_2, L_2TAG_PACKET_21_0_2, L_2TAG_PACKET_22_0_2, L_2TAG_PACKET_23_0_2;
-  Label L_2TAG_PACKET_24_0_2, L_2TAG_PACKET_25_0_2, L_2TAG_PACKET_26_0_2, L_2TAG_PACKET_27_0_2;
-  Label L_2TAG_PACKET_28_0_2, L_2TAG_PACKET_29_0_2, L_2TAG_PACKET_30_0_2, L_2TAG_PACKET_31_0_2;
-  Label L_2TAG_PACKET_32_0_2, L_2TAG_PACKET_33_0_2, L_2TAG_PACKET_34_0_2, L_2TAG_PACKET_35_0_2;
-  Label L_2TAG_PACKET_36_0_2, L_2TAG_PACKET_37_0_2, L_2TAG_PACKET_38_0_2, L_2TAG_PACKET_39_0_2;
-  Label L_2TAG_PACKET_40_0_2, L_2TAG_PACKET_41_0_2, L_2TAG_PACKET_42_0_2, L_2TAG_PACKET_43_0_2;
-  Label L_2TAG_PACKET_44_0_2, L_2TAG_PACKET_45_0_2, L_2TAG_PACKET_46_0_2, L_2TAG_PACKET_47_0_2;
-  Label L_2TAG_PACKET_48_0_2, L_2TAG_PACKET_49_0_2, L_2TAG_PACKET_50_0_2, L_2TAG_PACKET_51_0_2;
-  Label L_2TAG_PACKET_52_0_2, L_2TAG_PACKET_53_0_2, L_2TAG_PACKET_54_0_2, L_2TAG_PACKET_55_0_2;
-  Label L_2TAG_PACKET_56_0_2, L_2TAG_PACKET_57_0_2, L_2TAG_PACKET_58_0_2, start;
-  Label L_NOT_DOUBLE2, L_NOT_DOUBLE0DOT5;
-
-  assert_different_registers(tmp, eax, ecx, edx);
-
-  address static_const_table_pow = (address)_static_const_table_pow;
-  address DOUBLE2 = (address) &_DOUBLE2;
-  address DOUBLE0 = (address) &_DOUBLE0;
-  address DOUBLE0DOT5 = (address) &_DOUBLE0DOT5;
-
-  subl(rsp, 120);
-  movl(Address(rsp, 64), tmp);
-  lea(tmp, ExternalAddress(static_const_table_pow));
-  movsd(xmm0, Address(rsp, 128));
-  movsd(xmm1, Address(rsp, 136));
-
-  // Special case: pow(x, 2.0) => x * x
-  ucomisd(xmm1, ExternalAddress(DOUBLE2));
-  jccb(Assembler::notEqual, L_NOT_DOUBLE2);
-  jccb(Assembler::parity, L_NOT_DOUBLE2);
-  mulsd(xmm0, xmm0);
-  jmp(L_2TAG_PACKET_21_0_2);
-
-  bind(L_NOT_DOUBLE2);
-  // Special case: pow(x, 0.5) => sqrt(x)
-  ucomisd(xmm1, ExternalAddress(DOUBLE0DOT5)); // For pow(x, y), check whether y == 0.5
-  jccb(Assembler::notEqual, L_NOT_DOUBLE0DOT5);
-  jccb(Assembler::parity, L_NOT_DOUBLE0DOT5);
-  ucomisd(xmm0, ExternalAddress(DOUBLE0));
-  // According to the API specs, pow(-0.0, 0.5) = 0.0 and sqrt(-0.0) = -0.0.
-  // So pow(-0.0, 0.5) shouldn't be replaced with sqrt(-0.0).
-  // -0.0/+0.0 are both excluded since floating-point comparison doesn't distinguish -0.0 from +0.0.
-  jccb(Assembler::belowEqual, L_NOT_DOUBLE0DOT5); // pow(x, 0.5) => sqrt(x) only for x > 0.0
-  sqrtsd(xmm0, xmm0);
-  jmp(L_2TAG_PACKET_21_0_2);
-
-  bind(L_NOT_DOUBLE0DOT5);
-  xorpd(xmm2, xmm2);
-  movl(eax, 16368);
-  pinsrw(xmm2, eax, 3);
-  movl(ecx, 1069088768);
-  movdl(xmm7, ecx);
-  movsd(Address(rsp, 16), xmm1);
-  xorpd(xmm1, xmm1);
-  movl(edx, 30704);
-  pinsrw(xmm1, edx, 3);
-  movsd(Address(rsp, 8), xmm0);
-  movdqu(xmm3, xmm0);
-  movl(edx, 8192);
-  movdl(xmm4, edx);
-  movdqu(xmm6, Address(tmp, 8240));
-  pextrw(eax, xmm0, 3);
-  por(xmm0, xmm2);
-  psllq(xmm0, 5);
-  movsd(xmm2, Address(tmp, 8256));
-  psrlq(xmm0, 34);
-  movl(edx, eax);
-  andl(edx, 32752);
-  subl(edx, 16368);
-  movl(ecx, edx);
-  sarl(edx, 31);
-  addl(ecx, edx);
-  xorl(ecx, edx);
-  rcpss(xmm0, xmm0);
-  psllq(xmm3, 12);
-  addl(ecx, 16);
-  bsrl(ecx, ecx);
-  psrlq(xmm3, 12);
-  movl(Address(rsp, 24), rsi);
-  subl(eax, 16);
-  cmpl(eax, 32736);
-  jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2);
-  movl(rsi, 0);
-
-  bind(L_2TAG_PACKET_1_0_2);
-  mulss(xmm0, xmm7);
-  movl(edx, -1);
-  subl(ecx, 4);
-  shll(edx);
-  movdl(xmm5, edx);
-  por(xmm3, xmm1);
-  subl(eax, 16351);
-  cmpl(eax, 1);
-  jcc(Assembler::belowEqual, L_2TAG_PACKET_2_0_2);
-  paddd(xmm0, xmm4);
-  psllq(xmm5, 32);
-  movdl(edx, xmm0);
-  psllq(xmm0, 29);
-  pand(xmm5, xmm3);
-
-  bind(L_2TAG_PACKET_3_0_2);
-  pand(xmm0, xmm6);
-  subsd(xmm3, xmm5);
-  subl(eax, 1);
-  sarl(eax, 4);
-  cvtsi2sdl(xmm7, eax);
-  mulpd(xmm5, xmm0);
-
-  bind(L_2TAG_PACKET_4_0_2);
-  mulsd(xmm3, xmm0);
-  movdqu(xmm1, Address(tmp, 8272));
-  subsd(xmm5, xmm2);
-  movdqu(xmm4, Address(tmp, 8288));
-  movl(ecx, eax);
-  sarl(eax, 31);
-  addl(ecx, eax);
-  xorl(eax, ecx);
-  addl(eax, 1);
-  bsrl(eax, eax);
-  unpcklpd(xmm5, xmm3);
-  movdqu(xmm6, Address(tmp, 8304));
-  addsd(xmm3, xmm5);
-  andl(edx, 16760832);
-  shrl(edx, 10);
-  addpd(xmm5, Address(tmp, edx, Address::times_1, -3616));
-  movdqu(xmm0, Address(tmp, 8320));
-  pshufd(xmm2, xmm3, 68);
-  mulsd(xmm3, xmm3);
-  mulpd(xmm1, xmm2);
-  mulpd(xmm4, xmm2);
-  addsd(xmm5, xmm7);
-  mulsd(xmm2, xmm3);
-  addpd(xmm6, xmm1);
-  mulsd(xmm3, xmm3);
-  addpd(xmm0, xmm4);
-  movsd(xmm1, Address(rsp, 16));
-  movzwl(ecx, Address(rsp, 22));
-  pshufd(xmm7, xmm5, 238);
-  movsd(xmm4, Address(tmp, 8368));
-  mulpd(xmm6, xmm2);
-  pshufd(xmm3, xmm3, 68);
-  mulpd(xmm0, xmm2);
-  shll(eax, 4);
-  subl(eax, 15872);
-  andl(ecx, 32752);
-  addl(eax, ecx);
-  mulpd(xmm3, xmm6);
-  cmpl(eax, 624);
-  jcc(Assembler::aboveEqual, L_2TAG_PACKET_5_0_2);
-  xorpd(xmm6, xmm6);
-  movl(edx, 17080);
-  pinsrw(xmm6, edx, 3);
-  movdqu(xmm2, xmm1);
-  pand(xmm4, xmm1);
-  subsd(xmm1, xmm4);
-  mulsd(xmm4, xmm5);
-  addsd(xmm0, xmm7);
-  mulsd(xmm1, xmm5);
-  movdqu(xmm7, xmm6);
-  addsd(xmm6, xmm4);
-  addpd(xmm3, xmm0);
-  movdl(edx, xmm6);
-  subsd(xmm6, xmm7);
-  pshufd(xmm0, xmm3, 238);
-  subsd(xmm4, xmm6);
-  addsd(xmm0, xmm3);
-  movl(ecx, edx);
-  andl(edx, 255);
-  addl(edx, edx);
-  movdqu(xmm5, Address(tmp, edx, Address::times_8, 8384));
-  addsd(xmm4, xmm1);
-  mulsd(xmm2, xmm0);
-  movdqu(xmm7, Address(tmp, 12480));
-  movdqu(xmm3, Address(tmp, 12496));
-  shll(ecx, 12);
-  xorl(ecx, rsi);
-  andl(ecx, -1048576);
-  movdl(xmm6, ecx);
-  addsd(xmm2, xmm4);
-  movsd(xmm1, Address(tmp, 12512));
-  pshufd(xmm0, xmm2, 68);
-  pshufd(xmm4, xmm2, 68);
-  mulpd(xmm0, xmm0);
-  movl(rsi, Address(rsp, 24));
-  mulpd(xmm7, xmm4);
-  pshufd(xmm6, xmm6, 17);
-  mulsd(xmm1, xmm2);
-  mulsd(xmm0, xmm0);
-  paddd(xmm5, xmm6);
-  addpd(xmm3, xmm7);
-  mulsd(xmm1, xmm5);
-  pshufd(xmm6, xmm5, 238);
-  mulpd(xmm0, xmm3);
-  addsd(xmm1, xmm6);
-  pshufd(xmm3, xmm0, 238);
-  mulsd(xmm0, xmm5);
-  mulsd(xmm3, xmm5);
-  addsd(xmm0, xmm1);
-  addsd(xmm0, xmm3);
-  addsd(xmm0, xmm5);
-  movsd(Address(rsp, 0), xmm0);
-  fld_d(Address(rsp, 0));
-  jmp(L_2TAG_PACKET_6_0_2);
-
-  bind(L_2TAG_PACKET_7_0_2);
-  movsd(xmm0, Address(rsp, 128));
-  movsd(xmm1, Address(rsp, 136));
-  mulsd(xmm0, xmm1);
-  movsd(Address(rsp, 0), xmm0);
-  fld_d(Address(rsp, 0));
-  jmp(L_2TAG_PACKET_6_0_2);
-
-  bind(L_2TAG_PACKET_0_0_2);
-  addl(eax, 16);
-  movl(edx, 32752);
-  andl(edx, eax);
-  cmpl(edx, 32752);
-  jcc(Assembler::equal, L_2TAG_PACKET_8_0_2);
-  testl(eax, 32768);
-  jcc(Assembler::notEqual, L_2TAG_PACKET_9_0_2);
-
-  bind(L_2TAG_PACKET_10_0_2);
-  movl(ecx, Address(rsp, 16));
-  xorl(edx, edx);
-  testl(ecx, ecx);
-  movl(ecx, 1);
-  cmovl(Assembler::notEqual, edx, ecx);
-  orl(edx, Address(rsp, 20));
-  cmpl(edx, 1072693248);
-  jcc(Assembler::equal, L_2TAG_PACKET_7_0_2);
-  movsd(xmm0, Address(rsp, 8));
-  movsd(xmm3, Address(rsp, 8));
-  movdl(edx, xmm3);
-  psrlq(xmm3, 32);
-  movdl(ecx, xmm3);
-  orl(edx, ecx);
-  cmpl(edx, 0);
-  jcc(Assembler::equal, L_2TAG_PACKET_11_0_2);
-  xorpd(xmm3, xmm3);
-  movl(eax, 18416);
-  pinsrw(xmm3, eax, 3);
-  mulsd(xmm0, xmm3);
-  xorpd(xmm2, xmm2);
-  movl(eax, 16368);
-  pinsrw(xmm2, eax, 3);
-  movdqu(xmm3, xmm0);
-  pextrw(eax, xmm0, 3);
-  por(xmm0, xmm2);
-  movl(ecx, 18416);
-  psllq(xmm0, 5);
-  movsd(xmm2, Address(tmp, 8256));
-  psrlq(xmm0, 34);
-  rcpss(xmm0, xmm0);
-  psllq(xmm3, 12);
-  movdqu(xmm6, Address(tmp, 8240));
-  psrlq(xmm3, 12);
-  mulss(xmm0, xmm7);
-  movl(edx, -1024);
-  movdl(xmm5, edx);
-  por(xmm3, xmm1);
-  paddd(xmm0, xmm4);
-  psllq(xmm5, 32);
-  movdl(edx, xmm0);
-  psllq(xmm0, 29);
-  pand(xmm5, xmm3);
-  movl(rsi, 0);
-  pand(xmm0, xmm6);
-  subsd(xmm3, xmm5);
-  andl(eax, 32752);
-  subl(eax, 18416);
-  sarl(eax, 4);
-  cvtsi2sdl(xmm7, eax);
-  mulpd(xmm5, xmm0);
-  jmp(L_2TAG_PACKET_4_0_2);
-
-  bind(L_2TAG_PACKET_12_0_2);
-  movl(ecx, Address(rsp, 16));
-  xorl(edx, edx);
-  testl(ecx, ecx);
-  movl(ecx, 1);
-  cmovl(Assembler::notEqual, edx, ecx);
-  orl(edx, Address(rsp, 20));
-  cmpl(edx, 1072693248);
-  jcc(Assembler::equal, L_2TAG_PACKET_7_0_2);
-  movsd(xmm0, Address(rsp, 8));
-  movsd(xmm3, Address(rsp, 8));
-  movdl(edx, xmm3);
-  psrlq(xmm3, 32);
-  movdl(ecx, xmm3);
-  orl(edx, ecx);
-  cmpl(edx, 0);
-  jcc(Assembler::equal, L_2TAG_PACKET_11_0_2);
-  xorpd(xmm3, xmm3);
-  movl(eax, 18416);
-  pinsrw(xmm3, eax, 3);
-  mulsd(xmm0, xmm3);
-  xorpd(xmm2, xmm2);
-  movl(eax, 16368);
-  pinsrw(xmm2, eax, 3);
-  movdqu(xmm3, xmm0);
-  pextrw(eax, xmm0, 3);
-  por(xmm0, xmm2);
-  movl(ecx, 18416);
-  psllq(xmm0, 5);
-  movsd(xmm2, Address(tmp, 8256));
-  psrlq(xmm0, 34);
-  rcpss(xmm0, xmm0);
-  psllq(xmm3, 12);
-  movdqu(xmm6, Address(tmp, 8240));
-  psrlq(xmm3, 12);
-  mulss(xmm0, xmm7);
-  movl(edx, -1024);
-  movdl(xmm5, edx);
-  por(xmm3, xmm1);
-  paddd(xmm0, xmm4);
-  psllq(xmm5, 32);
-  movdl(edx, xmm0);
-  psllq(xmm0, 29);
-  pand(xmm5, xmm3);
-  movl(rsi, INT_MIN);
-  pand(xmm0, xmm6);
-  subsd(xmm3, xmm5);
-  andl(eax, 32752);
-  subl(eax, 18416);
-  sarl(eax, 4);
-  cvtsi2sdl(xmm7, eax);
-  mulpd(xmm5, xmm0);
-  jmp(L_2TAG_PACKET_4_0_2);
-
-  bind(L_2TAG_PACKET_5_0_2);
-  cmpl(eax, 0);
-  jcc(Assembler::less, L_2TAG_PACKET_13_0_2);
-  cmpl(eax, 752);
-  jcc(Assembler::aboveEqual, L_2TAG_PACKET_14_0_2);
-
-  bind(L_2TAG_PACKET_15_0_2);
-  addsd(xmm0, xmm7);
-  movsd(xmm2, Address(tmp, 12544));
-  addpd(xmm3, xmm0);
-  xorpd(xmm6, xmm6);
-  movl(eax, 17080);
-  pinsrw(xmm6, eax, 3);
-  pshufd(xmm0, xmm3, 238);
-  addsd(xmm0, xmm3);
-  movdqu(xmm3, xmm5);
-  addsd(xmm5, xmm0);
-  movdqu(xmm4, xmm2);
-  subsd(xmm3, xmm5);
-  movdqu(xmm7, xmm5);
-  pand(xmm5, xmm2);
-  movdqu(xmm2, xmm1);
-  pand(xmm4, xmm1);
-  subsd(xmm7, xmm5);
-  addsd(xmm0, xmm3);
-  subsd(xmm1, xmm4);
-  mulsd(xmm4, xmm5);
-  addsd(xmm0, xmm7);
-  mulsd(xmm2, xmm0);
-  movdqu(xmm7, xmm6);
-  mulsd(xmm1, xmm5);
-  addsd(xmm6, xmm4);
-  movdl(eax, xmm6);
-  subsd(xmm6, xmm7);
-  addsd(xmm2, xmm1);
-  movdqu(xmm7, Address(tmp, 12480));
-  movdqu(xmm3, Address(tmp, 12496));
-  subsd(xmm4, xmm6);
-  pextrw(edx, xmm6, 3);
-  movl(ecx, eax);
-  andl(eax, 255);
-  addl(eax, eax);
-  movdqu(xmm5, Address(tmp, eax, Address::times_8, 8384));
-  addsd(xmm2, xmm4);
-  sarl(ecx, 8);
-  movl(eax, ecx);
-  sarl(ecx, 1);
-  subl(eax, ecx);
-  shll(ecx, 20);
-  xorl(ecx, rsi);
-  movdl(xmm6, ecx);
-  movsd(xmm1, Address(tmp, 12512));
-  andl(edx, 32767);
-  cmpl(edx, 16529);
-  jcc(Assembler::above, L_2TAG_PACKET_14_0_2);
-  pshufd(xmm0, xmm2, 68);
-  pshufd(xmm4, xmm2, 68);
-  mulpd(xmm0, xmm0);
-  mulpd(xmm7, xmm4);
-  pshufd(xmm6, xmm6, 17);
-  mulsd(xmm1, xmm2);
-  mulsd(xmm0, xmm0);
-  paddd(xmm5, xmm6);
-  addpd(xmm3, xmm7);
-  mulsd(xmm1, xmm5);
-  pshufd(xmm6, xmm5, 238);
-  mulpd(xmm0, xmm3);
-  addsd(xmm1, xmm6);
-  pshufd(xmm3, xmm0, 238);
-  mulsd(xmm0, xmm5);
-  mulsd(xmm3, xmm5);
-  shll(eax, 4);
-  xorpd(xmm4, xmm4);
-  addl(eax, 16368);
-  pinsrw(xmm4, eax, 3);
-  addsd(xmm0, xmm1);
-  movl(rsi, Address(rsp, 24));
-  addsd(xmm0, xmm3);
-  movdqu(xmm1, xmm0);
-  addsd(xmm0, xmm5);
-  mulsd(xmm0, xmm4);
-  pextrw(eax, xmm0, 3);
-  andl(eax, 32752);
-  jcc(Assembler::equal, L_2TAG_PACKET_16_0_2);
-  cmpl(eax, 32752);
-  jcc(Assembler::equal, L_2TAG_PACKET_17_0_2);
-
-  bind(L_2TAG_PACKET_18_0_2);
-  movsd(Address(rsp, 0), xmm0);
-  fld_d(Address(rsp, 0));
-  jmp(L_2TAG_PACKET_6_0_2);
-
-  bind(L_2TAG_PACKET_8_0_2);
-  movsd(xmm1, Address(rsp, 16));
-  movsd(xmm0, Address(rsp, 8));
-  movdqu(xmm2, xmm0);
-  movdl(eax, xmm2);
-  psrlq(xmm2, 20);
-  movdl(edx, xmm2);
-  orl(eax, edx);
-  jcc(Assembler::equal, L_2TAG_PACKET_19_0_2);
-  addsd(xmm0, xmm0);
-  movdl(eax, xmm1);
-  psrlq(xmm1, 32);
-  movdl(edx, xmm1);
-  movl(ecx, edx);
-  addl(edx, edx);
-  orl(eax, edx);
-  jcc(Assembler::equal, L_2TAG_PACKET_20_0_2);
-  jmp(L_2TAG_PACKET_18_0_2);
-
-  bind(L_2TAG_PACKET_20_0_2);
-  xorpd(xmm0, xmm0);
-  movl(eax, 16368);
-  pinsrw(xmm0, eax, 3);
-  movl(edx, 29);
-  jmp(L_2TAG_PACKET_21_0_2);
-
-  bind(L_2TAG_PACKET_22_0_2);
-  movsd(xmm0, Address(rsp, 16));
-  addpd(xmm0, xmm0);
-  jmp(L_2TAG_PACKET_18_0_2);
-
-  bind(L_2TAG_PACKET_19_0_2);
-  movdl(eax, xmm1);
-  movdqu(xmm2, xmm1);
-  psrlq(xmm1, 32);
-  movdl(edx, xmm1);
-  movl(ecx, edx);
-  addl(edx, edx);
-  orl(eax, edx);
-  jcc(Assembler::equal, L_2TAG_PACKET_23_0_2);
-  pextrw(eax, xmm2, 3);
-  andl(eax, 32752);
-  cmpl(eax, 32752);
-  jcc(Assembler::notEqual, L_2TAG_PACKET_24_0_2);
-  movdl(eax, xmm2);
-  psrlq(xmm2, 20);
-  movdl(edx, xmm2);
-  orl(eax, edx);
-  jcc(Assembler::notEqual, L_2TAG_PACKET_22_0_2);
-
-  bind(L_2TAG_PACKET_24_0_2);
-  pextrw(eax, xmm0, 3);
-  testl(eax, 32768);
-  jcc(Assembler::notEqual, L_2TAG_PACKET_25_0_2);
-  testl(ecx, INT_MIN);
-  jcc(Assembler::notEqual, L_2TAG_PACKET_26_0_2);
-  jmp(L_2TAG_PACKET_18_0_2);
-
-  bind(L_2TAG_PACKET_27_0_2);
-  movsd(xmm1, Address(rsp, 16));
-  movdl(eax, xmm1);
-  testl(eax, 1);
-  jcc(Assembler::notEqual, L_2TAG_PACKET_28_0_2);
-  testl(eax, 2);
-  jcc(Assembler::notEqual, L_2TAG_PACKET_29_0_2);
-  jmp(L_2TAG_PACKET_28_0_2);
-
-  bind(L_2TAG_PACKET_25_0_2);
-  shrl(ecx, 20);
-  andl(ecx, 2047);
-  cmpl(ecx, 1075);
-  jcc(Assembler::above, L_2TAG_PACKET_28_0_2);
-  jcc(Assembler::equal, L_2TAG_PACKET_30_0_2);
-  cmpl(ecx, 1074);
-  jcc(Assembler::above, L_2TAG_PACKET_27_0_2);
-  cmpl(ecx, 1023);
-  jcc(Assembler::below, L_2TAG_PACKET_28_0_2);
-  movsd(xmm1, Address(rsp, 16));
-  movl(eax, 17208);
-  xorpd(xmm3, xmm3);
-  pinsrw(xmm3, eax, 3);
-  movdqu(xmm4, xmm3);
-  addsd(xmm3, xmm1);
-  subsd(xmm4, xmm3);
-  addsd(xmm1, xmm4);
-  pextrw(eax, xmm1, 3);
-  andl(eax, 32752);
-  jcc(Assembler::notEqual, L_2TAG_PACKET_28_0_2);
-  movdl(eax, xmm3);
-  andl(eax, 1);
-  jcc(Assembler::equal, L_2TAG_PACKET_28_0_2);
-
-  bind(L_2TAG_PACKET_29_0_2);
-  movsd(xmm1, Address(rsp, 16));
-  pextrw(eax, xmm1, 3);
-  andl(eax, 32768);
-  jcc(Assembler::equal, L_2TAG_PACKET_18_0_2);
-  xorpd(xmm0, xmm0);
-  movl(eax, 32768);
-  pinsrw(xmm0, eax, 3);
-  jmp(L_2TAG_PACKET_18_0_2);
-
-  bind(L_2TAG_PACKET_28_0_2);
-  movsd(xmm1, Address(rsp, 16));
-  pextrw(eax, xmm1, 3);
-  andl(eax, 32768);
-  jcc(Assembler::notEqual, L_2TAG_PACKET_26_0_2);
-
-  bind(L_2TAG_PACKET_31_0_2);
-  xorpd(xmm0, xmm0);
-  movl(eax, 32752);
-  pinsrw(xmm0, eax, 3);
-  jmp(L_2TAG_PACKET_18_0_2);
-
-  bind(L_2TAG_PACKET_30_0_2);
-  movsd(xmm1, Address(rsp, 16));
-  movdl(eax, xmm1);
-  andl(eax, 1);
-  jcc(Assembler::equal, L_2TAG_PACKET_28_0_2);
-  jmp(L_2TAG_PACKET_29_0_2);
-
-  bind(L_2TAG_PACKET_32_0_2);
-  movdl(eax, xmm1);
-  psrlq(xmm1, 20);
-  movdl(edx, xmm1);
-  orl(eax, edx);
-  jcc(Assembler::equal, L_2TAG_PACKET_33_0_2);
-  movsd(xmm0, Address(rsp, 16));
-  addsd(xmm0, xmm0);
-  jmp(L_2TAG_PACKET_18_0_2);
-
-  bind(L_2TAG_PACKET_33_0_2);
-  movsd(xmm0, Address(rsp, 8));
-  pextrw(eax, xmm0, 3);
-  cmpl(eax, 49136);
-  jcc(Assembler::notEqual, L_2TAG_PACKET_34_0_2);
-  movdl(ecx, xmm0);
-  psrlq(xmm0, 20);
-  movdl(edx, xmm0);
-  orl(ecx, edx);
-  jcc(Assembler::notEqual, L_2TAG_PACKET_34_0_2);
-  xorpd(xmm0, xmm0);
-  movl(eax, 32760);
-  pinsrw(xmm0, eax, 3);
-  jmp(L_2TAG_PACKET_18_0_2);
-
-  bind(L_2TAG_PACKET_34_0_2);
-  movsd(xmm1, Address(rsp, 16));
-  andl(eax, 32752);
-  subl(eax, 16368);
-  pextrw(edx, xmm1, 3);
-  xorpd(xmm0, xmm0);
-  xorl(eax, edx);
-  andl(eax, 32768);
-  jcc(Assembler::notEqual, L_2TAG_PACKET_18_0_2);
-  movl(ecx, 32752);
-  pinsrw(xmm0, ecx, 3);
-  jmp(L_2TAG_PACKET_18_0_2);
-
-  bind(L_2TAG_PACKET_35_0_2);
-  movdl(eax, xmm1);
-  cmpl(edx, 17184);
-  jcc(Assembler::above, L_2TAG_PACKET_36_0_2);
-  testl(eax, 1);
-  jcc(Assembler::notEqual, L_2TAG_PACKET_37_0_2);
-  testl(eax, 2);
-  jcc(Assembler::equal, L_2TAG_PACKET_38_0_2);
-  jmp(L_2TAG_PACKET_39_0_2);
-
-  bind(L_2TAG_PACKET_36_0_2);
-  testl(eax, 1);
-  jcc(Assembler::equal, L_2TAG_PACKET_38_0_2);
-  jmp(L_2TAG_PACKET_39_0_2);
-
-  bind(L_2TAG_PACKET_9_0_2);
-  movsd(xmm2, Address(rsp, 8));
-  movdl(eax, xmm2);
-  psrlq(xmm2, 31);
-  movdl(ecx, xmm2);
-  orl(eax, ecx);
-  jcc(Assembler::equal, L_2TAG_PACKET_11_0_2);
-  movsd(xmm1, Address(rsp, 16));
-  pextrw(edx, xmm1, 3);
-  movdl(eax, xmm1);
-  movdqu(xmm2, xmm1);
-  psrlq(xmm2, 32);
-  movdl(ecx, xmm2);
-  addl(ecx, ecx);
-  orl(ecx, eax);
-  jcc(Assembler::equal, L_2TAG_PACKET_40_0_2);
-  andl(edx, 32752);
-  cmpl(edx, 32752);
-  jcc(Assembler::equal, L_2TAG_PACKET_32_0_2);
-  cmpl(edx, 17200);
-  jcc(Assembler::above, L_2TAG_PACKET_38_0_2);
-  cmpl(edx, 17184);
-  jcc(Assembler::aboveEqual, L_2TAG_PACKET_35_0_2);
-  cmpl(edx, 16368);
-  jcc(Assembler::below, L_2TAG_PACKET_37_0_2);
-  movl(eax, 17208);
-  xorpd(xmm2, xmm2);
-  pinsrw(xmm2, eax, 3);
-  movdqu(xmm4, xmm2);
-  addsd(xmm2, xmm1);
-  subsd(xmm4, xmm2);
-  addsd(xmm1, xmm4);
-  pextrw(eax, xmm1, 3);
-  andl(eax, 32767);
-  jcc(Assembler::notEqual, L_2TAG_PACKET_37_0_2);
-  movdl(eax, xmm2);
-  andl(eax, 1);
-  jcc(Assembler::equal, L_2TAG_PACKET_38_0_2);
-
-  bind(L_2TAG_PACKET_39_0_2);
-  xorpd(xmm1, xmm1);
-  movl(edx, 30704);
-  pinsrw(xmm1, edx, 3);
-  movsd(xmm2, Address(tmp, 8256));
-  movsd(xmm4, Address(rsp, 8));
-  pextrw(eax, xmm4, 3);
-  movl(edx, 8192);
-  movdl(xmm4, edx);
-  andl(eax, 32767);
-  subl(eax, 16);
-  jcc(Assembler::less, L_2TAG_PACKET_12_0_2);
-  movl(edx, eax);
-  andl(edx, 32752);
-  subl(edx, 16368);
-  movl(ecx, edx);
-  sarl(edx, 31);
-  addl(ecx, edx);
-  xorl(ecx, edx);
-  addl(ecx, 16);
-  bsrl(ecx, ecx);
-  movl(rsi, INT_MIN);
-  jmp(L_2TAG_PACKET_1_0_2);
-
-  bind(L_2TAG_PACKET_37_0_2);
-  xorpd(xmm1, xmm1);
-  movl(eax, 32752);
-  pinsrw(xmm1, eax, 3);
-  xorpd(xmm0, xmm0);
-  mulsd(xmm0, xmm1);
-  movl(edx, 28);
-  jmp(L_2TAG_PACKET_21_0_2);
-
-  bind(L_2TAG_PACKET_38_0_2);
-  xorpd(xmm1, xmm1);
-  movl(edx, 30704);
-  pinsrw(xmm1, edx, 3);
-  movsd(xmm2, Address(tmp, 8256));
-  movsd(xmm4, Address(rsp, 8));
-  pextrw(eax, xmm4, 3);
-  movl(edx, 8192);
-  movdl(xmm4, edx);
-  andl(eax, 32767);
-  subl(eax, 16);
-  jcc(Assembler::less, L_2TAG_PACKET_10_0_2);
-  movl(edx, eax);
-  andl(edx, 32752);
-  subl(edx, 16368);
-  movl(ecx, edx);
-  sarl(edx, 31);
-  addl(ecx, edx);
-  xorl(ecx, edx);
-  addl(ecx, 16);
-  bsrl(ecx, ecx);
-  movl(rsi, 0);
-  jmp(L_2TAG_PACKET_1_0_2);
-
-  bind(L_2TAG_PACKET_23_0_2);
-  xorpd(xmm0, xmm0);
-  movl(eax, 16368);
-  pinsrw(xmm0, eax, 3);
-  jmp(L_2TAG_PACKET_18_0_2);
-
-  bind(L_2TAG_PACKET_26_0_2);
-  xorpd(xmm0, xmm0);
-  jmp(L_2TAG_PACKET_18_0_2);
-
-  bind(L_2TAG_PACKET_13_0_2);
-  addl(eax, 384);
-  cmpl(eax, 0);
-  jcc(Assembler::less, L_2TAG_PACKET_41_0_2);
-  mulsd(xmm5, xmm1);
-  addsd(xmm0, xmm7);
-  shrl(rsi, 31);
-  addpd(xmm3, xmm0);
-  pshufd(xmm0, xmm3, 238);
-  addsd(xmm3, xmm0);
-  movsd(xmm4, Address(tmp, rsi, Address::times_8, 12528));
-  mulsd(xmm1, xmm3);
-  xorpd(xmm0, xmm0);
-  movl(eax, 16368);
-  shll(rsi, 15);
-  orl(eax, rsi);
-  pinsrw(xmm0, eax, 3);
-  addsd(xmm5, xmm1);
-  movl(rsi, Address(rsp, 24));
-  mulsd(xmm5, xmm4);
-  addsd(xmm0, xmm5);
-  jmp(L_2TAG_PACKET_18_0_2);
-
-  bind(L_2TAG_PACKET_41_0_2);
-  movl(rsi, Address(rsp, 24));
-  xorpd(xmm0, xmm0);
-  movl(eax, 16368);
-  pinsrw(xmm0, eax, 3);
-  jmp(L_2TAG_PACKET_18_0_2);
-
-  bind(L_2TAG_PACKET_40_0_2);
-  xorpd(xmm0, xmm0);
-  movl(eax, 16368);
-  pinsrw(xmm0, eax, 3);
-  jmp(L_2TAG_PACKET_18_0_2);
-
-  bind(L_2TAG_PACKET_42_0_2);
-  xorpd(xmm0, xmm0);
-  movl(eax, 16368);
-  pinsrw(xmm0, eax, 3);
-  movl(edx, 26);
-  jmp(L_2TAG_PACKET_21_0_2);
-
-  bind(L_2TAG_PACKET_11_0_2);
-  movsd(xmm1, Address(rsp, 16));
-  movdqu(xmm2, xmm1);
-  pextrw(eax, xmm1, 3);
-  andl(eax, 32752);
-  cmpl(eax, 32752);
-  jcc(Assembler::notEqual, L_2TAG_PACKET_43_0_2);
-  movdl(eax, xmm2);
-  psrlq(xmm2, 20);
-  movdl(edx, xmm2);
-  orl(eax, edx);
-  jcc(Assembler::notEqual, L_2TAG_PACKET_22_0_2);
-
-  bind(L_2TAG_PACKET_43_0_2);
-  movdl(eax, xmm1);
-  psrlq(xmm1, 32);
-  movdl(edx, xmm1);
-  movl(ecx, edx);
-  addl(edx, edx);
-  orl(eax, edx);
-  jcc(Assembler::equal, L_2TAG_PACKET_42_0_2);
-  shrl(edx, 21);
-  cmpl(edx, 1075);
-  jcc(Assembler::above, L_2TAG_PACKET_44_0_2);
-  jcc(Assembler::equal, L_2TAG_PACKET_45_0_2);
-  cmpl(edx, 1023);
-  jcc(Assembler::below, L_2TAG_PACKET_44_0_2);
-  movsd(xmm1, Address(rsp, 16));
-  movl(eax, 17208);
-  xorpd(xmm3, xmm3);
-  pinsrw(xmm3, eax, 3);
-  movdqu(xmm4, xmm3);
-  addsd(xmm3, xmm1);
-  subsd(xmm4, xmm3);
-  addsd(xmm1, xmm4);
-  pextrw(eax, xmm1, 3);
-  andl(eax, 32752);
-  jcc(Assembler::notEqual, L_2TAG_PACKET_44_0_2);
-  movdl(eax, xmm3);
-  andl(eax, 1);
-  jcc(Assembler::equal, L_2TAG_PACKET_44_0_2);
-
-  bind(L_2TAG_PACKET_46_0_2);
-  movsd(xmm0, Address(rsp, 8));
-  testl(ecx, INT_MIN);
-  jcc(Assembler::notEqual, L_2TAG_PACKET_47_0_2);
-  jmp(L_2TAG_PACKET_18_0_2);
-
-  bind(L_2TAG_PACKET_45_0_2);
-  movsd(xmm1, Address(rsp, 16));
-  movdl(eax, xmm1);
-  testl(eax, 1);
-  jcc(Assembler::notEqual, L_2TAG_PACKET_46_0_2);
-
-  bind(L_2TAG_PACKET_44_0_2);
-  testl(ecx, INT_MIN);
-  jcc(Assembler::equal, L_2TAG_PACKET_26_0_2);
-  xorpd(xmm0, xmm0);
-
-  bind(L_2TAG_PACKET_47_0_2);
-  movl(eax, 16368);
-  xorpd(xmm1, xmm1);
-  pinsrw(xmm1, eax, 3);
-  divsd(xmm1, xmm0);
-  movdqu(xmm0, xmm1);
-  movl(edx, 27);
-  jmp(L_2TAG_PACKET_21_0_2);
-
-  bind(L_2TAG_PACKET_14_0_2);
-  movsd(xmm2, Address(rsp, 8));
-  movsd(xmm6, Address(rsp, 16));
-  pextrw(eax, xmm2, 3);
-  pextrw(edx, xmm6, 3);
-  movl(ecx, 32752);
-  andl(ecx, edx);
-  cmpl(ecx, 32752);
-  jcc(Assembler::equal, L_2TAG_PACKET_48_0_2);
-  andl(eax, 32752);
-  subl(eax, 16368);
-  xorl(edx, eax);
-  testl(edx, 32768);
-  jcc(Assembler::notEqual, L_2TAG_PACKET_49_0_2);
-
-  bind(L_2TAG_PACKET_50_0_2);
-  movl(eax, 32736);
-  pinsrw(xmm0, eax, 3);
-  shrl(rsi, 16);
-  orl(eax, rsi);
-  pinsrw(xmm1, eax, 3);
-  movl(rsi, Address(rsp, 24));
-  mulsd(xmm0, xmm1);
-
-  bind(L_2TAG_PACKET_17_0_2);
-  movl(edx, 24);
-
-  bind(L_2TAG_PACKET_21_0_2);
-  movsd(Address(rsp, 0), xmm0);
-  fld_d(Address(rsp, 0));
-  jmp(L_2TAG_PACKET_6_0_2);
-
-  bind(L_2TAG_PACKET_49_0_2);
-  movl(eax, 16);
-  pinsrw(xmm0, eax, 3);
-  mulsd(xmm0, xmm0);
-  testl(rsi, INT_MIN);
-  jcc(Assembler::equal, L_2TAG_PACKET_51_0_2);
-  movsd(xmm2, Address(tmp, 12560));
-  xorpd(xmm0, xmm2);
-
-  bind(L_2TAG_PACKET_51_0_2);
-  movl(rsi, Address(rsp, 24));
-  movl(edx, 25);
-  jmp(L_2TAG_PACKET_21_0_2);
-
-  bind(L_2TAG_PACKET_16_0_2);
-  pextrw(ecx, xmm5, 3);
-  pextrw(edx, xmm4, 3);
-  movl(eax, -1);
-  andl(ecx, 32752);
-  subl(ecx, 16368);
-  andl(edx, 32752);
-  addl(edx, ecx);
-  movl(ecx, -31);
-  sarl(edx, 4);
-  subl(ecx, edx);
-  jcc(Assembler::lessEqual, L_2TAG_PACKET_52_0_2);
-  cmpl(ecx, 20);
-  jcc(Assembler::above, L_2TAG_PACKET_53_0_2);
-  shll(eax);
-
-  bind(L_2TAG_PACKET_52_0_2);
-  movdl(xmm0, eax);
-  psllq(xmm0, 32);
-  pand(xmm0, xmm5);
-  subsd(xmm5, xmm0);
-  addsd(xmm5, xmm1);
-  mulsd(xmm0, xmm4);
-  mulsd(xmm5, xmm4);
-  addsd(xmm0, xmm5);
-
-  bind(L_2TAG_PACKET_53_0_2);
-  movl(edx, 25);
-  jmp(L_2TAG_PACKET_21_0_2);
-
-  bind(L_2TAG_PACKET_2_0_2);
-  movzwl(ecx, Address(rsp, 22));
-  movl(edx, INT_MIN);
-  movdl(xmm1, edx);
-  xorpd(xmm7, xmm7);
-  paddd(xmm0, xmm4);
-  psllq(xmm5, 32);
-  movdl(edx, xmm0);
-  psllq(xmm0, 29);
-  paddq(xmm1, xmm3);
-  pand(xmm5, xmm1);
-  andl(ecx, 32752);
-  cmpl(ecx, 16560);
-  jcc(Assembler::below, L_2TAG_PACKET_3_0_2);
-  pand(xmm0, xmm6);
-  subsd(xmm3, xmm5);
-  addl(eax, 16351);
-  shrl(eax, 4);
-  subl(eax, 1022);
-  cvtsi2sdl(xmm7, eax);
-  mulpd(xmm5, xmm0);
-  movsd(xmm4, Address(tmp, 0));
-  mulsd(xmm3, xmm0);
-  movsd(xmm6, Address(tmp, 0));
-  subsd(xmm5, xmm2);
-  movsd(xmm1, Address(tmp, 8));
-  pshufd(xmm2, xmm3, 68);
-  unpcklpd(xmm5, xmm3);
-  addsd(xmm3, xmm5);
-  movsd(xmm0, Address(tmp, 8));
-  andl(edx, 16760832);
-  shrl(edx, 10);
-  addpd(xmm7, Address(tmp, edx, Address::times_1, -3616));
-  mulsd(xmm4, xmm5);
-  mulsd(xmm0, xmm5);
-  mulsd(xmm6, xmm2);
-  mulsd(xmm1, xmm2);
-  movdqu(xmm2, xmm5);
-  mulsd(xmm4, xmm5);
-  addsd(xmm5, xmm0);
-  movdqu(xmm0, xmm7);
-  addsd(xmm2, xmm3);
-  addsd(xmm7, xmm5);
-  mulsd(xmm6, xmm2);
-  subsd(xmm0, xmm7);
-  movdqu(xmm2, xmm7);
-  addsd(xmm7, xmm4);
-  addsd(xmm0, xmm5);
-  subsd(xmm2, xmm7);
-  addsd(xmm4, xmm2);
-  pshufd(xmm2, xmm5, 238);
-  movdqu(xmm5, xmm7);
-  addsd(xmm7, xmm2);
-  addsd(xmm4, xmm0);
-  movdqu(xmm0, Address(tmp, 8272));
-  subsd(xmm5, xmm7);
-  addsd(xmm6, xmm4);
-  movdqu(xmm4, xmm7);
-  addsd(xmm5, xmm2);
-  addsd(xmm7, xmm1);
-  movdqu(xmm2, Address(tmp, 8336));
-  subsd(xmm4, xmm7);
-  addsd(xmm6, xmm5);
-  addsd(xmm4, xmm1);
-  pshufd(xmm5, xmm7, 238);
-  movdqu(xmm1, xmm7);
-  addsd(xmm7, xmm5);
-  subsd(xmm1, xmm7);
-  addsd(xmm1, xmm5);
-  movdqu(xmm5, Address(tmp, 8352));
-  pshufd(xmm3, xmm3, 68);
-  addsd(xmm6, xmm4);
-  addsd(xmm6, xmm1);
-  movdqu(xmm1, Address(tmp, 8304));
-  mulpd(xmm0, xmm3);
-  mulpd(xmm2, xmm3);
-  pshufd(xmm4, xmm3, 68);
-  mulpd(xmm3, xmm3);
-  addpd(xmm0, xmm1);
-  addpd(xmm5, xmm2);
-  mulsd(xmm4, xmm3);
-  movsd(xmm2, Address(tmp, 16));
-  mulpd(xmm3, xmm3);
-  movsd(xmm1, Address(rsp, 16));
-  movzwl(ecx, Address(rsp, 22));
-  mulpd(xmm0, xmm4);
-  pextrw(eax, xmm7, 3);
-  mulpd(xmm5, xmm4);
-  mulpd(xmm0, xmm3);
-  movsd(xmm4, Address(tmp, 8376));
-  pand(xmm2, xmm7);
-  addsd(xmm5, xmm6);
-  subsd(xmm7, xmm2);
-  addpd(xmm5, xmm0);
-  andl(eax, 32752);
-  subl(eax, 16368);
-  andl(ecx, 32752);
-  cmpl(ecx, 32752);
-  jcc(Assembler::equal, L_2TAG_PACKET_48_0_2);
-  addl(ecx, eax);
-  cmpl(ecx, 16576);
-  jcc(Assembler::aboveEqual, L_2TAG_PACKET_54_0_2);
-  pshufd(xmm0, xmm5, 238);
-  pand(xmm4, xmm1);
-  movdqu(xmm3, xmm1);
-  addsd(xmm5, xmm0);
-  subsd(xmm1, xmm4);
-  xorpd(xmm6, xmm6);
-  movl(edx, 17080);
-  pinsrw(xmm6, edx, 3);
-  addsd(xmm7, xmm5);
-  mulsd(xmm4, xmm2);
-  mulsd(xmm1, xmm2);
-  movdqu(xmm5, xmm6);
-  mulsd(xmm3, xmm7);
-  addsd(xmm6, xmm4);
-  addsd(xmm1, xmm3);
-  movdqu(xmm7, Address(tmp, 12480));
-  movdl(edx, xmm6);
-  subsd(xmm6, xmm5);
-  movdqu(xmm3, Address(tmp, 12496));
-  movsd(xmm2, Address(tmp, 12512));
-  subsd(xmm4, xmm6);
-  movl(ecx, edx);
-  andl(edx, 255);
-  addl(edx, edx);
-  movdqu(xmm5, Address(tmp, edx, Address::times_8, 8384));
-  addsd(xmm4, xmm1);
-  pextrw(edx, xmm6, 3);
-  shrl(ecx, 8);
-  movl(eax, ecx);
-  shrl(ecx, 1);
-  subl(eax, ecx);
-  shll(ecx, 20);
-  movdl(xmm6, ecx);
-  pshufd(xmm0, xmm4, 68);
-  pshufd(xmm1, xmm4, 68);
-  mulpd(xmm0, xmm0);
-  mulpd(xmm7, xmm1);
-  pshufd(xmm6, xmm6, 17);
-  mulsd(xmm2, xmm4);
-  andl(edx, 32767);
-  cmpl(edx, 16529);
-  jcc(Assembler::above, L_2TAG_PACKET_14_0_2);
-  mulsd(xmm0, xmm0);
-  paddd(xmm5, xmm6);
-  addpd(xmm3, xmm7);
-  mulsd(xmm2, xmm5);
-  pshufd(xmm6, xmm5, 238);
-  mulpd(xmm0, xmm3);
-  addsd(xmm2, xmm6);
-  pshufd(xmm3, xmm0, 238);
-  addl(eax, 1023);
-  shll(eax, 20);
-  orl(eax, rsi);
-  movdl(xmm4, eax);
-  mulsd(xmm0, xmm5);
-  mulsd(xmm3, xmm5);
-  addsd(xmm0, xmm2);
-  psllq(xmm4, 32);
-  addsd(xmm0, xmm3);
-  movdqu(xmm1, xmm0);
-  addsd(xmm0, xmm5);
-  movl(rsi, Address(rsp, 24));
-  mulsd(xmm0, xmm4);
-  pextrw(eax, xmm0, 3);
-  andl(eax, 32752);
-  jcc(Assembler::equal, L_2TAG_PACKET_16_0_2);
-  cmpl(eax, 32752);
-  jcc(Assembler::equal, L_2TAG_PACKET_17_0_2);
-
-  bind(L_2TAG_PACKET_55_0_2);
-  movsd(Address(rsp, 0), xmm0);
-  fld_d(Address(rsp, 0));
-  jmp(L_2TAG_PACKET_6_0_2);
-
-  bind(L_2TAG_PACKET_48_0_2);
-  movl(rsi, Address(rsp, 24));
-
-  bind(L_2TAG_PACKET_56_0_2);
-  movsd(xmm0, Address(rsp, 8));
-  movsd(xmm1, Address(rsp, 16));
-  addsd(xmm1, xmm1);
-  xorpd(xmm2, xmm2);
-  movl(eax, 49136);
-  pinsrw(xmm2, eax, 3);
-  addsd(xmm2, xmm0);
-  pextrw(eax, xmm2, 3);
-  cmpl(eax, 0);
-  jcc(Assembler::notEqual, L_2TAG_PACKET_57_0_2);
-  xorpd(xmm0, xmm0);
-  movl(eax, 32760);
-  pinsrw(xmm0, eax, 3);
-  jmp(L_2TAG_PACKET_18_0_2);
-
-  bind(L_2TAG_PACKET_57_0_2);
-  movdl(edx, xmm1);
-  movdqu(xmm3, xmm1);
-  psrlq(xmm3, 20);
-  movdl(ecx, xmm3);
-  orl(ecx, edx);
-  jcc(Assembler::equal, L_2TAG_PACKET_58_0_2);
-  addsd(xmm1, xmm1);
-  movdqu(xmm0, xmm1);
-  jmp(L_2TAG_PACKET_18_0_2);
-
-  bind(L_2TAG_PACKET_58_0_2);
-  pextrw(eax, xmm0, 3);
-  andl(eax, 32752);
-  pextrw(edx, xmm1, 3);
-  xorpd(xmm0, xmm0);
-  subl(eax, 16368);
-  xorl(eax, edx);
-  testl(eax, 32768);
-  jcc(Assembler::notEqual, L_2TAG_PACKET_18_0_2);
-  movl(edx, 32752);
-  pinsrw(xmm0, edx, 3);
-  jmp(L_2TAG_PACKET_18_0_2);
-
-  bind(L_2TAG_PACKET_54_0_2);
-  pextrw(eax, xmm1, 3);
-  pextrw(ecx, xmm2, 3);
-  xorl(eax, ecx);
-  testl(eax, 32768);
-  jcc(Assembler::equal, L_2TAG_PACKET_50_0_2);
-  jmp(L_2TAG_PACKET_49_0_2);
-
-  bind(L_2TAG_PACKET_6_0_2);
-  movl(tmp, Address(rsp, 64));
-
-}
diff --git a/src/hotspot/cpu/x86/macroAssembler_x86_32_sin.cpp b/src/hotspot/cpu/x86/macroAssembler_x86_32_sin.cpp
deleted file mode 100644
index cd593ba3356..00000000000
--- a/src/hotspot/cpu/x86/macroAssembler_x86_32_sin.cpp
+++ /dev/null
@@ -1,1742 +0,0 @@
-/*
-* Copyright (c) 2016, 2021, Intel Corporation. All rights reserved.
-* Intel Math Library (LIBM) Source Code
-*
-* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
-*
-* This code is free software; you can redistribute it and/or modify it
-* under the terms of the GNU General Public License version 2 only, as
-* published by the Free Software Foundation.
-*
-* This code is distributed in the hope that it will be useful, but WITHOUT
-* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-* FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-* version 2 for more details (a copy is included in the LICENSE file that
-* accompanied this code).
-*
-* You should have received a copy of the GNU General Public License version
-* 2 along with this work; if not, write to the Free Software Foundation,
-* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
-*
-* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
-* or visit www.oracle.com if you need additional information or have any
-* questions.
-*
-*/
-
-#include "asm/assembler.hpp"
-#include "asm/assembler.inline.hpp"
-#include "macroAssembler_x86.hpp"
-#include "runtime/stubRoutines.hpp"
-#include "stubRoutines_x86.hpp"
-#include "utilities/globalDefinitions.hpp"
-
-/******************************************************************************/
-//                     ALGORITHM DESCRIPTION - SIN()
-//                     ---------------------
-//
-//     1. RANGE REDUCTION
-//
-//     We perform an initial range reduction from X to r with
-//
-//          X =~= N * pi/32 + r
-//
-//     so that |r| <= pi/64 + epsilon. We restrict inputs to those
-//     where |N| <= 932560. Beyond this, the range reduction is
-//     insufficiently accurate. For extremely small inputs,
-//     denormalization can occur internally, impacting performance.
-//     This means that the main path is actually only taken for
-//     2^-252 <= |X| < 90112.
-//
-//     To avoid branches, we perform the range reduction to full
-//     accuracy each time.
-//
-//          X - N * (P_1 + P_2 + P_3)
-//
-//     where P_1 and P_2 are 32-bit numbers (so multiplication by N
-//     is exact) and P_3 is a 53-bit number. Together, these
-//     approximate pi well enough for all cases in the restricted
-//     range.
-//
-//     The main reduction sequence is:
-//
-//             y = 32/pi * x
-//             N = integer(y)
-//     (computed by adding and subtracting off SHIFTER)
-//
-//             m_1 = N * P_1
-//             m_2 = N * P_2
-//             r_1 = x - m_1
-//             r = r_1 - m_2
-//     (this r can be used for most of the calculation)
-//
-//             c_1 = r_1 - r
-//             m_3 = N * P_3
-//             c_2 = c_1 - m_2
-//             c = c_2 - m_3
-//
-//     2. MAIN ALGORITHM
-//
-//     The algorithm uses a table lookup based on B = M * pi / 32
-//     where M = N mod 64. The stored values are:
-//       sigma             closest power of 2 to cos(B)
-//       C_hl              53-bit cos(B) - sigma
-//       S_hi + S_lo       2 * 53-bit sin(B)
-//
-//     The computation is organized as follows:
-//
-//          sin(B + r + c) = [sin(B) + sigma * r] +
-//                           r * (cos(B) - sigma) +
-//                           sin(B) * [cos(r + c) - 1] +
-//                           cos(B) * [sin(r + c) - r]
-//
-//     which is approximately:
-//
-//          [S_hi + sigma * r] +
-//          C_hl * r +
-//          S_lo + S_hi * [(cos(r) - 1) - r * c] +
-//          (C_hl + sigma) * [(sin(r) - r) + c]
-//
-//     and this is what is actually computed. We separate this sum
-//     into four parts:
-//
-//          hi + med + pols + corr
-//
-//     where
-//
-//          hi       = S_hi + sigma r
-//          med      = C_hl * r
-//          pols     = S_hi * (cos(r) - 1) + (C_hl + sigma) * (sin(r) - r)
-//          corr     = S_lo + c * ((C_hl + sigma) - S_hi * r)
-//
-//     3. POLYNOMIAL
-//
-//     The polynomial S_hi * (cos(r) - 1) + (C_hl + sigma) *
-//     (sin(r) - r) can be rearranged freely, since it is quite
-//     small, so we exploit parallelism to the fullest.
-//
-//          psc4       =   SC_4 * r_1
-//          msc4       =   psc4 * r
-//          r2         =   r * r
-//          msc2       =   SC_2 * r2
-//          r4         =   r2 * r2
-//          psc3       =   SC_3 + msc4
-//          psc1       =   SC_1 + msc2
-//          msc3       =   r4 * psc3
-//          sincospols =   psc1 + msc3
-//          pols       =   sincospols *
-//                         <S_hi * r^2 | (C_hl + sigma) * r^3>
-//
-//     4. CORRECTION TERM
-//
-//     This is where the "c" component of the range reduction is
-//     taken into account; recall that just "r" is used for most of
-//     the calculation.
-//
-//          -c   = m_3 - c_2
-//          -d   = S_hi * r - (C_hl + sigma)
-//          corr = -c * -d + S_lo
-//
-//     5. COMPENSATED SUMMATIONS
-//
-//     The two successive compensated summations add up the high
-//     and medium parts, leaving just the low parts to add up at
-//     the end.
-//
-//          rs        =  sigma * r
-//          res_int   =  S_hi + rs
-//          k_0       =  S_hi - res_int
-//          k_2       =  k_0 + rs
-//          med       =  C_hl * r
-//          res_hi    =  res_int + med
-//          k_1       =  res_int - res_hi
-//          k_3       =  k_1 + med
-//
-//     6. FINAL SUMMATION
-//
-//     We now add up all the small parts:
-//
-//          res_lo = pols(hi) + pols(lo) + corr + k_1 + k_3
-//
-//     Now the overall result is just:
-//
-//          res_hi + res_lo
-//
-//     7. SMALL ARGUMENTS
-//
-//     If |x| < SNN (SNN meaning the smallest normal number), we
-//     simply perform 0.1111111 cdots 1111 * x. For SNN <= |x|, we
-//     do 2^-55 * (2^55 * x - x).
-//
-// Special cases:
-//  sin(NaN) = quiet NaN, and raise invalid exception
-//  sin(INF) = NaN and raise invalid exception
-//  sin(+/-0) = +/-0
-//
-/******************************************************************************/
-
-// The 32 bit code is at most SSE2 compliant
-ATTRIBUTE_ALIGNED(8) static const juint _zero_none[] =
-{
-    0x00000000UL, 0x00000000UL, 0x00000000UL, 0xbff00000UL
-};
-
-ATTRIBUTE_ALIGNED(4) static const juint __4onpi_d[] =
-{
-    0x6dc9c883UL, 0x3ff45f30UL
-};
-
-ATTRIBUTE_ALIGNED(4) static const juint _TWO_32H[] =
-{
-    0x00000000UL, 0x41f80000UL
-};
-
-ATTRIBUTE_ALIGNED(4) static const juint _pi04_3d[] =
-{
-    0x54442d00UL, 0x3fe921fbUL, 0x98cc5180UL, 0x3ce84698UL, 0xcbb5bf6cUL,
-    0xb9dfc8f8UL
-};
-
-ATTRIBUTE_ALIGNED(4) static const juint _pi04_5d[] =
-{
-    0x54400000UL, 0x3fe921fbUL, 0x1a600000UL, 0x3dc0b461UL, 0x2e000000UL,
-    0x3b93198aUL, 0x25200000UL, 0x396b839aUL, 0x533e63a0UL, 0x37027044UL
-};
-
-ATTRIBUTE_ALIGNED(4) static const juint _SCALE[] =
-{
-    0x00000000UL, 0x32600000UL
-};
-
-ATTRIBUTE_ALIGNED(4) static const juint _zeros[] =
-{
-    0x00000000UL, 0x00000000UL, 0x00000000UL, 0x80000000UL
-};
-
-ATTRIBUTE_ALIGNED(4) static const juint _pi04_2d[] =
-{
-    0x54400000UL, 0x3fe921fbUL, 0x1a626331UL, 0x3dc0b461UL
-};
-
-ATTRIBUTE_ALIGNED(4) static const juint _TWO_12H[] =
-{
-    0x00000000UL, 0x40b80000UL
-};
-
-ATTRIBUTE_ALIGNED(2) static const jushort __4onpi_31l[] =
-{
-    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x836e, 0xa2f9,
-    0x40d8, 0x0000, 0x0000, 0x0000, 0x2a50, 0x9c88, 0x40b7, 0x0000, 0x0000, 0x0000,
-    0xabe8, 0xfe13, 0x4099, 0x0000, 0x0000, 0x0000, 0x6ee0, 0xfa9a, 0x4079, 0x0000,
-    0x0000, 0x0000, 0x9580, 0xdb62, 0x4058, 0x0000, 0x0000, 0x0000, 0x1c82, 0xc9e2,
-    0x403d, 0x0000, 0x0000, 0x0000, 0xb1c0, 0xff28, 0x4019, 0x0000, 0x0000, 0x0000,
-    0xef14, 0xaf7a, 0x3ffe, 0x0000, 0x0000, 0x0000, 0x48dc, 0xc36e, 0x3fdf, 0x0000,
-    0x0000, 0x0000, 0x3740, 0xe909, 0x3fbe, 0x0000, 0x0000, 0x0000, 0x924a, 0xb801,
-    0x3fa2, 0x0000, 0x0000, 0x0000, 0x3a32, 0xdd41, 0x3f83, 0x0000, 0x0000, 0x0000,
-    0x8778, 0x873f, 0x3f62, 0x0000, 0x0000, 0x0000, 0x1298, 0xb1cb, 0x3f44, 0x0000,
-    0x0000, 0x0000, 0xa208, 0x9cfb, 0x3f26, 0x0000, 0x0000, 0x0000, 0xbaec, 0xd7d4,
-    0x3f06, 0x0000, 0x0000, 0x0000, 0xd338, 0x8909, 0x3ee7, 0x0000, 0x0000, 0x0000,
-    0x68b8, 0xe04d, 0x3ec7, 0x0000, 0x0000, 0x0000, 0x4e64, 0xdf90, 0x3eaa, 0x0000,
-    0x0000, 0x0000, 0xc1a8, 0xeb1c, 0x3e89, 0x0000, 0x0000, 0x0000, 0x2720, 0xce7d,
-    0x3e6a, 0x0000, 0x0000, 0x0000, 0x77b8, 0x8bf1, 0x3e4b, 0x0000, 0x0000, 0x0000,
-    0xec7e, 0xe4a0, 0x3e2e, 0x0000, 0x0000, 0x0000, 0xffbc, 0xf12f, 0x3e0f, 0x0000,
-    0x0000, 0x0000, 0xfdc0, 0xb301, 0x3deb, 0x0000, 0x0000, 0x0000, 0xc5ac, 0x9788,
-    0x3dd1, 0x0000, 0x0000, 0x0000, 0x47da, 0x829b, 0x3db2, 0x0000, 0x0000, 0x0000,
-    0xd9e4, 0xa6cf, 0x3d93, 0x0000, 0x0000, 0x0000, 0x36e8, 0xf961, 0x3d73, 0x0000,
-    0x0000, 0x0000, 0xf668, 0xf463, 0x3d54, 0x0000, 0x0000, 0x0000, 0x5168, 0xf2ff,
-    0x3d35, 0x0000, 0x0000, 0x0000, 0x758e, 0xea4f, 0x3d17, 0x0000, 0x0000, 0x0000,
-    0xf17a, 0xebe5, 0x3cf8, 0x0000, 0x0000, 0x0000, 0x9cfa, 0x9e83, 0x3cd9, 0x0000,
-    0x0000, 0x0000, 0xa4ba, 0xe294, 0x3cba, 0x0000, 0x0000, 0x0000, 0xd7ec, 0x9afe,
-    0x3c9a, 0x0000, 0x0000, 0x0000, 0xae80, 0x8fc6, 0x3c79, 0x0000, 0x0000, 0x0000,
-    0x3304, 0x8560, 0x3c5c, 0x0000, 0x0000, 0x0000, 0x6d70, 0xdf8f, 0x3c3b, 0x0000,
-    0x0000, 0x0000, 0x3ef0, 0xafc3, 0x3c1e, 0x0000, 0x0000, 0x0000, 0xd0d8, 0x826b,
-    0x3bfe, 0x0000, 0x0000, 0x0000, 0x1c80, 0xed4f, 0x3bdd, 0x0000, 0x0000, 0x0000,
-    0x730c, 0xb0af, 0x3bc1, 0x0000, 0x0000, 0x0000, 0x6660, 0xc219, 0x3ba2, 0x0000,
-    0x0000, 0x0000, 0x940c, 0xabe2, 0x3b83, 0x0000, 0x0000, 0x0000, 0xdffc, 0x8408,
-    0x3b64, 0x0000, 0x0000, 0x0000, 0x6b98, 0xc402, 0x3b45, 0x0000, 0x0000, 0x0000,
-    0x1818, 0x9cc4, 0x3b26, 0x0000, 0x0000, 0x0000, 0x5390, 0xaab6, 0x3b05, 0x0000,
-    0x0000, 0x0000, 0xb070, 0xd464, 0x3ae9, 0x0000, 0x0000, 0x0000, 0x231a, 0x9ef0,
-    0x3aca, 0x0000, 0x0000, 0x0000, 0x0670, 0xd1f1, 0x3aaa, 0x0000, 0x0000, 0x0000,
-    0x7738, 0xd9f3, 0x3a8a, 0x0000, 0x0000, 0x0000, 0xa834, 0x8092, 0x3a6c, 0x0000,
-    0x0000, 0x0000, 0xb45c, 0xce23, 0x3a4d, 0x0000, 0x0000, 0x0000, 0x36e8, 0xb0e5,
-    0x3a2d, 0x0000, 0x0000, 0x0000, 0xd156, 0xaf44, 0x3a10, 0x0000, 0x0000, 0x0000,
-    0x9f52, 0x8c82, 0x39f1, 0x0000, 0x0000, 0x0000, 0x829c, 0xff83, 0x39d1, 0x0000,
-    0x0000, 0x0000, 0x7d06, 0xefc6, 0x39b3, 0x0000, 0x0000, 0x0000, 0x93e0, 0xb0b7,
-    0x3992, 0x0000, 0x0000, 0x0000, 0xedde, 0xc193, 0x3975, 0x0000, 0x0000, 0x0000,
-    0xbbc0, 0xcf49, 0x3952, 0x0000, 0x0000, 0x0000, 0xbdf0, 0xd63c, 0x3937, 0x0000,
-    0x0000, 0x0000, 0x1f34, 0x9f3a, 0x3918, 0x0000, 0x0000, 0x0000, 0x3f8e, 0xe579,
-    0x38f9, 0x0000, 0x0000, 0x0000, 0x90c8, 0xc3f8, 0x38d9, 0x0000, 0x0000, 0x0000,
-    0x48c0, 0xf8f8, 0x38b7, 0x0000, 0x0000, 0x0000, 0xed56, 0xafa6, 0x389c, 0x0000,
-    0x0000, 0x0000, 0x8218, 0xb969, 0x387d, 0x0000, 0x0000, 0x0000, 0x1852, 0xec57,
-    0x385e, 0x0000, 0x0000, 0x0000, 0x670c, 0xd674, 0x383e, 0x0000, 0x0000, 0x0000,
-    0xad40, 0xc2c4, 0x3820, 0x0000, 0x0000, 0x0000, 0x2e80, 0xa696, 0x3801, 0x0000,
-    0x0000, 0x0000, 0xd800, 0xc467, 0x37dc, 0x0000, 0x0000, 0x0000, 0x3c72, 0xc5ae,
-    0x37c3, 0x0000, 0x0000, 0x0000, 0xb006, 0xac69, 0x37a4, 0x0000, 0x0000, 0x0000,
-    0x34a0, 0x8cdf, 0x3782, 0x0000, 0x0000, 0x0000, 0x9ed2, 0xd25e, 0x3766, 0x0000,
-    0x0000, 0x0000, 0x6fec, 0xaaaa, 0x3747, 0x0000, 0x0000, 0x0000, 0x6040, 0xfb5c,
-    0x3726, 0x0000, 0x0000, 0x0000, 0x764c, 0xa3fc, 0x3708, 0x0000, 0x0000, 0x0000,
-    0xb254, 0x954e, 0x36e9, 0x0000, 0x0000, 0x0000, 0x3e1c, 0xf5dc, 0x36ca, 0x0000,
-    0x0000, 0x0000, 0x7b06, 0xc635, 0x36ac, 0x0000, 0x0000, 0x0000, 0xa8ba, 0xd738,
-    0x368d, 0x0000, 0x0000, 0x0000, 0x06cc, 0xb24e, 0x366d, 0x0000, 0x0000, 0x0000,
-    0x7108, 0xac76, 0x364f, 0x0000, 0x0000, 0x0000, 0x2324, 0xa7cb, 0x3630, 0x0000,
-    0x0000, 0x0000, 0xac40, 0xef15, 0x360f, 0x0000, 0x0000, 0x0000, 0xae46, 0xd516,
-    0x35f2, 0x0000, 0x0000, 0x0000, 0x615e, 0xe003, 0x35d3, 0x0000, 0x0000, 0x0000,
-    0x0cf0, 0xefe7, 0x35b1, 0x0000, 0x0000, 0x0000, 0xfb50, 0xf98c, 0x3595, 0x0000,
-    0x0000, 0x0000, 0x0abc, 0xf333, 0x3575, 0x0000, 0x0000, 0x0000, 0xdd60, 0xca3f,
-    0x3555, 0x0000, 0x0000, 0x0000, 0x7eb6, 0xd87f, 0x3538, 0x0000, 0x0000, 0x0000,
-    0x44f4, 0xb291, 0x3519, 0x0000, 0x0000, 0x0000, 0xff80, 0xc982, 0x34f6, 0x0000,
-    0x0000, 0x0000, 0x9de0, 0xd9b8, 0x34db, 0x0000, 0x0000, 0x0000, 0xcd42, 0x9366,
-    0x34bc, 0x0000, 0x0000, 0x0000, 0xbef0, 0xfaee, 0x349d, 0x0000, 0x0000, 0x0000,
-    0xdac4, 0xb6f1, 0x347d, 0x0000, 0x0000, 0x0000, 0xf140, 0x94de, 0x345d, 0x0000,
-    0x0000, 0x0000, 0xa218, 0x8b4b, 0x343e, 0x0000, 0x0000, 0x0000, 0x6380, 0xa135,
-    0x341e, 0x0000, 0x0000, 0x0000, 0xb184, 0x8cb2, 0x3402, 0x0000, 0x0000, 0x0000,
-    0x196e, 0xdc61, 0x33e3, 0x0000, 0x0000, 0x0000, 0x0c00, 0xde05, 0x33c4, 0x0000,
-    0x0000, 0x0000, 0xef9a, 0xbd38, 0x33a5, 0x0000, 0x0000, 0x0000, 0xc1a0, 0xdf00,
-    0x3385, 0x0000, 0x0000, 0x0000, 0x1090, 0x9973, 0x3365, 0x0000, 0x0000, 0x0000,
-    0x4882, 0x8301, 0x3348, 0x0000, 0x0000, 0x0000, 0x7abe, 0xadc7, 0x3329, 0x0000,
-    0x0000, 0x0000, 0x7cba, 0xec2b, 0x330a, 0x0000, 0x0000, 0x0000, 0xa520, 0x8f21,
-    0x32e9, 0x0000, 0x0000, 0x0000, 0x710c, 0x8d36, 0x32cc, 0x0000, 0x0000, 0x0000,
-    0x5212, 0xc6ed, 0x32ad, 0x0000, 0x0000, 0x0000, 0x7308, 0xfd76, 0x328d, 0x0000,
-    0x0000, 0x0000, 0x5014, 0xd548, 0x326f, 0x0000, 0x0000, 0x0000, 0xd3f2, 0xb499,
-    0x3250, 0x0000, 0x0000, 0x0000, 0x7f74, 0xa606, 0x3230, 0x0000, 0x0000, 0x0000,
-    0xf0a8, 0xd720, 0x3212, 0x0000, 0x0000, 0x0000, 0x185c, 0xe20f, 0x31f2, 0x0000,
-    0x0000, 0x0000, 0xa5a8, 0x8738, 0x31d4, 0x0000, 0x0000, 0x0000, 0xdd74, 0xcafb,
-    0x31b4, 0x0000, 0x0000, 0x0000, 0x98b6, 0xbd8e, 0x3196, 0x0000, 0x0000, 0x0000,
-    0xe9de, 0x977f, 0x3177, 0x0000, 0x0000, 0x0000, 0x67c0, 0x818d, 0x3158, 0x0000,
-    0x0000, 0x0000, 0xe52a, 0x9322, 0x3139, 0x0000, 0x0000, 0x0000, 0xe568, 0x9b6c,
-    0x3119, 0x0000, 0x0000, 0x0000, 0x2358, 0xaa0a, 0x30fa, 0x0000, 0x0000, 0x0000,
-    0xe480, 0xe13b, 0x30d9, 0x0000, 0x0000, 0x0000, 0x3024, 0x90a1, 0x30bd, 0x0000,
-    0x0000, 0x0000, 0x9620, 0xda30, 0x309d, 0x0000, 0x0000, 0x0000, 0x898a, 0xb388,
-    0x307f, 0x0000, 0x0000, 0x0000, 0xb24c, 0xc891, 0x3060, 0x0000, 0x0000, 0x0000,
-    0x8056, 0xf98b, 0x3041, 0x0000, 0x0000, 0x0000, 0x72a4, 0xa1ea, 0x3021, 0x0000,
-    0x0000, 0x0000, 0x6af8, 0x9488, 0x3001, 0x0000, 0x0000, 0x0000, 0xe00c, 0xdfcb,
-    0x2fe4, 0x0000, 0x0000, 0x0000, 0xeeec, 0xc941, 0x2fc4, 0x0000, 0x0000, 0x0000,
-    0x53e0, 0xe70f, 0x2fa4, 0x0000, 0x0000, 0x0000, 0x8f60, 0x9c07, 0x2f85, 0x0000,
-    0x0000, 0x0000, 0xb328, 0xc3e7, 0x2f68, 0x0000, 0x0000, 0x0000, 0x9404, 0xf8c7,
-    0x2f48, 0x0000, 0x0000, 0x0000, 0x38e0, 0xc99f, 0x2f29, 0x0000, 0x0000, 0x0000,
-    0x9778, 0xd984, 0x2f09, 0x0000, 0x0000, 0x0000, 0xe700, 0xd142, 0x2eea, 0x0000,
-    0x0000, 0x0000, 0xd904, 0x9443, 0x2ecd, 0x0000, 0x0000, 0x0000, 0xd4ba, 0xae7e,
-    0x2eae, 0x0000, 0x0000, 0x0000, 0x8e5e, 0x8524, 0x2e8f, 0x0000, 0x0000, 0x0000,
-    0xb550, 0xc9ed, 0x2e6e, 0x0000, 0x0000, 0x0000, 0x53b8, 0x8648, 0x2e51, 0x0000,
-    0x0000, 0x0000, 0xdae4, 0x87f9, 0x2e32, 0x0000, 0x0000, 0x0000, 0x2942, 0xd966,
-    0x2e13, 0x0000, 0x0000, 0x0000, 0x4f28, 0xcf3c, 0x2df3, 0x0000, 0x0000, 0x0000,
-    0xfa40, 0xc4ef, 0x2dd1, 0x0000, 0x0000, 0x0000, 0x4424, 0xbca7, 0x2db5, 0x0000,
-    0x0000, 0x0000, 0x2e62, 0xcdc5, 0x2d97, 0x0000, 0x0000, 0x0000, 0xed88, 0x996b,
-    0x2d78, 0x0000, 0x0000, 0x0000, 0x7c30, 0xd97d, 0x2d56, 0x0000, 0x0000, 0x0000,
-    0xed26, 0xbf6e, 0x2d3a, 0x0000, 0x0000, 0x0000, 0x2918, 0x921b, 0x2d1a, 0x0000,
-    0x0000, 0x0000, 0x4e24, 0xe84e, 0x2cfb, 0x0000, 0x0000, 0x0000, 0x6dc0, 0x92ec,
-    0x2cdd, 0x0000, 0x0000, 0x0000, 0x4f2c, 0xacf8, 0x2cbd, 0x0000, 0x0000, 0x0000,
-    0xc634, 0xf094, 0x2c9e, 0x0000, 0x0000, 0x0000, 0xdc70, 0xe5d3, 0x2c7e, 0x0000,
-    0x0000, 0x0000, 0x2180, 0xa600, 0x2c5b, 0x0000, 0x0000, 0x0000, 0x8480, 0xd680,
-    0x2c3c, 0x0000, 0x0000, 0x0000, 0x8b24, 0xd63b, 0x2c22, 0x0000, 0x0000, 0x0000,
-    0x02e0, 0xaa47, 0x2c00, 0x0000, 0x0000, 0x0000, 0x9ad0, 0xee84, 0x2be3, 0x0000,
-    0x0000, 0x0000, 0xf7dc, 0xf699, 0x2bc6, 0x0000, 0x0000, 0x0000, 0xddde, 0xe490,
-    0x2ba7, 0x0000, 0x0000, 0x0000, 0x34a0, 0xb4fd, 0x2b85, 0x0000, 0x0000, 0x0000,
-    0x91b4, 0x8ef6, 0x2b68, 0x0000, 0x0000, 0x0000, 0xa3e0, 0xa2a7, 0x2b47, 0x0000,
-    0x0000, 0x0000, 0xcce4, 0x82b3, 0x2b2a, 0x0000, 0x0000, 0x0000, 0xe4be, 0x8207,
-    0x2b0c, 0x0000, 0x0000, 0x0000, 0x1d92, 0xab43, 0x2aed, 0x0000, 0x0000, 0x0000,
-    0xe818, 0xf9f6, 0x2acd, 0x0000, 0x0000, 0x0000, 0xff12, 0xba80, 0x2aaf, 0x0000,
-    0x0000, 0x0000, 0x5254, 0x8529, 0x2a90, 0x0000, 0x0000, 0x0000, 0x1b88, 0xe032,
-    0x2a71, 0x0000, 0x0000, 0x0000, 0x3248, 0xd86d, 0x2a50, 0x0000, 0x0000, 0x0000,
-    0x3140, 0xc9d5, 0x2a2e, 0x0000, 0x0000, 0x0000, 0x14e6, 0xbd47, 0x2a14, 0x0000,
-    0x0000, 0x0000, 0x5c10, 0xe544, 0x29f4, 0x0000, 0x0000, 0x0000, 0x9f50, 0x90b6,
-    0x29d4, 0x0000, 0x0000, 0x0000, 0x9850, 0xab55, 0x29b6, 0x0000, 0x0000, 0x0000,
-    0x2750, 0x9d07, 0x2998, 0x0000, 0x0000, 0x0000, 0x6700, 0x8bbb, 0x2973, 0x0000,
-    0x0000, 0x0000, 0x5dba, 0xed31, 0x295a, 0x0000, 0x0000, 0x0000, 0x61dc, 0x85fe,
-    0x293a, 0x0000, 0x0000, 0x0000, 0x9ba2, 0xd6b4, 0x291c, 0x0000, 0x0000, 0x0000,
-    0x2d30, 0xe3a5, 0x28fb, 0x0000, 0x0000, 0x0000, 0x6630, 0xb566, 0x28dd, 0x0000,
-    0x0000, 0x0000, 0x5ad4, 0xa829, 0x28bf, 0x0000, 0x0000, 0x0000, 0x89d8, 0xe290,
-    0x28a0, 0x0000, 0x0000, 0x0000, 0x3916, 0xc428, 0x2881, 0x0000, 0x0000, 0x0000,
-    0x0490, 0xbea4, 0x2860, 0x0000, 0x0000, 0x0000, 0xee06, 0x80ee, 0x2843, 0x0000,
-    0x0000, 0x0000, 0xfc00, 0xf327, 0x2820, 0x0000, 0x0000, 0x0000, 0xea40, 0xa871,
-    0x2800, 0x0000, 0x0000, 0x0000, 0x63d8, 0x9c26, 0x27e4, 0x0000, 0x0000, 0x0000,
-    0x07ba, 0xc0c9, 0x27c7, 0x0000, 0x0000, 0x0000, 0x3fa2, 0x9797, 0x27a8, 0x0000,
-    0x0000, 0x0000, 0x21c6, 0xfeca, 0x2789, 0x0000, 0x0000, 0x0000, 0xde40, 0x860d,
-    0x2768, 0x0000, 0x0000, 0x0000, 0x9cc8, 0x98ce, 0x2749, 0x0000, 0x0000, 0x0000,
-    0x3778, 0xa31c, 0x272a, 0x0000, 0x0000, 0x0000, 0xe778, 0xf6e2, 0x270b, 0x0000,
-    0x0000, 0x0000, 0x59b8, 0xf841, 0x26ed, 0x0000, 0x0000, 0x0000, 0x02e0, 0xad04,
-    0x26cd, 0x0000, 0x0000, 0x0000, 0x5a92, 0x9380, 0x26b0, 0x0000, 0x0000, 0x0000,
-    0xc740, 0x8886, 0x268d, 0x0000, 0x0000, 0x0000, 0x0680, 0xfaf8, 0x266c, 0x0000,
-    0x0000, 0x0000, 0xfb60, 0x897f, 0x2653, 0x0000, 0x0000, 0x0000, 0x8760, 0xf903,
-    0x2634, 0x0000, 0x0000, 0x0000, 0xad2a, 0xc2c8, 0x2615, 0x0000, 0x0000, 0x0000,
-    0x2d86, 0x8aef, 0x25f6, 0x0000, 0x0000, 0x0000, 0x1ef4, 0xe627, 0x25d6, 0x0000,
-    0x0000, 0x0000, 0x09e4, 0x8020, 0x25b7, 0x0000, 0x0000, 0x0000, 0x7548, 0xd227,
-    0x2598, 0x0000, 0x0000, 0x0000, 0x75dc, 0xfb5b, 0x2579, 0x0000, 0x0000, 0x0000,
-    0xea84, 0xc8b6, 0x255a, 0x0000, 0x0000, 0x0000, 0xe4d0, 0x8145, 0x253b, 0x0000,
-    0x0000, 0x0000, 0x3640, 0x9768, 0x251c, 0x0000, 0x0000, 0x0000, 0x246a, 0xccec,
-    0x24fe, 0x0000, 0x0000, 0x0000, 0x51d0, 0xa075, 0x24dd, 0x0000, 0x0000, 0x0000,
-    0x4638, 0xa385, 0x24bf, 0x0000, 0x0000, 0x0000, 0xd788, 0xd776, 0x24a1, 0x0000,
-    0x0000, 0x0000, 0x1370, 0x8997, 0x2482, 0x0000, 0x0000, 0x0000, 0x1e88, 0x9b67,
-    0x2462, 0x0000, 0x0000, 0x0000, 0x6c08, 0xd975, 0x2444, 0x0000, 0x0000, 0x0000,
-    0xfdb0, 0xcfc0, 0x2422, 0x0000, 0x0000, 0x0000, 0x3100, 0xc026, 0x2406, 0x0000,
-    0x0000, 0x0000, 0xc5b4, 0xae64, 0x23e6, 0x0000, 0x0000, 0x0000, 0x2280, 0xf687,
-    0x23c3, 0x0000, 0x0000, 0x0000, 0x2de0, 0x9006, 0x23a9, 0x0000, 0x0000, 0x0000,
-    0x24bc, 0xf631, 0x238a, 0x0000, 0x0000, 0x0000, 0xb8d4, 0xa975, 0x236b, 0x0000,
-    0x0000, 0x0000, 0xd9a4, 0xb949, 0x234b, 0x0000, 0x0000, 0x0000, 0xb54e, 0xbd39,
-    0x232d, 0x0000, 0x0000, 0x0000, 0x4aac, 0x9a52, 0x230e, 0x0000, 0x0000, 0x0000,
-    0xbbbc, 0xd085, 0x22ef, 0x0000, 0x0000, 0x0000, 0xdf18, 0xc633, 0x22cf, 0x0000,
-    0x0000, 0x0000, 0x16d0, 0xeca5, 0x22af, 0x0000, 0x0000, 0x0000, 0xf2a0, 0xdf6f,
-    0x228e, 0x0000, 0x0000, 0x0000, 0x8c44, 0xe86b, 0x2272, 0x0000, 0x0000, 0x0000,
-    0x35c0, 0xbbf4, 0x2253, 0x0000, 0x0000, 0x0000, 0x0c40, 0xdafb, 0x2230, 0x0000,
-    0x0000, 0x0000, 0x92dc, 0x9935, 0x2216, 0x0000, 0x0000, 0x0000, 0x0ca0, 0xbda6,
-    0x21f3, 0x0000, 0x0000, 0x0000, 0x5958, 0xa6fd, 0x21d6, 0x0000, 0x0000, 0x0000,
-    0xa3dc, 0x9d7f, 0x21b9, 0x0000, 0x0000, 0x0000, 0x79dc, 0xfcb5, 0x2199, 0x0000,
-    0x0000, 0x0000, 0xf264, 0xcebb, 0x217b, 0x0000, 0x0000, 0x0000, 0x0abe, 0x8308,
-    0x215c, 0x0000, 0x0000, 0x0000, 0x30ae, 0xb463, 0x213d, 0x0000, 0x0000, 0x0000,
-    0x6228, 0xb040, 0x211c, 0x0000, 0x0000, 0x0000, 0xc9b2, 0xf43b, 0x20ff, 0x0000,
-    0x0000, 0x0000, 0x3d8e, 0xa4b3, 0x20e0, 0x0000, 0x0000, 0x0000, 0x84e6, 0x8dab,
-    0x20c1, 0x0000, 0x0000, 0x0000, 0xa124, 0x9b74, 0x20a1, 0x0000, 0x0000, 0x0000,
-    0xc276, 0xd497, 0x2083, 0x0000, 0x0000, 0x0000, 0x6354, 0xa466, 0x2063, 0x0000,
-    0x0000, 0x0000, 0x8654, 0xaf0a, 0x2044, 0x0000, 0x0000, 0x0000, 0x1d20, 0xfa5c,
-    0x2024, 0x0000, 0x0000, 0x0000, 0xbcd0, 0xf3f0, 0x2004, 0x0000, 0x0000, 0x0000,
-    0xedf0, 0xf0b6, 0x1fe7, 0x0000, 0x0000, 0x0000, 0x45bc, 0x9182, 0x1fc9, 0x0000,
-    0x0000, 0x0000, 0xe254, 0xdc85, 0x1faa, 0x0000, 0x0000, 0x0000, 0xb898, 0xe9b1,
-    0x1f8a, 0x0000, 0x0000, 0x0000, 0x0ebe, 0xe6f0, 0x1f6c, 0x0000, 0x0000, 0x0000,
-    0xa9b8, 0xf584, 0x1f4c, 0x0000, 0x0000, 0x0000, 0x12e8, 0xdf6b, 0x1f2e, 0x0000,
-    0x0000, 0x0000, 0x9f9e, 0xcd55, 0x1f0f, 0x0000, 0x0000, 0x0000, 0x05a0, 0xec3a,
-    0x1eef, 0x0000, 0x0000, 0x0000, 0xd8e0, 0x96f8, 0x1ed1, 0x0000, 0x0000, 0x0000,
-    0x3bd4, 0xccc6, 0x1eb1, 0x0000, 0x0000, 0x0000, 0x4910, 0xb87b, 0x1e93, 0x0000,
-    0x0000, 0x0000, 0xbefc, 0xd40b, 0x1e73, 0x0000, 0x0000, 0x0000, 0x317e, 0xa406,
-    0x1e55, 0x0000, 0x0000, 0x0000, 0x6bb2, 0xc2b2, 0x1e36, 0x0000, 0x0000, 0x0000,
-    0xb87e, 0xbb78, 0x1e17, 0x0000, 0x0000, 0x0000, 0xa03c, 0xdbbd, 0x1df7, 0x0000,
-    0x0000, 0x0000, 0x5b6c, 0xe3c8, 0x1dd9, 0x0000, 0x0000, 0x0000, 0x8968, 0xca8e,
-    0x1dba, 0x0000, 0x0000, 0x0000, 0xc024, 0xe6ab, 0x1d9a, 0x0000, 0x0000, 0x0000,
-    0x4110, 0xd4eb, 0x1d7a, 0x0000, 0x0000, 0x0000, 0xa168, 0xbdb5, 0x1d5d, 0x0000,
-    0x0000, 0x0000, 0x012e, 0xa5fa, 0x1d3e, 0x0000, 0x0000, 0x0000, 0x6838, 0x9c1f,
-    0x1d1e, 0x0000, 0x0000, 0x0000, 0xa158, 0xaa76, 0x1d00, 0x0000, 0x0000, 0x0000,
-    0x090a, 0xbd95, 0x1ce1, 0x0000, 0x0000, 0x0000, 0xf73e, 0x8b6d, 0x1cc2, 0x0000,
-    0x0000, 0x0000, 0x5fda, 0xbcbf, 0x1ca3, 0x0000, 0x0000, 0x0000, 0xdbe8, 0xb89f,
-    0x1c84, 0x0000, 0x0000, 0x0000, 0x6e4c, 0x96c7, 0x1c64, 0x0000, 0x0000, 0x0000,
-    0x19c2, 0xf2a4, 0x1c46, 0x0000, 0x0000, 0x0000, 0xb800, 0xf855, 0x1c1e, 0x0000,
-    0x0000, 0x0000, 0x87fc, 0x85ff, 0x1c08, 0x0000, 0x0000, 0x0000, 0x1418, 0x839f,
-    0x1be9, 0x0000, 0x0000, 0x0000, 0x6186, 0xd9d8, 0x1bca, 0x0000, 0x0000, 0x0000,
-    0xf500, 0xabaa, 0x1ba6, 0x0000, 0x0000, 0x0000, 0x7b36, 0xdafe, 0x1b8c, 0x0000,
-    0x0000, 0x0000, 0xf394, 0xe6d8, 0x1b6c, 0x0000, 0x0000, 0x0000, 0x6efc, 0x9e55,
-    0x1b4e, 0x0000, 0x0000, 0x0000, 0x5e10, 0xc523, 0x1b2e, 0x0000, 0x0000, 0x0000,
-    0x8210, 0xb6f9, 0x1b0d, 0x0000, 0x0000, 0x0000, 0x9ab0, 0x96e3, 0x1af1, 0x0000,
-    0x0000, 0x0000, 0x3864, 0x92e7, 0x1ad1, 0x0000, 0x0000, 0x0000, 0x9878, 0xdc65,
-    0x1ab1, 0x0000, 0x0000, 0x0000, 0xfa20, 0xd6cb, 0x1a94, 0x0000, 0x0000, 0x0000,
-    0x6c00, 0xa4e4, 0x1a70, 0x0000, 0x0000, 0x0000, 0xab40, 0xb41b, 0x1a53, 0x0000,
-    0x0000, 0x0000, 0x43a4, 0x8ede, 0x1a37, 0x0000, 0x0000, 0x0000, 0x22e0, 0x9314,
-    0x1a15, 0x0000, 0x0000, 0x0000, 0x6170, 0xb949, 0x19f8, 0x0000, 0x0000, 0x0000,
-    0x6b00, 0xe056, 0x19d8, 0x0000, 0x0000, 0x0000, 0x9ba8, 0xa94c, 0x19b9, 0x0000,
-    0x0000, 0x0000, 0xfaa0, 0xaa16, 0x199b, 0x0000, 0x0000, 0x0000, 0x899a, 0xf627,
-    0x197d, 0x0000, 0x0000, 0x0000, 0x9f20, 0xfb70, 0x195d, 0x0000, 0x0000, 0x0000,
-    0xa4b8, 0xc176, 0x193e, 0x0000, 0x0000, 0x0000, 0xb21c, 0x85c3, 0x1920, 0x0000,
-    0x0000, 0x0000, 0x50d2, 0x9b19, 0x1901, 0x0000, 0x0000, 0x0000, 0xd4b0, 0xb708,
-    0x18e0, 0x0000, 0x0000, 0x0000, 0xfb88, 0xf510, 0x18c1, 0x0000, 0x0000, 0x0000,
-    0x31ec, 0xdc8d, 0x18a3, 0x0000, 0x0000, 0x0000, 0x3c00, 0xbff9, 0x1885, 0x0000,
-    0x0000, 0x0000, 0x5020, 0xc30b, 0x1862, 0x0000, 0x0000, 0x0000, 0xd4f0, 0xda0c,
-    0x1844, 0x0000, 0x0000, 0x0000, 0x20d2, 0x99a5, 0x1828, 0x0000, 0x0000, 0x0000,
-    0x852e, 0xd159, 0x1809, 0x0000, 0x0000, 0x0000, 0x7cd8, 0x97a1, 0x17e9, 0x0000,
-    0x0000, 0x0000, 0x423a, 0x997b, 0x17cb, 0x0000, 0x0000, 0x0000, 0xc1c0, 0xbe7d,
-    0x17a8, 0x0000, 0x0000, 0x0000, 0xe8bc, 0xdcdd, 0x178d, 0x0000, 0x0000, 0x0000,
-    0x8b28, 0xae06, 0x176e, 0x0000, 0x0000, 0x0000, 0x102e, 0xb8d4, 0x174f, 0x0000,
-    0x0000, 0x0000, 0xaa00, 0xaa5c, 0x172f, 0x0000, 0x0000, 0x0000, 0x51f0, 0x9fc0,
-    0x170e, 0x0000, 0x0000, 0x0000, 0xf858, 0xe181, 0x16f2, 0x0000, 0x0000, 0x0000,
-    0x91a8, 0x8162, 0x16d3, 0x0000, 0x0000, 0x0000, 0x5f40, 0xcb6f, 0x16b1, 0x0000,
-    0x0000, 0x0000, 0xbb50, 0xe55f, 0x1693, 0x0000, 0x0000, 0x0000, 0xacd2, 0xd895,
-    0x1676, 0x0000, 0x0000, 0x0000, 0xef30, 0x97bf, 0x1654, 0x0000, 0x0000, 0x0000,
-    0xf700, 0xb3d7, 0x1633, 0x0000, 0x0000, 0x0000, 0x3454, 0xa7b5, 0x1619, 0x0000,
-    0x0000, 0x0000, 0x6b00, 0xa929, 0x15f6, 0x0000, 0x0000, 0x0000, 0x9f04, 0x89f7,
-    0x15db, 0x0000, 0x0000, 0x0000, 0xad78, 0xd985, 0x15bc, 0x0000, 0x0000, 0x0000,
-    0xa46a, 0xae3f, 0x159d, 0x0000, 0x0000, 0x0000, 0x63a0, 0xd0da, 0x157c, 0x0000,
-    0x0000, 0x0000, 0x5e90, 0x817d, 0x155e, 0x0000, 0x0000, 0x0000, 0x1494, 0xb13f,
-    0x1540, 0x0000, 0x0000, 0x0000, 0x0090, 0x9c40, 0x1521, 0x0000, 0x0000, 0x0000,
-    0xdd70, 0xcc86, 0x1500, 0x0000, 0x0000, 0x0000, 0x64f8, 0xdb6f, 0x14e1, 0x0000,
-    0x0000, 0x0000, 0xe22c, 0xac17, 0x14c3, 0x0000, 0x0000, 0x0000, 0x60e0, 0xa9ad,
-    0x14a3, 0x0000, 0x0000, 0x0000, 0x4640, 0xd658, 0x1481, 0x0000, 0x0000, 0x0000,
-    0x6490, 0xa181, 0x1467, 0x0000, 0x0000, 0x0000, 0x1df4, 0xaaa2, 0x1447, 0x0000,
-    0x0000, 0x0000, 0xb94a, 0x8f61, 0x1429, 0x0000, 0x0000, 0x0000, 0x5198, 0x9d83,
-    0x1409, 0x0000, 0x0000, 0x0000, 0x0f7a, 0xa818, 0x13eb, 0x0000, 0x0000, 0x0000,
-    0xc45e, 0xc06c, 0x13cc, 0x0000, 0x0000, 0x0000, 0x4ec0, 0xfa29, 0x13a8, 0x0000,
-    0x0000, 0x0000, 0x6418, 0x8cad, 0x138c, 0x0000, 0x0000, 0x0000, 0xbcc8, 0xe7d1,
-    0x136f, 0x0000, 0x0000, 0x0000, 0xc934, 0xf9b0, 0x134f, 0x0000, 0x0000, 0x0000,
-    0x6ce0, 0x98df, 0x1331, 0x0000, 0x0000, 0x0000, 0x3516, 0xe5e9, 0x1312, 0x0000,
-    0x0000, 0x0000, 0xc6c0, 0xef8b, 0x12ef, 0x0000, 0x0000, 0x0000, 0xaf02, 0x913d,
-    0x12d4, 0x0000, 0x0000, 0x0000, 0xd230, 0xe1d5, 0x12b5, 0x0000, 0x0000, 0x0000,
-    0xfba8, 0xc232, 0x1295, 0x0000, 0x0000, 0x0000, 0x7ba4, 0xabeb, 0x1277, 0x0000,
-    0x0000, 0x0000, 0x6e5c, 0xc692, 0x1258, 0x0000, 0x0000, 0x0000, 0x76a2, 0x9756,
-    0x1239, 0x0000, 0x0000, 0x0000, 0xe180, 0xe423, 0x1214, 0x0000, 0x0000, 0x0000,
-    0x8c3c, 0x90f8, 0x11fb, 0x0000, 0x0000, 0x0000, 0x9f3c, 0x9fd2, 0x11dc, 0x0000,
-    0x0000, 0x0000, 0x53e0, 0xb73e, 0x11bd, 0x0000, 0x0000, 0x0000, 0x45be, 0x88d6,
-    0x119e, 0x0000, 0x0000, 0x0000, 0x111a, 0x8bc0, 0x117f, 0x0000, 0x0000, 0x0000,
-    0xe26a, 0xd7ff, 0x1160, 0x0000, 0x0000, 0x0000, 0xfb60, 0xdd8d, 0x113f, 0x0000,
-    0x0000, 0x0000, 0x9370, 0xc108, 0x1120, 0x0000, 0x0000, 0x0000, 0x9654, 0x8baf,
-    0x1103, 0x0000, 0x0000, 0x0000, 0xd6ec, 0xd6b9, 0x10e4, 0x0000, 0x0000, 0x0000,
-    0x23e4, 0xd7b7, 0x10c4, 0x0000, 0x0000, 0x0000, 0x1aa6, 0xa847, 0x10a6, 0x0000,
-    0x0000, 0x0000, 0xbee6, 0x9fef, 0x1087, 0x0000, 0x0000, 0x0000, 0x26d0, 0xa6eb,
-    0x1066, 0x0000, 0x0000, 0x0000, 0x5b86, 0xa880, 0x1049, 0x0000, 0x0000, 0x0000,
-    0x125c, 0xd971, 0x1029, 0x0000, 0x0000, 0x0000, 0x1f78, 0x9d18, 0x100a, 0x0000,
-    0x0000, 0x0000, 0x0e84, 0xb15b, 0x0feb, 0x0000, 0x0000, 0x0000, 0xd0c0, 0xc150,
-    0x0fcc, 0x0000, 0x0000, 0x0000, 0xa330, 0xc40c, 0x0fad, 0x0000, 0x0000, 0x0000,
-    0x5202, 0xfc2c, 0x0f8f, 0x0000, 0x0000, 0x0000, 0x3f7c, 0xecf5, 0x0f6f, 0x0000,
-    0x0000, 0x0000, 0xef44, 0xfdfd, 0x0f50, 0x0000, 0x0000, 0x0000, 0x3f6c, 0xab1b,
-    0x0f31, 0x0000, 0x0000, 0x0000, 0xf658, 0x89ec, 0x0f11, 0x0000, 0x0000, 0x0000,
-    0xbfc8, 0x9ba8, 0x0ef4, 0x0000, 0x0000, 0x0000, 0x3d40, 0xbe21, 0x0ed5, 0x0000,
-    0x0000, 0x0000, 0xbbc4, 0xc70d, 0x0eb6, 0x0000, 0x0000, 0x0000, 0x5158, 0xdb16,
-    0x0e96, 0x0000, 0x0000, 0x0000, 0xb5a8, 0xa8d8, 0x0e78, 0x0000, 0x0000, 0x0000,
-    0xcccc, 0xb40e, 0x0e58, 0x0000, 0x0000, 0x0000, 0x448c, 0xcb62, 0x0e3a, 0x0000,
-    0x0000, 0x0000, 0xf12a, 0x8aed, 0x0e1b, 0x0000, 0x0000, 0x0000, 0x79d0, 0xc59c,
-    0x0dfb, 0x0000, 0x0000, 0x0000, 0x06b4, 0xcdc9, 0x0ddd, 0x0000, 0x0000, 0x0000,
-    0xae70, 0xa979, 0x0dbe, 0x0000, 0x0000, 0x0000, 0x317c, 0xa8fb, 0x0d9e, 0x0000,
-    0x0000, 0x0000, 0x5fe0, 0x8a50, 0x0d7d, 0x0000, 0x0000, 0x0000, 0x70b6, 0xfdfa,
-    0x0d61, 0x0000, 0x0000, 0x0000, 0x1640, 0x9dc7, 0x0d41, 0x0000, 0x0000, 0x0000,
-    0x9a9c, 0xdc50, 0x0d23, 0x0000, 0x0000, 0x0000, 0x4fcc, 0x9a9b, 0x0d04, 0x0000,
-    0x0000, 0x0000, 0x7e48, 0x8f77, 0x0ce5, 0x0000, 0x0000, 0x0000, 0x84e4, 0xd4b9,
-    0x0cc6, 0x0000, 0x0000, 0x0000, 0x84e0, 0xbd10, 0x0ca6, 0x0000, 0x0000, 0x0000,
-    0x1b0a, 0xc8d9, 0x0c88, 0x0000, 0x0000, 0x0000, 0x6a48, 0xfc81, 0x0c68, 0x0000,
-    0x0000, 0x0000, 0x070a, 0xbef6, 0x0c4a, 0x0000, 0x0000, 0x0000, 0x8a70, 0xf096,
-    0x0c2b, 0x0000, 0x0000, 0x0000, 0xecc2, 0xc994, 0x0c0c, 0x0000, 0x0000, 0x0000,
-    0x1540, 0x9537, 0x0bea, 0x0000, 0x0000, 0x0000, 0x1b02, 0xab5b, 0x0bce, 0x0000,
-    0x0000, 0x0000, 0x5dc0, 0xb0c8, 0x0bad, 0x0000, 0x0000, 0x0000, 0xc928, 0xe034,
-    0x0b8f, 0x0000, 0x0000, 0x0000, 0x2d12, 0xb4b0, 0x0b71, 0x0000, 0x0000, 0x0000,
-    0x8fc2, 0xbb94, 0x0b52, 0x0000, 0x0000, 0x0000, 0xe236, 0xe22f, 0x0b33, 0x0000,
-    0x0000, 0x0000, 0xb97c, 0xbe9e, 0x0b13, 0x0000, 0x0000, 0x0000, 0xe1a6, 0xe16d,
-    0x0af5, 0x0000, 0x0000, 0x0000, 0xd330, 0xbaf0, 0x0ad6, 0x0000, 0x0000, 0x0000,
-    0xc0bc, 0xbbd0, 0x0ab7, 0x0000, 0x0000, 0x0000, 0x8e66, 0xdd9b, 0x0a98, 0x0000,
-    0x0000, 0x0000, 0xc95c, 0xf799, 0x0a79, 0x0000, 0x0000, 0x0000, 0xdac0, 0xbe4c,
-    0x0a55, 0x0000, 0x0000, 0x0000, 0xafc0, 0xc378, 0x0a37, 0x0000, 0x0000, 0x0000,
-    0xa880, 0xe341, 0x0a19, 0x0000, 0x0000, 0x0000, 0xc242, 0x81f6, 0x09fd, 0x0000,
-    0x0000, 0x0000, 0x7470, 0xc777, 0x09de, 0x0000, 0x0000, 0x0000, 0x62bc, 0xb684,
-    0x09be, 0x0000, 0x0000, 0x0000, 0x43ac, 0x8c58, 0x099f, 0x0000, 0x0000, 0x0000,
-    0xcc3c, 0xf9ac, 0x0981, 0x0000, 0x0000, 0x0000, 0x1526, 0xb670, 0x0962, 0x0000,
-    0x0000, 0x0000, 0xc9fe, 0xdf50, 0x0943, 0x0000, 0x0000, 0x0000, 0x6ae6, 0xc065,
-    0x0924, 0x0000, 0x0000, 0x0000, 0xb114, 0xcf29, 0x0905, 0x0000, 0x0000, 0x0000,
-    0xd388, 0x922a, 0x08e4, 0x0000, 0x0000, 0x0000, 0xcf54, 0xb926, 0x08c7, 0x0000,
-    0x0000, 0x0000, 0x3826, 0xe855, 0x08a8, 0x0000, 0x0000, 0x0000, 0xe7c8, 0x829b,
-    0x0888, 0x0000, 0x0000, 0x0000, 0x546c, 0xa903, 0x086a, 0x0000, 0x0000, 0x0000,
-    0x8768, 0x99cc, 0x0849, 0x0000, 0x0000, 0x0000, 0x00ac, 0xf529, 0x082b, 0x0000,
-    0x0000, 0x0000, 0x2658, 0x9f0b, 0x080c, 0x0000, 0x0000, 0x0000, 0xfe5c, 0x9e21,
-    0x07ee, 0x0000, 0x0000, 0x0000, 0x6da2, 0x9910, 0x07cf, 0x0000, 0x0000, 0x0000,
-    0x9220, 0xf9b3, 0x07b0, 0x0000, 0x0000, 0x0000, 0x3d90, 0xa541, 0x0791, 0x0000,
-    0x0000, 0x0000, 0x6e4c, 0xe7cc, 0x0771, 0x0000, 0x0000, 0x0000, 0xa8fa, 0xe80a,
-    0x0753, 0x0000, 0x0000, 0x0000, 0x4e14, 0xc3a7, 0x0734, 0x0000, 0x0000, 0x0000,
-    0xf7e0, 0xbad9, 0x0712, 0x0000, 0x0000, 0x0000, 0xfea0, 0xeff2, 0x06f5, 0x0000,
-    0x0000, 0x0000, 0xcef6, 0xbd48, 0x06d7, 0x0000, 0x0000, 0x0000, 0x7544, 0xf559,
-    0x06b7, 0x0000, 0x0000, 0x0000, 0x2388, 0xf655, 0x0698, 0x0000, 0x0000, 0x0000,
-    0xe900, 0xad56, 0x0676, 0x0000, 0x0000, 0x0000, 0x2cc0, 0x8437, 0x0659, 0x0000,
-    0x0000, 0x0000, 0x3068, 0xc544, 0x063b, 0x0000, 0x0000, 0x0000, 0xdc70, 0xe73c,
-    0x061b, 0x0000, 0x0000, 0x0000, 0xee50, 0x9d49, 0x05fc, 0x0000, 0x0000, 0x0000,
-    0x93d2, 0x81f6, 0x05df, 0x0000, 0x0000, 0x0000, 0x941c, 0xadff, 0x05bf, 0x0000,
-    0x0000, 0x0000, 0x2ce2, 0x8e45, 0x05a1, 0x0000, 0x0000, 0x0000, 0x4a60, 0x95fd,
-    0x0581, 0x0000, 0x0000, 0x0000, 0x79f8, 0xb83a, 0x0563, 0x0000, 0x0000, 0x0000,
-    0xcb58, 0xa1f5, 0x0543, 0x0000, 0x0000, 0x0000, 0x2a3a, 0xdc36, 0x0525, 0x0000,
-    0x0000, 0x0000, 0x14ee, 0x890e, 0x0506, 0x0000, 0x0000, 0x0000, 0x8f20, 0xc432,
-    0x04e3, 0x0000, 0x0000, 0x0000, 0x8440, 0xb21d, 0x04c6, 0x0000, 0x0000, 0x0000,
-    0x5430, 0xf698, 0x04a7, 0x0000, 0x0000, 0x0000, 0x04ae, 0x8b20, 0x048a, 0x0000,
-    0x0000, 0x0000, 0x04d0, 0xe872, 0x046b, 0x0000, 0x0000, 0x0000, 0xc78e, 0x8893,
-    0x044c, 0x0000, 0x0000, 0x0000, 0x0f78, 0x9895, 0x042b, 0x0000, 0x0000, 0x0000,
-    0x11d4, 0xdf2e, 0x040d, 0x0000, 0x0000, 0x0000, 0xe84c, 0x89d5, 0x03ef, 0x0000,
-    0x0000, 0x0000, 0xf7be, 0x8a67, 0x03d0, 0x0000, 0x0000, 0x0000, 0x95d0, 0xc906,
-    0x03b1, 0x0000, 0x0000, 0x0000, 0x64ce, 0xd96c, 0x0392, 0x0000, 0x0000, 0x0000,
-    0x97ba, 0xa16f, 0x0373, 0x0000, 0x0000, 0x0000, 0x463c, 0xc51a, 0x0354, 0x0000,
-    0x0000, 0x0000, 0xef0a, 0xe93e, 0x0335, 0x0000, 0x0000, 0x0000, 0x526a, 0xa466,
-    0x0316, 0x0000, 0x0000, 0x0000, 0x4140, 0xa94d, 0x02f5, 0x0000, 0x0000, 0x0000,
-    0xb4ec, 0xce68, 0x02d8, 0x0000, 0x0000, 0x0000, 0x4fa2, 0x8490, 0x02b9, 0x0000,
-    0x0000, 0x0000, 0x4e60, 0xca98, 0x0298, 0x0000, 0x0000, 0x0000, 0x08dc, 0xe09c,
-    0x027a, 0x0000, 0x0000, 0x0000, 0x2b90, 0xc7e3, 0x025c, 0x0000, 0x0000, 0x0000,
-    0x5a7c, 0xf8ef, 0x023c, 0x0000, 0x0000, 0x0000, 0x5022, 0x9d58, 0x021e, 0x0000,
-    0x0000, 0x0000, 0x553a, 0xe242, 0x01ff, 0x0000, 0x0000, 0x0000, 0x7e6e, 0xb54d,
-    0x01e0, 0x0000, 0x0000, 0x0000, 0xd2d4, 0xa88c, 0x01c1, 0x0000, 0x0000, 0x0000,
-    0x75b6, 0xfe6d, 0x01a2, 0x0000, 0x0000, 0x0000, 0x3bb2, 0xf04c, 0x0183, 0x0000,
-    0x0000, 0x0000, 0xc2d0, 0xc046, 0x0163, 0x0000, 0x0000, 0x0000, 0x250c, 0xf9d6,
-    0x0145, 0x0000, 0x0000, 0x0000, 0xb7b4, 0x8a0d, 0x0126, 0x0000, 0x0000, 0x0000,
-    0x1a72, 0xe4f5, 0x0107, 0x0000, 0x0000, 0x0000, 0x825c, 0xa9b8, 0x00e8, 0x0000,
-    0x0000, 0x0000, 0x6c90, 0xc9ad, 0x00c6, 0x0000, 0x0000, 0x0000, 0x4d00, 0xd1bb,
-    0x00aa, 0x0000, 0x0000, 0x0000, 0xa4a0, 0xee01, 0x0087, 0x0000, 0x0000, 0x0000,
-    0x89a8, 0xbe9f, 0x006b, 0x0000, 0x0000, 0x0000, 0x038e, 0xc80c, 0x004d, 0x0000,
-    0x0000, 0x0000, 0xfe26, 0x8384, 0x002e, 0x0000, 0x0000, 0x0000, 0xcd90, 0xca57,
-    0x000e, 0x0000
-};
-
-void MacroAssembler::libm_reduce_pi04l(Register eax, Register ecx, Register edx, Register ebx, Register esi, Register edi, Register ebp, Register esp) {
-  Label B1_1, B1_2, B1_3, B1_4, B1_5, B1_6, B1_7, B1_8, B1_9, B1_10, B1_11, B1_12;
-  Label B1_13, B1_14, B1_15;
-
-  assert_different_registers(ebx, eax, ecx, edx, esi, edi, ebp, esp);
-
-  address zero_none  = (address)_zero_none;
-  address _4onpi_d   = (address)__4onpi_d;
-  address TWO_32H    = (address)_TWO_32H;
-  address pi04_3d    = (address)_pi04_3d;
-  address pi04_5d    = (address)_pi04_5d;
-  address SCALE      = (address)_SCALE;
-  address zeros      = (address)_zeros;
-  address pi04_2d    = (address)_pi04_2d;
-  address TWO_12H    = (address)_TWO_12H;
-  address _4onpi_31l = (address)__4onpi_31l;
-
-  bind(B1_1);
-  push(ebp);
-  movl(ebp, esp);
-  andl(esp, -16);
-  push(esi);
-  push(edi);
-  push(ebx);
-  subl(esp, 20);
-  movzwl(ebx, Address(ebp, 16));
-  andl(ebx, 32767);
-  movl(eax, Address(ebp, 20));
-  cmpl(ebx, 16413);
-  movl(esi, Address(ebp, 24));
-  movl(Address(esp, 4), eax);
-  jcc(Assembler::greaterEqual, B1_8);
-
-  bind(B1_2);
-  fld_x(Address(ebp, 8));
-  fld_d(ExternalAddress(_4onpi_d));    //0x6dc9c883UL, 0x3ff45f30UL
-  fmul(1);
-  fstp_x(Address(esp, 8));
-  movzwl(ecx, Address(esp, 16));
-  negl(ecx);
-  addl(ecx, 30);
-  movl(eax, Address(esp, 12));
-  shrl(eax);
-  cmpl(Address(esp, 4), 0);
-  jcc(Assembler::notEqual, B1_4);
-
-  bind(B1_3);
-  lea(ecx, Address(eax, 1));
-  andl(ecx, -2);
-  jmp(B1_5);
-
-  bind(B1_4);
-  movl(ecx, eax);
-  addl(eax, Address(esp, 4));
-  movl(edx, eax);
-  andl(edx, 1);
-  addl(ecx, edx);
-
-  bind(B1_5);
-  fld_d(ExternalAddress(TWO_32H));    //0x00000000UL, 0x41f80000UL
-  cmpl(ebx, 16400);
-  movl(Address(esp, 0), ecx);
-  fild_s(Address(esp, 0));
-  jcc(Assembler::greaterEqual, B1_7);
-
-  bind(B1_6);
-  fld_d(ExternalAddress(pi04_3d));    //0x54442d00UL, 0x3fe921fbUL
-  fmul(1);
-  fsubp(3);
-  fxch(1);
-  fmul(2);
-  fld_s(2);
-  fadd(1);
-  fsubrp(1);
-  fld_s(0);
-  fxch(1);
-  fsuba(3);
-  fld_d(ExternalAddress(8 + pi04_3d));    //0x98cc5180UL, 0x3ce84698UL
-  fmul(3);
-  fsuba(2);
-  fxch(1);
-  fsub(2);
-  fsubrp(1);
-  faddp(3);
-  fld_d(ExternalAddress(16 + pi04_3d));    //0xcbb5bf6cUL, 0xb9dfc8f8UL
-  fmulp(2);
-  fld_s(1);
-  fsubr(1);
-  fsuba(1);
-  fxch(2);
-  fsubp(1);
-  faddp(2);
-  fxch(1);
-  jmp(B1_15);
-
-  bind(B1_7);
-  fld_d(ExternalAddress(pi04_5d));    //0x54400000UL, 0x3fe921fbUL
-  fmul(1);
-  fsubp(3);
-  fxch(1);
-  fmul(2);
-  fld_s(2);
-  fadd(1);
-  fsubrp(1);
-  fld_s(0);
-  fxch(1);
-  fsuba(3);
-  fld_d(ExternalAddress(8 + pi04_5d));    //0x1a600000UL, 0x3dc0b461UL
-  fmul(3);
-  fsuba(2);
-  fxch(1);
-  fsub(2);
-  fsubrp(1);
-  faddp(3);
-  fld_d(ExternalAddress(16 + pi04_5d));    //0x2e000000UL, 0x3b93198aUL
-  fmul(2);
-  fld_s(0);
-  fsubr(2);
-  fsuba(2);
-  fxch(1);
-  fsubp(2);
-  fxch(1);
-  faddp(3);
-  fld_d(ExternalAddress(24 + pi04_5d));    //0x25200000UL, 0x396b839aUL
-  fmul(2);
-  fld_s(0);
-  fsubr(2);
-  fsuba(2);
-  fxch(1);
-  fsubp(2);
-  fxch(1);
-  faddp(3);
-  fld_d(ExternalAddress(32 + pi04_5d));    //0x533e63a0UL, 0x37027044UL
-  fmulp(2);
-  fld_s(1);
-  fsubr(1);
-  fsuba(1);
-  fxch(2);
-  fsubp(1);
-  faddp(2);
-  fxch(1);
-  jmp(B1_15);
-
-  bind(B1_8);
-  fld_x(Address(ebp, 8));
-  addl(ebx, -16417);
-  fmul_d(as_Address(ExternalAddress(SCALE)));    //0x00000000UL, 0x32600000UL
-  movl(eax, -2078209981);
-  imull(ebx);
-  addl(edx, ebx);
-  movl(ecx, ebx);
-  sarl(edx, 4);
-  sarl(ecx, 31);
-  subl(edx, ecx);
-  movl(eax, edx);
-  shll(eax, 5);
-  fstp_x(Address(ebp, 8));
-  fld_x(Address(ebp, 8));
-  subl(eax, edx);
-  movl(Address(ebp, 8), 0);
-  subl(ebx, eax);
-  fld_x(Address(ebp, 8));
-  cmpl(ebx, 17);
-  fsuba(1);
-  jcc(Assembler::less, B1_10);
-
-  bind(B1_9);
-  lea(eax, Address(noreg, edx, Address::times_8));
-  lea(ecx, Address(eax, edx, Address::times_4));
-  incl(edx);
-  fld_x(Address(_4onpi_31l, RelocationHolder::none).plus_disp(ecx, Address::times_1));
-  fmul(2);
-  fld_x(Address(12 + _4onpi_31l, RelocationHolder::none).plus_disp(ecx, Address::times_1));
-  fmul(2);
-  fld_s(0);
-  fadd(2);
-  fsuba(2);
-  fxch(1);
-  faddp(2);
-  fld_s(1);
-  fadd(1);
-  fstp_x(Address(esp, 8));
-  andl(Address(esp, 8), -16777216);
-  fld_x(Address(esp, 8));
-  fsubp(1);
-  jmp(B1_11);
-
-  bind(B1_10);
-  fld_d(ExternalAddress(zeros));    //0x00000000UL, 0x00000000UL
-  fld_s(0);
-
-  bind(B1_11);
-  fld_s(0);
-  lea(eax, Address(noreg, edx, Address::times_8));
-  fld_s(3);
-  lea(edx, Address(eax, edx, Address::times_4));
-  fld_x(Address(_4onpi_31l, RelocationHolder::none).plus_disp(edx, Address::times_1));
-  fmul(6);
-  movl(Address(esp, 0), edx);
-  fadda(2);
-  fxch(2);
-  fsuba(3);
-  fxch(2);
-  faddp(3);
-  fxch(2);
-  faddp(3);
-  fld_x(Address(12 + _4onpi_31l, RelocationHolder::none).plus_disp(edx, Address::times_1));
-  fmula(2);
-  fld_s(2);
-  fadd(2);
-  fld_s(0);
-  fxch(1);
-  fsubra(3);
-  fxch(3);
-  fchs();
-  faddp(4);
-  fxch(3);
-  faddp(4);
-  fxch(2);
-  fadd(3);
-  fxch(2);
-  fmul(5);
-  fadda(2);
-  fld_s(4);
-  fld_x(Address(24 + _4onpi_31l, RelocationHolder::none).plus_disp(edx, Address::times_1));
-  fmula(1);
-  fxch(1);
-  fadda(4);
-  fxch(4);
-  fstp_x(Address(esp, 8));
-  movzwl(ebx, Address(esp, 16));
-  andl(ebx, 32767);
-  cmpl(ebx, 16415);
-  jcc(Assembler::greaterEqual, B1_13);
-
-  bind(B1_12);
-  negl(ebx);
-  addl(ebx, 30);
-  movl(ecx, ebx);
-  movl(eax, Address(esp, 12));
-  shrl(eax);
-  shll(eax);
-  movl(Address(esp, 12), eax);
-  movl(Address(esp, 8), 0);
-  shrl(eax);
-  jmp(B1_14);
-
-  bind(B1_13);
-  negl(ebx);
-  addl(ebx, 30);
-  movl(ecx, ebx);
-  movl(edx, Address(esp, 8));
-  shrl(edx);
-  shll(edx);
-  negl(ecx);
-  movl(eax, Address(esp, 12));
-  shll(eax);
-  movl(ecx, ebx);
-  movl(Address(esp, 8), edx);
-  shrl(edx);
-  orl(eax, edx);
-
-  bind(B1_14);
-  fld_x(Address(esp, 8));
-  addl(eax, Address(esp, 4));
-  fsubp(3);
-  fmul(6);
-  fld_s(4);
-  movl(edx, eax);
-  andl(edx, 1);
-  fadd(3);
-  movl(ecx, Address(esp, 0));
-  fsuba(3);
-  fxch(3);
-  faddp(5);
-  fld_s(1);
-  fxch(3);
-  fadd_d(Address(zero_none, RelocationHolder::none).plus_disp(edx, Address::times_8));
-  fadda(3);
-  fsub(3);
-  faddp(2);
-  fxch(1);
-  faddp(4);
-  fld_s(2);
-  fadd(2);
-  fsuba(2);
-  fxch(3);
-  faddp(2);
-  fxch(1);
-  faddp(3);
-  fld_s(0);
-  fadd(2);
-  fsuba(2);
-  fxch(1);
-  faddp(2);
-  fxch(1);
-  faddp(2);
-  fld_s(2);
-  fld_x(Address(36 + _4onpi_31l, RelocationHolder::none).plus_disp(ecx, Address::times_1));
-  fmula(1);
-  fld_s(1);
-  fadd(3);
-  fsuba(3);
-  fxch(2);
-  faddp(3);
-  fxch(2);
-  faddp(3);
-  fxch(1);
-  fmul(4);
-  fld_s(0);
-  fadd(2);
-  fsuba(2);
-  fxch(1);
-  faddp(2);
-  fxch(1);
-  faddp(2);
-  fld_s(2);
-  fld_x(Address(48 + _4onpi_31l, RelocationHolder::none).plus_disp(ecx, Address::times_1));
-  fmula(1);
-  fld_s(1);
-  fadd(3);
-  fsuba(3);
-  fxch(2);
-  faddp(3);
-  fxch(2);
-  faddp(3);
-  fld_s(3);
-  fxch(2);
-  fmul(5);
-  fld_x(Address(60 + _4onpi_31l, RelocationHolder::none).plus_disp(ecx, Address::times_1));
-  fmula(3);
-  fxch(3);
-  faddp(1);
-  fld_s(0);
-  fadd(2);
-  fsuba(2);
-  fxch(1);
-  faddp(2);
-  fxch(1);
-  faddp(3);
-  fld_s(3);
-  fxch(2);
-  fmul(5);
-  fld_x(Address(72 + _4onpi_31l, RelocationHolder::none).plus_disp(ecx, Address::times_1));
-  fmula(3);
-  fxch(3);
-  faddp(1);
-  fld_s(0);
-  fadd(2);
-  fsuba(2);
-  fxch(1);
-  faddp(2);
-  fxch(1);
-  faddp(3);
-  fxch(1);
-  fmulp(4);
-  fld_x(Address(84 + _4onpi_31l, RelocationHolder::none).plus_disp(ecx, Address::times_1));
-  fmulp(3);
-  fxch(2);
-  faddp(3);
-  fld_s(2);
-  fadd(2);
-  fld_d(ExternalAddress(TWO_32H));    //0x00000000UL, 0x41f80000UL
-  fmul(1);
-  fadda(1);
-  fsubp(1);
-  fsuba(2);
-  fxch(3);
-  faddp(2);
-  faddp(1);
-  fld_d(ExternalAddress(pi04_2d));    //0x54400000UL, 0x3fe921fbUL
-  fld_s(0);
-  fmul(2);
-  fxch(2);
-  fadd(3);
-  fxch(1);
-  fmulp(3);
-  fmul_d(as_Address(ExternalAddress(8 + pi04_2d)));    //0x1a626331UL, 0x3dc0b461UL
-  faddp(1);
-
-  bind(B1_15);
-  fld_d(ExternalAddress(TWO_12H));    //0x00000000UL, 0x40b80000UL
-  fld_s(2);
-  fadd(2);
-  fmula(1);
-  fstp_x(Address(esp, 8));
-  fld_x(Address(esp, 8));
-  fadd(1);
-  fsubrp(1);
-  fst_d(Address(esi, 0));
-  fsubp(2);
-  faddp(1);
-  fstp_d(Address(esi, 8));
-  addl(esp, 20);
-  pop(ebx);
-  pop(edi);
-  pop(esi);
-  movl(esp, ebp);
-  pop(ebp);
-  ret(0);
-}
-
-
-ATTRIBUTE_ALIGNED(16) static const jushort _SP[] =
-{
-    0xaaab, 0xaaaa, 0xaaaa, 0xaaaa, 0xbffc, 0x0000, 0x8887, 0x8888, 0x8888, 0x8888,
-    0x3ff8, 0x0000, 0xc527, 0x0d00, 0x00d0, 0xd00d, 0xbff2, 0x0000, 0x45f6, 0xb616,
-    0x1d2a, 0xb8ef, 0x3fec, 0x0000, 0x825b, 0x3997, 0x2b3f, 0xd732, 0xbfe5, 0x0000,
-    0xbf33, 0x8bb4, 0x2fda, 0xb092, 0x3fde, 0x0000, 0x44a6, 0xed1a, 0x29ef, 0xd73e,
-    0xbfd6, 0x0000, 0x8610, 0x307f, 0x62a1, 0xc921, 0x3fce, 0x0000
-};
-
-ATTRIBUTE_ALIGNED(16) static const jushort _CP[] =
-{
-    0x0000, 0x0000, 0x0000, 0x8000, 0xbffe, 0x0000, 0xaaa5, 0xaaaa, 0xaaaa, 0xaaaa,
-    0x3ffa, 0x0000, 0x9c2f, 0x0b60, 0x60b6, 0xb60b, 0xbff5, 0x0000, 0xf024, 0x0cac,
-    0x00d0, 0xd00d, 0x3fef, 0x0000, 0x03fe, 0x3f65, 0x7dbb, 0x93f2, 0xbfe9, 0x0000,
-    0xd84d, 0xadee, 0xc698, 0x8f76, 0x3fe2, 0x0000, 0xdaba, 0xfe79, 0xea36, 0xc9c9,
-    0xbfda, 0x0000, 0x3ac6, 0x0ba0, 0x07ce, 0xd585, 0x3fd2, 0x0000
-};
-
-void MacroAssembler::libm_sincos_huge(XMMRegister xmm0, XMMRegister xmm1, Register eax, Register ecx, Register edx, Register ebx, Register esi, Register edi, Register ebp, Register esp) {
-  Label B1_1, B1_2, B1_3, B1_4, B1_5, B1_6, B1_7, B1_8, B1_9, B1_10, B1_11, B1_12;
-  Label B1_13, B1_14, B1_15, B1_16, B1_17, B1_18, B1_19, B1_20, B1_21, B1_22, B1_23;
-  Label B1_24, B1_25, B1_26, B1_27, B1_28, B1_29, B1_30, B1_31, B1_32, B1_33, B1_34;
-  Label B1_35, B1_36, B1_37, B1_38, B1_39, B1_40, B1_41, B1_42, B1_43, B1_46;
-
-  assert_different_registers(ebx, eax, ecx, edx, esi, edi, ebp, esp);
-
-  address CP = (address)_CP;
-  address SP = (address)_SP;
-
-  bind(B1_1);
-  push(ebp);
-  movl(ebp, esp);
-  andl(esp, -64);
-  push(esi);
-  push(edi);
-  push(ebx);
-  subl(esp, 52);
-  movl(eax, Address(ebp, 16));
-  movl(edx, Address(ebp, 20));
-  movl(Address(esp, 32), eax);
-  movl(Address(esp, 36), edx);
-
-  bind(B1_2);
-  fnstcw(Address(esp, 30));
-
-  bind(B1_3);
-  movsd(xmm1, Address(ebp, 8));
-  movl(esi, Address(ebp, 12));
-  movl(eax, esi);
-  andl(eax, 2147483647);
-  andps(xmm1, ExternalAddress(L_2IL0FLOATPACKET_0));    //0xffffffffUL, 0x7fffffffUL, 0x00000000UL, 0x00000000UL
-  shrl(esi, 31);
-  movl(Address(esp, 40), eax);
-  cmpl(eax, 1104150528);
-  movsd(Address(ebp, 8), xmm1);
-  jcc(Assembler::aboveEqual, B1_11);
-
-  bind(B1_4);
-  movsd(xmm0, ExternalAddress(PI4_INV));    //0x6dc9c883UL, 0x3ff45f30UL
-  mulsd(xmm0, xmm1);
-  movzwl(edx, Address(esp, 30));
-  movl(eax, edx);
-  andl(eax, 768);
-  movsd(Address(esp, 0), xmm0);
-  cmpl(eax, 768);
-  jcc(Assembler::equal, B1_42);
-
-  bind(B1_5);
-  orl(edx, -64768);
-  movw(Address(esp, 28), edx);
-
-  bind(B1_6);
-  fldcw(Address(esp, 28));
-
-  bind(B1_7);
-  movsd(xmm1, Address(ebp, 8));
-  movl(ebx, 1);
-
-  bind(B1_8);
-  movl(Address(esp, 12), ebx);
-  movl(ebx, Address(esp, 4));
-  movl(eax, ebx);
-  movl(Address(esp, 8), esi);
-  movl(esi, ebx);
-  shrl(esi, 20);
-  andl(eax, 1048575);
-  movl(ecx, esi);
-  orl(eax, 1048576);
-  negl(ecx);
-  movl(edx, eax);
-  addl(ecx, 19);
-  addl(esi, 13);
-  movl(Address(esp, 24), ecx);
-  shrl(edx);
-  movl(ecx, esi);
-  shll(eax);
-  movl(ecx, Address(esp, 24));
-  movl(esi, Address(esp, 0));
-  shrl(esi);
-  orl(eax, esi);
-  cmpl(ebx, 1094713344);
-  movsd(Address(esp, 16), xmm1);
-  fld_d(Address(esp, 16));
-  cmov32(Assembler::below, eax, edx);
-  movl(esi, Address(esp, 8));
-  lea(edx, Address(eax, 1));
-  movl(ebx, edx);
-  andl(ebx, -2);
-  movl(Address(esp, 16), ebx);
-  fild_s(Address(esp, 16));
-  movl(ebx, Address(esp, 12));
-  cmpl(Address(esp, 40), 1094713344);
-  jcc(Assembler::aboveEqual, B1_10);
-
-  bind(B1_9);
-  fld_d(ExternalAddress(PI4X3));    //0x54443000UL, 0xbfe921fbUL
-  fmul(1);
-  faddp(2);
-  fld_d(ExternalAddress(PI4X3 + 8));    //0x3b39a000UL, 0x3d373dcbUL
-  fmul(1);
-  faddp(2);
-  fld_d(ExternalAddress(PI4X3 + 16));    //0xe0e68948UL, 0xba845c06UL
-  fmulp(1);
-  faddp(1);
-  jmp(B1_17);
-
-  bind(B1_10);
-  fld_d(ExternalAddress(PI4X4));    //0x54400000UL, 0xbfe921fbUL
-  fmul(1);
-  faddp(2);
-  fld_d(ExternalAddress(PI4X4 + 8));    //0x1a600000UL, 0xbdc0b461UL
-  fmul(1);
-  faddp(2);
-  fld_d(ExternalAddress(PI4X4 + 16));    //0x2e000000UL, 0xbb93198aUL
-  fmul(1);
-  faddp(2);
-  fld_d(ExternalAddress(PI4X4 + 24));    //0x252049c1UL, 0xb96b839aUL
-  fmulp(1);
-  faddp(1);
-  jmp(B1_17);
-
-  bind(B1_11);
-  movzwl(edx, Address(esp, 30));
-  movl(eax, edx);
-  andl(eax, 768);
-  cmpl(eax, 768);
-  jcc(Assembler::equal, B1_43);
-  bind(B1_12);
-  orl(edx, -64768);
-  movw(Address(esp, 28), edx);
-
-  bind(B1_13);
-  fldcw(Address(esp, 28));
-
-  bind(B1_14);
-  movsd(xmm1, Address(ebp, 8));
-  movl(ebx, 1);
-
-  bind(B1_15);
-  movsd(Address(esp, 16), xmm1);
-  fld_d(Address(esp, 16));
-  addl(esp, -32);
-  lea(eax, Address(esp, 32));
-  fstp_x(Address(esp, 0));
-  movl(Address(esp, 12), 0);
-  movl(Address(esp, 16), eax);
-  call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlibm_reduce_pi04l())));
-
-  bind(B1_46);
-  addl(esp, 32);
-
-  bind(B1_16);
-  fld_d(Address(esp, 0));
-  lea(edx, Address(eax, 1));
-  fld_d(Address(esp, 8));
-  faddp(1);
-
-  bind(B1_17);
-  movl(ecx, edx);
-  addl(eax, 3);
-  shrl(ecx, 2);
-  andl(ecx, 1);
-  shrl(eax, 2);
-  xorl(esi, ecx);
-  movl(ecx, Address(esp, 36));
-  andl(eax, 1);
-  andl(ecx, 3);
-  cmpl(ecx, 3);
-  jcc(Assembler::notEqual, B1_25);
-
-  bind(B1_18);
-  fld_x(ExternalAddress(84 + SP));    //0x8610, 0x307f, 0x62
-  fld_s(1);
-  fmul((2));
-  testb(edx, 2);
-  fmula((1));
-  fld_x(ExternalAddress(72 + SP));    //0x44a6, 0xed1a, 0x29
-  faddp(2);
-  fmula(1);
-  fld_x(ExternalAddress(60 + SP));    //0xbf33, 0x8bb4, 0x2f
-  faddp(2);
-  fmula(1);
-  fld_x(ExternalAddress(48 + SP));    //0x825b, 0x3997, 0x2b
-  faddp(2);
-  fmula(1);
-  fld_x(ExternalAddress(36 + SP));    //0x45f6, 0xb616, 0x1d
-  faddp(2);
-  fmula(1);
-  fld_x(ExternalAddress(24 + SP));    //0xc527, 0x0d00, 0x00
-  faddp(2);
-  fmula(1);
-  fld_x(ExternalAddress(12 + SP));    //0x8887, 0x8888, 0x88
-  faddp(2);
-  fmula(1);
-  fld_x(ExternalAddress(SP));    //0xaaab, 0xaaaa, 0xaa
-  faddp(2);
-  fmula(1);
-  fld_x(ExternalAddress(84 + CP));    //0x3ac6, 0x0ba0, 0x07
-  fmul(1);
-  fld_x(ExternalAddress(72 + CP));    //0xdaba, 0xfe79, 0xea
-  faddp(1);
-  fmul(1);
-  fld_x(ExternalAddress(62 + CP));    //0xd84d, 0xadee, 0xc6
-  faddp(1);
-  fmul(1);
-  fld_x(ExternalAddress(48 + CP));    //0x03fe, 0x3f65, 0x7d
-  faddp(1);
-  fmul(1);
-  fld_x(ExternalAddress(36 + CP));    //0xf024, 0x0cac, 0x00
-  faddp(1);
-  fmul(1);
-  fld_x(ExternalAddress(24 + CP));    //0x9c2f, 0x0b60, 0x60
-  faddp(1);
-  fmul(1);
-  fld_x(ExternalAddress(12 + CP));    //0xaaa5, 0xaaaa, 0xaa
-  faddp(1);
-  fmul(1);
-  fld_x(ExternalAddress(CP));    //0x0000, 0x0000, 0x00
-  faddp(1);
-  fmulp(1);
-  fld_d(Address(ONES, RelocationHolder::none).plus_disp(esi, Address::times_8));
-  fld_d(Address(ONES, RelocationHolder::none).plus_disp(eax, Address::times_8));
-  jcc(Assembler::equal, B1_22);
-
-  bind(B1_19);
-  fmulp(4);
-  testl(ebx, ebx);
-  fxch(2);
-  fmul(3);
-  movl(eax, Address(esp, 2));
-  faddp(3);
-  fxch(2);
-  fstp_d(Address(eax, 0));
-  fmula(1);
-  faddp(1);
-  fstp_d(Address(eax, 8));
-  jcc(Assembler::equal, B1_21);
-
-  bind(B1_20);
-  fldcw(Address(esp, 30));
-
-  bind(B1_21);
-  addl(esp, 52);
-  pop(ebx);
-  pop(edi);
-  pop(esi);
-  movl(esp, ebp);
-  pop(ebp);
-  ret(0);
-
-  bind(B1_22);
-  fxch(1);
-  fmulp(4);
-  testl(ebx, ebx);
-  fxch(2);
-  fmul(3);
-  movl(eax, Address(esp, 32));
-  faddp(3);
-  fxch(2);
-  fstp_d(Address(eax, 8));
-  fmula(1);
-  faddp(1);
-  fstp_d(Address(eax, 0));
-  jcc(Assembler::equal, B1_24);
-
-  bind(B1_23);
-  fldcw(Address(esp, 30));
-
-  bind(B1_24);
-  addl(esp, 52);
-  pop(ebx);
-  pop(edi);
-  pop(esi);
-  movl(esp, ebp);
-  pop(ebp);
-  ret(0);
-
-  bind(B1_25);
-  testb(Address(esp, 36), 2);
-  jcc(Assembler::equal, B1_33);
-
-  bind(B1_26);
-  fld_s(0);
-  testb(edx, 2);
-  fmul(1);
-  fld_s(0);
-  fmul(1);
-  jcc(Assembler::equal, B1_30);
-
-  bind(B1_27);
-  fstp_d(2);
-  fld_x(ExternalAddress(84 + CP));    //0x3ac6, 0x0ba0, 0x07
-  testl(ebx, ebx);
-  fmul(2);
-  fld_x(ExternalAddress(72 + CP));    //0xdaba, 0xfe79, 0xea
-  fmul(3);
-  fld_x(ExternalAddress(60 + CP));    //0xd84d, 0xadee, 0xc6
-  movl(eax, Address(rsp, 32));
-  faddp(2);
-  fxch(1);
-  fmul(3);
-  fld_x(ExternalAddress(48 + CP));    //0x03fe, 0x3f65, 0x7d
-  faddp(2);
-  fxch(1);
-  fmul(3);
-  fld_x(ExternalAddress(36 + CP));    //0xf024, 0x0cac, 0x00
-  faddp(2);
-  fxch(1);
-  fmul(3);
-  fld_x(ExternalAddress(24 + CP));    //0x9c2f, 0x0b60, 0x60
-  faddp(2);
-  fxch(1);
-  fmul(3);
-  fld_x(ExternalAddress(12 + CP));    //0xaaa5, 0xaaaa, 0xaa
-  faddp(2);
-  fxch(1);
-  fmulp(3);
-  fld_x(ExternalAddress(CP));    //0x0000, 0x0000, 0x00
-  faddp(1);
-  fmulp(1);
-  faddp(1);
-  fld_d(Address(ONES, RelocationHolder::none).plus_disp(rsi, Address::times_8));
-  fmula(1);
-  faddp(1);
-  fstp_d(Address(eax, 8));
-  jcc(Assembler::equal, B1_29);
-
-  bind(B1_28);
-  fldcw(Address(esp, 30));
-
-  bind(B1_29);
-  addl(esp, 52);
-  pop(ebx);
-  pop(edi);
-  pop(esi);
-  movl(esp, ebp);
-  pop(ebp);
-  ret(0);
-
-  bind(B1_30);
-  fld_x(ExternalAddress(84 + SP));    //0x8610, 0x307f, 0x62
-  testl(ebx, ebx);
-  fmul(1);
-  fld_x(ExternalAddress(72 + SP));    //0x44a6, 0xed1a, 0x29
-  fmul(2);
-  fld_x(ExternalAddress(60 + SP));    //0xbf33, 0x8bb4, 0x2f
-  movl(eax, Address(rsp, 32));
-  faddp(2);
-  fxch(1);
-  fmul(2);
-  fld_x(ExternalAddress(48 + SP));    //0x825b, 0x3997, 0x2b
-  faddp(2);
-  fxch(1);
-  fmul(2);
-  fld_x(ExternalAddress(36 + SP));    //0x45f6, 0xb616, 0x1d
-  faddp(2);
-  fxch(1);
-  fmul(2);
-  fld_x(ExternalAddress(24 + SP));    //0xc527, 0x0d00, 0x00
-  faddp(2);
-  fxch(1);
-  fmul(2);
-  fld_x(ExternalAddress(12 + SP));    //0x8887, 0x8888, 0x88
-  faddp(2);
-  fxch(1);
-  fmulp(2);
-  fld_x(ExternalAddress(SP));    //0xaaab, 0xaaaa, 0xaa
-  faddp(1);
-  fmulp(2);
-  faddp(1);
-  fld_d(Address(ONES, RelocationHolder::none).plus_disp(rsi, Address::times_8));
-  fmulp(2);
-  fmul(1);
-  faddp(1);
-  fstp_d(Address(eax, 8));
-  jcc(Assembler::equal, B1_32);
-
-  bind(B1_31);
-  fldcw(Address(esp, 30));
-
-  bind(B1_32);
-  addl(esp, 52);
-  pop(ebx);
-  pop(edi);
-  pop(esi);
-  movl(esp, ebp);
-  pop(ebp);
-  ret(0);
-
-  bind(B1_33);
-  testb(Address(esp, 36), 1);
-  jcc(Assembler::equal, B1_41);
-
-  bind(B1_34);
-  fld_s(0);
-  testb(edx, 2);
-  fmul(1);
-  fld_s(0);
-  fmul(1);
-  jcc(Assembler::equal, B1_38);
-
-  bind(B1_35);
-  fld_x(ExternalAddress(84 + SP));    //0x8610, 0x307f, 0x62
-  testl(ebx, ebx);
-  fmul(1);
-  fld_x(ExternalAddress(72 + SP));    //0x44a6, 0xed1a, 0x29
-  fmul(2);
-  fld_x(ExternalAddress(60 + SP));    //0xbf33, 0x8bb4, 0x2f
-  faddp(2);
-  fxch(1);
-  fmul(2);
-  fld_x(ExternalAddress(48 + SP));    //0x825b, 0x3997, 0x2b
-  faddp(2);
-  fxch(1);
-  fmul(2);
-  fld_x(ExternalAddress(36 + SP));    //0x45f6, 0xb616, 0x1d
-  faddp(2);
-  fxch(1);
-  fmul(2);
-  fld_x(ExternalAddress(24 + SP));    //0xc527, 0x0d00, 0x00
-  faddp(2);
-  fxch(1);
-  fmul(2);
-  fld_x(ExternalAddress(12 + SP));    //0x8887, 0x8888, 0x88
-  faddp(2);
-  fxch(1);
-  fmulp(2);
-  fld_x(ExternalAddress(SP));    //0xaaab, 0xaaaa, 0xaa
-  faddp(1);
-  fmulp(2);
-  faddp(1);
-  fld_d(Address(ONES, RelocationHolder::none).plus_disp(eax, Address::times_8));
-  fmulp(2);
-  fmul(1);
-  movl(eax, Address(esp, 32));
-  faddp(1);
-  fstp_d(Address(eax, 0));
-  jcc(Assembler::equal, B1_37);
-
-  bind(B1_36);
-  fldcw(Address(esp, 30));
-
-  bind(B1_37);
-  addl(esp, 52);
-  pop(ebx);
-  pop(edi);
-  pop(esi);
-  movl(esp, ebp);
-  pop(ebp);
-  ret(0);
-
-  bind(B1_38);
-  fstp_d(2);
-  fld_x(ExternalAddress(84 + CP));    //0x3ac6, 0x0ba0, 0x07
-  testl(ebx, ebx);
-  fmul(2);
-  fld_x(ExternalAddress(72 + CP));    //0xdaba, 0xfe79, 0xea
-  fmul(3);
-  fld_x(ExternalAddress(60 + CP));    //0xd84d, 0xadee, 0xc6
-  faddp(2);
-  fxch(1);
-  fmul(3);
-  fld_x(ExternalAddress(48 + CP));    //0x03fe, 0x3f65, 0x7d
-  faddp(2);
-  fxch(1);
-  fmul(3);
-  fld_x(ExternalAddress(36 + CP));    //0xf024, 0x0cac, 0x00
-  faddp(2);
-  fxch(1);
-  fmul(3);
-  fld_x(ExternalAddress(24 + CP));    //0x9c2f, 0x0b60, 0x60
-  faddp(2);
-  fxch(1);
-  fmul(3);
-  fld_x(ExternalAddress(12 + CP));    //0xaaa5, 0xaaaa, 0xaa
-  faddp(2);
-  fxch(1);
-  fmulp(3);
-  fld_x(ExternalAddress(CP));    //0x0000, 0x0000, 0x00
-  faddp(1);
-  fmulp(1);
-  faddp(1);
-  fld_d(Address(ONES, RelocationHolder::none).plus_disp(eax, Address::times_8));
-  fmula(1);
-  movl(eax, Address(esp, 32));
-  faddp(1);
-  fstp_d(Address(eax, 0));
-  jcc(Assembler::equal, B1_40);
-
-  bind(B1_39);
-  fldcw(Address(esp, 30));
-  bind(B1_40);
-  addl(esp, 52);
-  pop(ebx);
-  pop(edi);
-  pop(esi);
-  movl(esp, ebp);
-  pop(ebp);
-  ret(0);
-  bind(B1_41);
-  fstp_d(0);
-  addl(esp, 52);
-  pop(ebx);
-  pop(edi);
-  pop(esi);
-  movl(esp, ebp);
-  pop(ebp);
-  ret(0);
-  bind(B1_42);
-  xorl(ebx, ebx);
-  jmp(B1_8);
-  bind(B1_43);
-  xorl(ebx, ebx);
-  jmp(B1_15);
-}
-
-ATTRIBUTE_ALIGNED(16) static const juint _static_const_table_sin[] =
-{
-    0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
-    0x00000000UL, 0x00000000UL, 0x3ff00000UL, 0x176d6d31UL, 0xbf73b92eUL,
-    0xbc29b42cUL, 0x3fb917a6UL, 0xe0000000UL, 0xbc3e2718UL, 0x00000000UL,
-    0x3ff00000UL, 0x011469fbUL, 0xbf93ad06UL, 0x3c69a60bUL, 0x3fc8f8b8UL,
-    0xc0000000UL, 0xbc626d19UL, 0x00000000UL, 0x3ff00000UL, 0x939d225aUL,
-    0xbfa60beaUL, 0x2ed59f06UL, 0x3fd29406UL, 0xa0000000UL, 0xbc75d28dUL,
-    0x00000000UL, 0x3ff00000UL, 0x866b95cfUL, 0xbfb37ca1UL, 0xa6aea963UL,
-    0x3fd87de2UL, 0xe0000000UL, 0xbc672cedUL, 0x00000000UL, 0x3ff00000UL,
-    0x73fa1279UL, 0xbfbe3a68UL, 0x3806f63bUL, 0x3fde2b5dUL, 0x20000000UL,
-    0x3c5e0d89UL, 0x00000000UL, 0x3ff00000UL, 0x5bc57974UL, 0xbfc59267UL,
-    0x39ae68c8UL, 0x3fe1c73bUL, 0x20000000UL, 0x3c8b25ddUL, 0x00000000UL,
-    0x3ff00000UL, 0x53aba2fdUL, 0xbfcd0dfeUL, 0x25091dd6UL, 0x3fe44cf3UL,
-    0x20000000UL, 0x3c68076aUL, 0x00000000UL, 0x3ff00000UL, 0x99fcef32UL,
-    0x3fca8279UL, 0x667f3bcdUL, 0x3fe6a09eUL, 0x20000000UL, 0xbc8bdd34UL,
-    0x00000000UL, 0x3fe00000UL, 0x94247758UL, 0x3fc133ccUL, 0x6b151741UL,
-    0x3fe8bc80UL, 0x20000000UL, 0xbc82c5e1UL, 0x00000000UL, 0x3fe00000UL,
-    0x9ae68c87UL, 0x3fac73b3UL, 0x290ea1a3UL, 0x3fea9b66UL, 0xe0000000UL,
-    0x3c39f630UL, 0x00000000UL, 0x3fe00000UL, 0x7f909c4eUL, 0xbf9d4a2cUL,
-    0xf180bdb1UL, 0x3fec38b2UL, 0x80000000UL, 0xbc76e0b1UL, 0x00000000UL,
-    0x3fe00000UL, 0x65455a75UL, 0xbfbe0875UL, 0xcf328d46UL, 0x3fed906bUL,
-    0x20000000UL, 0x3c7457e6UL, 0x00000000UL, 0x3fe00000UL, 0x76acf82dUL,
-    0x3fa4a031UL, 0x56c62ddaUL, 0x3fee9f41UL, 0xe0000000UL, 0x3c8760b1UL,
-    0x00000000UL, 0x3fd00000UL, 0x0e5967d5UL, 0xbfac1d1fUL, 0xcff75cb0UL,
-    0x3fef6297UL, 0x20000000UL, 0x3c756217UL, 0x00000000UL, 0x3fd00000UL,
-    0x0f592f50UL, 0xbf9ba165UL, 0xa3d12526UL, 0x3fefd88dUL, 0x40000000UL,
-    0xbc887df6UL, 0x00000000UL, 0x3fc00000UL, 0x00000000UL, 0x00000000UL,
-    0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
-    0x00000000UL, 0x0f592f50UL, 0x3f9ba165UL, 0xa3d12526UL, 0x3fefd88dUL,
-    0x40000000UL, 0xbc887df6UL, 0x00000000UL, 0xbfc00000UL, 0x0e5967d5UL,
-    0x3fac1d1fUL, 0xcff75cb0UL, 0x3fef6297UL, 0x20000000UL, 0x3c756217UL,
-    0x00000000UL, 0xbfd00000UL, 0x76acf82dUL, 0xbfa4a031UL, 0x56c62ddaUL,
-    0x3fee9f41UL, 0xe0000000UL, 0x3c8760b1UL, 0x00000000UL, 0xbfd00000UL,
-    0x65455a75UL, 0x3fbe0875UL, 0xcf328d46UL, 0x3fed906bUL, 0x20000000UL,
-    0x3c7457e6UL, 0x00000000UL, 0xbfe00000UL, 0x7f909c4eUL, 0x3f9d4a2cUL,
-    0xf180bdb1UL, 0x3fec38b2UL, 0x80000000UL, 0xbc76e0b1UL, 0x00000000UL,
-    0xbfe00000UL, 0x9ae68c87UL, 0xbfac73b3UL, 0x290ea1a3UL, 0x3fea9b66UL,
-    0xe0000000UL, 0x3c39f630UL, 0x00000000UL, 0xbfe00000UL, 0x94247758UL,
-    0xbfc133ccUL, 0x6b151741UL, 0x3fe8bc80UL, 0x20000000UL, 0xbc82c5e1UL,
-    0x00000000UL, 0xbfe00000UL, 0x99fcef32UL, 0xbfca8279UL, 0x667f3bcdUL,
-    0x3fe6a09eUL, 0x20000000UL, 0xbc8bdd34UL, 0x00000000UL, 0xbfe00000UL,
-    0x53aba2fdUL, 0x3fcd0dfeUL, 0x25091dd6UL, 0x3fe44cf3UL, 0x20000000UL,
-    0x3c68076aUL, 0x00000000UL, 0xbff00000UL, 0x5bc57974UL, 0x3fc59267UL,
-    0x39ae68c8UL, 0x3fe1c73bUL, 0x20000000UL, 0x3c8b25ddUL, 0x00000000UL,
-    0xbff00000UL, 0x73fa1279UL, 0x3fbe3a68UL, 0x3806f63bUL, 0x3fde2b5dUL,
-    0x20000000UL, 0x3c5e0d89UL, 0x00000000UL, 0xbff00000UL, 0x866b95cfUL,
-    0x3fb37ca1UL, 0xa6aea963UL, 0x3fd87de2UL, 0xe0000000UL, 0xbc672cedUL,
-    0x00000000UL, 0xbff00000UL, 0x939d225aUL, 0x3fa60beaUL, 0x2ed59f06UL,
-    0x3fd29406UL, 0xa0000000UL, 0xbc75d28dUL, 0x00000000UL, 0xbff00000UL,
-    0x011469fbUL, 0x3f93ad06UL, 0x3c69a60bUL, 0x3fc8f8b8UL, 0xc0000000UL,
-    0xbc626d19UL, 0x00000000UL, 0xbff00000UL, 0x176d6d31UL, 0x3f73b92eUL,
-    0xbc29b42cUL, 0x3fb917a6UL, 0xe0000000UL, 0xbc3e2718UL, 0x00000000UL,
-    0xbff00000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
-    0x00000000UL, 0x00000000UL, 0x00000000UL, 0xbff00000UL, 0x176d6d31UL,
-    0x3f73b92eUL, 0xbc29b42cUL, 0xbfb917a6UL, 0xe0000000UL, 0x3c3e2718UL,
-    0x00000000UL, 0xbff00000UL, 0x011469fbUL, 0x3f93ad06UL, 0x3c69a60bUL,
-    0xbfc8f8b8UL, 0xc0000000UL, 0x3c626d19UL, 0x00000000UL, 0xbff00000UL,
-    0x939d225aUL, 0x3fa60beaUL, 0x2ed59f06UL, 0xbfd29406UL, 0xa0000000UL,
-    0x3c75d28dUL, 0x00000000UL, 0xbff00000UL, 0x866b95cfUL, 0x3fb37ca1UL,
-    0xa6aea963UL, 0xbfd87de2UL, 0xe0000000UL, 0x3c672cedUL, 0x00000000UL,
-    0xbff00000UL, 0x73fa1279UL, 0x3fbe3a68UL, 0x3806f63bUL, 0xbfde2b5dUL,
-    0x20000000UL, 0xbc5e0d89UL, 0x00000000UL, 0xbff00000UL, 0x5bc57974UL,
-    0x3fc59267UL, 0x39ae68c8UL, 0xbfe1c73bUL, 0x20000000UL, 0xbc8b25ddUL,
-    0x00000000UL, 0xbff00000UL, 0x53aba2fdUL, 0x3fcd0dfeUL, 0x25091dd6UL,
-    0xbfe44cf3UL, 0x20000000UL, 0xbc68076aUL, 0x00000000UL, 0xbff00000UL,
-    0x99fcef32UL, 0xbfca8279UL, 0x667f3bcdUL, 0xbfe6a09eUL, 0x20000000UL,
-    0x3c8bdd34UL, 0x00000000UL, 0xbfe00000UL, 0x94247758UL, 0xbfc133ccUL,
-    0x6b151741UL, 0xbfe8bc80UL, 0x20000000UL, 0x3c82c5e1UL, 0x00000000UL,
-    0xbfe00000UL, 0x9ae68c87UL, 0xbfac73b3UL, 0x290ea1a3UL, 0xbfea9b66UL,
-    0xe0000000UL, 0xbc39f630UL, 0x00000000UL, 0xbfe00000UL, 0x7f909c4eUL,
-    0x3f9d4a2cUL, 0xf180bdb1UL, 0xbfec38b2UL, 0x80000000UL, 0x3c76e0b1UL,
-    0x00000000UL, 0xbfe00000UL, 0x65455a75UL, 0x3fbe0875UL, 0xcf328d46UL,
-    0xbfed906bUL, 0x20000000UL, 0xbc7457e6UL, 0x00000000UL, 0xbfe00000UL,
-    0x76acf82dUL, 0xbfa4a031UL, 0x56c62ddaUL, 0xbfee9f41UL, 0xe0000000UL,
-    0xbc8760b1UL, 0x00000000UL, 0xbfd00000UL, 0x0e5967d5UL, 0x3fac1d1fUL,
-    0xcff75cb0UL, 0xbfef6297UL, 0x20000000UL, 0xbc756217UL, 0x00000000UL,
-    0xbfd00000UL, 0x0f592f50UL, 0x3f9ba165UL, 0xa3d12526UL, 0xbfefd88dUL,
-    0x40000000UL, 0x3c887df6UL, 0x00000000UL, 0xbfc00000UL, 0x00000000UL,
-    0x00000000UL, 0x00000000UL, 0xbff00000UL, 0x00000000UL, 0x00000000UL,
-    0x00000000UL, 0x00000000UL, 0x0f592f50UL, 0xbf9ba165UL, 0xa3d12526UL,
-    0xbfefd88dUL, 0x40000000UL, 0x3c887df6UL, 0x00000000UL, 0x3fc00000UL,
-    0x0e5967d5UL, 0xbfac1d1fUL, 0xcff75cb0UL, 0xbfef6297UL, 0x20000000UL,
-    0xbc756217UL, 0x00000000UL, 0x3fd00000UL, 0x76acf82dUL, 0x3fa4a031UL,
-    0x56c62ddaUL, 0xbfee9f41UL, 0xe0000000UL, 0xbc8760b1UL, 0x00000000UL,
-    0x3fd00000UL, 0x65455a75UL, 0xbfbe0875UL, 0xcf328d46UL, 0xbfed906bUL,
-    0x20000000UL, 0xbc7457e6UL, 0x00000000UL, 0x3fe00000UL, 0x7f909c4eUL,
-    0xbf9d4a2cUL, 0xf180bdb1UL, 0xbfec38b2UL, 0x80000000UL, 0x3c76e0b1UL,
-    0x00000000UL, 0x3fe00000UL, 0x9ae68c87UL, 0x3fac73b3UL, 0x290ea1a3UL,
-    0xbfea9b66UL, 0xe0000000UL, 0xbc39f630UL, 0x00000000UL, 0x3fe00000UL,
-    0x94247758UL, 0x3fc133ccUL, 0x6b151741UL, 0xbfe8bc80UL, 0x20000000UL,
-    0x3c82c5e1UL, 0x00000000UL, 0x3fe00000UL, 0x99fcef32UL, 0x3fca8279UL,
-    0x667f3bcdUL, 0xbfe6a09eUL, 0x20000000UL, 0x3c8bdd34UL, 0x00000000UL,
-    0x3fe00000UL, 0x53aba2fdUL, 0xbfcd0dfeUL, 0x25091dd6UL, 0xbfe44cf3UL,
-    0x20000000UL, 0xbc68076aUL, 0x00000000UL, 0x3ff00000UL, 0x5bc57974UL,
-    0xbfc59267UL, 0x39ae68c8UL, 0xbfe1c73bUL, 0x20000000UL, 0xbc8b25ddUL,
-    0x00000000UL, 0x3ff00000UL, 0x73fa1279UL, 0xbfbe3a68UL, 0x3806f63bUL,
-    0xbfde2b5dUL, 0x20000000UL, 0xbc5e0d89UL, 0x00000000UL, 0x3ff00000UL,
-    0x866b95cfUL, 0xbfb37ca1UL, 0xa6aea963UL, 0xbfd87de2UL, 0xe0000000UL,
-    0x3c672cedUL, 0x00000000UL, 0x3ff00000UL, 0x939d225aUL, 0xbfa60beaUL,
-    0x2ed59f06UL, 0xbfd29406UL, 0xa0000000UL, 0x3c75d28dUL, 0x00000000UL,
-    0x3ff00000UL, 0x011469fbUL, 0xbf93ad06UL, 0x3c69a60bUL, 0xbfc8f8b8UL,
-    0xc0000000UL, 0x3c626d19UL, 0x00000000UL, 0x3ff00000UL, 0x176d6d31UL,
-    0xbf73b92eUL, 0xbc29b42cUL, 0xbfb917a6UL, 0xe0000000UL, 0x3c3e2718UL,
-    0x00000000UL, 0x3ff00000UL, 0x55555555UL, 0xbfc55555UL, 0x00000000UL,
-    0xbfe00000UL, 0x11111111UL, 0x3f811111UL, 0x55555555UL, 0x3fa55555UL,
-    0x1a01a01aUL, 0xbf2a01a0UL, 0x16c16c17UL, 0xbf56c16cUL, 0xa556c734UL,
-    0x3ec71de3UL, 0x1a01a01aUL, 0x3efa01a0UL, 0x1a600000UL, 0x3d90b461UL,
-    0x1a600000UL, 0x3d90b461UL, 0x54400000UL, 0x3fb921fbUL, 0x00000000UL,
-    0x00000000UL, 0x2e037073UL, 0x3b63198aUL, 0x00000000UL, 0x00000000UL,
-    0x6dc9c883UL, 0x40245f30UL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
-    0x43380000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x43600000UL,
-    0x00000000UL, 0x00000000UL, 0x00000000UL, 0x3c800000UL, 0x00000000UL,
-    0x00000000UL, 0xffffffffUL, 0x3fefffffUL, 0x00000000UL, 0x00000000UL,
-    0x00000000UL, 0x80000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
-    0x80000000UL, 0x00000000UL, 0x80000000UL, 0x00000000UL, 0x3fe00000UL,
-    0x00000000UL, 0x3fe00000UL
-};
-
-void MacroAssembler::fast_sin(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
-                              XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
-                              Register eax, Register ebx, Register edx) {
-
-  Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
-  Label L_2TAG_PACKET_4_0_2;
-
-  assert_different_registers(eax, ebx, edx);
-
-  address static_const_table_sin = (address)_static_const_table_sin;
-
-  subl(rsp, 120);
-  movl(Address(rsp, 56), ebx);
-  lea(ebx, ExternalAddress(static_const_table_sin));
-  movsd(xmm0, Address(rsp, 128));
-  pextrw(eax, xmm0, 3);
-  andl(eax, 32767);
-  subl(eax, 12336);
-  cmpl(eax, 4293);
-  jcc(Assembler::above, L_2TAG_PACKET_0_0_2);
-  movsd(xmm1, Address(ebx, 2160));
-  mulsd(xmm1, xmm0);
-  movsd(xmm5, Address(ebx, 2272));
-  movdqu(xmm4, Address(ebx, 2256));
-  pand(xmm4, xmm0);
-  por(xmm5, xmm4);
-  movsd(xmm3, Address(ebx, 2128));
-  movdqu(xmm2, Address(ebx, 2112));
-  addpd(xmm1, xmm5);
-  cvttsd2sil(edx, xmm1);
-  cvtsi2sdl(xmm1, edx);
-  mulsd(xmm3, xmm1);
-  unpcklpd(xmm1, xmm1);
-  addl(edx, 1865216);
-  movdqu(xmm4, xmm0);
-  andl(edx, 63);
-  movdqu(xmm5, Address(ebx, 2096));
-  lea(eax, Address(ebx, 0));
-  shll(edx, 5);
-  addl(eax, edx);
-  mulpd(xmm2, xmm1);
-  subsd(xmm0, xmm3);
-  mulsd(xmm1, Address(ebx, 2144));
-  subsd(xmm4, xmm3);
-  movsd(xmm7, Address(eax, 8));
-  unpcklpd(xmm0, xmm0);
-  movapd(xmm3, xmm4);
-  subsd(xmm4, xmm2);
-  mulpd(xmm5, xmm0);
-  subpd(xmm0, xmm2);
-  movdqu(xmm6, Address(ebx, 2064));
-  mulsd(xmm7, xmm4);
-  subsd(xmm3, xmm4);
-  mulpd(xmm5, xmm0);
-  mulpd(xmm0, xmm0);
-  subsd(xmm3, xmm2);
-  movdqu(xmm2, Address(eax, 0));
-  subsd(xmm1, xmm3);
-  movsd(xmm3, Address(eax, 24));
-  addsd(xmm2, xmm3);
-  subsd(xmm7, xmm2);
-  mulsd(xmm2, xmm4);
-  mulpd(xmm6, xmm0);
-  mulsd(xmm3, xmm4);
-  mulpd(xmm2, xmm0);
-  mulpd(xmm0, xmm0);
-  addpd(xmm5, Address(ebx, 2080));
-  mulsd(xmm4, Address(eax, 0));
-  addpd(xmm6, Address(ebx, 2048));
-  mulpd(xmm5, xmm0);
-  movapd(xmm0, xmm3);
-  addsd(xmm3, Address(eax, 8));
-  mulpd(xmm1, xmm7);
-  movapd(xmm7, xmm4);
-  addsd(xmm4, xmm3);
-  addpd(xmm6, xmm5);
-  movsd(xmm5, Address(eax, 8));
-  subsd(xmm5, xmm3);
-  subsd(xmm3, xmm4);
-  addsd(xmm1, Address(eax, 16));
-  mulpd(xmm6, xmm2);
-  addsd(xmm5, xmm0);
-  addsd(xmm3, xmm7);
-  addsd(xmm1, xmm5);
-  addsd(xmm1, xmm3);
-  addsd(xmm1, xmm6);
-  unpckhpd(xmm6, xmm6);
-  addsd(xmm1, xmm6);
-  addsd(xmm4, xmm1);
-  movsd(Address(rsp, 0), xmm4);
-  fld_d(Address(rsp, 0));
-  jmp(L_2TAG_PACKET_1_0_2);
-
-  bind(L_2TAG_PACKET_0_0_2);
-  jcc(Assembler::greater, L_2TAG_PACKET_2_0_2);
-  shrl(eax, 4);
-  cmpl(eax, 268434685);
-  jcc(Assembler::notEqual, L_2TAG_PACKET_3_0_2);
-  movsd(Address(rsp, 0), xmm0);
-  fld_d(Address(rsp, 0));
-  jmp(L_2TAG_PACKET_1_0_2);
-
-  bind(L_2TAG_PACKET_3_0_2);
-  movsd(xmm3, Address(ebx, 2192));
-  mulsd(xmm3, xmm0);
-  subsd(xmm3, xmm0);
-  mulsd(xmm3, Address(ebx, 2208));
-  movsd(Address(rsp, 0), xmm0);
-  fld_d(Address(rsp, 0));
-  jmp(L_2TAG_PACKET_1_0_2);
-
-  bind(L_2TAG_PACKET_2_0_2);
-  movl(eax, Address(rsp, 132));
-  andl(eax, 2146435072);
-  cmpl(eax, 2146435072);
-  jcc(Assembler::equal, L_2TAG_PACKET_4_0_2);
-  subl(rsp, 32);
-  movsd(Address(rsp, 0), xmm0);
-  lea(eax, Address(rsp, 40));
-  movl(Address(rsp, 8), eax);
-  movl(eax, 2);
-  movl(Address(rsp, 12), eax);
-  call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlibm_sin_cos_huge())));
-  addl(rsp, 32);
-  fld_d(Address(rsp, 16));
-  jmp(L_2TAG_PACKET_1_0_2);
-  bind(L_2TAG_PACKET_4_0_2);
-  fld_d(Address(rsp, 128));
-  fmul_d(Address(ebx, 2240));
-  bind(L_2TAG_PACKET_1_0_2);
-  movl(ebx, Address(rsp, 56));
-}
diff --git a/src/hotspot/cpu/x86/macroAssembler_x86_32_tan.cpp b/src/hotspot/cpu/x86/macroAssembler_x86_32_tan.cpp
deleted file mode 100644
index 4e8be8a1f1d..00000000000
--- a/src/hotspot/cpu/x86/macroAssembler_x86_32_tan.cpp
+++ /dev/null
@@ -1,1172 +0,0 @@
-/*
-* Copyright (c) 2016, 2021, Intel Corporation. All rights reserved.
-* Intel Math Library (LIBM) Source Code
-*
-* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
-*
-* This code is free software; you can redistribute it and/or modify it
-* under the terms of the GNU General Public License version 2 only, as
-* published by the Free Software Foundation.
-*
-* This code is distributed in the hope that it will be useful, but WITHOUT
-* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-* FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-* version 2 for more details (a copy is included in the LICENSE file that
-* accompanied this code).
-*
-* You should have received a copy of the GNU General Public License version
-* 2 along with this work; if not, write to the Free Software Foundation,
-* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
-*
-* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
-* or visit www.oracle.com if you need additional information or have any
-* questions.
-*
-*/
-
-#include "asm/assembler.hpp"
-#include "asm/assembler.inline.hpp"
-#include "macroAssembler_x86.hpp"
-#include "runtime/stubRoutines.hpp"
-#include "utilities/globalDefinitions.hpp"
-
-/******************************************************************************/
-//                     ALGORITHM DESCRIPTION - TAN()
-//                     ---------------------
-//
-// Polynomials coefficients and other constants.
-//
-// Note that in this algorithm, there is a different polynomial for
-// each breakpoint, so there are 32 sets of polynomial coefficients
-// as well as 32 instances of the other constants.
-//
-// The polynomial coefficients and constants are offset from the start
-// of the main block as follows:
-//
-//   0:  c8 | c0
-//  16:  c9 | c1
-//  32: c10 | c2
-//  48: c11 | c3
-//  64: c12 | c4
-//  80: c13 | c5
-//  96: c14 | c6
-// 112: c15 | c7
-// 128: T_hi
-// 136: T_lo
-// 144: Sigma
-// 152: T_hl
-// 160: Tau
-// 168: Mask
-// 176: (end of block)
-//
-// The total table size is therefore 5632 bytes.
-//
-// Note that c0 and c1 are always zero. We could try storing
-// other constants here, and just loading the low part of the
-// SIMD register in these cases, after ensuring the high part
-// is zero.
-//
-// The higher terms of the polynomial are computed in the *low*
-// part of the SIMD register. This is so we can overlap the
-// multiplication by r^8 and the unpacking of the other part.
-//
-// The constants are:
-// T_hi + T_lo = accurate constant term in power series
-// Sigma + T_hl = accurate coefficient of r in power series (Sigma=1 bit)
-// Tau = multiplier for the reciprocal, always -1 or 0
-//
-// The basic reconstruction formula using these constants is:
-//
-// High = tau * recip_hi + t_hi
-// Med = (sgn * r + t_hl * r)_hi
-// Low = (sgn * r + t_hl * r)_lo +
-//       tau * recip_lo + T_lo + (T_hl + sigma) * c + pol
-//
-// where pol = c0 + c1 * r + c2 * r^2 + ... + c15 * r^15
-//
-// (c0 = c1 = 0, but using them keeps SIMD regularity)
-//
-// We then do a compensated sum High + Med, add the low parts together
-// and then do the final sum.
-//
-// Here recip_hi + recip_lo is an accurate reciprocal of the remainder
-// modulo pi/2
-//
-// Special cases:
-//  tan(NaN) = quiet NaN, and raise invalid exception
-//  tan(INF) = NaN and raise invalid exception
-//  tan(+/-0) = +/-0
-//
-/******************************************************************************/
-
-// The 32 bit code is at most SSE2 compliant
-
-ATTRIBUTE_ALIGNED(16) static const jushort _TP[] =
-{
-    0x4cd6, 0xaf6c, 0xc710, 0xc662, 0xbffd, 0x0000, 0x4b06, 0xb0ac, 0xd3b2, 0xcc2c,
-    0x3ff9, 0x0000, 0x00e3, 0xc850, 0xaa28, 0x9533, 0xbff3, 0x0000, 0x2ff0, 0x466d,
-    0x1a3b, 0xb266, 0x3fe5, 0x0000
-};
-
-ATTRIBUTE_ALIGNED(16) static const jushort _TQ[] =
-{
-    0x399c, 0x8391, 0x154c, 0x94ca, 0xbfff, 0x0000, 0xb6a3, 0xc36a, 0x44e2, 0x8a2c,
-    0x3ffe, 0x0000, 0xb70f, 0xd068, 0xa6ce, 0xe9dd, 0xbff9, 0x0000, 0x820f, 0x51ce,
-    0x7d76, 0x9bff, 0x3ff3, 0x0000
-};
-
-ATTRIBUTE_ALIGNED(16) static const jushort _GP[] =
-{
-    0xaaab, 0xaaaa, 0xaaaa, 0xaaaa, 0xbffd, 0x0000, 0xb62f, 0x0b60, 0x60b6, 0xb60b,
-    0xbff9, 0x0000, 0xdfa7, 0x08aa, 0x55e0, 0x8ab3, 0xbff6, 0x0000, 0x85a0, 0xa819,
-    0xbc99, 0xddeb, 0xbff2, 0x0000, 0x7065, 0x6a37, 0x795f, 0xb354, 0xbfef, 0x0000,
-    0xa8f9, 0x83f1, 0x2ec8, 0x9140, 0xbfec, 0x0000, 0xf3ca, 0x8c96, 0x8e0b, 0xeb6d,
-    0xbfe8, 0x0000, 0x355b, 0xd910, 0x67c9, 0xbed3, 0xbfe5, 0x0000, 0x286b, 0xb49e,
-    0xb854, 0x9a98, 0xbfe2, 0x0000, 0x0871, 0x1a2f, 0x6477, 0xfcc4, 0xbfde, 0x0000,
-    0xa559, 0x1da9, 0xaed2, 0xba76, 0xbfdb, 0x0000, 0x00a3, 0x7fea, 0x9bc3, 0xf205,
-    0xbfd8, 0x0000
-};
-
-void MacroAssembler::libm_tancot_huge(XMMRegister xmm0, XMMRegister xmm1, Register eax, Register ecx, Register edx, Register ebx, Register esi, Register edi, Register ebp, Register esp) {
-  Label B1_1, B1_2, B1_3, B1_4, B1_5, B1_6, B1_7, B1_8, B1_9, B1_10, B1_11, B1_12;
-  Label B1_13, B1_14, B1_15, B1_16, B1_17, B1_18, B1_19, B1_20, B1_21, B1_22, B1_23;
-  Label B1_24, B1_25, B1_26, B1_27, B1_28, B1_29, B1_30, B1_31, B1_32, B1_33, B1_34;
-  Label B1_35, B1_36, B1_37, B1_38, B1_39, B1_40, B1_43;
-
-  assert_different_registers(ebx, eax, ecx, edx, esi, edi, ebp, esp);
-
-  address TP = (address)_TP;
-  address TQ = (address)_TQ;
-  address GP = (address)_GP;
-
-  bind(B1_1);
-  push(ebp);
-  movl(ebp, esp);
-  andl(esp, -64);
-  push(esi);
-  push(edi);
-  push(ebx);
-  subl(esp, 52);
-  movl(eax, Address(ebp, 16));
-  movl(ebx, Address(ebp, 20));
-  movl(Address(esp, 40), eax);
-
-  bind(B1_2);
-  fnstcw(Address(esp, 38));
-
-  bind(B1_3);
-  movl(edx, Address(ebp, 12));
-  movl(eax, edx);
-  andl(eax, 2147483647);
-  shrl(edx, 31);
-  movl(Address(esp, 44), edx);
-  cmpl(eax, 1104150528);
-  jcc(Assembler::aboveEqual, B1_11);
-
-  bind(B1_4);
-  movsd(xmm1, Address(ebp, 8));
-  movzwl(ecx, Address(esp, 38));
-  movl(edx, ecx);
-  andl(edx, 768);
-  andps(xmm1, ExternalAddress(L_2IL0FLOATPACKET_0));    //0xffffffffUL, 0x7fffffffUL, 0x00000000UL, 0x00000000UL
-  cmpl(edx, 768);
-  movsd(xmm0, ExternalAddress(PI4_INV));    ////0x6dc9c883UL, 0x3ff45f30UL
-  mulsd(xmm0, xmm1);
-  movsd(Address(ebp, 8), xmm1);
-  movsd(Address(esp, 0), xmm0);
-  jcc(Assembler::equal, B1_39);
-
-  bind(B1_5);
-  orl(ecx, -64768);
-  movw(Address(esp, 36), ecx);
-
-  bind(B1_6);
-  fldcw(Address(esp, 36));
-
-  bind(B1_7);
-  movsd(xmm1, Address(ebp, 8));
-  movl(edi, 1);
-
-  bind(B1_8);
-  movl(Address(esp, 12), esi);
-  movl(esi, Address(esp, 4));
-  movl(edx, esi);
-  movl(Address(esp, 24), edi);
-  movl(edi, esi);
-  shrl(edi, 20);
-  andl(edx, 1048575);
-  movl(ecx, edi);
-  orl(edx, 1048576);
-  negl(ecx);
-  addl(edi, 13);
-  movl(Address(esp, 8), ebx);
-  addl(ecx, 19);
-  movl(ebx, edx);
-  movl(Address(esp, 28), ecx);
-  shrl(ebx);
-  movl(ecx, edi);
-  shll(edx);
-  movl(ecx, Address(esp, 28));
-  movl(edi, Address(esp, 0));
-  shrl(edi);
-  orl(edx, edi);
-  cmpl(esi, 1094713344);
-  movsd(Address(esp, 16), xmm1);
-  fld_d(Address(esp, 16));
-  cmov32(Assembler::below, edx, ebx);
-  movl(edi, Address(esp, 24));
-  movl(esi, Address(esp, 12));
-  lea(ebx, Address(edx, 1));
-  andl(ebx, -2);
-  movl(Address(esp, 16), ebx);
-  cmpl(eax, 1094713344);
-  fild_s(Address(esp, 16));
-  movl(ebx, Address(esp, 8));
-  jcc(Assembler::aboveEqual, B1_10);
-
-  bind(B1_9);
-  fld_d(ExternalAddress(PI4X3));    //0x54443000UL, 0xbfe921fbUL
-  fmul(1);
-  faddp(2);
-  fld_d(ExternalAddress(PI4X3 + 8));    //0x3b39a000UL, 0x3d373dcbUL
-  fmul(1);
-  faddp(2);
-  fld_d(ExternalAddress(PI4X3 + 16));    //0xe0e68948UL, 0xba845c06UL
-  fmulp(1);
-  faddp(1);
-  jmp(B1_17);
-
-  bind(B1_10);
-  fld_d(ExternalAddress(PI4X4));    //0x54400000UL, 0xbfe921fbUL
-  fmul(1);
-  faddp(2);
-  fld_d(ExternalAddress(PI4X4 + 8));    //0x1a600000UL, 0xbdc0b461UL
-  fmul(1);
-  faddp(2);
-  fld_d(ExternalAddress(PI4X4 + 16));    //0x2e000000UL, 0xbb93198aUL
-  fmul(1);
-  faddp(2);
-  fld_d(ExternalAddress(PI4X4 + 24));    //0x252049c1UL, 0xb96b839aUL
-  fmulp(1);
-  faddp(1);
-  jmp(B1_17);
-
-  bind(B1_11);
-  movzwl(edx, Address(esp, 38));
-  movl(eax, edx);
-  andl(eax, 768);
-  cmpl(eax, 768);
-  jcc(Assembler::equal, B1_40);
-
-  bind(B1_12);
-  orl(edx, -64768);
-  movw(Address(esp, 36), edx);
-
-  bind(B1_13);
-  fldcw(Address(esp, 36));
-
-  bind(B1_14);
-  movl(edi, 1);
-
-  bind(B1_15);
-  movsd(xmm0, Address(ebp, 8));
-  addl(esp, -32);
-  andps(xmm0, ExternalAddress(L_2IL0FLOATPACKET_0));    //0xffffffffUL, 0x7fffffffUL, 0x00000000UL, 0x00000000UL
-  lea(eax, Address(esp, 32));
-  movsd(Address(eax, 16), xmm0);
-  fld_d(Address(eax, 16));
-  fstp_x(Address(esp, 0));
-  movl(Address(esp, 12), 0);
-  movl(Address(esp, 16), eax);
-  call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlibm_reduce_pi04l())));
-
-  bind(B1_43);
-  movl(edx, eax);
-  addl(esp, 32);
-
-  bind(B1_16);
-  fld_d(Address(esp, 0));
-  fld_d(Address(esp, 8));
-  faddp(1);
-
-  bind(B1_17);
-  movl(eax, ebx);
-  andl(eax, 3);
-  cmpl(eax, 3);
-  jcc(Assembler::notEqual, B1_24);
-
-  bind(B1_18);
-  fld_d(ExternalAddress(ONES));
-  incl(edx);
-  fdiv(1);
-  testb(edx, 2);
-  fstp_x(Address(esp, 24));
-  fld_s(0);
-  fmul(1);
-  fld_s(0);
-  fmul(1);
-  fld_x(ExternalAddress(36 + TP));    //0x2ff0, 0x466d, 0x1a
-  fmul(2);
-  fld_x(ExternalAddress(24 + TP));    //0x00e3, 0xc850, 0xaa
-  faddp(1);
-  fmul(2);
-  fld_x(ExternalAddress(12 + TP));    //0x4b06, 0xb0ac, 0xd3
-  faddp(1);
-  fmul(2);
-  fld_x(ExternalAddress(36 + TQ));    //0x820f, 0x51ce, 0x7d
-  fmul(3);
-  fld_x(ExternalAddress(24 + TQ));    //0xb70f, 0xd068, 0xa6
-  faddp(1);
-  fmul(3);
-  fld_x(ExternalAddress(12 + TQ));    //0xb6a3, 0xc36a, 0x44
-  faddp(1);
-  fmul(3);
-  fld_x(ExternalAddress(TQ));    //0x399c, 0x8391, 0x15
-  faddp(1);
-  fld_x(ExternalAddress(TP));    //0x4cd6, 0xaf6c, 0xc7
-  faddp(2);
-  fld_x(ExternalAddress(132 + GP));    //0x00a3, 0x7fea, 0x9b
-  fmul(3);
-  fld_x(ExternalAddress(120 + GP));    //0xa559, 0x1da9, 0xae
-  fmul(4);
-  fld_x(ExternalAddress(108 + GP));    //0x0871, 0x1a2f, 0x64
-  faddp(2);
-  fxch(1);
-  fmul(4);
-  fld_x(ExternalAddress(96 + GP));    //0x286b, 0xb49e, 0xb8
-  faddp(2);
-  fxch(1);
-  fmul(4);
-  fld_x(ExternalAddress(84 + GP));    //0x355b, 0xd910, 0x67
-  faddp(2);
-  fxch(1);
-  fmul(4);
-  fld_x(ExternalAddress(72 + GP));    //0x8c96, 0x8e0b, 0xeb
-  faddp(2);
-  fxch(1);
-  fmul(4);
-  fld_x(ExternalAddress(60 + GP));    //0xa8f9, 0x83f1, 0x2e
-  faddp(2);
-  fxch(1);
-  fmul(4);
-  fld_x(ExternalAddress(48 + GP));    //0x7065, 0x6a37, 0x79
-  faddp(2);
-  fxch(1);
-  fmul(4);
-  fld_x(ExternalAddress(36 + GP));    //0x85a0, 0xa819, 0xbc
-  faddp(2);
-  fxch(1);
-  fmul(4);
-  fld_x(ExternalAddress(24 + GP));    //0xdfa7, 0x08aa, 0x55
-  faddp(2);
-  fxch(1);
-  fmulp(4);
-  fld_x(ExternalAddress(12 + GP));    //0xb62f, 0x0b60, 0x60
-  faddp(1);
-  fmul(4);
-  fmul(5);
-  fld_x(ExternalAddress(GP));    //0xaaab, 0xaaaa, 0xaa
-  faddp(4);
-  fxch(3);
-  fmul(5);
-  faddp(3);
-  jcc(Assembler::equal, B1_20);
-
-  bind(B1_19);
-  fld_x(Address(esp, 24));
-  fxch(1);
-  fdivrp(2);
-  fxch(1);
-  fmulp(3);
-  movl(eax, Address(esp, 44));
-  xorl(eax, 1);
-  fxch(2);
-  fmul(3);
-  fld_d(Address(ONES, RelocationHolder::none).plus_disp(eax, Address::times_8));
-  fmula(2);
-  fmula(3);
-  fxch(3);
-  faddp(2);
-  fxch(1);
-  fstp_d(Address(esp, 16));
-  fmul(1);
-  fxch(1);
-  fmulp(2);
-  movsd(xmm0, Address(esp, 16));
-  faddp(1);
-  fstp_d(Address(esp, 16));
-  movsd(xmm1, Address(esp, 16));
-  jmp(B1_21);
-
-  bind(B1_20);
-  fdivrp(1);
-  fmulp(2);
-  fxch(1);
-  fmul(2);
-  movl(eax, Address(esp, 44));
-  fld_d(Address(ONES, RelocationHolder::none).plus_disp(eax, Address::times_8));
-  fmula(1);
-  fmula(3);
-  fxch(3);
-  faddp(1);
-  fstp_d(Address(esp, 16));
-  fmul(1);
-  fld_x(Address(esp, 24));
-  fmulp(2);
-  movsd(xmm0, Address(esp, 16));
-  faddp(1);
-  fstp_d(Address(esp, 16));
-  movsd(xmm1, Address(esp, 16));
-
-  bind(B1_21);
-  testl(edi, edi);
-  jcc(Assembler::equal, B1_23);
-
-  bind(B1_22);
-  fldcw(Address(esp, 38));
-
-  bind(B1_23);
-  movl(eax, Address(esp, 40));
-  movsd(Address(eax, 0), xmm0);
-  movsd(Address(eax, 8), xmm1);
-  addl(esp, 52);
-  pop(ebx);
-  pop(edi);
-  pop(esi);
-  movl(esp, ebp);
-  pop(ebp);
-  ret(0);
-
-  bind(B1_24);
-  testb(ebx, 2);
-  jcc(Assembler::equal, B1_31);
-
-  bind(B1_25);
-  incl(edx);
-  fld_s(0);
-  fmul(1);
-  testb(edx, 2);
-  jcc(Assembler::equal, B1_27);
-
-  bind(B1_26);
-  fld_d(ExternalAddress(ONES));
-  fdiv(2);
-  fld_s(1);
-  fmul(2);
-  fld_x(ExternalAddress(132 + GP));    //0x00a3, 0x7fea, 0x9b
-  fmul(1);
-  fld_x(ExternalAddress(120 + GP));    //0xa559, 0x1da9, 0xae
-  fmul(2);
-  fld_x(ExternalAddress(108 + GP));    //0x67c9, 0xbed3, 0xbf
-  movl(eax, Address(esp, 44));
-  faddp(2);
-  fxch(1);
-  fmul(2);
-  xorl(eax, 1);
-  fld_x(ExternalAddress(96 + GP));    //0x286b, 0xb49e, 0xb8
-  faddp(2);
-  fxch(1);
-  fmul(2);
-  fld_x(ExternalAddress(84 + GP));    //0x355b, 0xd910, 0x67
-  faddp(2);
-  fxch(1);
-  fmul(2);
-  fld_x(ExternalAddress(72 + GP));    //0xf3ca, 0x8c96, 0x8e
-  faddp(2);
-  fxch(1);
-  fmul(2);
-  fld_x(ExternalAddress(60 + GP));    //0xa8f9, 0x83f1, 0x2e
-  faddp(2);
-  fxch(1);
-  fmul(2);
-  fld_x(ExternalAddress(48 + GP));    //0x7065, 0x6a37, 0x79
-  faddp(2);
-  fxch(1);
-  fmul(2);
-  fld_x(ExternalAddress(36 + GP));    //0x85a0, 0xa819, 0xbc
-  faddp(2);
-  fxch(1);
-  fmul(2);
-  fld_x(ExternalAddress(24 + GP));    //0xdfa7, 0x08aa, 0x55
-  faddp(2);
-  fxch(1);
-  fmulp(2);
-  fld_x(ExternalAddress(12 + GP));    //0xb62f, 0x0b60, 0x60
-  faddp(1);
-  fmulp(3);
-  fld_x(ExternalAddress(GP));    //0xaaab, 0xaaaa, 0xaa
-  faddp(1);
-  fmul(3);
-  fxch(2);
-  fmulp(3);
-  fxch(1);
-  faddp(2);
-  fld_d(Address(ONES, RelocationHolder::none).plus_disp(eax, Address::times_8));
-  fmula(2);
-  fmulp(1);
-  faddp(1);
-  fstp_d(Address(esp, 16));
-  movsd(xmm0, Address(esp, 16));
-  jmp(B1_28);
-
-  bind(B1_27);
-  fld_x(ExternalAddress(36 + TP));    //0x2ff0, 0x466d, 0x1a
-  fmul(1);
-  fld_x(ExternalAddress(24 + TP));    //0x00e3, 0xc850, 0xaa
-  movl(eax, Address(esp, 44));
-  faddp(1);
-  fmul(1);
-  fld_x(ExternalAddress(36 + TQ));    //0x820f, 0x51ce, 0x7d
-  fmul(2);
-  fld_x(ExternalAddress(24 + TQ));    //0xb70f, 0xd068, 0xa6
-  faddp(1);
-  fmul(2);
-  fld_x(ExternalAddress(12 + TQ));    //0xb6a3, 0xc36a, 0x44
-  faddp(1);
-  fmul(2);
-  fld_x(ExternalAddress(TQ));    //0x399c, 0x8391, 0x15
-  faddp(1);
-  fld_x(ExternalAddress(12 + TP));    //0x4b06, 0xb0ac, 0xd3
-  faddp(2);
-  fxch(1);
-  fmul(2);
-  fld_x(ExternalAddress(TP));    //0x4cd6, 0xaf6c, 0xc7
-  faddp(1);
-  fdivrp(1);
-  fmulp(1);
-  fmul(1);
-  fld_d(Address(ONES, RelocationHolder::none).plus_disp(eax, Address::times_8));
-  fmula(1);
-  fmulp(2);
-  faddp(1);
-  fstp_d(Address(esp, 16));
-  movsd(xmm0, Address(esp, 16));
-
-  bind(B1_28);
-  testl(edi, edi);
-  jcc(Assembler::equal, B1_30);
-
-  bind(B1_29);
-  fldcw(Address(esp, 38));
-
-  bind(B1_30);
-  movl(eax, Address(esp, 40));
-  movsd(Address(eax, 0), xmm0);
-  addl(esp, 52);
-  pop(ebx);
-  pop(edi);
-  pop(esi);
-  movl(esp, ebp);
-  pop(ebp);
-  ret(0);
-
-  bind(B1_31);
-  testb(ebx, 1);
-  jcc(Assembler::equal, B1_38);
-
-  bind(B1_32);
-  incl(edx);
-  fld_s(0);
-  fmul(1);
-  testb(edx, 2);
-  jcc(Assembler::equal, B1_34);
-
-  bind(B1_33);
-  fld_x(ExternalAddress(36 + TP));    //0x2ff0, 0x466d, 0x1a
-  fmul(1);
-  fld_x(ExternalAddress(24 + TP));    //0x00e3, 0xc850, 0xaa
-  movl(eax, Address(esp, 44));
-  faddp(1);
-  fmul(1);
-  xorl(eax, 1);
-  fld_x(ExternalAddress(36 + TQ));    //0x820f, 0x51ce, 0x7d
-  fmul(2);
-  fld_x(ExternalAddress(24 + TQ));    //0xb70f, 0xd068, 0xa6
-  faddp(1);
-  fmul(2);
-  fld_x(ExternalAddress(12 + TQ));    //0xb6a3, 0xc36a, 0x44
-  faddp(1);
-  fmul(2);
-  fld_x(ExternalAddress(TQ));    //0x399c, 0x8391, 0x15
-  faddp(1);
-  fld_x(ExternalAddress(12 + TP));    //0x4b06, 0xb0ac, 0xd3
-  faddp(2);
-  fxch(1);
-  fmul(2);
-  fld_x(ExternalAddress(TP));    //0x4cd6, 0xaf6c, 0xc7
-  faddp(1);
-  fdivrp(1);
-  fmulp(1);
-  fmul(1);
-  fld_d(Address(ONES, RelocationHolder::none).plus_disp(eax, Address::times_8));
-  fmula(1);
-  fmulp(2);
-  faddp(1);
-  fstp_d(Address(esp, 16));
-  movsd(xmm0, Address(esp, 16));
-  jmp(B1_35);
-
-  bind(B1_34);
-  fld_d(ExternalAddress(ONES));
-  fdiv(2);
-  fld_s(1);
-  fmul(2);
-  fld_x(ExternalAddress(132 + GP));    //0x00a3, 0x7fea, 0x9b
-  fmul(1);
-  fld_x(ExternalAddress(120 + GP));    //0xa559, 0x1da9, 0xae
-  fmul(2);
-  fld_x(ExternalAddress(108 + GP));    //0x67c9, 0xbed3, 0xbf
-  movl(eax, Address(esp, 44));
-  faddp(2);
-  fxch(1);
-  fmul(2);
-  fld_x(ExternalAddress(96 + GP));    //0x286b, 0xb49e, 0xb8
-  faddp(2);
-  fxch(1);
-  fmul(2);
-  fld_x(ExternalAddress(84 + GP));    //0x355b, 0xd910, 0x67
-  faddp(2);
-  fxch(1);
-  fmul(2);
-  fld_x(ExternalAddress(72 + GP));    //0xf3ca, 0x8c96, 0x8e
-  faddp(2);
-  fxch(1);
-  fmul(2);
-  fld_x(ExternalAddress(60 + GP));    //0xa8f9, 0x83f1, 0x2e
-  faddp(2);
-  fxch(1);
-  fmul(2);
-  fld_x(ExternalAddress(48 + GP));    //0x7065, 0x6a37, 0x79
-  faddp(2);
-  fxch(1);
-  fmul(2);
-  fld_x(ExternalAddress(36 + GP));    //0x85a0, 0xa819, 0xbc
-  faddp(2);
-  fxch(1);
-  fmul(2);
-  fld_x(ExternalAddress(24 + GP));    //0xdfa7, 0x08aa, 0x55
-  faddp(2);
-  fxch(1);
-  fmulp(2);
-  fld_x(ExternalAddress(12 + GP));    //0xb62f, 0x0b60, 0x60
-  faddp(1);
-  fmulp(3);
-  fld_x(ExternalAddress(GP));    //0xaaab, 0xaaaa, 0xaa
-  faddp(1);
-  fmul(3);
-  fxch(2);
-  fmulp(3);
-  fxch(1);
-  faddp(2);
-  fld_d(Address(ONES, RelocationHolder::none).plus_disp(eax, Address::times_8));
-  fmula(2);
-  fmulp(1);
-  faddp(1);
-  fstp_d(Address(esp, 16));
-  movsd(xmm0, Address(esp, 16));
-
-  bind(B1_35);
-  testl(edi, edi);
-  jcc(Assembler::equal, B1_37);
-
-  bind(B1_36);
-  fldcw(Address(esp, 38));
-
-  bind(B1_37);
-  movl(eax, Address(esp, 40));
-  movsd(Address(eax, 8), xmm0);
-  addl(esp, 52);
-  pop(ebx);
-  pop(edi);
-  pop(esi);
-  mov(esp, ebp);
-  pop(ebp);
-  ret(0);
-
-  bind(B1_38);
-  fstp_d(0);
-  addl(esp, 52);
-  pop(ebx);
-  pop(edi);
-  pop(esi);
-  mov(esp, ebp);
-  pop(ebp);
-  ret(0);
-
-  bind(B1_39);
-  xorl(edi, edi);
-  jmp(B1_8);
-
-  bind(B1_40);
-  xorl(edi, edi);
-  jmp(B1_15);
-}
-
-ATTRIBUTE_ALIGNED(16) static const juint _static_const_table_tan[] =
-{
-    0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x882c10faUL,
-    0x3f9664f4UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
-    0x00000000UL, 0x00000000UL, 0x55e6c23dUL, 0x3f8226e3UL, 0x55555555UL,
-    0x3fd55555UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
-    0x0e157de0UL, 0x3f6d6d3dUL, 0x11111111UL, 0x3fc11111UL, 0x00000000UL,
-    0x00000000UL, 0x00000000UL, 0x00000000UL, 0x452b75e3UL, 0x3f57da36UL,
-    0x1ba1ba1cUL, 0x3faba1baUL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
-    0x00000000UL, 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x00000000UL,
-    0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x4e435f9bUL,
-    0x3f953f83UL, 0x00000000UL, 0x00000000UL, 0x3c6e8e46UL, 0x3f9b74eaUL,
-    0x00000000UL, 0x00000000UL, 0xda5b7511UL, 0x3f85ad63UL, 0xdc230b9bUL,
-    0x3fb97558UL, 0x26cb3788UL, 0x3f881308UL, 0x76fc4985UL, 0x3fd62ac9UL,
-    0x77bb08baUL, 0x3f757c85UL, 0xb6247521UL, 0x3fb1381eUL, 0x5922170cUL,
-    0x3f754e95UL, 0x8746482dUL, 0x3fc27f83UL, 0x11055b30UL, 0x3f64e391UL,
-    0x3e666320UL, 0x3fa3e609UL, 0x0de9dae3UL, 0x3f6301dfUL, 0x1f1dca06UL,
-    0x3fafa8aeUL, 0x8c5b2da2UL, 0x3fb936bbUL, 0x4e88f7a5UL, 0x3c587d05UL,
-    0x00000000UL, 0x3ff00000UL, 0xa8935dd9UL, 0x3f83dde2UL, 0x00000000UL,
-    0x00000000UL, 0x00000000UL, 0x00000000UL, 0x5a279ea3UL, 0x3faa3407UL,
-    0x00000000UL, 0x00000000UL, 0x432d65faUL, 0x3fa70153UL, 0x00000000UL,
-    0x00000000UL, 0x891a4602UL, 0x3f9d03efUL, 0xd62ca5f8UL, 0x3fca77d9UL,
-    0xb35f4628UL, 0x3f97a265UL, 0x433258faUL, 0x3fd8cf51UL, 0xb58fd909UL,
-    0x3f8f88e3UL, 0x01771ceaUL, 0x3fc2b154UL, 0xf3562f8eUL, 0x3f888f57UL,
-    0xc028a723UL, 0x3fc7370fUL, 0x20b7f9f0UL, 0x3f80f44cUL, 0x214368e9UL,
-    0x3fb6dfaaUL, 0x28891863UL, 0x3f79b4b6UL, 0x172dbbf0UL, 0x3fb6cb8eUL,
-    0xe0553158UL, 0x3fc975f5UL, 0x593fe814UL, 0x3c2ef5d3UL, 0x00000000UL,
-    0x3ff00000UL, 0x03dec550UL, 0x3fa44203UL, 0x00000000UL, 0x00000000UL,
-    0x00000000UL, 0x00000000UL, 0x9314533eUL, 0x3fbb8ec5UL, 0x00000000UL,
-    0x00000000UL, 0x09aa36d0UL, 0x3fb6d3f4UL, 0x00000000UL, 0x00000000UL,
-    0xdcb427fdUL, 0x3fb13950UL, 0xd87ab0bbUL, 0x3fd5335eUL, 0xce0ae8a5UL,
-    0x3fabb382UL, 0x79143126UL, 0x3fddba41UL, 0x5f2b28d4UL, 0x3fa552f1UL,
-    0x59f21a6dUL, 0x3fd015abUL, 0x22c27d95UL, 0x3fa0e984UL, 0xe19fc6aaUL,
-    0x3fd0576cUL, 0x8f2c2950UL, 0x3f9a4898UL, 0xc0b3f22cUL, 0x3fc59462UL,
-    0x1883a4b8UL, 0x3f94b61cUL, 0x3f838640UL, 0x3fc30eb8UL, 0x355c63dcUL,
-    0x3fd36a08UL, 0x1dce993dUL, 0xbc6d704dUL, 0x00000000UL, 0x3ff00000UL,
-    0x2b82ab63UL, 0x3fb78e92UL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
-    0x00000000UL, 0x56f37042UL, 0x3fccfc56UL, 0x00000000UL, 0x00000000UL,
-    0xaa563951UL, 0x3fc90125UL, 0x00000000UL, 0x00000000UL, 0x3d0e7c5dUL,
-    0x3fc50533UL, 0x9bed9b2eUL, 0x3fdf0ed9UL, 0x5fe7c47cUL, 0x3fc1f250UL,
-    0x96c125e5UL, 0x3fe2edd9UL, 0x5a02bbd8UL, 0x3fbe5c71UL, 0x86362c20UL,
-    0x3fda08b7UL, 0x4b4435edUL, 0x3fb9d342UL, 0x4b494091UL, 0x3fd911bdUL,
-    0xb56658beUL, 0x3fb5e4c7UL, 0x93a2fd76UL, 0x3fd3c092UL, 0xda271794UL,
-    0x3fb29910UL, 0x3303df2bUL, 0x3fd189beUL, 0x99fcef32UL, 0x3fda8279UL,
-    0xb68c1467UL, 0x3c708b2fUL, 0x00000000UL, 0x3ff00000UL, 0x980c4337UL,
-    0x3fc5f619UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
-    0xcc03e501UL, 0x3fdff10fUL, 0x00000000UL, 0x00000000UL, 0x44a4e845UL,
-    0x3fddb63bUL, 0x00000000UL, 0x00000000UL, 0x3768ad9fUL, 0x3fdb72a4UL,
-    0x3dd01ccaUL, 0x3fe5fdb9UL, 0xa61d2811UL, 0x3fd972b2UL, 0x5645ad0bUL,
-    0x3fe977f9UL, 0xd013b3abUL, 0x3fd78ca3UL, 0xbf0bf914UL, 0x3fe4f192UL,
-    0x4d53e730UL, 0x3fd5d060UL, 0x3f8b9000UL, 0x3fe49933UL, 0xe2b82f08UL,
-    0x3fd4322aUL, 0x5936a835UL, 0x3fe27ae1UL, 0xb1c61c9bUL, 0x3fd2b3fbUL,
-    0xef478605UL, 0x3fe1659eUL, 0x190834ecUL, 0x3fe11ab7UL, 0xcdb625eaUL,
-    0xbc8e564bUL, 0x00000000UL, 0x3ff00000UL, 0xb07217e3UL, 0x3fd248f1UL,
-    0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x2b2c49d0UL,
-    0x3ff2de9cUL, 0x00000000UL, 0x00000000UL, 0x2655bc98UL, 0x3ff33e58UL,
-    0x00000000UL, 0x00000000UL, 0xff691fa2UL, 0x3ff3972eUL, 0xe93463bdUL,
-    0x3feeed87UL, 0x070e10a0UL, 0x3ff3f5b2UL, 0xf4d790a4UL, 0x3ff20c10UL,
-    0xa04e8ea3UL, 0x3ff4541aUL, 0x386accd3UL, 0x3ff1369eUL, 0x222a66ddUL,
-    0x3ff4b521UL, 0x22a9777eUL, 0x3ff20817UL, 0x52a04a6eUL, 0x3ff5178fUL,
-    0xddaa0031UL, 0x3ff22137UL, 0x4447d47cUL, 0x3ff57c01UL, 0x1e9c7f1dUL,
-    0x3ff29311UL, 0x2ab7f990UL, 0x3fe561b8UL, 0x209c7df1UL, 0x3c87a8c5UL,
-    0x00000000UL, 0x3ff00000UL, 0x4170bcc6UL, 0x3fdc92d8UL, 0x00000000UL,
-    0x00000000UL, 0x00000000UL, 0x00000000UL, 0xc7ab4d5aUL, 0x40085e24UL,
-    0x00000000UL, 0x00000000UL, 0xe93ea75dUL, 0x400b963dUL, 0x00000000UL,
-    0x00000000UL, 0x94a7f25aUL, 0x400f37e2UL, 0x4b6261cbUL, 0x3ff5f984UL,
-    0x5a9dd812UL, 0x4011aab0UL, 0x74c30018UL, 0x3ffaf5a5UL, 0x7f2ce8e3UL,
-    0x4013fe8bUL, 0xfe8e54faUL, 0x3ffd7334UL, 0x670d618dUL, 0x4016a10cUL,
-    0x4db97058UL, 0x4000e012UL, 0x24df44ddUL, 0x40199c5fUL, 0x697d6eceUL,
-    0x4003006eUL, 0x83298b82UL, 0x401cfc4dUL, 0x19d490d6UL, 0x40058c19UL,
-    0x2ae42850UL, 0x3fea4300UL, 0x118e20e6UL, 0xbc7a6db8UL, 0x00000000UL,
-    0x40000000UL, 0xe33345b8UL, 0xbfd4e526UL, 0x00000000UL, 0x00000000UL,
-    0x00000000UL, 0x00000000UL, 0x65965966UL, 0x40219659UL, 0x00000000UL,
-    0x00000000UL, 0x882c10faUL, 0x402664f4UL, 0x00000000UL, 0x00000000UL,
-    0x83cd3723UL, 0x402c8342UL, 0x00000000UL, 0x40000000UL, 0x55e6c23dUL,
-    0x403226e3UL, 0x55555555UL, 0x40055555UL, 0x34451939UL, 0x40371c96UL,
-    0xaaaaaaabUL, 0x400aaaaaUL, 0x0e157de0UL, 0x403d6d3dUL, 0x11111111UL,
-    0x40111111UL, 0xa738201fUL, 0x4042bbceUL, 0x05b05b06UL, 0x4015b05bUL,
-    0x452b75e3UL, 0x4047da36UL, 0x1ba1ba1cUL, 0x401ba1baUL, 0x00000000UL,
-    0x3ff00000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x40000000UL,
-    0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
-    0x00000000UL, 0x4f48b8d3UL, 0xbf33eaf9UL, 0x00000000UL, 0x00000000UL,
-    0x0cf7586fUL, 0x3f20b8eaUL, 0x00000000UL, 0x00000000UL, 0xd0258911UL,
-    0xbf0abaf3UL, 0x23e49fe9UL, 0xbfab5a8cUL, 0x2d53222eUL, 0x3ef60d15UL,
-    0x21169451UL, 0x3fa172b2UL, 0xbb254dbcUL, 0xbee1d3b5UL, 0xdbf93b8eUL,
-    0xbf84c7dbUL, 0x05b4630bUL, 0x3ecd3364UL, 0xee9aada7UL, 0x3f743924UL,
-    0x794a8297UL, 0xbeb7b7b9UL, 0xe015f797UL, 0xbf5d41f5UL, 0xe41a4a56UL,
-    0x3ea35dfbUL, 0xe4c2a251UL, 0x3f49a2abUL, 0x5af9e000UL, 0xbfce49ceUL,
-    0x8c743719UL, 0x3d1eb860UL, 0x00000000UL, 0x00000000UL, 0x1b4863cfUL,
-    0x3fd78294UL, 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0xfffffff8UL,
-    0x535ad890UL, 0xbf2b9320UL, 0x00000000UL, 0x00000000UL, 0x018fdf1fUL,
-    0x3f16d61dUL, 0x00000000UL, 0x00000000UL, 0x0359f1beUL, 0xbf0139e4UL,
-    0xa4317c6dUL, 0xbfa67e17UL, 0x82672d0fUL, 0x3eebb405UL, 0x2f1b621eUL,
-    0x3f9f455bUL, 0x51ccf238UL, 0xbed55317UL, 0xf437b9acUL, 0xbf804beeUL,
-    0xc791a2b5UL, 0x3ec0e993UL, 0x919a1db2UL, 0x3f7080c2UL, 0x336a5b0eUL,
-    0xbeaa48a2UL, 0x0a268358UL, 0xbf55a443UL, 0xdfd978e4UL, 0x3e94b61fUL,
-    0xd7767a58UL, 0x3f431806UL, 0x2aea0000UL, 0xbfc9bbe8UL, 0x7723ea61UL,
-    0xbd3a2369UL, 0x00000000UL, 0x00000000UL, 0xdf7796ffUL, 0x3fd6e642UL,
-    0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0xfffffff8UL, 0xb9ff07ceUL,
-    0xbf231c78UL, 0x00000000UL, 0x00000000UL, 0xa5517182UL, 0x3f0ff0e0UL,
-    0x00000000UL, 0x00000000UL, 0x790b4cbcUL, 0xbef66191UL, 0x848a46c6UL,
-    0xbfa21ac0UL, 0xb16435faUL, 0x3ee1d3ecUL, 0x2a1aa832UL, 0x3f9c71eaUL,
-    0xfdd299efUL, 0xbec9dd1aUL, 0x3f8dbaafUL, 0xbf793363UL, 0x309fc6eaUL,
-    0x3eb415d6UL, 0xbee60471UL, 0x3f6b83baUL, 0x94a0a697UL, 0xbe9dae11UL,
-    0x3e5c67b3UL, 0xbf4fd07bUL, 0x9a8f3e3eUL, 0x3e86bd75UL, 0xa4beb7a4UL,
-    0x3f3d1eb1UL, 0x29cfc000UL, 0xbfc549ceUL, 0xbf159358UL, 0xbd397b33UL,
-    0x00000000UL, 0x00000000UL, 0x871fee6cUL, 0x3fd666f0UL, 0x00000000UL,
-    0x3ff00000UL, 0x00000000UL, 0xfffffff8UL, 0x7d98a556UL, 0xbf1a3958UL,
-    0x00000000UL, 0x00000000UL, 0x9d88dc01UL, 0x3f0704c2UL, 0x00000000UL,
-    0x00000000UL, 0x73742a2bUL, 0xbeed054aUL, 0x58844587UL, 0xbf9c2a13UL,
-    0x55688a79UL, 0x3ed7a326UL, 0xee33f1d6UL, 0x3f9a48f4UL, 0xa8dc9888UL,
-    0xbebf8939UL, 0xaad4b5b8UL, 0xbf72f746UL, 0x9102efa1UL, 0x3ea88f82UL,
-    0xdabc29cfUL, 0x3f678228UL, 0x9289afb8UL, 0xbe90f456UL, 0x741fb4edUL,
-    0xbf46f3a3UL, 0xa97f6663UL, 0x3e79b4bfUL, 0xca89ff3fUL, 0x3f36db70UL,
-    0xa8a2a000UL, 0xbfc0ee13UL, 0x3da24be1UL, 0xbd338b9fUL, 0x00000000UL,
-    0x00000000UL, 0x11cd6c69UL, 0x3fd601fdUL, 0x00000000UL, 0x3ff00000UL,
-    0x00000000UL, 0xfffffff8UL, 0x1a154b97UL, 0xbf116b01UL, 0x00000000UL,
-    0x00000000UL, 0x2d427630UL, 0x3f0147bfUL, 0x00000000UL, 0x00000000UL,
-    0xb93820c8UL, 0xbee264d4UL, 0xbb6cbb18UL, 0xbf94ab8cUL, 0x888d4d92UL,
-    0x3ed0568bUL, 0x60730f7cUL, 0x3f98b19bUL, 0xe4b1fb11UL, 0xbeb2f950UL,
-    0x22cf9f74UL, 0xbf6b21cdUL, 0x4a3ff0a6UL, 0x3e9f499eUL, 0xfd2b83ceUL,
-    0x3f64aad7UL, 0x637b73afUL, 0xbe83487cUL, 0xe522591aUL, 0xbf3fc092UL,
-    0xa158e8bcUL, 0x3e6e3aaeUL, 0xe5e82ffaUL, 0x3f329d2fUL, 0xd636a000UL,
-    0xbfb9477fUL, 0xc2c2d2bcUL, 0xbd135ef9UL, 0x00000000UL, 0x00000000UL,
-    0xf2fdb123UL, 0x3fd5b566UL, 0x00000000UL, 0x3ff00000UL, 0x00000000UL,
-    0xfffffff8UL, 0xc41acb64UL, 0xbf05448dUL, 0x00000000UL, 0x00000000UL,
-    0xdbb03d6fUL, 0x3efb7ad2UL, 0x00000000UL, 0x00000000UL, 0x9e42962dUL,
-    0xbed5aea5UL, 0x2579f8efUL, 0xbf8b2398UL, 0x288a1ed9UL, 0x3ec81441UL,
-    0xb0198dc5UL, 0x3f979a3aUL, 0x2fdfe253UL, 0xbea57cd3UL, 0x5766336fUL,
-    0xbf617caaUL, 0x600944c3UL, 0x3e954ed6UL, 0xa4e0aaf8UL, 0x3f62c646UL,
-    0x6b8fb29cUL, 0xbe74e3a3UL, 0xdc4c0409UL, 0xbf33f952UL, 0x9bffe365UL,
-    0x3e6301ecUL, 0xb8869e44UL, 0x3f2fc566UL, 0xe1e04000UL, 0xbfb0cc62UL,
-    0x016b907fUL, 0xbd119cbcUL, 0x00000000UL, 0x00000000UL, 0xe6b9d8faUL,
-    0x3fd57fb3UL, 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0xfffffff8UL,
-    0x5daf22a6UL, 0xbef429d7UL, 0x00000000UL, 0x00000000UL, 0x06bca545UL,
-    0x3ef7a27dUL, 0x00000000UL, 0x00000000UL, 0x7211c19aUL, 0xbec41c3eUL,
-    0x956ed53eUL, 0xbf7ae3f4UL, 0xee750e72UL, 0x3ec3901bUL, 0x91d443f5UL,
-    0x3f96f713UL, 0x36661e6cUL, 0xbe936e09UL, 0x506f9381UL, 0xbf5122e8UL,
-    0xcb6dd43fUL, 0x3e9041b9UL, 0x6698b2ffUL, 0x3f61b0c7UL, 0x576bf12bUL,
-    0xbe625a8aUL, 0xe5a0e9dcUL, 0xbf23499dUL, 0x110384ddUL, 0x3e5b1c2cUL,
-    0x68d43db6UL, 0x3f2cb899UL, 0x6ecac000UL, 0xbfa0c414UL, 0xcd7dd58cUL,
-    0x3d13500fUL, 0x00000000UL, 0x00000000UL, 0x85a2c8fbUL, 0x3fd55fe0UL,
-    0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0xfffffff8UL, 0x00000000UL,
-    0x00000000UL, 0x00000000UL, 0x00000000UL, 0x2bf70ebeUL, 0x3ef66a8fUL,
-    0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
-    0x00000000UL, 0xd644267fUL, 0x3ec22805UL, 0x16c16c17UL, 0x3f96c16cUL,
-    0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0xc4e09162UL,
-    0x3e8d6db2UL, 0xbc011567UL, 0x3f61566aUL, 0x00000000UL, 0x00000000UL,
-    0x00000000UL, 0x00000000UL, 0x1f79955cUL, 0x3e57da4eUL, 0x9334ef0bUL,
-    0x3f2bbd77UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
-    0x00000000UL, 0x00000000UL, 0x55555555UL, 0x3fd55555UL, 0x00000000UL,
-    0x3ff00000UL, 0x00000000UL, 0xfffffff8UL, 0x5daf22a6UL, 0x3ef429d7UL,
-    0x00000000UL, 0x00000000UL, 0x06bca545UL, 0x3ef7a27dUL, 0x00000000UL,
-    0x00000000UL, 0x7211c19aUL, 0x3ec41c3eUL, 0x956ed53eUL, 0x3f7ae3f4UL,
-    0xee750e72UL, 0x3ec3901bUL, 0x91d443f5UL, 0x3f96f713UL, 0x36661e6cUL,
-    0x3e936e09UL, 0x506f9381UL, 0x3f5122e8UL, 0xcb6dd43fUL, 0x3e9041b9UL,
-    0x6698b2ffUL, 0x3f61b0c7UL, 0x576bf12bUL, 0x3e625a8aUL, 0xe5a0e9dcUL,
-    0x3f23499dUL, 0x110384ddUL, 0x3e5b1c2cUL, 0x68d43db6UL, 0x3f2cb899UL,
-    0x6ecac000UL, 0x3fa0c414UL, 0xcd7dd58cUL, 0xbd13500fUL, 0x00000000UL,
-    0x00000000UL, 0x85a2c8fbUL, 0x3fd55fe0UL, 0x00000000UL, 0x3ff00000UL,
-    0x00000000UL, 0xfffffff8UL, 0xc41acb64UL, 0x3f05448dUL, 0x00000000UL,
-    0x00000000UL, 0xdbb03d6fUL, 0x3efb7ad2UL, 0x00000000UL, 0x00000000UL,
-    0x9e42962dUL, 0x3ed5aea5UL, 0x2579f8efUL, 0x3f8b2398UL, 0x288a1ed9UL,
-    0x3ec81441UL, 0xb0198dc5UL, 0x3f979a3aUL, 0x2fdfe253UL, 0x3ea57cd3UL,
-    0x5766336fUL, 0x3f617caaUL, 0x600944c3UL, 0x3e954ed6UL, 0xa4e0aaf8UL,
-    0x3f62c646UL, 0x6b8fb29cUL, 0x3e74e3a3UL, 0xdc4c0409UL, 0x3f33f952UL,
-    0x9bffe365UL, 0x3e6301ecUL, 0xb8869e44UL, 0x3f2fc566UL, 0xe1e04000UL,
-    0x3fb0cc62UL, 0x016b907fUL, 0x3d119cbcUL, 0x00000000UL, 0x00000000UL,
-    0xe6b9d8faUL, 0x3fd57fb3UL, 0x00000000UL, 0x3ff00000UL, 0x00000000UL,
-    0xfffffff8UL, 0x1a154b97UL, 0x3f116b01UL, 0x00000000UL, 0x00000000UL,
-    0x2d427630UL, 0x3f0147bfUL, 0x00000000UL, 0x00000000UL, 0xb93820c8UL,
-    0x3ee264d4UL, 0xbb6cbb18UL, 0x3f94ab8cUL, 0x888d4d92UL, 0x3ed0568bUL,
-    0x60730f7cUL, 0x3f98b19bUL, 0xe4b1fb11UL, 0x3eb2f950UL, 0x22cf9f74UL,
-    0x3f6b21cdUL, 0x4a3ff0a6UL, 0x3e9f499eUL, 0xfd2b83ceUL, 0x3f64aad7UL,
-    0x637b73afUL, 0x3e83487cUL, 0xe522591aUL, 0x3f3fc092UL, 0xa158e8bcUL,
-    0x3e6e3aaeUL, 0xe5e82ffaUL, 0x3f329d2fUL, 0xd636a000UL, 0x3fb9477fUL,
-    0xc2c2d2bcUL, 0x3d135ef9UL, 0x00000000UL, 0x00000000UL, 0xf2fdb123UL,
-    0x3fd5b566UL, 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0xfffffff8UL,
-    0x7d98a556UL, 0x3f1a3958UL, 0x00000000UL, 0x00000000UL, 0x9d88dc01UL,
-    0x3f0704c2UL, 0x00000000UL, 0x00000000UL, 0x73742a2bUL, 0x3eed054aUL,
-    0x58844587UL, 0x3f9c2a13UL, 0x55688a79UL, 0x3ed7a326UL, 0xee33f1d6UL,
-    0x3f9a48f4UL, 0xa8dc9888UL, 0x3ebf8939UL, 0xaad4b5b8UL, 0x3f72f746UL,
-    0x9102efa1UL, 0x3ea88f82UL, 0xdabc29cfUL, 0x3f678228UL, 0x9289afb8UL,
-    0x3e90f456UL, 0x741fb4edUL, 0x3f46f3a3UL, 0xa97f6663UL, 0x3e79b4bfUL,
-    0xca89ff3fUL, 0x3f36db70UL, 0xa8a2a000UL, 0x3fc0ee13UL, 0x3da24be1UL,
-    0x3d338b9fUL, 0x00000000UL, 0x00000000UL, 0x11cd6c69UL, 0x3fd601fdUL,
-    0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0xfffffff8UL, 0xb9ff07ceUL,
-    0x3f231c78UL, 0x00000000UL, 0x00000000UL, 0xa5517182UL, 0x3f0ff0e0UL,
-    0x00000000UL, 0x00000000UL, 0x790b4cbcUL, 0x3ef66191UL, 0x848a46c6UL,
-    0x3fa21ac0UL, 0xb16435faUL, 0x3ee1d3ecUL, 0x2a1aa832UL, 0x3f9c71eaUL,
-    0xfdd299efUL, 0x3ec9dd1aUL, 0x3f8dbaafUL, 0x3f793363UL, 0x309fc6eaUL,
-    0x3eb415d6UL, 0xbee60471UL, 0x3f6b83baUL, 0x94a0a697UL, 0x3e9dae11UL,
-    0x3e5c67b3UL, 0x3f4fd07bUL, 0x9a8f3e3eUL, 0x3e86bd75UL, 0xa4beb7a4UL,
-    0x3f3d1eb1UL, 0x29cfc000UL, 0x3fc549ceUL, 0xbf159358UL, 0x3d397b33UL,
-    0x00000000UL, 0x00000000UL, 0x871fee6cUL, 0x3fd666f0UL, 0x00000000UL,
-    0x3ff00000UL, 0x00000000UL, 0xfffffff8UL, 0x535ad890UL, 0x3f2b9320UL,
-    0x00000000UL, 0x00000000UL, 0x018fdf1fUL, 0x3f16d61dUL, 0x00000000UL,
-    0x00000000UL, 0x0359f1beUL, 0x3f0139e4UL, 0xa4317c6dUL, 0x3fa67e17UL,
-    0x82672d0fUL, 0x3eebb405UL, 0x2f1b621eUL, 0x3f9f455bUL, 0x51ccf238UL,
-    0x3ed55317UL, 0xf437b9acUL, 0x3f804beeUL, 0xc791a2b5UL, 0x3ec0e993UL,
-    0x919a1db2UL, 0x3f7080c2UL, 0x336a5b0eUL, 0x3eaa48a2UL, 0x0a268358UL,
-    0x3f55a443UL, 0xdfd978e4UL, 0x3e94b61fUL, 0xd7767a58UL, 0x3f431806UL,
-    0x2aea0000UL, 0x3fc9bbe8UL, 0x7723ea61UL, 0x3d3a2369UL, 0x00000000UL,
-    0x00000000UL, 0xdf7796ffUL, 0x3fd6e642UL, 0x00000000UL, 0x3ff00000UL,
-    0x00000000UL, 0xfffffff8UL, 0x4f48b8d3UL, 0x3f33eaf9UL, 0x00000000UL,
-    0x00000000UL, 0x0cf7586fUL, 0x3f20b8eaUL, 0x00000000UL, 0x00000000UL,
-    0xd0258911UL, 0x3f0abaf3UL, 0x23e49fe9UL, 0x3fab5a8cUL, 0x2d53222eUL,
-    0x3ef60d15UL, 0x21169451UL, 0x3fa172b2UL, 0xbb254dbcUL, 0x3ee1d3b5UL,
-    0xdbf93b8eUL, 0x3f84c7dbUL, 0x05b4630bUL, 0x3ecd3364UL, 0xee9aada7UL,
-    0x3f743924UL, 0x794a8297UL, 0x3eb7b7b9UL, 0xe015f797UL, 0x3f5d41f5UL,
-    0xe41a4a56UL, 0x3ea35dfbUL, 0xe4c2a251UL, 0x3f49a2abUL, 0x5af9e000UL,
-    0x3fce49ceUL, 0x8c743719UL, 0xbd1eb860UL, 0x00000000UL, 0x00000000UL,
-    0x1b4863cfUL, 0x3fd78294UL, 0x00000000UL, 0x3ff00000UL, 0x00000000UL,
-    0xfffffff8UL, 0x65965966UL, 0xc0219659UL, 0x00000000UL, 0x00000000UL,
-    0x882c10faUL, 0x402664f4UL, 0x00000000UL, 0x00000000UL, 0x83cd3723UL,
-    0xc02c8342UL, 0x00000000UL, 0xc0000000UL, 0x55e6c23dUL, 0x403226e3UL,
-    0x55555555UL, 0x40055555UL, 0x34451939UL, 0xc0371c96UL, 0xaaaaaaabUL,
-    0xc00aaaaaUL, 0x0e157de0UL, 0x403d6d3dUL, 0x11111111UL, 0x40111111UL,
-    0xa738201fUL, 0xc042bbceUL, 0x05b05b06UL, 0xc015b05bUL, 0x452b75e3UL,
-    0x4047da36UL, 0x1ba1ba1cUL, 0x401ba1baUL, 0x00000000UL, 0xbff00000UL,
-    0x00000000UL, 0x00000000UL, 0x00000000UL, 0x40000000UL, 0x00000000UL,
-    0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
-    0xc7ab4d5aUL, 0xc0085e24UL, 0x00000000UL, 0x00000000UL, 0xe93ea75dUL,
-    0x400b963dUL, 0x00000000UL, 0x00000000UL, 0x94a7f25aUL, 0xc00f37e2UL,
-    0x4b6261cbUL, 0xbff5f984UL, 0x5a9dd812UL, 0x4011aab0UL, 0x74c30018UL,
-    0x3ffaf5a5UL, 0x7f2ce8e3UL, 0xc013fe8bUL, 0xfe8e54faUL, 0xbffd7334UL,
-    0x670d618dUL, 0x4016a10cUL, 0x4db97058UL, 0x4000e012UL, 0x24df44ddUL,
-    0xc0199c5fUL, 0x697d6eceUL, 0xc003006eUL, 0x83298b82UL, 0x401cfc4dUL,
-    0x19d490d6UL, 0x40058c19UL, 0x2ae42850UL, 0xbfea4300UL, 0x118e20e6UL,
-    0x3c7a6db8UL, 0x00000000UL, 0x40000000UL, 0xe33345b8UL, 0xbfd4e526UL,
-    0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x2b2c49d0UL,
-    0xbff2de9cUL, 0x00000000UL, 0x00000000UL, 0x2655bc98UL, 0x3ff33e58UL,
-    0x00000000UL, 0x00000000UL, 0xff691fa2UL, 0xbff3972eUL, 0xe93463bdUL,
-    0xbfeeed87UL, 0x070e10a0UL, 0x3ff3f5b2UL, 0xf4d790a4UL, 0x3ff20c10UL,
-    0xa04e8ea3UL, 0xbff4541aUL, 0x386accd3UL, 0xbff1369eUL, 0x222a66ddUL,
-    0x3ff4b521UL, 0x22a9777eUL, 0x3ff20817UL, 0x52a04a6eUL, 0xbff5178fUL,
-    0xddaa0031UL, 0xbff22137UL, 0x4447d47cUL, 0x3ff57c01UL, 0x1e9c7f1dUL,
-    0x3ff29311UL, 0x2ab7f990UL, 0xbfe561b8UL, 0x209c7df1UL, 0xbc87a8c5UL,
-    0x00000000UL, 0x3ff00000UL, 0x4170bcc6UL, 0x3fdc92d8UL, 0x00000000UL,
-    0x00000000UL, 0x00000000UL, 0x00000000UL, 0xcc03e501UL, 0xbfdff10fUL,
-    0x00000000UL, 0x00000000UL, 0x44a4e845UL, 0x3fddb63bUL, 0x00000000UL,
-    0x00000000UL, 0x3768ad9fUL, 0xbfdb72a4UL, 0x3dd01ccaUL, 0xbfe5fdb9UL,
-    0xa61d2811UL, 0x3fd972b2UL, 0x5645ad0bUL, 0x3fe977f9UL, 0xd013b3abUL,
-    0xbfd78ca3UL, 0xbf0bf914UL, 0xbfe4f192UL, 0x4d53e730UL, 0x3fd5d060UL,
-    0x3f8b9000UL, 0x3fe49933UL, 0xe2b82f08UL, 0xbfd4322aUL, 0x5936a835UL,
-    0xbfe27ae1UL, 0xb1c61c9bUL, 0x3fd2b3fbUL, 0xef478605UL, 0x3fe1659eUL,
-    0x190834ecUL, 0xbfe11ab7UL, 0xcdb625eaUL, 0x3c8e564bUL, 0x00000000UL,
-    0x3ff00000UL, 0xb07217e3UL, 0x3fd248f1UL, 0x00000000UL, 0x00000000UL,
-    0x00000000UL, 0x00000000UL, 0x56f37042UL, 0xbfccfc56UL, 0x00000000UL,
-    0x00000000UL, 0xaa563951UL, 0x3fc90125UL, 0x00000000UL, 0x00000000UL,
-    0x3d0e7c5dUL, 0xbfc50533UL, 0x9bed9b2eUL, 0xbfdf0ed9UL, 0x5fe7c47cUL,
-    0x3fc1f250UL, 0x96c125e5UL, 0x3fe2edd9UL, 0x5a02bbd8UL, 0xbfbe5c71UL,
-    0x86362c20UL, 0xbfda08b7UL, 0x4b4435edUL, 0x3fb9d342UL, 0x4b494091UL,
-    0x3fd911bdUL, 0xb56658beUL, 0xbfb5e4c7UL, 0x93a2fd76UL, 0xbfd3c092UL,
-    0xda271794UL, 0x3fb29910UL, 0x3303df2bUL, 0x3fd189beUL, 0x99fcef32UL,
-    0xbfda8279UL, 0xb68c1467UL, 0xbc708b2fUL, 0x00000000UL, 0x3ff00000UL,
-    0x980c4337UL, 0x3fc5f619UL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
-    0x00000000UL, 0x9314533eUL, 0xbfbb8ec5UL, 0x00000000UL, 0x00000000UL,
-    0x09aa36d0UL, 0x3fb6d3f4UL, 0x00000000UL, 0x00000000UL, 0xdcb427fdUL,
-    0xbfb13950UL, 0xd87ab0bbUL, 0xbfd5335eUL, 0xce0ae8a5UL, 0x3fabb382UL,
-    0x79143126UL, 0x3fddba41UL, 0x5f2b28d4UL, 0xbfa552f1UL, 0x59f21a6dUL,
-    0xbfd015abUL, 0x22c27d95UL, 0x3fa0e984UL, 0xe19fc6aaUL, 0x3fd0576cUL,
-    0x8f2c2950UL, 0xbf9a4898UL, 0xc0b3f22cUL, 0xbfc59462UL, 0x1883a4b8UL,
-    0x3f94b61cUL, 0x3f838640UL, 0x3fc30eb8UL, 0x355c63dcUL, 0xbfd36a08UL,
-    0x1dce993dUL, 0x3c6d704dUL, 0x00000000UL, 0x3ff00000UL, 0x2b82ab63UL,
-    0x3fb78e92UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
-    0x5a279ea3UL, 0xbfaa3407UL, 0x00000000UL, 0x00000000UL, 0x432d65faUL,
-    0x3fa70153UL, 0x00000000UL, 0x00000000UL, 0x891a4602UL, 0xbf9d03efUL,
-    0xd62ca5f8UL, 0xbfca77d9UL, 0xb35f4628UL, 0x3f97a265UL, 0x433258faUL,
-    0x3fd8cf51UL, 0xb58fd909UL, 0xbf8f88e3UL, 0x01771ceaUL, 0xbfc2b154UL,
-    0xf3562f8eUL, 0x3f888f57UL, 0xc028a723UL, 0x3fc7370fUL, 0x20b7f9f0UL,
-    0xbf80f44cUL, 0x214368e9UL, 0xbfb6dfaaUL, 0x28891863UL, 0x3f79b4b6UL,
-    0x172dbbf0UL, 0x3fb6cb8eUL, 0xe0553158UL, 0xbfc975f5UL, 0x593fe814UL,
-    0xbc2ef5d3UL, 0x00000000UL, 0x3ff00000UL, 0x03dec550UL, 0x3fa44203UL,
-    0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x4e435f9bUL,
-    0xbf953f83UL, 0x00000000UL, 0x00000000UL, 0x3c6e8e46UL, 0x3f9b74eaUL,
-    0x00000000UL, 0x00000000UL, 0xda5b7511UL, 0xbf85ad63UL, 0xdc230b9bUL,
-    0xbfb97558UL, 0x26cb3788UL, 0x3f881308UL, 0x76fc4985UL, 0x3fd62ac9UL,
-    0x77bb08baUL, 0xbf757c85UL, 0xb6247521UL, 0xbfb1381eUL, 0x5922170cUL,
-    0x3f754e95UL, 0x8746482dUL, 0x3fc27f83UL, 0x11055b30UL, 0xbf64e391UL,
-    0x3e666320UL, 0xbfa3e609UL, 0x0de9dae3UL, 0x3f6301dfUL, 0x1f1dca06UL,
-    0x3fafa8aeUL, 0x8c5b2da2UL, 0xbfb936bbUL, 0x4e88f7a5UL, 0xbc587d05UL,
-    0x00000000UL, 0x3ff00000UL, 0xa8935dd9UL, 0x3f83dde2UL, 0x00000000UL,
-    0x00000000UL, 0x00000000UL, 0x00000000UL, 0x6dc9c883UL, 0x3fe45f30UL,
-    0x6dc9c883UL, 0x40245f30UL, 0x00000000UL, 0x43780000UL, 0x00000000UL,
-    0x43380000UL, 0x54444000UL, 0x3fb921fbUL, 0x54440000UL, 0x3fb921fbUL,
-    0x67674000UL, 0xbd32e7b9UL, 0x4c4c0000UL, 0x3d468c23UL, 0x3707344aUL,
-    0x3aa8a2e0UL, 0x03707345UL, 0x3ae98a2eUL, 0x00000000UL, 0x80000000UL,
-    0x00000000UL, 0x80000000UL, 0x676733afUL, 0x3d32e7b9UL, 0x00000000UL,
-    0x00000000UL, 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x00000000UL,
-    0x00000000UL, 0x7ff00000UL, 0x00000000UL, 0x00000000UL, 0xfffc0000UL,
-    0xffffffffUL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x43600000UL,
-    0x00000000UL, 0x00000000UL, 0x00000000UL, 0x3c800000UL, 0x00000000UL,
-    0x00000000UL, 0x00000000UL, 0x3ca00000UL, 0x00000000UL, 0x00000000UL,
-    0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL, 0x00000000UL,
-    0x40300000UL, 0x00000000UL, 0x3ff00000UL
-};
-
-void MacroAssembler::fast_tan(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) {
-
-  Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
-  Label L_2TAG_PACKET_4_0_2;
-
-  assert_different_registers(tmp, eax, ecx, edx);
-
-  address static_const_table_tan = (address)_static_const_table_tan;
-
-  subl(rsp, 120);
-  movl(Address(rsp, 56), tmp);
-  lea(tmp, ExternalAddress(static_const_table_tan));
-  movsd(xmm0, Address(rsp, 128));
-  pextrw(eax, xmm0, 3);
-  andl(eax, 32767);
-  subl(eax, 14368);
-  cmpl(eax, 2216);
-  jcc(Assembler::above, L_2TAG_PACKET_0_0_2);
-  movdqu(xmm5, Address(tmp, 5840));
-  movdqu(xmm6, Address(tmp, 5856));
-  unpcklpd(xmm0, xmm0);
-  movdqu(xmm4, Address(tmp, 5712));
-  andpd(xmm4, xmm0);
-  movdqu(xmm1, Address(tmp, 5632));
-  mulpd(xmm1, xmm0);
-  por(xmm5, xmm4);
-  addpd(xmm1, xmm5);
-  movdqu(xmm7, xmm1);
-  unpckhpd(xmm7, xmm7);
-  cvttsd2sil(edx, xmm7);
-  cvttpd2dq(xmm1, xmm1);
-  cvtdq2pd(xmm1, xmm1);
-  mulpd(xmm1, xmm6);
-  movdqu(xmm3, Address(tmp, 5664));
-  movsd(xmm5, Address(tmp, 5728));
-  addl(edx, 469248);
-  movdqu(xmm4, Address(tmp, 5680));
-  mulpd(xmm3, xmm1);
-  andl(edx, 31);
-  mulsd(xmm5, xmm1);
-  movl(ecx, edx);
-  mulpd(xmm4, xmm1);
-  shll(ecx, 1);
-  subpd(xmm0, xmm3);
-  mulpd(xmm1, Address(tmp, 5696));
-  addl(edx, ecx);
-  shll(ecx, 2);
-  addl(edx, ecx);
-  addsd(xmm5, xmm0);
-  movdqu(xmm2, xmm0);
-  subpd(xmm0, xmm4);
-  movsd(xmm6, Address(tmp, 5744));
-  shll(edx, 4);
-  lea(eax, Address(tmp, 0));
-  andpd(xmm5, Address(tmp, 5776));
-  movdqu(xmm3, xmm0);
-  addl(eax, edx);
-  subpd(xmm2, xmm0);
-  unpckhpd(xmm0, xmm0);
-  divsd(xmm6, xmm5);
-  subpd(xmm2, xmm4);
-  movdqu(xmm7, Address(eax, 16));
-  subsd(xmm3, xmm5);
-  mulpd(xmm7, xmm0);
-  subpd(xmm2, xmm1);
-  movdqu(xmm1, Address(eax, 48));
-  mulpd(xmm1, xmm0);
-  movdqu(xmm4, Address(eax, 96));
-  mulpd(xmm4, xmm0);
-  addsd(xmm2, xmm3);
-  movdqu(xmm3, xmm0);
-  mulpd(xmm0, xmm0);
-  addpd(xmm7, Address(eax, 0));
-  addpd(xmm1, Address(eax, 32));
-  mulpd(xmm1, xmm0);
-  addpd(xmm4, Address(eax, 80));
-  addpd(xmm7, xmm1);
-  movdqu(xmm1, Address(eax, 112));
-  mulpd(xmm1, xmm0);
-  mulpd(xmm0, xmm0);
-  addpd(xmm4, xmm1);
-  movdqu(xmm1, Address(eax, 64));
-  mulpd(xmm1, xmm0);
-  addpd(xmm7, xmm1);
-  movdqu(xmm1, xmm3);
-  mulpd(xmm3, xmm0);
-  mulsd(xmm0, xmm0);
-  mulpd(xmm1, Address(eax, 144));
-  mulpd(xmm4, xmm3);
-  movdqu(xmm3, xmm1);
-  addpd(xmm7, xmm4);
-  movdqu(xmm4, xmm1);
-  mulsd(xmm0, xmm7);
-  unpckhpd(xmm7, xmm7);
-  addsd(xmm0, xmm7);
-  unpckhpd(xmm1, xmm1);
-  addsd(xmm3, xmm1);
-  subsd(xmm4, xmm3);
-  addsd(xmm1, xmm4);
-  movdqu(xmm4, xmm2);
-  movsd(xmm7, Address(eax, 144));
-  unpckhpd(xmm2, xmm2);
-  addsd(xmm7, Address(eax, 152));
-  mulsd(xmm7, xmm2);
-  addsd(xmm7, Address(eax, 136));
-  addsd(xmm7, xmm1);
-  addsd(xmm0, xmm7);
-  movsd(xmm7, Address(tmp, 5744));
-  mulsd(xmm4, xmm6);
-  movsd(xmm2, Address(eax, 168));
-  andpd(xmm2, xmm6);
-  mulsd(xmm5, xmm2);
-  mulsd(xmm6, Address(eax, 160));
-  subsd(xmm7, xmm5);
-  subsd(xmm2, Address(eax, 128));
-  subsd(xmm7, xmm4);
-  mulsd(xmm7, xmm6);
-  movdqu(xmm4, xmm3);
-  subsd(xmm3, xmm2);
-  addsd(xmm2, xmm3);
-  subsd(xmm4, xmm2);
-  addsd(xmm0, xmm4);
-  subsd(xmm0, xmm7);
-  addsd(xmm0, xmm3);
-  movsd(Address(rsp, 0), xmm0);
-  fld_d(Address(rsp, 0));
-  jmp(L_2TAG_PACKET_1_0_2);
-
-  bind(L_2TAG_PACKET_0_0_2);
-  jcc(Assembler::greater, L_2TAG_PACKET_2_0_2);
-  shrl(eax, 4);
-  cmpl(eax, 268434558);
-  jcc(Assembler::notEqual, L_2TAG_PACKET_3_0_2);
-  movdqu(xmm3, xmm0);
-  mulsd(xmm3, Address(tmp, 5808));
-
-  bind(L_2TAG_PACKET_3_0_2);
-  movsd(xmm3, Address(tmp, 5792));
-  mulsd(xmm3, xmm0);
-  addsd(xmm3, xmm0);
-  mulsd(xmm3, Address(tmp, 5808));
-  movsd(Address(rsp, 0), xmm3);
-  fld_d(Address(rsp, 0));
-  jmp(L_2TAG_PACKET_1_0_2);
-
-  bind(L_2TAG_PACKET_2_0_2);
-  movq(xmm7, Address(tmp, 5712));
-  andpd(xmm7, xmm0);
-  xorpd(xmm7, xmm0);
-  ucomisd(xmm7, Address(tmp, 5760));
-  jcc(Assembler::equal, L_2TAG_PACKET_4_0_2);
-  subl(rsp, 32);
-  movsd(Address(rsp, 0), xmm0);
-  lea(eax, Address(rsp, 40));
-  movl(Address(rsp, 8), eax);
-  movl(eax, 2);
-  movl(Address(rsp, 12), eax);
-  call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlibm_tan_cot_huge())));
-  addl(rsp, 32);
-  fld_d(Address(rsp, 8));
-  jmp(L_2TAG_PACKET_1_0_2);
-
-  bind(L_2TAG_PACKET_4_0_2);
-  movq(Address(rsp, 0), xmm0);
-  fld_d(Address(rsp, 0));
-  fsub_d(Address(rsp, 0));
-
-  bind(L_2TAG_PACKET_1_0_2);
-  movl(tmp, Address(rsp, 56));
-}
diff --git a/src/hotspot/cpu/x86/sharedRuntime_x86_32.cpp b/src/hotspot/cpu/x86/sharedRuntime_x86_32.cpp
deleted file mode 100644
index 8e5e54f244c..00000000000
--- a/src/hotspot/cpu/x86/sharedRuntime_x86_32.cpp
+++ /dev/null
@@ -1,2854 +0,0 @@
-/*
- * Copyright (c) 2003, 2025, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-#include "asm/macroAssembler.hpp"
-#include "asm/macroAssembler.inline.hpp"
-#include "code/compiledIC.hpp"
-#include "code/debugInfoRec.hpp"
-#include "code/nativeInst.hpp"
-#include "code/vtableStubs.hpp"
-#include "compiler/oopMap.hpp"
-#include "gc/shared/gcLocker.hpp"
-#include "gc/shared/barrierSet.hpp"
-#include "gc/shared/barrierSetAssembler.hpp"
-#include "interpreter/interpreter.hpp"
-#include "logging/log.hpp"
-#include "memory/resourceArea.hpp"
-#include "oops/klass.inline.hpp"
-#include "prims/methodHandles.hpp"
-#include "runtime/jniHandles.hpp"
-#include "runtime/safepointMechanism.hpp"
-#include "runtime/sharedRuntime.hpp"
-#include "runtime/signature.hpp"
-#include "runtime/stubRoutines.hpp"
-#include "runtime/timerTrace.hpp"
-#include "runtime/vframeArray.hpp"
-#include "runtime/vm_version.hpp"
-#include "utilities/align.hpp"
-#include "vmreg_x86.inline.hpp"
-#ifdef COMPILER1
-#include "c1/c1_Runtime1.hpp"
-#endif
-#ifdef COMPILER2
-#include "opto/runtime.hpp"
-#endif
-
-#define __ masm->
-
-#ifdef PRODUCT
-#define BLOCK_COMMENT(str) /* nothing */
-#else
-#define BLOCK_COMMENT(str) __ block_comment(str)
-#endif // PRODUCT
-
-const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size;
-
-class RegisterSaver {
-  // Capture info about frame layout
-#define DEF_XMM_OFFS(regnum) xmm ## regnum ## _off = xmm_off + (regnum)*16/BytesPerInt, xmm ## regnum ## H_off
-  enum layout {
-                fpu_state_off = 0,
-                fpu_state_end = fpu_state_off+FPUStateSizeInWords,
-                st0_off, st0H_off,
-                st1_off, st1H_off,
-                st2_off, st2H_off,
-                st3_off, st3H_off,
-                st4_off, st4H_off,
-                st5_off, st5H_off,
-                st6_off, st6H_off,
-                st7_off, st7H_off,
-                xmm_off,
-                DEF_XMM_OFFS(0),
-                DEF_XMM_OFFS(1),
-                DEF_XMM_OFFS(2),
-                DEF_XMM_OFFS(3),
-                DEF_XMM_OFFS(4),
-                DEF_XMM_OFFS(5),
-                DEF_XMM_OFFS(6),
-                DEF_XMM_OFFS(7),
-                flags_off = xmm7_off + 16/BytesPerInt + 1, // 16-byte stack alignment fill word
-                rdi_off,
-                rsi_off,
-                ignore_off,  // extra copy of rbp,
-                rsp_off,
-                rbx_off,
-                rdx_off,
-                rcx_off,
-                rax_off,
-                // The frame sender code expects that rbp will be in the "natural" place and
-                // will override any oopMap setting for it. We must therefore force the layout
-                // so that it agrees with the frame sender code.
-                rbp_off,
-                return_off,      // slot for return address
-                reg_save_size };
-  enum { FPU_regs_live = flags_off - fpu_state_end };
-
-  public:
-
-  static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words,
-                                     int* total_frame_words, bool verify_fpu = true, bool save_vectors = false);
-  static void restore_live_registers(MacroAssembler* masm, bool restore_vectors = false);
-
-  static int rax_offset() { return rax_off; }
-  static int rbx_offset() { return rbx_off; }
-
-  // Offsets into the register save area
-  // Used by deoptimization when it is managing result register
-  // values on its own
-
-  static int raxOffset(void) { return rax_off; }
-  static int rdxOffset(void) { return rdx_off; }
-  static int rbxOffset(void) { return rbx_off; }
-  static int xmm0Offset(void) { return xmm0_off; }
-  // This really returns a slot in the fp save area, which one is not important
-  static int fpResultOffset(void) { return st0_off; }
-
-  // During deoptimization only the result register need to be restored
-  // all the other values have already been extracted.
-
-  static void restore_result_registers(MacroAssembler* masm);
-
-};
-
-OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words,
-                                           int* total_frame_words, bool verify_fpu, bool save_vectors) {
-  int num_xmm_regs = XMMRegister::number_of_registers;
-  int ymm_bytes = num_xmm_regs * 16;
-  int zmm_bytes = num_xmm_regs * 32;
-#ifdef COMPILER2
-  int opmask_state_bytes = KRegister::number_of_registers * 8;
-  if (save_vectors) {
-    assert(UseAVX > 0, "Vectors larger than 16 byte long are supported only with AVX");
-    assert(MaxVectorSize <= 64, "Only up to 64 byte long vectors are supported");
-    // Save upper half of YMM registers
-    int vect_bytes = ymm_bytes;
-    if (UseAVX > 2) {
-      // Save upper half of ZMM registers as well
-      vect_bytes += zmm_bytes;
-      additional_frame_words += opmask_state_bytes / wordSize;
-    }
-    additional_frame_words += vect_bytes / wordSize;
-  }
-#else
-  assert(!save_vectors, "vectors are generated only by C2");
-#endif
-  int frame_size_in_bytes = (reg_save_size + additional_frame_words) * wordSize;
-  int frame_words = frame_size_in_bytes / wordSize;
-  *total_frame_words = frame_words;
-
-  assert(FPUStateSizeInWords == 27, "update stack layout");
-
-  // save registers, fpu state, and flags
-  // We assume caller has already has return address slot on the stack
-  // We push epb twice in this sequence because we want the real rbp,
-  // to be under the return like a normal enter and we want to use pusha
-  // We push by hand instead of using push.
-  __ enter();
-  __ pusha();
-  __ pushf();
-  __ subptr(rsp,FPU_regs_live*wordSize); // Push FPU registers space
-  __ push_FPU_state();          // Save FPU state & init
-
-  if (verify_fpu) {
-    // Some stubs may have non standard FPU control word settings so
-    // only check and reset the value when it required to be the
-    // standard value.  The safepoint blob in particular can be used
-    // in methods which are using the 24 bit control word for
-    // optimized float math.
-
-#ifdef ASSERT
-    // Make sure the control word has the expected value
-    Label ok;
-    __ cmpw(Address(rsp, 0), StubRoutines::x86::fpu_cntrl_wrd_std());
-    __ jccb(Assembler::equal, ok);
-    __ stop("corrupted control word detected");
-    __ bind(ok);
-#endif
-
-    // Reset the control word to guard against exceptions being unmasked
-    // since fstp_d can cause FPU stack underflow exceptions.  Write it
-    // into the on stack copy and then reload that to make sure that the
-    // current and future values are correct.
-    __ movw(Address(rsp, 0), StubRoutines::x86::fpu_cntrl_wrd_std());
-  }
-
-  __ frstor(Address(rsp, 0));
-  if (!verify_fpu) {
-    // Set the control word so that exceptions are masked for the
-    // following code.
-    __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
-  }
-
-  int off = st0_off;
-  int delta = st1_off - off;
-
-  // Save the FPU registers in de-opt-able form
-  for (int n = 0; n < FloatRegister::number_of_registers; n++) {
-    __ fstp_d(Address(rsp, off*wordSize));
-    off += delta;
-  }
-
-  off = xmm0_off;
-  delta = xmm1_off - off;
-  if(UseSSE == 1) {
-    // Save the XMM state
-    for (int n = 0; n < num_xmm_regs; n++) {
-      __ movflt(Address(rsp, off*wordSize), as_XMMRegister(n));
-      off += delta;
-    }
-  } else if(UseSSE >= 2) {
-    // Save whole 128bit (16 bytes) XMM registers
-    for (int n = 0; n < num_xmm_regs; n++) {
-      __ movdqu(Address(rsp, off*wordSize), as_XMMRegister(n));
-      off += delta;
-    }
-  }
-
-#ifdef COMPILER2
-  if (save_vectors) {
-    __ subptr(rsp, ymm_bytes);
-    // Save upper half of YMM registers
-    for (int n = 0; n < num_xmm_regs; n++) {
-      __ vextractf128_high(Address(rsp, n*16), as_XMMRegister(n));
-    }
-    if (UseAVX > 2) {
-      __ subptr(rsp, zmm_bytes);
-      // Save upper half of ZMM registers
-      for (int n = 0; n < num_xmm_regs; n++) {
-        __ vextractf64x4_high(Address(rsp, n*32), as_XMMRegister(n));
-      }
-      __ subptr(rsp, opmask_state_bytes);
-      // Save opmask registers
-      for (int n = 0; n < KRegister::number_of_registers; n++) {
-        __ kmov(Address(rsp, n*8), as_KRegister(n));
-      }
-    }
-  }
-#else
-  assert(!save_vectors, "vectors are generated only by C2");
-#endif
-
-  __ vzeroupper();
-
-  // Set an oopmap for the call site.  This oopmap will map all
-  // oop-registers and debug-info registers as callee-saved.  This
-  // will allow deoptimization at this safepoint to find all possible
-  // debug-info recordings, as well as let GC find all oops.
-
-  OopMapSet *oop_maps = new OopMapSet();
-  OopMap* map =  new OopMap( frame_words, 0 );
-
-#define STACK_OFFSET(x) VMRegImpl::stack2reg((x) + additional_frame_words)
-#define NEXTREG(x) (x)->as_VMReg()->next()
-
-  map->set_callee_saved(STACK_OFFSET(rax_off), rax->as_VMReg());
-  map->set_callee_saved(STACK_OFFSET(rcx_off), rcx->as_VMReg());
-  map->set_callee_saved(STACK_OFFSET(rdx_off), rdx->as_VMReg());
-  map->set_callee_saved(STACK_OFFSET(rbx_off), rbx->as_VMReg());
-  // rbp, location is known implicitly, no oopMap
-  map->set_callee_saved(STACK_OFFSET(rsi_off), rsi->as_VMReg());
-  map->set_callee_saved(STACK_OFFSET(rdi_off), rdi->as_VMReg());
-
-  // %%% This is really a waste but we'll keep things as they were for now for the upper component
-  off = st0_off;
-  delta = st1_off - off;
-  for (int n = 0; n < FloatRegister::number_of_registers; n++) {
-    FloatRegister freg_name = as_FloatRegister(n);
-    map->set_callee_saved(STACK_OFFSET(off), freg_name->as_VMReg());
-    map->set_callee_saved(STACK_OFFSET(off+1), NEXTREG(freg_name));
-    off += delta;
-  }
-  off = xmm0_off;
-  delta = xmm1_off - off;
-  for (int n = 0; n < num_xmm_regs; n++) {
-    XMMRegister xmm_name = as_XMMRegister(n);
-    map->set_callee_saved(STACK_OFFSET(off), xmm_name->as_VMReg());
-    map->set_callee_saved(STACK_OFFSET(off+1), NEXTREG(xmm_name));
-    off += delta;
-  }
-#undef NEXTREG
-#undef STACK_OFFSET
-
-  return map;
-}
-
-void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) {
-  int opmask_state_bytes = 0;
-  int additional_frame_bytes = 0;
-  int num_xmm_regs = XMMRegister::number_of_registers;
-  int ymm_bytes = num_xmm_regs * 16;
-  int zmm_bytes = num_xmm_regs * 32;
-  // Recover XMM & FPU state
-#ifdef COMPILER2
-  if (restore_vectors) {
-    assert(UseAVX > 0, "Vectors larger than 16 byte long are supported only with AVX");
-    assert(MaxVectorSize <= 64, "Only up to 64 byte long vectors are supported");
-    // Save upper half of YMM registers
-    additional_frame_bytes = ymm_bytes;
-    if (UseAVX > 2) {
-      // Save upper half of ZMM registers as well
-      additional_frame_bytes += zmm_bytes;
-      opmask_state_bytes = KRegister::number_of_registers * 8;
-      additional_frame_bytes += opmask_state_bytes;
-    }
-  }
-#else
-  assert(!restore_vectors, "vectors are generated only by C2");
-#endif
-
-  int off = xmm0_off;
-  int delta = xmm1_off - off;
-
-  __ vzeroupper();
-
-  if (UseSSE == 1) {
-    // Restore XMM registers
-    assert(additional_frame_bytes == 0, "");
-    for (int n = 0; n < num_xmm_regs; n++) {
-      __ movflt(as_XMMRegister(n), Address(rsp, off*wordSize));
-      off += delta;
-    }
-  } else if (UseSSE >= 2) {
-    // Restore whole 128bit (16 bytes) XMM registers. Do this before restoring YMM and
-    // ZMM because the movdqu instruction zeros the upper part of the XMM register.
-    for (int n = 0; n < num_xmm_regs; n++) {
-      __ movdqu(as_XMMRegister(n), Address(rsp, off*wordSize+additional_frame_bytes));
-      off += delta;
-    }
-  }
-
-  if (restore_vectors) {
-    off = additional_frame_bytes - ymm_bytes;
-    // Restore upper half of YMM registers.
-    for (int n = 0; n < num_xmm_regs; n++) {
-      __ vinsertf128_high(as_XMMRegister(n), Address(rsp, n*16+off));
-    }
-    if (UseAVX > 2) {
-      // Restore upper half of ZMM registers.
-      off = opmask_state_bytes;
-      for (int n = 0; n < num_xmm_regs; n++) {
-        __ vinsertf64x4_high(as_XMMRegister(n), Address(rsp, n*32+off));
-      }
-      for (int n = 0; n < KRegister::number_of_registers; n++) {
-        __ kmov(as_KRegister(n), Address(rsp, n*8));
-      }
-    }
-    __ addptr(rsp, additional_frame_bytes);
-  }
-
-  __ pop_FPU_state();
-  __ addptr(rsp, FPU_regs_live*wordSize); // Pop FPU registers
-
-  __ popf();
-  __ popa();
-  // Get the rbp, described implicitly by the frame sender code (no oopMap)
-  __ pop(rbp);
-}
-
-void RegisterSaver::restore_result_registers(MacroAssembler* masm) {
-
-  // Just restore result register. Only used by deoptimization. By
-  // now any callee save register that needs to be restore to a c2
-  // caller of the deoptee has been extracted into the vframeArray
-  // and will be stuffed into the c2i adapter we create for later
-  // restoration so only result registers need to be restored here.
-  //
-
-  __ frstor(Address(rsp, 0));      // Restore fpu state
-
-  // Recover XMM & FPU state
-  if( UseSSE == 1 ) {
-    __ movflt(xmm0, Address(rsp, xmm0_off*wordSize));
-  } else if( UseSSE >= 2 ) {
-    __ movdbl(xmm0, Address(rsp, xmm0_off*wordSize));
-  }
-  __ movptr(rax, Address(rsp, rax_off*wordSize));
-  __ movptr(rdx, Address(rsp, rdx_off*wordSize));
-  // Pop all of the register save are off the stack except the return address
-  __ addptr(rsp, return_off * wordSize);
-}
-
-// Is vector's size (in bytes) bigger than a size saved by default?
-// 16 bytes XMM registers are saved by default using SSE2 movdqu instructions.
-// Note, MaxVectorSize == 0 with UseSSE < 2 and vectors are not generated.
-bool SharedRuntime::is_wide_vector(int size) {
-  return size > 16;
-}
-
-// The java_calling_convention describes stack locations as ideal slots on
-// a frame with no abi restrictions. Since we must observe abi restrictions
-// (like the placement of the register window) the slots must be biased by
-// the following value.
-static int reg2offset_in(VMReg r) {
-  // Account for saved rbp, and return address
-  // This should really be in_preserve_stack_slots
-  return (r->reg2stack() + 2) * VMRegImpl::stack_slot_size;
-}
-
-static int reg2offset_out(VMReg r) {
-  return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
-}
-
-// ---------------------------------------------------------------------------
-// Read the array of BasicTypes from a signature, and compute where the
-// arguments should go.  Values in the VMRegPair regs array refer to 4-byte
-// quantities.  Values less than SharedInfo::stack0 are registers, those above
-// refer to 4-byte stack slots.  All stack slots are based off of the stack pointer
-// as framesizes are fixed.
-// VMRegImpl::stack0 refers to the first slot 0(sp).
-// and VMRegImpl::stack0+1 refers to the memory word 4-byes higher.
-// Register up to Register::number_of_registers are the 32-bit
-// integer registers.
-
-// Pass first two oop/int args in registers ECX and EDX.
-// Pass first two float/double args in registers XMM0 and XMM1.
-// Doubles have precedence, so if you pass a mix of floats and doubles
-// the doubles will grab the registers before the floats will.
-
-// Note: the INPUTS in sig_bt are in units of Java argument words, which are
-// either 32-bit or 64-bit depending on the build.  The OUTPUTS are in 32-bit
-// units regardless of build. Of course for i486 there is no 64 bit build
-
-
-// ---------------------------------------------------------------------------
-// The compiled Java calling convention.
-// Pass first two oop/int args in registers ECX and EDX.
-// Pass first two float/double args in registers XMM0 and XMM1.
-// Doubles have precedence, so if you pass a mix of floats and doubles
-// the doubles will grab the registers before the floats will.
-int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
-                                           VMRegPair *regs,
-                                           int total_args_passed) {
-  uint    stack = 0;          // Starting stack position for args on stack
-
-
-  // Pass first two oop/int args in registers ECX and EDX.
-  uint reg_arg0 = 9999;
-  uint reg_arg1 = 9999;
-
-  // Pass first two float/double args in registers XMM0 and XMM1.
-  // Doubles have precedence, so if you pass a mix of floats and doubles
-  // the doubles will grab the registers before the floats will.
-  // CNC - TURNED OFF FOR non-SSE.
-  //       On Intel we have to round all doubles (and most floats) at
-  //       call sites by storing to the stack in any case.
-  // UseSSE=0 ==> Don't Use ==> 9999+0
-  // UseSSE=1 ==> Floats only ==> 9999+1
-  // UseSSE>=2 ==> Floats or doubles ==> 9999+2
-  enum { fltarg_dontuse = 9999+0, fltarg_float_only = 9999+1, fltarg_flt_dbl = 9999+2 };
-  uint fargs = (UseSSE>=2) ? 2 : UseSSE;
-  uint freg_arg0 = 9999+fargs;
-  uint freg_arg1 = 9999+fargs;
-
-  // Pass doubles & longs aligned on the stack.  First count stack slots for doubles
-  int i;
-  for( i = 0; i < total_args_passed; i++) {
-    if( sig_bt[i] == T_DOUBLE ) {
-      // first 2 doubles go in registers
-      if( freg_arg0 == fltarg_flt_dbl ) freg_arg0 = i;
-      else if( freg_arg1 == fltarg_flt_dbl ) freg_arg1 = i;
-      else // Else double is passed low on the stack to be aligned.
-        stack += 2;
-    } else if( sig_bt[i] == T_LONG ) {
-      stack += 2;
-    }
-  }
-  int dstack = 0;             // Separate counter for placing doubles
-
-  // Now pick where all else goes.
-  for( i = 0; i < total_args_passed; i++) {
-    // From the type and the argument number (count) compute the location
-    switch( sig_bt[i] ) {
-    case T_SHORT:
-    case T_CHAR:
-    case T_BYTE:
-    case T_BOOLEAN:
-    case T_INT:
-    case T_ARRAY:
-    case T_OBJECT:
-    case T_ADDRESS:
-      if( reg_arg0 == 9999 )  {
-        reg_arg0 = i;
-        regs[i].set1(rcx->as_VMReg());
-      } else if( reg_arg1 == 9999 )  {
-        reg_arg1 = i;
-        regs[i].set1(rdx->as_VMReg());
-      } else {
-        regs[i].set1(VMRegImpl::stack2reg(stack++));
-      }
-      break;
-    case T_FLOAT:
-      if( freg_arg0 == fltarg_flt_dbl || freg_arg0 == fltarg_float_only ) {
-        freg_arg0 = i;
-        regs[i].set1(xmm0->as_VMReg());
-      } else if( freg_arg1 == fltarg_flt_dbl || freg_arg1 == fltarg_float_only ) {
-        freg_arg1 = i;
-        regs[i].set1(xmm1->as_VMReg());
-      } else {
-        regs[i].set1(VMRegImpl::stack2reg(stack++));
-      }
-      break;
-    case T_LONG:
-      assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "missing Half" );
-      regs[i].set2(VMRegImpl::stack2reg(dstack));
-      dstack += 2;
-      break;
-    case T_DOUBLE:
-      assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "missing Half" );
-      if( freg_arg0 == (uint)i ) {
-        regs[i].set2(xmm0->as_VMReg());
-      } else if( freg_arg1 == (uint)i ) {
-        regs[i].set2(xmm1->as_VMReg());
-      } else {
-        regs[i].set2(VMRegImpl::stack2reg(dstack));
-        dstack += 2;
-      }
-      break;
-    case T_VOID: regs[i].set_bad(); break;
-      break;
-    default:
-      ShouldNotReachHere();
-      break;
-    }
-  }
-
-  return stack;
-}
-
-// Patch the callers callsite with entry to compiled code if it exists.
-static void patch_callers_callsite(MacroAssembler *masm) {
-  Label L;
-  __ cmpptr(Address(rbx, in_bytes(Method::code_offset())), NULL_WORD);
-  __ jcc(Assembler::equal, L);
-  // Schedule the branch target address early.
-  // Call into the VM to patch the caller, then jump to compiled callee
-  // rax, isn't live so capture return address while we easily can
-  __ movptr(rax, Address(rsp, 0));
-  __ pusha();
-  __ pushf();
-
-  if (UseSSE == 1) {
-    __ subptr(rsp, 2*wordSize);
-    __ movflt(Address(rsp, 0), xmm0);
-    __ movflt(Address(rsp, wordSize), xmm1);
-  }
-  if (UseSSE >= 2) {
-    __ subptr(rsp, 4*wordSize);
-    __ movdbl(Address(rsp, 0), xmm0);
-    __ movdbl(Address(rsp, 2*wordSize), xmm1);
-  }
-#ifdef COMPILER2
-  // C2 may leave the stack dirty if not in SSE2+ mode
-  if (UseSSE >= 2) {
-    __ verify_FPU(0, "c2i transition should have clean FPU stack");
-  } else {
-    __ empty_FPU_stack();
-  }
-#endif /* COMPILER2 */
-
-  // VM needs caller's callsite
-  __ push(rax);
-  // VM needs target method
-  __ push(rbx);
-  __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite)));
-  __ addptr(rsp, 2*wordSize);
-
-  if (UseSSE == 1) {
-    __ movflt(xmm0, Address(rsp, 0));
-    __ movflt(xmm1, Address(rsp, wordSize));
-    __ addptr(rsp, 2*wordSize);
-  }
-  if (UseSSE >= 2) {
-    __ movdbl(xmm0, Address(rsp, 0));
-    __ movdbl(xmm1, Address(rsp, 2*wordSize));
-    __ addptr(rsp, 4*wordSize);
-  }
-
-  __ popf();
-  __ popa();
-  __ bind(L);
-}
-
-
-static void move_c2i_double(MacroAssembler *masm, XMMRegister r, int st_off) {
-  int next_off = st_off - Interpreter::stackElementSize;
-  __ movdbl(Address(rsp, next_off), r);
-}
-
-static void gen_c2i_adapter(MacroAssembler *masm,
-                            int total_args_passed,
-                            int comp_args_on_stack,
-                            const BasicType *sig_bt,
-                            const VMRegPair *regs,
-                            Label& skip_fixup) {
-  // Before we get into the guts of the C2I adapter, see if we should be here
-  // at all.  We've come from compiled code and are attempting to jump to the
-  // interpreter, which means the caller made a static call to get here
-  // (vcalls always get a compiled target if there is one).  Check for a
-  // compiled target.  If there is one, we need to patch the caller's call.
-  patch_callers_callsite(masm);
-
-  __ bind(skip_fixup);
-
-#ifdef COMPILER2
-  // C2 may leave the stack dirty if not in SSE2+ mode
-  if (UseSSE >= 2) {
-    __ verify_FPU(0, "c2i transition should have clean FPU stack");
-  } else {
-    __ empty_FPU_stack();
-  }
-#endif /* COMPILER2 */
-
-  // Since all args are passed on the stack, total_args_passed * interpreter_
-  // stack_element_size  is the
-  // space we need.
-  int extraspace = total_args_passed * Interpreter::stackElementSize;
-
-  // Get return address
-  __ pop(rax);
-
-  // set senderSP value
-  __ movptr(rsi, rsp);
-
-  __ subptr(rsp, extraspace);
-
-  // Now write the args into the outgoing interpreter space
-  for (int i = 0; i < total_args_passed; i++) {
-    if (sig_bt[i] == T_VOID) {
-      assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
-      continue;
-    }
-
-    // st_off points to lowest address on stack.
-    int st_off = ((total_args_passed - 1) - i) * Interpreter::stackElementSize;
-    int next_off = st_off - Interpreter::stackElementSize;
-
-    // Say 4 args:
-    // i   st_off
-    // 0   12 T_LONG
-    // 1    8 T_VOID
-    // 2    4 T_OBJECT
-    // 3    0 T_BOOL
-    VMReg r_1 = regs[i].first();
-    VMReg r_2 = regs[i].second();
-    if (!r_1->is_valid()) {
-      assert(!r_2->is_valid(), "");
-      continue;
-    }
-
-    if (r_1->is_stack()) {
-      // memory to memory use fpu stack top
-      int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace;
-
-      if (!r_2->is_valid()) {
-        __ movl(rdi, Address(rsp, ld_off));
-        __ movptr(Address(rsp, st_off), rdi);
-      } else {
-
-        // ld_off == LSW, ld_off+VMRegImpl::stack_slot_size == MSW
-        // st_off == MSW, st_off-wordSize == LSW
-
-        __ movptr(rdi, Address(rsp, ld_off));
-        __ movptr(Address(rsp, next_off), rdi);
-        __ movptr(rdi, Address(rsp, ld_off + wordSize));
-        __ movptr(Address(rsp, st_off), rdi);
-      }
-    } else if (r_1->is_Register()) {
-      Register r = r_1->as_Register();
-      if (!r_2->is_valid()) {
-        __ movl(Address(rsp, st_off), r);
-      } else {
-        // long/double in gpr
-        ShouldNotReachHere();
-      }
-    } else {
-      assert(r_1->is_XMMRegister(), "");
-      if (!r_2->is_valid()) {
-        __ movflt(Address(rsp, st_off), r_1->as_XMMRegister());
-      } else {
-        assert(sig_bt[i] == T_DOUBLE || sig_bt[i] == T_LONG, "wrong type");
-        move_c2i_double(masm, r_1->as_XMMRegister(), st_off);
-      }
-    }
-  }
-
-  // Schedule the branch target address early.
-  __ movptr(rcx, Address(rbx, in_bytes(Method::interpreter_entry_offset())));
-  // And repush original return address
-  __ push(rax);
-  __ jmp(rcx);
-}
-
-
-static void move_i2c_double(MacroAssembler *masm, XMMRegister r, Register saved_sp, int ld_off) {
-  int next_val_off = ld_off - Interpreter::stackElementSize;
-  __ movdbl(r, Address(saved_sp, next_val_off));
-}
-
-static void range_check(MacroAssembler* masm, Register pc_reg, Register temp_reg,
-                        address code_start, address code_end,
-                        Label& L_ok) {
-  Label L_fail;
-  __ lea(temp_reg, AddressLiteral(code_start, relocInfo::none));
-  __ cmpptr(pc_reg, temp_reg);
-  __ jcc(Assembler::belowEqual, L_fail);
-  __ lea(temp_reg, AddressLiteral(code_end, relocInfo::none));
-  __ cmpptr(pc_reg, temp_reg);
-  __ jcc(Assembler::below, L_ok);
-  __ bind(L_fail);
-}
-
-void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
-                                    int total_args_passed,
-                                    int comp_args_on_stack,
-                                    const BasicType *sig_bt,
-                                    const VMRegPair *regs) {
-  // Note: rsi contains the senderSP on entry. We must preserve it since
-  // we may do a i2c -> c2i transition if we lose a race where compiled
-  // code goes non-entrant while we get args ready.
-
-  // Adapters can be frameless because they do not require the caller
-  // to perform additional cleanup work, such as correcting the stack pointer.
-  // An i2c adapter is frameless because the *caller* frame, which is interpreted,
-  // routinely repairs its own stack pointer (from interpreter_frame_last_sp),
-  // even if a callee has modified the stack pointer.
-  // A c2i adapter is frameless because the *callee* frame, which is interpreted,
-  // routinely repairs its caller's stack pointer (from sender_sp, which is set
-  // up via the senderSP register).
-  // In other words, if *either* the caller or callee is interpreted, we can
-  // get the stack pointer repaired after a call.
-  // This is why c2i and i2c adapters cannot be indefinitely composed.
-  // In particular, if a c2i adapter were to somehow call an i2c adapter,
-  // both caller and callee would be compiled methods, and neither would
-  // clean up the stack pointer changes performed by the two adapters.
-  // If this happens, control eventually transfers back to the compiled
-  // caller, but with an uncorrected stack, causing delayed havoc.
-
-  // Pick up the return address
-  __ movptr(rax, Address(rsp, 0));
-
-  if (VerifyAdapterCalls &&
-      (Interpreter::code() != nullptr || StubRoutines::final_stubs_code() != nullptr)) {
-    // So, let's test for cascading c2i/i2c adapters right now.
-    //  assert(Interpreter::contains($return_addr) ||
-    //         StubRoutines::contains($return_addr),
-    //         "i2c adapter must return to an interpreter frame");
-    __ block_comment("verify_i2c { ");
-    Label L_ok;
-    if (Interpreter::code() != nullptr) {
-      range_check(masm, rax, rdi,
-                  Interpreter::code()->code_start(), Interpreter::code()->code_end(),
-                  L_ok);
-    }
-    if (StubRoutines::initial_stubs_code() != nullptr) {
-      range_check(masm, rax, rdi,
-                  StubRoutines::initial_stubs_code()->code_begin(),
-                  StubRoutines::initial_stubs_code()->code_end(),
-                  L_ok);
-    }
-    if (StubRoutines::final_stubs_code() != nullptr) {
-      range_check(masm, rax, rdi,
-                  StubRoutines::final_stubs_code()->code_begin(),
-                  StubRoutines::final_stubs_code()->code_end(),
-                  L_ok);
-    }
-    const char* msg = "i2c adapter must return to an interpreter frame";
-    __ block_comment(msg);
-    __ stop(msg);
-    __ bind(L_ok);
-    __ block_comment("} verify_i2ce ");
-  }
-
-  // Must preserve original SP for loading incoming arguments because
-  // we need to align the outgoing SP for compiled code.
-  __ movptr(rdi, rsp);
-
-  // Cut-out for having no stack args.  Since up to 2 int/oop args are passed
-  // in registers, we will occasionally have no stack args.
-  int comp_words_on_stack = 0;
-  if (comp_args_on_stack) {
-    // Sig words on the stack are greater-than VMRegImpl::stack0.  Those in
-    // registers are below.  By subtracting stack0, we either get a negative
-    // number (all values in registers) or the maximum stack slot accessed.
-    // int comp_args_on_stack = VMRegImpl::reg2stack(max_arg);
-    // Convert 4-byte stack slots to words.
-    comp_words_on_stack = align_up(comp_args_on_stack*4, wordSize)>>LogBytesPerWord;
-    // Round up to miminum stack alignment, in wordSize
-    comp_words_on_stack = align_up(comp_words_on_stack, 2);
-    __ subptr(rsp, comp_words_on_stack * wordSize);
-  }
-
-  // Align the outgoing SP
-  __ andptr(rsp, -(StackAlignmentInBytes));
-
-  // push the return address on the stack (note that pushing, rather
-  // than storing it, yields the correct frame alignment for the callee)
-  __ push(rax);
-
-  // Put saved SP in another register
-  const Register saved_sp = rax;
-  __ movptr(saved_sp, rdi);
-
-
-  // Will jump to the compiled code just as if compiled code was doing it.
-  // Pre-load the register-jump target early, to schedule it better.
-  __ movptr(rdi, Address(rbx, in_bytes(Method::from_compiled_offset())));
-
-  // Now generate the shuffle code.  Pick up all register args and move the
-  // rest through the floating point stack top.
-  for (int i = 0; i < total_args_passed; i++) {
-    if (sig_bt[i] == T_VOID) {
-      // Longs and doubles are passed in native word order, but misaligned
-      // in the 32-bit build.
-      assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
-      continue;
-    }
-
-    // Pick up 0, 1 or 2 words from SP+offset.
-
-    assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(),
-            "scrambled load targets?");
-    // Load in argument order going down.
-    int ld_off = (total_args_passed - i) * Interpreter::stackElementSize;
-    // Point to interpreter value (vs. tag)
-    int next_off = ld_off - Interpreter::stackElementSize;
-    //
-    //
-    //
-    VMReg r_1 = regs[i].first();
-    VMReg r_2 = regs[i].second();
-    if (!r_1->is_valid()) {
-      assert(!r_2->is_valid(), "");
-      continue;
-    }
-    if (r_1->is_stack()) {
-      // Convert stack slot to an SP offset (+ wordSize to account for return address )
-      int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size + wordSize;
-
-      // We can use rsi as a temp here because compiled code doesn't need rsi as an input
-      // and if we end up going thru a c2i because of a miss a reasonable value of rsi
-      // we be generated.
-      if (!r_2->is_valid()) {
-        // __ fld_s(Address(saved_sp, ld_off));
-        // __ fstp_s(Address(rsp, st_off));
-        __ movl(rsi, Address(saved_sp, ld_off));
-        __ movptr(Address(rsp, st_off), rsi);
-      } else {
-        // Interpreter local[n] == MSW, local[n+1] == LSW however locals
-        // are accessed as negative so LSW is at LOW address
-
-        // ld_off is MSW so get LSW
-        // st_off is LSW (i.e. reg.first())
-        // __ fld_d(Address(saved_sp, next_off));
-        // __ fstp_d(Address(rsp, st_off));
-        //
-        // We are using two VMRegs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE
-        // the interpreter allocates two slots but only uses one for thr T_LONG or T_DOUBLE case
-        // So we must adjust where to pick up the data to match the interpreter.
-        //
-        // Interpreter local[n] == MSW, local[n+1] == LSW however locals
-        // are accessed as negative so LSW is at LOW address
-
-        // ld_off is MSW so get LSW
-        __ movptr(rsi, Address(saved_sp, next_off));
-        __ movptr(Address(rsp, st_off), rsi);
-        __ movptr(rsi, Address(saved_sp, ld_off));
-        __ movptr(Address(rsp, st_off + wordSize), rsi);
-      }
-    } else if (r_1->is_Register()) {  // Register argument
-      Register r = r_1->as_Register();
-      assert(r != rax, "must be different");
-      if (r_2->is_valid()) {
-        //
-        // We are using two VMRegs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE
-        // the interpreter allocates two slots but only uses one for thr T_LONG or T_DOUBLE case
-        // So we must adjust where to pick up the data to match the interpreter.
-
-        // this can be a misaligned move
-        __ movptr(r, Address(saved_sp, next_off));
-        assert(r_2->as_Register() != rax, "need another temporary register");
-        // Remember r_1 is low address (and LSB on x86)
-        // So r_2 gets loaded from high address regardless of the platform
-        __ movptr(r_2->as_Register(), Address(saved_sp, ld_off));
-      } else {
-        __ movl(r, Address(saved_sp, ld_off));
-      }
-    } else {
-      assert(r_1->is_XMMRegister(), "");
-      if (!r_2->is_valid()) {
-        __ movflt(r_1->as_XMMRegister(), Address(saved_sp, ld_off));
-      } else {
-        move_i2c_double(masm, r_1->as_XMMRegister(), saved_sp, ld_off);
-      }
-    }
-  }
-
-  // 6243940 We might end up in handle_wrong_method if
-  // the callee is deoptimized as we race thru here. If that
-  // happens we don't want to take a safepoint because the
-  // caller frame will look interpreted and arguments are now
-  // "compiled" so it is much better to make this transition
-  // invisible to the stack walking code. Unfortunately if
-  // we try and find the callee by normal means a safepoint
-  // is possible. So we stash the desired callee in the thread
-  // and the vm will find there should this case occur.
-
-  __ get_thread(rax);
-  __ movptr(Address(rax, JavaThread::callee_target_offset()), rbx);
-
-  // move Method* to rax, in case we end up in an c2i adapter.
-  // the c2i adapters expect Method* in rax, (c2) because c2's
-  // resolve stubs return the result (the method) in rax,.
-  // I'd love to fix this.
-  __ mov(rax, rbx);
-
-  __ jmp(rdi);
-}
-
-// ---------------------------------------------------------------
-AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
-                                                            int total_args_passed,
-                                                            int comp_args_on_stack,
-                                                            const BasicType *sig_bt,
-                                                            const VMRegPair *regs,
-                                                            AdapterFingerPrint* fingerprint) {
-  address i2c_entry = __ pc();
-
-  gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs);
-
-  // -------------------------------------------------------------------------
-  // Generate a C2I adapter.  On entry we know rbx, holds the Method* during calls
-  // to the interpreter.  The args start out packed in the compiled layout.  They
-  // need to be unpacked into the interpreter layout.  This will almost always
-  // require some stack space.  We grow the current (compiled) stack, then repack
-  // the args.  We  finally end in a jump to the generic interpreter entry point.
-  // On exit from the interpreter, the interpreter will restore our SP (lest the
-  // compiled code, which relies solely on SP and not EBP, get sick).
-
-  address c2i_unverified_entry = __ pc();
-  Label skip_fixup;
-
-  Register data = rax;
-  Register receiver = rcx;
-  Register temp = rbx;
-
-  {
-    __ ic_check(1 /* end_alignment */);
-    __ movptr(rbx, Address(data, CompiledICData::speculated_method_offset()));
-    // Method might have been compiled since the call site was patched to
-    // interpreted if that is the case treat it as a miss so we can get
-    // the call site corrected.
-    __ cmpptr(Address(rbx, in_bytes(Method::code_offset())), NULL_WORD);
-    __ jcc(Assembler::equal, skip_fixup);
-  }
-
-  address c2i_entry = __ pc();
-
-  BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
-  bs->c2i_entry_barrier(masm);
-
-  gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup);
-
-  return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry);
-}
-
-int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
-                                         VMRegPair *regs,
-                                         int total_args_passed) {
-
-// We return the amount of VMRegImpl stack slots we need to reserve for all
-// the arguments NOT counting out_preserve_stack_slots.
-
-  uint    stack = 0;        // All arguments on stack
-
-  for( int i = 0; i < total_args_passed; i++) {
-    // From the type and the argument number (count) compute the location
-    switch( sig_bt[i] ) {
-    case T_BOOLEAN:
-    case T_CHAR:
-    case T_FLOAT:
-    case T_BYTE:
-    case T_SHORT:
-    case T_INT:
-    case T_OBJECT:
-    case T_ARRAY:
-    case T_ADDRESS:
-    case T_METADATA:
-      regs[i].set1(VMRegImpl::stack2reg(stack++));
-      break;
-    case T_LONG:
-    case T_DOUBLE: // The stack numbering is reversed from Java
-      // Since C arguments do not get reversed, the ordering for
-      // doubles on the stack must be opposite the Java convention
-      assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "missing Half" );
-      regs[i].set2(VMRegImpl::stack2reg(stack));
-      stack += 2;
-      break;
-    case T_VOID: regs[i].set_bad(); break;
-    default:
-      ShouldNotReachHere();
-      break;
-    }
-  }
-  return stack;
-}
-
-int SharedRuntime::vector_calling_convention(VMRegPair *regs,
-                                             uint num_bits,
-                                             uint total_args_passed) {
-  Unimplemented();
-  return 0;
-}
-
-// A simple move of integer like type
-static void simple_move32(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
-  if (src.first()->is_stack()) {
-    if (dst.first()->is_stack()) {
-      // stack to stack
-      // __ ld(FP, reg2offset(src.first()), L5);
-      // __ st(L5, SP, reg2offset(dst.first()));
-      __ movl2ptr(rax, Address(rbp, reg2offset_in(src.first())));
-      __ movptr(Address(rsp, reg2offset_out(dst.first())), rax);
-    } else {
-      // stack to reg
-      __ movl2ptr(dst.first()->as_Register(),  Address(rbp, reg2offset_in(src.first())));
-    }
-  } else if (dst.first()->is_stack()) {
-    // reg to stack
-    // no need to sign extend on 64bit
-    __ movptr(Address(rsp, reg2offset_out(dst.first())), src.first()->as_Register());
-  } else {
-    if (dst.first() != src.first()) {
-      __ mov(dst.first()->as_Register(), src.first()->as_Register());
-    }
-  }
-}
-
-// An oop arg. Must pass a handle not the oop itself
-static void object_move(MacroAssembler* masm,
-                        OopMap* map,
-                        int oop_handle_offset,
-                        int framesize_in_slots,
-                        VMRegPair src,
-                        VMRegPair dst,
-                        bool is_receiver,
-                        int* receiver_offset) {
-
-  // Because of the calling conventions we know that src can be a
-  // register or a stack location. dst can only be a stack location.
-
-  assert(dst.first()->is_stack(), "must be stack");
-  // must pass a handle. First figure out the location we use as a handle
-
-  if (src.first()->is_stack()) {
-    // Oop is already on the stack as an argument
-    Register rHandle = rax;
-    Label nil;
-    __ xorptr(rHandle, rHandle);
-    __ cmpptr(Address(rbp, reg2offset_in(src.first())), NULL_WORD);
-    __ jcc(Assembler::equal, nil);
-    __ lea(rHandle, Address(rbp, reg2offset_in(src.first())));
-    __ bind(nil);
-    __ movptr(Address(rsp, reg2offset_out(dst.first())), rHandle);
-
-    int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots();
-    map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots));
-    if (is_receiver) {
-      *receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size;
-    }
-  } else {
-    // Oop is in a register we must store it to the space we reserve
-    // on the stack for oop_handles
-    const Register rOop = src.first()->as_Register();
-    const Register rHandle = rax;
-    int oop_slot = (rOop == rcx ? 0 : 1) * VMRegImpl::slots_per_word + oop_handle_offset;
-    int offset = oop_slot*VMRegImpl::stack_slot_size;
-    Label skip;
-    __ movptr(Address(rsp, offset), rOop);
-    map->set_oop(VMRegImpl::stack2reg(oop_slot));
-    __ xorptr(rHandle, rHandle);
-    __ cmpptr(rOop, NULL_WORD);
-    __ jcc(Assembler::equal, skip);
-    __ lea(rHandle, Address(rsp, offset));
-    __ bind(skip);
-    // Store the handle parameter
-    __ movptr(Address(rsp, reg2offset_out(dst.first())), rHandle);
-    if (is_receiver) {
-      *receiver_offset = offset;
-    }
-  }
-}
-
-// A float arg may have to do float reg int reg conversion
-static void float_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
-  assert(!src.second()->is_valid() && !dst.second()->is_valid(), "bad float_move");
-
-  // Because of the calling convention we know that src is either a stack location
-  // or an xmm register. dst can only be a stack location.
-
-  assert(dst.first()->is_stack() && ( src.first()->is_stack() || src.first()->is_XMMRegister()), "bad parameters");
-
-  if (src.first()->is_stack()) {
-    __ movl(rax, Address(rbp, reg2offset_in(src.first())));
-    __ movptr(Address(rsp, reg2offset_out(dst.first())), rax);
-  } else {
-    // reg to stack
-    __ movflt(Address(rsp, reg2offset_out(dst.first())), src.first()->as_XMMRegister());
-  }
-}
-
-// A long move
-static void long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
-
-  // The only legal possibility for a long_move VMRegPair is:
-  // 1: two stack slots (possibly unaligned)
-  // as neither the java  or C calling convention will use registers
-  // for longs.
-
-  if (src.first()->is_stack() && dst.first()->is_stack()) {
-    assert(src.second()->is_stack() && dst.second()->is_stack(), "must be all stack");
-    __ movptr(rax, Address(rbp, reg2offset_in(src.first())));
-    __ movptr(rbx, Address(rbp, reg2offset_in(src.second())));
-    __ movptr(Address(rsp, reg2offset_out(dst.first())), rax);
-    __ movptr(Address(rsp, reg2offset_out(dst.second())), rbx);
-  } else {
-    ShouldNotReachHere();
-  }
-}
-
-// A double move
-static void double_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
-
-  // The only legal possibilities for a double_move VMRegPair are:
-  // The painful thing here is that like long_move a VMRegPair might be
-
-  // Because of the calling convention we know that src is either
-  //   1: a single physical register (xmm registers only)
-  //   2: two stack slots (possibly unaligned)
-  // dst can only be a pair of stack slots.
-
-  assert(dst.first()->is_stack() && (src.first()->is_XMMRegister() || src.first()->is_stack()), "bad args");
-
-  if (src.first()->is_stack()) {
-    // source is all stack
-    __ movptr(rax, Address(rbp, reg2offset_in(src.first())));
-    __ movptr(rbx, Address(rbp, reg2offset_in(src.second())));
-    __ movptr(Address(rsp, reg2offset_out(dst.first())), rax);
-    __ movptr(Address(rsp, reg2offset_out(dst.second())), rbx);
-  } else {
-    // reg to stack
-    // No worries about stack alignment
-    __ movdbl(Address(rsp, reg2offset_out(dst.first())), src.first()->as_XMMRegister());
-  }
-}
-
-
-void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
-  // We always ignore the frame_slots arg and just use the space just below frame pointer
-  // which by this time is free to use
-  switch (ret_type) {
-  case T_FLOAT:
-    __ fstp_s(Address(rbp, -wordSize));
-    break;
-  case T_DOUBLE:
-    __ fstp_d(Address(rbp, -2*wordSize));
-    break;
-  case T_VOID:  break;
-  case T_LONG:
-    __ movptr(Address(rbp, -wordSize), rax);
-    __ movptr(Address(rbp, -2*wordSize), rdx);
-    break;
-  default: {
-    __ movptr(Address(rbp, -wordSize), rax);
-    }
-  }
-}
-
-void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
-  // We always ignore the frame_slots arg and just use the space just below frame pointer
-  // which by this time is free to use
-  switch (ret_type) {
-  case T_FLOAT:
-    __ fld_s(Address(rbp, -wordSize));
-    break;
-  case T_DOUBLE:
-    __ fld_d(Address(rbp, -2*wordSize));
-    break;
-  case T_LONG:
-    __ movptr(rax, Address(rbp, -wordSize));
-    __ movptr(rdx, Address(rbp, -2*wordSize));
-    break;
-  case T_VOID:  break;
-  default: {
-    __ movptr(rax, Address(rbp, -wordSize));
-    }
-  }
-}
-
-static void verify_oop_args(MacroAssembler* masm,
-                            const methodHandle& method,
-                            const BasicType* sig_bt,
-                            const VMRegPair* regs) {
-  Register temp_reg = rbx;  // not part of any compiled calling seq
-  if (VerifyOops) {
-    for (int i = 0; i < method->size_of_parameters(); i++) {
-      if (is_reference_type(sig_bt[i])) {
-        VMReg r = regs[i].first();
-        assert(r->is_valid(), "bad oop arg");
-        if (r->is_stack()) {
-          __ movptr(temp_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize));
-          __ verify_oop(temp_reg);
-        } else {
-          __ verify_oop(r->as_Register());
-        }
-      }
-    }
-  }
-}
-
-static void gen_special_dispatch(MacroAssembler* masm,
-                                 const methodHandle& method,
-                                 const BasicType* sig_bt,
-                                 const VMRegPair* regs) {
-  verify_oop_args(masm, method, sig_bt, regs);
-  vmIntrinsics::ID iid = method->intrinsic_id();
-
-  // Now write the args into the outgoing interpreter space
-  bool     has_receiver   = false;
-  Register receiver_reg   = noreg;
-  int      member_arg_pos = -1;
-  Register member_reg     = noreg;
-  int      ref_kind       = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid);
-  if (ref_kind != 0) {
-    member_arg_pos = method->size_of_parameters() - 1;  // trailing MemberName argument
-    member_reg = rbx;  // known to be free at this point
-    has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind);
-  } else if (iid == vmIntrinsics::_invokeBasic) {
-    has_receiver = true;
-  } else {
-    fatal("unexpected intrinsic id %d", vmIntrinsics::as_int(iid));
-  }
-
-  if (member_reg != noreg) {
-    // Load the member_arg into register, if necessary.
-    SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs);
-    VMReg r = regs[member_arg_pos].first();
-    if (r->is_stack()) {
-      __ movptr(member_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize));
-    } else {
-      // no data motion is needed
-      member_reg = r->as_Register();
-    }
-  }
-
-  if (has_receiver) {
-    // Make sure the receiver is loaded into a register.
-    assert(method->size_of_parameters() > 0, "oob");
-    assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object");
-    VMReg r = regs[0].first();
-    assert(r->is_valid(), "bad receiver arg");
-    if (r->is_stack()) {
-      // Porting note:  This assumes that compiled calling conventions always
-      // pass the receiver oop in a register.  If this is not true on some
-      // platform, pick a temp and load the receiver from stack.
-      fatal("receiver always in a register");
-      receiver_reg = rcx;  // known to be free at this point
-      __ movptr(receiver_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize));
-    } else {
-      // no data motion is needed
-      receiver_reg = r->as_Register();
-    }
-  }
-
-  // Figure out which address we are really jumping to:
-  MethodHandles::generate_method_handle_dispatch(masm, iid,
-                                                 receiver_reg, member_reg, /*for_compiler_entry:*/ true);
-}
-
-// ---------------------------------------------------------------------------
-// Generate a native wrapper for a given method.  The method takes arguments
-// in the Java compiled code convention, marshals them to the native
-// convention (handlizes oops, etc), transitions to native, makes the call,
-// returns to java state (possibly blocking), unhandlizes any result and
-// returns.
-//
-// Critical native functions are a shorthand for the use of
-// GetPrimtiveArrayCritical and disallow the use of any other JNI
-// functions.  The wrapper is expected to unpack the arguments before
-// passing them to the callee. Critical native functions leave the state _in_Java,
-// since they cannot stop for GC.
-// Some other parts of JNI setup are skipped like the tear down of the JNI handle
-// block and the check for pending exceptions it's impossible for them
-// to be thrown.
-//
-//
-nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
-                                                const methodHandle& method,
-                                                int compile_id,
-                                                BasicType* in_sig_bt,
-                                                VMRegPair* in_regs,
-                                                BasicType ret_type) {
-  if (method->is_method_handle_intrinsic()) {
-    vmIntrinsics::ID iid = method->intrinsic_id();
-    intptr_t start = (intptr_t)__ pc();
-    int vep_offset = ((intptr_t)__ pc()) - start;
-    gen_special_dispatch(masm,
-                         method,
-                         in_sig_bt,
-                         in_regs);
-    int frame_complete = ((intptr_t)__ pc()) - start;  // not complete, period
-    __ flush();
-    int stack_slots = SharedRuntime::out_preserve_stack_slots();  // no out slots at all, actually
-    return nmethod::new_native_nmethod(method,
-                                       compile_id,
-                                       masm->code(),
-                                       vep_offset,
-                                       frame_complete,
-                                       stack_slots / VMRegImpl::slots_per_word,
-                                       in_ByteSize(-1),
-                                       in_ByteSize(-1),
-                                       (OopMapSet*)nullptr);
-  }
-  address native_func = method->native_function();
-  assert(native_func != nullptr, "must have function");
-
-  // An OopMap for lock (and class if static)
-  OopMapSet *oop_maps = new OopMapSet();
-
-  // We have received a description of where all the java arg are located
-  // on entry to the wrapper. We need to convert these args to where
-  // the jni function will expect them. To figure out where they go
-  // we convert the java signature to a C signature by inserting
-  // the hidden arguments as arg[0] and possibly arg[1] (static method)
-
-  const int total_in_args = method->size_of_parameters();
-  int  total_c_args       = total_in_args + (method->is_static() ? 2 : 1);
-
-  BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args);
-  VMRegPair* out_regs   = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args);
-
-  int argc = 0;
-  out_sig_bt[argc++] = T_ADDRESS;
-  if (method->is_static()) {
-    out_sig_bt[argc++] = T_OBJECT;
-  }
-
-  for (int i = 0; i < total_in_args ; i++ ) {
-    out_sig_bt[argc++] = in_sig_bt[i];
-  }
-
-  // Now figure out where the args must be stored and how much stack space
-  // they require.
-  int out_arg_slots;
-  out_arg_slots = c_calling_convention(out_sig_bt, out_regs, total_c_args);
-
-  // Compute framesize for the wrapper.  We need to handlize all oops in
-  // registers a max of 2 on x86.
-
-  // Calculate the total number of stack slots we will need.
-
-  // First count the abi requirement plus all of the outgoing args
-  int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots;
-
-  // Now the space for the inbound oop handle area
-  int total_save_slots = 2 * VMRegImpl::slots_per_word; // 2 arguments passed in registers
-
-  int oop_handle_offset = stack_slots;
-  stack_slots += total_save_slots;
-
-  // Now any space we need for handlizing a klass if static method
-
-  int klass_slot_offset = 0;
-  int klass_offset = -1;
-  int lock_slot_offset = 0;
-  bool is_static = false;
-
-  if (method->is_static()) {
-    klass_slot_offset = stack_slots;
-    stack_slots += VMRegImpl::slots_per_word;
-    klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size;
-    is_static = true;
-  }
-
-  // Plus a lock if needed
-
-  if (method->is_synchronized()) {
-    lock_slot_offset = stack_slots;
-    stack_slots += VMRegImpl::slots_per_word;
-  }
-
-  // Now a place (+2) to save return values or temp during shuffling
-  // + 2 for return address (which we own) and saved rbp,
-  stack_slots += 4;
-
-  // Ok The space we have allocated will look like:
-  //
-  //
-  // FP-> |                     |
-  //      |---------------------|
-  //      | 2 slots for moves   |
-  //      |---------------------|
-  //      | lock box (if sync)  |
-  //      |---------------------| <- lock_slot_offset  (-lock_slot_rbp_offset)
-  //      | klass (if static)   |
-  //      |---------------------| <- klass_slot_offset
-  //      | oopHandle area      |
-  //      |---------------------| <- oop_handle_offset (a max of 2 registers)
-  //      | outbound memory     |
-  //      | based arguments     |
-  //      |                     |
-  //      |---------------------|
-  //      |                     |
-  // SP-> | out_preserved_slots |
-  //
-  //
-  // ****************************************************************************
-  // WARNING - on Windows Java Natives use pascal calling convention and pop the
-  // arguments off of the stack after the jni call. Before the call we can use
-  // instructions that are SP relative. After the jni call we switch to FP
-  // relative instructions instead of re-adjusting the stack on windows.
-  // ****************************************************************************
-
-
-  // Now compute actual number of stack words we need rounding to make
-  // stack properly aligned.
-  stack_slots = align_up(stack_slots, StackAlignmentInSlots);
-
-  int stack_size = stack_slots * VMRegImpl::stack_slot_size;
-
-  intptr_t start = (intptr_t)__ pc();
-
-  // First thing make an ic check to see if we should even be here
-
-  // We are free to use all registers as temps without saving them and
-  // restoring them except rbp. rbp is the only callee save register
-  // as far as the interpreter and the compiler(s) are concerned.
-
-
-  const Register receiver = rcx;
-  Label exception_pending;
-
-  __ verify_oop(receiver);
-  // verified entry must be aligned for code patching.
-  __ ic_check(8 /* end_alignment */);
-
-  int vep_offset = ((intptr_t)__ pc()) - start;
-
-#ifdef COMPILER1
-  // For Object.hashCode, System.identityHashCode try to pull hashCode from object header if available.
-  if ((InlineObjectHash && method->intrinsic_id() == vmIntrinsics::_hashCode) || (method->intrinsic_id() == vmIntrinsics::_identityHashCode)) {
-    inline_check_hashcode_from_object_header(masm, method, rcx /*obj_reg*/, rax /*result*/);
-   }
-#endif // COMPILER1
-
-  // The instruction at the verified entry point must be 5 bytes or longer
-  // because it can be patched on the fly by make_non_entrant. The stack bang
-  // instruction fits that requirement.
-
-  // Generate stack overflow check
-  __ bang_stack_with_offset((int)StackOverflow::stack_shadow_zone_size());
-
-  // Generate a new frame for the wrapper.
-  __ enter();
-  // -2 because return address is already present and so is saved rbp
-  __ subptr(rsp, stack_size - 2*wordSize);
-
-
-  BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
-  bs->nmethod_entry_barrier(masm, nullptr /* slow_path */, nullptr /* continuation */);
-
-  // Frame is now completed as far as size and linkage.
-  int frame_complete = ((intptr_t)__ pc()) - start;
-
-  // Calculate the difference between rsp and rbp,. We need to know it
-  // after the native call because on windows Java Natives will pop
-  // the arguments and it is painful to do rsp relative addressing
-  // in a platform independent way. So after the call we switch to
-  // rbp, relative addressing.
-
-  int fp_adjustment = stack_size - 2*wordSize;
-
-#ifdef COMPILER2
-  // C2 may leave the stack dirty if not in SSE2+ mode
-  if (UseSSE >= 2) {
-    __ verify_FPU(0, "c2i transition should have clean FPU stack");
-  } else {
-    __ empty_FPU_stack();
-  }
-#endif /* COMPILER2 */
-
-  // Compute the rbp, offset for any slots used after the jni call
-
-  int lock_slot_rbp_offset = (lock_slot_offset*VMRegImpl::stack_slot_size) - fp_adjustment;
-
-  // We use rdi as a thread pointer because it is callee save and
-  // if we load it once it is usable thru the entire wrapper
-  const Register thread = rdi;
-
-   // We use rsi as the oop handle for the receiver/klass
-   // It is callee save so it survives the call to native
-
-   const Register oop_handle_reg = rsi;
-
-   __ get_thread(thread);
-
-  //
-  // We immediately shuffle the arguments so that any vm call we have to
-  // make from here on out (sync slow path, jvmti, etc.) we will have
-  // captured the oops from our caller and have a valid oopMap for
-  // them.
-
-  // -----------------
-  // The Grand Shuffle
-  //
-  // Natives require 1 or 2 extra arguments over the normal ones: the JNIEnv*
-  // and, if static, the class mirror instead of a receiver.  This pretty much
-  // guarantees that register layout will not match (and x86 doesn't use reg
-  // parms though amd does).  Since the native abi doesn't use register args
-  // and the java conventions does we don't have to worry about collisions.
-  // All of our moved are reg->stack or stack->stack.
-  // We ignore the extra arguments during the shuffle and handle them at the
-  // last moment. The shuffle is described by the two calling convention
-  // vectors we have in our possession. We simply walk the java vector to
-  // get the source locations and the c vector to get the destinations.
-
-  int c_arg = method->is_static() ? 2 : 1;
-
-  // Record rsp-based slot for receiver on stack for non-static methods
-  int receiver_offset = -1;
-
-  // This is a trick. We double the stack slots so we can claim
-  // the oops in the caller's frame. Since we are sure to have
-  // more args than the caller doubling is enough to make
-  // sure we can capture all the incoming oop args from the
-  // caller.
-  //
-  OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
-
-  // Mark location of rbp,
-  // map->set_callee_saved(VMRegImpl::stack2reg( stack_slots - 2), stack_slots * 2, 0, rbp->as_VMReg());
-
-  // We know that we only have args in at most two integer registers (rcx, rdx). So rax, rbx
-  // Are free to temporaries if we have to do  stack to steck moves.
-  // All inbound args are referenced based on rbp, and all outbound args via rsp.
-
-  for (int i = 0; i < total_in_args ; i++, c_arg++ ) {
-    switch (in_sig_bt[i]) {
-      case T_ARRAY:
-      case T_OBJECT:
-        object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg],
-                    ((i == 0) && (!is_static)),
-                    &receiver_offset);
-        break;
-      case T_VOID:
-        break;
-
-      case T_FLOAT:
-        float_move(masm, in_regs[i], out_regs[c_arg]);
-          break;
-
-      case T_DOUBLE:
-        assert( i + 1 < total_in_args &&
-                in_sig_bt[i + 1] == T_VOID &&
-                out_sig_bt[c_arg+1] == T_VOID, "bad arg list");
-        double_move(masm, in_regs[i], out_regs[c_arg]);
-        break;
-
-      case T_LONG :
-        long_move(masm, in_regs[i], out_regs[c_arg]);
-        break;
-
-      case T_ADDRESS: assert(false, "found T_ADDRESS in java args");
-
-      default:
-        simple_move32(masm, in_regs[i], out_regs[c_arg]);
-    }
-  }
-
-  // Pre-load a static method's oop into rsi.  Used both by locking code and
-  // the normal JNI call code.
-  if (method->is_static()) {
-
-    //  load opp into a register
-    __ movoop(oop_handle_reg, JNIHandles::make_local(method->method_holder()->java_mirror()));
-
-    // Now handlize the static class mirror it's known not-null.
-    __ movptr(Address(rsp, klass_offset), oop_handle_reg);
-    map->set_oop(VMRegImpl::stack2reg(klass_slot_offset));
-
-    // Now get the handle
-    __ lea(oop_handle_reg, Address(rsp, klass_offset));
-    // store the klass handle as second argument
-    __ movptr(Address(rsp, wordSize), oop_handle_reg);
-  }
-
-  // Change state to native (we save the return address in the thread, since it might not
-  // be pushed on the stack when we do a stack traversal). It is enough that the pc()
-  // points into the right code segment. It does not have to be the correct return pc.
-  // We use the same pc/oopMap repeatedly when we call out
-
-  intptr_t the_pc = (intptr_t) __ pc();
-  oop_maps->add_gc_map(the_pc - start, map);
-
-  __ set_last_Java_frame(thread, rsp, noreg, (address)the_pc, noreg);
-
-
-  // We have all of the arguments setup at this point. We must not touch any register
-  // argument registers at this point (what if we save/restore them there are no oop?
-
-  if (DTraceMethodProbes) {
-    __ mov_metadata(rax, method());
-    __ call_VM_leaf(
-         CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry),
-         thread, rax);
-  }
-
-  // RedefineClasses() tracing support for obsolete method entry
-  if (log_is_enabled(Trace, redefine, class, obsolete)) {
-    __ mov_metadata(rax, method());
-    __ call_VM_leaf(
-         CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry),
-         thread, rax);
-  }
-
-  // These are register definitions we need for locking/unlocking
-  const Register swap_reg = rax;  // Must use rax, for cmpxchg instruction
-  const Register obj_reg  = rcx;  // Will contain the oop
-  const Register lock_reg = rdx;  // Address of compiler lock object (BasicLock)
-
-  Label slow_path_lock;
-  Label lock_done;
-
-  // Lock a synchronized method
-  if (method->is_synchronized()) {
-    Label count_mon;
-
-    const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes();
-
-    // Get the handle (the 2nd argument)
-    __ movptr(oop_handle_reg, Address(rsp, wordSize));
-
-    // Get address of the box
-
-    __ lea(lock_reg, Address(rbp, lock_slot_rbp_offset));
-
-    // Load the oop from the handle
-    __ movptr(obj_reg, Address(oop_handle_reg, 0));
-
-    if (LockingMode == LM_MONITOR) {
-      __ jmp(slow_path_lock);
-    } else if (LockingMode == LM_LEGACY) {
-      // Load immediate 1 into swap_reg %rax,
-      __ movptr(swap_reg, 1);
-
-      // Load (object->mark() | 1) into swap_reg %rax,
-      __ orptr(swap_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
-
-      // Save (object->mark() | 1) into BasicLock's displaced header
-      __ movptr(Address(lock_reg, mark_word_offset), swap_reg);
-
-      // src -> dest iff dest == rax, else rax, <- dest
-      // *obj_reg = lock_reg iff *obj_reg == rax, else rax, = *(obj_reg)
-      __ lock();
-      __ cmpxchgptr(lock_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
-      __ jcc(Assembler::equal, count_mon);
-
-      // Test if the oopMark is an obvious stack pointer, i.e.,
-      //  1) (mark & 3) == 0, and
-      //  2) rsp <= mark < rsp + os::pagesize()
-      // These 3 tests can be done by evaluating the following
-      // expression: ((mark - rsp) & (3 - os::vm_page_size())),
-      // assuming both stack pointer and pagesize have their
-      // least significant 2 bits clear.
-      // NOTE: the oopMark is in swap_reg %rax, as the result of cmpxchg
-
-      __ subptr(swap_reg, rsp);
-      __ andptr(swap_reg, 3 - (int)os::vm_page_size());
-
-      // Save the test result, for recursive case, the result is zero
-      __ movptr(Address(lock_reg, mark_word_offset), swap_reg);
-      __ jcc(Assembler::notEqual, slow_path_lock);
-    } else {
-      assert(LockingMode == LM_LIGHTWEIGHT, "must be");
-      // Lacking registers and thread on x86_32. Always take slow path.
-      __ jmp(slow_path_lock);
-    }
-    __ bind(count_mon);
-    __ inc_held_monitor_count();
-
-    // Slow path will re-enter here
-    __ bind(lock_done);
-  }
-
-
-  // Finally just about ready to make the JNI call
-
-  // get JNIEnv* which is first argument to native
-  __ lea(rdx, Address(thread, in_bytes(JavaThread::jni_environment_offset())));
-  __ movptr(Address(rsp, 0), rdx);
-
-  // Now set thread in native
-  __ movl(Address(thread, JavaThread::thread_state_offset()), _thread_in_native);
-
-  __ call(RuntimeAddress(native_func));
-
-  // Verify or restore cpu control state after JNI call
-  __ restore_cpu_control_state_after_jni(noreg);
-
-  // WARNING - on Windows Java Natives use pascal calling convention and pop the
-  // arguments off of the stack. We could just re-adjust the stack pointer here
-  // and continue to do SP relative addressing but we instead switch to FP
-  // relative addressing.
-
-  // Unpack native results.
-  switch (ret_type) {
-  case T_BOOLEAN: __ c2bool(rax);            break;
-  case T_CHAR   : __ andptr(rax, 0xFFFF);    break;
-  case T_BYTE   : __ sign_extend_byte (rax); break;
-  case T_SHORT  : __ sign_extend_short(rax); break;
-  case T_INT    : /* nothing to do */        break;
-  case T_DOUBLE :
-  case T_FLOAT  :
-    // Result is in st0 we'll save as needed
-    break;
-  case T_ARRAY:                 // Really a handle
-  case T_OBJECT:                // Really a handle
-      break; // can't de-handlize until after safepoint check
-  case T_VOID: break;
-  case T_LONG: break;
-  default       : ShouldNotReachHere();
-  }
-
-  // Switch thread to "native transition" state before reading the synchronization state.
-  // This additional state is necessary because reading and testing the synchronization
-  // state is not atomic w.r.t. GC, as this scenario demonstrates:
-  //     Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted.
-  //     VM thread changes sync state to synchronizing and suspends threads for GC.
-  //     Thread A is resumed to finish this native method, but doesn't block here since it
-  //     didn't see any synchronization in progress, and escapes.
-  __ movl(Address(thread, JavaThread::thread_state_offset()), _thread_in_native_trans);
-
-  // Force this write out before the read below
-  if (!UseSystemMemoryBarrier) {
-    __ membar(Assembler::Membar_mask_bits(
-              Assembler::LoadLoad | Assembler::LoadStore |
-              Assembler::StoreLoad | Assembler::StoreStore));
-  }
-
-  if (AlwaysRestoreFPU) {
-    // Make sure the control word is correct.
-    __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
-  }
-
-  // check for safepoint operation in progress and/or pending suspend requests
-  { Label Continue, slow_path;
-
-    __ safepoint_poll(slow_path, thread, true /* at_return */, false /* in_nmethod */);
-
-    __ cmpl(Address(thread, JavaThread::suspend_flags_offset()), 0);
-    __ jcc(Assembler::equal, Continue);
-    __ bind(slow_path);
-
-    // Don't use call_VM as it will see a possible pending exception and forward it
-    // and never return here preventing us from clearing _last_native_pc down below.
-    // Also can't use call_VM_leaf either as it will check to see if rsi & rdi are
-    // preserved and correspond to the bcp/locals pointers. So we do a runtime call
-    // by hand.
-    //
-    __ vzeroupper();
-
-    save_native_result(masm, ret_type, stack_slots);
-    __ push(thread);
-    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address,
-                                              JavaThread::check_special_condition_for_native_trans)));
-    __ increment(rsp, wordSize);
-    // Restore any method result value
-    restore_native_result(masm, ret_type, stack_slots);
-    __ bind(Continue);
-  }
-
-  // change thread state
-  __ movl(Address(thread, JavaThread::thread_state_offset()), _thread_in_Java);
-
-  Label reguard;
-  Label reguard_done;
-  __ cmpl(Address(thread, JavaThread::stack_guard_state_offset()), StackOverflow::stack_guard_yellow_reserved_disabled);
-  __ jcc(Assembler::equal, reguard);
-
-  // slow path reguard  re-enters here
-  __ bind(reguard_done);
-
-  // Handle possible exception (will unlock if necessary)
-
-  // native result if any is live
-
-  // Unlock
-  Label slow_path_unlock;
-  Label unlock_done;
-  if (method->is_synchronized()) {
-
-    Label fast_done;
-
-    // Get locked oop from the handle we passed to jni
-    __ movptr(obj_reg, Address(oop_handle_reg, 0));
-
-    if (LockingMode == LM_LEGACY) {
-      Label not_recur;
-      // Simple recursive lock?
-      __ cmpptr(Address(rbp, lock_slot_rbp_offset), NULL_WORD);
-      __ jcc(Assembler::notEqual, not_recur);
-      __ dec_held_monitor_count();
-      __ jmpb(fast_done);
-      __ bind(not_recur);
-    }
-
-    // Must save rax, if it is live now because cmpxchg must use it
-    if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) {
-      save_native_result(masm, ret_type, stack_slots);
-    }
-
-    if (LockingMode == LM_MONITOR) {
-      __ jmp(slow_path_unlock);
-    } else if (LockingMode == LM_LEGACY) {
-      //  get old displaced header
-      __ movptr(rbx, Address(rbp, lock_slot_rbp_offset));
-
-      // get address of the stack lock
-      __ lea(rax, Address(rbp, lock_slot_rbp_offset));
-
-      // Atomic swap old header if oop still contains the stack lock
-      // src -> dest iff dest == rax, else rax, <- dest
-      // *obj_reg = rbx, iff *obj_reg == rax, else rax, = *(obj_reg)
-      __ lock();
-      __ cmpxchgptr(rbx, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
-      __ jcc(Assembler::notEqual, slow_path_unlock);
-      __ dec_held_monitor_count();
-    } else {
-      assert(LockingMode == LM_LIGHTWEIGHT, "must be");
-      __ lightweight_unlock(obj_reg, swap_reg, thread, lock_reg, slow_path_unlock);
-      __ dec_held_monitor_count();
-    }
-
-    // slow path re-enters here
-    __ bind(unlock_done);
-    if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) {
-      restore_native_result(masm, ret_type, stack_slots);
-    }
-
-    __ bind(fast_done);
-  }
-
-  if (DTraceMethodProbes) {
-    // Tell dtrace about this method exit
-    save_native_result(masm, ret_type, stack_slots);
-    __ mov_metadata(rax, method());
-    __ call_VM_leaf(
-         CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit),
-         thread, rax);
-    restore_native_result(masm, ret_type, stack_slots);
-  }
-
-  // We can finally stop using that last_Java_frame we setup ages ago
-
-  __ reset_last_Java_frame(thread, false);
-
-  // Unbox oop result, e.g. JNIHandles::resolve value.
-  if (is_reference_type(ret_type)) {
-    __ resolve_jobject(rax /* value */,
-                       thread /* thread */,
-                       rcx /* tmp */);
-  }
-
-  if (CheckJNICalls) {
-    // clear_pending_jni_exception_check
-    __ movptr(Address(thread, JavaThread::pending_jni_exception_check_fn_offset()), NULL_WORD);
-  }
-
-  // reset handle block
-  __ movptr(rcx, Address(thread, JavaThread::active_handles_offset()));
-  __ movl(Address(rcx, JNIHandleBlock::top_offset()), NULL_WORD);
-
-  // Any exception pending?
-  __ cmpptr(Address(thread, in_bytes(Thread::pending_exception_offset())), NULL_WORD);
-  __ jcc(Assembler::notEqual, exception_pending);
-
-  // no exception, we're almost done
-
-  // check that only result value is on FPU stack
-  __ verify_FPU(ret_type == T_FLOAT || ret_type == T_DOUBLE ? 1 : 0, "native_wrapper normal exit");
-
-  // Fix up floating point results so that the result looks like a return from a compiled method
-  if (ret_type == T_FLOAT) {
-    if (UseSSE >= 1) {
-      // Pop st0 and store as float and reload into xmm register
-      __ fstp_s(Address(rbp, -4));
-      __ movflt(xmm0, Address(rbp, -4));
-    }
-  } else if (ret_type == T_DOUBLE) {
-    if (UseSSE >= 2) {
-      // Pop st0 and store as double and reload into xmm register
-      __ fstp_d(Address(rbp, -8));
-      __ movdbl(xmm0, Address(rbp, -8));
-    }
-  }
-
-  // Return
-
-  __ leave();
-  __ ret(0);
-
-  // Unexpected paths are out of line and go here
-
-  // Slow path locking & unlocking
-  if (method->is_synchronized()) {
-
-    // BEGIN Slow path lock
-
-    __ bind(slow_path_lock);
-
-    // has last_Java_frame setup. No exceptions so do vanilla call not call_VM
-    // args are (oop obj, BasicLock* lock, JavaThread* thread)
-    __ push(thread);
-    __ push(lock_reg);
-    __ push(obj_reg);
-    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C)));
-    __ addptr(rsp, 3*wordSize);
-
-#ifdef ASSERT
-    { Label L;
-    __ cmpptr(Address(thread, in_bytes(Thread::pending_exception_offset())), NULL_WORD);
-    __ jcc(Assembler::equal, L);
-    __ stop("no pending exception allowed on exit from monitorenter");
-    __ bind(L);
-    }
-#endif
-    __ jmp(lock_done);
-
-    // END Slow path lock
-
-    // BEGIN Slow path unlock
-    __ bind(slow_path_unlock);
-    __ vzeroupper();
-    // Slow path unlock
-
-    if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) {
-      save_native_result(masm, ret_type, stack_slots);
-    }
-    // Save pending exception around call to VM (which contains an EXCEPTION_MARK)
-
-    __ pushptr(Address(thread, in_bytes(Thread::pending_exception_offset())));
-    __ movptr(Address(thread, in_bytes(Thread::pending_exception_offset())), NULL_WORD);
-
-
-    // should be a peal
-    // +wordSize because of the push above
-    // args are (oop obj, BasicLock* lock, JavaThread* thread)
-    __ push(thread);
-    __ lea(rax, Address(rbp, lock_slot_rbp_offset));
-    __ push(rax);
-
-    __ push(obj_reg);
-    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C)));
-    __ addptr(rsp, 3*wordSize);
-#ifdef ASSERT
-    {
-      Label L;
-      __ cmpptr(Address(thread, in_bytes(Thread::pending_exception_offset())), NULL_WORD);
-      __ jcc(Assembler::equal, L);
-      __ stop("no pending exception allowed on exit complete_monitor_unlocking_C");
-      __ bind(L);
-    }
-#endif /* ASSERT */
-
-    __ popptr(Address(thread, in_bytes(Thread::pending_exception_offset())));
-
-    if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) {
-      restore_native_result(masm, ret_type, stack_slots);
-    }
-    __ jmp(unlock_done);
-    // END Slow path unlock
-
-  }
-
-  // SLOW PATH Reguard the stack if needed
-
-  __ bind(reguard);
-  __ vzeroupper();
-  save_native_result(masm, ret_type, stack_slots);
-  {
-    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages)));
-  }
-  restore_native_result(masm, ret_type, stack_slots);
-  __ jmp(reguard_done);
-
-
-  // BEGIN EXCEPTION PROCESSING
-
-  // Forward  the exception
-  __ bind(exception_pending);
-
-  // remove possible return value from FPU register stack
-  __ empty_FPU_stack();
-
-  // pop our frame
-  __ leave();
-  // and forward the exception
-  __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
-
-  __ flush();
-
-  nmethod *nm = nmethod::new_native_nmethod(method,
-                                            compile_id,
-                                            masm->code(),
-                                            vep_offset,
-                                            frame_complete,
-                                            stack_slots / VMRegImpl::slots_per_word,
-                                            (is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)),
-                                            in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size),
-                                            oop_maps);
-
-  return nm;
-
-}
-
-// Returns the adjustment size (in number of words) to a c2i adapter
-// activation for use during deoptimization.
-int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals ) {
-  return (callee_locals - callee_parameters) * Interpreter::stackElementWords; // locals beyond the parameters, scaled to words
-}
-
-
-// Number of stack slots between incoming argument block and the start of
-// a new frame.  The PROLOG must add this many slots to the stack.  The
-// EPILOG must remove this many slots.  Intel needs one slot for the
-// return address and one for rbp (rbp must be saved).
-uint SharedRuntime::in_preserve_stack_slots() {
-  return 2+VerifyStackAtCalls; // the two slots above plus the value of VerifyStackAtCalls
-}
-
-uint SharedRuntime::out_preserve_stack_slots() {
-  return 0; // unlike in_preserve_stack_slots(), this count is always zero here
-}
-
-VMReg SharedRuntime::thread_register() {
-  Unimplemented(); // NOTE(review): presumably reports an error and does not return - confirm
-  return nullptr; // placeholder result for the VMReg return type
-}
-
-//------------------------------generate_deopt_blob----------------------------
-void SharedRuntime::generate_deopt_blob() {
-  // Generates the deopt blob stored in _deopt_blob. Allocate space for the code
-  ResourceMark rm;
-  // set up code generation tools
-  // note: the buffer code size must account for StackShadowPages=50
-  const char* name = SharedRuntime::stub_name(SharedStubId::deopt_id);
-  CodeBuffer   buffer(name, 1536, 1024);
-  MacroAssembler* masm = new MacroAssembler(&buffer);
-  int frame_size_in_words;
-  OopMap* map = nullptr;
-  // Account for the extra args we place on the stack
-  // by the time we call fetch_unroll_info
-  const int additional_words = 2; // deopt kind, thread
-
-  OopMapSet *oop_maps = new OopMapSet();
-
-  // -------------
-  // This code enters when returning to a de-optimized nmethod.  A return
-  // address has been pushed on the stack, and return values are in
-  // registers.
-  // If we are doing a normal deopt then we were called from the patched
-  // nmethod from the point we returned to the nmethod. So the return
-  // address on the stack is wrong by NativeCall::instruction_size
-  // We will adjust the value so it looks like we have the original return
-  // address on the stack (like when we eagerly deoptimized).
-  // In the case of an exception pending when deoptimized, we enter
-  // with a return address on the stack that points after the call we patched
-  // into the exception handler. We have the following register state:
-  //    rax: exception
-  //    rbx: exception handler
-  //    rdx: throwing pc
-  // So in this case we simply jam rdx into the useless return address and
-  // the stack looks just like we want.
-  //
-  // At this point we need to de-opt.  We save the argument return
-  // registers.  We call the first C routine, fetch_unroll_info().  This
-  // routine captures the return values and returns a structure which
-  // describes the current frame size and the sizes of all replacement frames.
-  // The current frame is compiled code and may contain many inlined
-  // functions, each with their own JVM state.  We pop the current frame, then
-  // push all the new frames.  Then we call the C routine unpack_frames() to
-  // populate these frames.  Finally unpack_frames() returns us the new target
-  // address.  Notice that callee-save registers are BLOWN here; they have
-  // already been captured in the vframeArray at the time the return PC was
-  // patched.
-  address start = __ pc();
-  Label cont;
-
-  // Prolog for non exception case!
-
-  // Save everything in sight.
-
-  map = RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words, false);
-  // Normal deoptimization
-  __ push(Deoptimization::Unpack_deopt);
-  __ jmp(cont);
-
-  int reexecute_offset = __ pc() - start;
-
-  // Reexecute case
-  // the return address is the pc that describes which bci to re-execute at
-
-  // No need to update map as each call to save_live_registers will produce identical oopmap
-  (void) RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words, false);
-
-  __ push(Deoptimization::Unpack_reexecute);
-  __ jmp(cont);
-
-  int exception_offset = __ pc() - start;
-
-  // Prolog for exception case
-
-  // all registers are dead at this entry point, except for rax and
-  // rdx which contain the exception oop and exception pc
-  // respectively.  Set them in TLS and fall thru to the
-  // unpack_with_exception_in_tls entry point.
-
-  __ get_thread(rdi);
-  __ movptr(Address(rdi, JavaThread::exception_pc_offset()), rdx);
-  __ movptr(Address(rdi, JavaThread::exception_oop_offset()), rax);
-
-  int exception_in_tls_offset = __ pc() - start;
-
-  // new implementation because exception oop is now passed in JavaThread
-
-  // Prolog for exception case
-  // All registers must be preserved because they might be used by LinearScan
-  // Exception oop and throwing PC are passed in JavaThread
-  // tos: stack at point of call to method that threw the exception (i.e. only
-  // args are on the stack, no return address)
-
-  // make room on stack for the return address
-  // It will be patched later with the throwing pc. The correct value is not
-  // available now because loading it from memory would destroy registers.
-  __ push(0);
-
-  // Save everything in sight.
-
-  // No need to update map as each call to save_live_registers will produce identical oopmap
-  (void) RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words, false);
-
-  // Now it is safe to overwrite any register
-
-  // store the correct deoptimization type
-  __ push(Deoptimization::Unpack_exception);
-
-  // load throwing pc from JavaThread and patch it as the return address
-  // of the current frame. Then clear the field in JavaThread
-  __ get_thread(rdi);
-  __ movptr(rdx, Address(rdi, JavaThread::exception_pc_offset()));
-  __ movptr(Address(rbp, wordSize), rdx);
-  __ movptr(Address(rdi, JavaThread::exception_pc_offset()), NULL_WORD);
-
-#ifdef ASSERT
-  // verify that there is really an exception oop in JavaThread
-  __ movptr(rax, Address(rdi, JavaThread::exception_oop_offset()));
-  __ verify_oop(rax);
-
-  // verify that there is no pending exception
-  Label no_pending_exception;
-  __ movptr(rax, Address(rdi, Thread::pending_exception_offset()));
-  __ testptr(rax, rax);
-  __ jcc(Assembler::zero, no_pending_exception);
-  __ stop("must not have pending exception here");
-  __ bind(no_pending_exception);
-#endif
-
-  __ bind(cont);
-
-  // Compiled code leaves the floating point stack dirty, empty it.
-  __ empty_FPU_stack();
-
-
-  // Call C code.  Need thread and this frame, but NOT official VM entry
-  // crud.  We cannot block on this call, no GC can happen.
-  __ get_thread(rcx);
-  __ push(rcx);
-  // fetch_unroll_info needs to call last_java_frame()
-  __ set_last_Java_frame(rcx, noreg, noreg, nullptr, noreg);
-
-  __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info)));
-
-  // Need to have an oopmap that tells fetch_unroll_info where to
-  // find any register it might need.
-
-  oop_maps->add_gc_map( __ pc()-start, map);
-
-  // Discard args to fetch_unroll_info
-  __ pop(rcx); // the thread pushed just before the call
-  __ pop(rcx); // the unpack kind pushed by the entry prolog
-
-  __ get_thread(rcx);
-  __ reset_last_Java_frame(rcx, false);
-
-  // Load UnrollBlock into EDI
-  __ mov(rdi, rax);
-
-  // Move the unpack kind to a safe place in the UnrollBlock because
-  // we are very short of registers
-
-  Address unpack_kind(rdi, Deoptimization::UnrollBlock::unpack_kind_offset());
-  // retrieve the deopt kind from the UnrollBlock.
-  __ movl(rax, unpack_kind);
-
-   Label noException;
-  __ cmpl(rax, Deoptimization::Unpack_exception);   // Was exception pending?
-  __ jcc(Assembler::notEqual, noException);
-  __ movptr(rax, Address(rcx, JavaThread::exception_oop_offset()));
-  __ movptr(rdx, Address(rcx, JavaThread::exception_pc_offset()));
-  __ movptr(Address(rcx, JavaThread::exception_oop_offset()), NULL_WORD);
-  __ movptr(Address(rcx, JavaThread::exception_pc_offset()), NULL_WORD);
-
-  __ verify_oop(rax);
-
-  // Overwrite the result registers with the exception results.
-  __ movptr(Address(rsp, RegisterSaver::raxOffset()*wordSize), rax);
-  __ movptr(Address(rsp, RegisterSaver::rdxOffset()*wordSize), rdx);
-
-  __ bind(noException);
-
-  // Stack is back to only having register save data on the stack.
-  // Now restore the result registers. Everything else is either dead or captured
-  // in the vframeArray.
-
-  RegisterSaver::restore_result_registers(masm);
-
-  // Non standard control word may be leaked out through a safepoint blob, and we can
-  // deopt at a poll point with the non standard control word. However, we should make
-  // sure the control word is correct after restore_result_registers.
-  __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
-
-  // All of the register save area has been popped off the stack. Only the
-  // return address remains.
-
-  // Pop all the frames we must move/replace.
-  //
-  // Frame picture (youngest to oldest)
-  // 1: self-frame (no frame link)
-  // 2: deopting frame  (no frame link)
-  // 3: caller of deopting frame (could be compiled/interpreted).
-  //
-  // Note: by leaving the return address of self-frame on the stack
-  // and using the size of frame 2 to adjust the stack
-  // when we are done the return to frame 3 will still be on the stack.
-
-  // Pop deoptimized frame
-  __ addptr(rsp, Address(rdi,Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset()));
-
-  // sp should be pointing at the return address to the caller (3)
-
-  // Pick up the initial fp we should save
-  // restore rbp before stack bang because if stack overflow is thrown it needs to be pushed (and preserved)
-  __ movptr(rbp, Address(rdi, Deoptimization::UnrollBlock::initial_info_offset()));
-
-#ifdef ASSERT
-  // Compilers generate code that bangs the stack by as much as the
-  // interpreter would need. So this stack banging should never
-  // trigger a fault. Verify that it does not on non-product builds.
-  __ movl(rbx, Address(rdi ,Deoptimization::UnrollBlock::total_frame_sizes_offset()));
-  __ bang_stack_size(rbx, rcx);
-#endif
-
-  // Load array of frame pcs into ECX
-  __ movptr(rcx,Address(rdi,Deoptimization::UnrollBlock::frame_pcs_offset()));
-
-  __ pop(rsi); // trash the old pc
-
-  // Load array of frame sizes into ESI
-  __ movptr(rsi,Address(rdi,Deoptimization::UnrollBlock::frame_sizes_offset()));
-
-  Address counter(rdi, Deoptimization::UnrollBlock::counter_temp_offset());
-
-  __ movl(rbx, Address(rdi, Deoptimization::UnrollBlock::number_of_frames_offset()));
-  __ movl(counter, rbx);
-
-  // Now adjust the caller's stack to make up for the extra locals
-  // but record the original sp so that we can save it in the skeletal interpreter
-  // frame and the stack walking of interpreter_sender will get the unextended sp
-  // value and not the "real" sp value.
-
-  Address sp_temp(rdi, Deoptimization::UnrollBlock::sender_sp_temp_offset());
-  __ movptr(sp_temp, rsp);
-  __ movl2ptr(rbx, Address(rdi, Deoptimization::UnrollBlock::caller_adjustment_offset()));
-  __ subptr(rsp, rbx);
-
-  // Push interpreter frames in a loop
-  Label loop;
-  __ bind(loop);
-  __ movptr(rbx, Address(rsi, 0));      // Load frame size
-  __ subptr(rbx, 2*wordSize);           // we'll push pc and rbp by hand
-  __ pushptr(Address(rcx, 0));          // save return address
-  __ enter();                           // save old & set new rbp
-  __ subptr(rsp, rbx);                  // Prolog!
-  __ movptr(rbx, sp_temp);              // sender's sp
-  // This value is corrected by layout_activation_impl
-  __ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), NULL_WORD);
-  __ movptr(Address(rbp, frame::interpreter_frame_sender_sp_offset * wordSize), rbx); // Make it walkable
-  __ movptr(sp_temp, rsp);              // pass to next frame
-  __ addptr(rsi, wordSize);             // Bump array pointer (sizes)
-  __ addptr(rcx, wordSize);             // Bump array pointer (pcs)
-  __ decrementl(counter);             // decrement counter
-  __ jcc(Assembler::notZero, loop);
-  __ pushptr(Address(rcx, 0));          // save final return address
-
-  // Re-push self-frame
-  __ enter();                           // save old & set new rbp
-
-  // Return address and rbp are in place.
-  // We'll push additional args later. Just allocate a full sized
-  // register save area
-  __ subptr(rsp, (frame_size_in_words-additional_words - 2) * wordSize);
-
-  // Restore frame locals after moving the frame
-  __ movptr(Address(rsp, RegisterSaver::raxOffset()*wordSize), rax);
-  __ movptr(Address(rsp, RegisterSaver::rdxOffset()*wordSize), rdx);
-  __ fstp_d(Address(rsp, RegisterSaver::fpResultOffset()*wordSize));   // Pop float stack and store in local
-  if( UseSSE>=2 ) __ movdbl(Address(rsp, RegisterSaver::xmm0Offset()*wordSize), xmm0);
-  if( UseSSE==1 ) __ movflt(Address(rsp, RegisterSaver::xmm0Offset()*wordSize), xmm0);
-
-  // Set up the args to unpack_frame
-
-  __ pushl(unpack_kind);                     // get the unpack_kind value
-  __ get_thread(rcx);
-  __ push(rcx);
-
-  // set last_Java_sp, last_Java_fp
-  __ set_last_Java_frame(rcx, noreg, rbp, nullptr, noreg);
-
-  // Call C code.  Need thread but NOT official VM entry
-  // crud.  We cannot block on this call, no GC can happen.  Call should
-  // restore return values to their stack-slots with the new SP.
-  __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames)));
-  // Set an oopmap for the call site
-  oop_maps->add_gc_map( __ pc()-start, new OopMap( frame_size_in_words, 0 ));
-
-  // rax contains the return result type
-  __ push(rax);
-
-  __ get_thread(rcx);
-  __ reset_last_Java_frame(rcx, false);
-
-  // Collect return values
-  __ movptr(rax,Address(rsp, (RegisterSaver::raxOffset() + additional_words + 1)*wordSize));
-  __ movptr(rdx,Address(rsp, (RegisterSaver::rdxOffset() + additional_words + 1)*wordSize));
-
-  // Clear floating point stack before returning to interpreter
-  __ empty_FPU_stack();
-
-  // Check if we should push the float or double return value.
-  Label results_done, yes_double_value;
-  __ cmpl(Address(rsp, 0), T_DOUBLE);
-  __ jcc (Assembler::zero, yes_double_value);
-  __ cmpl(Address(rsp, 0), T_FLOAT);
-  __ jcc (Assembler::notZero, results_done);
-
-  // return float value as expected by interpreter
-  if( UseSSE>=1 ) __ movflt(xmm0, Address(rsp, (RegisterSaver::xmm0Offset() + additional_words + 1)*wordSize));
-  else            __ fld_d(Address(rsp, (RegisterSaver::fpResultOffset() + additional_words + 1)*wordSize));
-  __ jmp(results_done);
-
-  // return double value as expected by interpreter
-  __ bind(yes_double_value);
-  if( UseSSE>=2 ) __ movdbl(xmm0, Address(rsp, (RegisterSaver::xmm0Offset() + additional_words + 1)*wordSize));
-  else            __ fld_d(Address(rsp, (RegisterSaver::fpResultOffset() + additional_words + 1)*wordSize));
-
-  __ bind(results_done);
-
-  // Pop self-frame.
-  __ leave();                              // Epilog!
-
-  // Jump to interpreter
-  __ ret(0);
-
-  // -------------
-  // make sure all code is generated
-  masm->flush();
-
-  _deopt_blob = DeoptimizationBlob::create( &buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words);
-  _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
-}
-
-//------------------------------generate_handler_blob------
-//
-// Generate a special Compile2Runtime blob that saves all registers,
-// setup oopmap, and calls safepoint code to stop the compiled code for
-// a safepoint.
-//
-SafepointBlob* SharedRuntime::generate_handler_blob(SharedStubId id, address call_ptr) {
-
-  // Account for thread arg in our frame
-  const int additional_words = 1;
-  int frame_size_in_words;
-
-  assert (StubRoutines::forward_exception_entry() != nullptr, "must be generated before");
-  assert(is_polling_page_id(id), "expected a polling page stub id");
-
-  ResourceMark rm;
-  OopMapSet *oop_maps = new OopMapSet();
-  OopMap* map;
-
-  // allocate space for the code
-  // setup code generation tools
-  const char* name = SharedRuntime::stub_name(id);
-  CodeBuffer   buffer(name, 2048, 1024);
-  MacroAssembler* masm = new MacroAssembler(&buffer);
-
-  const Register java_thread = rdi; // callee-saved for VC++
-  address start   = __ pc();
-  address call_pc = nullptr;
-  bool cause_return = (id == SharedStubId::polling_page_return_handler_id);
-  bool save_vectors = (id == SharedStubId::polling_page_vectors_safepoint_handler_id);
-
-  // If cause_return is true we are at a poll_return and there is
-  // the return address on the stack to the caller on the nmethod
-  // that is safepoint. We can leave this return on the stack and
-  // effectively complete the return and safepoint in the caller.
-  // Otherwise we push space for a return address that the safepoint
-  // handler will install later to make the stack walking sensible.
-  if (!cause_return)
-    __ push(rbx);  // Make room for return address (or push it again)
-
-  map = RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words, false, save_vectors);
-
-  // The following is basically a call_VM. However, we need the precise
-  // address of the call in order to generate an oopmap. Hence, we do all the
-  // work ourselves.
-
-  // Push thread argument and setup last_Java_sp
-  __ get_thread(java_thread);
-  __ push(java_thread);
-  __ set_last_Java_frame(java_thread, noreg, noreg, nullptr, noreg);
-
-  // if this was not a poll_return then we need to correct the return address now.
-  if (!cause_return) {
-    // Get the return pc saved by the signal handler and stash it in its appropriate place on the stack.
-    // Additionally, rbx is a callee saved register and we can look at it later to determine
-    // if someone changed the return address for us!
-    __ movptr(rbx, Address(java_thread, JavaThread::saved_exception_pc_offset()));
-    __ movptr(Address(rbp, wordSize), rbx);
-  }
-
-  // do the call
-  __ call(RuntimeAddress(call_ptr));
-
-  // Set an oopmap for the call site.  This oopmap will map all
-  // oop-registers and debug-info registers as callee-saved.  This
-  // will allow deoptimization at this safepoint to find all possible
-  // debug-info recordings, as well as let GC find all oops.
-
-  oop_maps->add_gc_map( __ pc() - start, map);
-
-  // Discard arg
-  __ pop(rcx);
-
-  Label noException;
-
-  // Clear last_Java_sp again
-  __ get_thread(java_thread);
-  __ reset_last_Java_frame(java_thread, false);
-
-  __ cmpptr(Address(java_thread, Thread::pending_exception_offset()), NULL_WORD);
-  __ jcc(Assembler::equal, noException);
-
-  // Exception pending
-  RegisterSaver::restore_live_registers(masm, save_vectors);
-
-  __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
-
-  __ bind(noException);
-
-  Label no_adjust, bail, not_special;
-  if (!cause_return) {
-    // If our stashed return pc was modified by the runtime we avoid touching it
-    __ cmpptr(rbx, Address(rbp, wordSize));
-    __ jccb(Assembler::notEqual, no_adjust);
-
-    // Skip over the poll instruction.
-    // See NativeInstruction::is_safepoint_poll()
-    // Possible encodings:
-    //      85 00       test   %eax,(%rax)
-    //      85 01       test   %eax,(%rcx)
-    //      85 02       test   %eax,(%rdx)
-    //      85 03       test   %eax,(%rbx)
-    //      85 06       test   %eax,(%rsi)
-    //      85 07       test   %eax,(%rdi)
-    //
-    //      85 04 24    test   %eax,(%rsp)
-    //      85 45 00    test   %eax,0x0(%rbp)
-
-#ifdef ASSERT
-    __ movptr(rax, rbx); // remember where 0x85 should be, for verification below
-#endif
-    // rsp/rbp base encoding takes 3 bytes with the following register values:
-    // rsp 0x04
-    // rbp 0x05
-    __ movzbl(rcx, Address(rbx, 1));
-    __ andptr(rcx, 0x07); // looking for 0x04 .. 0x05
-    __ subptr(rcx, 4);    // looking for 0x00 .. 0x01
-    __ cmpptr(rcx, 1);
-    __ jcc(Assembler::above, not_special);
-    __ addptr(rbx, 1);
-    __ bind(not_special);
-#ifdef ASSERT
-    // Verify the correct encoding of the poll we're about to skip.
-    __ cmpb(Address(rax, 0), NativeTstRegMem::instruction_code_memXregl);
-    __ jcc(Assembler::notEqual, bail);
-    // Mask out the modrm bits
-    __ testb(Address(rax, 1), NativeTstRegMem::modrm_mask);
-    // rax encodes to 0, so if the bits are nonzero it's incorrect
-    __ jcc(Assembler::notZero, bail);
-#endif
-    // Adjust return pc forward to step over the safepoint poll instruction
-    __ addptr(rbx, 2);
-    __ movptr(Address(rbp, wordSize), rbx);
-  }
-
-  __ bind(no_adjust);
-  // Normal exit, register restoring and exit
-  RegisterSaver::restore_live_registers(masm, save_vectors);
-
-  __ ret(0);
-
-#ifdef ASSERT
-  __ bind(bail);
-  __ stop("Attempting to adjust pc to skip safepoint poll but the return point is not what we expected");
-#endif
-
-  // make sure all code is generated
-  masm->flush();
-
-  // Fill-out other meta info
-  return SafepointBlob::create(&buffer, oop_maps, frame_size_in_words);
-}
-
-//
-// generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss
-//
-// Generate a stub that calls into vm to find out the proper destination
-// of a java call. All the argument registers are live at this point
-// but since this is generic code we don't know what they are and the caller
-// must do any gc of the args.
-//
-RuntimeStub* SharedRuntime::generate_resolve_blob(SharedStubId id, address destination) {
-  assert (StubRoutines::forward_exception_entry() != nullptr, "must be generated before");
-  assert(is_resolve_id(id), "expected a resolve stub id");
-
-  // allocate space for the code
-  ResourceMark rm;
-
-  const char* name = SharedRuntime::stub_name(id);
-  CodeBuffer buffer(name, 1000, 512);
-  MacroAssembler* masm                = new MacroAssembler(&buffer);
-
-  int frame_size_words;
-  enum frame_layout {
-                thread_off,
-                extra_words };
-
-  OopMapSet *oop_maps = new OopMapSet();
-  OopMap* map = nullptr;
-
-  int start = __ offset();
-
-  map = RegisterSaver::save_live_registers(masm, extra_words, &frame_size_words);
-
-  int frame_complete = __ offset();
-
-  const Register thread = rdi;
-  __ get_thread(rdi);
-
-  __ push(thread);
-  __ set_last_Java_frame(thread, noreg, rbp, nullptr, noreg);
-
-  __ call(RuntimeAddress(destination));
-
-
-  // Set an oopmap for the call site.
-  // We need this not only for callee-saved registers, but also for volatile
-  // registers that the compiler might be keeping live across a safepoint.
-
-  oop_maps->add_gc_map( __ offset() - start, map);
-
-  // rax, contains the address we are going to jump to assuming no exception got installed
-
-  __ addptr(rsp, wordSize);
-
-  // clear last_Java_sp
-  __ reset_last_Java_frame(thread, true);
-  // check for pending exceptions
-  Label pending;
-  __ cmpptr(Address(thread, Thread::pending_exception_offset()), NULL_WORD);
-  __ jcc(Assembler::notEqual, pending);
-
-  // get the returned Method*
-  __ get_vm_result_2(rbx, thread);
-  __ movptr(Address(rsp, RegisterSaver::rbx_offset() * wordSize), rbx);
-
-  __ movptr(Address(rsp, RegisterSaver::rax_offset() * wordSize), rax);
-
-  RegisterSaver::restore_live_registers(masm);
-
-  // We are back to the original state on entry and ready to go.
-
-  __ jmp(rax);
-
-  // Pending exception after the safepoint
-
-  __ bind(pending);
-
-  RegisterSaver::restore_live_registers(masm);
-
-  // exception pending => remove activation and forward to exception handler
-
-  __ get_thread(thread);
-  __ movptr(Address(thread, JavaThread::vm_result_offset()), NULL_WORD);
-  __ movptr(rax, Address(thread, Thread::pending_exception_offset()));
-  __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
-
-  // -------------
-  // make sure all code is generated
-  masm->flush();
-
-  // return the  blob
-  // frame_size_words or bytes??
-  return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_words, oop_maps, true);
-}
-
-  //------------------------------------------------------------------------------------------------------------------------
-  // Continuation point for throwing of implicit exceptions that are not handled in
-  // the current activation. Fabricates an exception oop and initiates normal
-  // exception dispatching in this frame.
-  //
-  // Previously the compiler (c2) allowed for callee save registers on Java calls.
-  // This is no longer true after adapter frames were removed but could possibly
-  // be brought back in the future if the interpreter code was reworked and it
-  // was deemed worthwhile. The comment below was left to describe what must
-  // happen here if callee saves were resurrected. As it stands now this stub
-  // could actually be a vanilla BufferBlob and have now oopMap at all.
-  // Since it doesn't make much difference we've chosen to leave it the
-  // way it was in the callee save days and keep the comment.
-
-  // If we need to preserve callee-saved values we need a callee-saved oop map and
-  // therefore have to make these stubs into RuntimeStubs rather than BufferBlobs.
-  // If the compiler needs all registers to be preserved between the fault
-  // point and the exception handler then it must assume responsibility for that in
-  // AbstractCompiler::continuation_for_implicit_null_exception or
-  // continuation_for_implicit_division_by_zero_exception. All other implicit
-  // exceptions (e.g., NullPointerException or AbstractMethodError on entry) are
-  // either at call sites or otherwise assume that stack unwinding will be initiated,
-  // so caller saved registers were assumed volatile in the compiler.
-RuntimeStub* SharedRuntime::generate_throw_exception(SharedStubId id, address runtime_entry) {
-  assert(is_throw_id(id), "expected a throw stub id");
-
-  const char* name = SharedRuntime::stub_name(id);
-
-  // Information about frame layout at time of blocking runtime call.
-  // Note that we only have to preserve callee-saved registers since
-  // the compilers are responsible for supplying a continuation point
-  // if they expect all registers to be preserved.
-  enum layout {
-    thread_off,    // last_java_sp
-    arg1_off,
-    arg2_off,
-    rbp_off,       // callee saved register
-    ret_pc,
-    framesize
-  };
-
-  int insts_size = 256;
-  int locs_size  = 32;
-
-  ResourceMark rm;
-  const char* timer_msg = "SharedRuntime generate_throw_exception";
-  TraceTime timer(timer_msg, TRACETIME_LOG(Info, startuptime));
-
-  CodeBuffer code(name, insts_size, locs_size);
-  OopMapSet* oop_maps  = new OopMapSet();
-  MacroAssembler* masm = new MacroAssembler(&code);
-
-  address start = __ pc();
-
-  // This is an inlined and slightly modified version of call_VM
-  // which has the ability to fetch the return PC out of
-  // thread-local storage and also sets up last_Java_sp slightly
-  // differently than the real call_VM
-  Register java_thread = rbx;
-  __ get_thread(java_thread);
-
-  __ enter(); // required for proper stackwalking of RuntimeStub frame
-
-  // pc and rbp, already pushed
-  __ subptr(rsp, (framesize-2) * wordSize); // prolog
-
-  // Frame is now completed as far as size and linkage.
-
-  int frame_complete = __ pc() - start;
-
-  // push java thread (becomes first argument of C function)
-  __ movptr(Address(rsp, thread_off * wordSize), java_thread);
-  // Set up last_Java_sp and last_Java_fp
-  __ set_last_Java_frame(java_thread, rsp, rbp, nullptr, noreg);
-
-  // Call runtime
-  BLOCK_COMMENT("call runtime_entry");
-  __ call(RuntimeAddress(runtime_entry));
-  // Generate oop map
-  OopMap* map =  new OopMap(framesize, 0);
-  oop_maps->add_gc_map(__ pc() - start, map);
-
-  // restore the thread (cannot use the pushed argument since arguments
-  // may be overwritten by C code generated by an optimizing compiler);
-  // however can use the register value directly if it is callee saved.
-  __ get_thread(java_thread);
-
-  __ reset_last_Java_frame(java_thread, true);
-
-  __ leave(); // required for proper stackwalking of RuntimeStub frame
-
-  // check for pending exceptions
-#ifdef ASSERT
-  Label L;
-  __ cmpptr(Address(java_thread, Thread::pending_exception_offset()), NULL_WORD);
-  __ jcc(Assembler::notEqual, L);
-  __ should_not_reach_here();
-  __ bind(L);
-#endif /* ASSERT */
-  __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
-
-
-  RuntimeStub* stub = RuntimeStub::new_runtime_stub(name, &code, frame_complete, framesize, oop_maps, false);
-  return stub;
-}
-
-#if INCLUDE_JFR
-
-static void jfr_prologue(address the_pc, MacroAssembler* masm) {
-  Register java_thread = rdi;
-  __ get_thread(java_thread);
-  __ set_last_Java_frame(java_thread, rsp, rbp, the_pc, noreg);
-  __ movptr(Address(rsp, 0), java_thread);
-}
-
-// The handle is dereferenced through a load barrier.
-static void jfr_epilogue(MacroAssembler* masm) {
-  Register java_thread = rdi;
-  __ get_thread(java_thread);
-  __ reset_last_Java_frame(java_thread, true);
-}
-
-// For c2: c_rarg0 is junk, call to runtime to write a checkpoint.
-// It returns a jobject handle to the event writer.
-// The handle is dereferenced and the return value is the event writer oop.
-RuntimeStub* SharedRuntime::generate_jfr_write_checkpoint() {
-  enum layout {
-    FPUState_off         = 0,
-    rbp_off              = FPUStateSizeInWords,
-    rdi_off,
-    rsi_off,
-    rcx_off,
-    rbx_off,
-    saved_argument_off,
-    saved_argument_off2, // 2nd half of double
-    framesize
-  };
-
-  int insts_size = 1024;
-  int locs_size = 64;
-  const char* name = SharedRuntime::stub_name(SharedStubId::jfr_write_checkpoint_id);
-  CodeBuffer code(name, insts_size, locs_size);
-  OopMapSet* oop_maps = new OopMapSet();
-  MacroAssembler* masm = new MacroAssembler(&code);
-
-  address start = __ pc();
-  __ enter();
-  int frame_complete = __ pc() - start;
-  address the_pc = __ pc();
-  jfr_prologue(the_pc, masm);
-  __ call_VM_leaf(CAST_FROM_FN_PTR(address, JfrIntrinsicSupport::write_checkpoint), 1);
-  jfr_epilogue(masm);
-  __ resolve_global_jobject(rax, rdi, rdx);
-  __ leave();
-  __ ret(0);
-
-  OopMap* map = new OopMap(framesize, 1); // rbp
-  oop_maps->add_gc_map(the_pc - start, map);
-
-  RuntimeStub* stub = // codeBlob framesize is in words (not VMRegImpl::slot_size)
-    RuntimeStub::new_runtime_stub(name, &code, frame_complete,
-                                  (framesize >> (LogBytesPerWord - LogBytesPerInt)),
-                                  oop_maps, false);
-  return stub;
-}
-
-// For c2: call to return a leased buffer.
-RuntimeStub* SharedRuntime::generate_jfr_return_lease() {
-  enum layout {
-    FPUState_off = 0,
-    rbp_off = FPUStateSizeInWords,
-    rdi_off,
-    rsi_off,
-    rcx_off,
-    rbx_off,
-    saved_argument_off,
-    saved_argument_off2, // 2nd half of double
-    framesize
-  };
-
-  int insts_size = 1024;
-  int locs_size = 64;
-  const char* name = SharedRuntime::stub_name(SharedStubId::jfr_return_lease_id);
-  CodeBuffer code(name, insts_size, locs_size);
-  OopMapSet* oop_maps = new OopMapSet();
-  MacroAssembler* masm = new MacroAssembler(&code);
-
-  address start = __ pc();
-  __ enter();
-  int frame_complete = __ pc() - start;
-  address the_pc = __ pc();
-  jfr_prologue(the_pc, masm);
-  __ call_VM_leaf(CAST_FROM_FN_PTR(address, JfrIntrinsicSupport::return_lease), 1);
-  jfr_epilogue(masm);
-  __ leave();
-  __ ret(0);
-
-  OopMap* map = new OopMap(framesize, 1); // rbp
-  oop_maps->add_gc_map(the_pc - start, map);
-
-  RuntimeStub* stub = // codeBlob framesize is in words (not VMRegImpl::slot_size)
-    RuntimeStub::new_runtime_stub(name, &code, frame_complete,
-                                  (framesize >> (LogBytesPerWord - LogBytesPerInt)),
-                                  oop_maps, false);
-  return stub;
-}
-
-#endif // INCLUDE_JFR
diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_32.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_32.cpp
deleted file mode 100644
index 9ec556777b0..00000000000
--- a/src/hotspot/cpu/x86/stubGenerator_x86_32.cpp
+++ /dev/null
@@ -1,4314 +0,0 @@
-/*
- * Copyright (c) 1999, 2025, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-#include "asm/macroAssembler.hpp"
-#include "asm/macroAssembler.inline.hpp"
-#include "compiler/oopMap.hpp"
-#include "gc/shared/barrierSet.hpp"
-#include "gc/shared/barrierSetAssembler.hpp"
-#include "gc/shared/barrierSetNMethod.hpp"
-#include "interpreter/interpreter.hpp"
-#include "memory/universe.hpp"
-#include "nativeInst_x86.hpp"
-#include "oops/instanceOop.hpp"
-#include "oops/method.hpp"
-#include "oops/objArrayKlass.hpp"
-#include "oops/oop.inline.hpp"
-#include "prims/methodHandles.hpp"
-#include "runtime/frame.inline.hpp"
-#include "runtime/handles.inline.hpp"
-#include "runtime/javaThread.hpp"
-#include "runtime/sharedRuntime.hpp"
-#include "runtime/stubCodeGenerator.hpp"
-#include "runtime/stubRoutines.hpp"
-#ifdef COMPILER2
-#include "opto/runtime.hpp"
-#endif
-
-// Declaration and definition of StubGenerator (no .hpp file).
-// For a more detailed description of the stub routine structure
-// see the comment in stubRoutines.hpp
-
-#define __ _masm->
-#define a__ ((Assembler*)_masm)->
-
-#ifdef PRODUCT
-#define BLOCK_COMMENT(str) /* nothing */
-#else
-#define BLOCK_COMMENT(str) __ block_comment(str)
-#endif
-
-#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
-
-const int FPU_CNTRL_WRD_MASK = 0xFFFF;
-
-ATTRIBUTE_ALIGNED(16) static const uint32_t KEY_SHUFFLE_MASK[] = {
-    0x00010203UL, 0x04050607UL, 0x08090A0BUL, 0x0C0D0E0FUL,
-};
-
-ATTRIBUTE_ALIGNED(16) static const uint32_t COUNTER_SHUFFLE_MASK[] = {
-    0x0C0D0E0FUL, 0x08090A0BUL, 0x04050607UL, 0x00010203UL,
-};
-
-ATTRIBUTE_ALIGNED(16) static const uint32_t GHASH_BYTE_SWAP_MASK[] = {
-    0x0C0D0E0FUL, 0x08090A0BUL, 0x04050607UL, 0x00010203UL,
-};
-
-ATTRIBUTE_ALIGNED(16) static const uint32_t GHASH_LONG_SWAP_MASK[] = {
-    0x0B0A0908UL, 0x0F0E0D0CUL, 0x03020100UL, 0x07060504UL,
-};
-
-// -------------------------------------------------------------------------------------------------------------------------
-// Stub Code definitions
-
-class StubGenerator: public StubCodeGenerator {
- private:
-
-#ifdef PRODUCT
-#define inc_counter_np(counter) ((void)0)
-#else
-  void inc_counter_np_(uint& counter) {
-    __ incrementl(ExternalAddress((address)&counter));
-  }
-#define inc_counter_np(counter) \
-  BLOCK_COMMENT("inc_counter " #counter); \
-  inc_counter_np_(counter);
-#endif //PRODUCT
-
-  void inc_copy_counter_np(BasicType t) {
-#ifndef PRODUCT
-    switch (t) {
-    case T_BYTE:    inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr); return;
-    case T_SHORT:   inc_counter_np(SharedRuntime::_jshort_array_copy_ctr); return;
-    case T_INT:     inc_counter_np(SharedRuntime::_jint_array_copy_ctr); return;
-    case T_LONG:    inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); return;
-    case T_OBJECT:  inc_counter_np(SharedRuntime::_oop_array_copy_ctr); return;
-    default:        ShouldNotReachHere();
-    }
-#endif //PRODUCT
-  }
-
-  //------------------------------------------------------------------------------------------------------------------------
-  // Call stubs are used to call Java from C
-  //
-  //    [ return_from_Java     ] <--- rsp
-  //    [ argument word n      ]
-  //      ...
-  // -N [ argument word 1      ]
-  // -7 [ Possible padding for stack alignment ]
-  // -6 [ Possible padding for stack alignment ]
-  // -5 [ Possible padding for stack alignment ]
-  // -4 [ mxcsr save           ] <--- rsp_after_call
-  // -3 [ saved rbx,            ]
-  // -2 [ saved rsi            ]
-  // -1 [ saved rdi            ]
-  //  0 [ saved rbp,            ] <--- rbp,
-  //  1 [ return address       ]
-  //  2 [ ptr. to call wrapper ]
-  //  3 [ result               ]
-  //  4 [ result_type          ]
-  //  5 [ method               ]
-  //  6 [ entry_point          ]
-  //  7 [ parameters           ]
-  //  8 [ parameter_size       ]
-  //  9 [ thread               ]
-
-
-  address generate_call_stub(address& return_address) {
-    StubGenStubId stub_id = StubGenStubId::call_stub_id;
-    StubCodeMark mark(this, stub_id);
-    address start = __ pc();
-
-    // stub code parameters / addresses
-    assert(frame::entry_frame_call_wrapper_offset == 2, "adjust this code");
-    bool  sse_save = false;
-    const Address rsp_after_call(rbp, -4 * wordSize); // same as in generate_catch_exception()!
-    const int     locals_count_in_bytes  (4*wordSize);
-    const Address mxcsr_save    (rbp, -4 * wordSize);
-    const Address saved_rbx     (rbp, -3 * wordSize);
-    const Address saved_rsi     (rbp, -2 * wordSize);
-    const Address saved_rdi     (rbp, -1 * wordSize);
-    const Address result        (rbp,  3 * wordSize);
-    const Address result_type   (rbp,  4 * wordSize);
-    const Address method        (rbp,  5 * wordSize);
-    const Address entry_point   (rbp,  6 * wordSize);
-    const Address parameters    (rbp,  7 * wordSize);
-    const Address parameter_size(rbp,  8 * wordSize);
-    const Address thread        (rbp,  9 * wordSize); // same as in generate_catch_exception()!
-    sse_save =  UseSSE > 0;
-
-    // stub code
-    __ enter();
-    __ movptr(rcx, parameter_size);              // parameter counter
-    __ shlptr(rcx, Interpreter::logStackElementSize); // convert parameter count to bytes
-    __ addptr(rcx, locals_count_in_bytes);       // reserve space for register saves
-    __ subptr(rsp, rcx);
-    __ andptr(rsp, -(StackAlignmentInBytes));    // Align stack
-
-    // save rdi, rsi, & rbx, according to C calling conventions
-    __ movptr(saved_rdi, rdi);
-    __ movptr(saved_rsi, rsi);
-    __ movptr(saved_rbx, rbx);
-
-    // save and initialize %mxcsr
-    if (sse_save) {
-      Label skip_ldmx;
-      __ cmp32_mxcsr_std(mxcsr_save, rax);
-      __ jcc(Assembler::equal, skip_ldmx);
-      __ ldmxcsr(mxcsr_std);
-      __ bind(skip_ldmx);
-    }
-
-    // make sure the control word is correct.
-    __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
-
-#ifdef ASSERT
-    // make sure we have no pending exceptions
-    { Label L;
-      __ movptr(rcx, thread);
-      __ cmpptr(Address(rcx, Thread::pending_exception_offset()), NULL_WORD);
-      __ jcc(Assembler::equal, L);
-      __ stop("StubRoutines::call_stub: entered with pending exception");
-      __ bind(L);
-    }
-#endif
-
-    // pass parameters if any
-    BLOCK_COMMENT("pass parameters if any");
-    Label parameters_done;
-    __ movl(rcx, parameter_size);  // parameter counter
-    __ testl(rcx, rcx);
-    __ jcc(Assembler::zero, parameters_done);
-
-    // parameter passing loop
-
-    Label loop;
-    // Copy Java parameters in reverse order (receiver last)
-    // Note that the argument order is inverted in the process
-    // source is rdx[rcx: N-1..0]
-    // dest   is rsp[rbx: 0..N-1]
-
-    __ movptr(rdx, parameters);          // parameter pointer
-    __ xorptr(rbx, rbx);
-
-    __ BIND(loop);
-
-    // get parameter
-    __ movptr(rax, Address(rdx, rcx, Interpreter::stackElementScale(), -wordSize));
-    __ movptr(Address(rsp, rbx, Interpreter::stackElementScale(),
-                    Interpreter::expr_offset_in_bytes(0)), rax);          // store parameter
-    __ increment(rbx);
-    __ decrement(rcx);
-    __ jcc(Assembler::notZero, loop);
-
-    // call Java function
-    __ BIND(parameters_done);
-    __ movptr(rbx, method);           // get Method*
-    __ movptr(rax, entry_point);      // get entry_point
-    __ mov(rsi, rsp);                 // set sender sp
-    BLOCK_COMMENT("call Java function");
-    __ call(rax);
-
-    BLOCK_COMMENT("call_stub_return_address:");
-    return_address = __ pc();
-
-#ifdef COMPILER2
-    {
-      Label L_skip;
-      if (UseSSE >= 2) {
-        __ verify_FPU(0, "call_stub_return");
-      } else {
-        for (int i = 1; i < 8; i++) {
-          __ ffree(i);
-        }
-
-        // UseSSE <= 1 so double result should be left on TOS
-        __ movl(rsi, result_type);
-        __ cmpl(rsi, T_DOUBLE);
-        __ jcc(Assembler::equal, L_skip);
-        if (UseSSE == 0) {
-          // UseSSE == 0 so float result should be left on TOS
-          __ cmpl(rsi, T_FLOAT);
-          __ jcc(Assembler::equal, L_skip);
-        }
-        __ ffree(0);
-      }
-      __ BIND(L_skip);
-    }
-#endif // COMPILER2
-
-    // store result depending on type
-    // (everything that is not T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT)
-    __ movptr(rdi, result);
-    Label is_long, is_float, is_double, exit;
-    __ movl(rsi, result_type);
-    __ cmpl(rsi, T_LONG);
-    __ jcc(Assembler::equal, is_long);
-    __ cmpl(rsi, T_FLOAT);
-    __ jcc(Assembler::equal, is_float);
-    __ cmpl(rsi, T_DOUBLE);
-    __ jcc(Assembler::equal, is_double);
-
-    // handle T_INT case
-    __ movl(Address(rdi, 0), rax);
-    __ BIND(exit);
-
-    // check that FPU stack is empty
-    __ verify_FPU(0, "generate_call_stub");
-
-    // pop parameters
-    __ lea(rsp, rsp_after_call);
-
-    // restore %mxcsr
-    if (sse_save) {
-      __ ldmxcsr(mxcsr_save);
-    }
-
-    // restore rdi, rsi and rbx,
-    __ movptr(rbx, saved_rbx);
-    __ movptr(rsi, saved_rsi);
-    __ movptr(rdi, saved_rdi);
-    __ addptr(rsp, 4*wordSize);
-
-    // return
-    __ pop(rbp);
-    __ ret(0);
-
-    // handle return types different from T_INT
-    __ BIND(is_long);
-    __ movl(Address(rdi, 0 * wordSize), rax);
-    __ movl(Address(rdi, 1 * wordSize), rdx);
-    __ jmp(exit);
-
-    __ BIND(is_float);
-    // interpreter uses xmm0 for return values
-    if (UseSSE >= 1) {
-      __ movflt(Address(rdi, 0), xmm0);
-    } else {
-      __ fstp_s(Address(rdi, 0));
-    }
-    __ jmp(exit);
-
-    __ BIND(is_double);
-    // interpreter uses xmm0 for return values
-    if (UseSSE >= 2) {
-      __ movdbl(Address(rdi, 0), xmm0);
-    } else {
-      __ fstp_d(Address(rdi, 0));
-    }
-    __ jmp(exit);
-
-    return start;
-  }
-
-
-  //------------------------------------------------------------------------------------------------------------------------
-  // Return point for a Java call if there's an exception thrown in Java code.
-  // The exception is caught and transformed into a pending exception stored in
-  // JavaThread that can be tested from within the VM.
-  //
-  // Note: Usually the parameters are removed by the callee. In case of an exception
-  //       crossing an activation frame boundary, that is not the case if the callee
-  //       is compiled code => need to setup the rsp.
-  //
-  // rax,: exception oop
-
-  address generate_catch_exception() {
-    StubGenStubId stub_id = StubGenStubId::catch_exception_id;
-    StubCodeMark mark(this, stub_id);
-    const Address rsp_after_call(rbp, -4 * wordSize); // same as in generate_call_stub()!
-    const Address thread        (rbp,  9 * wordSize); // same as in generate_call_stub()!
-    address start = __ pc();
-
-    // get thread directly
-    __ movptr(rcx, thread);
-#ifdef ASSERT
-    // verify that threads correspond
-    { Label L;
-      __ get_thread(rbx);
-      __ cmpptr(rbx, rcx);
-      __ jcc(Assembler::equal, L);
-      __ stop("StubRoutines::catch_exception: threads must correspond");
-      __ bind(L);
-    }
-#endif
-    // set pending exception
-    __ verify_oop(rax);
-    __ movptr(Address(rcx, Thread::pending_exception_offset()), rax);
-    __ lea(Address(rcx, Thread::exception_file_offset()),
-           ExternalAddress((address)__FILE__), noreg);
-    __ movl(Address(rcx, Thread::exception_line_offset()), __LINE__ );
-    // complete return to VM
-    assert(StubRoutines::_call_stub_return_address != nullptr, "_call_stub_return_address must have been generated before");
-    __ jump(RuntimeAddress(StubRoutines::_call_stub_return_address));
-
-    return start;
-  }
-
-
-  //------------------------------------------------------------------------------------------------------------------------
-  // Continuation point for runtime calls returning with a pending exception.
-  // The pending exception check happened in the runtime or native call stub.
-  // The pending exception in Thread is converted into a Java-level exception.
-  //
-  // Contract with Java-level exception handlers:
-  // rax: exception
-  // rdx: throwing pc
-  //
-  // NOTE: At entry of this stub, exception-pc must be on stack !!
-
-  address generate_forward_exception() {
-    StubGenStubId stub_id = StubGenStubId::forward_exception_id;
-    StubCodeMark mark(this, stub_id);
-    address start = __ pc();
-    const Register thread = rcx;
-
-    // other registers used in this stub
-    const Register exception_oop = rax;
-    const Register handler_addr  = rbx;
-    const Register exception_pc  = rdx;
-
-    // Upon entry, the sp points to the return address returning into Java
-    // (interpreted or compiled) code; i.e., the return address becomes the
-    // throwing pc.
-    //
-    // Arguments pushed before the runtime call are still on the stack but
-    // the exception handler will reset the stack pointer -> ignore them.
-    // A potential result in registers can be ignored as well.
-
-#ifdef ASSERT
-    // make sure this code is only executed if there is a pending exception
-    { Label L;
-      __ get_thread(thread);
-      __ cmpptr(Address(thread, Thread::pending_exception_offset()), NULL_WORD);
-      __ jcc(Assembler::notEqual, L);
-      __ stop("StubRoutines::forward exception: no pending exception (1)");
-      __ bind(L);
-    }
-#endif
-
-    // compute exception handler into rbx,
-    __ get_thread(thread);
-    __ movptr(exception_pc, Address(rsp, 0));
-    BLOCK_COMMENT("call exception_handler_for_return_address");
-    __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), thread, exception_pc);
-    __ mov(handler_addr, rax);
-
-    // setup rax & rdx, remove return address & clear pending exception
-    __ get_thread(thread);
-    __ pop(exception_pc);
-    __ movptr(exception_oop, Address(thread, Thread::pending_exception_offset()));
-    __ movptr(Address(thread, Thread::pending_exception_offset()), NULL_WORD);
-
-#ifdef ASSERT
-    // make sure exception is set
-    { Label L;
-      __ testptr(exception_oop, exception_oop);
-      __ jcc(Assembler::notEqual, L);
-      __ stop("StubRoutines::forward exception: no pending exception (2)");
-      __ bind(L);
-    }
-#endif
-
-    // Verify that there is really a valid exception in RAX.
-    __ verify_oop(exception_oop);
-
-    // continue at exception handler (return address removed)
-    // rax: exception
-    // rbx: exception handler
-    // rdx: throwing pc
-    __ jmp(handler_addr);
-
-    return start;
-  }
-
-  //----------------------------------------------------------------------------------------------------
-  // Support for void verify_mxcsr()
-  //
-  // This routine is used with -Xcheck:jni to verify that native
-  // JNI code does not return to Java code without restoring the
-  // MXCSR register to our expected state.
-
-
-  address generate_verify_mxcsr() {
-    StubGenStubId stub_id = StubGenStubId::verify_mxcsr_id;
-    StubCodeMark mark(this, stub_id);
-    address start = __ pc();
-
-    const Address mxcsr_save(rsp, 0);
-
-    if (CheckJNICalls && UseSSE > 0 ) {
-      Label ok_ret;
-      __ push(rax);
-      __ subptr(rsp, wordSize);      // allocate a temp location
-      __ cmp32_mxcsr_std(mxcsr_save, rax);
-      __ jcc(Assembler::equal, ok_ret);
-
-      __ warn("MXCSR changed by native JNI code.");
-
-      ExternalAddress mxcsr_std(StubRoutines::x86::addr_mxcsr_std());
-      __ ldmxcsr(mxcsr_std);
-
-      __ bind(ok_ret);
-      __ addptr(rsp, wordSize);
-      __ pop(rax);
-    }
-
-    __ ret(0);
-
-    return start;
-  }
-
-
-  //---------------------------------------------------------------------------
-  // Support for void verify_fpu_cntrl_wrd()
-  //
-  // This routine is used with -Xcheck:jni to verify that native
-  // JNI code does not return to Java code without restoring the
-  // FP control word to our expected state.
-
-  address generate_verify_fpu_cntrl_wrd() {
-    StubGenStubId stub_id = StubGenStubId::verify_fpu_cntrl_word_id;
-    StubCodeMark mark(this, stub_id);
-    address start = __ pc();
-
-    const Address fpu_cntrl_wrd_save(rsp, 0);
-
-    if (CheckJNICalls) {
-      Label ok_ret;
-      __ push(rax);
-      __ subptr(rsp, wordSize);      // allocate a temp location
-      __ fnstcw(fpu_cntrl_wrd_save);
-      __ movl(rax, fpu_cntrl_wrd_save);
-      __ andl(rax, FPU_CNTRL_WRD_MASK);
-      ExternalAddress fpu_std(StubRoutines::x86::addr_fpu_cntrl_wrd_std());
-      __ cmp32(rax, fpu_std);
-      __ jcc(Assembler::equal, ok_ret);
-
-      __ warn("Floating point control word changed by native JNI code.");
-
-      __ fldcw(fpu_std);
-
-      __ bind(ok_ret);
-      __ addptr(rsp, wordSize);
-      __ pop(rax);
-    }
-
-    __ ret(0);
-
-    return start;
-  }
-
-  //---------------------------------------------------------------------------
-  // Wrapper for slow-case handling of double-to-integer conversion
-  // d2i or f2i fast case failed either because it is nan or because
-  // of under/overflow.
-  // Input:  FPU TOS: float value
-  // Output: rax, (rdx): integer (long) result
-  // Params: t only selects the block comment (T_INT / T_LONG); fcn is the C routine that is called
-  address generate_d2i_wrapper(BasicType t, address fcn) {
-    StubGenStubId stub_id = StubGenStubId::d2i_wrapper_id;
-    StubCodeMark mark(this, stub_id);
-    address start = __ pc();
-
-  // Frame layout in words from rsp once push_FPU_state() has run:
-  // FPU state first, then the registers in reverse push order, then the saved argument
-  enum layout { FPUState_off         = 0,
-                rbp_off              = FPUStateSizeInWords,
-                rdi_off,
-                rsi_off,
-                rcx_off,
-                rbx_off,
-                saved_argument_off,
-                saved_argument_off2, // 2nd half of double
-                framesize
-  };
-
-  assert(FPUStateSizeInWords == 27, "update stack layout");
-
-    // Save outgoing argument to stack across push_FPU_state()
-    __ subptr(rsp, wordSize * 2);
-    __ fstp_d(Address(rsp, 0));
-
-    // Save CPU & FPU state
-    __ push(rbx);
-    __ push(rcx);
-    __ push(rsi);
-    __ push(rdi);
-    __ push(rbp);
-    __ push_FPU_state();
-
-    // push_FPU_state() resets the FP top of stack
-    // Load original double into FP top of stack
-    __ fld_d(Address(rsp, saved_argument_off * wordSize));
-    // Store double into stack as outgoing argument
-    __ subptr(rsp, wordSize*2);
-    __ fst_d(Address(rsp, 0));
-
-    // Prepare FPU for doing math in C-land
-    __ empty_FPU_stack();
-    // Call the C code to massage the double.  Result in EAX
-    if (t == T_INT)
-      { BLOCK_COMMENT("SharedRuntime::d2i"); }
-    else if (t == T_LONG)
-      { BLOCK_COMMENT("SharedRuntime::d2l"); }
-    __ call_VM_leaf( fcn, 2 );  // 2 = argument words; presumably popped by call_VM_leaf - TODO confirm
-
-    // Restore CPU & FPU state
-    __ pop_FPU_state();
-    __ pop(rbp);
-    __ pop(rdi);
-    __ pop(rsi);
-    __ pop(rcx);
-    __ pop(rbx);
-    __ addptr(rsp, wordSize * 2);  // release the two words reserved for the saved argument at entry
-
-    __ ret(0);
-
-    return start;
-  }
-  //---------------------------------------------------------------------------------------------------
-
-  address generate_vector_mask(StubGenStubId stub_id, int32_t mask) {  // emits a table filled with 'mask'
-    __ align(CodeEntryAlignment);
-    StubCodeMark mark(this, stub_id);
-    address start = __ pc();  // start of the table; this is the value returned
-
-    for (int i = 0; i < 16; i++) {  // 16 copies of the 32-bit mask
-      __ emit_data(mask, relocInfo::none, 0);
-    }
-
-    return start;
-  }
-
-  address generate_count_leading_zeros_lut() {  // emits the leading-zero-count table; returns its start address
-    __ align64();
-    StubGenStubId stub_id = StubGenStubId::vector_count_leading_zeros_lut_id;
-    StubCodeMark mark(this, stub_id);
-    address start = __ pc();
-    __ emit_data(0x02020304, relocInfo::none, 0);  // as little-endian bytes: entry i = leading zeros of 4-bit value i
-    __ emit_data(0x01010101, relocInfo::none, 0);  // values 4..7 have one leading zero
-    __ emit_data(0x00000000, relocInfo::none, 0);  // values 8..15 have none
-    __ emit_data(0x00000000, relocInfo::none, 0);
-    __ emit_data(0x02020304, relocInfo::none, 0);  // the same four words are repeated (copy 2 of 4)
-    __ emit_data(0x01010101, relocInfo::none, 0);
-    __ emit_data(0x00000000, relocInfo::none, 0);
-    __ emit_data(0x00000000, relocInfo::none, 0);
-    __ emit_data(0x02020304, relocInfo::none, 0);  // copy 3 of 4
-    __ emit_data(0x01010101, relocInfo::none, 0);
-    __ emit_data(0x00000000, relocInfo::none, 0);
-    __ emit_data(0x00000000, relocInfo::none, 0);
-    __ emit_data(0x02020304, relocInfo::none, 0);  // copy 4 of 4
-    __ emit_data(0x01010101, relocInfo::none, 0);
-    __ emit_data(0x00000000, relocInfo::none, 0);
-    __ emit_data(0x00000000, relocInfo::none, 0);
-    return start;
-  }
-
-
-  address generate_popcount_avx_lut() {  // emits the bit-count table; returns its start address
-    __ align64();
-    StubGenStubId stub_id = StubGenStubId::vector_popcount_lut_id;
-    StubCodeMark mark(this, stub_id);
-    address start = __ pc();
-    __ emit_data(0x02010100, relocInfo::none, 0);  // as little-endian bytes: entry i = number of set bits in 4-bit value i
-    __ emit_data(0x03020201, relocInfo::none, 0);
-    __ emit_data(0x03020201, relocInfo::none, 0);
-    __ emit_data(0x04030302, relocInfo::none, 0);
-    __ emit_data(0x02010100, relocInfo::none, 0);  // the same four words are repeated (copy 2 of 4)
-    __ emit_data(0x03020201, relocInfo::none, 0);
-    __ emit_data(0x03020201, relocInfo::none, 0);
-    __ emit_data(0x04030302, relocInfo::none, 0);
-    __ emit_data(0x02010100, relocInfo::none, 0);  // copy 3 of 4
-    __ emit_data(0x03020201, relocInfo::none, 0);
-    __ emit_data(0x03020201, relocInfo::none, 0);
-    __ emit_data(0x04030302, relocInfo::none, 0);
-    __ emit_data(0x02010100, relocInfo::none, 0);  // copy 4 of 4
-    __ emit_data(0x03020201, relocInfo::none, 0);
-    __ emit_data(0x03020201, relocInfo::none, 0);
-    __ emit_data(0x04030302, relocInfo::none, 0);
-    return start;
-  }
-
-
-  address generate_iota_indices() {  // emits six consecutive 16-word tables of ascending values, one per element kind
-    __ align(CodeEntryAlignment);
-    StubGenStubId stub_id = StubGenStubId::vector_iota_indices_id;
-    StubCodeMark mark(this, stub_id);
-    address start = __ pc();
-    // B: byte values 0..63 (four per word, lowest value in the low byte)
-    __ emit_data(0x03020100, relocInfo::none, 0);
-    __ emit_data(0x07060504, relocInfo::none, 0);
-    __ emit_data(0x0B0A0908, relocInfo::none, 0);
-    __ emit_data(0x0F0E0D0C, relocInfo::none, 0);
-    __ emit_data(0x13121110, relocInfo::none, 0);
-    __ emit_data(0x17161514, relocInfo::none, 0);
-    __ emit_data(0x1B1A1918, relocInfo::none, 0);
-    __ emit_data(0x1F1E1D1C, relocInfo::none, 0);
-    __ emit_data(0x23222120, relocInfo::none, 0);
-    __ emit_data(0x27262524, relocInfo::none, 0);
-    __ emit_data(0x2B2A2928, relocInfo::none, 0);
-    __ emit_data(0x2F2E2D2C, relocInfo::none, 0);
-    __ emit_data(0x33323130, relocInfo::none, 0);
-    __ emit_data(0x37363534, relocInfo::none, 0);
-    __ emit_data(0x3B3A3938, relocInfo::none, 0);
-    __ emit_data(0x3F3E3D3C, relocInfo::none, 0);
-
-    // W: 16-bit values 0..31 (two per word, lower value in the low half)
-    __ emit_data(0x00010000, relocInfo::none, 0);
-    __ emit_data(0x00030002, relocInfo::none, 0);
-    __ emit_data(0x00050004, relocInfo::none, 0);
-    __ emit_data(0x00070006, relocInfo::none, 0);
-    __ emit_data(0x00090008, relocInfo::none, 0);
-    __ emit_data(0x000B000A, relocInfo::none, 0);
-    __ emit_data(0x000D000C, relocInfo::none, 0);
-    __ emit_data(0x000F000E, relocInfo::none, 0);
-    __ emit_data(0x00110010, relocInfo::none, 0);
-    __ emit_data(0x00130012, relocInfo::none, 0);
-    __ emit_data(0x00150014, relocInfo::none, 0);
-    __ emit_data(0x00170016, relocInfo::none, 0);
-    __ emit_data(0x00190018, relocInfo::none, 0);
-    __ emit_data(0x001B001A, relocInfo::none, 0);
-    __ emit_data(0x001D001C, relocInfo::none, 0);
-    __ emit_data(0x001F001E, relocInfo::none, 0);
-
-    // D: 32-bit values 0..15
-    __ emit_data(0x00000000, relocInfo::none, 0);
-    __ emit_data(0x00000001, relocInfo::none, 0);
-    __ emit_data(0x00000002, relocInfo::none, 0);
-    __ emit_data(0x00000003, relocInfo::none, 0);
-    __ emit_data(0x00000004, relocInfo::none, 0);
-    __ emit_data(0x00000005, relocInfo::none, 0);
-    __ emit_data(0x00000006, relocInfo::none, 0);
-    __ emit_data(0x00000007, relocInfo::none, 0);
-    __ emit_data(0x00000008, relocInfo::none, 0);
-    __ emit_data(0x00000009, relocInfo::none, 0);
-    __ emit_data(0x0000000A, relocInfo::none, 0);
-    __ emit_data(0x0000000B, relocInfo::none, 0);
-    __ emit_data(0x0000000C, relocInfo::none, 0);
-    __ emit_data(0x0000000D, relocInfo::none, 0);
-    __ emit_data(0x0000000E, relocInfo::none, 0);
-    __ emit_data(0x0000000F, relocInfo::none, 0);
-
-    // Q: 64-bit values 0..7, each emitted as low word followed by a zero high word
-    __ emit_data(0x00000000, relocInfo::none, 0);
-    __ emit_data(0x00000000, relocInfo::none, 0);
-    __ emit_data(0x00000001, relocInfo::none, 0);
-    __ emit_data(0x00000000, relocInfo::none, 0);
-    __ emit_data(0x00000002, relocInfo::none, 0);
-    __ emit_data(0x00000000, relocInfo::none, 0);
-    __ emit_data(0x00000003, relocInfo::none, 0);
-    __ emit_data(0x00000000, relocInfo::none, 0);
-    __ emit_data(0x00000004, relocInfo::none, 0);
-    __ emit_data(0x00000000, relocInfo::none, 0);
-    __ emit_data(0x00000005, relocInfo::none, 0);
-    __ emit_data(0x00000000, relocInfo::none, 0);
-    __ emit_data(0x00000006, relocInfo::none, 0);
-    __ emit_data(0x00000000, relocInfo::none, 0);
-    __ emit_data(0x00000007, relocInfo::none, 0);
-    __ emit_data(0x00000000, relocInfo::none, 0);
-
-    // D - FP: single-precision bit patterns for 0.0f..15.0f
-    __ emit_data(0x00000000, relocInfo::none, 0); // 0.0f
-    __ emit_data(0x3F800000, relocInfo::none, 0); // 1.0f
-    __ emit_data(0x40000000, relocInfo::none, 0); // 2.0f
-    __ emit_data(0x40400000, relocInfo::none, 0); // 3.0f
-    __ emit_data(0x40800000, relocInfo::none, 0); // 4.0f
-    __ emit_data(0x40A00000, relocInfo::none, 0); // 5.0f
-    __ emit_data(0x40C00000, relocInfo::none, 0); // 6.0f
-    __ emit_data(0x40E00000, relocInfo::none, 0); // 7.0f
-    __ emit_data(0x41000000, relocInfo::none, 0); // 8.0f
-    __ emit_data(0x41100000, relocInfo::none, 0); // 9.0f
-    __ emit_data(0x41200000, relocInfo::none, 0); // 10.0f
-    __ emit_data(0x41300000, relocInfo::none, 0); // 11.0f
-    __ emit_data(0x41400000, relocInfo::none, 0); // 12.0f
-    __ emit_data(0x41500000, relocInfo::none, 0); // 13.0f
-    __ emit_data(0x41600000, relocInfo::none, 0); // 14.0f
-    __ emit_data(0x41700000, relocInfo::none, 0); // 15.0f
-
-    // Q - FP: double-precision bit patterns for 0.0..7.0, low word first then high word
-    __ emit_data(0x00000000, relocInfo::none, 0); // 0.0d
-    __ emit_data(0x00000000, relocInfo::none, 0);
-    __ emit_data(0x00000000, relocInfo::none, 0); // 1.0d
-    __ emit_data(0x3FF00000, relocInfo::none, 0);
-    __ emit_data(0x00000000, relocInfo::none, 0); // 2.0d
-    __ emit_data(0x40000000, relocInfo::none, 0);
-    __ emit_data(0x00000000, relocInfo::none, 0); // 3.0d
-    __ emit_data(0x40080000, relocInfo::none, 0);
-    __ emit_data(0x00000000, relocInfo::none, 0); // 4.0d
-    __ emit_data(0x40100000, relocInfo::none, 0);
-    __ emit_data(0x00000000, relocInfo::none, 0); // 5.0d
-    __ emit_data(0x40140000, relocInfo::none, 0);
-    __ emit_data(0x00000000, relocInfo::none, 0); // 6.0d
-    __ emit_data(0x40180000, relocInfo::none, 0);
-    __ emit_data(0x00000000, relocInfo::none, 0); // 7.0d
-    __ emit_data(0x401c0000, relocInfo::none, 0);
-    return start;
-  }
-
-  address generate_vector_reverse_bit_lut() {  // emits the 4-bit reversal table; returns its start address
-    __ align(CodeEntryAlignment);
-    StubGenStubId stub_id = StubGenStubId::vector_reverse_bit_lut_id;
-    StubCodeMark mark(this, stub_id);
-    address start = __ pc();
-    __ emit_data(0x0C040800, relocInfo::none, 0);  // as little-endian bytes: entry i = 4-bit value i with its bits reversed
-    __ emit_data(0x0E060A02, relocInfo::none, 0);
-    __ emit_data(0x0D050901, relocInfo::none, 0);
-    __ emit_data(0x0F070B03, relocInfo::none, 0);
-    __ emit_data(0x0C040800, relocInfo::none, 0);  // the same four words are repeated (copy 2 of 4)
-    __ emit_data(0x0E060A02, relocInfo::none, 0);
-    __ emit_data(0x0D050901, relocInfo::none, 0);
-    __ emit_data(0x0F070B03, relocInfo::none, 0);
-    __ emit_data(0x0C040800, relocInfo::none, 0);  // copy 3 of 4
-    __ emit_data(0x0E060A02, relocInfo::none, 0);
-    __ emit_data(0x0D050901, relocInfo::none, 0);
-    __ emit_data(0x0F070B03, relocInfo::none, 0);
-    __ emit_data(0x0C040800, relocInfo::none, 0);  // copy 4 of 4
-    __ emit_data(0x0E060A02, relocInfo::none, 0);
-    __ emit_data(0x0D050901, relocInfo::none, 0);
-    __ emit_data(0x0F070B03, relocInfo::none, 0);
-    return start;
-  }
-
-  address generate_vector_reverse_byte_perm_mask_long() {  // emits byte indices that reverse each 8-byte group
-    __ align(CodeEntryAlignment);
-    StubGenStubId stub_id = StubGenStubId::vector_reverse_byte_perm_mask_long_id;
-    StubCodeMark mark(this, stub_id);
-    address start = __ pc();
-    __ emit_data(0x04050607, relocInfo::none, 0);  // as little-endian bytes: indices 7,6,5,4
-    __ emit_data(0x00010203, relocInfo::none, 0);  // indices 3,2,1,0
-    __ emit_data(0x0C0D0E0F, relocInfo::none, 0);  // indices 15,14,13,12
-    __ emit_data(0x08090A0B, relocInfo::none, 0);  // indices 11,10,9,8
-    __ emit_data(0x04050607, relocInfo::none, 0);  // the same four words are repeated (copy 2 of 4)
-    __ emit_data(0x00010203, relocInfo::none, 0);
-    __ emit_data(0x0C0D0E0F, relocInfo::none, 0);
-    __ emit_data(0x08090A0B, relocInfo::none, 0);
-    __ emit_data(0x04050607, relocInfo::none, 0);  // copy 3 of 4
-    __ emit_data(0x00010203, relocInfo::none, 0);
-    __ emit_data(0x0C0D0E0F, relocInfo::none, 0);
-    __ emit_data(0x08090A0B, relocInfo::none, 0);
-    __ emit_data(0x04050607, relocInfo::none, 0);  // copy 4 of 4
-    __ emit_data(0x00010203, relocInfo::none, 0);
-    __ emit_data(0x0C0D0E0F, relocInfo::none, 0);
-    __ emit_data(0x08090A0B, relocInfo::none, 0);
-    return start;
-  }
-
-  address generate_vector_reverse_byte_perm_mask_int() {  // emits byte indices that reverse each 4-byte group
-    __ align(CodeEntryAlignment);
-    StubGenStubId stub_id = StubGenStubId::vector_reverse_byte_perm_mask_int_id;
-    StubCodeMark mark(this, stub_id);
-    address start = __ pc();
-    __ emit_data(0x00010203, relocInfo::none, 0);  // as little-endian bytes: indices 3,2,1,0
-    __ emit_data(0x04050607, relocInfo::none, 0);  // indices 7,6,5,4
-    __ emit_data(0x08090A0B, relocInfo::none, 0);  // indices 11,10,9,8
-    __ emit_data(0x0C0D0E0F, relocInfo::none, 0);  // indices 15,14,13,12
-    __ emit_data(0x00010203, relocInfo::none, 0);  // the same four words are repeated (copy 2 of 4)
-    __ emit_data(0x04050607, relocInfo::none, 0);
-    __ emit_data(0x08090A0B, relocInfo::none, 0);
-    __ emit_data(0x0C0D0E0F, relocInfo::none, 0);
-    __ emit_data(0x00010203, relocInfo::none, 0);  // copy 3 of 4
-    __ emit_data(0x04050607, relocInfo::none, 0);
-    __ emit_data(0x08090A0B, relocInfo::none, 0);
-    __ emit_data(0x0C0D0E0F, relocInfo::none, 0);
-    __ emit_data(0x00010203, relocInfo::none, 0);  // copy 4 of 4
-    __ emit_data(0x04050607, relocInfo::none, 0);
-    __ emit_data(0x08090A0B, relocInfo::none, 0);
-    __ emit_data(0x0C0D0E0F, relocInfo::none, 0);
-    return start;
-  }
-
-  address generate_vector_reverse_byte_perm_mask_short() {  // emits byte indices that swap the bytes of each 2-byte group
-    __ align(CodeEntryAlignment);
-    StubGenStubId stub_id = StubGenStubId::vector_reverse_byte_perm_mask_short_id;
-    StubCodeMark mark(this, stub_id);
-    address start = __ pc();
-    __ emit_data(0x02030001, relocInfo::none, 0);  // as little-endian bytes: indices 1,0,3,2
-    __ emit_data(0x06070405, relocInfo::none, 0);  // indices 5,4,7,6
-    __ emit_data(0x0A0B0809, relocInfo::none, 0);  // indices 9,8,11,10
-    __ emit_data(0x0E0F0C0D, relocInfo::none, 0);  // indices 13,12,15,14
-    __ emit_data(0x02030001, relocInfo::none, 0);  // the same four words are repeated (copy 2 of 4)
-    __ emit_data(0x06070405, relocInfo::none, 0);
-    __ emit_data(0x0A0B0809, relocInfo::none, 0);
-    __ emit_data(0x0E0F0C0D, relocInfo::none, 0);
-    __ emit_data(0x02030001, relocInfo::none, 0);  // copy 3 of 4
-    __ emit_data(0x06070405, relocInfo::none, 0);
-    __ emit_data(0x0A0B0809, relocInfo::none, 0);
-    __ emit_data(0x0E0F0C0D, relocInfo::none, 0);
-    __ emit_data(0x02030001, relocInfo::none, 0);  // copy 4 of 4
-    __ emit_data(0x06070405, relocInfo::none, 0);
-    __ emit_data(0x0A0B0809, relocInfo::none, 0);
-    __ emit_data(0x0E0F0C0D, relocInfo::none, 0);
-    return start;
-  }
-
-  address generate_vector_byte_shuffle_mask() {  // emits four words of 0x70 bytes followed by four words of 0xF0 bytes
-    __ align(CodeEntryAlignment);
-    StubGenStubId stub_id = StubGenStubId::vector_byte_shuffle_mask_id;
-    StubCodeMark mark(this, stub_id);
-    address start = __ pc();
-    __ emit_data(0x70707070, relocInfo::none, 0);  // first half: every byte is 0x70
-    __ emit_data(0x70707070, relocInfo::none, 0);
-    __ emit_data(0x70707070, relocInfo::none, 0);
-    __ emit_data(0x70707070, relocInfo::none, 0);
-    __ emit_data(0xF0F0F0F0, relocInfo::none, 0);  // second half: every byte is 0xF0
-    __ emit_data(0xF0F0F0F0, relocInfo::none, 0);
-    __ emit_data(0xF0F0F0F0, relocInfo::none, 0);
-    __ emit_data(0xF0F0F0F0, relocInfo::none, 0);
-    return start;
-  }
-
-  address generate_vector_mask_long_double(StubGenStubId stub_id, int32_t maskhi, int32_t masklo) {  // emits 8 two-word masks
-    __ align(CodeEntryAlignment);
-    StubCodeMark mark(this, stub_id);
-    address start = __ pc();  // start of the table; this is the value returned
-
-    for (int i = 0; i < 8; i++) {  // eight (masklo, maskhi) pairs
-      __ emit_data(masklo, relocInfo::none, 0);  // low word is emitted first
-      __ emit_data(maskhi, relocInfo::none, 0);
-    }
-
-    return start;
-  }
-
-  //----------------------------------------------------------------------------------------------------
-
-  address generate_vector_byte_perm_mask() {  // emits eight two-word entries: 1,3,5,7 then 0,2,4,6
-    __ align(CodeEntryAlignment);
-    StubGenStubId stub_id = StubGenStubId::vector_byte_perm_mask_id;
-    StubCodeMark mark(this, stub_id);
-    address start = __ pc();
-
-    __ emit_data(0x00000001, relocInfo::none, 0);  // odd values first; each value is followed by a zero word
-    __ emit_data(0x00000000, relocInfo::none, 0);
-    __ emit_data(0x00000003, relocInfo::none, 0);
-    __ emit_data(0x00000000, relocInfo::none, 0);
-    __ emit_data(0x00000005, relocInfo::none, 0);
-    __ emit_data(0x00000000, relocInfo::none, 0);
-    __ emit_data(0x00000007, relocInfo::none, 0);
-    __ emit_data(0x00000000, relocInfo::none, 0);
-    __ emit_data(0x00000000, relocInfo::none, 0);  // then the even values 0, 2, 4, 6
-    __ emit_data(0x00000000, relocInfo::none, 0);
-    __ emit_data(0x00000002, relocInfo::none, 0);
-    __ emit_data(0x00000000, relocInfo::none, 0);
-    __ emit_data(0x00000004, relocInfo::none, 0);
-    __ emit_data(0x00000000, relocInfo::none, 0);
-    __ emit_data(0x00000006, relocInfo::none, 0);
-    __ emit_data(0x00000000, relocInfo::none, 0);
-
-    return start;
-  }
-
-  address generate_vector_custom_i32(StubGenStubId stub_id, Assembler::AvxVectorLen len,  // emits 4, 8 or 16 caller-supplied words
-                                     int32_t val0, int32_t val1, int32_t val2, int32_t val3,  // always emitted
-                                     int32_t val4 = 0, int32_t val5 = 0, int32_t val6 = 0, int32_t val7 = 0,  // emitted from AVX_256bit up
-                                     int32_t val8 = 0, int32_t val9 = 0, int32_t val10 = 0, int32_t val11 = 0,  // emitted from AVX_512bit up
-                                     int32_t val12 = 0, int32_t val13 = 0, int32_t val14 = 0, int32_t val15 = 0) {
-    __ align(CodeEntryAlignment);
-    StubCodeMark mark(this, stub_id);
-    address start = __ pc();  // start of the table; this is the value returned
-
-    assert(len != Assembler::AVX_NoVec, "vector len must be specified");
-    __ emit_data(val0, relocInfo::none, 0);
-    __ emit_data(val1, relocInfo::none, 0);
-    __ emit_data(val2, relocInfo::none, 0);
-    __ emit_data(val3, relocInfo::none, 0);
-    if (len >= Assembler::AVX_256bit) {  // second group of four words
-      __ emit_data(val4, relocInfo::none, 0);
-      __ emit_data(val5, relocInfo::none, 0);
-      __ emit_data(val6, relocInfo::none, 0);
-      __ emit_data(val7, relocInfo::none, 0);
-      if (len >= Assembler::AVX_512bit) {  // remaining eight words
-        __ emit_data(val8, relocInfo::none, 0);
-        __ emit_data(val9, relocInfo::none, 0);
-        __ emit_data(val10, relocInfo::none, 0);
-        __ emit_data(val11, relocInfo::none, 0);
-        __ emit_data(val12, relocInfo::none, 0);
-        __ emit_data(val13, relocInfo::none, 0);
-        __ emit_data(val14, relocInfo::none, 0);
-        __ emit_data(val15, relocInfo::none, 0);
-      }
-    }
-
-    return start;
-  }
-
-  //----------------------------------------------------------------------------------------------------
-  // Non-destructive plausibility checks for oops
-
-  address generate_verify_oop() {  // emits the oop sanity-check stub; returns its start address
-    StubGenStubId stub_id = StubGenStubId::verify_oop_id;
-    StubCodeMark mark(this, stub_id);
-    address start = __ pc();
-
-    // Stack layout once EFLAGS and rdx have been pushed below:
-    //
-    // [tos    ]: saved rdx
-    // [tos + 1]: saved EFLAGS
-    // [tos + 2]: return address
-    // [tos + 3]: char* error message
-    // [tos + 4]: oop   object to verify
-    // [tos + 5]: saved rax - saved by the caller; rax is used as scratch here
-
-    Label exit, error;
-    __ pushf();
-    __ incrementl(ExternalAddress((address) StubRoutines::verify_oop_count_addr()));  // count this verification
-    __ push(rdx);                                // save rdx
-    // make sure object is 'reasonable'
-    __ movptr(rax, Address(rsp, 4 * wordSize));    // get object
-    __ testptr(rax, rax);
-    __ jcc(Assembler::zero, exit);               // if obj is null it is ok
-
-    // Check if the oop is in the right area of memory
-    const int oop_mask = Universe::verify_oop_mask();
-    const int oop_bits = Universe::verify_oop_bits();
-    __ mov(rdx, rax);
-    __ andptr(rdx, oop_mask);
-    __ cmpptr(rdx, oop_bits);
-    __ jcc(Assembler::notZero, error);           // masked address differs from the expected bits
-
-    // make sure klass is 'reasonable', which is not zero.
-    __ movptr(rax, Address(rax, oopDesc::klass_offset_in_bytes())); // get klass
-    __ testptr(rax, rax);
-    __ jcc(Assembler::zero, error);              // if klass is null it is broken
-
-    // return if everything seems ok
-    __ bind(exit);
-    __ movptr(rax, Address(rsp, 5 * wordSize));  // reload the caller-saved rax
-    __ pop(rdx);                                 // restore rdx
-    __ popf();                                   // restore EFLAGS
-    __ ret(3 * wordSize);                        // pop the three words at tos+3..tos+5 (message, oop, saved rax)
-
-    // handle errors
-    __ bind(error);
-    __ movptr(rax, Address(rsp, 5 * wordSize));  // reload the caller-saved rax
-    __ pop(rdx);                                 // get saved rdx back
-    __ popf();                                   // get saved EFLAGS off stack -- will be ignored
-    __ pusha();                                  // push registers (eip = return address & msg are already pushed)
-    BLOCK_COMMENT("call MacroAssembler::debug");
-    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug32)));
-    __ hlt();                                    // execution is not expected to continue past the debug call
-    return start;
-  }
-
-
-  // Copy forward in 64-byte chunks, then finish any remainder 8 bytes at a time.
-  //
-  // Inputs:
-  //   from        - source array address (advanced as data is copied)
-  //   to_from     - (destination array address - from); added to from to form each store address
-  //   qword_count - 8-byte element count; counted down by the loops below
-  //                 NOTE(review): this comment used to say "negative" - confirm against callers
-  void xmm_copy_forward(Register from, Register to_from, Register qword_count) {
-    assert( UseSSE >= 2, "supported cpu only" );
-    Label L_copy_64_bytes_loop, L_copy_64_bytes, L_copy_8_bytes, L_exit;
-
-    // Copy 64-byte chunks; enter at the count check so short inputs skip the loop body
-    __ jmpb(L_copy_64_bytes);
-    __ align(OptoLoopAlignment);
-  __ BIND(L_copy_64_bytes_loop);
-
-    if (UseUnalignedLoadStores) {
-      if (UseAVX > 2) {  // one 512-bit move covers the whole 64-byte chunk
-        __ evmovdqul(xmm0, Address(from, 0), Assembler::AVX_512bit);
-        __ evmovdqul(Address(from, to_from, Address::times_1, 0), xmm0, Assembler::AVX_512bit);
-      } else if (UseAVX == 2) {  // two moves at offsets 0 and 32
-        __ vmovdqu(xmm0, Address(from,  0));
-        __ vmovdqu(Address(from, to_from, Address::times_1,  0), xmm0);
-        __ vmovdqu(xmm1, Address(from, 32));
-        __ vmovdqu(Address(from, to_from, Address::times_1, 32), xmm1);
-      } else {  // four moves at offsets 0, 16, 32 and 48
-        __ movdqu(xmm0, Address(from, 0));
-        __ movdqu(Address(from, to_from, Address::times_1, 0), xmm0);
-        __ movdqu(xmm1, Address(from, 16));
-        __ movdqu(Address(from, to_from, Address::times_1, 16), xmm1);
-        __ movdqu(xmm2, Address(from, 32));
-        __ movdqu(Address(from, to_from, Address::times_1, 32), xmm2);
-        __ movdqu(xmm3, Address(from, 48));
-        __ movdqu(Address(from, to_from, Address::times_1, 48), xmm3);
-      }
-    } else {  // eight 8-byte moves through xmm0..xmm7
-      __ movq(xmm0, Address(from, 0));
-      __ movq(Address(from, to_from, Address::times_1, 0), xmm0);
-      __ movq(xmm1, Address(from, 8));
-      __ movq(Address(from, to_from, Address::times_1, 8), xmm1);
-      __ movq(xmm2, Address(from, 16));
-      __ movq(Address(from, to_from, Address::times_1, 16), xmm2);
-      __ movq(xmm3, Address(from, 24));
-      __ movq(Address(from, to_from, Address::times_1, 24), xmm3);
-      __ movq(xmm4, Address(from, 32));
-      __ movq(Address(from, to_from, Address::times_1, 32), xmm4);
-      __ movq(xmm5, Address(from, 40));
-      __ movq(Address(from, to_from, Address::times_1, 40), xmm5);
-      __ movq(xmm6, Address(from, 48));
-      __ movq(Address(from, to_from, Address::times_1, 48), xmm6);
-      __ movq(xmm7, Address(from, 56));
-      __ movq(Address(from, to_from, Address::times_1, 56), xmm7);
-    }
-
-    __ addl(from, 64);
-  __ BIND(L_copy_64_bytes);
-    __ subl(qword_count, 8);  // 8 qwords = one 64-byte chunk
-    __ jcc(Assembler::greaterEqual, L_copy_64_bytes_loop);  // loop while a full chunk remains
-
-    if (UseUnalignedLoadStores && (UseAVX == 2)) {
-      // clean upper bits of YMM registers
-      __ vpxor(xmm0, xmm0);
-      __ vpxor(xmm1, xmm1);
-    }
-    __ addl(qword_count, 8);  // undo the last subtraction to get the leftover qword count
-    __ jccb(Assembler::zero, L_exit);
-    //
-    // fewer than 8 qwords remain, just copy them one at a time
-    //
-  __ BIND(L_copy_8_bytes);
-    __ movq(xmm0, Address(from, 0));
-    __ movq(Address(from, to_from, Address::times_1), xmm0);
-    __ addl(from, 8);
-    __ decrement(qword_count);
-    __ jcc(Assembler::greater, L_copy_8_bytes);
-  __ BIND(L_exit);
-  }
-
-  address generate_disjoint_copy(StubGenStubId stub_id, address* entry) {  // emits a forward arraycopy stub for stub_id
-    BasicType t;
-    bool aligned;
-    Address::ScaleFactor sf;
-    bool dest_uninitialized;
-
-    switch (stub_id) {  // derive element type, alignment, scale and destination state from the stub id
-    case jbyte_disjoint_arraycopy_id:
-      t = T_BYTE;
-      aligned = false;
-      sf = Address::times_1;
-      dest_uninitialized = false;
-      break;
-    case arrayof_jbyte_disjoint_arraycopy_id:
-      t = T_BYTE;
-      aligned = true;
-      sf = Address::times_1;
-      dest_uninitialized = false;
-      break;
-    case jshort_disjoint_arraycopy_id:
-      t = T_SHORT;
-      aligned = false;
-      sf = Address::times_2;
-      dest_uninitialized = false;
-      break;
-    case arrayof_jshort_disjoint_arraycopy_id:
-      t = T_SHORT;
-      aligned = true;
-      sf = Address::times_2;
-      dest_uninitialized = false;
-      break;
-    case jint_disjoint_arraycopy_id:
-      t = T_INT;
-      aligned = true;
-      sf = Address::times_4;
-      dest_uninitialized = false;
-      break;
-    case arrayof_jint_disjoint_arraycopy_id:
-      // since this is always aligned we can (should!) use the same
-      // stub as for case jint_disjoint_arraycopy
-      ShouldNotReachHere();
-      break;
-    case jlong_disjoint_arraycopy_id:
-    case arrayof_jlong_disjoint_arraycopy_id:
-      // Handled by a special generator routine on 32 bit
-      ShouldNotReachHere();
-      break;
-    case oop_disjoint_arraycopy_id:
-      t = T_OBJECT;
-      aligned = true;
-      sf = Address::times_ptr;
-      dest_uninitialized = false;
-      break;
-    case arrayof_oop_disjoint_arraycopy_id:
-      // since this is always aligned we can (should!) use the same
-      // stub as for case oop_disjoint_arraycopy
-      ShouldNotReachHere();
-      break;
-    case oop_disjoint_arraycopy_uninit_id:
-      t = T_OBJECT;
-      aligned = true;
-      sf = Address::times_ptr;
-      dest_uninitialized = true;
-      break;
-    case arrayof_oop_disjoint_arraycopy_uninit_id:
-      // since this is always aligned we can (should!) use the same
-      // stub as for case oop_disjoint_arraycopy_uninit
-      ShouldNotReachHere();
-      break;
-    default:
-      ShouldNotReachHere();
-      break;
-    }
-
-    __ align(CodeEntryAlignment);
-    StubCodeMark mark(this, stub_id);
-    address start = __ pc();
-
-    Label L_0_count, L_exit, L_skip_align1, L_skip_align2, L_copy_byte;
-    Label L_copy_2_bytes, L_copy_4_bytes, L_copy_64_bytes;
-
-    int shift = Address::times_ptr - sf;  // (1 << shift) elements make up one 4-byte dword, per the tests below
-
-    const Register from     = rsi;  // source array address
-    const Register to       = rdi;  // destination array address
-    const Register count    = rcx;  // elements count
-    const Register to_from  = to;   // (to - from)
-    const Register saved_to = rdx;  // saved destination array address
-
-    __ enter(); // required for proper stackwalking of RuntimeStub frame
-    __ push(rsi);
-    __ push(rdi);
-    __ movptr(from , Address(rsp, 12+ 4));  // 12 presumably covers saved rbp/rsi/rdi, +4 the return address - TODO confirm
-    __ movptr(to   , Address(rsp, 12+ 8));
-    __ movl(count, Address(rsp, 12+ 12));
-
-    if (entry != nullptr) {
-      *entry = __ pc(); // Entry point from conjoint arraycopy stub.
-      BLOCK_COMMENT("Entry:");
-    }
-
-    if (t == T_OBJECT) {  // only the oop variants skip the barriers and the copy for an empty range
-      __ testl(count, count);
-      __ jcc(Assembler::zero, L_0_count);
-    }
-
-    DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_DISJOINT;
-    if (dest_uninitialized) {
-      decorators |= IS_DEST_UNINITIALIZED;
-    }
-    if (aligned) {
-      decorators |= ARRAYCOPY_ALIGNED;
-    }
-
-    BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
-    bs->arraycopy_prologue(_masm, decorators, t, from, to, count);
-    {
-      bool add_entry = (t != T_OBJECT && (!aligned || t == T_INT));
-      // UnsafeMemoryAccess page error: continue after unsafe access
-      UnsafeMemoryAccessMark umam(this, add_entry, true);
-      __ subptr(to, from); // to --> to_from
-      __ cmpl(count, 2<<shift); // Short arrays (< 8 bytes) copy by element
-      __ jcc(Assembler::below, L_copy_4_bytes); // use unsigned cmp
-      if (!UseUnalignedLoadStores && !aligned && (t == T_BYTE || t == T_SHORT)) {
-        // align source address at 4 bytes address boundary
-        if (t == T_BYTE) {
-          // One byte misalignment happens only for byte arrays
-          __ testl(from, 1);
-          __ jccb(Assembler::zero, L_skip_align1);
-          __ movb(rax, Address(from, 0));
-          __ movb(Address(from, to_from, Address::times_1, 0), rax);
-          __ increment(from);
-          __ decrement(count);
-        __ BIND(L_skip_align1);
-        }
-        // Two bytes misalignment happens only for byte and short (char) arrays
-        __ testl(from, 2);
-        __ jccb(Assembler::zero, L_skip_align2);
-        __ movw(rax, Address(from, 0));
-        __ movw(Address(from, to_from, Address::times_1, 0), rax);
-        __ addptr(from, 2);
-        __ subl(count, 1<<(shift-1));  // two bytes' worth of elements were copied
-      __ BIND(L_skip_align2);
-      }
-      if (!UseXMMForArrayCopy) {
-        __ mov(rax, count);      // save 'count'
-        __ shrl(count, shift); // element count -> number of whole dwords
-        __ addptr(to_from, from);// restore 'to'
-        __ rep_mov();
-        __ subptr(to_from, from);// restore 'to_from'
-        __ mov(count, rax);      // restore 'count'
-        __ jmpb(L_copy_2_bytes); // all dwords were copied
-      } else {
-        if (!UseUnalignedLoadStores) {
-          // align to 8 bytes, we know we are 4 byte aligned to start
-          __ testptr(from, 4);
-          __ jccb(Assembler::zero, L_copy_64_bytes);
-          __ movl(rax, Address(from, 0));
-          __ movl(Address(from, to_from, Address::times_1, 0), rax);
-          __ addptr(from, 4);
-          __ subl(count, 1<<shift);  // one dword's worth of elements was copied
-        }
-      __ BIND(L_copy_64_bytes);
-        __ mov(rax, count);
-        __ shrl(rax, shift+1);  // 8 bytes chunk count
-        //
-        // Copy 8-byte chunks through XMM registers, 8 per iteration of the loop
-        //
-        xmm_copy_forward(from, to_from, rax);
-      }
-      // copy trailing dword
-    __ BIND(L_copy_4_bytes);
-      __ testl(count, 1<<shift);
-      __ jccb(Assembler::zero, L_copy_2_bytes);
-      __ movl(rax, Address(from, 0));
-      __ movl(Address(from, to_from, Address::times_1, 0), rax);
-      if (t == T_BYTE || t == T_SHORT) {
-        __ addptr(from, 4);
-      __ BIND(L_copy_2_bytes);
-        // copy trailing word
-        __ testl(count, 1<<(shift-1));
-        __ jccb(Assembler::zero, L_copy_byte);
-        __ movw(rax, Address(from, 0));
-        __ movw(Address(from, to_from, Address::times_1, 0), rax);
-        if (t == T_BYTE) {
-          __ addptr(from, 2);
-        __ BIND(L_copy_byte);
-          // copy trailing byte
-          __ testl(count, 1);
-          __ jccb(Assembler::zero, L_exit);
-          __ movb(rax, Address(from, 0));
-          __ movb(Address(from, to_from, Address::times_1, 0), rax);
-        __ BIND(L_exit);
-        } else {
-        __ BIND(L_copy_byte);  // no byte tail for shorts; the label is bound only so the jump above has a target
-        }
-      } else {
-      __ BIND(L_copy_2_bytes);  // no sub-dword tail for this type; the label is bound only as a jump target
-      }
-    }
-
-    __ movl(count, Address(rsp, 12+12)); // reread 'count'
-    bs->arraycopy_epilogue(_masm, decorators, t, from, to, count);
-
-    if (t == T_OBJECT) {
-    __ BIND(L_0_count);
-    }
-    inc_copy_counter_np(t);
-    __ pop(rdi);
-    __ pop(rsi);
-    __ leave(); // required for proper stackwalking of RuntimeStub frame
-    __ vzeroupper();
-    __ xorptr(rax, rax); // return 0
-    __ ret(0);
-    return start;
-  }
-
-
-  address generate_fill(StubGenStubId stub_id) {  // emits an array fill stub for stub_id; returns its start address
-    BasicType t;
-    bool aligned;
-    switch(stub_id) {  // derive element type and alignment from the stub id
-    case jbyte_fill_id:
-      t = T_BYTE;
-      aligned = false;
-      break;
-    case jshort_fill_id:
-      t = T_SHORT;
-      aligned = false;
-      break;
-    case jint_fill_id:
-      t = T_INT;
-      aligned = false;
-      break;
-    case arrayof_jbyte_fill_id:
-      t = T_BYTE;
-      aligned = true;
-      break;
-    case arrayof_jshort_fill_id:
-      t = T_SHORT;
-      aligned = true;
-      break;
-    case arrayof_jint_fill_id:
-      t = T_INT;
-      aligned = true;
-      break;
-    default:
-      ShouldNotReachHere();
-      break;
-    }
-
-    __ align(CodeEntryAlignment);
-    StubCodeMark mark(this, stub_id);
-    address start = __ pc();
-
-    BLOCK_COMMENT("Entry:");
-
-    const Register to       = rdi;  // destination array address (the array being filled)
-    const Register value    = rdx;  // value
-    const Register count    = rsi;  // elements count
-
-    __ enter(); // required for proper stackwalking of RuntimeStub frame
-    __ push(rsi);
-    __ push(rdi);
-    __ movptr(to   , Address(rsp, 12+ 4));  // 12 presumably covers saved rbp/rsi/rdi, +4 the return address - TODO confirm
-    __ movl(value, Address(rsp, 12+ 8));
-    __ movl(count, Address(rsp, 12+ 12));
-
-    __ generate_fill(t, aligned, to, value, count, rax, xmm0);  // the fill itself is emitted by the assembler's generate_fill
-
-    __ pop(rdi);
-    __ pop(rsi);
-    __ leave(); // required for proper stackwalking of RuntimeStub frame
-    __ ret(0);
-    return start;
-  }
-
-  address generate_conjoint_copy(StubGenStubId stub_id,
-                                 address nooverlap_target,
-                                 address* entry) {
-    BasicType t;
-    bool aligned;
-    Address::ScaleFactor sf;
-    bool dest_uninitialized;
-
-    switch (stub_id) {
-    case jbyte_arraycopy_id:
-      t = T_BYTE;
-      aligned = false;
-      sf = Address::times_1;
-      dest_uninitialized = false;
-      break;
-    case arrayof_jbyte_arraycopy_id:
-      t = T_BYTE;
-      aligned = true;
-      sf = Address::times_1;
-      dest_uninitialized = false;
-      break;
-    case jshort_arraycopy_id:
-      t = T_SHORT;
-      aligned = false;
-      sf = Address::times_2;
-      dest_uninitialized = false;
-      break;
-    case arrayof_jshort_arraycopy_id:
-      t = T_SHORT;
-      aligned = true;
-      sf = Address::times_2;
-      dest_uninitialized = false;
-      break;
-    case jint_arraycopy_id:
-      t = T_INT;
-      aligned = true;
-      sf = Address::times_4;
-      dest_uninitialized = false;
-      break;
-    case arrayof_jint_arraycopy_id:
-      // since this is always aligned we can (should!) use the same
-      // stub as for case jint_arraycopy
-      ShouldNotReachHere();
-      break;
-    case jlong_arraycopy_id:
-    case arrayof_jlong_arraycopy_id:
-      // Handled by a special generator routine on 32 bit
-      ShouldNotReachHere();
-      break;
-    case oop_arraycopy_id:
-      t = T_OBJECT;
-      aligned = true;
-      sf = Address::times_ptr;
-      dest_uninitialized = false;
-      break;
-    case arrayof_oop_arraycopy_id:
-      // since this is always aligned we can (should!) use the same
-      // stub as for case oop_arraycopy
-      ShouldNotReachHere();
-      break;
-    case oop_arraycopy_uninit_id:
-      t = T_OBJECT;
-      aligned = true;
-      sf = Address::times_ptr;
-      dest_uninitialized = true;
-      break;
-    case arrayof_oop_arraycopy_uninit_id:
-      // since this is always aligned we can (should!) use the same
-      // stub as for case oop_arraycopy_uninit
-      ShouldNotReachHere();
-      break;
-    default:
-      ShouldNotReachHere();
-      break;
-    }
-
-    __ align(CodeEntryAlignment);
-    StubCodeMark mark(this, stub_id);
-    address start = __ pc();
-
-    Label L_0_count, L_exit, L_skip_align1, L_skip_align2, L_copy_byte;
-    Label L_copy_2_bytes, L_copy_4_bytes, L_copy_8_bytes, L_copy_8_bytes_loop;
-
-    int shift = Address::times_ptr - sf;
-
-    const Register src   = rax;  // source array address
-    const Register dst   = rdx;  // destination array address
-    const Register from  = rsi;  // source array address
-    const Register to    = rdi;  // destination array address
-    const Register count = rcx;  // elements count
-    const Register end   = rax;  // array end address
-
-    __ enter(); // required for proper stackwalking of RuntimeStub frame
-    __ push(rsi);
-    __ push(rdi);
-    __ movptr(src  , Address(rsp, 12+ 4));   // from
-    __ movptr(dst  , Address(rsp, 12+ 8));   // to
-    __ movl2ptr(count, Address(rsp, 12+12)); // count
-
-    if (entry != nullptr) {
-      *entry = __ pc(); // Entry point from generic arraycopy stub.
-      BLOCK_COMMENT("Entry:");
-    }
-
-    // nooverlap_target expects arguments in rsi and rdi.
-    __ mov(from, src);
-    __ mov(to  , dst);
-
-    // arrays overlap test: dispatch to disjoint stub if necessary.
-    RuntimeAddress nooverlap(nooverlap_target);
-    __ cmpptr(dst, src);
-    __ lea(end, Address(src, count, sf, 0)); // src + count * elem_size
-    __ jump_cc(Assembler::belowEqual, nooverlap);
-    __ cmpptr(dst, end);
-    __ jump_cc(Assembler::aboveEqual, nooverlap);
-
-    if (t == T_OBJECT) {
-      __ testl(count, count);
-      __ jcc(Assembler::zero, L_0_count);
-    }
-
-    DecoratorSet decorators = IN_HEAP | IS_ARRAY;
-    if (dest_uninitialized) {
-      decorators |= IS_DEST_UNINITIALIZED;
-    }
-    if (aligned) {
-      decorators |= ARRAYCOPY_ALIGNED;
-    }
-
-    BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
-    bs->arraycopy_prologue(_masm, decorators, t, from, to, count);
-
-    {
-      bool add_entry = (t != T_OBJECT && (!aligned || t == T_INT));
-      // UnsafeMemoryAccess page error: continue after unsafe access
-      UnsafeMemoryAccessMark umam(this, add_entry, true);
-      // copy from high to low
-      __ cmpl(count, 2<<shift); // Short arrays (< 8 bytes) copy by element
-      __ jcc(Assembler::below, L_copy_4_bytes); // use unsigned cmp
-      if (t == T_BYTE || t == T_SHORT) {
-        // Align the end of destination array at 4 bytes address boundary
-        __ lea(end, Address(dst, count, sf, 0));
-        if (t == T_BYTE) {
-          // One byte misalignment happens only for byte arrays
-          __ testl(end, 1);
-          __ jccb(Assembler::zero, L_skip_align1);
-          __ decrement(count);
-          __ movb(rdx, Address(from, count, sf, 0));
-          __ movb(Address(to, count, sf, 0), rdx);
-        __ BIND(L_skip_align1);
-        }
-        // Two bytes misalignment happens only for byte and short (char) arrays
-        __ testl(end, 2);
-        __ jccb(Assembler::zero, L_skip_align2);
-        __ subptr(count, 1<<(shift-1));
-        __ movw(rdx, Address(from, count, sf, 0));
-        __ movw(Address(to, count, sf, 0), rdx);
-      __ BIND(L_skip_align2);
-        __ cmpl(count, 2<<shift); // Short arrays (< 8 bytes) copy by element
-        __ jcc(Assembler::below, L_copy_4_bytes);
-      }
-
-      if (!UseXMMForArrayCopy) {
-        __ std();
-        __ mov(rax, count); // Save 'count'
-        __ mov(rdx, to);    // Save 'to'
-        __ lea(rsi, Address(from, count, sf, -4));
-        __ lea(rdi, Address(to  , count, sf, -4));
-        __ shrptr(count, shift); // bytes count
-        __ rep_mov();
-        __ cld();
-        __ mov(count, rax); // restore 'count'
-        __ andl(count, (1<<shift)-1);      // mask the number of rest elements
-        __ movptr(from, Address(rsp, 12+4)); // reread 'from'
-        __ mov(to, rdx);   // restore 'to'
-        __ jmpb(L_copy_2_bytes); // all dword were copied
-      } else {
-        // Align to 8 bytes the end of array. It is aligned to 4 bytes already.
-        __ testptr(end, 4);
-        __ jccb(Assembler::zero, L_copy_8_bytes);
-        __ subl(count, 1<<shift);
-        __ movl(rdx, Address(from, count, sf, 0));
-        __ movl(Address(to, count, sf, 0), rdx);
-        __ jmpb(L_copy_8_bytes);
-
-        __ align(OptoLoopAlignment);
-        // Move 8 bytes
-      __ BIND(L_copy_8_bytes_loop);
-        __ movq(xmm0, Address(from, count, sf, 0));
-        __ movq(Address(to, count, sf, 0), xmm0);
-      __ BIND(L_copy_8_bytes);
-        __ subl(count, 2<<shift);
-        __ jcc(Assembler::greaterEqual, L_copy_8_bytes_loop);
-        __ addl(count, 2<<shift);
-      }
-    __ BIND(L_copy_4_bytes);
-      // copy prefix qword
-      __ testl(count, 1<<shift);
-      __ jccb(Assembler::zero, L_copy_2_bytes);
-      __ movl(rdx, Address(from, count, sf, -4));
-      __ movl(Address(to, count, sf, -4), rdx);
-
-      if (t == T_BYTE || t == T_SHORT) {
-          __ subl(count, (1<<shift));
-        __ BIND(L_copy_2_bytes);
-          // copy prefix dword
-          __ testl(count, 1<<(shift-1));
-          __ jccb(Assembler::zero, L_copy_byte);
-          __ movw(rdx, Address(from, count, sf, -2));
-          __ movw(Address(to, count, sf, -2), rdx);
-          if (t == T_BYTE) {
-            __ subl(count, 1<<(shift-1));
-          __ BIND(L_copy_byte);
-            // copy prefix byte
-            __ testl(count, 1);
-            __ jccb(Assembler::zero, L_exit);
-            __ movb(rdx, Address(from, 0));
-            __ movb(Address(to, 0), rdx);
-          __ BIND(L_exit);
-          } else {
-          __ BIND(L_copy_byte);
-          }
-      } else {
-      __ BIND(L_copy_2_bytes);
-      }
-    }
-
-    __ movl2ptr(count, Address(rsp, 12+12)); // reread count
-    bs->arraycopy_epilogue(_masm, decorators, t, from, to, count);
-
-    if (t == T_OBJECT) {
-    __ BIND(L_0_count);
-    }
-    inc_copy_counter_np(t);
-    __ pop(rdi);
-    __ pop(rsi);
-    __ leave(); // required for proper stackwalking of RuntimeStub frame
-    __ xorptr(rax, rax); // return 0
-    __ ret(0);
-    return start;
-  }
-
-
-  address generate_disjoint_long_copy(address* entry) {
-    __ align(CodeEntryAlignment);
-    StubGenStubId stub_id = StubGenStubId::jlong_disjoint_arraycopy_id;
-    StubCodeMark mark(this, stub_id);
-    address start = __ pc();
-
-    Label L_copy_8_bytes, L_copy_8_bytes_loop;
-    const Register from       = rax;  // source array address
-    const Register to         = rdx;  // destination array address
-    const Register count      = rcx;  // elements count
-    const Register to_from    = rdx;  // (to - from)
-
-    __ enter(); // required for proper stackwalking of RuntimeStub frame
-    __ movptr(from , Address(rsp, 8+0));       // from
-    __ movptr(to   , Address(rsp, 8+4));       // to
-    __ movl2ptr(count, Address(rsp, 8+8));     // count
-
-    *entry = __ pc(); // Entry point from conjoint arraycopy stub.
-    BLOCK_COMMENT("Entry:");
-
-    {
-      // UnsafeMemoryAccess page error: continue after unsafe access
-      UnsafeMemoryAccessMark umam(this, true, true);
-      __ subptr(to, from); // to --> to_from
-      if (UseXMMForArrayCopy) {
-        xmm_copy_forward(from, to_from, count);
-      } else {
-        __ jmpb(L_copy_8_bytes);
-        __ align(OptoLoopAlignment);
-      __ BIND(L_copy_8_bytes_loop);
-        __ fild_d(Address(from, 0));
-        __ fistp_d(Address(from, to_from, Address::times_1));
-        __ addptr(from, 8);
-      __ BIND(L_copy_8_bytes);
-        __ decrement(count);
-        __ jcc(Assembler::greaterEqual, L_copy_8_bytes_loop);
-      }
-    }
-    inc_copy_counter_np(T_LONG);
-    __ leave(); // required for proper stackwalking of RuntimeStub frame
-    __ vzeroupper();
-    __ xorptr(rax, rax); // return 0
-    __ ret(0);
-    return start;
-  }
-
-  address generate_conjoint_long_copy(address nooverlap_target, address* entry) {
-    __ align(CodeEntryAlignment);
-    StubGenStubId stub_id = StubGenStubId::jlong_arraycopy_id;
-    StubCodeMark mark(this, stub_id);
-    address start = __ pc();
-
-    Label L_copy_8_bytes, L_copy_8_bytes_loop;
-    const Register from       = rax;  // source array address
-    const Register to         = rdx;  // destination array address
-    const Register count      = rcx;  // elements count
-    const Register end_from   = rax;  // source array end address
-
-    __ enter(); // required for proper stackwalking of RuntimeStub frame
-    __ movptr(from , Address(rsp, 8+0));       // from
-    __ movptr(to   , Address(rsp, 8+4));       // to
-    __ movl2ptr(count, Address(rsp, 8+8));     // count
-
-    *entry = __ pc(); // Entry point from generic arraycopy stub.
-    BLOCK_COMMENT("Entry:");
-
-    // arrays overlap test
-    __ cmpptr(to, from);
-    RuntimeAddress nooverlap(nooverlap_target);
-    __ jump_cc(Assembler::belowEqual, nooverlap);
-    __ lea(end_from, Address(from, count, Address::times_8, 0));
-    __ cmpptr(to, end_from);
-    __ movptr(from, Address(rsp, 8));  // from
-    __ jump_cc(Assembler::aboveEqual, nooverlap);
-
-    {
-      // UnsafeMemoryAccess page error: continue after unsafe access
-      UnsafeMemoryAccessMark umam(this, true, true);
-
-      __ jmpb(L_copy_8_bytes);
-
-      __ align(OptoLoopAlignment);
-    __ BIND(L_copy_8_bytes_loop);
-      if (UseXMMForArrayCopy) {
-        __ movq(xmm0, Address(from, count, Address::times_8));
-        __ movq(Address(to, count, Address::times_8), xmm0);
-      } else {
-        __ fild_d(Address(from, count, Address::times_8));
-        __ fistp_d(Address(to, count, Address::times_8));
-      }
-    __ BIND(L_copy_8_bytes);
-      __ decrement(count);
-      __ jcc(Assembler::greaterEqual, L_copy_8_bytes_loop);
-
-    }
-    inc_copy_counter_np(T_LONG);
-    __ leave(); // required for proper stackwalking of RuntimeStub frame
-    __ xorptr(rax, rax); // return 0
-    __ ret(0);
-    return start;
-  }
-
-
-  // Helper for generating a dynamic type check.
-  // The sub_klass must be one of {rbx, rdx, rsi}.
-  // The temp is killed.
-  void generate_type_check(Register sub_klass,
-                           Address& super_check_offset_addr,
-                           Address& super_klass_addr,
-                           Register temp,
-                           Label* L_success, Label* L_failure) {
-    BLOCK_COMMENT("type_check:");
-
-    Label L_fallthrough;
-#define LOCAL_JCC(assembler_con, label_ptr)                             \
-    if (label_ptr != nullptr)  __ jcc(assembler_con, *(label_ptr));        \
-    else                    __ jcc(assembler_con, L_fallthrough) /*omit semi*/
-
-    // The following is a strange variation of the fast path which requires
-    // one less register, because needed values are on the argument stack.
-    // __ check_klass_subtype_fast_path(sub_klass, *super_klass*, temp,
-    //                                  L_success, L_failure, null);
-    assert_different_registers(sub_klass, temp);
-
-    int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
-
-    // if the pointers are equal, we are done (e.g., String[] elements)
-    __ cmpptr(sub_klass, super_klass_addr);
-    LOCAL_JCC(Assembler::equal, L_success);
-
-    // check the supertype display:
-    __ movl2ptr(temp, super_check_offset_addr);
-    Address super_check_addr(sub_klass, temp, Address::times_1, 0);
-    __ movptr(temp, super_check_addr); // load displayed supertype
-    __ cmpptr(temp, super_klass_addr); // test the super type
-    LOCAL_JCC(Assembler::equal, L_success);
-
-    // if it was a primary super, we can just fail immediately
-    __ cmpl(super_check_offset_addr, sc_offset);
-    LOCAL_JCC(Assembler::notEqual, L_failure);
-
-    // The repne_scan instruction uses fixed registers, which will get spilled.
-    // We happen to know this works best when super_klass is in rax.
-    Register super_klass = temp;
-    __ movptr(super_klass, super_klass_addr);
-    __ check_klass_subtype_slow_path(sub_klass, super_klass, noreg, noreg,
-                                     L_success, L_failure);
-
-    __ bind(L_fallthrough);
-
-    if (L_success == nullptr) { BLOCK_COMMENT("L_success:"); }
-    if (L_failure == nullptr) { BLOCK_COMMENT("L_failure:"); }
-
-#undef LOCAL_JCC
-  }
-
-  //
-  //  Generate checkcasting array copy stub
-  //
-  //  Input:
-  //    4(rsp)   - source array address
-  //    8(rsp)   - destination array address
-  //   12(rsp)   - element count, can be zero
-  //   16(rsp)   - size_t ckoff (super_check_offset)
-  //   20(rsp)   - oop ckval (super_klass)
-  //
-  //  Output:
-  //    rax, ==  0  -  success
-  //    rax, == -1^K - failure, where K is partial transfer count
-  //
-  address generate_checkcast_copy(StubGenStubId stub_id, address* entry) {
-    bool dest_uninitialized;
-    switch(stub_id) {
-    case checkcast_arraycopy_id:
-      dest_uninitialized = false;
-      break;
-    case checkcast_arraycopy_uninit_id:
-      dest_uninitialized = true;
-      break;
-    default:
-      ShouldNotReachHere();
-    }
-
-    __ align(CodeEntryAlignment);
-    StubCodeMark mark(this, stub_id);
-    address start = __ pc();
-
-    Label L_load_element, L_store_element, L_do_card_marks, L_done;
-
-    // register use:
-    //  rax, rdx, rcx -- loop control (end_from, end_to, count)
-    //  rdi, rsi      -- element access (oop, klass)
-    //  rbx,           -- temp
-    const Register from       = rax;    // source array address
-    const Register to         = rdx;    // destination array address
-    const Register length     = rcx;    // elements count
-    const Register elem       = rdi;    // each oop copied
-    const Register elem_klass = rsi;    // each elem._klass (sub_klass)
-    const Register temp       = rbx;    // lone remaining temp
-
-    __ enter(); // required for proper stackwalking of RuntimeStub frame
-
-    __ push(rsi);
-    __ push(rdi);
-    __ push(rbx);
-
-    Address   from_arg(rsp, 16+ 4);     // from
-    Address     to_arg(rsp, 16+ 8);     // to
-    Address length_arg(rsp, 16+12);     // elements count
-    Address  ckoff_arg(rsp, 16+16);     // super_check_offset
-    Address  ckval_arg(rsp, 16+20);     // super_klass
-
-    // Load up:
-    __ movptr(from,     from_arg);
-    __ movptr(to,         to_arg);
-    __ movl2ptr(length, length_arg);
-
-    if (entry != nullptr) {
-      *entry = __ pc(); // Entry point from generic arraycopy stub.
-      BLOCK_COMMENT("Entry:");
-    }
-
-    //---------------------------------------------------------------
-    // Assembler stub will be used for this call to arraycopy
-    // if the two arrays are subtypes of Object[] but the
-    // destination array type is not equal to or a supertype
-    // of the source type.  Each element must be separately
-    // checked.
-
-    // Loop-invariant addresses.  They are exclusive end pointers.
-    Address end_from_addr(from, length, Address::times_ptr, 0);
-    Address   end_to_addr(to,   length, Address::times_ptr, 0);
-
-    Register end_from = from;           // re-use
-    Register end_to   = to;             // re-use
-    Register count    = length;         // re-use
-
-    // Loop-variant addresses.  They assume post-incremented count < 0.
-    Address from_element_addr(end_from, count, Address::times_ptr, 0);
-    Address   to_element_addr(end_to,   count, Address::times_ptr, 0);
-    Address elem_klass_addr(elem, oopDesc::klass_offset_in_bytes());
-
-    DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_CHECKCAST;
-    if (dest_uninitialized) {
-      decorators |= IS_DEST_UNINITIALIZED;
-    }
-
-    BasicType type = T_OBJECT;
-    BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
-    bs->arraycopy_prologue(_masm, decorators, type, from, to, count);
-
-    // Copy from low to high addresses, indexed from the end of each array.
-    __ lea(end_from, end_from_addr);
-    __ lea(end_to,   end_to_addr);
-    assert(length == count, "");        // else fix next line:
-    __ negptr(count);                   // negate and test the length
-    __ jccb(Assembler::notZero, L_load_element);
-
-    // Empty array:  Nothing to do.
-    __ xorptr(rax, rax);                  // return 0 on (trivial) success
-    __ jmp(L_done);
-
-    // ======== begin loop ========
-    // (Loop is rotated; its entry is L_load_element.)
-    // Loop control:
-    //   for (count = -count; count != 0; count++)
-    // Base pointers src, dst are biased by 8*count,to last element.
-    __ align(OptoLoopAlignment);
-
-    __ BIND(L_store_element);
-    __ movptr(to_element_addr, elem);     // store the oop
-    __ increment(count);                // increment the count toward zero
-    __ jccb(Assembler::zero, L_do_card_marks);
-
-    // ======== loop entry is here ========
-    __ BIND(L_load_element);
-    __ movptr(elem, from_element_addr);   // load the oop
-    __ testptr(elem, elem);
-    __ jccb(Assembler::zero, L_store_element);
-
-    // (Could do a trick here:  Remember last successful non-null
-    // element stored and make a quick oop equality check on it.)
-
-    __ movptr(elem_klass, elem_klass_addr); // query the object klass
-    generate_type_check(elem_klass, ckoff_arg, ckval_arg, temp,
-                        &L_store_element, nullptr);
-    // (On fall-through, we have failed the element type check.)
-    // ======== end loop ========
-
-    // It was a real error; we must depend on the caller to finish the job.
-    // Register "count" = -1 * number of *remaining* oops, length_arg = *total* oops.
-    // Emit GC store barriers for the oops we have copied (length_arg + count),
-    // and report their number to the caller.
-    assert_different_registers(to, count, rax);
-    Label L_post_barrier;
-    __ addl(count, length_arg);         // transfers = (length - remaining)
-    __ movl2ptr(rax, count);            // save the value
-    __ notptr(rax);                     // report (-1^K) to caller (does not affect flags)
-    __ jccb(Assembler::notZero, L_post_barrier);
-    __ jmp(L_done); // K == 0, nothing was copied, skip post barrier
-
-    // Come here on success only.
-    __ BIND(L_do_card_marks);
-    __ xorptr(rax, rax);                // return 0 on success
-    __ movl2ptr(count, length_arg);
-
-    __ BIND(L_post_barrier);
-    __ movptr(to, to_arg);              // reload
-    bs->arraycopy_epilogue(_masm, decorators, type, from, to, count);
-
-    // Common exit point (success or failure).
-    __ BIND(L_done);
-    __ pop(rbx);
-    __ pop(rdi);
-    __ pop(rsi);
-    inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr);
-    __ leave(); // required for proper stackwalking of RuntimeStub frame
-    __ ret(0);
-
-    return start;
-  }
-
-  //
-  //  Generate 'unsafe' array copy stub
-  //  Though just as safe as the other stubs, it takes an unscaled
-  //  size_t argument instead of an element count.
-  //
-  //  Input:
-  //    4(rsp)   - source array address
-  //    8(rsp)   - destination array address
-  //   12(rsp)   - byte count, can be zero
-  //
-  //  Output:
-  //    rax, ==  0  -  success
-  //    rax, == -1  -  need to call System.arraycopy
-  //
-  // Examines the alignment of the operands and dispatches
-  // to a long, int, short, or byte copy loop.
-  //
-  address generate_unsafe_copy(address byte_copy_entry,
-                               address short_copy_entry,
-                               address int_copy_entry,
-                               address long_copy_entry) {
-
-    Label L_long_aligned, L_int_aligned, L_short_aligned;
-
-    __ align(CodeEntryAlignment);
-    StubGenStubId stub_id = StubGenStubId::unsafe_arraycopy_id;
-    StubCodeMark mark(this, stub_id);
-    address start = __ pc();
-
-    const Register from       = rax;  // source array address
-    const Register to         = rdx;  // destination array address
-    const Register count      = rcx;  // elements count
-
-    __ enter(); // required for proper stackwalking of RuntimeStub frame
-    __ push(rsi);
-    __ push(rdi);
-    Address  from_arg(rsp, 12+ 4);      // from
-    Address    to_arg(rsp, 12+ 8);      // to
-    Address count_arg(rsp, 12+12);      // byte count
-
-    // Load up:
-    __ movptr(from ,  from_arg);
-    __ movptr(to   ,    to_arg);
-    __ movl2ptr(count, count_arg);
-
-    // bump this on entry, not on exit:
-    inc_counter_np(SharedRuntime::_unsafe_array_copy_ctr);
-
-    const Register bits = rsi;
-    __ mov(bits, from);
-    __ orptr(bits, to);
-    __ orptr(bits, count);
-
-    __ testl(bits, BytesPerLong-1);
-    __ jccb(Assembler::zero, L_long_aligned);
-
-    __ testl(bits, BytesPerInt-1);
-    __ jccb(Assembler::zero, L_int_aligned);
-
-    __ testl(bits, BytesPerShort-1);
-    __ jump_cc(Assembler::notZero, RuntimeAddress(byte_copy_entry));
-
-    __ BIND(L_short_aligned);
-    __ shrptr(count, LogBytesPerShort); // size => short_count
-    __ movl(count_arg, count);          // update 'count'
-    __ jump(RuntimeAddress(short_copy_entry));
-
-    __ BIND(L_int_aligned);
-    __ shrptr(count, LogBytesPerInt); // size => int_count
-    __ movl(count_arg, count);          // update 'count'
-    __ jump(RuntimeAddress(int_copy_entry));
-
-    __ BIND(L_long_aligned);
-    __ shrptr(count, LogBytesPerLong); // size => qword_count
-    __ movl(count_arg, count);          // update 'count'
-    __ pop(rdi); // Do pops here since jlong_arraycopy stub does not do it.
-    __ pop(rsi);
-    __ jump(RuntimeAddress(long_copy_entry));
-
-    return start;
-  }
-
-
-  // Perform range checks on the proposed arraycopy.
-  // Smashes src_pos and dst_pos.  (Uses them up for temps.)
-  void arraycopy_range_checks(Register src,
-                              Register src_pos,
-                              Register dst,
-                              Register dst_pos,
-                              Address& length,
-                              Label& L_failed) {
-    BLOCK_COMMENT("arraycopy_range_checks:");
-    const Register src_end = src_pos;   // source array end position
-    const Register dst_end = dst_pos;   // destination array end position
-    __ addl(src_end, length); // src_pos + length
-    __ addl(dst_end, length); // dst_pos + length
-
-    //  if (src_pos + length > arrayOop(src)->length() ) FAIL;
-    __ cmpl(src_end, Address(src, arrayOopDesc::length_offset_in_bytes()));
-    __ jcc(Assembler::above, L_failed);
-
-    //  if (dst_pos + length > arrayOop(dst)->length() ) FAIL;
-    __ cmpl(dst_end, Address(dst, arrayOopDesc::length_offset_in_bytes()));
-    __ jcc(Assembler::above, L_failed);
-
-    BLOCK_COMMENT("arraycopy_range_checks done");
-  }
-
-
-  //
-  //  Generate generic array copy stubs
-  //
-  //  Input:
-  //     4(rsp)    -  src oop
-  //     8(rsp)    -  src_pos
-  //    12(rsp)    -  dst oop
-  //    16(rsp)    -  dst_pos
-  //    20(rsp)    -  element count
-  //
-  //  Output:
-  //    rax, ==  0  -  success
-  //    rax, == -1^K - failure, where K is partial transfer count
-  //
-  address generate_generic_copy(address entry_jbyte_arraycopy,
-                                address entry_jshort_arraycopy,
-                                address entry_jint_arraycopy,
-                                address entry_oop_arraycopy,
-                                address entry_jlong_arraycopy,
-                                address entry_checkcast_arraycopy) {
-    Label L_failed, L_failed_0, L_objArray;
-
-    { int modulus = CodeEntryAlignment;
-      int target  = modulus - 5; // 5 = sizeof jmp(L_failed)
-      int advance = target - (__ offset() % modulus);
-      if (advance < 0)  advance += modulus;
-      if (advance > 0)  __ nop(advance);
-    }
-    StubGenStubId stub_id = StubGenStubId::generic_arraycopy_id;
-    StubCodeMark mark(this, stub_id);
-
-    // Short-hop target to L_failed.  Makes for denser prologue code.
-    __ BIND(L_failed_0);
-    __ jmp(L_failed);
-    assert(__ offset() % CodeEntryAlignment == 0, "no further alignment needed");
-
-    __ align(CodeEntryAlignment);
-    address start = __ pc();
-
-    __ enter(); // required for proper stackwalking of RuntimeStub frame
-    __ push(rsi);
-    __ push(rdi);
-
-    // bump this on entry, not on exit:
-    inc_counter_np(SharedRuntime::_generic_array_copy_ctr);
-
-    // Input values
-    Address SRC     (rsp, 12+ 4);
-    Address SRC_POS (rsp, 12+ 8);
-    Address DST     (rsp, 12+12);
-    Address DST_POS (rsp, 12+16);
-    Address LENGTH  (rsp, 12+20);
-
-    //-----------------------------------------------------------------------
-    // Assembler stub will be used for this call to arraycopy
-    // if the following conditions are met:
-    //
-    // (1) src and dst must not be null.
-    // (2) src_pos must not be negative.
-    // (3) dst_pos must not be negative.
-    // (4) length  must not be negative.
-    // (5) src klass and dst klass should be the same and not null.
-    // (6) src and dst should be arrays.
-    // (7) src_pos + length must not exceed length of src.
-    // (8) dst_pos + length must not exceed length of dst.
-    //
-
-    const Register src     = rax;       // source array oop
-    const Register src_pos = rsi;
-    const Register dst     = rdx;       // destination array oop
-    const Register dst_pos = rdi;
-    const Register length  = rcx;       // transfer count
-
-    //  if (src == null) return -1;
-    __ movptr(src, SRC);      // src oop
-    __ testptr(src, src);
-    __ jccb(Assembler::zero, L_failed_0);
-
-    //  if (src_pos < 0) return -1;
-    __ movl2ptr(src_pos, SRC_POS);  // src_pos
-    __ testl(src_pos, src_pos);
-    __ jccb(Assembler::negative, L_failed_0);
-
-    //  if (dst == nullptr) return -1;
-    __ movptr(dst, DST);      // dst oop
-    __ testptr(dst, dst);
-    __ jccb(Assembler::zero, L_failed_0);
-
-    //  if (dst_pos < 0) return -1;
-    __ movl2ptr(dst_pos, DST_POS);  // dst_pos
-    __ testl(dst_pos, dst_pos);
-    __ jccb(Assembler::negative, L_failed_0);
-
-    //  if (length < 0) return -1;
-    __ movl2ptr(length, LENGTH);   // length
-    __ testl(length, length);
-    __ jccb(Assembler::negative, L_failed_0);
-
-    //  if (src->klass() == nullptr) return -1;
-    Address src_klass_addr(src, oopDesc::klass_offset_in_bytes());
-    Address dst_klass_addr(dst, oopDesc::klass_offset_in_bytes());
-    const Register rcx_src_klass = rcx;    // array klass
-    __ movptr(rcx_src_klass, Address(src, oopDesc::klass_offset_in_bytes()));
-
-#ifdef ASSERT
-    //  assert(src->klass() != nullptr);
-    BLOCK_COMMENT("assert klasses not null");
-    { Label L1, L2;
-      __ testptr(rcx_src_klass, rcx_src_klass);
-      __ jccb(Assembler::notZero, L2);   // it is broken if klass is null
-      __ bind(L1);
-      __ stop("broken null klass");
-      __ bind(L2);
-      __ cmpptr(dst_klass_addr, NULL_WORD);
-      __ jccb(Assembler::equal, L1);      // this would be broken also
-      BLOCK_COMMENT("assert done");
-    }
-#endif //ASSERT
-
-    // Load layout helper (32-bits)
-    //
-    //  |array_tag|     | header_size | element_type |     |log2_element_size|
-    // 32        30    24            16              8     2                 0
-    //
-    //   array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0
-    //
-
-    int lh_offset = in_bytes(Klass::layout_helper_offset());
-    Address src_klass_lh_addr(rcx_src_klass, lh_offset);
-
-    // Handle objArrays completely differently...
-    jint objArray_lh = Klass::array_layout_helper(T_OBJECT);
-    __ cmpl(src_klass_lh_addr, objArray_lh);
-    __ jcc(Assembler::equal, L_objArray);
-
-    //  if (src->klass() != dst->klass()) return -1;
-    __ cmpptr(rcx_src_klass, dst_klass_addr);
-    __ jccb(Assembler::notEqual, L_failed_0);
-
-    const Register rcx_lh = rcx;  // layout helper
-    assert(rcx_lh == rcx_src_klass, "known alias");
-    __ movl(rcx_lh, src_klass_lh_addr);
-
-    //  if (!src->is_Array()) return -1;
-    __ cmpl(rcx_lh, Klass::_lh_neutral_value);
-    __ jcc(Assembler::greaterEqual, L_failed_0); // signed cmp
-
-    // At this point, it is known to be a typeArray (array_tag 0x3).
-#ifdef ASSERT
-    { Label L;
-      __ cmpl(rcx_lh, (Klass::_lh_array_tag_type_value << Klass::_lh_array_tag_shift));
-      __ jcc(Assembler::greaterEqual, L); // signed cmp
-      __ stop("must be a primitive array");
-      __ bind(L);
-    }
-#endif
-
-    assert_different_registers(src, src_pos, dst, dst_pos, rcx_lh);
-    arraycopy_range_checks(src, src_pos, dst, dst_pos, LENGTH, L_failed);
-
-    // TypeArrayKlass
-    //
-    // src_addr = (src + array_header_in_bytes()) + (src_pos << log2elemsize);
-    // dst_addr = (dst + array_header_in_bytes()) + (dst_pos << log2elemsize);
-    //
-    const Register rsi_offset = rsi; // array offset
-    const Register src_array  = src; // src array offset
-    const Register dst_array  = dst; // dst array offset
-    const Register rdi_elsize = rdi; // log2 element size
-
-    __ mov(rsi_offset, rcx_lh);
-    __ shrptr(rsi_offset, Klass::_lh_header_size_shift);
-    __ andptr(rsi_offset, Klass::_lh_header_size_mask);   // array_offset
-    __ addptr(src_array, rsi_offset);  // src array offset
-    __ addptr(dst_array, rsi_offset);  // dst array offset
-    __ andptr(rcx_lh, Klass::_lh_log2_element_size_mask); // log2 elsize
-
-    // next registers should be set before the jump to corresponding stub
-    const Register from       = src; // source array address
-    const Register to         = dst; // destination array address
-    const Register count      = rcx; // elements count
-    // some of them should be duplicated on stack
-#define FROM   Address(rsp, 12+ 4)
-#define TO     Address(rsp, 12+ 8)   // Not used now
-#define COUNT  Address(rsp, 12+12)   // Only for oop arraycopy
-
-    BLOCK_COMMENT("scale indexes to element size");
-    __ movl2ptr(rsi, SRC_POS);  // src_pos
-    __ shlptr(rsi);             // src_pos << rcx (log2 elsize)
-    assert(src_array == from, "");
-    __ addptr(from, rsi);       // from = src_array + SRC_POS << log2 elsize
-    __ movl2ptr(rdi, DST_POS);  // dst_pos
-    __ shlptr(rdi);             // dst_pos << rcx (log2 elsize)
-    assert(dst_array == to, "");
-    __ addptr(to,  rdi);        // to   = dst_array + DST_POS << log2 elsize
-    __ movptr(FROM, from);      // src_addr
-    __ mov(rdi_elsize, rcx_lh); // log2 elsize
-    __ movl2ptr(count, LENGTH); // elements count
-
-    BLOCK_COMMENT("choose copy loop based on element size");
-    __ cmpl(rdi_elsize, 0);
-
-    __ jump_cc(Assembler::equal, RuntimeAddress(entry_jbyte_arraycopy));
-    __ cmpl(rdi_elsize, LogBytesPerShort);
-    __ jump_cc(Assembler::equal, RuntimeAddress(entry_jshort_arraycopy));
-    __ cmpl(rdi_elsize, LogBytesPerInt);
-    __ jump_cc(Assembler::equal, RuntimeAddress(entry_jint_arraycopy));
-#ifdef ASSERT
-    __ cmpl(rdi_elsize, LogBytesPerLong);
-    __ jccb(Assembler::notEqual, L_failed);
-#endif
-    __ pop(rdi); // Do pops here since jlong_arraycopy stub does not do it.
-    __ pop(rsi);
-    __ jump(RuntimeAddress(entry_jlong_arraycopy));
-
-  __ BIND(L_failed);
-    __ xorptr(rax, rax);
-    __ notptr(rax); // return -1
-    __ pop(rdi);
-    __ pop(rsi);
-    __ leave(); // required for proper stackwalking of RuntimeStub frame
-    __ ret(0);
-
-    // ObjArrayKlass
-  __ BIND(L_objArray);
-    // live at this point:  rcx_src_klass, src[_pos], dst[_pos]
-
-    Label L_plain_copy, L_checkcast_copy;
-    //  test array classes for subtyping
-    __ cmpptr(rcx_src_klass, dst_klass_addr); // usual case is exact equality
-    __ jccb(Assembler::notEqual, L_checkcast_copy);
-
-    // Identically typed arrays can be copied without element-wise checks.
-    assert_different_registers(src, src_pos, dst, dst_pos, rcx_src_klass);
-    arraycopy_range_checks(src, src_pos, dst, dst_pos, LENGTH, L_failed);
-
-  __ BIND(L_plain_copy);
-    __ movl2ptr(count, LENGTH); // elements count
-    __ movl2ptr(src_pos, SRC_POS);  // reload src_pos
-    __ lea(from, Address(src, src_pos, Address::times_ptr,
-                 arrayOopDesc::base_offset_in_bytes(T_OBJECT))); // src_addr
-    __ movl2ptr(dst_pos, DST_POS);  // reload dst_pos
-    __ lea(to,   Address(dst, dst_pos, Address::times_ptr,
-                 arrayOopDesc::base_offset_in_bytes(T_OBJECT))); // dst_addr
-    __ movptr(FROM,  from);   // src_addr
-    __ movptr(TO,    to);     // dst_addr
-    __ movl(COUNT, count);  // count
-    __ jump(RuntimeAddress(entry_oop_arraycopy));
-
-  __ BIND(L_checkcast_copy);
-    // live at this point:  rcx_src_klass, dst[_pos], src[_pos]
-    {
-      // Handy offsets:
-      int  ek_offset = in_bytes(ObjArrayKlass::element_klass_offset());
-      int sco_offset = in_bytes(Klass::super_check_offset_offset());
-
-      Register rsi_dst_klass = rsi;
-      Register rdi_temp      = rdi;
-      assert(rsi_dst_klass == src_pos, "expected alias w/ src_pos");
-      assert(rdi_temp      == dst_pos, "expected alias w/ dst_pos");
-      Address dst_klass_lh_addr(rsi_dst_klass, lh_offset);
-
-      // Before looking at dst.length, make sure dst is also an objArray.
-      __ movptr(rsi_dst_klass, dst_klass_addr);
-      __ cmpl(dst_klass_lh_addr, objArray_lh);
-      __ jccb(Assembler::notEqual, L_failed);
-
-      // It is safe to examine both src.length and dst.length.
-      __ movl2ptr(src_pos, SRC_POS);        // reload rsi
-      arraycopy_range_checks(src, src_pos, dst, dst_pos, LENGTH, L_failed);
-      // (Now src_pos and dst_pos are killed, but not src and dst.)
-
-      // We'll need this temp (don't forget to pop it after the type check).
-      __ push(rbx);
-      Register rbx_src_klass = rbx;
-
-      __ mov(rbx_src_klass, rcx_src_klass); // spill away from rcx
-      __ movptr(rsi_dst_klass, dst_klass_addr);
-      Address super_check_offset_addr(rsi_dst_klass, sco_offset);
-      Label L_fail_array_check;
-      generate_type_check(rbx_src_klass,
-                          super_check_offset_addr, dst_klass_addr,
-                          rdi_temp, nullptr, &L_fail_array_check);
-      // (On fall-through, we have passed the array type check.)
-      __ pop(rbx);
-      __ jmp(L_plain_copy);
-
-      __ BIND(L_fail_array_check);
-      // Reshuffle arguments so we can call checkcast_arraycopy:
-
-      // match initial saves for checkcast_arraycopy
-      // push(rsi);    // already done; see above
-      // push(rdi);    // already done; see above
-      // push(rbx);    // already done; see above
-
-      // Marshal outgoing arguments now, freeing registers.
-      Address   from_arg(rsp, 16+ 4);   // from
-      Address     to_arg(rsp, 16+ 8);   // to
-      Address length_arg(rsp, 16+12);   // elements count
-      Address  ckoff_arg(rsp, 16+16);   // super_check_offset
-      Address  ckval_arg(rsp, 16+20);   // super_klass
-
-      Address SRC_POS_arg(rsp, 16+ 8);
-      Address DST_POS_arg(rsp, 16+16);
-      Address  LENGTH_arg(rsp, 16+20);
-      // push rbx, changed the incoming offsets (why not just use rbp,??)
-      // assert(SRC_POS_arg.disp() == SRC_POS.disp() + 4, "");
-
-      __ movptr(rbx, Address(rsi_dst_klass, ek_offset));
-      __ movl2ptr(length, LENGTH_arg);    // reload elements count
-      __ movl2ptr(src_pos, SRC_POS_arg);  // reload src_pos
-      __ movl2ptr(dst_pos, DST_POS_arg);  // reload dst_pos
-
-      __ movptr(ckval_arg, rbx);          // destination element type
-      __ movl(rbx, Address(rbx, sco_offset));
-      __ movl(ckoff_arg, rbx);          // corresponding class check offset
-
-      __ movl(length_arg, length);      // outgoing length argument
-
-      __ lea(from, Address(src, src_pos, Address::times_ptr,
-                            arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
-      __ movptr(from_arg, from);
-
-      __ lea(to, Address(dst, dst_pos, Address::times_ptr,
-                          arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
-      __ movptr(to_arg, to);
-      __ jump(RuntimeAddress(entry_checkcast_arraycopy));
-    }
-
-    return start;
-  }
-
-  void generate_arraycopy_stubs() {
-    address entry;
-    address entry_jbyte_arraycopy;
-    address entry_jshort_arraycopy;
-    address entry_jint_arraycopy;
-    address entry_oop_arraycopy;
-    address entry_jlong_arraycopy;
-    address entry_checkcast_arraycopy;
-
-    StubRoutines::_arrayof_jbyte_disjoint_arraycopy =
-        generate_disjoint_copy(StubGenStubId::arrayof_jbyte_disjoint_arraycopy_id, &entry);
-    StubRoutines::_arrayof_jbyte_arraycopy =
-        generate_conjoint_copy(StubGenStubId::arrayof_jbyte_arraycopy_id, entry, nullptr);
-    StubRoutines::_jbyte_disjoint_arraycopy =
-      generate_disjoint_copy(StubGenStubId::jbyte_disjoint_arraycopy_id, &entry);
-    StubRoutines::_jbyte_arraycopy =
-        generate_conjoint_copy(StubGenStubId::jbyte_arraycopy_id,  entry, &entry_jbyte_arraycopy);
-
-    StubRoutines::_arrayof_jshort_disjoint_arraycopy =
-        generate_disjoint_copy(StubGenStubId::arrayof_jshort_disjoint_arraycopy_id, &entry);
-    StubRoutines::_arrayof_jshort_arraycopy =
-        generate_conjoint_copy(StubGenStubId::arrayof_jshort_arraycopy_id, entry, nullptr);
-    StubRoutines::_jshort_disjoint_arraycopy =
-        generate_disjoint_copy(StubGenStubId::jshort_disjoint_arraycopy_id, &entry);
-    StubRoutines::_jshort_arraycopy =
-        generate_conjoint_copy(StubGenStubId::jshort_arraycopy_id, entry, &entry_jshort_arraycopy);
-
-    // Next arrays are always aligned on 4 bytes at least.
-    StubRoutines::_jint_disjoint_arraycopy =
-        generate_disjoint_copy(StubGenStubId::jint_disjoint_arraycopy_id, &entry);
-    StubRoutines::_jint_arraycopy =
-        generate_conjoint_copy(StubGenStubId::jint_arraycopy_id, entry, &entry_jint_arraycopy);
-
-    StubRoutines::_oop_disjoint_arraycopy =
-        generate_disjoint_copy(StubGenStubId::oop_disjoint_arraycopy_id, &entry);
-    StubRoutines::_oop_arraycopy =
-        generate_conjoint_copy(StubGenStubId::oop_arraycopy_id, entry, &entry_oop_arraycopy);
-
-    StubRoutines::_oop_disjoint_arraycopy_uninit =
-        generate_disjoint_copy(StubGenStubId::oop_disjoint_arraycopy_uninit_id, &entry);
-    StubRoutines::_oop_arraycopy_uninit =
-        generate_conjoint_copy(StubGenStubId::oop_arraycopy_uninit_id, entry, nullptr);
-
-    StubRoutines::_jlong_disjoint_arraycopy =
-        generate_disjoint_long_copy(&entry);
-    StubRoutines::_jlong_arraycopy =
-        generate_conjoint_long_copy(entry, &entry_jlong_arraycopy);
-
-    StubRoutines::_jbyte_fill = generate_fill(StubGenStubId::jbyte_fill_id);
-    StubRoutines::_jshort_fill = generate_fill(StubGenStubId::jshort_fill_id);
-    StubRoutines::_jint_fill = generate_fill(StubGenStubId::jint_fill_id);
-    StubRoutines::_arrayof_jbyte_fill = generate_fill(StubGenStubId::arrayof_jbyte_fill_id);
-    StubRoutines::_arrayof_jshort_fill = generate_fill(StubGenStubId::arrayof_jshort_fill_id);
-    StubRoutines::_arrayof_jint_fill = generate_fill(StubGenStubId::arrayof_jint_fill_id);
-
-    StubRoutines::_arrayof_jint_disjoint_arraycopy       = StubRoutines::_jint_disjoint_arraycopy;
-    StubRoutines::_arrayof_oop_disjoint_arraycopy        = StubRoutines::_oop_disjoint_arraycopy;
-    StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit = StubRoutines::_oop_disjoint_arraycopy_uninit;
-    StubRoutines::_arrayof_jlong_disjoint_arraycopy      = StubRoutines::_jlong_disjoint_arraycopy;
-
-    StubRoutines::_arrayof_jint_arraycopy       = StubRoutines::_jint_arraycopy;
-    StubRoutines::_arrayof_oop_arraycopy        = StubRoutines::_oop_arraycopy;
-    StubRoutines::_arrayof_oop_arraycopy_uninit = StubRoutines::_oop_arraycopy_uninit;
-    StubRoutines::_arrayof_jlong_arraycopy      = StubRoutines::_jlong_arraycopy;
-
-    StubRoutines::_checkcast_arraycopy =
-      generate_checkcast_copy(StubGenStubId::checkcast_arraycopy_id, &entry_checkcast_arraycopy);
-    StubRoutines::_checkcast_arraycopy_uninit =
-      generate_checkcast_copy(StubGenStubId::checkcast_arraycopy_uninit_id, nullptr);
-
-    StubRoutines::_unsafe_arraycopy =
-        generate_unsafe_copy(entry_jbyte_arraycopy,
-                             entry_jshort_arraycopy,
-                             entry_jint_arraycopy,
-                             entry_jlong_arraycopy);
-
-    StubRoutines::_generic_arraycopy =
-        generate_generic_copy( entry_jbyte_arraycopy,
-                               entry_jshort_arraycopy,
-                               entry_jint_arraycopy,
-                               entry_oop_arraycopy,
-                               entry_jlong_arraycopy,
-                               entry_checkcast_arraycopy);
-  }
-
-  // AES intrinsic stubs
-  enum {AESBlockSize = 16};
-
-  address key_shuffle_mask_addr() {
-    return (address)KEY_SHUFFLE_MASK;
-  }
-
-  address counter_shuffle_mask_addr() {
-    return (address)COUNTER_SHUFFLE_MASK;
-  }
-
-  // Utility routine for loading a 128-bit key word in little endian format
-  // can optionally specify that the shuffle mask is already in an xmmregister
-  void load_key(XMMRegister xmmdst, Register key, int offset, XMMRegister xmm_shuf_mask = xnoreg) {
-    __ movdqu(xmmdst, Address(key, offset));
-    if (xmm_shuf_mask != xnoreg) {
-      __ pshufb(xmmdst, xmm_shuf_mask);
-    } else {
-      __ pshufb(xmmdst, ExternalAddress(key_shuffle_mask_addr()));
-    }
-  }
-
-  // aesenc using specified key+offset
-  // can optionally specify that the shuffle mask is already in an xmmregister
-  void aes_enc_key(XMMRegister xmmdst, XMMRegister xmmtmp, Register key, int offset, XMMRegister xmm_shuf_mask = xnoreg) {
-    load_key(xmmtmp, key, offset, xmm_shuf_mask);
-    __ aesenc(xmmdst, xmmtmp);
-  }
-
-  // aesdec using specified key+offset
-  // can optionally specify that the shuffle mask is already in an xmmregister
-  void aes_dec_key(XMMRegister xmmdst, XMMRegister xmmtmp, Register key, int offset, XMMRegister xmm_shuf_mask = xnoreg) {
-    load_key(xmmtmp, key, offset, xmm_shuf_mask);
-    __ aesdec(xmmdst, xmmtmp);
-  }
-
-  // Utility routine for increase 128bit counter (iv in CTR mode)
-  //  XMM_128bit,  D3, D2, D1, D0
-  void inc_counter(Register reg, XMMRegister xmmdst, int inc_delta, Label& next_block) {
-    __ pextrd(reg, xmmdst, 0x0);
-    __ addl(reg, inc_delta);
-    __ pinsrd(xmmdst, reg, 0x0);
-    __ jcc(Assembler::carryClear, next_block); // jump if no carry
-
-    __ pextrd(reg, xmmdst, 0x01); // Carry-> D1
-    __ addl(reg, 0x01);
-    __ pinsrd(xmmdst, reg, 0x01);
-    __ jcc(Assembler::carryClear, next_block); // jump if no carry
-
-    __ pextrd(reg, xmmdst, 0x02); // Carry-> D2
-    __ addl(reg, 0x01);
-    __ pinsrd(xmmdst, reg, 0x02);
-    __ jcc(Assembler::carryClear, next_block); // jump if no carry
-
-    __ pextrd(reg, xmmdst, 0x03); // Carry -> D3
-    __ addl(reg, 0x01);
-    __ pinsrd(xmmdst, reg, 0x03);
-
-    __ BIND(next_block);          // next instruction
-  }
-
-
-  // Arguments:
-  //
-  // Inputs:
-  //   c_rarg0   - source byte array address
-  //   c_rarg1   - destination byte array address
-  //   c_rarg2   - K (key) in little endian int array
-  //
-  address generate_aescrypt_encryptBlock() {
-    assert(UseAES, "need AES instructions and misaligned SSE support");
-    __ align(CodeEntryAlignment);
-    StubGenStubId stub_id = StubGenStubId::aescrypt_encryptBlock_id;
-    StubCodeMark mark(this, stub_id);
-    Label L_doLast;
-    address start = __ pc();
-
-    const Register from        = rdx;      // source array address
-    const Register to          = rdx;      // destination array address
-    const Register key         = rcx;      // key array address
-    const Register keylen      = rax;
-    const Address  from_param(rbp, 8+0);
-    const Address  to_param  (rbp, 8+4);
-    const Address  key_param (rbp, 8+8);
-
-    const XMMRegister xmm_result = xmm0;
-    const XMMRegister xmm_key_shuf_mask = xmm1;
-    const XMMRegister xmm_temp1  = xmm2;
-    const XMMRegister xmm_temp2  = xmm3;
-    const XMMRegister xmm_temp3  = xmm4;
-    const XMMRegister xmm_temp4  = xmm5;
-
-    __ enter();   // required for proper stackwalking of RuntimeStub frame
-
-    __ movptr(from, from_param);
-    __ movptr(key, key_param);
-
-    // keylen could be only {11, 13, 15} * 4 = {44, 52, 60}
-    __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
-
-    __ movdqu(xmm_key_shuf_mask, ExternalAddress(key_shuffle_mask_addr()));
-    __ movdqu(xmm_result, Address(from, 0));  // get 16 bytes of input
-    __ movptr(to, to_param);
-
-    // For encryption, the java expanded key ordering is just what we need
-
-    load_key(xmm_temp1, key, 0x00, xmm_key_shuf_mask);
-    __ pxor(xmm_result, xmm_temp1);
-
-    load_key(xmm_temp1, key, 0x10, xmm_key_shuf_mask);
-    load_key(xmm_temp2, key, 0x20, xmm_key_shuf_mask);
-    load_key(xmm_temp3, key, 0x30, xmm_key_shuf_mask);
-    load_key(xmm_temp4, key, 0x40, xmm_key_shuf_mask);
-
-    __ aesenc(xmm_result, xmm_temp1);
-    __ aesenc(xmm_result, xmm_temp2);
-    __ aesenc(xmm_result, xmm_temp3);
-    __ aesenc(xmm_result, xmm_temp4);
-
-    load_key(xmm_temp1, key, 0x50, xmm_key_shuf_mask);
-    load_key(xmm_temp2, key, 0x60, xmm_key_shuf_mask);
-    load_key(xmm_temp3, key, 0x70, xmm_key_shuf_mask);
-    load_key(xmm_temp4, key, 0x80, xmm_key_shuf_mask);
-
-    __ aesenc(xmm_result, xmm_temp1);
-    __ aesenc(xmm_result, xmm_temp2);
-    __ aesenc(xmm_result, xmm_temp3);
-    __ aesenc(xmm_result, xmm_temp4);
-
-    load_key(xmm_temp1, key, 0x90, xmm_key_shuf_mask);
-    load_key(xmm_temp2, key, 0xa0, xmm_key_shuf_mask);
-
-    __ cmpl(keylen, 44);
-    __ jccb(Assembler::equal, L_doLast);
-
-    __ aesenc(xmm_result, xmm_temp1);
-    __ aesenc(xmm_result, xmm_temp2);
-
-    load_key(xmm_temp1, key, 0xb0, xmm_key_shuf_mask);
-    load_key(xmm_temp2, key, 0xc0, xmm_key_shuf_mask);
-
-    __ cmpl(keylen, 52);
-    __ jccb(Assembler::equal, L_doLast);
-
-    __ aesenc(xmm_result, xmm_temp1);
-    __ aesenc(xmm_result, xmm_temp2);
-
-    load_key(xmm_temp1, key, 0xd0, xmm_key_shuf_mask);
-    load_key(xmm_temp2, key, 0xe0, xmm_key_shuf_mask);
-
-    __ BIND(L_doLast);
-    __ aesenc(xmm_result, xmm_temp1);
-    __ aesenclast(xmm_result, xmm_temp2);
-    __ movdqu(Address(to, 0), xmm_result);        // store the result
-    __ xorptr(rax, rax); // return 0
-    __ leave(); // required for proper stackwalking of RuntimeStub frame
-    __ ret(0);
-
-    return start;
-  }
-
-
-  // Arguments:
-  //
-  // Inputs:
-  //   c_rarg0   - source byte array address
-  //   c_rarg1   - destination byte array address
-  //   c_rarg2   - K (key) in little endian int array
-  //
-  address generate_aescrypt_decryptBlock() {
-    assert(UseAES, "need AES instructions and misaligned SSE support");
-    __ align(CodeEntryAlignment);
-    StubGenStubId stub_id = StubGenStubId::aescrypt_decryptBlock_id;
-    StubCodeMark mark(this, stub_id);
-    Label L_doLast;
-    address start = __ pc();
-
-    const Register from        = rdx;      // source array address
-    const Register to          = rdx;      // destination array address
-    const Register key         = rcx;      // key array address
-    const Register keylen      = rax;
-    const Address  from_param(rbp, 8+0);
-    const Address  to_param  (rbp, 8+4);
-    const Address  key_param (rbp, 8+8);
-
-    const XMMRegister xmm_result = xmm0;
-    const XMMRegister xmm_key_shuf_mask = xmm1;
-    const XMMRegister xmm_temp1  = xmm2;
-    const XMMRegister xmm_temp2  = xmm3;
-    const XMMRegister xmm_temp3  = xmm4;
-    const XMMRegister xmm_temp4  = xmm5;
-
-    __ enter(); // required for proper stackwalking of RuntimeStub frame
-
-    __ movptr(from, from_param);
-    __ movptr(key, key_param);
-
-    // keylen could be only {11, 13, 15} * 4 = {44, 52, 60}
-    __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
-
-    __ movdqu(xmm_key_shuf_mask, ExternalAddress(key_shuffle_mask_addr()));
-    __ movdqu(xmm_result, Address(from, 0));
-    __ movptr(to, to_param);
-
-    // for decryption java expanded key ordering is rotated one position from what we want
-    // so we start from 0x10 here and hit 0x00 last
-    // we don't know if the key is aligned, hence not using load-execute form
-    load_key(xmm_temp1, key, 0x10, xmm_key_shuf_mask);
-    load_key(xmm_temp2, key, 0x20, xmm_key_shuf_mask);
-    load_key(xmm_temp3, key, 0x30, xmm_key_shuf_mask);
-    load_key(xmm_temp4, key, 0x40, xmm_key_shuf_mask);
-
-    __ pxor  (xmm_result, xmm_temp1);
-    __ aesdec(xmm_result, xmm_temp2);
-    __ aesdec(xmm_result, xmm_temp3);
-    __ aesdec(xmm_result, xmm_temp4);
-
-    load_key(xmm_temp1, key, 0x50, xmm_key_shuf_mask);
-    load_key(xmm_temp2, key, 0x60, xmm_key_shuf_mask);
-    load_key(xmm_temp3, key, 0x70, xmm_key_shuf_mask);
-    load_key(xmm_temp4, key, 0x80, xmm_key_shuf_mask);
-
-    __ aesdec(xmm_result, xmm_temp1);
-    __ aesdec(xmm_result, xmm_temp2);
-    __ aesdec(xmm_result, xmm_temp3);
-    __ aesdec(xmm_result, xmm_temp4);
-
-    load_key(xmm_temp1, key, 0x90, xmm_key_shuf_mask);
-    load_key(xmm_temp2, key, 0xa0, xmm_key_shuf_mask);
-    load_key(xmm_temp3, key, 0x00, xmm_key_shuf_mask);
-
-    __ cmpl(keylen, 44);
-    __ jccb(Assembler::equal, L_doLast);
-
-    __ aesdec(xmm_result, xmm_temp1);
-    __ aesdec(xmm_result, xmm_temp2);
-
-    load_key(xmm_temp1, key, 0xb0, xmm_key_shuf_mask);
-    load_key(xmm_temp2, key, 0xc0, xmm_key_shuf_mask);
-
-    __ cmpl(keylen, 52);
-    __ jccb(Assembler::equal, L_doLast);
-
-    __ aesdec(xmm_result, xmm_temp1);
-    __ aesdec(xmm_result, xmm_temp2);
-
-    load_key(xmm_temp1, key, 0xd0, xmm_key_shuf_mask);
-    load_key(xmm_temp2, key, 0xe0, xmm_key_shuf_mask);
-
-    __ BIND(L_doLast);
-    __ aesdec(xmm_result, xmm_temp1);
-    __ aesdec(xmm_result, xmm_temp2);
-
-    // for decryption the aesdeclast operation is always on key+0x00
-    __ aesdeclast(xmm_result, xmm_temp3);
-    __ movdqu(Address(to, 0), xmm_result);  // store the result
-    __ xorptr(rax, rax); // return 0
-    __ leave(); // required for proper stackwalking of RuntimeStub frame
-    __ ret(0);
-
-    return start;
-  }
-
-  void handleSOERegisters(bool saving) {
-    const int saveFrameSizeInBytes = 4 * wordSize;
-    const Address saved_rbx     (rbp, -3 * wordSize);
-    const Address saved_rsi     (rbp, -2 * wordSize);
-    const Address saved_rdi     (rbp, -1 * wordSize);
-
-    if (saving) {
-      __ subptr(rsp, saveFrameSizeInBytes);
-      __ movptr(saved_rsi, rsi);
-      __ movptr(saved_rdi, rdi);
-      __ movptr(saved_rbx, rbx);
-    } else {
-      // restoring
-      __ movptr(rsi, saved_rsi);
-      __ movptr(rdi, saved_rdi);
-      __ movptr(rbx, saved_rbx);
-    }
-  }
-
-  // Arguments:
-  //
-  // Inputs:
-  //   c_rarg0   - source byte array address
-  //   c_rarg1   - destination byte array address
-  //   c_rarg2   - K (key) in little endian int array
-  //   c_rarg3   - r vector byte array address
-  //   c_rarg4   - input length
-  //
-  // Output:
-  //   rax       - input length
-  //
-  address generate_cipherBlockChaining_encryptAESCrypt() {
-    assert(UseAES, "need AES instructions and misaligned SSE support");
-    __ align(CodeEntryAlignment);
-    StubGenStubId stub_id = StubGenStubId::cipherBlockChaining_encryptAESCrypt_id;
-    StubCodeMark mark(this, stub_id);
-    address start = __ pc();
-
-    Label L_exit, L_key_192_256, L_key_256, L_loopTop_128, L_loopTop_192, L_loopTop_256;
-    const Register from        = rsi;      // source array address
-    const Register to          = rdx;      // destination array address
-    const Register key         = rcx;      // key array address
-    const Register rvec        = rdi;      // r byte array initialized from initvector array address
-                                           // and left with the results of the last encryption block
-    const Register len_reg     = rbx;      // src len (must be multiple of blocksize 16)
-    const Register pos         = rax;
-
-    // xmm register assignments for the loops below
-    const XMMRegister xmm_result = xmm0;
-    const XMMRegister xmm_temp   = xmm1;
-    // first 6 keys preloaded into xmm2-xmm7
-    const int XMM_REG_NUM_KEY_FIRST = 2;
-    const int XMM_REG_NUM_KEY_LAST  = 7;
-    const XMMRegister xmm_key0   = as_XMMRegister(XMM_REG_NUM_KEY_FIRST);
-
-    __ enter(); // required for proper stackwalking of RuntimeStub frame
-    handleSOERegisters(true /*saving*/);
-
-    // load registers from incoming parameters
-    const Address  from_param(rbp, 8+0);
-    const Address  to_param  (rbp, 8+4);
-    const Address  key_param (rbp, 8+8);
-    const Address  rvec_param (rbp, 8+12);
-    const Address  len_param  (rbp, 8+16);
-    __ movptr(from , from_param);
-    __ movptr(to   , to_param);
-    __ movptr(key  , key_param);
-    __ movptr(rvec , rvec_param);
-    __ movptr(len_reg , len_param);
-
-    const XMMRegister xmm_key_shuf_mask = xmm_temp;  // used temporarily to swap key bytes up front
-    __ movdqu(xmm_key_shuf_mask, ExternalAddress(key_shuffle_mask_addr()));
-    // load up xmm regs 2 thru 7 with keys 0-5
-    for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x00; rnum  <= XMM_REG_NUM_KEY_LAST; rnum++) {
-      load_key(as_XMMRegister(rnum), key, offset, xmm_key_shuf_mask);
-      offset += 0x10;
-    }
-
-    __ movdqu(xmm_result, Address(rvec, 0x00));   // initialize xmm_result with r vec
-
-    // now split to different paths depending on the keylen (len in ints of AESCrypt.KLE array (52=192, or 60=256))
-    __ movl(rax, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
-    __ cmpl(rax, 44);
-    __ jcc(Assembler::notEqual, L_key_192_256);
-
-    // 128 bit code follows here
-    __ movl(pos, 0);
-    __ align(OptoLoopAlignment);
-    __ BIND(L_loopTop_128);
-    __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of input
-    __ pxor  (xmm_result, xmm_temp);                                // xor with the current r vector
-
-    __ pxor  (xmm_result, xmm_key0);                                // do the aes rounds
-    for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum  <= XMM_REG_NUM_KEY_LAST; rnum++) {
-      __ aesenc(xmm_result, as_XMMRegister(rnum));
-    }
-    for (int key_offset = 0x60; key_offset <= 0x90; key_offset += 0x10) {
-      aes_enc_key(xmm_result, xmm_temp, key, key_offset);
-    }
-    load_key(xmm_temp, key, 0xa0);
-    __ aesenclast(xmm_result, xmm_temp);
-
-    __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);     // store into the next 16 bytes of output
-    // no need to store r to memory until we exit
-    __ addptr(pos, AESBlockSize);
-    __ subptr(len_reg, AESBlockSize);
-    __ jcc(Assembler::notEqual, L_loopTop_128);
-
-    __ BIND(L_exit);
-    __ movdqu(Address(rvec, 0), xmm_result);     // final value of r stored in rvec of CipherBlockChaining object
-
-    handleSOERegisters(false /*restoring*/);
-    __ movptr(rax, len_param); // return length
-    __ leave();                                  // required for proper stackwalking of RuntimeStub frame
-    __ ret(0);
-
-    __ BIND(L_key_192_256);
-    // here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256)
-    __ cmpl(rax, 52);
-    __ jcc(Assembler::notEqual, L_key_256);
-
-    // 192-bit code follows here (could be changed to use more xmm registers)
-    __ movl(pos, 0);
-    __ align(OptoLoopAlignment);
-    __ BIND(L_loopTop_192);
-    __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of input
-    __ pxor  (xmm_result, xmm_temp);                                // xor with the current r vector
-
-    __ pxor  (xmm_result, xmm_key0);                                // do the aes rounds
-    for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum  <= XMM_REG_NUM_KEY_LAST; rnum++) {
-      __ aesenc(xmm_result, as_XMMRegister(rnum));
-    }
-    for (int key_offset = 0x60; key_offset <= 0xb0; key_offset += 0x10) {
-      aes_enc_key(xmm_result, xmm_temp, key, key_offset);
-    }
-    load_key(xmm_temp, key, 0xc0);
-    __ aesenclast(xmm_result, xmm_temp);
-
-    __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);   // store into the next 16 bytes of output
-    // no need to store r to memory until we exit
-    __ addptr(pos, AESBlockSize);
-    __ subptr(len_reg, AESBlockSize);
-    __ jcc(Assembler::notEqual, L_loopTop_192);
-    __ jmp(L_exit);
-
-    __ BIND(L_key_256);
-    // 256-bit code follows here (could be changed to use more xmm registers)
-    __ movl(pos, 0);
-    __ align(OptoLoopAlignment);
-    __ BIND(L_loopTop_256);
-    __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of input
-    __ pxor  (xmm_result, xmm_temp);                                // xor with the current r vector
-
-    __ pxor  (xmm_result, xmm_key0);                                // do the aes rounds
-    for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum  <= XMM_REG_NUM_KEY_LAST; rnum++) {
-      __ aesenc(xmm_result, as_XMMRegister(rnum));
-    }
-    for (int key_offset = 0x60; key_offset <= 0xd0; key_offset += 0x10) {
-      aes_enc_key(xmm_result, xmm_temp, key, key_offset);
-    }
-    load_key(xmm_temp, key, 0xe0);
-    __ aesenclast(xmm_result, xmm_temp);
-
-    __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);   // store into the next 16 bytes of output
-    // no need to store r to memory until we exit
-    __ addptr(pos, AESBlockSize);
-    __ subptr(len_reg, AESBlockSize);
-    __ jcc(Assembler::notEqual, L_loopTop_256);
-    __ jmp(L_exit);
-
-    return start;
-  }
-
-
-  // CBC AES Decryption.
-  // In 32-bit stub, because of lack of registers we do not try to parallelize 4 blocks at a time.
-  //
-  // Arguments:
-  //
-  // Inputs:
-  //   c_rarg0   - source byte array address
-  //   c_rarg1   - destination byte array address
-  //   c_rarg2   - K (key) in little endian int array
-  //   c_rarg3   - r vector byte array address
-  //   c_rarg4   - input length
-  //
-  // Output:
-  //   rax       - input length
-  //
-
-  address generate_cipherBlockChaining_decryptAESCrypt_Parallel() {
-    assert(UseAES, "need AES instructions and misaligned SSE support");
-    __ align(CodeEntryAlignment);
-    StubGenStubId stub_id = StubGenStubId::cipherBlockChaining_decryptAESCrypt_id;
-    StubCodeMark mark(this, stub_id);
-    address start = __ pc();
-
-    const Register from        = rsi;      // source array address
-    const Register to          = rdx;      // destination array address
-    const Register key         = rcx;      // key array address
-    const Register rvec        = rdi;      // r byte array initialized from initvector array address
-                                           // and left with the results of the last encryption block
-    const Register len_reg     = rbx;      // src len (must be multiple of blocksize 16)
-    const Register pos         = rax;
-
-    const int PARALLEL_FACTOR = 4;
-    const int ROUNDS[3] = { 10, 12, 14 }; //aes rounds for key128, key192, key256
-
-    Label L_exit;
-    Label L_singleBlock_loopTop[3]; //128, 192, 256
-    Label L_multiBlock_loopTop[3]; //128, 192, 256
-
-    const XMMRegister xmm_prev_block_cipher = xmm0; // holds cipher of previous block
-    const XMMRegister xmm_key_shuf_mask = xmm1;
-
-    const XMMRegister xmm_key_tmp0 = xmm2;
-    const XMMRegister xmm_key_tmp1 = xmm3;
-
-    // registers holding the six results in the parallelized loop
-    const XMMRegister xmm_result0 = xmm4;
-    const XMMRegister xmm_result1 = xmm5;
-    const XMMRegister xmm_result2 = xmm6;
-    const XMMRegister xmm_result3 = xmm7;
-
-    __ enter(); // required for proper stackwalking of RuntimeStub frame
-    handleSOERegisters(true /*saving*/);
-
-    // load registers from incoming parameters
-    const Address  from_param(rbp, 8+0);
-    const Address  to_param  (rbp, 8+4);
-    const Address  key_param (rbp, 8+8);
-    const Address  rvec_param (rbp, 8+12);
-    const Address  len_param  (rbp, 8+16);
-
-    __ movptr(from , from_param);
-    __ movptr(to   , to_param);
-    __ movptr(key  , key_param);
-    __ movptr(rvec , rvec_param);
-    __ movptr(len_reg , len_param);
-
-    __ movdqu(xmm_key_shuf_mask, ExternalAddress(key_shuffle_mask_addr()));
-    __ movdqu(xmm_prev_block_cipher, Address(rvec, 0x00)); // initialize with initial rvec
-
-    __ xorptr(pos, pos);
-
-    // now split to different paths depending on the keylen (len in ints of AESCrypt.KLE array (52=192, or 60=256))
-    // rvec is reused
-    __ movl(rvec, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
-    __ cmpl(rvec, 52);
-    __ jcc(Assembler::equal, L_multiBlock_loopTop[1]);
-    __ cmpl(rvec, 60);
-    __ jcc(Assembler::equal, L_multiBlock_loopTop[2]);
-
-#define DoFour(opc, src_reg)           \
-  __ opc(xmm_result0, src_reg);         \
-  __ opc(xmm_result1, src_reg);         \
-  __ opc(xmm_result2, src_reg);         \
-  __ opc(xmm_result3, src_reg);         \
-
-    for (int k = 0; k < 3; ++k) {
-      __ align(OptoLoopAlignment);
-      __ BIND(L_multiBlock_loopTop[k]);
-      __ cmpptr(len_reg, PARALLEL_FACTOR * AESBlockSize); // see if at least 4 blocks left
-      __ jcc(Assembler::less, L_singleBlock_loopTop[k]);
-
-      __ movdqu(xmm_result0, Address(from, pos, Address::times_1, 0 * AESBlockSize)); // get next 4 blocks into xmmresult registers
-      __ movdqu(xmm_result1, Address(from, pos, Address::times_1, 1 * AESBlockSize));
-      __ movdqu(xmm_result2, Address(from, pos, Address::times_1, 2 * AESBlockSize));
-      __ movdqu(xmm_result3, Address(from, pos, Address::times_1, 3 * AESBlockSize));
-
-      // the java expanded key ordering is rotated one position from what we want
-      // so we start from 0x10 here and hit 0x00 last
-      load_key(xmm_key_tmp0, key, 0x10, xmm_key_shuf_mask);
-      DoFour(pxor, xmm_key_tmp0); //xor with first key
-      // do the aes dec rounds
-      for (int rnum = 1; rnum <= ROUNDS[k];) {
-        //load two keys at a time
-        //k1->0x20, ..., k9->0xa0, k10->0x00
-        load_key(xmm_key_tmp1, key, (rnum + 1) * 0x10, xmm_key_shuf_mask);
-        load_key(xmm_key_tmp0, key, ((rnum + 2) % (ROUNDS[k] + 1)) * 0x10, xmm_key_shuf_mask); // hit 0x00 last!
-        DoFour(aesdec, xmm_key_tmp1);
-        rnum++;
-        if (rnum != ROUNDS[k]) {
-          DoFour(aesdec, xmm_key_tmp0);
-        }
-        else {
-          DoFour(aesdeclast, xmm_key_tmp0);
-        }
-        rnum++;
-      }
-
-      // for each result, xor with the r vector of previous cipher block
-      __ pxor(xmm_result0, xmm_prev_block_cipher);
-      __ movdqu(xmm_prev_block_cipher, Address(from, pos, Address::times_1, 0 * AESBlockSize));
-      __ pxor(xmm_result1, xmm_prev_block_cipher);
-      __ movdqu(xmm_prev_block_cipher, Address(from, pos, Address::times_1, 1 * AESBlockSize));
-      __ pxor(xmm_result2, xmm_prev_block_cipher);
-      __ movdqu(xmm_prev_block_cipher, Address(from, pos, Address::times_1, 2 * AESBlockSize));
-      __ pxor(xmm_result3, xmm_prev_block_cipher);
-      __ movdqu(xmm_prev_block_cipher, Address(from, pos, Address::times_1, 3 * AESBlockSize)); // this will carry over to next set of blocks
-
-            // store 4 results into the next 64 bytes of output
-       __ movdqu(Address(to, pos, Address::times_1, 0 * AESBlockSize), xmm_result0);
-       __ movdqu(Address(to, pos, Address::times_1, 1 * AESBlockSize), xmm_result1);
-       __ movdqu(Address(to, pos, Address::times_1, 2 * AESBlockSize), xmm_result2);
-       __ movdqu(Address(to, pos, Address::times_1, 3 * AESBlockSize), xmm_result3);
-
-       __ addptr(pos, 4 * AESBlockSize);
-       __ subptr(len_reg, 4 * AESBlockSize);
-       __ jmp(L_multiBlock_loopTop[k]);
-
-       //singleBlock starts here
-       __ align(OptoLoopAlignment);
-       __ BIND(L_singleBlock_loopTop[k]);
-       __ cmpptr(len_reg, 0); // any blocks left?
-       __ jcc(Assembler::equal, L_exit);
-       __ movdqu(xmm_result0, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input
-       __ movdqa(xmm_result1, xmm_result0);
-
-       load_key(xmm_key_tmp0, key, 0x10, xmm_key_shuf_mask);
-       __ pxor(xmm_result0, xmm_key_tmp0);
-       // do the aes dec rounds
-       for (int rnum = 1; rnum < ROUNDS[k]; rnum++) {
-         // the java expanded key ordering is rotated one position from what we want
-         load_key(xmm_key_tmp0, key, (rnum + 1) * 0x10, xmm_key_shuf_mask);
-         __ aesdec(xmm_result0, xmm_key_tmp0);
-       }
-       load_key(xmm_key_tmp0, key, 0x00, xmm_key_shuf_mask);
-       __ aesdeclast(xmm_result0, xmm_key_tmp0);
-       __ pxor(xmm_result0, xmm_prev_block_cipher); // xor with the current r vector
-       __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result0); // store into the next 16 bytes of output
-       // no need to store r to memory until we exit
-       __ movdqa(xmm_prev_block_cipher, xmm_result1); // set up next r vector with cipher input from this block
-
-       __ addptr(pos, AESBlockSize);
-       __ subptr(len_reg, AESBlockSize);
-       __ jmp(L_singleBlock_loopTop[k]);
-    }//for 128/192/256
-
-    __ BIND(L_exit);
-    __ movptr(rvec, rvec_param);                        // restore this since reused earlier
-    __ movdqu(Address(rvec, 0), xmm_prev_block_cipher); // final value of r stored in rvec of CipherBlockChaining object
-    handleSOERegisters(false /*restoring*/);
-    __ movptr(rax, len_param);                          // return length
-    __ leave();                                         // required for proper stackwalking of RuntimeStub frame
-    __ ret(0);
-
-    return start;
-  }
-
-  // CTR AES crypt.
-  // In 32-bit stub, parallelize 4 blocks at a time
-  // Arguments:
-  //
-  // Inputs:
-  //   c_rarg0   - source byte array address
-  //   c_rarg1   - destination byte array address
-  //   c_rarg2   - K (key) in little endian int array
-  //   c_rarg3   - counter vector byte array address
-  //   c_rarg4   - input length
-  //
-  // Output:
-  //   rax       - input length
-  //
-  address generate_counterMode_AESCrypt_Parallel() {
-    assert(UseAES, "need AES instructions and misaligned SSE support");
-    __ align(CodeEntryAlignment);
-    StubGenStubId stub_id = StubGenStubId::counterMode_AESCrypt_id;
-    StubCodeMark mark(this, stub_id);
-    address start = __ pc();
-    const Register from        = rsi;      // source array address
-    const Register to          = rdx;      // destination array address
-    const Register key         = rcx;      // key array address
-    const Register counter     = rdi;      // counter byte array initialized from initvector array address
-                                           // and updated with the incremented counter in the end
-    const Register len_reg     = rbx;
-    const Register pos         = rax;
-
-    __ enter(); // required for proper stackwalking of RuntimeStub frame
-    handleSOERegisters(true /*saving*/); // save rbx, rsi, rdi
-
-    // load registers from incoming parameters
-    const Address  from_param(rbp, 8+0);
-    const Address  to_param  (rbp, 8+4);
-    const Address  key_param (rbp, 8+8);
-    const Address  rvec_param (rbp, 8+12);
-    const Address  len_param  (rbp, 8+16);
-    const Address  saved_counter_param(rbp, 8 + 20);
-    const Address  used_addr_param(rbp, 8 + 24);
-
-    __ movptr(from , from_param);
-    __ movptr(to   , to_param);
-    __ movptr(len_reg , len_param);
-
-    // Use the partially used encrpyted counter from last invocation
-    Label L_exit_preLoop, L_preLoop_start;
-
-    // Use the registers 'counter' and 'key' here in this preloop
-    // to hold of last 2 params 'used' and 'saved_encCounter_start'
-    Register used = counter;
-    Register saved_encCounter_start = key;
-    Register used_addr = saved_encCounter_start;
-
-    __ movptr(used_addr, used_addr_param);
-    __ movptr(used, Address(used_addr, 0));
-    __ movptr(saved_encCounter_start, saved_counter_param);
-
-    __ BIND(L_preLoop_start);
-    __ cmpptr(used, 16);
-    __ jcc(Assembler::aboveEqual, L_exit_preLoop);
-    __ cmpptr(len_reg, 0);
-    __ jcc(Assembler::lessEqual, L_exit_preLoop);
-    __ movb(rax, Address(saved_encCounter_start, used));
-    __ xorb(rax, Address(from, 0));
-    __ movb(Address(to, 0), rax);
-    __ addptr(from, 1);
-    __ addptr(to, 1);
-    __ addptr(used, 1);
-    __ subptr(len_reg, 1);
-
-    __ jmp(L_preLoop_start);
-
-    __ BIND(L_exit_preLoop);
-    __ movptr(used_addr, used_addr_param);
-    __ movptr(used_addr, used_addr_param);
-    __ movl(Address(used_addr, 0), used);
-
-    // load the parameters 'key' and 'counter'
-    __ movptr(key, key_param);
-    __ movptr(counter, rvec_param);
-
-    // xmm register assignments for the loops below
-    const XMMRegister xmm_curr_counter      = xmm0;
-    const XMMRegister xmm_counter_shuf_mask = xmm1;  // need to be reloaded
-    const XMMRegister xmm_key_shuf_mask     = xmm2;  // need to be reloaded
-    const XMMRegister xmm_key               = xmm3;
-    const XMMRegister xmm_result0           = xmm4;
-    const XMMRegister xmm_result1           = xmm5;
-    const XMMRegister xmm_result2           = xmm6;
-    const XMMRegister xmm_result3           = xmm7;
-    const XMMRegister xmm_from0             = xmm1;   //reuse XMM register
-    const XMMRegister xmm_from1             = xmm2;
-    const XMMRegister xmm_from2             = xmm3;
-    const XMMRegister xmm_from3             = xmm4;
-
-    //for key_128, key_192, key_256
-    const int rounds[3] = {10, 12, 14};
-    Label L_singleBlockLoopTop[3];
-    Label L_multiBlock_loopTop[3];
-    Label L_key192_top, L_key256_top;
-    Label L_incCounter[3][4]; // 3: different key length,  4: 4 blocks at a time
-    Label L_incCounter_single[3]; //for single block, key128, key192, key256
-    Label L_processTail_insr[3], L_processTail_4_insr[3], L_processTail_2_insr[3], L_processTail_1_insr[3], L_processTail_exit_insr[3];
-    Label L_processTail_extr[3], L_processTail_4_extr[3], L_processTail_2_extr[3], L_processTail_1_extr[3], L_processTail_exit_extr[3];
-
-    Label L_exit;
-    const int PARALLEL_FACTOR = 4;  //because of the limited register number
-
-    // initialize counter with initial counter
-    __ movdqu(xmm_curr_counter, Address(counter, 0x00));
-    __ movdqu(xmm_counter_shuf_mask, ExternalAddress(counter_shuffle_mask_addr()));
-    __ pshufb(xmm_curr_counter, xmm_counter_shuf_mask); //counter is shuffled for increase
-
-    // key length could be only {11, 13, 15} * 4 = {44, 52, 60}
-    __ movdqu(xmm_key_shuf_mask, ExternalAddress(key_shuffle_mask_addr()));
-    __ movl(rax, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
-    __ cmpl(rax, 52);
-    __ jcc(Assembler::equal, L_key192_top);
-    __ cmpl(rax, 60);
-    __ jcc(Assembler::equal, L_key256_top);
-
-    //key128 begins here
-    __ movptr(pos, 0); // init pos before L_multiBlock_loopTop
-
-#define CTR_DoFour(opc, src_reg)               \
-    __ opc(xmm_result0, src_reg);              \
-    __ opc(xmm_result1, src_reg);              \
-    __ opc(xmm_result2, src_reg);              \
-    __ opc(xmm_result3, src_reg);
-
-    // k == 0 :  generate code for key_128
-    // k == 1 :  generate code for key_192
-    // k == 2 :  generate code for key_256
-    for (int k = 0; k < 3; ++k) {
-      //multi blocks starts here
-      __ align(OptoLoopAlignment);
-      __ BIND(L_multiBlock_loopTop[k]);
-      __ cmpptr(len_reg, PARALLEL_FACTOR * AESBlockSize); // see if at least PARALLEL_FACTOR blocks left
-      __ jcc(Assembler::less, L_singleBlockLoopTop[k]);
-
-      __ movdqu(xmm_key_shuf_mask, ExternalAddress(key_shuffle_mask_addr()));
-      __ movdqu(xmm_counter_shuf_mask, ExternalAddress(counter_shuffle_mask_addr()));
-
-      //load, then increase counters
-      CTR_DoFour(movdqa, xmm_curr_counter);
-      __ push(rbx);
-      inc_counter(rbx, xmm_result1, 0x01, L_incCounter[k][0]);
-      inc_counter(rbx, xmm_result2, 0x02, L_incCounter[k][1]);
-      inc_counter(rbx, xmm_result3, 0x03, L_incCounter[k][2]);
-      inc_counter(rbx, xmm_curr_counter, 0x04, L_incCounter[k][3]);
-      __ pop (rbx);
-
-      load_key(xmm_key, key, 0x00, xmm_key_shuf_mask); // load Round 0 key. interleaving for better performance
-
-      CTR_DoFour(pshufb, xmm_counter_shuf_mask); // after increased, shuffled counters back for PXOR
-      CTR_DoFour(pxor, xmm_key);   //PXOR with Round 0 key
-
-      for (int i = 1; i < rounds[k]; ++i) {
-        load_key(xmm_key, key, (0x10 * i), xmm_key_shuf_mask);
-        CTR_DoFour(aesenc, xmm_key);
-      }
-      load_key(xmm_key, key, (0x10 * rounds[k]), xmm_key_shuf_mask);
-      CTR_DoFour(aesenclast, xmm_key);
-
-      // get next PARALLEL_FACTOR blocks into xmm_from registers
-      __ movdqu(xmm_from0, Address(from, pos, Address::times_1, 0 * AESBlockSize));
-      __ movdqu(xmm_from1, Address(from, pos, Address::times_1, 1 * AESBlockSize));
-      __ movdqu(xmm_from2, Address(from, pos, Address::times_1, 2 * AESBlockSize));
-
-      // PXOR with input text
-      __ pxor(xmm_result0, xmm_from0); //result0 is xmm4
-      __ pxor(xmm_result1, xmm_from1);
-      __ pxor(xmm_result2, xmm_from2);
-
-      // store PARALLEL_FACTOR results into the next 64 bytes of output
-      __ movdqu(Address(to, pos, Address::times_1, 0 * AESBlockSize), xmm_result0);
-      __ movdqu(Address(to, pos, Address::times_1, 1 * AESBlockSize), xmm_result1);
-      __ movdqu(Address(to, pos, Address::times_1, 2 * AESBlockSize), xmm_result2);
-
-      // do it here after xmm_result0 is saved, because xmm_from3 reuse the same register of xmm_result0.
-      __ movdqu(xmm_from3, Address(from, pos, Address::times_1, 3 * AESBlockSize));
-      __ pxor(xmm_result3, xmm_from3);
-      __ movdqu(Address(to, pos, Address::times_1, 3 * AESBlockSize), xmm_result3);
-
-      __ addptr(pos, PARALLEL_FACTOR * AESBlockSize); // increase the length of crypt text
-      __ subptr(len_reg, PARALLEL_FACTOR * AESBlockSize); // decrease the remaining length
-      __ jmp(L_multiBlock_loopTop[k]);
-
-      // singleBlock starts here
-      __ align(OptoLoopAlignment);
-      __ BIND(L_singleBlockLoopTop[k]);
-      __ cmpptr(len_reg, 0);
-      __ jcc(Assembler::equal, L_exit);
-      __ movdqu(xmm_key_shuf_mask, ExternalAddress(key_shuffle_mask_addr()));
-      __ movdqu(xmm_counter_shuf_mask, ExternalAddress(counter_shuffle_mask_addr()));
-      __ movdqa(xmm_result0, xmm_curr_counter);
-      load_key(xmm_key, key, 0x00, xmm_key_shuf_mask);
-      __ push(rbx);//rbx is used for increasing counter
-      inc_counter(rbx, xmm_curr_counter, 0x01, L_incCounter_single[k]);
-      __ pop (rbx);
-      __ pshufb(xmm_result0, xmm_counter_shuf_mask);
-      __ pxor(xmm_result0, xmm_key);
-      for (int i = 1; i < rounds[k]; i++) {
-        load_key(xmm_key, key, (0x10 * i), xmm_key_shuf_mask);
-        __ aesenc(xmm_result0, xmm_key);
-      }
-      load_key(xmm_key, key, (0x10 * rounds[k]), xmm_key_shuf_mask);
-      __ aesenclast(xmm_result0, xmm_key);
-      __ cmpptr(len_reg, AESBlockSize);
-      __ jcc(Assembler::less, L_processTail_insr[k]);
-        __ movdqu(xmm_from0, Address(from, pos, Address::times_1, 0 * AESBlockSize));
-        __ pxor(xmm_result0, xmm_from0);
-        __ movdqu(Address(to, pos, Address::times_1, 0 * AESBlockSize), xmm_result0);
-        __ addptr(pos, AESBlockSize);
-        __ subptr(len_reg, AESBlockSize);
-        __ jmp(L_singleBlockLoopTop[k]);
-
-      __ BIND(L_processTail_insr[k]);                                               // Process the tail part of the input array
-        __ addptr(pos, len_reg);                                                    // 1. Insert bytes from src array into xmm_from0 register
-        __ testptr(len_reg, 8);
-        __ jcc(Assembler::zero, L_processTail_4_insr[k]);
-          __ subptr(pos,8);
-          __ pinsrd(xmm_from0, Address(from, pos), 0);
-          __ pinsrd(xmm_from0, Address(from, pos, Address::times_1, 4), 1);
-        __ BIND(L_processTail_4_insr[k]);
-        __ testptr(len_reg, 4);
-        __ jcc(Assembler::zero, L_processTail_2_insr[k]);
-          __ subptr(pos,4);
-          __ pslldq(xmm_from0, 4);
-          __ pinsrd(xmm_from0, Address(from, pos), 0);
-        __ BIND(L_processTail_2_insr[k]);
-        __ testptr(len_reg, 2);
-        __ jcc(Assembler::zero, L_processTail_1_insr[k]);
-          __ subptr(pos, 2);
-          __ pslldq(xmm_from0, 2);
-          __ pinsrw(xmm_from0, Address(from, pos), 0);
-        __ BIND(L_processTail_1_insr[k]);
-        __ testptr(len_reg, 1);
-        __ jcc(Assembler::zero, L_processTail_exit_insr[k]);
-          __ subptr(pos, 1);
-          __ pslldq(xmm_from0, 1);
-          __ pinsrb(xmm_from0, Address(from, pos), 0);
-        __ BIND(L_processTail_exit_insr[k]);
-
-        __ movptr(saved_encCounter_start, saved_counter_param);
-        __ movdqu(Address(saved_encCounter_start, 0), xmm_result0);               // 2. Perform pxor of the encrypted counter and plaintext Bytes.
-        __ pxor(xmm_result0, xmm_from0);                                          //    Also the encrypted counter is saved for next invocation.
-
-        __ testptr(len_reg, 8);
-        __ jcc(Assembler::zero, L_processTail_4_extr[k]);                        // 3. Extract bytes from xmm_result0 into the dest. array
-          __ pextrd(Address(to, pos), xmm_result0, 0);
-          __ pextrd(Address(to, pos, Address::times_1, 4), xmm_result0, 1);
-          __ psrldq(xmm_result0, 8);
-          __ addptr(pos, 8);
-        __ BIND(L_processTail_4_extr[k]);
-        __ testptr(len_reg, 4);
-        __ jcc(Assembler::zero, L_processTail_2_extr[k]);
-          __ pextrd(Address(to, pos), xmm_result0, 0);
-          __ psrldq(xmm_result0, 4);
-          __ addptr(pos, 4);
-        __ BIND(L_processTail_2_extr[k]);
-        __ testptr(len_reg, 2);
-        __ jcc(Assembler::zero, L_processTail_1_extr[k]);
-          __ pextrb(Address(to, pos), xmm_result0, 0);
-          __ pextrb(Address(to, pos, Address::times_1, 1), xmm_result0, 1);
-          __ psrldq(xmm_result0, 2);
-          __ addptr(pos, 2);
-        __ BIND(L_processTail_1_extr[k]);
-        __ testptr(len_reg, 1);
-        __ jcc(Assembler::zero, L_processTail_exit_extr[k]);
-          __ pextrb(Address(to, pos), xmm_result0, 0);
-
-        __ BIND(L_processTail_exit_extr[k]);
-        __ movptr(used_addr, used_addr_param);
-        __ movl(Address(used_addr, 0), len_reg);
-        __ jmp(L_exit);
-    }
-
-    __ BIND(L_exit);
-    __ movdqu(xmm_counter_shuf_mask, ExternalAddress(counter_shuffle_mask_addr()));
-    __ pshufb(xmm_curr_counter, xmm_counter_shuf_mask); //counter is shuffled back.
-    __ movdqu(Address(counter, 0), xmm_curr_counter); //save counter back
-    handleSOERegisters(false /*restoring*/);
-    __ movptr(rax, len_param); // return length
-    __ leave();                // required for proper stackwalking of RuntimeStub frame
-    __ ret(0);
-
-    __ BIND (L_key192_top);
-    __ movptr(pos, 0); // init pos before L_multiBlock_loopTop
-    __ jmp(L_multiBlock_loopTop[1]); //key192
-
-    __ BIND (L_key256_top);
-    __ movptr(pos, 0); // init pos before L_multiBlock_loopTop
-    __ jmp(L_multiBlock_loopTop[2]); //key192
-
-    return start;
-  }
-
-  // ofs and limit are use for multi-block byte array.
-  // int com.sun.security.provider.MD5.implCompress(byte[] b, int ofs)
-  address generate_md5_implCompress(StubGenStubId stub_id) {
-    bool multi_block;
-    switch(stub_id) {
-    case StubGenStubId::md5_implCompress_id:
-      multi_block = false;
-      break;
-    case StubGenStubId::md5_implCompressMB_id:
-      multi_block = true;
-      break;
-    default:
-      ShouldNotReachHere();
-    }
-
-    __ align(CodeEntryAlignment);
-    StubCodeMark mark(this, stub_id);
-    address start = __ pc();
-
-    const Register buf_param = rbp;
-    const Address state_param(rsp, 0 * wordSize);
-    const Address ofs_param  (rsp, 1 * wordSize);
-    const Address limit_param(rsp, 2 * wordSize);
-
-    __ enter();
-    __ push(rbx);
-    __ push(rdi);
-    __ push(rsi);
-    __ push(rbp);
-    __ subptr(rsp, 3 * wordSize);
-
-    __ movptr(rsi, Address(rbp, 8 + 4));
-    __ movptr(state_param, rsi);
-    if (multi_block) {
-      __ movptr(rsi, Address(rbp, 8 + 8));
-      __ movptr(ofs_param, rsi);
-      __ movptr(rsi, Address(rbp, 8 + 12));
-      __ movptr(limit_param, rsi);
-    }
-    __ movptr(buf_param, Address(rbp, 8 + 0)); // do it last because it override rbp
-    __ fast_md5(buf_param, state_param, ofs_param, limit_param, multi_block);
-
-    __ addptr(rsp, 3 * wordSize);
-    __ pop(rbp);
-    __ pop(rsi);
-    __ pop(rdi);
-    __ pop(rbx);
-    __ leave();
-    __ ret(0);
-    return start;
-  }
-
-  address generate_upper_word_mask() {
-    __ align64();
-    StubGenStubId stub_id = StubGenStubId::upper_word_mask_id;
-    StubCodeMark mark(this, stub_id);
-    address start = __ pc();
-    __ emit_data(0x00000000, relocInfo::none, 0);
-    __ emit_data(0x00000000, relocInfo::none, 0);
-    __ emit_data(0x00000000, relocInfo::none, 0);
-    __ emit_data(0xFFFFFFFF, relocInfo::none, 0);
-    return start;
-  }
-
-  address generate_shuffle_byte_flip_mask() {
-    __ align64();
-    StubGenStubId stub_id = StubGenStubId::shuffle_byte_flip_mask_id;
-    StubCodeMark mark(this, stub_id);
-    address start = __ pc();
-    __ emit_data(0x0c0d0e0f, relocInfo::none, 0);
-    __ emit_data(0x08090a0b, relocInfo::none, 0);
-    __ emit_data(0x04050607, relocInfo::none, 0);
-    __ emit_data(0x00010203, relocInfo::none, 0);
-    return start;
-  }
-
-  // ofs and limit are use for multi-block byte array.
-  // int com.sun.security.provider.DigestBase.implCompressMultiBlock(byte[] b, int ofs, int limit)
-  address generate_sha1_implCompress(StubGenStubId stub_id) {
-    bool multi_block;
-    switch(stub_id) {
-    case StubGenStubId::sha1_implCompress_id:
-      multi_block = false;
-      break;
-    case StubGenStubId::sha1_implCompressMB_id:
-      multi_block = true;
-      break;
-    default:
-      ShouldNotReachHere();
-    }
-
-    __ align(CodeEntryAlignment);
-    StubCodeMark mark(this, stub_id);
-    address start = __ pc();
-
-    Register buf   = rax;
-    Register state = rdx;
-    Register ofs   = rcx;
-    Register limit = rdi;
-
-    const Address  buf_param(rbp, 8 + 0);
-    const Address  state_param(rbp, 8 + 4);
-    const Address  ofs_param(rbp, 8 + 8);
-    const Address  limit_param(rbp, 8 + 12);
-
-    const XMMRegister abcd = xmm0;
-    const XMMRegister e0 = xmm1;
-    const XMMRegister e1 = xmm2;
-    const XMMRegister msg0 = xmm3;
-
-    const XMMRegister msg1 = xmm4;
-    const XMMRegister msg2 = xmm5;
-    const XMMRegister msg3 = xmm6;
-    const XMMRegister shuf_mask = xmm7;
-
-    __ enter();
-    __ subptr(rsp, 8 * wordSize);
-    handleSOERegisters(true /*saving*/);
-
-    __ movptr(buf, buf_param);
-    __ movptr(state, state_param);
-    if (multi_block) {
-      __ movptr(ofs, ofs_param);
-      __ movptr(limit, limit_param);
-    }
-
-    __ fast_sha1(abcd, e0, e1, msg0, msg1, msg2, msg3, shuf_mask,
-      buf, state, ofs, limit, rsp, multi_block);
-
-    handleSOERegisters(false /*restoring*/);
-    __ addptr(rsp, 8 * wordSize);
-    __ leave();
-    __ ret(0);
-    return start;
-  }
-
-  address generate_pshuffle_byte_flip_mask() {
-    __ align64();
-    StubGenStubId stub_id = StubGenStubId::pshuffle_byte_flip_mask_id;
-    StubCodeMark mark(this, stub_id);
-    address start = __ pc();
-    __ emit_data(0x00010203, relocInfo::none, 0);
-    __ emit_data(0x04050607, relocInfo::none, 0);
-    __ emit_data(0x08090a0b, relocInfo::none, 0);
-    __ emit_data(0x0c0d0e0f, relocInfo::none, 0);
-    return start;
-  }
-
-  // ofs and limit are use for multi-block byte array.
-  // int com.sun.security.provider.DigestBase.implCompressMultiBlock(byte[] b, int ofs, int limit)
- address generate_sha256_implCompress(StubGenStubId stub_id) {
-    bool multi_block;
-    switch(stub_id) {
-    case StubGenStubId::sha256_implCompress_id:
-      multi_block = false;
-      break;
-    case StubGenStubId::sha256_implCompressMB_id:
-      multi_block = true;
-      break;
-    default:
-      ShouldNotReachHere();
-    }
-
-    __ align(CodeEntryAlignment);
-    StubCodeMark mark(this, stub_id);
-    address start = __ pc();
-
-    Register buf = rbx;
-    Register state = rsi;
-    Register ofs = rdx;
-    Register limit = rcx;
-
-    const Address  buf_param(rbp, 8 + 0);
-    const Address  state_param(rbp, 8 + 4);
-    const Address  ofs_param(rbp, 8 + 8);
-    const Address  limit_param(rbp, 8 + 12);
-
-    const XMMRegister msg = xmm0;
-    const XMMRegister state0 = xmm1;
-    const XMMRegister state1 = xmm2;
-    const XMMRegister msgtmp0 = xmm3;
-
-    const XMMRegister msgtmp1 = xmm4;
-    const XMMRegister msgtmp2 = xmm5;
-    const XMMRegister msgtmp3 = xmm6;
-    const XMMRegister msgtmp4 = xmm7;
-
-    __ enter();
-    __ subptr(rsp, 8 * wordSize);
-    handleSOERegisters(true /*saving*/);
-    __ movptr(buf, buf_param);
-    __ movptr(state, state_param);
-    if (multi_block) {
-     __ movptr(ofs, ofs_param);
-     __ movptr(limit, limit_param);
-    }
-
-    __ fast_sha256(msg, state0, state1, msgtmp0, msgtmp1, msgtmp2, msgtmp3, msgtmp4,
-      buf, state, ofs, limit, rsp, multi_block);
-
-    handleSOERegisters(false);
-    __ addptr(rsp, 8 * wordSize);
-    __ leave();
-    __ ret(0);
-    return start;
-  }
-
-  // byte swap x86 long
-  address ghash_long_swap_mask_addr() {
-    return (address)GHASH_LONG_SWAP_MASK;
-  }
-
-  // byte swap x86 byte array
-  address ghash_byte_swap_mask_addr() {
-    return (address)GHASH_BYTE_SWAP_MASK;
-  }
-
-  /* Single and multi-block ghash operations */
-  address generate_ghash_processBlocks() {
-    assert(UseGHASHIntrinsics, "need GHASH intrinsics and CLMUL support");
-    __ align(CodeEntryAlignment);
-    Label L_ghash_loop, L_exit;
-    StubGenStubId stub_id = StubGenStubId::ghash_processBlocks_id;
-    StubCodeMark mark(this, stub_id);
-
-    address start = __ pc();
-
-    const Register state        = rdi;
-    const Register subkeyH      = rsi;
-    const Register data         = rdx;
-    const Register blocks       = rcx;
-
-    const Address  state_param(rbp, 8+0);
-    const Address  subkeyH_param(rbp, 8+4);
-    const Address  data_param(rbp, 8+8);
-    const Address  blocks_param(rbp, 8+12);
-
-    const XMMRegister xmm_temp0 = xmm0;
-    const XMMRegister xmm_temp1 = xmm1;
-    const XMMRegister xmm_temp2 = xmm2;
-    const XMMRegister xmm_temp3 = xmm3;
-    const XMMRegister xmm_temp4 = xmm4;
-    const XMMRegister xmm_temp5 = xmm5;
-    const XMMRegister xmm_temp6 = xmm6;
-    const XMMRegister xmm_temp7 = xmm7;
-
-    __ enter();
-    handleSOERegisters(true);  // Save registers
-
-    __ movptr(state, state_param);
-    __ movptr(subkeyH, subkeyH_param);
-    __ movptr(data, data_param);
-    __ movptr(blocks, blocks_param);
-
-    __ movdqu(xmm_temp0, Address(state, 0));
-    __ pshufb(xmm_temp0, ExternalAddress(ghash_long_swap_mask_addr()));
-
-    __ movdqu(xmm_temp1, Address(subkeyH, 0));
-    __ pshufb(xmm_temp1, ExternalAddress(ghash_long_swap_mask_addr()));
-
-    __ BIND(L_ghash_loop);
-    __ movdqu(xmm_temp2, Address(data, 0));
-    __ pshufb(xmm_temp2, ExternalAddress(ghash_byte_swap_mask_addr()));
-
-    __ pxor(xmm_temp0, xmm_temp2);
-
-    //
-    // Multiply with the hash key
-    //
-    __ movdqu(xmm_temp3, xmm_temp0);
-    __ pclmulqdq(xmm_temp3, xmm_temp1, 0);      // xmm3 holds a0*b0
-    __ movdqu(xmm_temp4, xmm_temp0);
-    __ pclmulqdq(xmm_temp4, xmm_temp1, 16);     // xmm4 holds a0*b1
-
-    __ movdqu(xmm_temp5, xmm_temp0);
-    __ pclmulqdq(xmm_temp5, xmm_temp1, 1);      // xmm5 holds a1*b0
-    __ movdqu(xmm_temp6, xmm_temp0);
-    __ pclmulqdq(xmm_temp6, xmm_temp1, 17);     // xmm6 holds a1*b1
-
-    __ pxor(xmm_temp4, xmm_temp5);      // xmm4 holds a0*b1 + a1*b0
-
-    __ movdqu(xmm_temp5, xmm_temp4);    // move the contents of xmm4 to xmm5
-    __ psrldq(xmm_temp4, 8);    // shift by xmm4 64 bits to the right
-    __ pslldq(xmm_temp5, 8);    // shift by xmm5 64 bits to the left
-    __ pxor(xmm_temp3, xmm_temp5);
-    __ pxor(xmm_temp6, xmm_temp4);      // Register pair <xmm6:xmm3> holds the result
-                                        // of the carry-less multiplication of
-                                        // xmm0 by xmm1.
-
-    // We shift the result of the multiplication by one bit position
-    // to the left to cope for the fact that the bits are reversed.
-    __ movdqu(xmm_temp7, xmm_temp3);
-    __ movdqu(xmm_temp4, xmm_temp6);
-    __ pslld (xmm_temp3, 1);
-    __ pslld(xmm_temp6, 1);
-    __ psrld(xmm_temp7, 31);
-    __ psrld(xmm_temp4, 31);
-    __ movdqu(xmm_temp5, xmm_temp7);
-    __ pslldq(xmm_temp4, 4);
-    __ pslldq(xmm_temp7, 4);
-    __ psrldq(xmm_temp5, 12);
-    __ por(xmm_temp3, xmm_temp7);
-    __ por(xmm_temp6, xmm_temp4);
-    __ por(xmm_temp6, xmm_temp5);
-
-    //
-    // First phase of the reduction
-    //
-    // Move xmm3 into xmm4, xmm5, xmm7 in order to perform the shifts
-    // independently.
-    __ movdqu(xmm_temp7, xmm_temp3);
-    __ movdqu(xmm_temp4, xmm_temp3);
-    __ movdqu(xmm_temp5, xmm_temp3);
-    __ pslld(xmm_temp7, 31);    // packed right shift shifting << 31
-    __ pslld(xmm_temp4, 30);    // packed right shift shifting << 30
-    __ pslld(xmm_temp5, 25);    // packed right shift shifting << 25
-    __ pxor(xmm_temp7, xmm_temp4);      // xor the shifted versions
-    __ pxor(xmm_temp7, xmm_temp5);
-    __ movdqu(xmm_temp4, xmm_temp7);
-    __ pslldq(xmm_temp7, 12);
-    __ psrldq(xmm_temp4, 4);
-    __ pxor(xmm_temp3, xmm_temp7);      // first phase of the reduction complete
-
-    //
-    // Second phase of the reduction
-    //
-    // Make 3 copies of xmm3 in xmm2, xmm5, xmm7 for doing these
-    // shift operations.
-    __ movdqu(xmm_temp2, xmm_temp3);
-    __ movdqu(xmm_temp7, xmm_temp3);
-    __ movdqu(xmm_temp5, xmm_temp3);
-    __ psrld(xmm_temp2, 1);     // packed left shifting >> 1
-    __ psrld(xmm_temp7, 2);     // packed left shifting >> 2
-    __ psrld(xmm_temp5, 7);     // packed left shifting >> 7
-    __ pxor(xmm_temp2, xmm_temp7);      // xor the shifted versions
-    __ pxor(xmm_temp2, xmm_temp5);
-    __ pxor(xmm_temp2, xmm_temp4);
-    __ pxor(xmm_temp3, xmm_temp2);
-    __ pxor(xmm_temp6, xmm_temp3);      // the result is in xmm6
-
-    __ decrement(blocks);
-    __ jcc(Assembler::zero, L_exit);
-    __ movdqu(xmm_temp0, xmm_temp6);
-    __ addptr(data, 16);
-    __ jmp(L_ghash_loop);
-
-    __ BIND(L_exit);
-       // Byte swap 16-byte result
-    __ pshufb(xmm_temp6, ExternalAddress(ghash_long_swap_mask_addr()));
-    __ movdqu(Address(state, 0), xmm_temp6);   // store the result
-
-    handleSOERegisters(false);  // restore registers
-    __ leave();
-    __ ret(0);
-    return start;
-  }
-
-  /**
-   *  Arguments:
-   *
-   * Inputs:
-   *   rsp(4)   - int crc
-   *   rsp(8)   - byte* buf
-   *   rsp(12)  - int length
-   *
-   * Output:
-   *       rax   - int crc result
-   */
-  address generate_updateBytesCRC32() {
-    assert(UseCRC32Intrinsics, "need AVX and CLMUL instructions");
-
-    __ align(CodeEntryAlignment);
-    StubGenStubId stub_id = StubGenStubId::updateBytesCRC32_id;
-    StubCodeMark mark(this, stub_id);
-
-    address start = __ pc();
-
-    const Register crc   = rdx;  // crc
-    const Register buf   = rsi;  // source java byte array address
-    const Register len   = rcx;  // length
-    const Register table = rdi;  // crc_table address (reuse register)
-    const Register tmp   = rbx;
-    assert_different_registers(crc, buf, len, table, tmp, rax);
-
-    BLOCK_COMMENT("Entry:");
-    __ enter(); // required for proper stackwalking of RuntimeStub frame
-    __ push(rsi);
-    __ push(rdi);
-    __ push(rbx);
-
-    Address crc_arg(rbp, 8 + 0);
-    Address buf_arg(rbp, 8 + 4);
-    Address len_arg(rbp, 8 + 8);
-
-    // Load up:
-    __ movl(crc,   crc_arg);
-    __ movptr(buf, buf_arg);
-    __ movl(len,   len_arg);
-
-    __ kernel_crc32(crc, buf, len, table, tmp);
-
-    __ movl(rax, crc);
-    __ pop(rbx);
-    __ pop(rdi);
-    __ pop(rsi);
-    __ vzeroupper();
-    __ leave(); // required for proper stackwalking of RuntimeStub frame
-    __ ret(0);
-
-    return start;
-  }
-
-  /**
-  *  Arguments:
-  *
-  * Inputs:
-  *   rsp(4)   - int crc
-  *   rsp(8)   - byte* buf
-  *   rsp(12)  - int length
-  *   rsp(16)  - table_start - optional (present only when doing a library_calll,
-  *              not used by x86 algorithm)
-  *
-  * Output:
-  *       rax  - int crc result
-  */
-  address generate_updateBytesCRC32C(bool is_pclmulqdq_supported) {
-    assert(UseCRC32CIntrinsics, "need SSE4_2");
-    __ align(CodeEntryAlignment);
-    StubGenStubId stub_id = StubGenStubId::updateBytesCRC32C_id;
-    StubCodeMark mark(this, stub_id);
-
-    address start = __ pc();
-    const Register crc = rax;  // crc
-    const Register buf = rcx;  // source java byte array address
-    const Register len = rdx;  // length
-    const Register d = rbx;
-    const Register g = rsi;
-    const Register h = rdi;
-    const Register empty = noreg; // will never be used, in order not
-                                  // to change a signature for crc32c_IPL_Alg2_Alt2
-                                  // between 64/32 I'm just keeping it here
-    assert_different_registers(crc, buf, len, d, g, h);
-
-    BLOCK_COMMENT("Entry:");
-    __ enter(); // required for proper stackwalking of RuntimeStub frame
-    Address crc_arg(rsp, 4 + 4 + 0); // ESP+4 +
-                                     // we need to add additional 4 because __ enter
-                                     // have just pushed ebp on a stack
-    Address buf_arg(rsp, 4 + 4 + 4);
-    Address len_arg(rsp, 4 + 4 + 8);
-      // Load up:
-      __ movl(crc, crc_arg);
-      __ movl(buf, buf_arg);
-      __ movl(len, len_arg);
-      __ push(d);
-      __ push(g);
-      __ push(h);
-      __ crc32c_ipl_alg2_alt2(crc, buf, len,
-                              d, g, h,
-                              empty, empty, empty,
-                              xmm0, xmm1, xmm2,
-                              is_pclmulqdq_supported);
-      __ pop(h);
-      __ pop(g);
-      __ pop(d);
-    __ vzeroupper();
-    __ leave(); // required for proper stackwalking of RuntimeStub frame
-    __ ret(0);
-
-    return start;
-  }
-
- address generate_libmExp() {
-    StubGenStubId stub_id = StubGenStubId::dexp_id;
-    StubCodeMark mark(this, stub_id);
-
-    address start = __ pc();
-
-    BLOCK_COMMENT("Entry:");
-    __ enter(); // required for proper stackwalking of RuntimeStub frame
-    __ fast_exp(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,
-                rax, rcx, rdx, rbx);
-    __ leave(); // required for proper stackwalking of RuntimeStub frame
-    __ ret(0);
-
-    return start;
-
-  }
-
- address generate_libmLog() {
-   StubGenStubId stub_id = StubGenStubId::dlog_id;
-   StubCodeMark mark(this, stub_id);
-
-   address start = __ pc();
-
-   BLOCK_COMMENT("Entry:");
-   __ enter(); // required for proper stackwalking of RuntimeStub frame
-   __ fast_log(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,
-               rax, rcx, rdx, rbx);
-   __ leave(); // required for proper stackwalking of RuntimeStub frame
-   __ ret(0);
-
-   return start;
-
- }
-
- address generate_libmLog10() {
-   StubGenStubId stub_id = StubGenStubId::dlog10_id;
-   StubCodeMark mark(this, stub_id);
-
-   address start = __ pc();
-
-   BLOCK_COMMENT("Entry:");
-   __ enter(); // required for proper stackwalking of RuntimeStub frame
-   __ fast_log10(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,
-               rax, rcx, rdx, rbx);
-   __ leave(); // required for proper stackwalking of RuntimeStub frame
-   __ ret(0);
-
-   return start;
-
- }
-
- address generate_libmPow() {
-   StubGenStubId stub_id = StubGenStubId::dpow_id;
-   StubCodeMark mark(this, stub_id);
-
-   address start = __ pc();
-
-   BLOCK_COMMENT("Entry:");
-   __ enter(); // required for proper stackwalking of RuntimeStub frame
-   __ fast_pow(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,
-               rax, rcx, rdx, rbx);
-   __ leave(); // required for proper stackwalking of RuntimeStub frame
-   __ ret(0);
-
-   return start;
-
- }
-
- address generate_libm_reduce_pi04l() {
-   StubGenStubId stub_id = StubGenStubId::dlibm_reduce_pi04l_id;
-   StubCodeMark mark(this, stub_id);
-
-   address start = __ pc();
-
-   BLOCK_COMMENT("Entry:");
-   __ libm_reduce_pi04l(rax, rcx, rdx, rbx, rsi, rdi, rbp, rsp);
-
-   return start;
-
- }
-
- address generate_libm_sin_cos_huge() {
-   StubGenStubId stub_id = StubGenStubId::dlibm_sin_cos_huge_id;
-   StubCodeMark mark(this, stub_id);
-
-   address start = __ pc();
-
-   BLOCK_COMMENT("Entry:");
-   __ libm_sincos_huge(xmm0, xmm1, rax, rcx, rdx, rbx, rsi, rdi, rbp, rsp);
-
-   return start;
-
- }
-
- address generate_libmSin() {
-   StubGenStubId stub_id = StubGenStubId::dsin_id;
-   StubCodeMark mark(this, stub_id);
-
-   address start = __ pc();
-
-   BLOCK_COMMENT("Entry:");
-   __ enter(); // required for proper stackwalking of RuntimeStub frame
-   __ fast_sin(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,
-               rax, rbx, rdx);
-   __ leave(); // required for proper stackwalking of RuntimeStub frame
-   __ ret(0);
-
-   return start;
-
- }
-
- address generate_libmCos() {
-   StubGenStubId stub_id = StubGenStubId::dcos_id;
-   StubCodeMark mark(this, stub_id);
-
-   address start = __ pc();
-
-   BLOCK_COMMENT("Entry:");
-   __ enter(); // required for proper stackwalking of RuntimeStub frame
-   __ fast_cos(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,
-               rax, rcx, rdx, rbx);
-   __ leave(); // required for proper stackwalking of RuntimeStub frame
-   __ ret(0);
-
-   return start;
-
- }
-
- address generate_libm_tan_cot_huge() {
-   StubGenStubId stub_id = StubGenStubId::dlibm_tan_cot_huge_id;
-   StubCodeMark mark(this, stub_id);
-
-   address start = __ pc();
-
-   BLOCK_COMMENT("Entry:");
-   __ libm_tancot_huge(xmm0, xmm1, rax, rcx, rdx, rbx, rsi, rdi, rbp, rsp);
-
-   return start;
-
- }
-
- address generate_libmTan() {
-   StubGenStubId stub_id = StubGenStubId::dtan_id;
-   StubCodeMark mark(this, stub_id);
-
-   address start = __ pc();
-
-   BLOCK_COMMENT("Entry:");
-   __ enter(); // required for proper stackwalking of RuntimeStub frame
-   __ fast_tan(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,
-               rax, rcx, rdx, rbx);
-   __ leave(); // required for proper stackwalking of RuntimeStub frame
-   __ ret(0);
-
-   return start;
-
- }
-
-  address generate_method_entry_barrier() {
-    __ align(CodeEntryAlignment);
-    StubGenStubId stub_id = StubGenStubId::method_entry_barrier_id;
-    StubCodeMark mark(this, stub_id);
-
-    Label deoptimize_label;
-
-    address start = __ pc();
-
-    __ push(-1); // cookie, this is used for writing the new rsp when deoptimizing
-
-    BLOCK_COMMENT("Entry:");
-    __ enter(); // save rbp
-
-    // save rbx, because we want to use that value.
-    // We could do without it but then we depend on the number of slots used by pusha
-    __ push(rbx);
-
-    __ lea(rbx, Address(rsp, wordSize * 3)); // 1 for cookie, 1 for rbp, 1 for rbx - this should be the return address
-
-    __ pusha();
-
-    // xmm0 and xmm1 may be used for passing float/double arguments
-
-    if (UseSSE >= 2) {
-      const int xmm_size = wordSize * 4;
-      __ subptr(rsp, xmm_size * 2);
-      __ movdbl(Address(rsp, xmm_size * 1), xmm1);
-      __ movdbl(Address(rsp, xmm_size * 0), xmm0);
-    } else if (UseSSE >= 1) {
-      const int xmm_size = wordSize * 2;
-      __ subptr(rsp, xmm_size * 2);
-      __ movflt(Address(rsp, xmm_size * 1), xmm1);
-      __ movflt(Address(rsp, xmm_size * 0), xmm0);
-    }
-
-    __ call_VM_leaf(CAST_FROM_FN_PTR(address, static_cast<int (*)(address*)>(BarrierSetNMethod::nmethod_stub_entry_barrier)), rbx);
-
-    if (UseSSE >= 2) {
-      const int xmm_size = wordSize * 4;
-      __ movdbl(xmm0, Address(rsp, xmm_size * 0));
-      __ movdbl(xmm1, Address(rsp, xmm_size * 1));
-      __ addptr(rsp, xmm_size * 2);
-    } else if (UseSSE >= 1) {
-      const int xmm_size = wordSize * 2;
-      __ movflt(xmm0, Address(rsp, xmm_size * 0));
-      __ movflt(xmm1, Address(rsp, xmm_size * 1));
-      __ addptr(rsp, xmm_size * 2);
-    }
-
-    __ cmpl(rax, 1); // 1 means deoptimize
-    __ jcc(Assembler::equal, deoptimize_label);
-
-    __ popa();
-    __ pop(rbx);
-
-    __ leave();
-
-    __ addptr(rsp, 1 * wordSize); // cookie
-    __ ret(0);
-
-    __ BIND(deoptimize_label);
-
-    __ popa();
-    __ pop(rbx);
-
-    __ leave();
-
-    // this can be taken out, but is good for verification purposes. getting a SIGSEGV
-    // here while still having a correct stack is valuable
-    __ testptr(rsp, Address(rsp, 0));
-
-    __ movptr(rsp, Address(rsp, 0)); // new rsp was written in the barrier
-    __ jmp(Address(rsp, -1 * wordSize)); // jmp target should be callers verified_entry_point
-
-    return start;
-  }
-
- private:
-
-  void create_control_words() {
-    // Round to nearest, 53-bit mode, exceptions masked
-    StubRoutines::x86::_fpu_cntrl_wrd_std   = 0x027F;
-    // Round to zero, 53-bit mode, exception mased
-    StubRoutines::x86::_fpu_cntrl_wrd_trunc = 0x0D7F;
-    // Round to nearest, 24-bit mode, exceptions masked
-    StubRoutines::x86::_fpu_cntrl_wrd_24    = 0x007F;
-    // Round to nearest, 64-bit mode, exceptions masked, flags specialized
-    StubRoutines::x86::_mxcsr_std           = EnableX86ECoreOpts ? 0x1FBF : 0x1F80;
-    // Note: the following two constants are 80-bit values
-    //       layout is critical for correct loading by FPU.
-    // Bias for strict fp multiply/divide
-    StubRoutines::x86::_fpu_subnormal_bias1[0]= 0x00000000; // 2^(-15360) == 0x03ff 8000 0000 0000 0000
-    StubRoutines::x86::_fpu_subnormal_bias1[1]= 0x80000000;
-    StubRoutines::x86::_fpu_subnormal_bias1[2]= 0x03ff;
-    // Un-Bias for strict fp multiply/divide
-    StubRoutines::x86::_fpu_subnormal_bias2[0]= 0x00000000; // 2^(+15360) == 0x7bff 8000 0000 0000 0000
-    StubRoutines::x86::_fpu_subnormal_bias2[1]= 0x80000000;
-    StubRoutines::x86::_fpu_subnormal_bias2[2]= 0x7bff;
-  }
-
-  address generate_cont_thaw() {
-    if (!Continuations::enabled()) return nullptr;
-    Unimplemented();
-    return nullptr;
-  }
-
-  address generate_cont_returnBarrier() {
-    if (!Continuations::enabled()) return nullptr;
-    Unimplemented();
-    return nullptr;
-  }
-
-  address generate_cont_returnBarrier_exception() {
-    if (!Continuations::enabled()) return nullptr;
-    Unimplemented();
-    return nullptr;
-  }
-
-  //---------------------------------------------------------------------------
-  // Initialization
-
-  void generate_initial_stubs() {
-    // Generates all stubs and initializes the entry points
-
-    //------------------------------------------------------------------------------------------------------------------------
-    // entry points that exist in all platforms
-    // Note: This is code that could be shared among different platforms - however the benefit seems to be smaller than
-    //       the disadvantage of having a much more complicated generator structure. See also comment in stubRoutines.hpp.
-    StubRoutines::_forward_exception_entry      = generate_forward_exception();
-
-    StubRoutines::_call_stub_entry              =
-      generate_call_stub(StubRoutines::_call_stub_return_address);
-    // is referenced by megamorphic call
-    StubRoutines::_catch_exception_entry        = generate_catch_exception();
-
-    // platform dependent
-    create_control_words();
-
-    // Initialize table for copy memory (arraycopy) check.
-    if (UnsafeMemoryAccess::_table == nullptr) {
-      UnsafeMemoryAccess::create_table(16 + 4); // 16 for copyMemory; 4 for setMemory
-    }
-
-    StubRoutines::x86::_verify_mxcsr_entry         = generate_verify_mxcsr();
-    StubRoutines::x86::_verify_fpu_cntrl_wrd_entry = generate_verify_fpu_cntrl_wrd();
-    StubRoutines::x86::_d2i_wrapper                = generate_d2i_wrapper(T_INT,  CAST_FROM_FN_PTR(address, SharedRuntime::d2i));
-    StubRoutines::x86::_d2l_wrapper                = generate_d2i_wrapper(T_LONG, CAST_FROM_FN_PTR(address, SharedRuntime::d2l));
-
-    if (UseCRC32Intrinsics) {
-      // set table address before stub generation which use it
-      StubRoutines::_crc_table_adr = (address)StubRoutines::x86::_crc_table;
-      StubRoutines::_updateBytesCRC32 = generate_updateBytesCRC32();
-    }
-
-    if (UseCRC32CIntrinsics) {
-      bool supports_clmul = VM_Version::supports_clmul();
-      StubRoutines::x86::generate_CRC32C_table(supports_clmul);
-      StubRoutines::_crc32c_table_addr = (address)StubRoutines::x86::_crc32c_table;
-      StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C(supports_clmul);
-    }
-    if (VM_Version::supports_sse2() && UseLibmIntrinsic && InlineIntrinsics) {
-      if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dexp)) {
-        StubRoutines::_dexp = generate_libmExp();
-      }
-      if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dlog)) {
-        StubRoutines::_dlog = generate_libmLog();
-      }
-      if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dlog10)) {
-        StubRoutines::_dlog10 = generate_libmLog10();
-      }
-      if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dpow)) {
-        StubRoutines::_dpow = generate_libmPow();
-      }
-      if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dsin) ||
-        vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dcos) ||
-        vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dtan)) {
-        StubRoutines::_dlibm_reduce_pi04l = generate_libm_reduce_pi04l();
-      }
-      if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dsin) ||
-        vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dcos)) {
-        StubRoutines::_dlibm_sin_cos_huge = generate_libm_sin_cos_huge();
-      }
-      if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dsin)) {
-        StubRoutines::_dsin = generate_libmSin();
-      }
-      if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dcos)) {
-        StubRoutines::_dcos = generate_libmCos();
-      }
-      if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dtan)) {
-        StubRoutines::_dlibm_tan_cot_huge = generate_libm_tan_cot_huge();
-        StubRoutines::_dtan = generate_libmTan();
-      }
-    }
-  }
-
-  void generate_continuation_stubs() {
-    // Continuation stubs:
-    StubRoutines::_cont_thaw          = generate_cont_thaw();
-    StubRoutines::_cont_returnBarrier = generate_cont_returnBarrier();
-    StubRoutines::_cont_returnBarrierExc = generate_cont_returnBarrier_exception();
-  }
-
-  void generate_final_stubs() {
-    // Generates all stubs and initializes the entry points
-
-    // support for verify_oop (must happen after universe_init)
-    StubRoutines::_verify_oop_subroutine_entry     = generate_verify_oop();
-
-    // arraycopy stubs used by compilers
-    generate_arraycopy_stubs();
-
-    StubRoutines::_method_entry_barrier = generate_method_entry_barrier();
-  }
-
-  void generate_compiler_stubs() {
-#if COMPILER2_OR_JVMCI
-
-    // entry points that are C2/JVMCI specific
-
-    StubRoutines::x86::_vector_float_sign_mask = generate_vector_mask(StubGenStubId::vector_float_sign_mask_id, 0x7FFFFFFF);
-    StubRoutines::x86::_vector_float_sign_flip = generate_vector_mask(StubGenStubId::vector_float_sign_flip_id, 0x80000000);
-    StubRoutines::x86::_vector_double_sign_mask = generate_vector_mask_long_double(StubGenStubId::vector_double_sign_mask_id, 0x7FFFFFFF, 0xFFFFFFFF);
-    StubRoutines::x86::_vector_double_sign_flip = generate_vector_mask_long_double(StubGenStubId::vector_double_sign_flip_id, 0x80000000, 0x00000000);
-    StubRoutines::x86::_vector_short_to_byte_mask = generate_vector_mask(StubGenStubId::vector_short_to_byte_mask_id, 0x00ff00ff);
-    StubRoutines::x86::_vector_int_to_byte_mask = generate_vector_mask(StubGenStubId::vector_int_to_byte_mask_id, 0x000000ff);
-    StubRoutines::x86::_vector_int_to_short_mask = generate_vector_mask(StubGenStubId::vector_int_to_short_mask_id, 0x0000ffff);
-    StubRoutines::x86::_vector_32_bit_mask = generate_vector_custom_i32(StubGenStubId::vector_32_bit_mask_id, Assembler::AVX_512bit,
-                                                                        0xFFFFFFFF, 0, 0, 0);
-    StubRoutines::x86::_vector_64_bit_mask = generate_vector_custom_i32(StubGenStubId::vector_64_bit_mask_id, Assembler::AVX_512bit,
-                                                                        0xFFFFFFFF, 0xFFFFFFFF, 0, 0);
-    StubRoutines::x86::_vector_int_shuffle_mask = generate_vector_mask(StubGenStubId::vector_int_shuffle_mask_id, 0x03020100);
-    StubRoutines::x86::_vector_byte_shuffle_mask = generate_vector_byte_shuffle_mask();
-    StubRoutines::x86::_vector_short_shuffle_mask = generate_vector_mask(StubGenStubId::vector_short_shuffle_mask_id, 0x01000100);
-    StubRoutines::x86::_vector_long_shuffle_mask = generate_vector_mask_long_double(StubGenStubId::vector_long_shuffle_mask_id, 0x00000001, 0x0);
-    StubRoutines::x86::_vector_byte_perm_mask = generate_vector_byte_perm_mask();
-    StubRoutines::x86::_vector_long_sign_mask = generate_vector_mask_long_double(StubGenStubId::vector_long_sign_mask_id, 0x80000000, 0x00000000);
-    StubRoutines::x86::_vector_all_bits_set = generate_vector_mask(StubGenStubId::vector_all_bits_set_id, 0xFFFFFFFF);
-    StubRoutines::x86::_vector_int_mask_cmp_bits = generate_vector_mask(StubGenStubId::vector_int_mask_cmp_bits_id, 0x00000001);
-    StubRoutines::x86::_vector_iota_indices = generate_iota_indices();
-    StubRoutines::x86::_vector_count_leading_zeros_lut = generate_count_leading_zeros_lut();
-    StubRoutines::x86::_vector_reverse_bit_lut = generate_vector_reverse_bit_lut();
-    StubRoutines::x86::_vector_reverse_byte_perm_mask_long = generate_vector_reverse_byte_perm_mask_long();
-    StubRoutines::x86::_vector_reverse_byte_perm_mask_int = generate_vector_reverse_byte_perm_mask_int();
-    StubRoutines::x86::_vector_reverse_byte_perm_mask_short = generate_vector_reverse_byte_perm_mask_short();
-
-    if (VM_Version::supports_avx2() && !VM_Version::supports_avx512_vpopcntdq()) {
-      // lut implementation influenced by counting 1s algorithm from section 5-1 of Hackers' Delight.
-      StubRoutines::x86::_vector_popcount_lut = generate_popcount_avx_lut();
-    }
-
-    // don't bother generating these AES intrinsic stubs unless global flag is set
-    if (UseAESIntrinsics) {
-      StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
-      StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
-      StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt();
-      StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt_Parallel();
-    }
-
-    if (UseAESCTRIntrinsics) {
-      StubRoutines::_counterMode_AESCrypt = generate_counterMode_AESCrypt_Parallel();
-    }
-
-    if (UseMD5Intrinsics) {
-      StubRoutines::_md5_implCompress = generate_md5_implCompress(StubGenStubId::md5_implCompress_id);
-      StubRoutines::_md5_implCompressMB = generate_md5_implCompress(StubGenStubId::md5_implCompressMB_id);
-    }
-    if (UseSHA1Intrinsics) {
-      StubRoutines::x86::_upper_word_mask_addr = generate_upper_word_mask();
-      StubRoutines::x86::_shuffle_byte_flip_mask_addr = generate_shuffle_byte_flip_mask();
-      StubRoutines::_sha1_implCompress = generate_sha1_implCompress(StubGenStubId::sha1_implCompress_id);
-      StubRoutines::_sha1_implCompressMB = generate_sha1_implCompress(StubGenStubId::sha1_implCompressMB_id);
-    }
-    if (UseSHA256Intrinsics) {
-      StubRoutines::x86::_k256_adr = (address)StubRoutines::x86::_k256;
-      StubRoutines::x86::_pshuffle_byte_flip_mask_addr = generate_pshuffle_byte_flip_mask();
-      StubRoutines::_sha256_implCompress = generate_sha256_implCompress(StubGenStubId::sha256_implCompress_id);
-      StubRoutines::_sha256_implCompressMB = generate_sha256_implCompress(StubGenStubId::sha256_implCompressMB_id);
-    }
-
-    // Generate GHASH intrinsics code
-    if (UseGHASHIntrinsics) {
-      StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks();
-    }
-#endif // COMPILER2_OR_JVMCI
-  }
-
-
- public:
-  StubGenerator(CodeBuffer* code, StubGenBlobId blob_id) : StubCodeGenerator(code, blob_id) {
-    switch(blob_id) {
-    case initial_id:
-      generate_initial_stubs();
-      break;
-     case continuation_id:
-      generate_continuation_stubs();
-      break;
-    case compiler_id:
-      generate_compiler_stubs();
-      break;
-    case final_id:
-      generate_final_stubs();
-      break;
-    default:
-      fatal("unexpected blob id: %d", blob_id);
-      break;
-    };
-  }
-}; // end class declaration
-
-void StubGenerator_generate(CodeBuffer* code, StubGenBlobId blob_id) {
-  StubGenerator g(code, blob_id);
-}
diff --git a/src/hotspot/cpu/x86/stubRoutines_x86_32.cpp b/src/hotspot/cpu/x86/stubRoutines_x86_32.cpp
deleted file mode 100644
index 810f421f141..00000000000
--- a/src/hotspot/cpu/x86/stubRoutines_x86_32.cpp
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-#include "runtime/deoptimization.hpp"
-#include "runtime/frame.inline.hpp"
-#include "runtime/javaThread.hpp"
-#include "runtime/stubRoutines.hpp"
-
-// Implementation of the platform-specific part of StubRoutines - for
-// a description of how to extend it, see the stubRoutines.hpp file.
-
-jint StubRoutines::x86::_fpu_cntrl_wrd_std   = 0;
-jint StubRoutines::x86::_fpu_cntrl_wrd_24    = 0;
-jint StubRoutines::x86::_fpu_cntrl_wrd_trunc = 0;
-
-jint StubRoutines::x86::_mxcsr_std = 0;
-
-jint StubRoutines::x86::_fpu_subnormal_bias1[3] = { 0, 0, 0 };
-jint StubRoutines::x86::_fpu_subnormal_bias2[3] = { 0, 0, 0 };
-
diff --git a/src/hotspot/cpu/x86/templateInterpreterGenerator_x86_32.cpp b/src/hotspot/cpu/x86/templateInterpreterGenerator_x86_32.cpp
deleted file mode 100644
index df8633bdd15..00000000000
--- a/src/hotspot/cpu/x86/templateInterpreterGenerator_x86_32.cpp
+++ /dev/null
@@ -1,509 +0,0 @@
-/*
- * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-#include "asm/macroAssembler.hpp"
-#include "compiler/disassembler.hpp"
-#include "interpreter/interp_masm.hpp"
-#include "interpreter/interpreter.hpp"
-#include "interpreter/interpreterRuntime.hpp"
-#include "interpreter/templateInterpreterGenerator.hpp"
-#include "runtime/arguments.hpp"
-#include "runtime/sharedRuntime.hpp"
-#include "runtime/stubRoutines.hpp"
-
-#define __ Disassembler::hook<InterpreterMacroAssembler>(__FILE__, __LINE__, _masm)->
-
-
-address TemplateInterpreterGenerator::generate_slow_signature_handler() {
-  address entry = __ pc();
-  // rbx,: method
-  // rcx: temporary
-  // rdi: pointer to locals
-  // rsp: end of copied parameters area
-  __ mov(rcx, rsp);
-  __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::slow_signature_handler), rbx, rdi, rcx);
-  __ ret(0);
-  return entry;
-}
-
-/**
- * Method entry for static native methods:
- *   int java.util.zip.CRC32.update(int crc, int b)
- */
-address TemplateInterpreterGenerator::generate_CRC32_update_entry() {
-  assert(UseCRC32Intrinsics, "this intrinsic is not supported");
-  address entry = __ pc();
-
-  // rbx: Method*
-  // rsi: senderSP must preserved for slow path, set SP to it on fast path
-  // rdx: scratch
-  // rdi: scratch
-
-  Label slow_path;
-  // If we need a safepoint check, generate full interpreter entry.
-  __ get_thread(rdi);
-  __ safepoint_poll(slow_path, rdi, false /* at_return */, false /* in_nmethod */);
-
-  // We don't generate local frame and don't align stack because
-  // we call stub code and there is no safepoint on this path.
-
-  // Load parameters
-  const Register crc = rax;  // crc
-  const Register val = rdx;  // source java byte value
-  const Register tbl = rdi;  // scratch
-
-  // Arguments are reversed on java expression stack
-  __ movl(val, Address(rsp,   wordSize)); // byte value
-  __ movl(crc, Address(rsp, 2*wordSize)); // Initial CRC
-
-  __ lea(tbl, ExternalAddress(StubRoutines::crc_table_addr()));
-  __ notl(crc); // ~crc
-  __ update_byte_crc32(crc, val, tbl);
-  __ notl(crc); // ~crc
-  // result in rax
-
-  // _areturn
-  __ pop(rdi);                // get return address
-  __ mov(rsp, rsi);           // set sp to sender sp
-  __ jmp(rdi);
-
-  // generate a vanilla native entry as the slow path
-  __ bind(slow_path);
-  __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native));
-  return entry;
-}
-
-/**
- * Method entry for static native methods:
- *   int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int len)
- *   int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len)
- */
-address TemplateInterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) {
-  assert(UseCRC32Intrinsics, "this intrinsic is not supported");
-  address entry = __ pc();
-
-  // rbx,: Method*
-  // rsi: senderSP must preserved for slow path, set SP to it on fast path
-  // rdx: scratch
-  // rdi: scratch
-
-  Label slow_path;
-  // If we need a safepoint check, generate full interpreter entry.
-  __ get_thread(rdi);
-  __ safepoint_poll(slow_path, rdi, false /* at_return */, false /* in_nmethod */);
-
-  // We don't generate local frame and don't align stack because
-  // we call stub code and there is no safepoint on this path.
-
-  // Load parameters
-  const Register crc = rax;  // crc
-  const Register buf = rdx;  // source java byte array address
-  const Register len = rdi;  // length
-
-  // value              x86_32
-  // interp. arg ptr    ESP + 4
-  // int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int len)
-  //                                         3           2      1        0
-  // int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len)
-  //                                              4         2,3      1        0
-
-  // Arguments are reversed on java expression stack
-  __ movl(len,   Address(rsp,   4 + 0)); // Length
-  // Calculate address of start element
-  if (kind == Interpreter::java_util_zip_CRC32_updateByteBuffer) {
-    __ movptr(buf, Address(rsp, 4 + 2 * wordSize)); // long buf
-    __ addptr(buf, Address(rsp, 4 + 1 * wordSize)); // + offset
-    __ movl(crc,   Address(rsp, 4 + 4 * wordSize)); // Initial CRC
-  } else {
-    __ movptr(buf, Address(rsp, 4 + 2 * wordSize)); // byte[] array
-    __ addptr(buf, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size
-    __ addptr(buf, Address(rsp, 4 + 1 * wordSize)); // + offset
-    __ movl(crc,   Address(rsp, 4 + 3 * wordSize)); // Initial CRC
-  }
-
-  __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, StubRoutines::updateBytesCRC32()), crc, buf, len);
-  // result in rax
-
-  // _areturn
-  __ pop(rdi);                // get return address
-  __ mov(rsp, rsi);           // set sp to sender sp
-  __ jmp(rdi);
-
-  // generate a vanilla native entry as the slow path
-  __ bind(slow_path);
-  __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native));
-  return entry;
-}
-
-/**
-* Method entry for static native methods:
-*   int java.util.zip.CRC32C.updateBytes(int crc, byte[] b, int off, int end)
-*   int java.util.zip.CRC32C.updateByteBuffer(int crc, long address, int off, int end)
-*/
-address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) {
-  assert(UseCRC32CIntrinsics, "this intrinsic is not supported");
-  address entry = __ pc();
-  // Load parameters
-  const Register crc = rax;  // crc
-  const Register buf = rcx;  // source java byte array address
-  const Register len = rdx;  // length
-  const Register end = len;
-
-  // value              x86_32
-  // interp. arg ptr    ESP + 4
-  // int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int end)
-  //                                         3           2      1        0
-  // int java.util.zip.CRC32.updateByteBuffer(int crc, long address, int off, int end)
-  //                                              4         2,3          1        0
-
-  // Arguments are reversed on java expression stack
-  __ movl(end, Address(rsp, 4 + 0)); // end
-  __ subl(len, Address(rsp, 4 + 1 * wordSize));  // end - offset == length
-  // Calculate address of start element
-  if (kind == Interpreter::java_util_zip_CRC32C_updateDirectByteBuffer) {
-    __ movptr(buf, Address(rsp, 4 + 2 * wordSize)); // long address
-    __ addptr(buf, Address(rsp, 4 + 1 * wordSize)); // + offset
-    __ movl(crc, Address(rsp, 4 + 4 * wordSize)); // Initial CRC
-  } else {
-    __ movptr(buf, Address(rsp, 4 + 2 * wordSize)); // byte[] array
-    __ addptr(buf, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size
-    __ addptr(buf, Address(rsp, 4 + 1 * wordSize)); // + offset
-    __ movl(crc, Address(rsp, 4 + 3 * wordSize)); // Initial CRC
-  }
-  __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, StubRoutines::updateBytesCRC32C()), crc, buf, len);
-  // result in rax
-  // _areturn
-  __ pop(rdi);                // get return address
-  __ mov(rsp, rsi);           // set sp to sender sp
-  __ jmp(rdi);
-
-  return entry;
-}
-
-/**
- * Method entry for static native method:
- *    java.lang.Float.intBitsToFloat(int bits)
- */
-address TemplateInterpreterGenerator::generate_Float_intBitsToFloat_entry() {
-  if (UseSSE >= 1) {
-    address entry = __ pc();
-
-    // rsi: the sender's SP
-
-    // Skip safepoint check (compiler intrinsic versions of this method
-    // do not perform safepoint checks either).
-
-    // Load 'bits' into xmm0 (interpreter returns results in xmm0)
-    __ movflt(xmm0, Address(rsp, wordSize));
-
-    // Return
-    __ pop(rdi); // get return address
-    __ mov(rsp, rsi); // set rsp to the sender's SP
-    __ jmp(rdi);
-    return entry;
-  }
-
-  return nullptr;
-}
-
-/**
- * Method entry for static native method:
- *    java.lang.Float.floatToRawIntBits(float value)
- */
-address TemplateInterpreterGenerator::generate_Float_floatToRawIntBits_entry() {
-  if (UseSSE >= 1) {
-    address entry = __ pc();
-
-    // rsi: the sender's SP
-
-    // Skip safepoint check (compiler intrinsic versions of this method
-    // do not perform safepoint checks either).
-
-    // Load the parameter (a floating-point value) into rax.
-    __ movl(rax, Address(rsp, wordSize));
-
-    // Return
-    __ pop(rdi); // get return address
-    __ mov(rsp, rsi); // set rsp to the sender's SP
-    __ jmp(rdi);
-    return entry;
-  }
-
-  return nullptr;
-}
-
-
-/**
- * Method entry for static native method:
- *    java.lang.Double.longBitsToDouble(long bits)
- */
-address TemplateInterpreterGenerator::generate_Double_longBitsToDouble_entry() {
-   if (UseSSE >= 2) {
-     address entry = __ pc();
-
-     // rsi: the sender's SP
-
-     // Skip safepoint check (compiler intrinsic versions of this method
-     // do not perform safepoint checks either).
-
-     // Load 'bits' into xmm0 (interpreter returns results in xmm0)
-     __ movdbl(xmm0, Address(rsp, wordSize));
-
-     // Return
-     __ pop(rdi); // get return address
-     __ mov(rsp, rsi); // set rsp to the sender's SP
-     __ jmp(rdi);
-     return entry;
-   }
-
-   return nullptr;
-}
-
-/**
- * Method entry for static native method:
- *    java.lang.Double.doubleToRawLongBits(double value)
- */
-address TemplateInterpreterGenerator::generate_Double_doubleToRawLongBits_entry() {
-  if (UseSSE >= 2) {
-    address entry = __ pc();
-
-    // rsi: the sender's SP
-
-    // Skip safepoint check (compiler intrinsic versions of this method
-    // do not perform safepoint checks either).
-
-    // Load the parameter (a floating-point value) into rax.
-    __ movl(rdx, Address(rsp, 2*wordSize));
-    __ movl(rax, Address(rsp, wordSize));
-
-    // Return
-    __ pop(rdi); // get return address
-    __ mov(rsp, rsi); // set rsp to the sender's SP
-    __ jmp(rdi);
-    return entry;
-  }
-
-  return nullptr;
-}
-
-/**
- * Method entry for static method:
- *    java.lang.Float.float16ToFloat(short floatBinary16)
- */
-address TemplateInterpreterGenerator::generate_Float_float16ToFloat_entry() {
-  assert(VM_Version::supports_float16(), "this intrinsic is not supported");
-  address entry = __ pc();
-
-  // rsi: the sender's SP
-
-  // Load value into xmm0 and convert
-  __ movswl(rax, Address(rsp, wordSize));
-  __ flt16_to_flt(xmm0, rax);
-
-  // Return
-  __ pop(rdi); // get return address
-  __ mov(rsp, rsi); // set rsp to the sender's SP
-  __ jmp(rdi);
-  return entry;
-}
-
-/**
- * Method entry for static method:
- *    java.lang.Float.floatToFloat16(float value)
- */
-address TemplateInterpreterGenerator::generate_Float_floatToFloat16_entry() {
-  assert(VM_Version::supports_float16(), "this intrinsic is not supported");
-  address entry = __ pc();
-
-  // rsi: the sender's SP
-
-  // Load value into xmm0, convert and put result into rax
-  __ movflt(xmm0, Address(rsp, wordSize));
-  __ flt_to_flt16(rax, xmm0, xmm1);
-
-  // Return
-  __ pop(rdi); // get return address
-  __ mov(rsp, rsi); // set rsp to the sender's SP
-  __ jmp(rdi);
-  return entry;
-}
-
-address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKind kind) {
-
-  // rbx,: Method*
-  // rcx: scratrch
-  // rsi: sender sp
-
-  address entry_point = __ pc();
-
-  // These don't need a safepoint check because they aren't virtually
-  // callable. We won't enter these intrinsics from compiled code.
-  // If in the future we added an intrinsic which was virtually callable
-  // we'd have to worry about how to safepoint so that this code is used.
-
-  // mathematical functions inlined by compiler
-  // (interpreter must provide identical implementation
-  // in order to avoid monotonicity bugs when switching
-  // from interpreter to compiler in the middle of some
-  // computation)
-  //
-  // stack: [ ret adr ] <-- rsp
-  //        [ lo(arg) ]
-  //        [ hi(arg) ]
-  //
-  if (kind == Interpreter::java_lang_math_tanh) {
-    return nullptr;
-  }
-
-  if (kind == Interpreter::java_lang_math_fmaD) {
-    if (!UseFMA) {
-      return nullptr; // Generate a vanilla entry
-    }
-    __ movdbl(xmm2, Address(rsp, 5 * wordSize));
-    __ movdbl(xmm1, Address(rsp, 3 * wordSize));
-    __ movdbl(xmm0, Address(rsp, 1 * wordSize));
-    __ fmad(xmm0, xmm1, xmm2, xmm0);
-    __ pop(rdi);                               // get return address
-    __ mov(rsp, rsi);                          // set sp to sender sp
-    __ jmp(rdi);
-
-    return entry_point;
-  } else if (kind == Interpreter::java_lang_math_fmaF) {
-    if (!UseFMA) {
-      return nullptr; // Generate a vanilla entry
-    }
-    __ movflt(xmm2, Address(rsp, 3 * wordSize));
-    __ movflt(xmm1, Address(rsp, 2 * wordSize));
-    __ movflt(xmm0, Address(rsp, 1 * wordSize));
-    __ fmaf(xmm0, xmm1, xmm2, xmm0);
-    __ pop(rdi);                               // get return address
-    __ mov(rsp, rsi);                          // set sp to sender sp
-    __ jmp(rdi);
-
-    return entry_point;
- }
-
-  __ fld_d(Address(rsp, 1*wordSize));
-  switch (kind) {
-    case Interpreter::java_lang_math_sin :
-        __ subptr(rsp, 2 * wordSize);
-        __ fstp_d(Address(rsp, 0));
-        if (VM_Version::supports_sse2() && StubRoutines::dsin() != nullptr) {
-          __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dsin())));
-        } else {
-          __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dsin));
-        }
-        __ addptr(rsp, 2 * wordSize);
-        break;
-    case Interpreter::java_lang_math_cos :
-        __ subptr(rsp, 2 * wordSize);
-        __ fstp_d(Address(rsp, 0));
-        if (VM_Version::supports_sse2() && StubRoutines::dcos() != nullptr) {
-          __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dcos())));
-        } else {
-          __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dcos));
-        }
-        __ addptr(rsp, 2 * wordSize);
-        break;
-    case Interpreter::java_lang_math_tan :
-        __ subptr(rsp, 2 * wordSize);
-        __ fstp_d(Address(rsp, 0));
-        if (StubRoutines::dtan() != nullptr) {
-          __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dtan())));
-        } else {
-          __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dtan));
-        }
-        __ addptr(rsp, 2 * wordSize);
-        break;
-    case Interpreter::java_lang_math_sqrt:
-        __ fsqrt();
-        break;
-    case Interpreter::java_lang_math_abs:
-        __ fabs();
-        break;
-    case Interpreter::java_lang_math_log:
-        __ subptr(rsp, 2 * wordSize);
-        __ fstp_d(Address(rsp, 0));
-        if (StubRoutines::dlog() != nullptr) {
-          __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlog())));
-        } else {
-          __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dlog));
-        }
-        __ addptr(rsp, 2 * wordSize);
-        break;
-    case Interpreter::java_lang_math_log10:
-        __ subptr(rsp, 2 * wordSize);
-        __ fstp_d(Address(rsp, 0));
-        if (StubRoutines::dlog10() != nullptr) {
-          __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlog10())));
-        } else {
-          __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dlog10));
-        }
-        __ addptr(rsp, 2 * wordSize);
-        break;
-    case Interpreter::java_lang_math_pow:
-      __ fld_d(Address(rsp, 3*wordSize)); // second argument
-      __ subptr(rsp, 4 * wordSize);
-      __ fstp_d(Address(rsp, 0));
-      __ fstp_d(Address(rsp, 2 * wordSize));
-      if (StubRoutines::dpow() != nullptr) {
-        __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dpow())));
-      } else {
-        __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dpow));
-      }
-      __ addptr(rsp, 4 * wordSize);
-      break;
-    case Interpreter::java_lang_math_exp:
-      __ subptr(rsp, 2*wordSize);
-      __ fstp_d(Address(rsp, 0));
-      if (StubRoutines::dexp() != nullptr) {
-        __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dexp())));
-      } else {
-        __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dexp));
-      }
-      __ addptr(rsp, 2*wordSize);
-    break;
-    default                              :
-        ShouldNotReachHere();
-  }
-
-  // return double result in xmm0 for interpreter and compilers.
-  if (UseSSE >= 2) {
-    __ subptr(rsp, 2*wordSize);
-    __ fstp_d(Address(rsp, 0));
-    __ movdbl(xmm0, Address(rsp, 0));
-    __ addptr(rsp, 2*wordSize);
-  }
-
-  // done, result in FPU ST(0) or XMM0
-  __ pop(rdi);                               // get return address
-  __ mov(rsp, rsi);                          // set sp to sender sp
-  __ jmp(rdi);
-
-  return entry_point;
-}
-
-// Not supported
-address TemplateInterpreterGenerator::generate_currentThread() { return nullptr; }
-
diff --git a/src/hotspot/cpu/x86/upcallLinker_x86_32.cpp b/src/hotspot/cpu/x86/upcallLinker_x86_32.cpp
deleted file mode 100644
index 6dd2ddd5874..00000000000
--- a/src/hotspot/cpu/x86/upcallLinker_x86_32.cpp
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Copyright (c) 2020, 2025, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-
-#include "prims/upcallLinker.hpp"
-
-address UpcallLinker::make_upcall_stub(jobject receiver, Symbol* signature,
-                                       BasicType* out_sig_bt, int total_out_args,
-                                       BasicType ret_type,
-                                       jobject jabi, jobject jconv,
-                                       bool needs_return_buffer, int ret_buf_size) {
-  ShouldNotCallThis();
-  return nullptr;
-}
diff --git a/src/hotspot/cpu/x86/vtableStubs_x86_32.cpp b/src/hotspot/cpu/x86/vtableStubs_x86_32.cpp
deleted file mode 100644
index 3e70a45b58b..00000000000
--- a/src/hotspot/cpu/x86/vtableStubs_x86_32.cpp
+++ /dev/null
@@ -1,265 +0,0 @@
-/*
- * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-#include "asm/macroAssembler.hpp"
-#include "code/compiledIC.hpp"
-#include "code/vtableStubs.hpp"
-#include "interp_masm_x86.hpp"
-#include "memory/resourceArea.hpp"
-#include "oops/instanceKlass.hpp"
-#include "oops/klassVtable.hpp"
-#include "runtime/sharedRuntime.hpp"
-#include "vmreg_x86.inline.hpp"
-#ifdef COMPILER2
-#include "opto/runtime.hpp"
-#endif
-
-// machine-dependent part of VtableStubs: create VtableStub of correct size and
-// initialize its code
-
-#define __ masm->
-
-#ifndef PRODUCT
-extern "C" void bad_compiled_vtable_index(JavaThread* thread, oop receiver, int index);
-#endif
-
-// These stubs are used by the compiler only.
-// Argument registers, which must be preserved:
-//   rcx - receiver (always first argument)
-//   rdx - second argument (if any)
-// Other registers that might be usable:
-//   rax - inline cache register (is interface for itable stub)
-//   rbx - method (used when calling out to interpreter)
-// Available now, but may become callee-save at some point:
-//   rsi, rdi
-// Note that rax and rdx are also used for return values.
-
-VtableStub* VtableStubs::create_vtable_stub(int vtable_index) {
-  // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing.
-  const int stub_code_length = code_size_limit(true);
-  VtableStub* s = new(stub_code_length) VtableStub(true, vtable_index);
-  // Can be null if there is no free space in the code cache.
-  if (s == nullptr) {
-    return nullptr;
-  }
-
-  // Count unused bytes in instruction sequences of variable size.
-  // We add them to the computed buffer size in order to avoid
-  // overflow in subsequently generated stubs.
-  address   start_pc;
-  int       slop_bytes = 0;
-  int       slop_delta = 0;
-  // No variance was detected in vtable stub sizes. Setting index_dependent_slop == 0 will unveil any deviation from this observation.
-  const int index_dependent_slop     = 0;
-
-  ResourceMark    rm;
-  CodeBuffer      cb(s->entry_point(), stub_code_length);
-  MacroAssembler* masm = new MacroAssembler(&cb);
-
-#if (!defined(PRODUCT) && defined(COMPILER2))
-  if (CountCompiledCalls) {
-    __ incrementl(ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr()));
-  }
-#endif
-
-  // get receiver (need to skip return address on top of stack)
-  assert(VtableStub::receiver_location() == rcx->as_VMReg(), "receiver expected in rcx");
-
-  // get receiver klass
-  address npe_addr = __ pc();
-  __ movptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
-
-#ifndef PRODUCT
-  if (DebugVtables) {
-    Label L;
-    start_pc = __ pc();
-    // check offset vs vtable length
-    __ cmpl(Address(rax, Klass::vtable_length_offset()), vtable_index*vtableEntry::size());
-    slop_delta  = 10 - (__ pc() - start_pc);  // cmpl varies in length, depending on data
-    slop_bytes += slop_delta;
-    assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta);
-
-    __ jcc(Assembler::greater, L);
-    __ movl(rbx, vtable_index);
-    // VTABLE TODO: find upper bound for call_VM length.
-    start_pc = __ pc();
-    __ call_VM(noreg, CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), rcx, rbx);
-    slop_delta  = 500 - (__ pc() - start_pc);
-    slop_bytes += slop_delta;
-    assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta);
-    __ bind(L);
-  }
-#endif // PRODUCT
-
-  const Register method = rbx;
-
-  // load Method* and target address
-  start_pc = __ pc();
-  __ lookup_virtual_method(rax, vtable_index, method);
-  slop_delta  = 6 - (int)(__ pc() - start_pc);
-  slop_bytes += slop_delta;
-  assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta);
-
-#ifndef PRODUCT
-  if (DebugVtables) {
-    Label L;
-    __ cmpptr(method, NULL_WORD);
-    __ jcc(Assembler::equal, L);
-    __ cmpptr(Address(method, Method::from_compiled_offset()), NULL_WORD);
-    __ jcc(Assembler::notZero, L);
-    __ stop("Vtable entry is null");
-    __ bind(L);
-  }
-#endif // PRODUCT
-
-  // rax: receiver klass
-  // method (rbx): Method*
-  // rcx: receiver
-  address ame_addr = __ pc();
-  __ jmp( Address(method, Method::from_compiled_offset()));
-
-  masm->flush();
-  slop_bytes += index_dependent_slop; // add'l slop for size variance due to large itable offsets
-  bookkeeping(masm, tty, s, npe_addr, ame_addr, true, vtable_index, slop_bytes, index_dependent_slop);
-
-  return s;
-}
-
-
-VtableStub* VtableStubs::create_itable_stub(int itable_index) {
-  // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing.
-  const int stub_code_length = code_size_limit(false);
-  VtableStub* s = new(stub_code_length) VtableStub(false, itable_index);
-  // Can be null if there is no free space in the code cache.
-  if (s == nullptr) {
-    return nullptr;
-  }
-  // Count unused bytes in instruction sequences of variable size.
-  // We add them to the computed buffer size in order to avoid
-  // overflow in subsequently generated stubs.
-  address   start_pc;
-  int       slop_bytes = 0;
-  int       slop_delta = 0;
-  const int index_dependent_slop = (itable_index == 0) ? 4 :     // code size change with transition from 8-bit to 32-bit constant (@index == 32).
-                                   (itable_index < 32) ? 3 : 0;  // index == 0 generates even shorter code.
-
-  ResourceMark    rm;
-  CodeBuffer      cb(s->entry_point(), stub_code_length);
-  MacroAssembler* masm = new MacroAssembler(&cb);
-
-#if (!defined(PRODUCT) && defined(COMPILER2))
-  if (CountCompiledCalls) {
-    __ incrementl(ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr()));
-  }
-#endif /* PRODUCT */
-
-  // Entry arguments:
-  //  rax: CompiledICData
-  //  rcx: Receiver
-
-  // Most registers are in use; we'll use rax, rbx, rcx, rdx, rsi, rdi
-  // (If we need to make rsi, rdi callee-save, do a push/pop here.)
-  const Register recv_klass_reg     = rsi;
-  const Register holder_klass_reg   = rax; // declaring interface klass (DEFC)
-  const Register resolved_klass_reg = rdi; // resolved interface klass (REFC)
-  const Register temp_reg           = rdx;
-  const Register method             = rbx;
-  const Register icdata_reg         = rax;
-  const Register receiver           = rcx;
-
-  __ movptr(resolved_klass_reg, Address(icdata_reg, CompiledICData::itable_refc_klass_offset()));
-  __ movptr(holder_klass_reg,   Address(icdata_reg, CompiledICData::itable_defc_klass_offset()));
-
-  Label L_no_such_interface;
-
-  // get receiver klass (also an implicit null-check)
-  assert(VtableStub::receiver_location() ==  rcx->as_VMReg(), "receiver expected in  rcx");
-  address npe_addr = __ pc();
-  __ load_klass(recv_klass_reg, rcx, noreg);
-
-  start_pc = __ pc();
-  __ push(rdx); // temp_reg
-
-  // Receiver subtype check against REFC.
-  // Get selected method from declaring class and itable index
-  __ lookup_interface_method_stub(recv_klass_reg, // input
-                                  holder_klass_reg, // input
-                                  resolved_klass_reg, // input
-                                  method, // output
-                                  temp_reg,
-                                  noreg,
-                                  receiver, // input (x86_32 only: to restore recv_klass value)
-                                  itable_index,
-                                  L_no_such_interface);
-  const ptrdiff_t  lookupSize = __ pc() - start_pc;
-
-  // We expect we need index_dependent_slop extra bytes. Reason:
-  // The emitted code in lookup_interface_method changes when itable_index exceeds 31.
-  // For windows, a narrow estimate was found to be 104. Other OSes not tested.
-  const ptrdiff_t estimate = 104;
-  const ptrdiff_t codesize = lookupSize + index_dependent_slop;
-  slop_delta  = (int)(estimate - codesize);
-  slop_bytes += slop_delta;
-  assert(slop_delta >= 0, "itable #%d: Code size estimate (%d) for lookup_interface_method too small, required: %d", itable_index, (int)estimate, (int)codesize);
-
-  // method (rbx): Method*
-  // rcx: receiver
-
-#ifdef ASSERT
-  if (DebugVtables) {
-    Label L1;
-    __ cmpptr(method, NULL_WORD);
-    __ jcc(Assembler::equal, L1);
-    __ cmpptr(Address(method, Method::from_compiled_offset()), NULL_WORD);
-    __ jcc(Assembler::notZero, L1);
-    __ stop("Method* is null");
-    __ bind(L1);
-  }
-#endif // ASSERT
-
-  __ pop(rdx);
-  address ame_addr = __ pc();
-  __ jmp(Address(method, Method::from_compiled_offset()));
-
-  __ bind(L_no_such_interface);
-  // Handle IncompatibleClassChangeError in itable stubs.
-  // More detailed error message.
-  // We force resolving of the call site by jumping to the "handle
-  // wrong method" stub, and so let the interpreter runtime do all the
-  // dirty work.
-  __ pop(rdx);
-  __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub()));
-
-  masm->flush();
-  slop_bytes += index_dependent_slop; // add'l slop for size variance due to large itable offsets
-  bookkeeping(masm, tty, s, npe_addr, ame_addr, false, itable_index, slop_bytes, index_dependent_slop);
-
-  return s;
-}
-
-int VtableStub::pd_code_alignment() {
-  // x86 cache line size is 64 bytes, but we want to limit alignment loss.
-  const unsigned int icache_line_size = wordSize;
-  return icache_line_size;
-}
diff --git a/src/hotspot/cpu/x86/x86_32.ad b/src/hotspot/cpu/x86/x86_32.ad
deleted file mode 100644
index 0b8dee7392a..00000000000
--- a/src/hotspot/cpu/x86/x86_32.ad
+++ /dev/null
@@ -1,13702 +0,0 @@
-//
-// Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
-// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
-//
-// This code is free software; you can redistribute it and/or modify it
-// under the terms of the GNU General Public License version 2 only, as
-// published by the Free Software Foundation.
-//
-// This code is distributed in the hope that it will be useful, but WITHOUT
-// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-// version 2 for more details (a copy is included in the LICENSE file that
-// accompanied this code).
-//
-// You should have received a copy of the GNU General Public License version
-// 2 along with this work; if not, write to the Free Software Foundation,
-// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
-//
-// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
-// or visit www.oracle.com if you need additional information or have any
-// questions.
-//
-//
-
-// X86 Architecture Description File
-
-//----------REGISTER DEFINITION BLOCK------------------------------------------
-// This information is used by the matcher and the register allocator to
-// describe individual registers and classes of registers within the target
-// architecture.
-
-register %{
-//----------Architecture Description Register Definitions----------------------
-// General Registers
-// "reg_def"  name ( register save type, C convention save type,
-//                   ideal register type, encoding );
-// Register Save Types:
-//
-// NS  = No-Save:       The register allocator assumes that these registers
-//                      can be used without saving upon entry to the method, &
-//                      that they do not need to be saved at call sites.
-//
-// SOC = Save-On-Call:  The register allocator assumes that these registers
-//                      can be used without saving upon entry to the method,
-//                      but that they must be saved at call sites.
-//
-// SOE = Save-On-Entry: The register allocator assumes that these registers
-//                      must be saved before using them upon entry to the
-//                      method, but they do not need to be saved at call
-//                      sites.
-//
-// AS  = Always-Save:   The register allocator assumes that these registers
-//                      must be saved before using them upon entry to the
-//                      method, & that they must be saved at call sites.
-//
-// Ideal Register Type is used to determine how to save & restore a
-// register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
-// spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
-//
-// The encoding number is the actual bit-pattern placed into the opcodes.
-
-// General Registers
-// Previously set EBX, ESI, and EDI as save-on-entry for java code
-// Turn off SOE in java-code due to frequent use of uncommon-traps.
-// Now that allocator is better, turn on ESI and EDI as SOE registers.
-
-reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
-reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
-reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
-reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
-// now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
-reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
-reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
-reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
-reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());
-
-// Float registers.  We treat TOS/FPR0 special.  It is invisible to the
-// allocator, and only shows up in the encodings.
-reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
-reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
-// Ok so here's the trick FPR1 is really st(0) except in the midst
-// of emission of assembly for a machnode. During the emission the fpu stack
-// is pushed making FPR1 == st(1) temporarily. However at any safepoint
-// the stack will not have this element so FPR1 == st(0) from the
-// oopMap viewpoint. This same weirdness with numbering causes
-// instruction encoding to have to play games with the register
-// encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation
-// where it does flt->flt moves to see an example
-//
-reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
-reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
-reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
-reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
-reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
-reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
-reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
-reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
-reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
-reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
-reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
-reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
-reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
-reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
-//
-// Empty fill registers, which are never used, but supply alignment to xmm regs
-//
-reg_def FILL0( SOC, SOC, Op_RegF, 8, VMRegImpl::Bad());
-reg_def FILL1( SOC, SOC, Op_RegF, 9, VMRegImpl::Bad());
-reg_def FILL2( SOC, SOC, Op_RegF, 10, VMRegImpl::Bad());
-reg_def FILL3( SOC, SOC, Op_RegF, 11, VMRegImpl::Bad());
-reg_def FILL4( SOC, SOC, Op_RegF, 12, VMRegImpl::Bad());
-reg_def FILL5( SOC, SOC, Op_RegF, 13, VMRegImpl::Bad());
-reg_def FILL6( SOC, SOC, Op_RegF, 14, VMRegImpl::Bad());
-reg_def FILL7( SOC, SOC, Op_RegF, 15, VMRegImpl::Bad());
-
-// Specify priority of register selection within phases of register
-// allocation.  Highest priority is first.  A useful heuristic is to
-// give registers a low priority when they are required by machine
-// instructions, like EAX and EDX.  Registers which are used as
-// pairs must fall on an even boundary (witness the FPR#L's in this list).
-// For the Intel integer registers, the equivalent Long pairs are
-// EDX:EAX, EBX:ECX, and EDI:EBP.
-alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI, ESP,
-                    FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
-                    FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
-                    FPR6L, FPR6H, FPR7L, FPR7H,
-                    FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);
-
-
-//----------Architecture Description Register Classes--------------------------
-// Several register classes are automatically defined based upon information in
-// this architecture description.
-// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
-// 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
-//
-// Class for no registers (empty set).
-reg_class no_reg();
-
-// Class for all registers
-reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
-// Class for all registers (excluding EBP)
-reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
-// Dynamic register class that selects at runtime between register classes
-// any_reg and any_no_ebp_reg (depending on the value of the flag PreserveFramePointer).
-// Equivalent to: return PreserveFramePointer ? any_no_ebp_reg : any_reg;
-reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});
-
-// Class for general registers
-reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
-// Class for general registers (excluding EBP).
-// It is also safe for use by tailjumps (we don't want to allocate in ebp).
-// Used also if the PreserveFramePointer flag is true.
-reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
-// Dynamic register class that selects between int_reg and int_reg_no_ebp.
-reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});
-
-// Class of "X" registers
-reg_class int_x_reg(EBX, ECX, EDX, EAX);
-
-// Class of registers that can appear in an address with no offset.
-// EBP and ESP require an extra instruction byte for zero offset.
-// Used in fast-unlock
-reg_class p_reg(EDX, EDI, ESI, EBX);
-
-// Class for general registers excluding ECX
-reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
-// Class for general registers excluding ECX (and EBP)
-reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
-// Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
-reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});
-
-// Class for general registers excluding EAX
-reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);
-
-// Class for general registers excluding EAX and EBX.
-reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
-// Class for general registers excluding EAX and EBX (and EBP)
-reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
-// Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
-reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});
-
-// Class of EAX (for multiply and divide operations)
-reg_class eax_reg(EAX);
-
-// Class of EBX (for atomic add)
-reg_class ebx_reg(EBX);
-
-// Class of ECX (for shift and JCXZ operations and cmpLTMask)
-reg_class ecx_reg(ECX);
-
-// Class of EDX (for multiply and divide operations)
-reg_class edx_reg(EDX);
-
-// Class of EDI (for synchronization)
-reg_class edi_reg(EDI);
-
-// Class of ESI (for synchronization)
-reg_class esi_reg(ESI);
-
-// Singleton class for stack pointer
-reg_class sp_reg(ESP);
-
-// Singleton class for instruction pointer
-// reg_class ip_reg(EIP);
-
-// Class of integer register pairs
-reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
-// Class of integer register pairs (excluding EBP and EDI);
-reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
-// Dynamic register class that selects between long_reg and long_reg_no_ebp.
-reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});
-
-// Class of integer register pairs that aligns with calling convention
-reg_class eadx_reg( EAX,EDX );
-reg_class ebcx_reg( ECX,EBX );
-reg_class ebpd_reg( EBP,EDI );
-
-// Not AX or DX, used in divides
-reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
-// Not AX or DX (and neither EBP), used in divides
-reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
-// Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
-reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});
-
-// Floating point registers.  Notice FPR0 is not a choice.
-// FPR0 is not ever allocated; we use clever encodings to fake
-// a 2-address instructions out of Intels FP stack.
-reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );
-
-reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
-                      FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
-                      FPR7L,FPR7H );
-
-reg_class fp_flt_reg0( FPR1L );
-reg_class fp_dbl_reg0( FPR1L,FPR1H );
-reg_class fp_dbl_reg1( FPR2L,FPR2H );
-reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
-                          FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );
-
-%}
-
-
-//----------SOURCE BLOCK-------------------------------------------------------
-// This is a block of C++ code which provides values, functions, and
-// definitions necessary in the rest of the architecture description
-source_hpp %{
-// Must be visible to the DFA in dfa_x86_32.cpp
-extern bool is_operand_hi32_zero(Node* n);
-%}
-
-source %{
-#define   RELOC_IMM32    Assembler::imm_operand
-#define   RELOC_DISP32   Assembler::disp32_operand
-
-#define __ masm->
-
-// How to find the high register of a Long pair, given the low register
-#define   HIGH_FROM_LOW(x) (as_Register((x)->encoding()+2))
-#define   HIGH_FROM_LOW_ENC(x) ((x)+2)
-
-// These masks are used to provide 128-bit aligned bitmasks to the XMM
-// instructions, to allow sign-masking or sign-bit flipping.  They allow
-// fast versions of NegF/NegD and AbsF/AbsD.
-
-void reg_mask_init() {}
-
-// Note: 'double' and 'long long' have 32-bits alignment on x86.
-static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
-  // Use the expression (adr)&(~0xF) to provide 128-bits aligned address
-  // of 128-bits operands for SSE instructions.
-  jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
-  // Store the value to a 128-bits operand.
-  operand[0] = lo;
-  operand[1] = hi;
-  return operand;
-}
-
-// Buffer for 128-bits masks used by SSE instructions.
-static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)
-
-// Static initialization during VM startup.
-static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
-static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
-static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
-static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
-
-// Offset hacking within calls.
-static int pre_call_resets_size() {
-  int size = 0;
-  Compile* C = Compile::current();
-  if (C->in_24_bit_fp_mode()) {
-    size += 6; // fldcw
-  }
-  if (VM_Version::supports_vzeroupper()) {
-    size += 3; // vzeroupper
-  }
-  return size;
-}
-
-// !!!!! Special hack to get all type of calls to specify the byte offset
-//       from the start of the call to the point where the return address
-//       will point.
-int MachCallStaticJavaNode::ret_addr_offset() {
-  return 5 + pre_call_resets_size();  // 5 bytes from start of call to where return address points
-}
-
-int MachCallDynamicJavaNode::ret_addr_offset() {
-  return 10 + pre_call_resets_size();  // 10 bytes from start of call to where return address points
-}
-
-static int sizeof_FFree_Float_Stack_All = -1;
-
-int MachCallRuntimeNode::ret_addr_offset() {
-  assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
-  return 5 + pre_call_resets_size() + (_leaf_no_fp ? 0 : sizeof_FFree_Float_Stack_All);
-}
-
-//
-// Compute padding required for nodes which need alignment
-//
-
-// The address of the call instruction needs to be 4-byte aligned to
-// ensure that it does not span a cache line so that it can be patched.
-int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
-  current_offset += pre_call_resets_size();  // skip fldcw, if any
-  current_offset += 1;      // skip call opcode byte
-  return align_up(current_offset, alignment_required()) - current_offset;
-}
-
-// The address of the call instruction needs to be 4-byte aligned to
-// ensure that it does not span a cache line so that it can be patched.
-int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
-  current_offset += pre_call_resets_size();  // skip fldcw, if any
-  current_offset += 5;      // skip MOV instruction
-  current_offset += 1;      // skip call opcode byte
-  return align_up(current_offset, alignment_required()) - current_offset;
-}
-
-// EMIT_RM()
-void emit_rm(C2_MacroAssembler *masm, int f1, int f2, int f3) {
-  unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
-  __ emit_int8(c);
-}
-
-// EMIT_CC()
-void emit_cc(C2_MacroAssembler *masm, int f1, int f2) {
-  unsigned char c = (unsigned char)( f1 | f2 );
-  __ emit_int8(c);
-}
-
-// EMIT_OPCODE()
-void emit_opcode(C2_MacroAssembler *masm, int code) {
-  __ emit_int8((unsigned char) code);
-}
-
-// EMIT_OPCODE() w/ relocation information
-void emit_opcode(C2_MacroAssembler *masm, int code, relocInfo::relocType reloc, int offset = 0) {
-  __ relocate(__ inst_mark() + offset, reloc);
-  emit_opcode(masm, code);
-}
-
-// EMIT_D8()
-void emit_d8(C2_MacroAssembler *masm, int d8) {
-  __ emit_int8((unsigned char) d8);
-}
-
-// EMIT_D16()
-void emit_d16(C2_MacroAssembler *masm, int d16) {
-  __ emit_int16(d16);
-}
-
-// EMIT_D32()
-void emit_d32(C2_MacroAssembler *masm, int d32) {
-  __ emit_int32(d32);
-}
-
-// emit 32 bit value and construct relocation entry from relocInfo::relocType
-void emit_d32_reloc(C2_MacroAssembler *masm, int d32, relocInfo::relocType reloc,
-        int format) {
-  __ relocate(__ inst_mark(), reloc, format);
-  __ emit_int32(d32);
-}
-
-// emit 32 bit value and construct relocation entry from RelocationHolder
-void emit_d32_reloc(C2_MacroAssembler *masm, int d32, RelocationHolder const& rspec,
-        int format) {
-#ifdef ASSERT
-  if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
-    assert(oopDesc::is_oop(cast_to_oop(d32)), "cannot embed broken oops in code");
-  }
-#endif
-  __ relocate(__ inst_mark(), rspec, format);
-  __ emit_int32(d32);
-}
-
-// Access stack slot for load or store
-void store_to_stackslot(C2_MacroAssembler *masm, int opcode, int rm_field, int disp) {
-  emit_opcode( masm, opcode );               // (e.g., FILD   [ESP+src])
-  if( -128 <= disp && disp <= 127 ) {
-    emit_rm( masm, 0x01, rm_field, ESP_enc );  // R/M byte
-    emit_rm( masm, 0x00, ESP_enc, ESP_enc);    // SIB byte
-    emit_d8 (masm, disp);     // Displacement  // R/M byte
-  } else {
-    emit_rm( masm, 0x02, rm_field, ESP_enc );  // R/M byte
-    emit_rm( masm, 0x00, ESP_enc, ESP_enc);    // SIB byte
-    emit_d32(masm, disp);     // Displacement  // R/M byte
-  }
-}
-
-   // rRegI ereg, memory mem) %{    // emit_reg_mem
-void encode_RegMem( C2_MacroAssembler *masm, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
-  // There is no index & no scale, use form without SIB byte
-  if ((index == 0x4) &&
-      (scale == 0) && (base != ESP_enc)) {
-    // If no displacement, mode is 0x0; unless base is [EBP]
-    if ( (displace == 0) && (base != EBP_enc) ) {
-      emit_rm(masm, 0x0, reg_encoding, base);
-    }
-    else {                    // If 8-bit displacement, mode 0x1
-      if ((displace >= -128) && (displace <= 127)
-          && (disp_reloc == relocInfo::none) ) {
-        emit_rm(masm, 0x1, reg_encoding, base);
-        emit_d8(masm, displace);
-      }
-      else {                  // If 32-bit displacement
-        if (base == -1) { // Special flag for absolute address
-          emit_rm(masm, 0x0, reg_encoding, 0x5);
-          // (manual lies; no SIB needed here)
-          if ( disp_reloc != relocInfo::none ) {
-            emit_d32_reloc(masm, displace, disp_reloc, 1);
-          } else {
-            emit_d32      (masm, displace);
-          }
-        }
-        else {                // Normal base + offset
-          emit_rm(masm, 0x2, reg_encoding, base);
-          if ( disp_reloc != relocInfo::none ) {
-            emit_d32_reloc(masm, displace, disp_reloc, 1);
-          } else {
-            emit_d32      (masm, displace);
-          }
-        }
-      }
-    }
-  }
-  else {                      // Else, encode with the SIB byte
-    // If no displacement, mode is 0x0; unless base is [EBP]
-    if (displace == 0 && (base != EBP_enc)) {  // If no displacement
-      emit_rm(masm, 0x0, reg_encoding, 0x4);
-      emit_rm(masm, scale, index, base);
-    }
-    else {                    // If 8-bit displacement, mode 0x1
-      if ((displace >= -128) && (displace <= 127)
-          && (disp_reloc == relocInfo::none) ) {
-        emit_rm(masm, 0x1, reg_encoding, 0x4);
-        emit_rm(masm, scale, index, base);
-        emit_d8(masm, displace);
-      }
-      else {                  // If 32-bit displacement
-        if (base == 0x04 ) {
-          emit_rm(masm, 0x2, reg_encoding, 0x4);
-          emit_rm(masm, scale, index, 0x04);
-        } else {
-          emit_rm(masm, 0x2, reg_encoding, 0x4);
-          emit_rm(masm, scale, index, base);
-        }
-        if ( disp_reloc != relocInfo::none ) {
-          emit_d32_reloc(masm, displace, disp_reloc, 1);
-        } else {
-          emit_d32      (masm, displace);
-        }
-      }
-    }
-  }
-}
-
-
-void encode_Copy( C2_MacroAssembler *masm, int dst_encoding, int src_encoding ) {
-  if( dst_encoding == src_encoding ) {
-    // reg-reg copy, use an empty encoding
-  } else {
-    emit_opcode( masm, 0x8B );
-    emit_rm(masm, 0x3, dst_encoding, src_encoding );
-  }
-}
-
-void emit_cmpfp_fixup(MacroAssembler* masm) {
-  Label exit;
-  __ jccb(Assembler::noParity, exit);
-  __ pushf();
-  //
-  // comiss/ucomiss instructions set ZF,PF,CF flags and
-  // zero OF,AF,SF for NaN values.
-  // Fixup flags by zeroing ZF,PF so that compare of NaN
-  // values returns 'less than' result (CF is set).
-  // Leave the rest of flags unchanged.
-  //
-  //    7 6 5 4 3 2 1 0
-  //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
-  //    0 0 1 0 1 0 1 1   (0x2B)
-  //
-  __ andl(Address(rsp, 0), 0xffffff2b);
-  __ popf();
-  __ bind(exit);
-}
-
-static void emit_cmpfp3(MacroAssembler* masm, Register dst) {
-  Label done;
-  __ movl(dst, -1);
-  __ jcc(Assembler::parity, done);
-  __ jcc(Assembler::below, done);
-  __ setb(Assembler::notEqual, dst);
-  __ movzbl(dst, dst);
-  __ bind(done);
-}
-
-
-//=============================================================================
-const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
-
-int ConstantTable::calculate_table_base_offset() const {
-  return 0;  // absolute addressing, no offset
-}
-
-bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
-void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
-  ShouldNotReachHere();
-}
-
-void MachConstantBaseNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const {
-  // Empty encoding
-}
-
-uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
-  return 0;
-}
-
-#ifndef PRODUCT
-void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
-  st->print("# MachConstantBaseNode (empty encoding)");
-}
-#endif
-
-
-//=============================================================================
-#ifndef PRODUCT
-void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
-  Compile* C = ra_->C;
-
-  int framesize = C->output()->frame_size_in_bytes();
-  int bangsize = C->output()->bang_size_in_bytes();
-  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
-  // Remove wordSize for return addr which is already pushed.
-  framesize -= wordSize;
-
-  if (C->output()->need_stack_bang(bangsize)) {
-    framesize -= wordSize;
-    st->print("# stack bang (%d bytes)", bangsize);
-    st->print("\n\t");
-    st->print("PUSH   EBP\t# Save EBP");
-    if (PreserveFramePointer) {
-      st->print("\n\t");
-      st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
-    }
-    if (framesize) {
-      st->print("\n\t");
-      st->print("SUB    ESP, #%d\t# Create frame",framesize);
-    }
-  } else {
-    st->print("SUB    ESP, #%d\t# Create frame",framesize);
-    st->print("\n\t");
-    framesize -= wordSize;
-    st->print("MOV    [ESP + #%d], EBP\t# Save EBP",framesize);
-    if (PreserveFramePointer) {
-      st->print("\n\t");
-      st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
-      if (framesize > 0) {
-        st->print("\n\t");
-        st->print("ADD    EBP, #%d", framesize);
-      }
-    }
-  }
-
-  if (VerifyStackAtCalls) {
-    st->print("\n\t");
-    framesize -= wordSize;
-    st->print("MOV    [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
-  }
-
-  if( C->in_24_bit_fp_mode() ) {
-    st->print("\n\t");
-    st->print("FLDCW  \t# load 24 bit fpu control word");
-  }
-  if (UseSSE >= 2 && VerifyFPU) {
-    st->print("\n\t");
-    st->print("# verify FPU stack (must be clean on entry)");
-  }
-
-#ifdef ASSERT
-  if (VerifyStackAtCalls) {
-    st->print("\n\t");
-    st->print("# stack alignment check");
-  }
-#endif
-  st->cr();
-}
-#endif
-
-
-void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
-  Compile* C = ra_->C;
-
-  int framesize = C->output()->frame_size_in_bytes();
-  int bangsize = C->output()->bang_size_in_bytes();
-
-  __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode(), C->stub_function() != nullptr);
-
-  C->output()->set_frame_complete(__ offset());
-
-  if (C->has_mach_constant_base_node()) {
-    // NOTE: We set the table base offset here because users might be
-    // emitted before MachConstantBaseNode.
-    ConstantTable& constant_table = C->output()->constant_table();
-    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
-  }
-}
-
-uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
-  return MachNode::size(ra_); // too many variables; just compute it the hard way
-}
-
-int MachPrologNode::reloc() const {
-  return 0; // a large enough number
-}
-
-//=============================================================================
-#ifndef PRODUCT
-void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
-  Compile *C = ra_->C;
-  int framesize = C->output()->frame_size_in_bytes();
-  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
-  // Remove two words for return addr and rbp,
-  framesize -= 2*wordSize;
-
-  if (C->max_vector_size() > 16) {
-    st->print("VZEROUPPER");
-    st->cr(); st->print("\t");
-  }
-  if (C->in_24_bit_fp_mode()) {
-    st->print("FLDCW  standard control word");
-    st->cr(); st->print("\t");
-  }
-  if (framesize) {
-    st->print("ADD    ESP,%d\t# Destroy frame",framesize);
-    st->cr(); st->print("\t");
-  }
-  st->print_cr("POPL   EBP"); st->print("\t");
-  if (do_polling() && C->is_method_compilation()) {
-    st->print("CMPL    rsp, poll_offset[thread]  \n\t"
-              "JA      #safepoint_stub\t"
-              "# Safepoint: poll for GC");
-  }
-}
-#endif
-
-void MachEpilogNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
-  Compile *C = ra_->C;
-
-  if (C->max_vector_size() > 16) {
-    // Clear upper bits of YMM registers when current compiled code uses
-    // wide vectors to avoid AVX <-> SSE transition penalty during call.
-    __ vzeroupper();
-  }
-  // If method set FPU control word, restore to standard control word
-  if (C->in_24_bit_fp_mode()) {
-    __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
-  }
-
-  int framesize = C->output()->frame_size_in_bytes();
-  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
-  // Remove two words for return addr and rbp,
-  framesize -= 2*wordSize;
-
-  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
-
-  if (framesize >= 128) {
-    emit_opcode(masm, 0x81); // add  SP, #framesize
-    emit_rm(masm, 0x3, 0x00, ESP_enc);
-    emit_d32(masm, framesize);
-  } else if (framesize) {
-    emit_opcode(masm, 0x83); // add  SP, #framesize
-    emit_rm(masm, 0x3, 0x00, ESP_enc);
-    emit_d8(masm, framesize);
-  }
-
-  emit_opcode(masm, 0x58 | EBP_enc);
-
-  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
-    __ reserved_stack_check();
-  }
-
-  if (do_polling() && C->is_method_compilation()) {
-    Register thread = as_Register(EBX_enc);
-    __ get_thread(thread);
-    Label dummy_label;
-    Label* code_stub = &dummy_label;
-    if (!C->output()->in_scratch_emit_size()) {
-      C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
-      C->output()->add_stub(stub);
-      code_stub = &stub->entry();
-    }
-    __ set_inst_mark();
-    __ relocate(relocInfo::poll_return_type);
-    __ clear_inst_mark();
-    __ safepoint_poll(*code_stub, thread, true /* at_return */, true /* in_nmethod */);
-  }
-}
-
-uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
-  return MachNode::size(ra_); // too many variables; just compute it
-                              // the hard way
-}
-
-int MachEpilogNode::reloc() const {
-  return 0; // a large enough number
-}
-
-const Pipeline * MachEpilogNode::pipeline() const {
-  return MachNode::pipeline_class();
-}
-
-//=============================================================================
-
-enum RC { rc_bad, rc_int, rc_kreg, rc_float, rc_xmm, rc_stack };
-static enum RC rc_class( OptoReg::Name reg ) {
-
-  if( !OptoReg::is_valid(reg)  ) return rc_bad;
-  if (OptoReg::is_stack(reg)) return rc_stack;
-
-  VMReg r = OptoReg::as_VMReg(reg);
-  if (r->is_Register()) return rc_int;
-  if (r->is_FloatRegister()) {
-    assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
-    return rc_float;
-  }
-  if (r->is_KRegister()) return rc_kreg;
-  assert(r->is_XMMRegister(), "must be");
-  return rc_xmm;
-}
-
-static int impl_helper( C2_MacroAssembler *masm, bool do_size, bool is_load, int offset, int reg,
-                        int opcode, const char *op_str, int size, outputStream* st ) {
-  if( masm ) {
-    masm->set_inst_mark();
-    emit_opcode  (masm, opcode );
-    encode_RegMem(masm, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
-    masm->clear_inst_mark();
-#ifndef PRODUCT
-  } else if( !do_size ) {
-    if( size != 0 ) st->print("\n\t");
-    if( opcode == 0x8B || opcode == 0x89 ) { // MOV
-      if( is_load ) st->print("%s   %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
-      else          st->print("%s   [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
-    } else { // FLD, FST, PUSH, POP
-      st->print("%s [ESP + #%d]",op_str,offset);
-    }
-#endif
-  }
-  int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
-  return size+3+offset_size;
-}
-
-// Helper for XMM registers.  Extra opcode bits, limited syntax.
-static int impl_x_helper( C2_MacroAssembler *masm, bool do_size, bool is_load,
-                         int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
-  int in_size_in_bits = Assembler::EVEX_32bit;
-  int evex_encoding = 0;
-  if (reg_lo+1 == reg_hi) {
-    in_size_in_bits = Assembler::EVEX_64bit;
-    evex_encoding = Assembler::VEX_W;
-  }
-  if (masm) {
-    // EVEX spills remain EVEX: Compressed displacemement is better than AVX on spill mem operations,
-    //                          it maps more cases to single byte displacement
-    __ set_managed();
-    if (reg_lo+1 == reg_hi) { // double move?
-      if (is_load) {
-        __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
-      } else {
-        __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
-      }
-    } else {
-      if (is_load) {
-        __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
-      } else {
-        __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
-      }
-    }
-#ifndef PRODUCT
-  } else if (!do_size) {
-    if (size != 0) st->print("\n\t");
-    if (reg_lo+1 == reg_hi) { // double move?
-      if (is_load) st->print("%s %s,[ESP + #%d]",
-                              UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
-                              Matcher::regName[reg_lo], offset);
-      else         st->print("MOVSD  [ESP + #%d],%s",
-                              offset, Matcher::regName[reg_lo]);
-    } else {
-      if (is_load) st->print("MOVSS  %s,[ESP + #%d]",
-                              Matcher::regName[reg_lo], offset);
-      else         st->print("MOVSS  [ESP + #%d],%s",
-                              offset, Matcher::regName[reg_lo]);
-    }
-#endif
-  }
-  bool is_single_byte = false;
-  if ((UseAVX > 2) && (offset != 0)) {
-    is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
-  }
-  int offset_size = 0;
-  if (UseAVX > 2 ) {
-    offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
-  } else {
-    offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
-  }
-  size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
-  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
-  return size+5+offset_size;
-}
-
-
-static int impl_movx_helper( C2_MacroAssembler *masm, bool do_size, int src_lo, int dst_lo,
-                            int src_hi, int dst_hi, int size, outputStream* st ) {
-  if (masm) {
-    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
-    __ set_managed();
-    if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
-      __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
-                as_XMMRegister(Matcher::_regEncode[src_lo]));
-    } else {
-      __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
-                as_XMMRegister(Matcher::_regEncode[src_lo]));
-    }
-#ifndef PRODUCT
-  } else if (!do_size) {
-    if (size != 0) st->print("\n\t");
-    if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
-      if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
-        st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
-      } else {
-        st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
-      }
-    } else {
-      if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
-        st->print("MOVSD  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
-      } else {
-        st->print("MOVSS  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
-      }
-    }
-#endif
-  }
-  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
-  // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
-  int sz = (UseAVX > 2) ? 6 : 4;
-  if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
-      UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
-  return size + sz;
-}
-
-static int impl_movgpr2x_helper( C2_MacroAssembler *masm, bool do_size, int src_lo, int dst_lo,
-                            int src_hi, int dst_hi, int size, outputStream* st ) {
-  // 32-bit
-  if (masm) {
-    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
-    __ set_managed();
-    __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
-             as_Register(Matcher::_regEncode[src_lo]));
-#ifndef PRODUCT
-  } else if (!do_size) {
-    st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
-#endif
-  }
-  return (UseAVX> 2) ? 6 : 4;
-}
-
-
-static int impl_movx2gpr_helper( C2_MacroAssembler *masm, bool do_size, int src_lo, int dst_lo,
-                                 int src_hi, int dst_hi, int size, outputStream* st ) {
-  // 32-bit
-  if (masm) {
-    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
-    __ set_managed();
-    __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
-             as_XMMRegister(Matcher::_regEncode[src_lo]));
-#ifndef PRODUCT
-  } else if (!do_size) {
-    st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
-#endif
-  }
-  return (UseAVX> 2) ? 6 : 4;
-}
-
-static int impl_mov_helper( C2_MacroAssembler *masm, bool do_size, int src, int dst, int size, outputStream* st ) {
-  if( masm ) {
-    emit_opcode(masm, 0x8B );
-    emit_rm    (masm, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
-#ifndef PRODUCT
-  } else if( !do_size ) {
-    if( size != 0 ) st->print("\n\t");
-    st->print("MOV    %s,%s",Matcher::regName[dst],Matcher::regName[src]);
-#endif
-  }
-  return size+2;
-}
-
-static int impl_fp_store_helper( C2_MacroAssembler *masm, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
-                                 int offset, int size, outputStream* st ) {
-  if( src_lo != FPR1L_num ) {      // Move value to top of FP stack, if not already there
-    if( masm ) {
-      emit_opcode( masm, 0xD9 );  // FLD (i.e., push it)
-      emit_d8( masm, 0xC0-1+Matcher::_regEncode[src_lo] );
-#ifndef PRODUCT
-    } else if( !do_size ) {
-      if( size != 0 ) st->print("\n\t");
-      st->print("FLD    %s",Matcher::regName[src_lo]);
-#endif
-    }
-    size += 2;
-  }
-
-  int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
-  const char *op_str;
-  int op;
-  if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
-    op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
-    op = 0xDD;
-  } else {                   // 32-bit store
-    op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
-    op = 0xD9;
-    assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
-  }
-
-  return impl_helper(masm,do_size,false,offset,st_op,op,op_str,size, st);
-}
-
-// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
-static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
-                          int src_hi, int dst_hi, uint ireg, outputStream* st);
-
-void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
-                            int stack_offset, int reg, uint ireg, outputStream* st);
-
-static void vec_stack_to_stack_helper(C2_MacroAssembler *masm, int src_offset,
-                                     int dst_offset, uint ireg, outputStream* st) {
-  if (masm) {
-    switch (ireg) {
-    case Op_VecS:
-      __ pushl(Address(rsp, src_offset));
-      __ popl (Address(rsp, dst_offset));
-      break;
-    case Op_VecD:
-      __ pushl(Address(rsp, src_offset));
-      __ popl (Address(rsp, dst_offset));
-      __ pushl(Address(rsp, src_offset+4));
-      __ popl (Address(rsp, dst_offset+4));
-      break;
-    case Op_VecX:
-      __ movdqu(Address(rsp, -16), xmm0);
-      __ movdqu(xmm0, Address(rsp, src_offset));
-      __ movdqu(Address(rsp, dst_offset), xmm0);
-      __ movdqu(xmm0, Address(rsp, -16));
-      break;
-    case Op_VecY:
-      __ vmovdqu(Address(rsp, -32), xmm0);
-      __ vmovdqu(xmm0, Address(rsp, src_offset));
-      __ vmovdqu(Address(rsp, dst_offset), xmm0);
-      __ vmovdqu(xmm0, Address(rsp, -32));
-      break;
-    case Op_VecZ:
-      __ evmovdquq(Address(rsp, -64), xmm0, 2);
-      __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
-      __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
-      __ evmovdquq(xmm0, Address(rsp, -64), 2);
-      break;
-    default:
-      ShouldNotReachHere();
-    }
-#ifndef PRODUCT
-  } else {
-    switch (ireg) {
-    case Op_VecS:
-      st->print("pushl   [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
-                "popl    [rsp + #%d]",
-                src_offset, dst_offset);
-      break;
-    case Op_VecD:
-      st->print("pushl   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
-                "popq    [rsp + #%d]\n\t"
-                "pushl   [rsp + #%d]\n\t"
-                "popq    [rsp + #%d]",
-                src_offset, dst_offset, src_offset+4, dst_offset+4);
-      break;
-     case Op_VecX:
-      st->print("movdqu  [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
-                "movdqu  xmm0, [rsp + #%d]\n\t"
-                "movdqu  [rsp + #%d], xmm0\n\t"
-                "movdqu  xmm0, [rsp - #16]",
-                src_offset, dst_offset);
-      break;
-    case Op_VecY:
-      st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
-                "vmovdqu xmm0, [rsp + #%d]\n\t"
-                "vmovdqu [rsp + #%d], xmm0\n\t"
-                "vmovdqu xmm0, [rsp - #32]",
-                src_offset, dst_offset);
-      break;
-    case Op_VecZ:
-      st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
-                "vmovdqu xmm0, [rsp + #%d]\n\t"
-                "vmovdqu [rsp + #%d], xmm0\n\t"
-                "vmovdqu xmm0, [rsp - #64]",
-                src_offset, dst_offset);
-      break;
-    default:
-      ShouldNotReachHere();
-    }
-#endif
-  }
-}
-
-uint MachSpillCopyNode::implementation( C2_MacroAssembler *masm, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
-  // Get registers to move
-  OptoReg::Name src_second = ra_->get_reg_second(in(1));
-  OptoReg::Name src_first = ra_->get_reg_first(in(1));
-  OptoReg::Name dst_second = ra_->get_reg_second(this );
-  OptoReg::Name dst_first = ra_->get_reg_first(this );
-
-  enum RC src_second_rc = rc_class(src_second);
-  enum RC src_first_rc = rc_class(src_first);
-  enum RC dst_second_rc = rc_class(dst_second);
-  enum RC dst_first_rc = rc_class(dst_first);
-
-  assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );
-
-  // Generate spill code!
-  int size = 0;
-
-  if( src_first == dst_first && src_second == dst_second )
-    return size;            // Self copy, no move
-
-  if (bottom_type()->isa_vect() != nullptr && bottom_type()->isa_vectmask() == nullptr) {
-    uint ireg = ideal_reg();
-    assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
-    assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
-    assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
-    if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
-      // mem -> mem
-      int src_offset = ra_->reg2offset(src_first);
-      int dst_offset = ra_->reg2offset(dst_first);
-      vec_stack_to_stack_helper(masm, src_offset, dst_offset, ireg, st);
-    } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
-      vec_mov_helper(masm, src_first, dst_first, src_second, dst_second, ireg, st);
-    } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
-      int stack_offset = ra_->reg2offset(dst_first);
-      vec_spill_helper(masm, false, stack_offset, src_first, ireg, st);
-    } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
-      int stack_offset = ra_->reg2offset(src_first);
-      vec_spill_helper(masm, true,  stack_offset, dst_first, ireg, st);
-    } else {
-      ShouldNotReachHere();
-    }
-    return 0;
-  }
-
-  // --------------------------------------
-  // Check for mem-mem move.  push/pop to move.
-  if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
-    if( src_second == dst_first ) { // overlapping stack copy ranges
-      assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
-      size = impl_helper(masm,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
-      size = impl_helper(masm,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
-      src_second_rc = dst_second_rc = rc_bad;  // flag as already moved the second bits
-    }
-    // move low bits
-    size = impl_helper(masm,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH  ",size, st);
-    size = impl_helper(masm,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP   ",size, st);
-    if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
-      size = impl_helper(masm,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
-      size = impl_helper(masm,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
-    }
-    return size;
-  }
-
-  // --------------------------------------
-  // Check for integer reg-reg copy
-  if( src_first_rc == rc_int && dst_first_rc == rc_int )
-    size = impl_mov_helper(masm,do_size,src_first,dst_first,size, st);
-
-  // Check for integer store
-  if( src_first_rc == rc_int && dst_first_rc == rc_stack )
-    size = impl_helper(masm,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);
-
-  // Check for integer load
-  if( src_first_rc == rc_stack && dst_first_rc == rc_int )
-    size = impl_helper(masm,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);
-
-  // Check for integer reg-xmm reg copy
-  if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
-    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
-            "no 64 bit integer-float reg moves" );
-    return impl_movgpr2x_helper(masm,do_size,src_first,dst_first,src_second, dst_second, size, st);
-  }
-  // --------------------------------------
-  // Check for float reg-reg copy
-  if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
-    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
-            (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
-    if( masm ) {
-
-      // Note the mucking with the register encode to compensate for the 0/1
-      // indexing issue mentioned in a comment in the reg_def sections
-      // for FPR registers many lines above here.
-
-      if( src_first != FPR1L_num ) {
-        emit_opcode  (masm, 0xD9 );           // FLD    ST(i)
-        emit_d8      (masm, 0xC0+Matcher::_regEncode[src_first]-1 );
-        emit_opcode  (masm, 0xDD );           // FSTP   ST(i)
-        emit_d8      (masm, 0xD8+Matcher::_regEncode[dst_first] );
-     } else {
-        emit_opcode  (masm, 0xDD );           // FST    ST(i)
-        emit_d8      (masm, 0xD0+Matcher::_regEncode[dst_first]-1 );
-     }
-#ifndef PRODUCT
-    } else if( !do_size ) {
-      if( size != 0 ) st->print("\n\t");
-      if( src_first != FPR1L_num ) st->print("FLD    %s\n\tFSTP   %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
-      else                      st->print(             "FST    %s",                            Matcher::regName[dst_first]);
-#endif
-    }
-    return size + ((src_first != FPR1L_num) ? 2+2 : 2);
-  }
-
-  // Check for float store
-  if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
-    return impl_fp_store_helper(masm,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
-  }
-
-  // Check for float load
-  if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
-    int offset = ra_->reg2offset(src_first);
-    const char *op_str;
-    int op;
-    if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
-      op_str = "FLD_D";
-      op = 0xDD;
-    } else {                   // 32-bit load
-      op_str = "FLD_S";
-      op = 0xD9;
-      assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
-    }
-    if( masm ) {
-      masm->set_inst_mark();
-      emit_opcode  (masm, op );
-      encode_RegMem(masm, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
-      emit_opcode  (masm, 0xDD );           // FSTP   ST(i)
-      emit_d8      (masm, 0xD8+Matcher::_regEncode[dst_first] );
-      masm->clear_inst_mark();
-#ifndef PRODUCT
-    } else if( !do_size ) {
-      if( size != 0 ) st->print("\n\t");
-      st->print("%s  ST,[ESP + #%d]\n\tFSTP   %s",op_str, offset,Matcher::regName[dst_first]);
-#endif
-    }
-    int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
-    return size + 3+offset_size+2;
-  }
-
-  // Check for xmm reg-reg copy
-  if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
-    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
-            (src_first+1 == src_second && dst_first+1 == dst_second),
-            "no non-adjacent float-moves" );
-    return impl_movx_helper(masm,do_size,src_first,dst_first,src_second, dst_second, size, st);
-  }
-
-  // Check for xmm reg-integer reg copy
-  if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
-    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
-            "no 64 bit float-integer reg moves" );
-    return impl_movx2gpr_helper(masm,do_size,src_first,dst_first,src_second, dst_second, size, st);
-  }
-
-  // Check for xmm store
-  if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
-    return impl_x_helper(masm,do_size,false,ra_->reg2offset(dst_first), src_first, src_second, size, st);
-  }
-
-  // Check for float xmm load
-  if( src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
-    return impl_x_helper(masm,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
-  }
-
-  // Copy from float reg to xmm reg
-  if( src_first_rc == rc_float && dst_first_rc == rc_xmm ) {
-    // copy to the top of stack from floating point reg
-    // and use LEA to preserve flags
-    if( masm ) {
-      emit_opcode(masm,0x8D);  // LEA  ESP,[ESP-8]
-      emit_rm(masm, 0x1, ESP_enc, 0x04);
-      emit_rm(masm, 0x0, 0x04, ESP_enc);
-      emit_d8(masm,0xF8);
-#ifndef PRODUCT
-    } else if( !do_size ) {
-      if( size != 0 ) st->print("\n\t");
-      st->print("LEA    ESP,[ESP-8]");
-#endif
-    }
-    size += 4;
-
-    size = impl_fp_store_helper(masm,do_size,src_first,src_second,dst_first,dst_second,0,size, st);
-
-    // Copy from the temp memory to the xmm reg.
-    size = impl_x_helper(masm,do_size,true ,0,dst_first, dst_second, size, st);
-
-    if( masm ) {
-      emit_opcode(masm,0x8D);  // LEA  ESP,[ESP+8]
-      emit_rm(masm, 0x1, ESP_enc, 0x04);
-      emit_rm(masm, 0x0, 0x04, ESP_enc);
-      emit_d8(masm,0x08);
-#ifndef PRODUCT
-    } else if( !do_size ) {
-      if( size != 0 ) st->print("\n\t");
-      st->print("LEA    ESP,[ESP+8]");
-#endif
-    }
-    size += 4;
-    return size;
-  }
-
-  // AVX-512 opmask specific spilling.
-  if (src_first_rc == rc_stack && dst_first_rc == rc_kreg) {
-    assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
-    assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
-    int offset = ra_->reg2offset(src_first);
-    if (masm != nullptr) {
-      __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
-#ifndef PRODUCT
-    } else {
-      st->print("KMOV    %s, [ESP + %d]", Matcher::regName[dst_first], offset);
-#endif
-    }
-    return 0;
-  }
-
-  if (src_first_rc == rc_kreg && dst_first_rc == rc_stack) {
-    assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
-    assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
-    int offset = ra_->reg2offset(dst_first);
-    if (masm != nullptr) {
-      __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
-#ifndef PRODUCT
-    } else {
-      st->print("KMOV    [ESP + %d], %s", offset, Matcher::regName[src_first]);
-#endif
-    }
-    return 0;
-  }
-
-  if (src_first_rc == rc_kreg && dst_first_rc == rc_int) {
-    Unimplemented();
-    return 0;
-  }
-
-  if (src_first_rc == rc_int && dst_first_rc == rc_kreg) {
-    Unimplemented();
-    return 0;
-  }
-
-  if (src_first_rc == rc_kreg && dst_first_rc == rc_kreg) {
-    assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
-    assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
-    if (masm != nullptr) {
-      __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
-#ifndef PRODUCT
-    } else {
-      st->print("KMOV    %s, %s", Matcher::regName[dst_first], Matcher::regName[src_first]);
-#endif
-    }
-    return 0;
-  }
-
-  assert( size > 0, "missed a case" );
-
-  // --------------------------------------------------------------------
-  // Check for second bits still needing moving.
-  if( src_second == dst_second )
-    return size;               // Self copy; no move
-  assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );
-
-  // Check for second word int-int move
-  if( src_second_rc == rc_int && dst_second_rc == rc_int )
-    return impl_mov_helper(masm,do_size,src_second,dst_second,size, st);
-
-  // Check for second word integer store
-  if( src_second_rc == rc_int && dst_second_rc == rc_stack )
-    return impl_helper(masm,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);
-
-  // Check for second word integer load
-  if( dst_second_rc == rc_int && src_second_rc == rc_stack )
-    return impl_helper(masm,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);
-
-  Unimplemented();
-  return 0; // Mute compiler
-}
-
-#ifndef PRODUCT
-void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
-  implementation( nullptr, ra_, false, st );
-}
-#endif
-
-void MachSpillCopyNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
-  implementation( masm, ra_, false, nullptr );
-}
-
-uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
-  return MachNode::size(ra_);
-}
-
-
-//=============================================================================
-#ifndef PRODUCT
-void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
-  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
-  int reg = ra_->get_reg_first(this);
-  st->print("LEA    %s,[ESP + #%d]",Matcher::regName[reg],offset);
-}
-#endif
-
-void BoxLockNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
-  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
-  int reg = ra_->get_encode(this);
-  if( offset >= 128 ) {
-    emit_opcode(masm, 0x8D);      // LEA  reg,[SP+offset]
-    emit_rm(masm, 0x2, reg, 0x04);
-    emit_rm(masm, 0x0, 0x04, ESP_enc);
-    emit_d32(masm, offset);
-  }
-  else {
-    emit_opcode(masm, 0x8D);      // LEA  reg,[SP+offset]
-    emit_rm(masm, 0x1, reg, 0x04);
-    emit_rm(masm, 0x0, 0x04, ESP_enc);
-    emit_d8(masm, offset);
-  }
-}
-
-uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
-  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
-  if( offset >= 128 ) {
-    return 7;
-  }
-  else {
-    return 4;
-  }
-}
-
-//=============================================================================
-#ifndef PRODUCT
-void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
-  st->print_cr(  "CMP    EAX,[ECX+4]\t# Inline cache check");
-  st->print_cr("\tJNE    SharedRuntime::handle_ic_miss_stub");
-  st->print_cr("\tNOP");
-  st->print_cr("\tNOP");
-  if( !OptoBreakpoint )
-    st->print_cr("\tNOP");
-}
-#endif
-
-void MachUEPNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
-  __ ic_check(CodeEntryAlignment);
-}
-
-uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
-  return MachNode::size(ra_); // too many variables; just compute it
-                              // the hard way
-}
-
-
-//=============================================================================
-
-// Vector calling convention not supported.
-bool Matcher::supports_vector_calling_convention() {
-  return false;
-}
-
-OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
-  Unimplemented();
-  return OptoRegPair(0, 0);
-}
-
-// Is this branch offset short enough that a short branch can be used?
-//
-// NOTE: If the platform does not provide any short branch variants, then
-//       this method should return false for offset 0.
-bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
-  // The passed offset is relative to address of the branch.
-  // On 86 a branch displacement is calculated relative to address
-  // of a next instruction.
-  offset -= br_size;
-
-  // the short version of jmpConUCF2 contains multiple branches,
-  // making the reach slightly less
-  if (rule == jmpConUCF2_rule)
-    return (-126 <= offset && offset <= 125);
-  return (-128 <= offset && offset <= 127);
-}
-
-// Return whether or not this register is ever used as an argument.  This
-// function is used on startup to build the trampoline stubs in generateOptoStub.
-// Registers not mentioned will be killed by the VM call in the trampoline, and
-// arguments in those registers not be available to the callee.
-bool Matcher::can_be_java_arg( int reg ) {
-  if(  reg == ECX_num   || reg == EDX_num   ) return true;
-  if( (reg == XMM0_num  || reg == XMM1_num ) && UseSSE>=1 ) return true;
-  if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
-  return false;
-}
-
-bool Matcher::is_spillable_arg( int reg ) {
-  return can_be_java_arg(reg);
-}
-
-uint Matcher::int_pressure_limit()
-{
-  return (INTPRESSURE == -1) ? 6 : INTPRESSURE;
-}
-
-uint Matcher::float_pressure_limit()
-{
-  return (FLOATPRESSURE == -1) ? 6 : FLOATPRESSURE;
-}
-
-bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
-  // Use hardware integer DIV instruction when
-  // it is faster than a code which use multiply.
-  // Only when constant divisor fits into 32 bit
-  // (min_jint is excluded to get only correct
-  // positive 32 bit values from negative).
-  return VM_Version::has_fast_idiv() &&
-         (divisor == (int)divisor && divisor != min_jint);
-}
-
-// Register for DIVI projection of divmodI
-RegMask Matcher::divI_proj_mask() {
-  return EAX_REG_mask();
-}
-
-// Register for MODI projection of divmodI
-RegMask Matcher::modI_proj_mask() {
-  return EDX_REG_mask();
-}
-
-// Register for DIVL projection of divmodL
-RegMask Matcher::divL_proj_mask() {
-  ShouldNotReachHere();
-  return RegMask();
-}
-
-// Register for MODL projection of divmodL
-RegMask Matcher::modL_proj_mask() {
-  ShouldNotReachHere();
-  return RegMask();
-}
-
-const RegMask Matcher::method_handle_invoke_SP_save_mask() {
-  return NO_REG_mask();
-}
-
-// Returns true if the high 32 bits of the value is known to be zero.
-bool is_operand_hi32_zero(Node* n) {
-  int opc = n->Opcode();
-  if (opc == Op_AndL) {
-    Node* o2 = n->in(2);
-    if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
-      return true;
-    }
-  }
-  if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
-    return true;
-  }
-  return false;
-}
-
-%}
-
-//----------ENCODING BLOCK-----------------------------------------------------
-// This block specifies the encoding classes used by the compiler to output
-// byte streams.  Encoding classes generate functions which are called by
-// Machine Instruction Nodes in order to generate the bit encoding of the
-// instruction.  Operands specify their base encoding interface with the
-// interface keyword.  There are currently supported four interfaces,
-// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
-// operand to generate a function which returns its register number when
-// queried.   CONST_INTER causes an operand to generate a function which
-// returns the value of the constant when queried.  MEMORY_INTER causes an
-// operand to generate four functions which return the Base Register, the
-// Index Register, the Scale Value, and the Offset Value of the operand when
-// queried.  COND_INTER causes an operand to generate six functions which
-// return the encoding code (ie - encoding bits for the instruction)
-// associated with each basic boolean condition for a conditional instruction.
-// Instructions specify two basic values for encoding.  They use the
-// ins_encode keyword to specify their encoding class (which must be one of
-// the class names specified in the encoding block), and they use the
-// opcode keyword to specify, in order, their primary, secondary, and
-// tertiary opcode.  Only the opcode sections which a particular instruction
-// needs for encoding need to be specified.
-encode %{
-  // Build emit functions for each basic byte or larger field in the intel
-  // encoding scheme (opcode, rm, sib, immediate), and call them from C++
-  // code in the enc_class source block.  Emit functions will live in the
-  // main source block for now.  In future, we can generalize this by
-  // adding a syntax that specifies the sizes of fields in an order,
-  // so that the adlc can build the emit functions automagically
-
-  // Set instruction mark in MacroAssembler. This is used only in
-  // instructions that emit bytes directly to the CodeBuffer wraped
-  // in the MacroAssembler. Should go away once all "instruct" are
-  // patched to emit bytes only using methods in MacroAssembler.
-  enc_class SetInstMark %{
-    __ set_inst_mark();
-  %}
-
-  enc_class ClearInstMark %{
-    __ clear_inst_mark();
-  %}
-
-  // Emit primary opcode
-  enc_class OpcP %{
-    emit_opcode(masm, $primary);
-  %}
-
-  // Emit secondary opcode
-  enc_class OpcS %{
-    emit_opcode(masm, $secondary);
-  %}
-
-  // Emit opcode directly
-  enc_class Opcode(immI d8) %{
-    emit_opcode(masm, $d8$$constant);
-  %}
-
-  enc_class SizePrefix %{
-    emit_opcode(masm,0x66);
-  %}
-
-  enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
-    emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
-  %}
-
-  enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
-    emit_opcode(masm,$opcode$$constant);
-    emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
-  %}
-
-  enc_class mov_r32_imm0( rRegI dst ) %{
-    emit_opcode( masm, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32  ,imm32
-    emit_d32   ( masm, 0x0  );             //                         imm32==0x0
-  %}
-
-  enc_class cdq_enc %{
-    // Full implementation of Java idiv and irem; checks for
-    // special case as described in JVM spec., p.243 & p.271.
-    //
-    //         normal case                           special case
-    //
-    // input : rax,: dividend                         min_int
-    //         reg: divisor                          -1
-    //
-    // output: rax,: quotient  (= rax, idiv reg)       min_int
-    //         rdx: remainder (= rax, irem reg)       0
-    //
-    //  Code sequnce:
-    //
-    //  81 F8 00 00 00 80    cmp         rax,80000000h
-    //  0F 85 0B 00 00 00    jne         normal_case
-    //  33 D2                xor         rdx,edx
-    //  83 F9 FF             cmp         rcx,0FFh
-    //  0F 84 03 00 00 00    je          done
-    //                  normal_case:
-    //  99                   cdq
-    //  F7 F9                idiv        rax,ecx
-    //                  done:
-    //
-    emit_opcode(masm,0x81); emit_d8(masm,0xF8);
-    emit_opcode(masm,0x00); emit_d8(masm,0x00);
-    emit_opcode(masm,0x00); emit_d8(masm,0x80);                     // cmp rax,80000000h
-    emit_opcode(masm,0x0F); emit_d8(masm,0x85);
-    emit_opcode(masm,0x0B); emit_d8(masm,0x00);
-    emit_opcode(masm,0x00); emit_d8(masm,0x00);                     // jne normal_case
-    emit_opcode(masm,0x33); emit_d8(masm,0xD2);                     // xor rdx,edx
-    emit_opcode(masm,0x83); emit_d8(masm,0xF9); emit_d8(masm,0xFF); // cmp rcx,0FFh
-    emit_opcode(masm,0x0F); emit_d8(masm,0x84);
-    emit_opcode(masm,0x03); emit_d8(masm,0x00);
-    emit_opcode(masm,0x00); emit_d8(masm,0x00);                     // je done
-    // normal_case:
-    emit_opcode(masm,0x99);                                         // cdq
-    // idiv (note: must be emitted by the user of this rule)
-    // normal:
-  %}
-
-  // Dense encoding for older common ops
-  enc_class Opc_plus(immI opcode, rRegI reg) %{
-    emit_opcode(masm, $opcode$$constant + $reg$$reg);
-  %}
-
-
-  // Opcde enc_class for 8/32 bit immediate instructions with sign-extension
-  enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
-    // Check for 8-bit immediate, and set sign extend bit in opcode
-    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
-      emit_opcode(masm, $primary | 0x02);
-    }
-    else {                          // If 32-bit immediate
-      emit_opcode(masm, $primary);
-    }
-  %}
-
-  enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m
-    // Emit primary opcode and set sign-extend bit
-    // Check for 8-bit immediate, and set sign extend bit in opcode
-    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
-      emit_opcode(masm, $primary | 0x02);    }
-    else {                          // If 32-bit immediate
-      emit_opcode(masm, $primary);
-    }
-    // Emit r/m byte with secondary opcode, after primary opcode.
-    emit_rm(masm, 0x3, $secondary, $dst$$reg);
-  %}
-
-  enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
-    // Check for 8-bit immediate, and set sign extend bit in opcode
-    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
-      $$$emit8$imm$$constant;
-    }
-    else {                          // If 32-bit immediate
-      // Output immediate
-      $$$emit32$imm$$constant;
-    }
-  %}
-
-  enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
-    // Emit primary opcode and set sign-extend bit
-    // Check for 8-bit immediate, and set sign extend bit in opcode
-    int con = (int)$imm$$constant; // Throw away top bits
-    emit_opcode(masm, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
-    // Emit r/m byte with secondary opcode, after primary opcode.
-    emit_rm(masm, 0x3, $secondary, $dst$$reg);
-    if ((con >= -128) && (con <= 127)) emit_d8 (masm,con);
-    else                               emit_d32(masm,con);
-  %}
-
-  enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
-    // Emit primary opcode and set sign-extend bit
-    // Check for 8-bit immediate, and set sign extend bit in opcode
-    int con = (int)($imm$$constant >> 32); // Throw away bottom bits
-    emit_opcode(masm, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
-    // Emit r/m byte with tertiary opcode, after primary opcode.
-    emit_rm(masm, 0x3, $tertiary, HIGH_FROM_LOW_ENC($dst$$reg));
-    if ((con >= -128) && (con <= 127)) emit_d8 (masm,con);
-    else                               emit_d32(masm,con);
-  %}
-
-  enc_class OpcSReg (rRegI dst) %{    // BSWAP
-    emit_cc(masm, $secondary, $dst$$reg );
-  %}
-
-  enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
-    int destlo = $dst$$reg;
-    int desthi = HIGH_FROM_LOW_ENC(destlo);
-    // bswap lo
-    emit_opcode(masm, 0x0F);
-    emit_cc(masm, 0xC8, destlo);
-    // bswap hi
-    emit_opcode(masm, 0x0F);
-    emit_cc(masm, 0xC8, desthi);
-    // xchg lo and hi
-    emit_opcode(masm, 0x87);
-    emit_rm(masm, 0x3, destlo, desthi);
-  %}
-
-  enc_class RegOpc (rRegI div) %{    // IDIV, IMOD, JMP indirect, ...
-    emit_rm(masm, 0x3, $secondary, $div$$reg );
-  %}
-
-  enc_class enc_cmov(cmpOp cop ) %{ // CMOV
-    $$$emit8$primary;
-    emit_cc(masm, $secondary, $cop$$cmpcode);
-  %}
-
-  enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
-    int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
-    emit_d8(masm, op >> 8 );
-    emit_d8(masm, op & 255);
-  %}
-
-  // emulate a CMOV with a conditional branch around a MOV
-  enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
-    // Invert sense of branch from sense of CMOV
-    emit_cc( masm, 0x70, ($cop$$cmpcode^1) );
-    emit_d8( masm, $brOffs$$constant );
-  %}
-
-  enc_class enc_PartialSubtypeCheck( ) %{
-    Register Redi = as_Register(EDI_enc); // result register
-    Register Reax = as_Register(EAX_enc); // super class
-    Register Recx = as_Register(ECX_enc); // killed
-    Register Resi = as_Register(ESI_enc); // sub class
-    Label miss;
-
-    // NB: Callers may assume that, when $result is a valid register,
-    // check_klass_subtype_slow_path sets it to a nonzero value.
-     __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
-                                     nullptr, &miss,
-                                     /*set_cond_codes:*/ true);
-    if ($primary) {
-      __ xorptr(Redi, Redi);
-    }
-    __ bind(miss);
-  %}
-
-  enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
-    int start = __ offset();
-    if (UseSSE >= 2) {
-      if (VerifyFPU) {
-        __ verify_FPU(0, "must be empty in SSE2+ mode");
-      }
-    } else {
-      // External c_calling_convention expects the FPU stack to be 'clean'.
-      // Compiled code leaves it dirty.  Do cleanup now.
-      __ empty_FPU_stack();
-    }
-    if (sizeof_FFree_Float_Stack_All == -1) {
-      sizeof_FFree_Float_Stack_All = __ offset() - start;
-    } else {
-      assert(__ offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
-    }
-  %}
-
-  enc_class Verify_FPU_For_Leaf %{
-    if( VerifyFPU ) {
-      __ verify_FPU( -3, "Returning from Runtime Leaf call");
-    }
-  %}
-
-  enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
-    // This is the instruction starting address for relocation info.
-    __ set_inst_mark();
-    $$$emit8$primary;
-    // CALL directly to the runtime
-    emit_d32_reloc(masm, ($meth$$method - (int)(__ pc()) - 4),
-                runtime_call_Relocation::spec(), RELOC_IMM32 );
-    __ clear_inst_mark();
-    __ post_call_nop();
-
-    if (UseSSE >= 2) {
-      BasicType rt = tf()->return_type();
-
-      if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
-        // A C runtime call where the return value is unused.  In SSE2+
-        // mode the result needs to be removed from the FPU stack.  It's
-        // likely that this function call could be removed by the
-        // optimizer if the C function is a pure function.
-        __ ffree(0);
-      } else if (rt == T_FLOAT) {
-        __ lea(rsp, Address(rsp, -4));
-        __ fstp_s(Address(rsp, 0));
-        __ movflt(xmm0, Address(rsp, 0));
-        __ lea(rsp, Address(rsp,  4));
-      } else if (rt == T_DOUBLE) {
-        __ lea(rsp, Address(rsp, -8));
-        __ fstp_d(Address(rsp, 0));
-        __ movdbl(xmm0, Address(rsp, 0));
-        __ lea(rsp, Address(rsp,  8));
-      }
-    }
-  %}
-
-  enc_class pre_call_resets %{
-    // If method sets FPU control word restore it here
-    debug_only(int off0 = __ offset());
-    if (ra_->C->in_24_bit_fp_mode()) {
-      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
-    }
-    // Clear upper bits of YMM registers when current compiled code uses
-    // wide vectors to avoid AVX <-> SSE transition penalty during call.
-    __ vzeroupper();
-    debug_only(int off1 = __ offset());
-    assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
-  %}
-
-  enc_class post_call_FPU %{
-    // If method sets FPU control word do it here also
-    if (Compile::current()->in_24_bit_fp_mode()) {
-      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
-    }
-  %}
-
-  enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
-    // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
-    // who we intended to call.
-    __ set_inst_mark();
-    $$$emit8$primary;
-
-    if (!_method) {
-      emit_d32_reloc(masm, ($meth$$method - (int)(__ pc()) - 4),
-                     runtime_call_Relocation::spec(),
-                     RELOC_IMM32);
-      __ clear_inst_mark();
-      __ post_call_nop();
-    } else {
-      int method_index = resolved_method_index(masm);
-      RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
-                                                  : static_call_Relocation::spec(method_index);
-      emit_d32_reloc(masm, ($meth$$method - (int)(__ pc()) - 4),
-                     rspec, RELOC_DISP32);
-      __ post_call_nop();
-      address mark = __ inst_mark();
-      if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
-        // Calls of the same statically bound method can share
-        // a stub to the interpreter.
-        __ code()->shared_stub_to_interp_for(_method, __ code()->insts()->mark_off());
-        __ clear_inst_mark();
-      } else {
-        // Emit stubs for static call.
-        address stub = CompiledDirectCall::emit_to_interp_stub(masm, mark);
-        __ clear_inst_mark();
-        if (stub == nullptr) {
-          ciEnv::current()->record_failure("CodeCache is full");
-          return;
-        }
-      }
-    }
-  %}
-
-  enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
-    __ ic_call((address)$meth$$method, resolved_method_index(masm));
-    __ post_call_nop();
-  %}
-
-  enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
-    int disp = in_bytes(Method::from_compiled_offset());
-    assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");
-
-    // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())]
-    __ set_inst_mark();
-    $$$emit8$primary;
-    emit_rm(masm, 0x01, $secondary, EAX_enc );  // R/M byte
-    emit_d8(masm, disp);             // Displacement
-    __ clear_inst_mark();
-    __ post_call_nop();
-  %}
-
-  enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
-    $$$emit8$primary;
-    emit_rm(masm, 0x3, $secondary, $dst$$reg);
-    $$$emit8$shift$$constant;
-  %}
-
-  enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
-    // Load immediate does not have a zero or sign extended version
-    // for 8-bit immediates
-    emit_opcode(masm, 0xB8 + $dst$$reg);
-    $$$emit32$src$$constant;
-  %}
-
-  enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
-    // Load immediate does not have a zero or sign extended version
-    // for 8-bit immediates
-    emit_opcode(masm, $primary + $dst$$reg);
-    $$$emit32$src$$constant;
-  %}
-
-  enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
-    // Load immediate does not have a zero or sign extended version
-    // for 8-bit immediates
-    int dst_enc = $dst$$reg;
-    int src_con = $src$$constant & 0x0FFFFFFFFL;
-    if (src_con == 0) {
-      // xor dst, dst
-      emit_opcode(masm, 0x33);
-      emit_rm(masm, 0x3, dst_enc, dst_enc);
-    } else {
-      emit_opcode(masm, $primary + dst_enc);
-      emit_d32(masm, src_con);
-    }
-  %}
-
-  enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
-    // Load immediate does not have a zero or sign extended version
-    // for 8-bit immediates
-    int dst_enc = $dst$$reg + 2;
-    int src_con = ((julong)($src$$constant)) >> 32;
-    if (src_con == 0) {
-      // xor dst, dst
-      emit_opcode(masm, 0x33);
-      emit_rm(masm, 0x3, dst_enc, dst_enc);
-    } else {
-      emit_opcode(masm, $primary + dst_enc);
-      emit_d32(masm, src_con);
-    }
-  %}
-
-
-  // Encode a reg-reg copy.  If it is useless, then empty encoding.
-  enc_class enc_Copy( rRegI dst, rRegI src ) %{
-    encode_Copy( masm, $dst$$reg, $src$$reg );
-  %}
-
-  enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
-    encode_Copy( masm, $dst$$reg, $src$$reg );
-  %}
-
-  enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
-    emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
-  %}
-
-  enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
-    $$$emit8$primary;
-    emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
-  %}
-
-  enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
-    $$$emit8$secondary;
-    emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg));
-  %}
-
-  enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
-    emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
-  %}
-
-  enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
-    emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg));
-  %}
-
-  enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
-    emit_rm(masm, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($src$$reg));
-  %}
-
-  enc_class Con32 (immI src) %{    // Con32(storeImmI)
-    // Output immediate
-    $$$emit32$src$$constant;
-  %}
-
-  enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
-    // Output Float immediate bits
-    jfloat jf = $src$$constant;
-    int    jf_as_bits = jint_cast( jf );
-    emit_d32(masm, jf_as_bits);
-  %}
-
-  enc_class Con32F_as_bits(immF src) %{      // storeX_imm
-    // Output Float immediate bits
-    jfloat jf = $src$$constant;
-    int    jf_as_bits = jint_cast( jf );
-    emit_d32(masm, jf_as_bits);
-  %}
-
-  enc_class Con16 (immI src) %{    // Con16(storeImmI)
-    // Output immediate
-    $$$emit16$src$$constant;
-  %}
-
-  enc_class Con_d32(immI src) %{
-    emit_d32(masm,$src$$constant);
-  %}
-
-  enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
-    // Output immediate memory reference
-    emit_rm(masm, 0x00, $t1$$reg, 0x05 );
-    emit_d32(masm, 0x00);
-  %}
-
-  enc_class lock_prefix( ) %{
-    emit_opcode(masm,0xF0);         // [Lock]
-  %}
-
-  // Cmp-xchg long value.
-  // Note: we need to swap rbx, and rcx before and after the
-  //       cmpxchg8 instruction because the instruction uses
-  //       rcx as the high order word of the new value to store but
-  //       our register encoding uses rbx,.
-  enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{
-
-    // XCHG  rbx,ecx
-    emit_opcode(masm,0x87);
-    emit_opcode(masm,0xD9);
-    // [Lock]
-    emit_opcode(masm,0xF0);
-    // CMPXCHG8 [Eptr]
-    emit_opcode(masm,0x0F);
-    emit_opcode(masm,0xC7);
-    emit_rm( masm, 0x0, 1, $mem_ptr$$reg );
-    // XCHG  rbx,ecx
-    emit_opcode(masm,0x87);
-    emit_opcode(masm,0xD9);
-  %}
-
-  enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
-    // [Lock]
-    emit_opcode(masm,0xF0);
-
-    // CMPXCHG [Eptr]
-    emit_opcode(masm,0x0F);
-    emit_opcode(masm,0xB1);
-    emit_rm( masm, 0x0, 1, $mem_ptr$$reg );
-  %}
-
-  enc_class enc_cmpxchgb(eSIRegP mem_ptr) %{
-    // [Lock]
-    emit_opcode(masm,0xF0);
-
-    // CMPXCHGB [Eptr]
-    emit_opcode(masm,0x0F);
-    emit_opcode(masm,0xB0);
-    emit_rm( masm, 0x0, 1, $mem_ptr$$reg );
-  %}
-
-  enc_class enc_cmpxchgw(eSIRegP mem_ptr) %{
-    // [Lock]
-    emit_opcode(masm,0xF0);
-
-    // 16-bit mode
-    emit_opcode(masm, 0x66);
-
-    // CMPXCHGW [Eptr]
-    emit_opcode(masm,0x0F);
-    emit_opcode(masm,0xB1);
-    emit_rm( masm, 0x0, 1, $mem_ptr$$reg );
-  %}
-
-  enc_class enc_flags_ne_to_boolean( iRegI res ) %{
-    int res_encoding = $res$$reg;
-
-    // MOV  res,0
-    emit_opcode( masm, 0xB8 + res_encoding);
-    emit_d32( masm, 0 );
-    // JNE,s  fail
-    emit_opcode(masm,0x75);
-    emit_d8(masm, 5 );
-    // MOV  res,1
-    emit_opcode( masm, 0xB8 + res_encoding);
-    emit_d32( masm, 1 );
-    // fail:
-  %}
-
-  enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
-    int reg_encoding = $ereg$$reg;
-    int base  = $mem$$base;
-    int index = $mem$$index;
-    int scale = $mem$$scale;
-    int displace = $mem$$disp;
-    relocInfo::relocType disp_reloc = $mem->disp_reloc();
-    encode_RegMem(masm, reg_encoding, base, index, scale, displace, disp_reloc);
-  %}
-
-  enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
-    int reg_encoding = HIGH_FROM_LOW_ENC($ereg$$reg);  // Hi register of pair, computed from lo
-    int base  = $mem$$base;
-    int index = $mem$$index;
-    int scale = $mem$$scale;
-    int displace = $mem$$disp + 4;      // Offset is 4 further in memory
-    assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
-    encode_RegMem(masm, reg_encoding, base, index, scale, displace, relocInfo::none);
-  %}
-
-  enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
-    int r1, r2;
-    if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW_ENC($dst$$reg); }
-    else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW_ENC($dst$$reg); }
-    emit_opcode(masm,0x0F);
-    emit_opcode(masm,$tertiary);
-    emit_rm(masm, 0x3, r1, r2);
-    emit_d8(masm,$cnt$$constant);
-    emit_d8(masm,$primary);
-    emit_rm(masm, 0x3, $secondary, r1);
-    emit_d8(masm,$cnt$$constant);
-  %}
-
-  enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
-    emit_opcode( masm, 0x8B ); // Move
-    emit_rm(masm, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg));
-    if( $cnt$$constant > 32 ) { // Shift, if not by zero
-      emit_d8(masm,$primary);
-      emit_rm(masm, 0x3, $secondary, $dst$$reg);
-      emit_d8(masm,$cnt$$constant-32);
-    }
-    emit_d8(masm,$primary);
-    emit_rm(masm, 0x3, $secondary, HIGH_FROM_LOW_ENC($dst$$reg));
-    emit_d8(masm,31);
-  %}
-
-  enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
-    int r1, r2;
-    if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW_ENC($dst$$reg); }
-    else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW_ENC($dst$$reg); }
-
-    emit_opcode( masm, 0x8B ); // Move r1,r2
-    emit_rm(masm, 0x3, r1, r2);
-    if( $cnt$$constant > 32 ) { // Shift, if not by zero
-      emit_opcode(masm,$primary);
-      emit_rm(masm, 0x3, $secondary, r1);
-      emit_d8(masm,$cnt$$constant-32);
-    }
-    emit_opcode(masm,0x33);  // XOR r2,r2
-    emit_rm(masm, 0x3, r2, r2);
-  %}
-
-  // Clone of RegMem but accepts an extra parameter to access each
-  // half of a double in memory; it never needs relocation info.
-  enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
-    emit_opcode(masm,$opcode$$constant);
-    int reg_encoding = $rm_reg$$reg;
-    int base     = $mem$$base;
-    int index    = $mem$$index;
-    int scale    = $mem$$scale;
-    int displace = $mem$$disp + $disp_for_half$$constant;
-    relocInfo::relocType disp_reloc = relocInfo::none;
-    encode_RegMem(masm, reg_encoding, base, index, scale, displace, disp_reloc);
-  %}
-
-  // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
-  //
-  // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
-  // and it never needs relocation information.
-  // Frequently used to move data between FPU's Stack Top and memory.
-  enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
-    int rm_byte_opcode = $rm_opcode$$constant;
-    int base     = $mem$$base;
-    int index    = $mem$$index;
-    int scale    = $mem$$scale;
-    int displace = $mem$$disp;
-    assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
-    encode_RegMem(masm, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
-  %}
-
-  enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
-    int rm_byte_opcode = $rm_opcode$$constant;
-    int base     = $mem$$base;
-    int index    = $mem$$index;
-    int scale    = $mem$$scale;
-    int displace = $mem$$disp;
-    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
-    encode_RegMem(masm, rm_byte_opcode, base, index, scale, displace, disp_reloc);
-  %}
-
-  enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
-    int reg_encoding = $dst$$reg;
-    int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
-    int index        = 0x04;            // 0x04 indicates no index
-    int scale        = 0x00;            // 0x00 indicates no scale
-    int displace     = $src1$$constant; // 0x00 indicates no displacement
-    relocInfo::relocType disp_reloc = relocInfo::none;
-    encode_RegMem(masm, reg_encoding, base, index, scale, displace, disp_reloc);
-  %}
-
-  enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
-    // Compare dst,src
-    emit_opcode(masm,0x3B);
-    emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
-    // jmp dst < src around move
-    emit_opcode(masm,0x7C);
-    emit_d8(masm,2);
-    // move dst,src
-    emit_opcode(masm,0x8B);
-    emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
-  %}
-
-  enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
-    // Compare dst,src
-    emit_opcode(masm,0x3B);
-    emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
-    // jmp dst > src around move
-    emit_opcode(masm,0x7F);
-    emit_d8(masm,2);
-    // move dst,src
-    emit_opcode(masm,0x8B);
-    emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
-  %}
-
-  enc_class enc_FPR_store(memory mem, regDPR src) %{
-    // If src is FPR1, we can just FST to store it.
-    // Else we need to FLD it to FPR1, then FSTP to store/pop it.
-    int reg_encoding = 0x2; // Just store
-    int base  = $mem$$base;
-    int index = $mem$$index;
-    int scale = $mem$$scale;
-    int displace = $mem$$disp;
-    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
-    if( $src$$reg != FPR1L_enc ) {
-      reg_encoding = 0x3;  // Store & pop
-      emit_opcode( masm, 0xD9 ); // FLD (i.e., push it)
-      emit_d8( masm, 0xC0-1+$src$$reg );
-    }
-    __ set_inst_mark();       // Mark start of opcode for reloc info in mem operand
-    emit_opcode(masm,$primary);
-    encode_RegMem(masm, reg_encoding, base, index, scale, displace, disp_reloc);
-    __ clear_inst_mark();
-  %}
-
-  enc_class neg_reg(rRegI dst) %{
-    // NEG $dst
-    emit_opcode(masm,0xF7);
-    emit_rm(masm, 0x3, 0x03, $dst$$reg );
-  %}
-
-  enc_class setLT_reg(eCXRegI dst) %{
-    // SETLT $dst
-    emit_opcode(masm,0x0F);
-    emit_opcode(masm,0x9C);
-    emit_rm( masm, 0x3, 0x4, $dst$$reg );
-  %}
-
-  enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
-    int tmpReg = $tmp$$reg;
-
-    // SUB $p,$q
-    emit_opcode(masm,0x2B);
-    emit_rm(masm, 0x3, $p$$reg, $q$$reg);
-    // SBB $tmp,$tmp
-    emit_opcode(masm,0x1B);
-    emit_rm(masm, 0x3, tmpReg, tmpReg);
-    // AND $tmp,$y
-    emit_opcode(masm,0x23);
-    emit_rm(masm, 0x3, tmpReg, $y$$reg);
-    // ADD $p,$tmp
-    emit_opcode(masm,0x03);
-    emit_rm(masm, 0x3, $p$$reg, tmpReg);
-  %}
-
-  enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
-    // TEST shift,32
-    emit_opcode(masm,0xF7);
-    emit_rm(masm, 0x3, 0, ECX_enc);
-    emit_d32(masm,0x20);
-    // JEQ,s small
-    emit_opcode(masm, 0x74);
-    emit_d8(masm, 0x04);
-    // MOV    $dst.hi,$dst.lo
-    emit_opcode( masm, 0x8B );
-    emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg );
-    // CLR    $dst.lo
-    emit_opcode(masm, 0x33);
-    emit_rm(masm, 0x3, $dst$$reg, $dst$$reg);
-// small:
-    // SHLD   $dst.hi,$dst.lo,$shift
-    emit_opcode(masm,0x0F);
-    emit_opcode(masm,0xA5);
-    emit_rm(masm, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg));
-    // SHL    $dst.lo,$shift"
-    emit_opcode(masm,0xD3);
-    emit_rm(masm, 0x3, 0x4, $dst$$reg );
-  %}
-
-  enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
-    // TEST shift,32
-    emit_opcode(masm,0xF7);
-    emit_rm(masm, 0x3, 0, ECX_enc);
-    emit_d32(masm,0x20);
-    // JEQ,s small
-    emit_opcode(masm, 0x74);
-    emit_d8(masm, 0x04);
-    // MOV    $dst.lo,$dst.hi
-    emit_opcode( masm, 0x8B );
-    emit_rm(masm, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
-    // CLR    $dst.hi
-    emit_opcode(masm, 0x33);
-    emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($dst$$reg));
-// small:
-    // SHRD   $dst.lo,$dst.hi,$shift
-    emit_opcode(masm,0x0F);
-    emit_opcode(masm,0xAD);
-    emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg);
-    // SHR    $dst.hi,$shift"
-    emit_opcode(masm,0xD3);
-    emit_rm(masm, 0x3, 0x5, HIGH_FROM_LOW_ENC($dst$$reg) );
-  %}
-
-  enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
-    // TEST shift,32
-    emit_opcode(masm,0xF7);
-    emit_rm(masm, 0x3, 0, ECX_enc);
-    emit_d32(masm,0x20);
-    // JEQ,s small
-    emit_opcode(masm, 0x74);
-    emit_d8(masm, 0x05);
-    // MOV    $dst.lo,$dst.hi
-    emit_opcode( masm, 0x8B );
-    emit_rm(masm, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
-    // SAR    $dst.hi,31
-    emit_opcode(masm, 0xC1);
-    emit_rm(masm, 0x3, 7, HIGH_FROM_LOW_ENC($dst$$reg) );
-    emit_d8(masm, 0x1F );
-// small:
-    // SHRD   $dst.lo,$dst.hi,$shift
-    emit_opcode(masm,0x0F);
-    emit_opcode(masm,0xAD);
-    emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg);
-    // SAR    $dst.hi,$shift"
-    emit_opcode(masm,0xD3);
-    emit_rm(masm, 0x3, 0x7, HIGH_FROM_LOW_ENC($dst$$reg) );
-  %}
-
-
-  // ----------------- Encodings for floating point unit -----------------
-  // May leave result in FPU-TOS or FPU reg depending on opcodes
-  enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
-    $$$emit8$primary;
-    emit_rm(masm, 0x3, $secondary, $src$$reg );
-  %}
-
-  // Pop argument in FPR0 with FSTP ST(0)
-  enc_class PopFPU() %{
-    emit_opcode( masm, 0xDD );
-    emit_d8( masm, 0xD8 );
-  %}
-
-  // !!!!! equivalent to Pop_Reg_F
-  enc_class Pop_Reg_DPR( regDPR dst ) %{
-    emit_opcode( masm, 0xDD );           // FSTP   ST(i)
-    emit_d8( masm, 0xD8+$dst$$reg );
-  %}
-
-  enc_class Push_Reg_DPR( regDPR dst ) %{
-    emit_opcode( masm, 0xD9 );
-    emit_d8( masm, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
-  %}
-
-  enc_class strictfp_bias1( regDPR dst ) %{
-    emit_opcode( masm, 0xDB );           // FLD m80real
-    emit_opcode( masm, 0x2D );
-    emit_d32( masm, (int)StubRoutines::x86::addr_fpu_subnormal_bias1() );
-    emit_opcode( masm, 0xDE );           // FMULP ST(dst), ST0
-    emit_opcode( masm, 0xC8+$dst$$reg );
-  %}
-
-  enc_class strictfp_bias2( regDPR dst ) %{
-    emit_opcode( masm, 0xDB );           // FLD m80real
-    emit_opcode( masm, 0x2D );
-    emit_d32( masm, (int)StubRoutines::x86::addr_fpu_subnormal_bias2() );
-    emit_opcode( masm, 0xDE );           // FMULP ST(dst), ST0
-    emit_opcode( masm, 0xC8+$dst$$reg );
-  %}
-
-  // Special case for moving an integer register to a stack slot.
-  enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
-    store_to_stackslot( masm, $primary, $src$$reg, $dst$$disp );
-  %}
-
-  // Special case for moving a register to a stack slot.
-  enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
-    // Opcode already emitted
-    emit_rm( masm, 0x02, $src$$reg, ESP_enc );   // R/M byte
-    emit_rm( masm, 0x00, ESP_enc, ESP_enc);          // SIB byte
-    emit_d32(masm, $dst$$disp);   // Displacement
-  %}
-
-  // Push the integer in stackSlot 'src' onto FP-stack
-  enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
-    store_to_stackslot( masm, $primary, $secondary, $src$$disp );
-  %}
-
-  // Push FPU's TOS float to a stack-slot, and pop FPU-stack
-  enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
-    store_to_stackslot( masm, 0xD9, 0x03, $dst$$disp );
-  %}
-
-  // Same as Pop_Mem_F except for opcode
-  // Push FPU's TOS double to a stack-slot, and pop FPU-stack
-  enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
-    store_to_stackslot( masm, 0xDD, 0x03, $dst$$disp );
-  %}
-
-  enc_class Pop_Reg_FPR( regFPR dst ) %{
-    emit_opcode( masm, 0xDD );           // FSTP   ST(i)
-    emit_d8( masm, 0xD8+$dst$$reg );
-  %}
-
-  enc_class Push_Reg_FPR( regFPR dst ) %{
-    emit_opcode( masm, 0xD9 );           // FLD    ST(i-1)
-    emit_d8( masm, 0xC0-1+$dst$$reg );
-  %}
-
-  // Push FPU's float to a stack-slot, and pop FPU-stack
-  enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
-    int pop = 0x02;
-    if ($src$$reg != FPR1L_enc) {
-      emit_opcode( masm, 0xD9 );         // FLD    ST(i-1)
-      emit_d8( masm, 0xC0-1+$src$$reg );
-      pop = 0x03;
-    }
-    store_to_stackslot( masm, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
-  %}
-
-  // Push FPU's double to a stack-slot, and pop FPU-stack
-  enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
-    int pop = 0x02;
-    if ($src$$reg != FPR1L_enc) {
-      emit_opcode( masm, 0xD9 );         // FLD    ST(i-1)
-      emit_d8( masm, 0xC0-1+$src$$reg );
-      pop = 0x03;
-    }
-    store_to_stackslot( masm, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
-  %}
-
-  // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
-  enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
-    int pop = 0xD0 - 1; // -1 since we skip FLD
-    if ($src$$reg != FPR1L_enc) {
-      emit_opcode( masm, 0xD9 );         // FLD    ST(src-1)
-      emit_d8( masm, 0xC0-1+$src$$reg );
-      pop = 0xD8;
-    }
-    emit_opcode( masm, 0xDD );
-    emit_d8( masm, pop+$dst$$reg );      // FST<P> ST(i)
-  %}
-
-
-  enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
-    // load dst in FPR0
-    emit_opcode( masm, 0xD9 );
-    emit_d8( masm, 0xC0-1+$dst$$reg );
-    if ($src$$reg != FPR1L_enc) {
-      // fincstp
-      emit_opcode (masm, 0xD9);
-      emit_opcode (masm, 0xF7);
-      // swap src with FPR1:
-      // FXCH FPR1 with src
-      emit_opcode(masm, 0xD9);
-      emit_d8(masm, 0xC8-1+$src$$reg );
-      // fdecstp
-      emit_opcode (masm, 0xD9);
-      emit_opcode (masm, 0xF6);
-    }
-  %}
-
-  enc_class Push_ResultD(regD dst) %{
-    __ fstp_d(Address(rsp, 0));
-    __ movdbl($dst$$XMMRegister, Address(rsp, 0));
-    __ addptr(rsp, 8);
-  %}
-
-  enc_class Push_ResultF(regF dst, immI d8) %{
-    __ fstp_s(Address(rsp, 0));
-    __ movflt($dst$$XMMRegister, Address(rsp, 0));
-    __ addptr(rsp, $d8$$constant);
-  %}
-
-  enc_class Push_SrcD(regD src) %{
-    __ subptr(rsp, 8);
-    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
-    __ fld_d(Address(rsp, 0));
-  %}
-
-  enc_class push_stack_temp_qword() %{
-    __ subptr(rsp, 8);
-  %}
-
-  enc_class pop_stack_temp_qword() %{
-    __ addptr(rsp, 8);
-  %}
-
-  enc_class push_xmm_to_fpr1(regD src) %{
-    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
-    __ fld_d(Address(rsp, 0));
-  %}
-
-  enc_class fnstsw_sahf_skip_parity() %{
-    // fnstsw ax
-    emit_opcode( masm, 0xDF );
-    emit_opcode( masm, 0xE0 );
-    // sahf
-    emit_opcode( masm, 0x9E );
-    // jnp  ::skip
-    emit_opcode( masm, 0x7B );
-    emit_opcode( masm, 0x05 );
-  %}
-
-  enc_class fpu_flags() %{
-    // fnstsw_ax
-    emit_opcode( masm, 0xDF);
-    emit_opcode( masm, 0xE0);
-    // test ax,0x0400
-    emit_opcode( masm, 0x66 );   // operand-size prefix for 16-bit immediate
-    emit_opcode( masm, 0xA9 );
-    emit_d16   ( masm, 0x0400 );
-    // // // This sequence works, but stalls for 12-16 cycles on PPro
-    // // test rax,0x0400
-    // emit_opcode( masm, 0xA9 );
-    // emit_d32   ( masm, 0x00000400 );
-    //
-    // jz exit (no unordered comparison)
-    emit_opcode( masm, 0x74 );
-    emit_d8    ( masm, 0x02 );
-    // mov ah,1 - treat as LT case (set carry flag)
-    emit_opcode( masm, 0xB4 );
-    emit_d8    ( masm, 0x01 );
-    // sahf
-    emit_opcode( masm, 0x9E);
-  %}
-
-  enc_class cmpF_P6_fixup() %{
-    // Fixup the integer flags in case comparison involved a NaN
-    //
-    // JNP exit (no unordered comparison, P-flag is set by NaN)
-    emit_opcode( masm, 0x7B );
-    emit_d8    ( masm, 0x03 );
-    // MOV AH,1 - treat as LT case (set carry flag)
-    emit_opcode( masm, 0xB4 );
-    emit_d8    ( masm, 0x01 );
-    // SAHF
-    emit_opcode( masm, 0x9E);
-    // NOP     // target for branch to avoid branch to branch
-    emit_opcode( masm, 0x90);
-  %}
-
-//     fnstsw_ax();
-//     sahf();
-//     movl(dst, nan_result);
-//     jcc(Assembler::parity, exit);
-//     movl(dst, less_result);
-//     jcc(Assembler::below, exit);
-//     movl(dst, equal_result);
-//     jcc(Assembler::equal, exit);
-//     movl(dst, greater_result);
-
-// less_result     =  1;
-// greater_result  = -1;
-// equal_result    = 0;
-// nan_result      = -1;
-
-  enc_class CmpF_Result(rRegI dst) %{
-    // fnstsw_ax();
-    emit_opcode( masm, 0xDF);
-    emit_opcode( masm, 0xE0);
-    // sahf
-    emit_opcode( masm, 0x9E);
-    // movl(dst, nan_result);
-    emit_opcode( masm, 0xB8 + $dst$$reg);
-    emit_d32( masm, -1 );
-    // jcc(Assembler::parity, exit);
-    emit_opcode( masm, 0x7A );
-    emit_d8    ( masm, 0x13 );
-    // movl(dst, less_result);
-    emit_opcode( masm, 0xB8 + $dst$$reg);
-    emit_d32( masm, -1 );
-    // jcc(Assembler::below, exit);
-    emit_opcode( masm, 0x72 );
-    emit_d8    ( masm, 0x0C );
-    // movl(dst, equal_result);
-    emit_opcode( masm, 0xB8 + $dst$$reg);
-    emit_d32( masm, 0 );
-    // jcc(Assembler::equal, exit);
-    emit_opcode( masm, 0x74 );
-    emit_d8    ( masm, 0x05 );
-    // movl(dst, greater_result);
-    emit_opcode( masm, 0xB8 + $dst$$reg);
-    emit_d32( masm, 1 );
-  %}
-
-
-  // Compare the longs and set flags
-  // BROKEN!  Do Not use as-is
-  enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
-    // CMP    $src1.hi,$src2.hi
-    emit_opcode( masm, 0x3B );
-    emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($src1$$reg), HIGH_FROM_LOW_ENC($src2$$reg) );
-    // JNE,s  done
-    emit_opcode(masm,0x75);
-    emit_d8(masm, 2 );
-    // CMP    $src1.lo,$src2.lo
-    emit_opcode( masm, 0x3B );
-    emit_rm(masm, 0x3, $src1$$reg, $src2$$reg );
-// done:
-  %}
-
-  enc_class convert_int_long( regL dst, rRegI src ) %{
-    // mov $dst.lo,$src
-    int dst_encoding = $dst$$reg;
-    int src_encoding = $src$$reg;
-    encode_Copy( masm, dst_encoding  , src_encoding );
-    // mov $dst.hi,$src
-    encode_Copy( masm, HIGH_FROM_LOW_ENC(dst_encoding), src_encoding );
-    // sar $dst.hi,31
-    emit_opcode( masm, 0xC1 );
-    emit_rm(masm, 0x3, 7, HIGH_FROM_LOW_ENC(dst_encoding) );
-    emit_d8(masm, 0x1F );
-  %}
-
-  enc_class convert_long_double( eRegL src ) %{
-    // push $src.hi
-    emit_opcode(masm, 0x50+HIGH_FROM_LOW_ENC($src$$reg));
-    // push $src.lo
-    emit_opcode(masm, 0x50+$src$$reg  );
-    // fild 64-bits at [SP]
-    emit_opcode(masm,0xdf);
-    emit_d8(masm, 0x6C);
-    emit_d8(masm, 0x24);
-    emit_d8(masm, 0x00);
-    // pop stack
-    emit_opcode(masm, 0x83); // add  SP, #8
-    emit_rm(masm, 0x3, 0x00, ESP_enc);
-    emit_d8(masm, 0x8);
-  %}
-
-  enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
-    // IMUL   EDX:EAX,$src1
-    emit_opcode( masm, 0xF7 );
-    emit_rm( masm, 0x3, 0x5, $src1$$reg );
-    // SAR    EDX,$cnt-32
-    int shift_count = ((int)$cnt$$constant) - 32;
-    if (shift_count > 0) {
-      emit_opcode(masm, 0xC1);
-      emit_rm(masm, 0x3, 7, $dst$$reg );
-      emit_d8(masm, shift_count);
-    }
-  %}
-
-  // this version doesn't have add sp, 8
-  enc_class convert_long_double2( eRegL src ) %{
-    // push $src.hi
-    emit_opcode(masm, 0x50+HIGH_FROM_LOW_ENC($src$$reg));
-    // push $src.lo
-    emit_opcode(masm, 0x50+$src$$reg  );
-    // fild 64-bits at [SP]
-    emit_opcode(masm,0xdf);
-    emit_d8(masm, 0x6C);
-    emit_d8(masm, 0x24);
-    emit_d8(masm, 0x00);
-  %}
-
-  enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
-    // Basic idea: long = (long)int * (long)int
-    // IMUL EDX:EAX, src
-    emit_opcode( masm, 0xF7 );
-    emit_rm( masm, 0x3, 0x5, $src$$reg);
-  %}
-
-  enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
-    // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
-    // MUL EDX:EAX, src
-    emit_opcode( masm, 0xF7 );
-    emit_rm( masm, 0x3, 0x4, $src$$reg);
-  %}
-
-  enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
-    // Basic idea: lo(result) = lo(x_lo * y_lo)
-    //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
-    // MOV    $tmp,$src.lo
-    encode_Copy( masm, $tmp$$reg, $src$$reg );
-    // IMUL   $tmp,EDX
-    emit_opcode( masm, 0x0F );
-    emit_opcode( masm, 0xAF );
-    emit_rm( masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
-    // MOV    EDX,$src.hi
-    encode_Copy( masm, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg) );
-    // IMUL   EDX,EAX
-    emit_opcode( masm, 0x0F );
-    emit_opcode( masm, 0xAF );
-    emit_rm( masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg );
-    // ADD    $tmp,EDX
-    emit_opcode( masm, 0x03 );
-    emit_rm( masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
-    // MUL   EDX:EAX,$src.lo
-    emit_opcode( masm, 0xF7 );
-    emit_rm( masm, 0x3, 0x4, $src$$reg );
-    // ADD    EDX,ESI
-    emit_opcode( masm, 0x03 );
-    emit_rm( masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $tmp$$reg );
-  %}
-
-  enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
-    // Basic idea: lo(result) = lo(src * y_lo)
-    //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
-    // IMUL   $tmp,EDX,$src
-    emit_opcode( masm, 0x6B );
-    emit_rm( masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
-    emit_d8( masm, (int)$src$$constant );
-    // MOV    EDX,$src
-    emit_opcode(masm, 0xB8 + EDX_enc);
-    emit_d32( masm, (int)$src$$constant );
-    // MUL   EDX:EAX,EDX
-    emit_opcode( masm, 0xF7 );
-    emit_rm( masm, 0x3, 0x4, EDX_enc );
-    // ADD    EDX,ESI
-    emit_opcode( masm, 0x03 );
-    emit_rm( masm, 0x3, EDX_enc, $tmp$$reg );
-  %}
-
-  enc_class long_div( eRegL src1, eRegL src2 ) %{
-    // PUSH src1.hi
-    emit_opcode(masm, HIGH_FROM_LOW_ENC(0x50+$src1$$reg) );
-    // PUSH src1.lo
-    emit_opcode(masm,               0x50+$src1$$reg  );
-    // PUSH src2.hi
-    emit_opcode(masm, HIGH_FROM_LOW_ENC(0x50+$src2$$reg) );
-    // PUSH src2.lo
-    emit_opcode(masm,               0x50+$src2$$reg  );
-    // CALL directly to the runtime
-    __ set_inst_mark();
-    emit_opcode(masm,0xE8);       // Call into runtime
-    emit_d32_reloc(masm, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - __ pc()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
-    __ clear_inst_mark();
-    __ post_call_nop();
-    // Restore stack
-    emit_opcode(masm, 0x83); // add  SP, #framesize
-    emit_rm(masm, 0x3, 0x00, ESP_enc);
-    emit_d8(masm, 4*4);
-  %}
-
-  enc_class long_mod( eRegL src1, eRegL src2 ) %{
-    // PUSH src1.hi
-    emit_opcode(masm, HIGH_FROM_LOW_ENC(0x50+$src1$$reg) );
-    // PUSH src1.lo
-    emit_opcode(masm,               0x50+$src1$$reg  );
-    // PUSH src2.hi
-    emit_opcode(masm, HIGH_FROM_LOW_ENC(0x50+$src2$$reg) );
-    // PUSH src2.lo
-    emit_opcode(masm,               0x50+$src2$$reg  );
-    // CALL directly to the runtime
-    __ set_inst_mark();
-    emit_opcode(masm,0xE8);       // Call into runtime
-    emit_d32_reloc(masm, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - __ pc()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
-    __ clear_inst_mark();
-    __ post_call_nop();
-    // Restore stack
-    emit_opcode(masm, 0x83); // add  SP, #framesize
-    emit_rm(masm, 0x3, 0x00, ESP_enc);
-    emit_d8(masm, 4*4);
-  %}
-
-  enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
-    // MOV   $tmp,$src.lo
-    emit_opcode(masm, 0x8B);
-    emit_rm(masm, 0x3, $tmp$$reg, $src$$reg);
-    // OR    $tmp,$src.hi
-    emit_opcode(masm, 0x0B);
-    emit_rm(masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src$$reg));
-  %}
-
-  enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
-    // CMP    $src1.lo,$src2.lo
-    emit_opcode( masm, 0x3B );
-    emit_rm(masm, 0x3, $src1$$reg, $src2$$reg );
-    // JNE,s  skip
-    emit_cc(masm, 0x70, 0x5);
-    emit_d8(masm,2);
-    // CMP    $src1.hi,$src2.hi
-    emit_opcode( masm, 0x3B );
-    emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($src1$$reg), HIGH_FROM_LOW_ENC($src2$$reg) );
-  %}
-
-  enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
-    // CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits
-    emit_opcode( masm, 0x3B );
-    emit_rm(masm, 0x3, $src1$$reg, $src2$$reg );
-    // MOV    $tmp,$src1.hi
-    emit_opcode( masm, 0x8B );
-    emit_rm(masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src1$$reg) );
-    // SBB   $tmp,$src2.hi\t! Compute flags for long compare
-    emit_opcode( masm, 0x1B );
-    emit_rm(masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src2$$reg) );
-  %}
-
-  enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
-    // XOR    $tmp,$tmp
-    emit_opcode(masm,0x33);  // XOR
-    emit_rm(masm,0x3, $tmp$$reg, $tmp$$reg);
-    // CMP    $tmp,$src.lo
-    emit_opcode( masm, 0x3B );
-    emit_rm(masm, 0x3, $tmp$$reg, $src$$reg );
-    // SBB    $tmp,$src.hi
-    emit_opcode( masm, 0x1B );
-    emit_rm(masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src$$reg) );
-  %}
-
- // Sniff, sniff... smells like Gnu Superoptimizer
-  enc_class neg_long( eRegL dst ) %{
-    emit_opcode(masm,0xF7);    // NEG hi
-    emit_rm    (masm,0x3, 0x3, HIGH_FROM_LOW_ENC($dst$$reg));
-    emit_opcode(masm,0xF7);    // NEG lo
-    emit_rm    (masm,0x3, 0x3,               $dst$$reg );
-    emit_opcode(masm,0x83);    // SBB hi,0
-    emit_rm    (masm,0x3, 0x3, HIGH_FROM_LOW_ENC($dst$$reg));
-    emit_d8    (masm,0 );
-  %}
-
-  enc_class enc_pop_rdx() %{
-    emit_opcode(masm,0x5A);
-  %}
-
-  enc_class enc_rethrow() %{
-    __ set_inst_mark();
-    emit_opcode(masm, 0xE9);        // jmp    entry
-    emit_d32_reloc(masm, (int)OptoRuntime::rethrow_stub() - ((int)__ pc())-4,
-                   runtime_call_Relocation::spec(), RELOC_IMM32 );
-    __ clear_inst_mark();
-    __ post_call_nop();
-  %}
-
-
-  // Convert a double to an int.  Java semantics require we do complex
-  // manglelations in the corner cases.  So we set the rounding mode to
-  // 'zero', store the darned double down as an int, and reset the
-  // rounding mode to 'nearest'.  The hardware throws an exception which
-  // patches up the correct value directly to the stack.
-  enc_class DPR2I_encoding( regDPR src ) %{
-    // Flip to round-to-zero mode.  We attempted to allow invalid-op
-    // exceptions here, so that a NAN or other corner-case value will
-    // thrown an exception (but normal values get converted at full speed).
-    // However, I2C adapters and other float-stack manglers leave pending
-    // invalid-op exceptions hanging.  We would have to clear them before
-    // enabling them and that is more expensive than just testing for the
-    // invalid value Intel stores down in the corner cases.
-    emit_opcode(masm,0xD9);            // FLDCW  trunc
-    emit_opcode(masm,0x2D);
-    emit_d32(masm,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
-    // Allocate a word
-    emit_opcode(masm,0x83);            // SUB ESP,4
-    emit_opcode(masm,0xEC);
-    emit_d8(masm,0x04);
-    // Encoding assumes a double has been pushed into FPR0.
-    // Store down the double as an int, popping the FPU stack
-    emit_opcode(masm,0xDB);            // FISTP [ESP]
-    emit_opcode(masm,0x1C);
-    emit_d8(masm,0x24);
-    // Restore the rounding mode; mask the exception
-    emit_opcode(masm,0xD9);            // FLDCW   std/24-bit mode
-    emit_opcode(masm,0x2D);
-    emit_d32( masm, Compile::current()->in_24_bit_fp_mode()
-        ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
-        : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());
-
-    // Load the converted int; adjust CPU stack
-    emit_opcode(masm,0x58);       // POP EAX
-    emit_opcode(masm,0x3D);       // CMP EAX,imm
-    emit_d32   (masm,0x80000000); //         0x80000000
-    emit_opcode(masm,0x75);       // JNE around_slow_call
-    emit_d8    (masm,0x07);       // Size of slow_call
-    // Push src onto stack slow-path
-    emit_opcode(masm,0xD9 );      // FLD     ST(i)
-    emit_d8    (masm,0xC0-1+$src$$reg );
-    // CALL directly to the runtime
-    __ set_inst_mark();
-    emit_opcode(masm,0xE8);       // Call into runtime
-    emit_d32_reloc(masm, (StubRoutines::x86::d2i_wrapper() - __ pc()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
-    __ clear_inst_mark();
-    __ post_call_nop();
-    // Carry on here...
-  %}
-
-  enc_class DPR2L_encoding( regDPR src ) %{
-    emit_opcode(masm,0xD9);            // FLDCW  trunc
-    emit_opcode(masm,0x2D);
-    emit_d32(masm,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
-    // Allocate a word
-    emit_opcode(masm,0x83);            // SUB ESP,8
-    emit_opcode(masm,0xEC);
-    emit_d8(masm,0x08);
-    // Encoding assumes a double has been pushed into FPR0.
-    // Store down the double as a long, popping the FPU stack
-    emit_opcode(masm,0xDF);            // FISTP [ESP]
-    emit_opcode(masm,0x3C);
-    emit_d8(masm,0x24);
-    // Restore the rounding mode; mask the exception
-    emit_opcode(masm,0xD9);            // FLDCW   std/24-bit mode
-    emit_opcode(masm,0x2D);
-    emit_d32( masm, Compile::current()->in_24_bit_fp_mode()
-        ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
-        : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());
-
-    // Load the converted int; adjust CPU stack
-    emit_opcode(masm,0x58);       // POP EAX
-    emit_opcode(masm,0x5A);       // POP EDX
-    emit_opcode(masm,0x81);       // CMP EDX,imm
-    emit_d8    (masm,0xFA);       // rdx
-    emit_d32   (masm,0x80000000); //         0x80000000
-    emit_opcode(masm,0x75);       // JNE around_slow_call
-    emit_d8    (masm,0x07+4);     // Size of slow_call
-    emit_opcode(masm,0x85);       // TEST EAX,EAX
-    emit_opcode(masm,0xC0);       // 2/rax,/rax,
-    emit_opcode(masm,0x75);       // JNE around_slow_call
-    emit_d8    (masm,0x07);       // Size of slow_call
-    // Push src onto stack slow-path
-    emit_opcode(masm,0xD9 );      // FLD     ST(i)
-    emit_d8    (masm,0xC0-1+$src$$reg );
-    // CALL directly to the runtime
-    __ set_inst_mark();
-    emit_opcode(masm,0xE8);       // Call into runtime
-    emit_d32_reloc(masm, (StubRoutines::x86::d2l_wrapper() - __ pc()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
-    __ clear_inst_mark();
-    __ post_call_nop();
-    // Carry on here...
-  %}
-
-  enc_class FMul_ST_reg( eRegFPR src1 ) %{
-    // Operand was loaded from memory into fp ST (stack top)
-    // FMUL   ST,$src  /* D8 C8+i */
-    emit_opcode(masm, 0xD8);
-    emit_opcode(masm, 0xC8 + $src1$$reg);
-  %}
-
-  enc_class FAdd_ST_reg( eRegFPR src2 ) %{
-    // FADDP  ST,src2  /* D8 C0+i */
-    emit_opcode(masm, 0xD8);
-    emit_opcode(masm, 0xC0 + $src2$$reg);
-    //could use FADDP  src2,fpST  /* DE C0+i */
-  %}
-
-  enc_class FAddP_reg_ST( eRegFPR src2 ) %{
-    // FADDP  src2,ST  /* DE C0+i */
-    emit_opcode(masm, 0xDE);
-    emit_opcode(masm, 0xC0 + $src2$$reg);
-  %}
-
-  enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
-    // Operand has been loaded into fp ST (stack top)
-      // FSUB   ST,$src1
-      emit_opcode(masm, 0xD8);
-      emit_opcode(masm, 0xE0 + $src1$$reg);
-
-      // FDIV
-      emit_opcode(masm, 0xD8);
-      emit_opcode(masm, 0xF0 + $src2$$reg);
-  %}
-
-  enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
-    // Operand was loaded from memory into fp ST (stack top)
-    // FADD   ST,$src  /* D8 C0+i */
-    emit_opcode(masm, 0xD8);
-    emit_opcode(masm, 0xC0 + $src1$$reg);
-
-    // FMUL  ST,src2  /* D8 C*+i */
-    emit_opcode(masm, 0xD8);
-    emit_opcode(masm, 0xC8 + $src2$$reg);
-  %}
-
-
-  enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
-    // Operand was loaded from memory into fp ST (stack top)
-    // FADD   ST,$src  /* D8 C0+i */
-    emit_opcode(masm, 0xD8);
-    emit_opcode(masm, 0xC0 + $src1$$reg);
-
-    // FMULP  src2,ST  /* DE C8+i */
-    emit_opcode(masm, 0xDE);
-    emit_opcode(masm, 0xC8 + $src2$$reg);
-  %}
-
-  // Atomically load the volatile long
-  enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
-    emit_opcode(masm,0xDF);
-    int rm_byte_opcode = 0x05;
-    int base     = $mem$$base;
-    int index    = $mem$$index;
-    int scale    = $mem$$scale;
-    int displace = $mem$$disp;
-    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
-    encode_RegMem(masm, rm_byte_opcode, base, index, scale, displace, disp_reloc);
-    store_to_stackslot( masm, 0x0DF, 0x07, $dst$$disp );
-  %}
-
-  // Volatile Store Long.  Must be atomic, so move it into
-  // the FP TOS and then do a 64-bit FIST.  Has to probe the
-  // target address before the store (for null-ptr checks)
-  // so the memory operand is used twice in the encoding.
-  enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
-    store_to_stackslot( masm, 0x0DF, 0x05, $src$$disp );
-    __ set_inst_mark();            // Mark start of FIST in case $mem has an oop
-    emit_opcode(masm,0xDF);
-    int rm_byte_opcode = 0x07;
-    int base     = $mem$$base;
-    int index    = $mem$$index;
-    int scale    = $mem$$scale;
-    int displace = $mem$$disp;
-    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
-    encode_RegMem(masm, rm_byte_opcode, base, index, scale, displace, disp_reloc);
-    __ clear_inst_mark();
-  %}
-
-%}
-
-
-//----------FRAME--------------------------------------------------------------
-// Definition of frame structure and management information.
-//
-//  S T A C K   L A Y O U T    Allocators stack-slot number
-//                             |   (to get allocators register number
-//  G  Owned by    |        |  v    add OptoReg::stack0())
-//  r   CALLER     |        |
-//  o     |        +--------+      pad to even-align allocators stack-slot
-//  w     V        |  pad0  |        numbers; owned by CALLER
-//  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
-//  h     ^        |   in   |  5
-//        |        |  args  |  4   Holes in incoming args owned by SELF
-//  |     |        |        |  3
-//  |     |        +--------+
-//  V     |        | old out|      Empty on Intel, window on Sparc
-//        |    old |preserve|      Must be even aligned.
-//        |     SP-+--------+----> Matcher::_old_SP, even aligned
-//        |        |   in   |  3   area for Intel ret address
-//     Owned by    |preserve|      Empty on Sparc.
-//       SELF      +--------+
-//        |        |  pad2  |  2   pad to align old SP
-//        |        +--------+  1
-//        |        | locks  |  0
-//        |        +--------+----> OptoReg::stack0(), even aligned
-//        |        |  pad1  | 11   pad to align new SP
-//        |        +--------+
-//        |        |        | 10
-//        |        | spills |  9   spills
-//        V        |        |  8   (pad0 slot for callee)
-//      -----------+--------+----> Matcher::_out_arg_limit, unaligned
-//        ^        |  out   |  7
-//        |        |  args  |  6   Holes in outgoing args owned by CALLEE
-//     Owned by    +--------+
-//      CALLEE     | new out|  6   Empty on Intel, window on Sparc
-//        |    new |preserve|      Must be even-aligned.
-//        |     SP-+--------+----> Matcher::_new_SP, even aligned
-//        |        |        |
-//
-// Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
-//         known from SELF's arguments and the Java calling convention.
-//         Region 6-7 is determined per call site.
-// Note 2: If the calling convention leaves holes in the incoming argument
-//         area, those holes are owned by SELF.  Holes in the outgoing area
-//         are owned by the CALLEE.  Holes should not be necessary in the
-//         incoming area, as the Java calling convention is completely under
-//         the control of the AD file.  Doubles can be sorted and packed to
-//         avoid holes.  Holes in the outgoing arguments may be necessary for
-//         varargs C calling conventions.
-// Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
-//         even aligned with pad0 as needed.
-//         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
-//         region 6-11 is even aligned; it may be padded out more so that
-//         the region from SP to FP meets the minimum stack alignment.
-
-frame %{
-  // These three registers define part of the calling convention
-  // between compiled code and the interpreter.
-  inline_cache_reg(EAX);                // Inline Cache Register
-
-  // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
-  cisc_spilling_operand_name(indOffset32);
-
-  // Number of stack slots consumed by locking an object
-  sync_stack_slots(1);
-
-  // Compiled code's Frame Pointer
-  frame_pointer(ESP);
-  // Interpreter stores its frame pointer in a register which is
-  // stored to the stack by I2CAdaptors.
-  // I2CAdaptors convert from interpreted java to compiled java.
-  interpreter_frame_pointer(EBP);
-
-  // Stack alignment requirement
-  // Alignment size in bytes (128-bit -> 16 bytes)
-  stack_alignment(StackAlignmentInBytes);
-
-  // Number of outgoing stack slots killed above the out_preserve_stack_slots
-  // for calls to C.  Supports the var-args backing area for register parms.
-  varargs_C_out_slots_killed(0);
-
-  // The after-PROLOG location of the return address.  Location of
-  // return address specifies a type (REG or STACK) and a number
-  // representing the register number (i.e. - use a register name) or
-  // stack slot.
-  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
-  // Otherwise, it is above the locks and verification slot and alignment word
-  return_addr(STACK - 1 +
-              align_up((Compile::current()->in_preserve_stack_slots() +
-                        Compile::current()->fixed_slots()),
-                       stack_alignment_in_slots()));
-
-  // Location of C & interpreter return values
-  c_return_value %{
-    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
-    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
-    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
-
-    // in SSE2+ mode we want to keep the FPU stack clean so pretend
-    // that C functions return float and double results in XMM0.
-    if( ideal_reg == Op_RegD && UseSSE>=2 )
-      return OptoRegPair(XMM0b_num,XMM0_num);
-    if( ideal_reg == Op_RegF && UseSSE>=2 )
-      return OptoRegPair(OptoReg::Bad,XMM0_num);
-
-    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
-  %}
-
-  // Location of return values
-  return_value %{
-    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
-    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
-    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
-    if( ideal_reg == Op_RegD && UseSSE>=2 )
-      return OptoRegPair(XMM0b_num,XMM0_num);
-    if( ideal_reg == Op_RegF && UseSSE>=1 )
-      return OptoRegPair(OptoReg::Bad,XMM0_num);
-    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
-  %}
-
-%}
-
-//----------ATTRIBUTES---------------------------------------------------------
-//----------Operand Attributes-------------------------------------------------
-op_attrib op_cost(0);        // Required cost attribute
-
-//----------Instruction Attributes---------------------------------------------
-ins_attrib ins_cost(100);       // Required cost attribute
-ins_attrib ins_size(8);         // Required size attribute (in bits)
-ins_attrib ins_short_branch(0); // Required flag: is this instruction a
-                                // non-matching short branch variant of some
-                                                            // long branch?
-ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
-                                // specifies the alignment that some part of the instruction (not
-                                // necessarily the start) requires.  If > 1, a compute_padding()
-                                // function must be provided for the instruction
-
-//----------OPERANDS-----------------------------------------------------------
-// Operand definitions must precede instruction definitions for correct parsing
-// in the ADLC because operands constitute user defined types which are used in
-// instruction definitions.
-
-//----------Simple Operands----------------------------------------------------
-// Immediate Operands
-// Integer Immediate
-operand immI() %{
-  match(ConI);
-
-  op_cost(10);
-  format %{ %}
-  interface(CONST_INTER);
-%}
-
-// Constant for test vs zero
-operand immI_0() %{
-  predicate(n->get_int() == 0);
-  match(ConI);
-
-  op_cost(0);
-  format %{ %}
-  interface(CONST_INTER);
-%}
-
-// Constant for increment
-operand immI_1() %{
-  predicate(n->get_int() == 1);
-  match(ConI);
-
-  op_cost(0);
-  format %{ %}
-  interface(CONST_INTER);
-%}
-
-// Constant for decrement
-operand immI_M1() %{
-  predicate(n->get_int() == -1);
-  match(ConI);
-
-  op_cost(0);
-  format %{ %}
-  interface(CONST_INTER);
-%}
-
-// Valid scale values for addressing modes
-operand immI2() %{
-  predicate(0 <= n->get_int() && (n->get_int() <= 3));
-  match(ConI);
-
-  format %{ %}
-  interface(CONST_INTER);
-%}
-
-operand immI8() %{
-  predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
-  match(ConI);
-
-  op_cost(5);
-  format %{ %}
-  interface(CONST_INTER);
-%}
-
-operand immU8() %{
-  predicate((0 <= n->get_int()) && (n->get_int() <= 255));
-  match(ConI);
-
-  op_cost(5);
-  format %{ %}
-  interface(CONST_INTER);
-%}
-
-operand immI16() %{
-  predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
-  match(ConI);
-
-  op_cost(10);
-  format %{ %}
-  interface(CONST_INTER);
-%}
-
-// Int Immediate non-negative
-operand immU31()
-%{
-  predicate(n->get_int() >= 0);
-  match(ConI);
-
-  op_cost(0);
-  format %{ %}
-  interface(CONST_INTER);
-%}
-
-// Constant for long shifts
-operand immI_32() %{
-  predicate( n->get_int() == 32 );
-  match(ConI);
-
-  op_cost(0);
-  format %{ %}
-  interface(CONST_INTER);
-%}
-
-operand immI_1_31() %{
-  predicate( n->get_int() >= 1 && n->get_int() <= 31 );
-  match(ConI);
-
-  op_cost(0);
-  format %{ %}
-  interface(CONST_INTER);
-%}
-
-operand immI_32_63() %{
-  predicate( n->get_int() >= 32 && n->get_int() <= 63 );
-  match(ConI);
-  op_cost(0);
-
-  format %{ %}
-  interface(CONST_INTER);
-%}
-
-operand immI_2() %{
-  predicate( n->get_int() == 2 );
-  match(ConI);
-
-  op_cost(0);
-  format %{ %}
-  interface(CONST_INTER);
-%}
-
-operand immI_3() %{
-  predicate( n->get_int() == 3 );
-  match(ConI);
-
-  op_cost(0);
-  format %{ %}
-  interface(CONST_INTER);
-%}
-
-operand immI_4()
-%{
-  predicate(n->get_int() == 4);
-  match(ConI);
-
-  op_cost(0);
-  format %{ %}
-  interface(CONST_INTER);
-%}
-
-operand immI_8()
-%{
-  predicate(n->get_int() == 8);
-  match(ConI);
-
-  op_cost(0);
-  format %{ %}
-  interface(CONST_INTER);
-%}
-
-// Pointer Immediate
-operand immP() %{
-  match(ConP);
-
-  op_cost(10);
-  format %{ %}
-  interface(CONST_INTER);
-%}
-
-// Null Pointer Immediate
-operand immP0() %{
-  predicate( n->get_ptr() == 0 );
-  match(ConP);
-  op_cost(0);
-
-  format %{ %}
-  interface(CONST_INTER);
-%}
-
-// Long Immediate
-operand immL() %{
-  match(ConL);
-
-  op_cost(20);
-  format %{ %}
-  interface(CONST_INTER);
-%}
-
-// Long Immediate zero
-operand immL0() %{
-  predicate( n->get_long() == 0L );
-  match(ConL);
-  op_cost(0);
-
-  format %{ %}
-  interface(CONST_INTER);
-%}
-
-// Long Immediate zero
-operand immL_M1() %{
-  predicate( n->get_long() == -1L );
-  match(ConL);
-  op_cost(0);
-
-  format %{ %}
-  interface(CONST_INTER);
-%}
-
-// Long immediate from 0 to 127.
-// Used for a shorter form of long mul by 10.
-operand immL_127() %{
-  predicate((0 <= n->get_long()) && (n->get_long() <= 127));
-  match(ConL);
-  op_cost(0);
-
-  format %{ %}
-  interface(CONST_INTER);
-%}
-
-// Long Immediate: low 32-bit mask
-operand immL_32bits() %{
-  predicate(n->get_long() == 0xFFFFFFFFL);
-  match(ConL);
-  op_cost(0);
-
-  format %{ %}
-  interface(CONST_INTER);
-%}
-
-// Long Immediate: low 32-bit mask
-operand immL32() %{
-  predicate(n->get_long() == (int)(n->get_long()));
-  match(ConL);
-  op_cost(20);
-
-  format %{ %}
-  interface(CONST_INTER);
-%}
-
-//Double Immediate zero
-operand immDPR0() %{
-  // Do additional (and counter-intuitive) test against NaN to work around VC++
-  // bug that generates code such that NaNs compare equal to 0.0
-  predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
-  match(ConD);
-
-  op_cost(5);
-  format %{ %}
-  interface(CONST_INTER);
-%}
-
-// Double Immediate one
-operand immDPR1() %{
-  predicate( UseSSE<=1 && n->getd() == 1.0 );
-  match(ConD);
-
-  op_cost(5);
-  format %{ %}
-  interface(CONST_INTER);
-%}
-
-// Double Immediate
-operand immDPR() %{
-  predicate(UseSSE<=1);
-  match(ConD);
-
-  op_cost(5);
-  format %{ %}
-  interface(CONST_INTER);
-%}
-
-operand immD() %{
-  predicate(UseSSE>=2);
-  match(ConD);
-
-  op_cost(5);
-  format %{ %}
-  interface(CONST_INTER);
-%}
-
-// Double Immediate zero
-operand immD0() %{
-  // Do additional (and counter-intuitive) test against NaN to work around VC++
-  // bug that generates code such that NaNs compare equal to 0.0 AND do not
-  // compare equal to -0.0.
-  predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
-  match(ConD);
-
-  format %{ %}
-  interface(CONST_INTER);
-%}
-
-// Float Immediate zero
-operand immFPR0() %{
-  predicate(UseSSE == 0 && n->getf() == 0.0F);
-  match(ConF);
-
-  op_cost(5);
-  format %{ %}
-  interface(CONST_INTER);
-%}
-
-// Float Immediate one
-operand immFPR1() %{
-  predicate(UseSSE == 0 && n->getf() == 1.0F);
-  match(ConF);
-
-  op_cost(5);
-  format %{ %}
-  interface(CONST_INTER);
-%}
-
-// Float Immediate
-operand immFPR() %{
-  predicate( UseSSE == 0 );
-  match(ConF);
-
-  op_cost(5);
-  format %{ %}
-  interface(CONST_INTER);
-%}
-
-// Float Immediate
-operand immF() %{
-  predicate(UseSSE >= 1);
-  match(ConF);
-
-  op_cost(5);
-  format %{ %}
-  interface(CONST_INTER);
-%}
-
-// Float Immediate zero.  Zero and not -0.0
-operand immF0() %{
-  predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
-  match(ConF);
-
-  op_cost(5);
-  format %{ %}
-  interface(CONST_INTER);
-%}
-
-// Immediates for special shifts (sign extend)
-
-// Constants for increment
-operand immI_16() %{
-  predicate( n->get_int() == 16 );
-  match(ConI);
-
-  format %{ %}
-  interface(CONST_INTER);
-%}
-
-operand immI_24() %{
-  predicate( n->get_int() == 24 );
-  match(ConI);
-
-  format %{ %}
-  interface(CONST_INTER);
-%}
-
-// Constant for byte-wide masking
-operand immI_255() %{
-  predicate( n->get_int() == 255 );
-  match(ConI);
-
-  format %{ %}
-  interface(CONST_INTER);
-%}
-
-// Constant for short-wide masking
-operand immI_65535() %{
-  predicate(n->get_int() == 65535);
-  match(ConI);
-
-  format %{ %}
-  interface(CONST_INTER);
-%}
-
-operand kReg()
-%{
-  constraint(ALLOC_IN_RC(vectmask_reg));
-  match(RegVectMask);
-  format %{%}
-  interface(REG_INTER);
-%}
-
-// Register Operands
-// Integer Register
-operand rRegI() %{
-  constraint(ALLOC_IN_RC(int_reg));
-  match(RegI);
-  match(xRegI);
-  match(eAXRegI);
-  match(eBXRegI);
-  match(eCXRegI);
-  match(eDXRegI);
-  match(eDIRegI);
-  match(eSIRegI);
-
-  format %{ %}
-  interface(REG_INTER);
-%}
-
-// Subset of Integer Register
-operand xRegI(rRegI reg) %{
-  constraint(ALLOC_IN_RC(int_x_reg));
-  match(reg);
-  match(eAXRegI);
-  match(eBXRegI);
-  match(eCXRegI);
-  match(eDXRegI);
-
-  format %{ %}
-  interface(REG_INTER);
-%}
-
-// Special Registers
-operand eAXRegI(xRegI reg) %{
-  constraint(ALLOC_IN_RC(eax_reg));
-  match(reg);
-  match(rRegI);
-
-  format %{ "EAX" %}
-  interface(REG_INTER);
-%}
-
-// Special Registers
-operand eBXRegI(xRegI reg) %{
-  constraint(ALLOC_IN_RC(ebx_reg));
-  match(reg);
-  match(rRegI);
-
-  format %{ "EBX" %}
-  interface(REG_INTER);
-%}
-
-operand eCXRegI(xRegI reg) %{
-  constraint(ALLOC_IN_RC(ecx_reg));
-  match(reg);
-  match(rRegI);
-
-  format %{ "ECX" %}
-  interface(REG_INTER);
-%}
-
-operand eDXRegI(xRegI reg) %{
-  constraint(ALLOC_IN_RC(edx_reg));
-  match(reg);
-  match(rRegI);
-
-  format %{ "EDX" %}
-  interface(REG_INTER);
-%}
-
-operand eDIRegI(xRegI reg) %{
-  constraint(ALLOC_IN_RC(edi_reg));
-  match(reg);
-  match(rRegI);
-
-  format %{ "EDI" %}
-  interface(REG_INTER);
-%}
-
-operand nadxRegI() %{
-  constraint(ALLOC_IN_RC(nadx_reg));
-  match(RegI);
-  match(eBXRegI);
-  match(eCXRegI);
-  match(eSIRegI);
-  match(eDIRegI);
-
-  format %{ %}
-  interface(REG_INTER);
-%}
-
-operand ncxRegI() %{
-  constraint(ALLOC_IN_RC(ncx_reg));
-  match(RegI);
-  match(eAXRegI);
-  match(eDXRegI);
-  match(eSIRegI);
-  match(eDIRegI);
-
-  format %{ %}
-  interface(REG_INTER);
-%}
-
-// // This operand was used by cmpFastUnlock, but conflicted with 'object' reg
-// //
-operand eSIRegI(xRegI reg) %{
-   constraint(ALLOC_IN_RC(esi_reg));
-   match(reg);
-   match(rRegI);
-
-   format %{ "ESI" %}
-   interface(REG_INTER);
-%}
-
-// Pointer Register
-operand anyRegP() %{
-  constraint(ALLOC_IN_RC(any_reg));
-  match(RegP);
-  match(eAXRegP);
-  match(eBXRegP);
-  match(eCXRegP);
-  match(eDIRegP);
-  match(eRegP);
-
-  format %{ %}
-  interface(REG_INTER);
-%}
-
-operand eRegP() %{
-  constraint(ALLOC_IN_RC(int_reg));
-  match(RegP);
-  match(eAXRegP);
-  match(eBXRegP);
-  match(eCXRegP);
-  match(eDIRegP);
-
-  format %{ %}
-  interface(REG_INTER);
-%}
-
-operand rRegP() %{
-  constraint(ALLOC_IN_RC(int_reg));
-  match(RegP);
-  match(eAXRegP);
-  match(eBXRegP);
-  match(eCXRegP);
-  match(eDIRegP);
-
-  format %{ %}
-  interface(REG_INTER);
-%}
-
-// On windows95, EBP is not safe to use for implicit null tests.
-operand eRegP_no_EBP() %{
-  constraint(ALLOC_IN_RC(int_reg_no_ebp));
-  match(RegP);
-  match(eAXRegP);
-  match(eBXRegP);
-  match(eCXRegP);
-  match(eDIRegP);
-
-  op_cost(100);
-  format %{ %}
-  interface(REG_INTER);
-%}
-
-operand pRegP() %{
-  constraint(ALLOC_IN_RC(p_reg));
-  match(RegP);
-  match(eBXRegP);
-  match(eDXRegP);
-  match(eSIRegP);
-  match(eDIRegP);
-
-  format %{ %}
-  interface(REG_INTER);
-%}
-
-// Special Registers
-// Return a pointer value
-operand eAXRegP(eRegP reg) %{
-  constraint(ALLOC_IN_RC(eax_reg));
-  match(reg);
-  format %{ "EAX" %}
-  interface(REG_INTER);
-%}
-
-// Used in AtomicAdd
-operand eBXRegP(eRegP reg) %{
-  constraint(ALLOC_IN_RC(ebx_reg));
-  match(reg);
-  format %{ "EBX" %}
-  interface(REG_INTER);
-%}
-
-// Tail-call (interprocedural jump) to interpreter
-operand eCXRegP(eRegP reg) %{
-  constraint(ALLOC_IN_RC(ecx_reg));
-  match(reg);
-  format %{ "ECX" %}
-  interface(REG_INTER);
-%}
-
-operand eDXRegP(eRegP reg) %{
-  constraint(ALLOC_IN_RC(edx_reg));
-  match(reg);
-  format %{ "EDX" %}
-  interface(REG_INTER);
-%}
-
-operand eSIRegP(eRegP reg) %{
-  constraint(ALLOC_IN_RC(esi_reg));
-  match(reg);
-  format %{ "ESI" %}
-  interface(REG_INTER);
-%}
-
-// Used in rep stosw
-operand eDIRegP(eRegP reg) %{
-  constraint(ALLOC_IN_RC(edi_reg));
-  match(reg);
-  format %{ "EDI" %}
-  interface(REG_INTER);
-%}
-
-operand eRegL() %{
-  constraint(ALLOC_IN_RC(long_reg));
-  match(RegL);
-  match(eADXRegL);
-
-  format %{ %}
-  interface(REG_INTER);
-%}
-
-operand eADXRegL( eRegL reg ) %{
-  constraint(ALLOC_IN_RC(eadx_reg));
-  match(reg);
-
-  format %{ "EDX:EAX" %}
-  interface(REG_INTER);
-%}
-
-operand eBCXRegL( eRegL reg ) %{
-  constraint(ALLOC_IN_RC(ebcx_reg));
-  match(reg);
-
-  format %{ "EBX:ECX" %}
-  interface(REG_INTER);
-%}
-
-operand eBDPRegL( eRegL reg ) %{
-  constraint(ALLOC_IN_RC(ebpd_reg));
-  match(reg);
-
-  format %{ "EBP:EDI" %}
-  interface(REG_INTER);
-%}
-// Special case for integer high multiply
-operand eADXRegL_low_only() %{
-  constraint(ALLOC_IN_RC(eadx_reg));
-  match(RegL);
-
-  format %{ "EAX" %}
-  interface(REG_INTER);
-%}
-
-// Flags register, used as output of compare instructions
-operand rFlagsReg() %{
-  constraint(ALLOC_IN_RC(int_flags));
-  match(RegFlags);
-
-  format %{ "EFLAGS" %}
-  interface(REG_INTER);
-%}
-
-// Flags register, used as output of compare instructions
-operand eFlagsReg() %{
-  constraint(ALLOC_IN_RC(int_flags));
-  match(RegFlags);
-
-  format %{ "EFLAGS" %}
-  interface(REG_INTER);
-%}
-
-// Flags register, used as output of FLOATING POINT compare instructions
-operand eFlagsRegU() %{
-  constraint(ALLOC_IN_RC(int_flags));
-  match(RegFlags);
-
-  format %{ "EFLAGS_U" %}
-  interface(REG_INTER);
-%}
-
-operand eFlagsRegUCF() %{
-  constraint(ALLOC_IN_RC(int_flags));
-  match(RegFlags);
-  predicate(false);
-
-  format %{ "EFLAGS_U_CF" %}
-  interface(REG_INTER);
-%}
-
-// Condition Code Register used by long compare
-operand flagsReg_long_LTGE() %{
-  constraint(ALLOC_IN_RC(int_flags));
-  match(RegFlags);
-  format %{ "FLAGS_LTGE" %}
-  interface(REG_INTER);
-%}
-operand flagsReg_long_EQNE() %{
-  constraint(ALLOC_IN_RC(int_flags));
-  match(RegFlags);
-  format %{ "FLAGS_EQNE" %}
-  interface(REG_INTER);
-%}
-operand flagsReg_long_LEGT() %{
-  constraint(ALLOC_IN_RC(int_flags));
-  match(RegFlags);
-  format %{ "FLAGS_LEGT" %}
-  interface(REG_INTER);
-%}
-
-// Condition Code Register used by unsigned long compare
-operand flagsReg_ulong_LTGE() %{
-  constraint(ALLOC_IN_RC(int_flags));
-  match(RegFlags);
-  format %{ "FLAGS_U_LTGE" %}
-  interface(REG_INTER);
-%}
-operand flagsReg_ulong_EQNE() %{
-  constraint(ALLOC_IN_RC(int_flags));
-  match(RegFlags);
-  format %{ "FLAGS_U_EQNE" %}
-  interface(REG_INTER);
-%}
-operand flagsReg_ulong_LEGT() %{
-  constraint(ALLOC_IN_RC(int_flags));
-  match(RegFlags);
-  format %{ "FLAGS_U_LEGT" %}
-  interface(REG_INTER);
-%}
-
-// Float register operands
-operand regDPR() %{
-  predicate( UseSSE < 2 );
-  constraint(ALLOC_IN_RC(fp_dbl_reg));
-  match(RegD);
-  match(regDPR1);
-  match(regDPR2);
-  format %{ %}
-  interface(REG_INTER);
-%}
-
-operand regDPR1(regDPR reg) %{
-  predicate( UseSSE < 2 );
-  constraint(ALLOC_IN_RC(fp_dbl_reg0));
-  match(reg);
-  format %{ "FPR1" %}
-  interface(REG_INTER);
-%}
-
-operand regDPR2(regDPR reg) %{
-  predicate( UseSSE < 2 );
-  constraint(ALLOC_IN_RC(fp_dbl_reg1));
-  match(reg);
-  format %{ "FPR2" %}
-  interface(REG_INTER);
-%}
-
-operand regnotDPR1(regDPR reg) %{
-  predicate( UseSSE < 2 );
-  constraint(ALLOC_IN_RC(fp_dbl_notreg0));
-  match(reg);
-  format %{ %}
-  interface(REG_INTER);
-%}
-
-// Float register operands
-operand regFPR() %{
-  predicate( UseSSE < 2 );
-  constraint(ALLOC_IN_RC(fp_flt_reg));
-  match(RegF);
-  match(regFPR1);
-  format %{ %}
-  interface(REG_INTER);
-%}
-
-// Float register operands
-operand regFPR1(regFPR reg) %{
-  predicate( UseSSE < 2 );
-  constraint(ALLOC_IN_RC(fp_flt_reg0));
-  match(reg);
-  format %{ "FPR1" %}
-  interface(REG_INTER);
-%}
-
-// XMM Float register operands
-operand regF() %{
-  predicate( UseSSE>=1 );
-  constraint(ALLOC_IN_RC(float_reg_legacy));
-  match(RegF);
-  format %{ %}
-  interface(REG_INTER);
-%}
-
-operand legRegF() %{
-  predicate( UseSSE>=1 );
-  constraint(ALLOC_IN_RC(float_reg_legacy));
-  match(RegF);
-  format %{ %}
-  interface(REG_INTER);
-%}
-
-// Float register operands
-operand vlRegF() %{
-   constraint(ALLOC_IN_RC(float_reg_vl));
-   match(RegF);
-
-   format %{ %}
-   interface(REG_INTER);
-%}
-
-// XMM Double register operands
-operand regD() %{
-  predicate( UseSSE>=2 );
-  constraint(ALLOC_IN_RC(double_reg_legacy));
-  match(RegD);
-  format %{ %}
-  interface(REG_INTER);
-%}
-
-// Double register operands
-operand legRegD() %{
-  predicate( UseSSE>=2 );
-  constraint(ALLOC_IN_RC(double_reg_legacy));
-  match(RegD);
-  format %{ %}
-  interface(REG_INTER);
-%}
-
-operand vlRegD() %{
-   constraint(ALLOC_IN_RC(double_reg_vl));
-   match(RegD);
-
-   format %{ %}
-   interface(REG_INTER);
-%}
-
-//----------Memory Operands----------------------------------------------------
-// Direct Memory Operand
-operand direct(immP addr) %{
-  match(addr);
-
-  format %{ "[$addr]" %}
-  interface(MEMORY_INTER) %{
-    base(0xFFFFFFFF);
-    index(0x4);
-    scale(0x0);
-    disp($addr);
-  %}
-%}
-
-// Indirect Memory Operand
-operand indirect(eRegP reg) %{
-  constraint(ALLOC_IN_RC(int_reg));
-  match(reg);
-
-  format %{ "[$reg]" %}
-  interface(MEMORY_INTER) %{
-    base($reg);
-    index(0x4);
-    scale(0x0);
-    disp(0x0);
-  %}
-%}
-
-// Indirect Memory Plus Short Offset Operand
-operand indOffset8(eRegP reg, immI8 off) %{
-  match(AddP reg off);
-
-  format %{ "[$reg + $off]" %}
-  interface(MEMORY_INTER) %{
-    base($reg);
-    index(0x4);
-    scale(0x0);
-    disp($off);
-  %}
-%}
-
-// Indirect Memory Plus Long Offset Operand
-operand indOffset32(eRegP reg, immI off) %{
-  match(AddP reg off);
-
-  format %{ "[$reg + $off]" %}
-  interface(MEMORY_INTER) %{
-    base($reg);
-    index(0x4);
-    scale(0x0);
-    disp($off);
-  %}
-%}
-
-// Indirect Memory Plus Long Offset Operand
-operand indOffset32X(rRegI reg, immP off) %{
-  match(AddP off reg);
-
-  format %{ "[$reg + $off]" %}
-  interface(MEMORY_INTER) %{
-    base($reg);
-    index(0x4);
-    scale(0x0);
-    disp($off);
-  %}
-%}
-
-// Indirect Memory Plus Index Register Plus Offset Operand
-operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
-  match(AddP (AddP reg ireg) off);
-
-  op_cost(10);
-  format %{"[$reg + $off + $ireg]" %}
-  interface(MEMORY_INTER) %{
-    base($reg);
-    index($ireg);
-    scale(0x0);
-    disp($off);
-  %}
-%}
-
-// Indirect Memory Plus Index Register Plus Offset Operand
-operand indIndex(eRegP reg, rRegI ireg) %{
-  match(AddP reg ireg);
-
-  op_cost(10);
-  format %{"[$reg + $ireg]" %}
-  interface(MEMORY_INTER) %{
-    base($reg);
-    index($ireg);
-    scale(0x0);
-    disp(0x0);
-  %}
-%}
-
-// // -------------------------------------------------------------------------
-// // 486 architecture doesn't support "scale * index + offset" with out a base
-// // -------------------------------------------------------------------------
-// // Scaled Memory Operands
-// // Indirect Memory Times Scale Plus Offset Operand
-// operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
-//   match(AddP off (LShiftI ireg scale));
-//
-//   op_cost(10);
-//   format %{"[$off + $ireg << $scale]" %}
-//   interface(MEMORY_INTER) %{
-//     base(0x4);
-//     index($ireg);
-//     scale($scale);
-//     disp($off);
-//   %}
-// %}
-
-// Indirect Memory Times Scale Plus Index Register
-operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
-  match(AddP reg (LShiftI ireg scale));
-
-  op_cost(10);
-  format %{"[$reg + $ireg << $scale]" %}
-  interface(MEMORY_INTER) %{
-    base($reg);
-    index($ireg);
-    scale($scale);
-    disp(0x0);
-  %}
-%}
-
-// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
-operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
-  match(AddP (AddP reg (LShiftI ireg scale)) off);
-
-  op_cost(10);
-  format %{"[$reg + $off + $ireg << $scale]" %}
-  interface(MEMORY_INTER) %{
-    base($reg);
-    index($ireg);
-    scale($scale);
-    disp($off);
-  %}
-%}
-
-//----------Load Long Memory Operands------------------------------------------
-// The load-long idiom will use it's address expression again after loading
-// the first word of the long.  If the load-long destination overlaps with
-// registers used in the addressing expression, the 2nd half will be loaded
-// from a clobbered address.  Fix this by requiring that load-long use
-// address registers that do not overlap with the load-long target.
-
-// load-long support
-operand load_long_RegP() %{
-  constraint(ALLOC_IN_RC(esi_reg));
-  match(RegP);
-  match(eSIRegP);
-  op_cost(100);
-  format %{  %}
-  interface(REG_INTER);
-%}
-
-// Indirect Memory Operand Long
-operand load_long_indirect(load_long_RegP reg) %{
-  constraint(ALLOC_IN_RC(esi_reg));
-  match(reg);
-
-  format %{ "[$reg]" %}
-  interface(MEMORY_INTER) %{
-    base($reg);
-    index(0x4);
-    scale(0x0);
-    disp(0x0);
-  %}
-%}
-
-// Indirect Memory Plus Long Offset Operand
-operand load_long_indOffset32(load_long_RegP reg, immI off) %{
-  match(AddP reg off);
-
-  format %{ "[$reg + $off]" %}
-  interface(MEMORY_INTER) %{
-    base($reg);
-    index(0x4);
-    scale(0x0);
-    disp($off);
-  %}
-%}
-
-opclass load_long_memory(load_long_indirect, load_long_indOffset32);
-
-
-//----------Special Memory Operands--------------------------------------------
-// Stack Slot Operand - This operand is used for loading and storing temporary
-//                      values on the stack where a match requires a value to
-//                      flow through memory.
-operand stackSlotP(sRegP reg) %{
-  constraint(ALLOC_IN_RC(stack_slots));
-  // No match rule because this operand is only generated in matching
-  format %{ "[$reg]" %}
-  interface(MEMORY_INTER) %{
-    base(0x4);   // ESP
-    index(0x4);  // No Index
-    scale(0x0);  // No Scale
-    disp($reg);  // Stack Offset
-  %}
-%}
-
-operand stackSlotI(sRegI reg) %{
-  constraint(ALLOC_IN_RC(stack_slots));
-  // No match rule because this operand is only generated in matching
-  format %{ "[$reg]" %}
-  interface(MEMORY_INTER) %{
-    base(0x4);   // ESP
-    index(0x4);  // No Index
-    scale(0x0);  // No Scale
-    disp($reg);  // Stack Offset
-  %}
-%}
-
-operand stackSlotF(sRegF reg) %{
-  constraint(ALLOC_IN_RC(stack_slots));
-  // No match rule because this operand is only generated in matching
-  format %{ "[$reg]" %}
-  interface(MEMORY_INTER) %{
-    base(0x4);   // ESP
-    index(0x4);  // No Index
-    scale(0x0);  // No Scale
-    disp($reg);  // Stack Offset
-  %}
-%}
-
-operand stackSlotD(sRegD reg) %{
-  constraint(ALLOC_IN_RC(stack_slots));
-  // No match rule because this operand is only generated in matching
-  format %{ "[$reg]" %}
-  interface(MEMORY_INTER) %{
-    base(0x4);   // ESP
-    index(0x4);  // No Index
-    scale(0x0);  // No Scale
-    disp($reg);  // Stack Offset
-  %}
-%}
-
-operand stackSlotL(sRegL reg) %{
-  constraint(ALLOC_IN_RC(stack_slots));
-  // No match rule because this operand is only generated in matching
-  format %{ "[$reg]" %}
-  interface(MEMORY_INTER) %{
-    base(0x4);   // ESP
-    index(0x4);  // No Index
-    scale(0x0);  // No Scale
-    disp($reg);  // Stack Offset
-  %}
-%}
-
-//----------Conditional Branch Operands----------------------------------------
-// Comparison Op  - This is the operation of the comparison, and is limited to
-//                  the following set of codes:
-//                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
-//
-// Other attributes of the comparison, such as unsignedness, are specified
-// by the comparison instruction that sets a condition code flags register.
-// That result is represented by a flags operand whose subtype is appropriate
-// to the unsignedness (etc.) of the comparison.
-//
-// Later, the instruction which matches both the Comparison Op (a Bool) and
-// the flags (produced by the Cmp) specifies the coding of the comparison op
-// by matching a specific subtype of Bool operand below, such as cmpOpU.
-
-// Comparison Code
-operand cmpOp() %{
-  match(Bool);
-
-  format %{ "" %}
-  interface(COND_INTER) %{
-    equal(0x4, "e");
-    not_equal(0x5, "ne");
-    less(0xC, "l");
-    greater_equal(0xD, "ge");
-    less_equal(0xE, "le");
-    greater(0xF, "g");
-    overflow(0x0, "o");
-    no_overflow(0x1, "no");
-  %}
-%}
-
-// Comparison Code, unsigned compare.  Used by FP also, with
-// C2 (unordered) turned into GT or LT already.  The other bits
-// C0 and C3 are turned into Carry & Zero flags.
-operand cmpOpU() %{
-  match(Bool);
-
-  format %{ "" %}
-  interface(COND_INTER) %{
-    equal(0x4, "e");
-    not_equal(0x5, "ne");
-    less(0x2, "b");
-    greater_equal(0x3, "nb");
-    less_equal(0x6, "be");
-    greater(0x7, "nbe");
-    overflow(0x0, "o");
-    no_overflow(0x1, "no");
-  %}
-%}
-
-// Floating comparisons that don't require any fixup for the unordered case
-operand cmpOpUCF() %{
-  match(Bool);
-  predicate(n->as_Bool()->_test._test == BoolTest::lt ||
-            n->as_Bool()->_test._test == BoolTest::ge ||
-            n->as_Bool()->_test._test == BoolTest::le ||
-            n->as_Bool()->_test._test == BoolTest::gt);
-  format %{ "" %}
-  interface(COND_INTER) %{
-    equal(0x4, "e");
-    not_equal(0x5, "ne");
-    less(0x2, "b");
-    greater_equal(0x3, "nb");
-    less_equal(0x6, "be");
-    greater(0x7, "nbe");
-    overflow(0x0, "o");
-    no_overflow(0x1, "no");
-  %}
-%}
-
-
-// Floating comparisons that can be fixed up with extra conditional jumps
-operand cmpOpUCF2() %{
-  match(Bool);
-  predicate(n->as_Bool()->_test._test == BoolTest::ne ||
-            n->as_Bool()->_test._test == BoolTest::eq);
-  format %{ "" %}
-  interface(COND_INTER) %{
-    equal(0x4, "e");
-    not_equal(0x5, "ne");
-    less(0x2, "b");
-    greater_equal(0x3, "nb");
-    less_equal(0x6, "be");
-    greater(0x7, "nbe");
-    overflow(0x0, "o");
-    no_overflow(0x1, "no");
-  %}
-%}
-
-// Comparison Code for FP conditional move
-operand cmpOp_fcmov() %{
-  match(Bool);
-
-  predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
-            n->as_Bool()->_test._test != BoolTest::no_overflow);
-  format %{ "" %}
-  interface(COND_INTER) %{
-    equal        (0x0C8);
-    not_equal    (0x1C8);
-    less         (0x0C0);
-    greater_equal(0x1C0);
-    less_equal   (0x0D0);
-    greater      (0x1D0);
-    overflow(0x0, "o"); // not really supported by the instruction
-    no_overflow(0x1, "no"); // not really supported by the instruction
-  %}
-%}
-
-// Comparison Code used in long compares
-operand cmpOp_commute() %{
-  match(Bool);
-
-  format %{ "" %}
-  interface(COND_INTER) %{
-    equal(0x4, "e");
-    not_equal(0x5, "ne");
-    less(0xF, "g");
-    greater_equal(0xE, "le");
-    less_equal(0xD, "ge");
-    greater(0xC, "l");
-    overflow(0x0, "o");
-    no_overflow(0x1, "no");
-  %}
-%}
-
-// Comparison Code used in unsigned long compares
-operand cmpOpU_commute() %{
-  match(Bool);
-
-  format %{ "" %}
-  interface(COND_INTER) %{
-    equal(0x4, "e");
-    not_equal(0x5, "ne");
-    less(0x7, "nbe");
-    greater_equal(0x6, "be");
-    less_equal(0x3, "nb");
-    greater(0x2, "b");
-    overflow(0x0, "o");
-    no_overflow(0x1, "no");
-  %}
-%}
-
-//----------OPERAND CLASSES----------------------------------------------------
-// Operand Classes are groups of operands that are used as to simplify
-// instruction definitions by not requiring the AD writer to specify separate
-// instructions for every form of operand when the instruction accepts
-// multiple operand types with the same basic encoding and format.  The classic
-// case of this is memory operands.
-
-opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
-               indIndex, indIndexScale, indIndexScaleOffset);
-
-// Long memory operations are encoded in 2 instructions and a +4 offset.
-// This means some kind of offset is always required and you cannot use
-// an oop as the offset (done when working on static globals).
-opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
-                    indIndex, indIndexScale, indIndexScaleOffset);
-
-
-//----------PIPELINE-----------------------------------------------------------
-// Rules which define the behavior of the target architectures pipeline.
-pipeline %{
-
-//----------ATTRIBUTES---------------------------------------------------------
-attributes %{
-  variable_size_instructions;        // Fixed size instructions
-  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
-  instruction_unit_size = 1;         // An instruction is 1 bytes long
-  instruction_fetch_unit_size = 16;  // The processor fetches one line
-  instruction_fetch_units = 1;       // of 16 bytes
-
-  // List of nop instructions
-  nops( MachNop );
-%}
-
-//----------RESOURCES----------------------------------------------------------
-// Resources are the functional units available to the machine
-
-// Generic P2/P3 pipeline
-// 3 decoders, only D0 handles big operands; a "bundle" is the limit of
-// 3 instructions decoded per cycle.
-// 2 load/store ops per cycle, 1 branch, 1 FPU,
-// 2 ALU op, only ALU0 handles mul/div instructions.
-resources( D0, D1, D2, DECODE = D0 | D1 | D2,
-           MS0, MS1, MEM = MS0 | MS1,
-           BR, FPU,
-           ALU0, ALU1, ALU = ALU0 | ALU1 );
-
-//----------PIPELINE DESCRIPTION-----------------------------------------------
-// Pipeline Description specifies the stages in the machine's pipeline
-
-// Generic P2/P3 pipeline
-pipe_desc(S0, S1, S2, S3, S4, S5);
-
-//----------PIPELINE CLASSES---------------------------------------------------
-// Pipeline Classes describe the stages in which input and output are
-// referenced by the hardware pipeline.
-
-// Naming convention: ialu or fpu
-// Then: _reg
-// Then: _reg if there is a 2nd register
-// Then: _long if it's a pair of instructions implementing a long
-// Then: _fat if it requires the big decoder
-//   Or: _mem if it requires the big decoder and a memory unit.
-
-// Integer ALU reg operation
-pipe_class ialu_reg(rRegI dst) %{
-    single_instruction;
-    dst    : S4(write);
-    dst    : S3(read);
-    DECODE : S0;        // any decoder
-    ALU    : S3;        // any alu
-%}
-
-// Long ALU reg operation
-pipe_class ialu_reg_long(eRegL dst) %{
-    instruction_count(2);
-    dst    : S4(write);
-    dst    : S3(read);
-    DECODE : S0(2);     // any 2 decoders
-    ALU    : S3(2);     // both alus
-%}
-
-// Integer ALU reg operation using big decoder
-pipe_class ialu_reg_fat(rRegI dst) %{
-    single_instruction;
-    dst    : S4(write);
-    dst    : S3(read);
-    D0     : S0;        // big decoder only
-    ALU    : S3;        // any alu
-%}
-
-// Long ALU reg operation using big decoder
-pipe_class ialu_reg_long_fat(eRegL dst) %{
-    instruction_count(2);
-    dst    : S4(write);
-    dst    : S3(read);
-    D0     : S0(2);     // big decoder only; twice
-    ALU    : S3(2);     // any 2 alus
-%}
-
-// Integer ALU reg-reg operation
-pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
-    single_instruction;
-    dst    : S4(write);
-    src    : S3(read);
-    DECODE : S0;        // any decoder
-    ALU    : S3;        // any alu
-%}
-
-// Long ALU reg-reg operation
-pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
-    instruction_count(2);
-    dst    : S4(write);
-    src    : S3(read);
-    DECODE : S0(2);     // any 2 decoders
-    ALU    : S3(2);     // both alus
-%}
-
-// Integer ALU reg-reg operation
-pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
-    single_instruction;
-    dst    : S4(write);
-    src    : S3(read);
-    D0     : S0;        // big decoder only
-    ALU    : S3;        // any alu
-%}
-
-// Long ALU reg-reg operation
-pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
-    instruction_count(2);
-    dst    : S4(write);
-    src    : S3(read);
-    D0     : S0(2);     // big decoder only; twice
-    ALU    : S3(2);     // both alus
-%}
-
-// Integer ALU reg-mem operation
-pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
-    single_instruction;
-    dst    : S5(write);
-    mem    : S3(read);
-    D0     : S0;        // big decoder only
-    ALU    : S4;        // any alu
-    MEM    : S3;        // any mem
-%}
-
-// Long ALU reg-mem operation
-pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
-    instruction_count(2);
-    dst    : S5(write);
-    mem    : S3(read);
-    D0     : S0(2);     // big decoder only; twice
-    ALU    : S4(2);     // any 2 alus
-    MEM    : S3(2);     // both mems
-%}
-
-// Integer mem operation (prefetch)
-pipe_class ialu_mem(memory mem)
-%{
-    single_instruction;
-    mem    : S3(read);
-    D0     : S0;        // big decoder only
-    MEM    : S3;        // any mem
-%}
-
-// Integer Store to Memory
-pipe_class ialu_mem_reg(memory mem, rRegI src) %{
-    single_instruction;
-    mem    : S3(read);
-    src    : S5(read);
-    D0     : S0;        // big decoder only
-    ALU    : S4;        // any alu
-    MEM    : S3;
-%}
-
-// Long Store to Memory
-pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
-    instruction_count(2);
-    mem    : S3(read);
-    src    : S5(read);
-    D0     : S0(2);     // big decoder only; twice
-    ALU    : S4(2);     // any 2 alus
-    MEM    : S3(2);     // Both mems
-%}
-
-// Integer Store to Memory
-pipe_class ialu_mem_imm(memory mem) %{
-    single_instruction;
-    mem    : S3(read);
-    D0     : S0;        // big decoder only
-    ALU    : S4;        // any alu
-    MEM    : S3;
-%}
-
-// Integer ALU0 reg-reg operation
-pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
-    single_instruction;
-    dst    : S4(write);
-    src    : S3(read);
-    D0     : S0;        // Big decoder only
-    ALU0   : S3;        // only alu0
-%}
-
-// Integer ALU0 reg-mem operation
-pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
-    single_instruction;
-    dst    : S5(write);
-    mem    : S3(read);
-    D0     : S0;        // big decoder only
-    ALU0   : S4;        // ALU0 only
-    MEM    : S3;        // any mem
-%}
-
-// Integer ALU reg-reg operation
-pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{
-    single_instruction;
-    cr     : S4(write);
-    src1   : S3(read);
-    src2   : S3(read);
-    DECODE : S0;        // any decoder
-    ALU    : S3;        // any alu
-%}
-
-// Integer ALU reg-imm operation
-pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
-    single_instruction;
-    cr     : S4(write);
-    src1   : S3(read);
-    DECODE : S0;        // any decoder
-    ALU    : S3;        // any alu
-%}
-
-// Integer ALU reg-mem operation
-pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
-    single_instruction;
-    cr     : S4(write);
-    src1   : S3(read);
-    src2   : S3(read);
-    D0     : S0;        // big decoder only
-    ALU    : S4;        // any alu
-    MEM    : S3;
-%}
-
-// Conditional move reg-reg
-pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
-    instruction_count(4);
-    y      : S4(read);
-    q      : S3(read);
-    p      : S3(read);
-    DECODE : S0(4);     // any decoder
-%}
-
-// Conditional move reg-reg
-pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
-    single_instruction;
-    dst    : S4(write);
-    src    : S3(read);
-    cr     : S3(read);
-    DECODE : S0;        // any decoder
-%}
-
-// Conditional move reg-mem
-pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
-    single_instruction;
-    dst    : S4(write);
-    src    : S3(read);
-    cr     : S3(read);
-    DECODE : S0;        // any decoder
-    MEM    : S3;
-%}
-
-// Conditional move reg-reg long
-pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
-    single_instruction;
-    dst    : S4(write);
-    src    : S3(read);
-    cr     : S3(read);
-    DECODE : S0(2);     // any 2 decoders
-%}
-
-// Conditional move double reg-reg
-pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
-    single_instruction;
-    dst    : S4(write);
-    src    : S3(read);
-    cr     : S3(read);
-    DECODE : S0;        // any decoder
-%}
-
-// Float reg-reg operation
-pipe_class fpu_reg(regDPR dst) %{
-    instruction_count(2);
-    dst    : S3(read);
-    DECODE : S0(2);     // any 2 decoders
-    FPU    : S3;
-%}
-
-// Float reg-reg operation
-pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
-    instruction_count(2);
-    dst    : S4(write);
-    src    : S3(read);
-    DECODE : S0(2);     // any 2 decoders
-    FPU    : S3;
-%}
-
-// Float reg-reg operation
-pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
-    instruction_count(3);
-    dst    : S4(write);
-    src1   : S3(read);
-    src2   : S3(read);
-    DECODE : S0(3);     // any 3 decoders
-    FPU    : S3(2);
-%}
-
-// Float reg-reg operation
-pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
-    instruction_count(4);
-    dst    : S4(write);
-    src1   : S3(read);
-    src2   : S3(read);
-    src3   : S3(read);
-    DECODE : S0(4);     // any 3 decoders
-    FPU    : S3(2);
-%}
-
-// Float reg-reg operation
-pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
-    instruction_count(4);
-    dst    : S4(write);
-    src1   : S3(read);
-    src2   : S3(read);
-    src3   : S3(read);
-    DECODE : S1(3);     // any 3 decoders
-    D0     : S0;        // Big decoder only
-    FPU    : S3(2);
-    MEM    : S3;
-%}
-
-// Float reg-mem operation
-pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
-    instruction_count(2);
-    dst    : S5(write);
-    mem    : S3(read);
-    D0     : S0;        // big decoder only
-    DECODE : S1;        // any decoder for FPU POP
-    FPU    : S4;
-    MEM    : S3;        // any mem
-%}
-
-// Float reg-mem operation
-pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
-    instruction_count(3);
-    dst    : S5(write);
-    src1   : S3(read);
-    mem    : S3(read);
-    D0     : S0;        // big decoder only
-    DECODE : S1(2);     // any decoder for FPU POP
-    FPU    : S4;
-    MEM    : S3;        // any mem
-%}
-
-// Float mem-reg operation
-pipe_class fpu_mem_reg(memory mem, regDPR src) %{
-    instruction_count(2);
-    src    : S5(read);
-    mem    : S3(read);
-    DECODE : S0;        // any decoder for FPU PUSH
-    D0     : S1;        // big decoder only
-    FPU    : S4;
-    MEM    : S3;        // any mem
-%}
-
-pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
-    instruction_count(3);
-    src1   : S3(read);
-    src2   : S3(read);
-    mem    : S3(read);
-    DECODE : S0(2);     // any decoder for FPU PUSH
-    D0     : S1;        // big decoder only
-    FPU    : S4;
-    MEM    : S3;        // any mem
-%}
-
-pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
-    instruction_count(3);
-    src1   : S3(read);
-    src2   : S3(read);
-    mem    : S4(read);
-    DECODE : S0;        // any decoder for FPU PUSH
-    D0     : S0(2);     // big decoder only
-    FPU    : S4;
-    MEM    : S3(2);     // any mem
-%}
-
-pipe_class fpu_mem_mem(memory dst, memory src1) %{
-    instruction_count(2);
-    src1   : S3(read);
-    dst    : S4(read);
-    D0     : S0(2);     // big decoder only
-    MEM    : S3(2);     // any mem
-%}
-
-pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
-    instruction_count(3);
-    src1   : S3(read);
-    src2   : S3(read);
-    dst    : S4(read);
-    D0     : S0(3);     // big decoder only
-    FPU    : S4;
-    MEM    : S3(3);     // any mem
-%}
-
-pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
-    instruction_count(3);
-    src1   : S4(read);
-    mem    : S4(read);
-    DECODE : S0;        // any decoder for FPU PUSH
-    D0     : S0(2);     // big decoder only
-    FPU    : S4;
-    MEM    : S3(2);     // any mem
-%}
-
-// Float load constant
-pipe_class fpu_reg_con(regDPR dst) %{
-    instruction_count(2);
-    dst    : S5(write);
-    D0     : S0;        // big decoder only for the load
-    DECODE : S1;        // any decoder for FPU POP
-    FPU    : S4;
-    MEM    : S3;        // any mem
-%}
-
-// Float load constant
-pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
-    instruction_count(3);
-    dst    : S5(write);
-    src    : S3(read);
-    D0     : S0;        // big decoder only for the load
-    DECODE : S1(2);     // any decoder for FPU POP
-    FPU    : S4;
-    MEM    : S3;        // any mem
-%}
-
-// UnConditional branch
-pipe_class pipe_jmp( label labl ) %{
-    single_instruction;
-    BR   : S3;
-%}
-
-// Conditional branch
-pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
-    single_instruction;
-    cr    : S1(read);
-    BR    : S3;
-%}
-
-// Allocation idiom
-pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
-    instruction_count(1); force_serialization;
-    fixed_latency(6);
-    heap_ptr : S3(read);
-    DECODE   : S0(3);
-    D0       : S2;
-    MEM      : S3;
-    ALU      : S3(2);
-    dst      : S5(write);
-    BR       : S5;
-%}
-
-// Generic big/slow expanded idiom
-pipe_class pipe_slow(  ) %{
-    instruction_count(10); multiple_bundles; force_serialization;
-    fixed_latency(100);
-    D0  : S0(2);
-    MEM : S3(2);
-%}
-
-// The real do-nothing guy
-pipe_class empty( ) %{
-    instruction_count(0);
-%}
-
-// Define the class for the Nop node
-define %{
-   MachNop = empty;
-%}
-
-%}
-
-//----------INSTRUCTIONS-------------------------------------------------------
-//
-// match      -- States which machine-independent subtree may be replaced
-//               by this instruction.
-// ins_cost   -- The estimated cost of this instruction is used by instruction
-//               selection to identify a minimum cost tree of machine
-//               instructions that matches a tree of machine-independent
-//               instructions.
-// format     -- A string providing the disassembly for this instruction.
-//               The value of an instruction's operand may be inserted
-//               by referring to it with a '$' prefix.
-// opcode     -- Three instruction opcodes may be provided.  These are referred
-//               to within an encode class as $primary, $secondary, and $tertiary
-//               respectively.  The primary opcode is commonly used to
-//               indicate the type of machine instruction, while secondary
-//               and tertiary are often used for prefix options or addressing
-//               modes.
-// ins_encode -- A list of encode classes with parameters. The encode class
-//               name must have been defined in an 'enc_class' specification
-//               in the encode section of the architecture description.
-
-// Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
-// Load Float
-instruct MoveF2LEG(legRegF dst, regF src) %{
-  match(Set dst src);
-  format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
-  ins_encode %{
-    ShouldNotReachHere();
-  %}
-  ins_pipe( fpu_reg_reg );
-%}
-
-// Load Float
-instruct MoveLEG2F(regF dst, legRegF src) %{
-  match(Set dst src);
-  format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
-  ins_encode %{
-    ShouldNotReachHere();
-  %}
-  ins_pipe( fpu_reg_reg );
-%}
-
-// Load Float
-instruct MoveF2VL(vlRegF dst, regF src) %{
-  match(Set dst src);
-  format %{ "movss $dst,$src\t! load float (4 bytes)" %}
-  ins_encode %{
-    ShouldNotReachHere();
-  %}
-  ins_pipe( fpu_reg_reg );
-%}
-
-// Load Float
-instruct MoveVL2F(regF dst, vlRegF src) %{
-  match(Set dst src);
-  format %{ "movss $dst,$src\t! load float (4 bytes)" %}
-  ins_encode %{
-    ShouldNotReachHere();
-  %}
-  ins_pipe( fpu_reg_reg );
-%}
-
-
-
-// Load Double
-instruct MoveD2LEG(legRegD dst, regD src) %{
-  match(Set dst src);
-  format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
-  ins_encode %{
-    ShouldNotReachHere();
-  %}
-  ins_pipe( fpu_reg_reg );
-%}
-
-// Load Double
-instruct MoveLEG2D(regD dst, legRegD src) %{
-  match(Set dst src);
-  format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
-  ins_encode %{
-    ShouldNotReachHere();
-  %}
-  ins_pipe( fpu_reg_reg );
-%}
-
-// Load Double
-instruct MoveD2VL(vlRegD dst, regD src) %{
-  match(Set dst src);
-  format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
-  ins_encode %{
-    ShouldNotReachHere();
-  %}
-  ins_pipe( fpu_reg_reg );
-%}
-
-// Load Double
-instruct MoveVL2D(regD dst, vlRegD src) %{
-  match(Set dst src);
-  format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
-  ins_encode %{
-    ShouldNotReachHere();
-  %}
-  ins_pipe( fpu_reg_reg );
-%}
-
-//----------BSWAP-Instruction--------------------------------------------------
-instruct bytes_reverse_int(rRegI dst) %{
-  match(Set dst (ReverseBytesI dst));
-
-  format %{ "BSWAP  $dst" %}
-  opcode(0x0F, 0xC8);
-  ins_encode( OpcP, OpcSReg(dst) );
-  ins_pipe( ialu_reg );
-%}
-
-instruct bytes_reverse_long(eRegL dst) %{
-  match(Set dst (ReverseBytesL dst));
-
-  format %{ "BSWAP  $dst.lo\n\t"
-            "BSWAP  $dst.hi\n\t"
-            "XCHG   $dst.lo $dst.hi" %}
-
-  ins_cost(125);
-  ins_encode( bswap_long_bytes(dst) );
-  ins_pipe( ialu_reg_reg);
-%}
-
-instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
-  match(Set dst (ReverseBytesUS dst));
-  effect(KILL cr);
-
-  format %{ "BSWAP  $dst\n\t"
-            "SHR    $dst,16\n\t" %}
-  ins_encode %{
-    __ bswapl($dst$$Register);
-    __ shrl($dst$$Register, 16);
-  %}
-  ins_pipe( ialu_reg );
-%}
-
-instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
-  match(Set dst (ReverseBytesS dst));
-  effect(KILL cr);
-
-  format %{ "BSWAP  $dst\n\t"
-            "SAR    $dst,16\n\t" %}
-  ins_encode %{
-    __ bswapl($dst$$Register);
-    __ sarl($dst$$Register, 16);
-  %}
-  ins_pipe( ialu_reg );
-%}
-
-
-//---------- Zeros Count Instructions ------------------------------------------
-
-instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
-  predicate(UseCountLeadingZerosInstruction);
-  match(Set dst (CountLeadingZerosI src));
-  effect(KILL cr);
-
-  format %{ "LZCNT  $dst, $src\t# count leading zeros (int)" %}
-  ins_encode %{
-    __ lzcntl($dst$$Register, $src$$Register);
-  %}
-  ins_pipe(ialu_reg);
-%}
-
-instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
-  predicate(!UseCountLeadingZerosInstruction);
-  match(Set dst (CountLeadingZerosI src));
-  effect(KILL cr);
-
-  format %{ "BSR    $dst, $src\t# count leading zeros (int)\n\t"
-            "JNZ    skip\n\t"
-            "MOV    $dst, -1\n"
-      "skip:\n\t"
-            "NEG    $dst\n\t"
-            "ADD    $dst, 31" %}
-  ins_encode %{
-    Register Rdst = $dst$$Register;
-    Register Rsrc = $src$$Register;
-    Label skip;
-    __ bsrl(Rdst, Rsrc);
-    __ jccb(Assembler::notZero, skip);
-    __ movl(Rdst, -1);
-    __ bind(skip);
-    __ negl(Rdst);
-    __ addl(Rdst, BitsPerInt - 1);
-  %}
-  ins_pipe(ialu_reg);
-%}
-
-instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
-  predicate(UseCountLeadingZerosInstruction);
-  match(Set dst (CountLeadingZerosL src));
-  effect(TEMP dst, KILL cr);
-
-  format %{ "LZCNT  $dst, $src.hi\t# count leading zeros (long)\n\t"
-            "JNC    done\n\t"
-            "LZCNT  $dst, $src.lo\n\t"
-            "ADD    $dst, 32\n"
-      "done:" %}
-  ins_encode %{
-    Register Rdst = $dst$$Register;
-    Register Rsrc = $src$$Register;
-    Label done;
-    __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
-    __ jccb(Assembler::carryClear, done);
-    __ lzcntl(Rdst, Rsrc);
-    __ addl(Rdst, BitsPerInt);
-    __ bind(done);
-  %}
-  ins_pipe(ialu_reg);
-%}
-
-instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
-  predicate(!UseCountLeadingZerosInstruction);
-  match(Set dst (CountLeadingZerosL src));
-  effect(TEMP dst, KILL cr);
-
-  format %{ "BSR    $dst, $src.hi\t# count leading zeros (long)\n\t"
-            "JZ     msw_is_zero\n\t"
-            "ADD    $dst, 32\n\t"
-            "JMP    not_zero\n"
-      "msw_is_zero:\n\t"
-            "BSR    $dst, $src.lo\n\t"
-            "JNZ    not_zero\n\t"
-            "MOV    $dst, -1\n"
-      "not_zero:\n\t"
-            "NEG    $dst\n\t"
-            "ADD    $dst, 63\n" %}
- ins_encode %{
-    Register Rdst = $dst$$Register;
-    Register Rsrc = $src$$Register;
-    Label msw_is_zero;
-    Label not_zero;
-    __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
-    __ jccb(Assembler::zero, msw_is_zero);
-    __ addl(Rdst, BitsPerInt);
-    __ jmpb(not_zero);
-    __ bind(msw_is_zero);
-    __ bsrl(Rdst, Rsrc);
-    __ jccb(Assembler::notZero, not_zero);
-    __ movl(Rdst, -1);
-    __ bind(not_zero);
-    __ negl(Rdst);
-    __ addl(Rdst, BitsPerLong - 1);
-  %}
-  ins_pipe(ialu_reg);
-%}
-
-instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
-  predicate(UseCountTrailingZerosInstruction);
-  match(Set dst (CountTrailingZerosI src));
-  effect(KILL cr);
-
-  format %{ "TZCNT    $dst, $src\t# count trailing zeros (int)" %}
-  ins_encode %{
-    __ tzcntl($dst$$Register, $src$$Register);
-  %}
-  ins_pipe(ialu_reg);
-%}
-
-instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
-  predicate(!UseCountTrailingZerosInstruction);
-  match(Set dst (CountTrailingZerosI src));
-  effect(KILL cr);
-
-  format %{ "BSF    $dst, $src\t# count trailing zeros (int)\n\t"
-            "JNZ    done\n\t"
-            "MOV    $dst, 32\n"
-      "done:" %}
-  ins_encode %{
-    Register Rdst = $dst$$Register;
-    Label done;
-    __ bsfl(Rdst, $src$$Register);
-    __ jccb(Assembler::notZero, done);
-    __ movl(Rdst, BitsPerInt);
-    __ bind(done);
-  %}
-  ins_pipe(ialu_reg);
-%}
-
-instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
-  predicate(UseCountTrailingZerosInstruction);
-  match(Set dst (CountTrailingZerosL src));
-  effect(TEMP dst, KILL cr);
-
-  format %{ "TZCNT  $dst, $src.lo\t# count trailing zeros (long) \n\t"
-            "JNC    done\n\t"
-            "TZCNT  $dst, $src.hi\n\t"
-            "ADD    $dst, 32\n"
-            "done:" %}
-  ins_encode %{
-    Register Rdst = $dst$$Register;
-    Register Rsrc = $src$$Register;
-    Label done;
-    __ tzcntl(Rdst, Rsrc);
-    __ jccb(Assembler::carryClear, done);
-    __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
-    __ addl(Rdst, BitsPerInt);
-    __ bind(done);
-  %}
-  ins_pipe(ialu_reg);
-%}
-
-instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
-  predicate(!UseCountTrailingZerosInstruction);
-  match(Set dst (CountTrailingZerosL src));
-  effect(TEMP dst, KILL cr);
-
-  format %{ "BSF    $dst, $src.lo\t# count trailing zeros (long)\n\t"
-            "JNZ    done\n\t"
-            "BSF    $dst, $src.hi\n\t"
-            "JNZ    msw_not_zero\n\t"
-            "MOV    $dst, 32\n"
-      "msw_not_zero:\n\t"
-            "ADD    $dst, 32\n"
-      "done:" %}
-  ins_encode %{
-    Register Rdst = $dst$$Register;
-    Register Rsrc = $src$$Register;
-    Label msw_not_zero;
-    Label done;
-    __ bsfl(Rdst, Rsrc);
-    __ jccb(Assembler::notZero, done);
-    __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
-    __ jccb(Assembler::notZero, msw_not_zero);
-    __ movl(Rdst, BitsPerInt);
-    __ bind(msw_not_zero);
-    __ addl(Rdst, BitsPerInt);
-    __ bind(done);
-  %}
-  ins_pipe(ialu_reg);
-%}
-
-
-//---------- Population Count Instructions -------------------------------------
-
-instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
-  predicate(UsePopCountInstruction);
-  match(Set dst (PopCountI src));
-  effect(KILL cr);
-
-  format %{ "POPCNT $dst, $src" %}
-  ins_encode %{
-    __ popcntl($dst$$Register, $src$$Register);
-  %}
-  ins_pipe(ialu_reg);
-%}
-
-instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
-  predicate(UsePopCountInstruction);
-  match(Set dst (PopCountI (LoadI mem)));
-  effect(KILL cr);
-
-  format %{ "POPCNT $dst, $mem" %}
-  ins_encode %{
-    __ popcntl($dst$$Register, $mem$$Address);
-  %}
-  ins_pipe(ialu_reg);
-%}
-
-// Note: Long.bitCount(long) returns an int.
-instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
-  predicate(UsePopCountInstruction);
-  match(Set dst (PopCountL src));
-  effect(KILL cr, TEMP tmp, TEMP dst);
-
-  format %{ "POPCNT $dst, $src.lo\n\t"
-            "POPCNT $tmp, $src.hi\n\t"
-            "ADD    $dst, $tmp" %}
-  ins_encode %{
-    __ popcntl($dst$$Register, $src$$Register);
-    __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
-    __ addl($dst$$Register, $tmp$$Register);
-  %}
-  ins_pipe(ialu_reg);
-%}
-
-// Note: Long.bitCount(long) returns an int.
-instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
-  predicate(UsePopCountInstruction);
-  match(Set dst (PopCountL (LoadL mem)));
-  effect(KILL cr, TEMP tmp, TEMP dst);
-
-  format %{ "POPCNT $dst, $mem\n\t"
-            "POPCNT $tmp, $mem+4\n\t"
-            "ADD    $dst, $tmp" %}
-  ins_encode %{
-    //__ popcntl($dst$$Register, $mem$$Address$$first);
-    //__ popcntl($tmp$$Register, $mem$$Address$$second);
-    __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
-    __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
-    __ addl($dst$$Register, $tmp$$Register);
-  %}
-  ins_pipe(ialu_reg);
-%}
-
-
-//----------Load/Store/Move Instructions---------------------------------------
-//----------Load Instructions--------------------------------------------------
-// Load Byte (8bit signed)
-instruct loadB(xRegI dst, memory mem) %{
-  match(Set dst (LoadB mem));
-
-  ins_cost(125);
-  format %{ "MOVSX8 $dst,$mem\t# byte" %}
-
-  ins_encode %{
-    __ movsbl($dst$$Register, $mem$$Address);
-  %}
-
-  ins_pipe(ialu_reg_mem);
-%}
-
-// Load Byte (8bit signed) into Long Register
-instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
-  match(Set dst (ConvI2L (LoadB mem)));
-  effect(KILL cr);
-
-  ins_cost(375);
-  format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
-            "MOV    $dst.hi,$dst.lo\n\t"
-            "SAR    $dst.hi,7" %}
-
-  ins_encode %{
-    __ movsbl($dst$$Register, $mem$$Address);
-    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
-    __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already signed extended.
-  %}
-
-  ins_pipe(ialu_reg_mem);
-%}
-
-// Load Unsigned Byte (8bit UNsigned)
-instruct loadUB(xRegI dst, memory mem) %{
-  match(Set dst (LoadUB mem));
-
-  ins_cost(125);
-  format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}
-
-  ins_encode %{
-    __ movzbl($dst$$Register, $mem$$Address);
-  %}
-
-  ins_pipe(ialu_reg_mem);
-%}
-
-// Load Unsigned Byte (8 bit UNsigned) into Long Register
-instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
-  match(Set dst (ConvI2L (LoadUB mem)));
-  effect(KILL cr);
-
-  ins_cost(250);
-  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
-            "XOR    $dst.hi,$dst.hi" %}
-
-  ins_encode %{
-    Register Rdst = $dst$$Register;
-    __ movzbl(Rdst, $mem$$Address);
-    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
-  %}
-
-  ins_pipe(ialu_reg_mem);
-%}
-
-// Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
-instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
-  match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
-  effect(KILL cr);
-
-  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
-            "XOR    $dst.hi,$dst.hi\n\t"
-            "AND    $dst.lo,right_n_bits($mask, 8)" %}
-  ins_encode %{
-    Register Rdst = $dst$$Register;
-    __ movzbl(Rdst, $mem$$Address);
-    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
-    __ andl(Rdst, $mask$$constant & right_n_bits(8));
-  %}
-  ins_pipe(ialu_reg_mem);
-%}
-
-// Load Short (16bit signed)
-instruct loadS(rRegI dst, memory mem) %{
-  match(Set dst (LoadS mem));
-
-  ins_cost(125);
-  format %{ "MOVSX  $dst,$mem\t# short" %}
-
-  ins_encode %{
-    __ movswl($dst$$Register, $mem$$Address);
-  %}
-
-  ins_pipe(ialu_reg_mem);
-%}
-
-// Load Short (16 bit signed) to Byte (8 bit signed)
-instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
-  match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
-
-  ins_cost(125);
-  format %{ "MOVSX  $dst, $mem\t# short -> byte" %}
-  ins_encode %{
-    __ movsbl($dst$$Register, $mem$$Address);
-  %}
-  ins_pipe(ialu_reg_mem);
-%}
-
-// Load Short (16bit signed) into Long Register
-instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
-  match(Set dst (ConvI2L (LoadS mem)));
-  effect(KILL cr);
-
-  ins_cost(375);
-  format %{ "MOVSX  $dst.lo,$mem\t# short -> long\n\t"
-            "MOV    $dst.hi,$dst.lo\n\t"
-            "SAR    $dst.hi,15" %}
-
-  ins_encode %{
-    __ movswl($dst$$Register, $mem$$Address);
-    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
-    __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already signed extended.
-  %}
-
-  ins_pipe(ialu_reg_mem);
-%}
-
-// Load Unsigned Short/Char (16bit unsigned)
-instruct loadUS(rRegI dst, memory mem) %{
-  match(Set dst (LoadUS mem));
-
-  ins_cost(125);
-  format %{ "MOVZX  $dst,$mem\t# ushort/char -> int" %}
-
-  ins_encode %{
-    __ movzwl($dst$$Register, $mem$$Address);
-  %}
-
-  ins_pipe(ialu_reg_mem);
-%}
-
-// Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
-instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
-  match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
-
-  ins_cost(125);
-  format %{ "MOVSX  $dst, $mem\t# ushort -> byte" %}
-  ins_encode %{
-    __ movsbl($dst$$Register, $mem$$Address);
-  %}
-  ins_pipe(ialu_reg_mem);
-%}
-
-// Load Unsigned Short/Char (16 bit UNsigned) into Long Register
-instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
-  match(Set dst (ConvI2L (LoadUS mem)));
-  effect(KILL cr);
-
-  ins_cost(250);
-  format %{ "MOVZX  $dst.lo,$mem\t# ushort/char -> long\n\t"
-            "XOR    $dst.hi,$dst.hi" %}
-
-  ins_encode %{
-    __ movzwl($dst$$Register, $mem$$Address);
-    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
-  %}
-
-  ins_pipe(ialu_reg_mem);
-%}
-
-// Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
-instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
-  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
-  effect(KILL cr);
-
-  format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
-            "XOR    $dst.hi,$dst.hi" %}
-  ins_encode %{
-    Register Rdst = $dst$$Register;
-    __ movzbl(Rdst, $mem$$Address);
-    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
-  %}
-  ins_pipe(ialu_reg_mem);
-%}
-
-// Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register
-instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
-  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
-  effect(KILL cr);
-
-  format %{ "MOVZX  $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
-            "XOR    $dst.hi,$dst.hi\n\t"
-            "AND    $dst.lo,right_n_bits($mask, 16)" %}
-  ins_encode %{
-    Register Rdst = $dst$$Register;
-    __ movzwl(Rdst, $mem$$Address);
-    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
-    __ andl(Rdst, $mask$$constant & right_n_bits(16));
-  %}
-  ins_pipe(ialu_reg_mem);
-%}
-
-// Load Integer
-instruct loadI(rRegI dst, memory mem) %{
-  match(Set dst (LoadI mem));
-
-  ins_cost(125);
-  format %{ "MOV    $dst,$mem\t# int" %}
-
-  ins_encode %{
-    __ movl($dst$$Register, $mem$$Address);
-  %}
-
-  ins_pipe(ialu_reg_mem);
-%}
-
-// Load Integer (32 bit signed) to Byte (8 bit signed)
-instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
-  match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
-
-  ins_cost(125);
-  format %{ "MOVSX  $dst, $mem\t# int -> byte" %}
-  ins_encode %{
-    __ movsbl($dst$$Register, $mem$$Address);
-  %}
-  ins_pipe(ialu_reg_mem);
-%}
-
-// Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
-instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
-  match(Set dst (AndI (LoadI mem) mask));
-
-  ins_cost(125);
-  format %{ "MOVZX  $dst, $mem\t# int -> ubyte" %}
-  ins_encode %{
-    __ movzbl($dst$$Register, $mem$$Address);
-  %}
-  ins_pipe(ialu_reg_mem);
-%}
-
-// Load Integer (32 bit signed) to Short (16 bit signed)
-instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
-  match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
-
-  ins_cost(125);
-  format %{ "MOVSX  $dst, $mem\t# int -> short" %}
-  ins_encode %{
-    __ movswl($dst$$Register, $mem$$Address);
-  %}
-  ins_pipe(ialu_reg_mem);
-%}
-
-// Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
-instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
-  match(Set dst (AndI (LoadI mem) mask));
-
-  ins_cost(125);
-  format %{ "MOVZX  $dst, $mem\t# int -> ushort/char" %}
-  ins_encode %{
-    __ movzwl($dst$$Register, $mem$$Address);
-  %}
-  ins_pipe(ialu_reg_mem);
-%}
-
-// Load Integer into Long Register
-instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
-  match(Set dst (ConvI2L (LoadI mem)));
-  effect(KILL cr);
-
-  ins_cost(375);
-  format %{ "MOV    $dst.lo,$mem\t# int -> long\n\t"
-            "MOV    $dst.hi,$dst.lo\n\t"
-            "SAR    $dst.hi,31" %}
-
-  ins_encode %{
-    __ movl($dst$$Register, $mem$$Address);
-    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
-    __ sarl(HIGH_FROM_LOW($dst$$Register), 31);
-  %}
-
-  ins_pipe(ialu_reg_mem);
-%}
-
-// Load Integer with mask 0xFF into Long Register
-instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
-  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
-  effect(KILL cr);
-
-  format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
-            "XOR    $dst.hi,$dst.hi" %}
-  ins_encode %{
-    Register Rdst = $dst$$Register;
-    __ movzbl(Rdst, $mem$$Address);
-    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
-  %}
-  ins_pipe(ialu_reg_mem);
-%}
-
-// Load Integer with mask 0xFFFF into Long Register
-instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
-  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
-  effect(KILL cr);
-
-  format %{ "MOVZX  $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
-            "XOR    $dst.hi,$dst.hi" %}
-  ins_encode %{
-    Register Rdst = $dst$$Register;
-    __ movzwl(Rdst, $mem$$Address);
-    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
-  %}
-  ins_pipe(ialu_reg_mem);
-%}
-
-// Load Integer with 31-bit mask into Long Register
-instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
-  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
-  effect(KILL cr);
-
-  format %{ "MOV    $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
-            "XOR    $dst.hi,$dst.hi\n\t"
-            "AND    $dst.lo,$mask" %}
-  ins_encode %{
-    Register Rdst = $dst$$Register;
-    __ movl(Rdst, $mem$$Address);
-    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
-    __ andl(Rdst, $mask$$constant);
-  %}
-  ins_pipe(ialu_reg_mem);
-%}
-
-// Load Unsigned Integer into Long Register
-instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
-  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
-  effect(KILL cr);
-
-  ins_cost(250);
-  format %{ "MOV    $dst.lo,$mem\t# uint -> long\n\t"
-            "XOR    $dst.hi,$dst.hi" %}
-
-  ins_encode %{
-    __ movl($dst$$Register, $mem$$Address);
-    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
-  %}
-
-  ins_pipe(ialu_reg_mem);
-%}
-
-// Load Long.  Cannot clobber address while loading, so restrict address
-// register to ESI
-instruct loadL(eRegL dst, load_long_memory mem) %{
-  predicate(!((LoadLNode*)n)->require_atomic_access());
-  match(Set dst (LoadL mem));
-
-  ins_cost(250);
-  format %{ "MOV    $dst.lo,$mem\t# long\n\t"
-            "MOV    $dst.hi,$mem+4" %}
-
-  ins_encode %{
-    Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
-    Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
-    __ movl($dst$$Register, Amemlo);
-    __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
-  %}
-
-  ins_pipe(ialu_reg_long_mem);
-%}
-
-// Volatile Load Long.  Must be atomic, so do 64-bit FILD
-// then store it down to the stack and reload on the int
-// side.
-instruct loadL_volatile(stackSlotL dst, memory mem) %{
-  predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
-  match(Set dst (LoadL mem));
-
-  ins_cost(200);
-  format %{ "FILD   $mem\t# Atomic volatile long load\n\t"
-            "FISTp  $dst" %}
-  ins_encode(enc_loadL_volatile(mem,dst));
-  ins_pipe( fpu_reg_mem );
-%}
-
-instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
-  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
-  match(Set dst (LoadL mem));
-  effect(TEMP tmp);
-  ins_cost(180);
-  format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
-            "MOVSD  $dst,$tmp" %}
-  ins_encode %{
-    __ movdbl($tmp$$XMMRegister, $mem$$Address);
-    __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
-  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
-  match(Set dst (LoadL mem));
-  effect(TEMP tmp);
-  ins_cost(160);
-  format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
-            "MOVD   $dst.lo,$tmp\n\t"
-            "PSRLQ  $tmp,32\n\t"
-            "MOVD   $dst.hi,$tmp" %}
-  ins_encode %{
-    __ movdbl($tmp$$XMMRegister, $mem$$Address);
-    __ movdl($dst$$Register, $tmp$$XMMRegister);
-    __ psrlq($tmp$$XMMRegister, 32);
-    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-// Load Range
-instruct loadRange(rRegI dst, memory mem) %{
-  match(Set dst (LoadRange mem));
-
-  ins_cost(125);
-  format %{ "MOV    $dst,$mem" %}
-  opcode(0x8B);
-  ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
-  ins_pipe( ialu_reg_mem );
-%}
-
-
-// Load Pointer
-instruct loadP(eRegP dst, memory mem) %{
-  match(Set dst (LoadP mem));
-
-  ins_cost(125);
-  format %{ "MOV    $dst,$mem" %}
-  opcode(0x8B);
-  ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
-  ins_pipe( ialu_reg_mem );
-%}
-
-// Load Klass Pointer
-instruct loadKlass(eRegP dst, memory mem) %{
-  match(Set dst (LoadKlass mem));
-
-  ins_cost(125);
-  format %{ "MOV    $dst,$mem" %}
-  opcode(0x8B);
-  ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
-  ins_pipe( ialu_reg_mem );
-%}
-
-// Load Double
-instruct loadDPR(regDPR dst, memory mem) %{
-  predicate(UseSSE<=1);
-  match(Set dst (LoadD mem));
-
-  ins_cost(150);
-  format %{ "FLD_D  ST,$mem\n\t"
-            "FSTP   $dst" %}
-  opcode(0xDD);               /* DD /0 */
-  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem),
-              Pop_Reg_DPR(dst), ClearInstMark );
-  ins_pipe( fpu_reg_mem );
-%}
-
-// Load Double to XMM
-instruct loadD(regD dst, memory mem) %{
-  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
-  match(Set dst (LoadD mem));
-  ins_cost(145);
-  format %{ "MOVSD  $dst,$mem" %}
-  ins_encode %{
-    __ movdbl ($dst$$XMMRegister, $mem$$Address);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct loadD_partial(regD dst, memory mem) %{
-  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
-  match(Set dst (LoadD mem));
-  ins_cost(145);
-  format %{ "MOVLPD $dst,$mem" %}
-  ins_encode %{
-    __ movdbl ($dst$$XMMRegister, $mem$$Address);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-// Load to XMM register (single-precision floating point)
-// MOVSS instruction
-instruct loadF(regF dst, memory mem) %{
-  predicate(UseSSE>=1);
-  match(Set dst (LoadF mem));
-  ins_cost(145);
-  format %{ "MOVSS  $dst,$mem" %}
-  ins_encode %{
-    __ movflt ($dst$$XMMRegister, $mem$$Address);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-// Load Float
-instruct loadFPR(regFPR dst, memory mem) %{
-  predicate(UseSSE==0);
-  match(Set dst (LoadF mem));
-
-  ins_cost(150);
-  format %{ "FLD_S  ST,$mem\n\t"
-            "FSTP   $dst" %}
-  opcode(0xD9);               /* D9 /0 */
-  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem),
-              Pop_Reg_FPR(dst), ClearInstMark );
-  ins_pipe( fpu_reg_mem );
-%}
-
-// Load Effective Address
-instruct leaP8(eRegP dst, indOffset8 mem) %{
-  match(Set dst mem);
-
-  ins_cost(110);
-  format %{ "LEA    $dst,$mem" %}
-  opcode(0x8D);
-  ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
-  ins_pipe( ialu_reg_reg_fat );
-%}
-
-instruct leaP32(eRegP dst, indOffset32 mem) %{
-  match(Set dst mem);
-
-  ins_cost(110);
-  format %{ "LEA    $dst,$mem" %}
-  opcode(0x8D);
-  ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
-  ins_pipe( ialu_reg_reg_fat );
-%}
-
-instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
-  match(Set dst mem);
-
-  ins_cost(110);
-  format %{ "LEA    $dst,$mem" %}
-  opcode(0x8D);
-  ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
-  ins_pipe( ialu_reg_reg_fat );
-%}
-
-instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
-  match(Set dst mem);
-
-  ins_cost(110);
-  format %{ "LEA    $dst,$mem" %}
-  opcode(0x8D);
-  ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
-  ins_pipe( ialu_reg_reg_fat );
-%}
-
-instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
-  match(Set dst mem);
-
-  ins_cost(110);
-  format %{ "LEA    $dst,$mem" %}
-  opcode(0x8D);
-  ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
-  ins_pipe( ialu_reg_reg_fat );
-%}
-
-// Load Constant
-instruct loadConI(rRegI dst, immI src) %{
-  match(Set dst src);
-
-  format %{ "MOV    $dst,$src" %}
-  ins_encode( SetInstMark, LdImmI(dst, src), ClearInstMark );
-  ins_pipe( ialu_reg_fat );
-%}
-
-// Load Constant zero
-instruct loadConI0(rRegI dst, immI_0 src, eFlagsReg cr) %{
-  match(Set dst src);
-  effect(KILL cr);
-
-  ins_cost(50);
-  format %{ "XOR    $dst,$dst" %}
-  opcode(0x33);  /* + rd */
-  ins_encode( OpcP, RegReg( dst, dst ) );
-  ins_pipe( ialu_reg );
-%}
-
-instruct loadConP(eRegP dst, immP src) %{
-  match(Set dst src);
-
-  format %{ "MOV    $dst,$src" %}
-  opcode(0xB8);  /* + rd */
-  ins_encode( SetInstMark, LdImmP(dst, src), ClearInstMark );
-  ins_pipe( ialu_reg_fat );
-%}
-
-instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
-  match(Set dst src);
-  effect(KILL cr);
-  ins_cost(200);
-  format %{ "MOV    $dst.lo,$src.lo\n\t"
-            "MOV    $dst.hi,$src.hi" %}
-  opcode(0xB8);
-  ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
-  ins_pipe( ialu_reg_long_fat );
-%}
-
-instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
-  match(Set dst src);
-  effect(KILL cr);
-  ins_cost(150);
-  format %{ "XOR    $dst.lo,$dst.lo\n\t"
-            "XOR    $dst.hi,$dst.hi" %}
-  opcode(0x33,0x33);
-  ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
-  ins_pipe( ialu_reg_long );
-%}
-
-// The instruction usage is guarded by predicate in operand immFPR().
-instruct loadConFPR(regFPR dst, immFPR con) %{
-  match(Set dst con);
-  ins_cost(125);
-  format %{ "FLD_S  ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
-            "FSTP   $dst" %}
-  ins_encode %{
-    __ fld_s($constantaddress($con));
-    __ fstp_d($dst$$reg);
-  %}
-  ins_pipe(fpu_reg_con);
-%}
-
-// The instruction usage is guarded by predicate in operand immFPR0().
-instruct loadConFPR0(regFPR dst, immFPR0 con) %{
-  match(Set dst con);
-  ins_cost(125);
-  format %{ "FLDZ   ST\n\t"
-            "FSTP   $dst" %}
-  ins_encode %{
-    __ fldz();
-    __ fstp_d($dst$$reg);
-  %}
-  ins_pipe(fpu_reg_con);
-%}
-
-// The instruction usage is guarded by predicate in operand immFPR1().
-instruct loadConFPR1(regFPR dst, immFPR1 con) %{
-  match(Set dst con);
-  ins_cost(125);
-  format %{ "FLD1   ST\n\t"
-            "FSTP   $dst" %}
-  ins_encode %{
-    __ fld1();
-    __ fstp_d($dst$$reg);
-  %}
-  ins_pipe(fpu_reg_con);
-%}
-
-// The instruction usage is guarded by predicate in operand immF().
-instruct loadConF(regF dst, immF con) %{
-  match(Set dst con);
-  ins_cost(125);
-  format %{ "MOVSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
-  ins_encode %{
-    __ movflt($dst$$XMMRegister, $constantaddress($con));
-  %}
-  ins_pipe(pipe_slow);
-%}
-
-// The instruction usage is guarded by predicate in operand immF0().
-instruct loadConF0(regF dst, immF0 src) %{
-  match(Set dst src);
-  ins_cost(100);
-  format %{ "XORPS  $dst,$dst\t# float 0.0" %}
-  ins_encode %{
-    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
-  %}
-  ins_pipe(pipe_slow);
-%}
-
-// The instruction usage is guarded by predicate in operand immDPR().
-instruct loadConDPR(regDPR dst, immDPR con) %{
-  match(Set dst con);
-  ins_cost(125);
-
-  format %{ "FLD_D  ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
-            "FSTP   $dst" %}
-  ins_encode %{
-    __ fld_d($constantaddress($con));
-    __ fstp_d($dst$$reg);
-  %}
-  ins_pipe(fpu_reg_con);
-%}
-
-// The instruction usage is guarded by predicate in operand immDPR0().
-instruct loadConDPR0(regDPR dst, immDPR0 con) %{
-  match(Set dst con);
-  ins_cost(125);
-
-  format %{ "FLDZ   ST\n\t"
-            "FSTP   $dst" %}
-  ins_encode %{
-    __ fldz();
-    __ fstp_d($dst$$reg);
-  %}
-  ins_pipe(fpu_reg_con);
-%}
-
-// The instruction usage is guarded by predicate in operand immDPR1().
-instruct loadConDPR1(regDPR dst, immDPR1 con) %{
-  match(Set dst con);
-  ins_cost(125);
-
-  format %{ "FLD1   ST\n\t"
-            "FSTP   $dst" %}
-  ins_encode %{
-    __ fld1();
-    __ fstp_d($dst$$reg);
-  %}
-  ins_pipe(fpu_reg_con);
-%}
-
-// The instruction usage is guarded by predicate in operand immD().
-instruct loadConD(regD dst, immD con) %{
-  match(Set dst con);
-  ins_cost(125);
-  format %{ "MOVSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
-  ins_encode %{
-    __ movdbl($dst$$XMMRegister, $constantaddress($con));
-  %}
-  ins_pipe(pipe_slow);
-%}
-
-// The instruction usage is guarded by predicate in operand immD0().
-instruct loadConD0(regD dst, immD0 src) %{
-  match(Set dst src);
-  ins_cost(100);
-  format %{ "XORPD  $dst,$dst\t# double 0.0" %}
-  ins_encode %{
-    __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-// Load Stack Slot
-instruct loadSSI(rRegI dst, stackSlotI src) %{
-  match(Set dst src);
-  ins_cost(125);
-
-  format %{ "MOV    $dst,$src" %}
-  opcode(0x8B);
-  ins_encode( SetInstMark, OpcP, RegMem(dst,src), ClearInstMark);
-  ins_pipe( ialu_reg_mem );
-%}
-
-instruct loadSSL(eRegL dst, stackSlotL src) %{
-  match(Set dst src);
-
-  ins_cost(200);
-  format %{ "MOV    $dst,$src.lo\n\t"
-            "MOV    $dst+4,$src.hi" %}
-  opcode(0x8B, 0x8B);
-  ins_encode( SetInstMark, OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ), ClearInstMark );
-  ins_pipe( ialu_mem_long_reg );
-%}
-
-// Load Stack Slot
-instruct loadSSP(eRegP dst, stackSlotP src) %{
-  match(Set dst src);
-  ins_cost(125);
-
-  format %{ "MOV    $dst,$src" %}
-  opcode(0x8B);
-  ins_encode( SetInstMark, OpcP, RegMem(dst,src), ClearInstMark);
-  ins_pipe( ialu_reg_mem );
-%}
-
-// Load Stack Slot
-instruct loadSSF(regFPR dst, stackSlotF src) %{
-  match(Set dst src);
-  ins_cost(125);
-
-  format %{ "FLD_S  $src\n\t"
-            "FSTP   $dst" %}
-  opcode(0xD9);               /* D9 /0, FLD m32real */
-  ins_encode( SetInstMark, OpcP, RMopc_Mem_no_oop(0x00,src),
-              Pop_Reg_FPR(dst), ClearInstMark );
-  ins_pipe( fpu_reg_mem );
-%}
-
-// Load Stack Slot
-instruct loadSSD(regDPR dst, stackSlotD src) %{
-  match(Set dst src);
-  ins_cost(125);
-
-  format %{ "FLD_D  $src\n\t"
-            "FSTP   $dst" %}
-  opcode(0xDD);               /* DD /0, FLD m64real */
-  ins_encode( SetInstMark, OpcP, RMopc_Mem_no_oop(0x00,src),
-              Pop_Reg_DPR(dst), ClearInstMark );
-  ins_pipe( fpu_reg_mem );
-%}
-
-// Prefetch instructions for allocation.
-// Must be safe to execute with invalid address (cannot fault).
-
-instruct prefetchAlloc0( memory mem ) %{
-  predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
-  match(PrefetchAllocation mem);
-  ins_cost(0);
-  size(0);
-  format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
-  ins_encode();
-  ins_pipe(empty);
-%}
-
-instruct prefetchAlloc( memory mem ) %{
-  predicate(AllocatePrefetchInstr==3);
-  match( PrefetchAllocation mem );
-  ins_cost(100);
-
-  format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
-  ins_encode %{
-    __ prefetchw($mem$$Address);
-  %}
-  ins_pipe(ialu_mem);
-%}
-
-instruct prefetchAllocNTA( memory mem ) %{
-  predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
-  match(PrefetchAllocation mem);
-  ins_cost(100);
-
-  format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
-  ins_encode %{
-    __ prefetchnta($mem$$Address);
-  %}
-  ins_pipe(ialu_mem);
-%}
-
-instruct prefetchAllocT0( memory mem ) %{
-  predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
-  match(PrefetchAllocation mem);
-  ins_cost(100);
-
-  format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
-  ins_encode %{
-    __ prefetcht0($mem$$Address);
-  %}
-  ins_pipe(ialu_mem);
-%}
-
-instruct prefetchAllocT2( memory mem ) %{
-  predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
-  match(PrefetchAllocation mem);
-  ins_cost(100);
-
-  format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
-  ins_encode %{
-    __ prefetcht2($mem$$Address);
-  %}
-  ins_pipe(ialu_mem);
-%}
-
-//----------Store Instructions-------------------------------------------------
-
-// Store Byte
-instruct storeB(memory mem, xRegI src) %{
-  match(Set mem (StoreB mem src));
-
-  ins_cost(125);
-  format %{ "MOV8   $mem,$src" %}
-  opcode(0x88);
-  ins_encode( SetInstMark, OpcP, RegMem( src, mem ), ClearInstMark );
-  ins_pipe( ialu_mem_reg );
-%}
-
-// Store Char/Short
-instruct storeC(memory mem, rRegI src) %{
-  match(Set mem (StoreC mem src));
-
-  ins_cost(125);
-  format %{ "MOV16  $mem,$src" %}
-  opcode(0x89, 0x66);
-  ins_encode( SetInstMark, OpcS, OpcP, RegMem( src, mem ), ClearInstMark );
-  ins_pipe( ialu_mem_reg );
-%}
-
-// Store Integer
-instruct storeI(memory mem, rRegI src) %{
-  match(Set mem (StoreI mem src));
-
-  ins_cost(125);
-  format %{ "MOV    $mem,$src" %}
-  opcode(0x89);
-  ins_encode( SetInstMark, OpcP, RegMem( src, mem ), ClearInstMark );
-  ins_pipe( ialu_mem_reg );
-%}
-
-// Store Long
-instruct storeL(long_memory mem, eRegL src) %{
-  predicate(!((StoreLNode*)n)->require_atomic_access());
-  match(Set mem (StoreL mem src));
-
-  ins_cost(200);
-  format %{ "MOV    $mem,$src.lo\n\t"
-            "MOV    $mem+4,$src.hi" %}
-  opcode(0x89, 0x89);
-  ins_encode( SetInstMark, OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ), ClearInstMark );
-  ins_pipe( ialu_mem_long_reg );
-%}
-
-// Store Long to Integer
-instruct storeL2I(memory mem, eRegL src) %{
-  match(Set mem (StoreI mem (ConvL2I src)));
-
-  format %{ "MOV    $mem,$src.lo\t# long -> int" %}
-  ins_encode %{
-    __ movl($mem$$Address, $src$$Register);
-  %}
-  ins_pipe(ialu_mem_reg);
-%}
-
-// Volatile Store Long.  Must be atomic, so move it into
-// the FP TOS and then do a 64-bit FIST.  Has to probe the
-// target address before the store (for null-ptr checks)
-// so the memory operand is used twice in the encoding.
-instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
-  predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
-  match(Set mem (StoreL mem src));
-  effect( KILL cr );
-  ins_cost(400);
-  format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
-            "FILD   $src\n\t"
-            "FISTp  $mem\t # 64-bit atomic volatile long store" %}
-  opcode(0x3B);
-  ins_encode( SetInstMark, OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src), ClearInstMark);
-  ins_pipe( fpu_reg_mem );
-%}
-
-instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
-  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
-  match(Set mem (StoreL mem src));
-  effect( TEMP tmp, KILL cr );
-  ins_cost(380);
-  format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
-            "MOVSD  $tmp,$src\n\t"
-            "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
-  ins_encode %{
-    __ cmpl(rax, $mem$$Address);
-    __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
-    __ movdbl($mem$$Address, $tmp$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
-  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
-  match(Set mem (StoreL mem src));
-  effect( TEMP tmp2 , TEMP tmp, KILL cr );
-  ins_cost(360);
-  format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
-            "MOVD   $tmp,$src.lo\n\t"
-            "MOVD   $tmp2,$src.hi\n\t"
-            "PUNPCKLDQ $tmp,$tmp2\n\t"
-            "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
-  ins_encode %{
-    __ cmpl(rax, $mem$$Address);
-    __ movdl($tmp$$XMMRegister, $src$$Register);
-    __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
-    __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
-    __ movdbl($mem$$Address, $tmp$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-// Store Pointer; for storing unknown oops and raw pointers
-instruct storeP(memory mem, anyRegP src) %{
-  match(Set mem (StoreP mem src));
-
-  ins_cost(125);
-  format %{ "MOV    $mem,$src" %}
-  opcode(0x89);
-  ins_encode( SetInstMark, OpcP, RegMem( src, mem ), ClearInstMark );
-  ins_pipe( ialu_mem_reg );
-%}
-
-// Store Integer Immediate
-instruct storeImmI(memory mem, immI src) %{
-  match(Set mem (StoreI mem src));
-
-  ins_cost(150);
-  format %{ "MOV    $mem,$src" %}
-  opcode(0xC7);               /* C7 /0 */
-  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem), Con32(src), ClearInstMark);
-  ins_pipe( ialu_mem_imm );
-%}
-
-// Store Short/Char Immediate
-instruct storeImmI16(memory mem, immI16 src) %{
-  predicate(UseStoreImmI16);
-  match(Set mem (StoreC mem src));
-
-  ins_cost(150);
-  format %{ "MOV16  $mem,$src" %}
-  opcode(0xC7);     /* C7 /0 Same as 32 store immediate with prefix */
-  ins_encode( SetInstMark, SizePrefix, OpcP, RMopc_Mem(0x00,mem), Con16(src), ClearInstMark);
-  ins_pipe( ialu_mem_imm );
-%}
-
-// Store Pointer Immediate; null pointers or constant oops that do not
-// need card-mark barriers.
-instruct storeImmP(memory mem, immP src) %{
-  match(Set mem (StoreP mem src));
-
-  ins_cost(150);
-  format %{ "MOV    $mem,$src" %}
-  opcode(0xC7);               /* C7 /0 */
-  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem), Con32( src ), ClearInstMark);
-  ins_pipe( ialu_mem_imm );
-%}
-
-// Store Byte Immediate
-instruct storeImmB(memory mem, immI8 src) %{
-  match(Set mem (StoreB mem src));
-
-  ins_cost(150);
-  format %{ "MOV8   $mem,$src" %}
-  opcode(0xC6);               /* C6 /0 */
-  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem), Con8or32(src), ClearInstMark);
-  ins_pipe( ialu_mem_imm );
-%}
-
-// Store Double
-instruct storeDPR( memory mem, regDPR1 src) %{
-  predicate(UseSSE<=1);
-  match(Set mem (StoreD mem src));
-
-  ins_cost(100);
-  format %{ "FST_D  $mem,$src" %}
-  opcode(0xDD);       /* DD /2 */
-  ins_encode( enc_FPR_store(mem,src) );
-  ins_pipe( fpu_mem_reg );
-%}
-
-// Store double does rounding on x86
-instruct storeDPR_rounded( memory mem, regDPR1 src) %{
-  predicate(UseSSE<=1);
-  match(Set mem (StoreD mem (RoundDouble src)));
-
-  ins_cost(100);
-  format %{ "FST_D  $mem,$src\t# round" %}
-  opcode(0xDD);       /* DD /2 */
-  ins_encode( enc_FPR_store(mem,src) );
-  ins_pipe( fpu_mem_reg );
-%}
-
-// Store XMM register to memory (double-precision floating points)
-// MOVSD instruction
-instruct storeD(memory mem, regD src) %{
-  predicate(UseSSE>=2);
-  match(Set mem (StoreD mem src));
-  ins_cost(95);
-  format %{ "MOVSD  $mem,$src" %}
-  ins_encode %{
-    __ movdbl($mem$$Address, $src$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-// Store XMM register to memory (single-precision floating point)
-// MOVSS instruction
-instruct storeF(memory mem, regF src) %{
-  predicate(UseSSE>=1);
-  match(Set mem (StoreF mem src));
-  ins_cost(95);
-  format %{ "MOVSS  $mem,$src" %}
-  ins_encode %{
-    __ movflt($mem$$Address, $src$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-
-// Store Float
-instruct storeFPR( memory mem, regFPR1 src) %{
-  predicate(UseSSE==0);
-  match(Set mem (StoreF mem src));
-
-  ins_cost(100);
-  format %{ "FST_S  $mem,$src" %}
-  opcode(0xD9);       /* D9 /2 */
-  ins_encode( enc_FPR_store(mem,src) );
-  ins_pipe( fpu_mem_reg );
-%}
-
-// Store Float does rounding on x86
-instruct storeFPR_rounded( memory mem, regFPR1 src) %{
-  predicate(UseSSE==0);
-  match(Set mem (StoreF mem (RoundFloat src)));
-
-  ins_cost(100);
-  format %{ "FST_S  $mem,$src\t# round" %}
-  opcode(0xD9);       /* D9 /2 */
-  ins_encode( enc_FPR_store(mem,src) );
-  ins_pipe( fpu_mem_reg );
-%}
-
-// Store Float does rounding on x86
-instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
-  predicate(UseSSE<=1);
-  match(Set mem (StoreF mem (ConvD2F src)));
-
-  ins_cost(100);
-  format %{ "FST_S  $mem,$src\t# D-round" %}
-  opcode(0xD9);       /* D9 /2 */
-  ins_encode( enc_FPR_store(mem,src) );
-  ins_pipe( fpu_mem_reg );
-%}
-
-// Store immediate Float value (it is faster than store from FPU register)
-// The instruction usage is guarded by predicate in operand immFPR().
-instruct storeFPR_imm( memory mem, immFPR src) %{
-  match(Set mem (StoreF mem src));
-
-  ins_cost(50);
-  format %{ "MOV    $mem,$src\t# store float" %}
-  opcode(0xC7);               /* C7 /0 */
-  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem),  Con32FPR_as_bits(src), ClearInstMark);
-  ins_pipe( ialu_mem_imm );
-%}
-
-// Store immediate Float value (it is faster than store from XMM register)
-// The instruction usage is guarded by predicate in operand immF().
-instruct storeF_imm( memory mem, immF src) %{
-  match(Set mem (StoreF mem src));
-
-  ins_cost(50);
-  format %{ "MOV    $mem,$src\t# store float" %}
-  opcode(0xC7);               /* C7 /0 */
-  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem),  Con32F_as_bits(src), ClearInstMark);
-  ins_pipe( ialu_mem_imm );
-%}
-
-// Store Integer to stack slot
-instruct storeSSI(stackSlotI dst, rRegI src) %{
-  match(Set dst src);
-
-  ins_cost(100);
-  format %{ "MOV    $dst,$src" %}
-  opcode(0x89);
-  ins_encode( OpcPRegSS( dst, src ) );
-  ins_pipe( ialu_mem_reg );
-%}
-
-// Store Integer to stack slot
-instruct storeSSP(stackSlotP dst, eRegP src) %{
-  match(Set dst src);
-
-  ins_cost(100);
-  format %{ "MOV    $dst,$src" %}
-  opcode(0x89);
-  ins_encode( OpcPRegSS( dst, src ) );
-  ins_pipe( ialu_mem_reg );
-%}
-
-// Store Long to stack slot
-instruct storeSSL(stackSlotL dst, eRegL src) %{
-  match(Set dst src);
-
-  ins_cost(200);
-  format %{ "MOV    $dst,$src.lo\n\t"
-            "MOV    $dst+4,$src.hi" %}
-  opcode(0x89, 0x89);
-  ins_encode( SetInstMark, OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ), ClearInstMark );
-  ins_pipe( ialu_mem_long_reg );
-%}
-
-//----------MemBar Instructions-----------------------------------------------
-// Memory barrier flavors
-
-instruct membar_acquire() %{
-  match(MemBarAcquire);
-  match(LoadFence);
-  ins_cost(400);
-
-  size(0);
-  format %{ "MEMBAR-acquire ! (empty encoding)" %}
-  ins_encode();
-  ins_pipe(empty);
-%}
-
-instruct membar_acquire_lock() %{
-  match(MemBarAcquireLock);
-  ins_cost(0);
-
-  size(0);
-  format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
-  ins_encode( );
-  ins_pipe(empty);
-%}
-
-instruct membar_release() %{
-  match(MemBarRelease);
-  match(StoreFence);
-  ins_cost(400);
-
-  size(0);
-  format %{ "MEMBAR-release ! (empty encoding)" %}
-  ins_encode( );
-  ins_pipe(empty);
-%}
-
-instruct membar_release_lock() %{
-  match(MemBarReleaseLock);
-  ins_cost(0);
-
-  size(0);
-  format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
-  ins_encode( );
-  ins_pipe(empty);
-%}
-
-instruct membar_volatile(eFlagsReg cr) %{
-  match(MemBarVolatile);
-  effect(KILL cr);
-  ins_cost(400);
-
-  format %{
-    $$template
-    $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
-  %}
-  ins_encode %{
-    __ membar(Assembler::StoreLoad);
-  %}
-  ins_pipe(pipe_slow);
-%}
-
-instruct unnecessary_membar_volatile() %{
-  match(MemBarVolatile);
-  predicate(Matcher::post_store_load_barrier(n));
-  ins_cost(0);
-
-  size(0);
-  format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
-  ins_encode( );
-  ins_pipe(empty);
-%}
-
-instruct membar_storestore() %{
-  match(MemBarStoreStore);
-  match(StoreStoreFence);
-  ins_cost(0);
-
-  size(0);
-  format %{ "MEMBAR-storestore (empty encoding)" %}
-  ins_encode( );
-  ins_pipe(empty);
-%}
-
-//----------Move Instructions--------------------------------------------------
-instruct castX2P(eAXRegP dst, eAXRegI src) %{
-  match(Set dst (CastX2P src));
-  format %{ "# X2P  $dst, $src" %}
-  ins_encode( /*empty encoding*/ );
-  ins_cost(0);
-  ins_pipe(empty);
-%}
-
-instruct castP2X(rRegI dst, eRegP src ) %{
-  match(Set dst (CastP2X src));
-  ins_cost(50);
-  format %{ "MOV    $dst, $src\t# CastP2X" %}
-  ins_encode( enc_Copy( dst, src) );
-  ins_pipe( ialu_reg_reg );
-%}
-
-//----------Conditional Move---------------------------------------------------
-// Conditional move
-instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
-  predicate(!VM_Version::supports_cmov() );
-  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
-  ins_cost(200);
-  format %{ "J$cop,us skip\t# signed cmove\n\t"
-            "MOV    $dst,$src\n"
-      "skip:" %}
-  ins_encode %{
-    Label Lskip;
-    // Invert sense of branch from sense of CMOV
-    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
-    __ movl($dst$$Register, $src$$Register);
-    __ bind(Lskip);
-  %}
-  ins_pipe( pipe_cmov_reg );
-%}
-
-instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
-  predicate(!VM_Version::supports_cmov() );
-  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
-  ins_cost(200);
-  format %{ "J$cop,us skip\t# unsigned cmove\n\t"
-            "MOV    $dst,$src\n"
-      "skip:" %}
-  ins_encode %{
-    Label Lskip;
-    // Invert sense of branch from sense of CMOV
-    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
-    __ movl($dst$$Register, $src$$Register);
-    __ bind(Lskip);
-  %}
-  ins_pipe( pipe_cmov_reg );
-%}
-
-instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
-  predicate(VM_Version::supports_cmov() );
-  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
-  ins_cost(200);
-  format %{ "CMOV$cop $dst,$src" %}
-  opcode(0x0F,0x40);
-  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
-  ins_pipe( pipe_cmov_reg );
-%}
-
-instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
-  predicate(VM_Version::supports_cmov() );
-  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
-  ins_cost(200);
-  format %{ "CMOV$cop $dst,$src" %}
-  opcode(0x0F,0x40);
-  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
-  ins_pipe( pipe_cmov_reg );
-%}
-
-instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
-  predicate(VM_Version::supports_cmov() );
-  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
-  ins_cost(200);
-  expand %{
-    cmovI_regU(cop, cr, dst, src);
-  %}
-%}
-
-// Conditional move
-instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
-  predicate(VM_Version::supports_cmov() );
-  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
-  ins_cost(250);
-  format %{ "CMOV$cop $dst,$src" %}
-  opcode(0x0F,0x40);
-  ins_encode( SetInstMark, enc_cmov(cop), RegMem( dst, src ), ClearInstMark );
-  ins_pipe( pipe_cmov_mem );
-%}
-
-// Conditional move
-instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
-  predicate(VM_Version::supports_cmov() );
-  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
-  ins_cost(250);
-  format %{ "CMOV$cop $dst,$src" %}
-  opcode(0x0F,0x40);
-  ins_encode( SetInstMark, enc_cmov(cop), RegMem( dst, src ), ClearInstMark );
-  ins_pipe( pipe_cmov_mem );
-%}
-
-instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
-  predicate(VM_Version::supports_cmov() );
-  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
-  ins_cost(250);
-  expand %{
-    cmovI_memU(cop, cr, dst, src);
-  %}
-%}
-
-// Conditional move
-instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
-  predicate(VM_Version::supports_cmov() );
-  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
-  ins_cost(200);
-  format %{ "CMOV$cop $dst,$src\t# ptr" %}
-  opcode(0x0F,0x40);
-  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
-  ins_pipe( pipe_cmov_reg );
-%}
-
-// Conditional move (non-P6 version)
-// Note:  a CMoveP is generated for  stubs and native wrappers
-//        regardless of whether we are on a P6, so we
-//        emulate a cmov here
-instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
-  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
-  ins_cost(300);
-  format %{ "Jn$cop   skip\n\t"
-          "MOV    $dst,$src\t# pointer\n"
-      "skip:" %}
-  opcode(0x8b);
-  ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
-  ins_pipe( pipe_cmov_reg );
-%}
-
-// Conditional move
-instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
-  predicate(VM_Version::supports_cmov() );
-  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
-  ins_cost(200);
-  format %{ "CMOV$cop $dst,$src\t# ptr" %}
-  opcode(0x0F,0x40);
-  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
-  ins_pipe( pipe_cmov_reg );
-%}
-
-instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
-  predicate(VM_Version::supports_cmov() );
-  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
-  ins_cost(200);
-  expand %{
-    cmovP_regU(cop, cr, dst, src);
-  %}
-%}
-
-// DISABLED: Requires the ADLC to emit a bottom_type call that
-// correctly meets the two pointer arguments; one is an incoming
-// register but the other is a memory operand.  ALSO appears to
-// be buggy with implicit null checks.
-//
-//// Conditional move
-//instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
-//  predicate(VM_Version::supports_cmov() );
-//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
-//  ins_cost(250);
-//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
-//  opcode(0x0F,0x40);
-//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
-//  ins_pipe( pipe_cmov_mem );
-//%}
-//
-//// Conditional move
-//instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
-//  predicate(VM_Version::supports_cmov() );
-//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
-//  ins_cost(250);
-//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
-//  opcode(0x0F,0x40);
-//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
-//  ins_pipe( pipe_cmov_mem );
-//%}
-
-// Conditional move
-instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
-  predicate(UseSSE<=1);
-  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
-  ins_cost(200);
-  format %{ "FCMOV$cop $dst,$src\t# double" %}
-  opcode(0xDA);
-  ins_encode( enc_cmov_dpr(cop,src) );
-  ins_pipe( pipe_cmovDPR_reg );
-%}
-
-// Conditional move
-instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
-  predicate(UseSSE==0);
-  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
-  ins_cost(200);
-  format %{ "FCMOV$cop $dst,$src\t# float" %}
-  opcode(0xDA);
-  ins_encode( enc_cmov_dpr(cop,src) );
-  ins_pipe( pipe_cmovDPR_reg );
-%}
-
-// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
-instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
-  predicate(UseSSE<=1);
-  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
-  ins_cost(200);
-  format %{ "Jn$cop   skip\n\t"
-            "MOV    $dst,$src\t# double\n"
-      "skip:" %}
-  opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
-  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
-  ins_pipe( pipe_cmovDPR_reg );
-%}
-
-// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
-instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
-  predicate(UseSSE==0);
-  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
-  ins_cost(200);
-  format %{ "Jn$cop    skip\n\t"
-            "MOV    $dst,$src\t# float\n"
-      "skip:" %}
-  opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
-  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
-  ins_pipe( pipe_cmovDPR_reg );
-%}
-
-// No CMOVE with SSE/SSE2
-instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
-  predicate (UseSSE>=1);
-  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
-  ins_cost(200);
-  format %{ "Jn$cop   skip\n\t"
-            "MOVSS  $dst,$src\t# float\n"
-      "skip:" %}
-  ins_encode %{
-    Label skip;
-    // Invert sense of branch from sense of CMOV
-    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
-    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
-    __ bind(skip);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-// No CMOVE with SSE/SSE2
-instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
-  predicate (UseSSE>=2);
-  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
-  ins_cost(200);
-  format %{ "Jn$cop   skip\n\t"
-            "MOVSD  $dst,$src\t# float\n"
-      "skip:" %}
-  ins_encode %{
-    Label skip;
-    // Invert sense of branch from sense of CMOV
-    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
-    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
-    __ bind(skip);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-// unsigned version
-instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
-  predicate (UseSSE>=1);
-  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
-  ins_cost(200);
-  format %{ "Jn$cop   skip\n\t"
-            "MOVSS  $dst,$src\t# float\n"
-      "skip:" %}
-  ins_encode %{
-    Label skip;
-    // Invert sense of branch from sense of CMOV
-    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
-    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
-    __ bind(skip);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
-  predicate (UseSSE>=1);
-  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
-  ins_cost(200);
-  expand %{
-    fcmovF_regU(cop, cr, dst, src);
-  %}
-%}
-
-// unsigned version
-instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
-  predicate (UseSSE>=2);
-  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
-  ins_cost(200);
-  format %{ "Jn$cop   skip\n\t"
-            "MOVSD  $dst,$src\t# float\n"
-      "skip:" %}
-  ins_encode %{
-    Label skip;
-    // Invert sense of branch from sense of CMOV
-    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
-    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
-    __ bind(skip);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
-  predicate (UseSSE>=2);
-  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
-  ins_cost(200);
-  expand %{
-    fcmovD_regU(cop, cr, dst, src);
-  %}
-%}
-
-instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
-  predicate(VM_Version::supports_cmov() );
-  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
-  ins_cost(200);
-  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
-            "CMOV$cop $dst.hi,$src.hi" %}
-  opcode(0x0F,0x40);
-  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
-  ins_pipe( pipe_cmov_reg_long );
-%}
-
-instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
-  predicate(VM_Version::supports_cmov() );
-  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
-  ins_cost(200);
-  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
-            "CMOV$cop $dst.hi,$src.hi" %}
-  opcode(0x0F,0x40);
-  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
-  ins_pipe( pipe_cmov_reg_long );
-%}
-
-instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
-  predicate(VM_Version::supports_cmov() );
-  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
-  ins_cost(200);
-  expand %{
-    cmovL_regU(cop, cr, dst, src);
-  %}
-%}
-
-//----------Arithmetic Instructions--------------------------------------------
-//----------Addition Instructions----------------------------------------------
-
-// Integer Addition Instructions
-instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
-  match(Set dst (AddI dst src));
-  effect(KILL cr);
-
-  size(2);
-  format %{ "ADD    $dst,$src" %}
-  opcode(0x03);
-  ins_encode( OpcP, RegReg( dst, src) );
-  ins_pipe( ialu_reg_reg );
-%}
-
-instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
-  match(Set dst (AddI dst src));
-  effect(KILL cr);
-
-  format %{ "ADD    $dst,$src" %}
-  opcode(0x81, 0x00); /* /0 id */
-  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
-  ins_pipe( ialu_reg );
-%}
-
-instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
-  predicate(UseIncDec);
-  match(Set dst (AddI dst src));
-  effect(KILL cr);
-
-  size(1);
-  format %{ "INC    $dst" %}
-  opcode(0x40); /*  */
-  ins_encode( Opc_plus( primary, dst ) );
-  ins_pipe( ialu_reg );
-%}
-
-instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
-  match(Set dst (AddI src0 src1));
-  ins_cost(110);
-
-  format %{ "LEA    $dst,[$src0 + $src1]" %}
-  opcode(0x8D); /* 0x8D /r */
-  ins_encode( SetInstMark, OpcP, RegLea( dst, src0, src1 ), ClearInstMark );
-  ins_pipe( ialu_reg_reg );
-%}
-
-instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
-  match(Set dst (AddP src0 src1));
-  ins_cost(110);
-
-  format %{ "LEA    $dst,[$src0 + $src1]\t# ptr" %}
-  opcode(0x8D); /* 0x8D /r */
-  ins_encode( SetInstMark, OpcP, RegLea( dst, src0, src1 ), ClearInstMark );
-  ins_pipe( ialu_reg_reg );
-%}
-
-instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
-  predicate(UseIncDec);
-  match(Set dst (AddI dst src));
-  effect(KILL cr);
-
-  size(1);
-  format %{ "DEC    $dst" %}
-  opcode(0x48); /*  */
-  ins_encode( Opc_plus( primary, dst ) );
-  ins_pipe( ialu_reg );
-%}
-
-instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
-  match(Set dst (AddP dst src));
-  effect(KILL cr);
-
-  size(2);
-  format %{ "ADD    $dst,$src" %}
-  opcode(0x03);
-  ins_encode( OpcP, RegReg( dst, src) );
-  ins_pipe( ialu_reg_reg );
-%}
-
-instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
-  match(Set dst (AddP dst src));
-  effect(KILL cr);
-
-  format %{ "ADD    $dst,$src" %}
-  opcode(0x81,0x00); /* Opcode 81 /0 id */
-  // ins_encode( RegImm( dst, src) );
-  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
-  ins_pipe( ialu_reg );
-%}
-
-instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
-  match(Set dst (AddI dst (LoadI src)));
-  effect(KILL cr);
-
-  ins_cost(150);
-  format %{ "ADD    $dst,$src" %}
-  opcode(0x03);
-  ins_encode( SetInstMark, OpcP, RegMem( dst, src), ClearInstMark );
-  ins_pipe( ialu_reg_mem );
-%}
-
-instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
-  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
-  effect(KILL cr);
-
-  ins_cost(150);
-  format %{ "ADD    $dst,$src" %}
-  opcode(0x01);  /* Opcode 01 /r */
-  ins_encode( SetInstMark, OpcP, RegMem( src, dst ), ClearInstMark );
-  ins_pipe( ialu_mem_reg );
-%}
-
-// Add Memory with Immediate
-instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
-  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
-  effect(KILL cr);
-
-  ins_cost(125);
-  format %{ "ADD    $dst,$src" %}
-  opcode(0x81);               /* Opcode 81 /0 id */
-  ins_encode( SetInstMark, OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32(src), ClearInstMark );
-  ins_pipe( ialu_mem_imm );
-%}
-
-instruct incI_mem(memory dst, immI_1 src, eFlagsReg cr) %{
-  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
-  effect(KILL cr);
-
-  ins_cost(125);
-  format %{ "INC    $dst" %}
-  opcode(0xFF);               /* Opcode FF /0 */
-  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,dst), ClearInstMark);
-  ins_pipe( ialu_mem_imm );
-%}
-
-instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
-  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
-  effect(KILL cr);
-
-  ins_cost(125);
-  format %{ "DEC    $dst" %}
-  opcode(0xFF);               /* Opcode FF /1 */
-  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x01,dst), ClearInstMark);
-  ins_pipe( ialu_mem_imm );
-%}
-
-
-instruct checkCastPP( eRegP dst ) %{
-  match(Set dst (CheckCastPP dst));
-
-  size(0);
-  format %{ "#checkcastPP of $dst" %}
-  ins_encode( /*empty encoding*/ );
-  ins_pipe( empty );
-%}
-
-instruct castPP( eRegP dst ) %{
-  match(Set dst (CastPP dst));
-  format %{ "#castPP of $dst" %}
-  ins_encode( /*empty encoding*/ );
-  ins_pipe( empty );
-%}
-
-instruct castII( rRegI dst ) %{
-  match(Set dst (CastII dst));
-  format %{ "#castII of $dst" %}
-  ins_encode( /*empty encoding*/ );
-  ins_cost(0);
-  ins_pipe( empty );
-%}
-
-instruct castLL( eRegL dst ) %{
-  match(Set dst (CastLL dst));
-  format %{ "#castLL of $dst" %}
-  ins_encode( /*empty encoding*/ );
-  ins_cost(0);
-  ins_pipe( empty );
-%}
-
-instruct castFF( regF dst ) %{
-  predicate(UseSSE >= 1);
-  match(Set dst (CastFF dst));
-  format %{ "#castFF of $dst" %}
-  ins_encode( /*empty encoding*/ );
-  ins_cost(0);
-  ins_pipe( empty );
-%}
-
-instruct castDD( regD dst ) %{
-  predicate(UseSSE >= 2);
-  match(Set dst (CastDD dst));
-  format %{ "#castDD of $dst" %}
-  ins_encode( /*empty encoding*/ );
-  ins_cost(0);
-  ins_pipe( empty );
-%}
-
-instruct castFF_PR( regFPR dst ) %{
-  predicate(UseSSE < 1);
-  match(Set dst (CastFF dst));
-  format %{ "#castFF of $dst" %}
-  ins_encode( /*empty encoding*/ );
-  ins_cost(0);
-  ins_pipe( empty );
-%}
-
-instruct castDD_PR( regDPR dst ) %{
-  predicate(UseSSE < 2);
-  match(Set dst (CastDD dst));
-  format %{ "#castDD of $dst" %}
-  ins_encode( /*empty encoding*/ );
-  ins_cost(0);
-  ins_pipe( empty );
-%}
-
-// No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
-
-instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
-  match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
-  match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
-  effect(KILL cr, KILL oldval);
-  format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
-            "MOV    $res,0\n\t"
-            "JNE,s  fail\n\t"
-            "MOV    $res,1\n"
-          "fail:" %}
-  ins_encode( enc_cmpxchg8(mem_ptr),
-              enc_flags_ne_to_boolean(res) );
-  ins_pipe( pipe_cmpxchg );
-%}
-
-instruct compareAndSwapP( rRegI res,  pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
-  match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
-  match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
-  effect(KILL cr, KILL oldval);
-  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
-            "MOV    $res,0\n\t"
-            "JNE,s  fail\n\t"
-            "MOV    $res,1\n"
-          "fail:" %}
-  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
-  ins_pipe( pipe_cmpxchg );
-%}
-
-instruct compareAndSwapB( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
-  match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
-  match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
-  effect(KILL cr, KILL oldval);
-  format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
-            "MOV    $res,0\n\t"
-            "JNE,s  fail\n\t"
-            "MOV    $res,1\n"
-          "fail:" %}
-  ins_encode( enc_cmpxchgb(mem_ptr),
-              enc_flags_ne_to_boolean(res) );
-  ins_pipe( pipe_cmpxchg );
-%}
-
-instruct compareAndSwapS( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
-  match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
-  match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
-  effect(KILL cr, KILL oldval);
-  format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
-            "MOV    $res,0\n\t"
-            "JNE,s  fail\n\t"
-            "MOV    $res,1\n"
-          "fail:" %}
-  ins_encode( enc_cmpxchgw(mem_ptr),
-              enc_flags_ne_to_boolean(res) );
-  ins_pipe( pipe_cmpxchg );
-%}
-
-instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
-  match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
-  match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
-  effect(KILL cr, KILL oldval);
-  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
-            "MOV    $res,0\n\t"
-            "JNE,s  fail\n\t"
-            "MOV    $res,1\n"
-          "fail:" %}
-  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
-  ins_pipe( pipe_cmpxchg );
-%}
-
-instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
-  match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
-  effect(KILL cr);
-  format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
-  ins_encode( enc_cmpxchg8(mem_ptr) );
-  ins_pipe( pipe_cmpxchg );
-%}
-
-instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
-  match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
-  effect(KILL cr);
-  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
-  ins_encode( enc_cmpxchg(mem_ptr) );
-  ins_pipe( pipe_cmpxchg );
-%}
-
-instruct compareAndExchangeB( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
-  match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
-  effect(KILL cr);
-  format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
-  ins_encode( enc_cmpxchgb(mem_ptr) );
-  ins_pipe( pipe_cmpxchg );
-%}
-
-instruct compareAndExchangeS( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
-  match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
-  effect(KILL cr);
-  format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
-  ins_encode( enc_cmpxchgw(mem_ptr) );
-  ins_pipe( pipe_cmpxchg );
-%}
-
-instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
-  match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
-  effect(KILL cr);
-  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
-  ins_encode( enc_cmpxchg(mem_ptr) );
-  ins_pipe( pipe_cmpxchg );
-%}
-
-instruct xaddB_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
-  predicate(n->as_LoadStore()->result_not_used());
-  match(Set dummy (GetAndAddB mem add));
-  effect(KILL cr);
-  format %{ "ADDB  [$mem],$add" %}
-  ins_encode %{
-    __ lock();
-    __ addb($mem$$Address, $add$$constant);
-  %}
-  ins_pipe( pipe_cmpxchg );
-%}
-
-// Important to match to xRegI: only 8-bit regs.
-instruct xaddB( memory mem, xRegI newval, eFlagsReg cr) %{
-  match(Set newval (GetAndAddB mem newval));
-  effect(KILL cr);
-  format %{ "XADDB  [$mem],$newval" %}
-  ins_encode %{
-    __ lock();
-    __ xaddb($mem$$Address, $newval$$Register);
-  %}
-  ins_pipe( pipe_cmpxchg );
-%}
-
-instruct xaddS_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
-  predicate(n->as_LoadStore()->result_not_used());
-  match(Set dummy (GetAndAddS mem add));
-  effect(KILL cr);
-  format %{ "ADDS  [$mem],$add" %}
-  ins_encode %{
-    __ lock();
-    __ addw($mem$$Address, $add$$constant);
-  %}
-  ins_pipe( pipe_cmpxchg );
-%}
-
-instruct xaddS( memory mem, rRegI newval, eFlagsReg cr) %{
-  match(Set newval (GetAndAddS mem newval));
-  effect(KILL cr);
-  format %{ "XADDS  [$mem],$newval" %}
-  ins_encode %{
-    __ lock();
-    __ xaddw($mem$$Address, $newval$$Register);
-  %}
-  ins_pipe( pipe_cmpxchg );
-%}
-
-instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
-  predicate(n->as_LoadStore()->result_not_used());
-  match(Set dummy (GetAndAddI mem add));
-  effect(KILL cr);
-  format %{ "ADDL  [$mem],$add" %}
-  ins_encode %{
-    __ lock();
-    __ addl($mem$$Address, $add$$constant);
-  %}
-  ins_pipe( pipe_cmpxchg );
-%}
-
-instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
-  match(Set newval (GetAndAddI mem newval));
-  effect(KILL cr);
-  format %{ "XADDL  [$mem],$newval" %}
-  ins_encode %{
-    __ lock();
-    __ xaddl($mem$$Address, $newval$$Register);
-  %}
-  ins_pipe( pipe_cmpxchg );
-%}
-
-// Important to match to xRegI: only 8-bit regs.
-instruct xchgB( memory mem, xRegI newval) %{
-  match(Set newval (GetAndSetB mem newval));
-  format %{ "XCHGB  $newval,[$mem]" %}
-  ins_encode %{
-    __ xchgb($newval$$Register, $mem$$Address);
-  %}
-  ins_pipe( pipe_cmpxchg );
-%}
-
-instruct xchgS( memory mem, rRegI newval) %{
-  match(Set newval (GetAndSetS mem newval));
-  format %{ "XCHGW  $newval,[$mem]" %}
-  ins_encode %{
-    __ xchgw($newval$$Register, $mem$$Address);
-  %}
-  ins_pipe( pipe_cmpxchg );
-%}
-
-instruct xchgI( memory mem, rRegI newval) %{
-  match(Set newval (GetAndSetI mem newval));
-  format %{ "XCHGL  $newval,[$mem]" %}
-  ins_encode %{
-    __ xchgl($newval$$Register, $mem$$Address);
-  %}
-  ins_pipe( pipe_cmpxchg );
-%}
-
-instruct xchgP( memory mem, pRegP newval) %{
-  match(Set newval (GetAndSetP mem newval));
-  format %{ "XCHGL  $newval,[$mem]" %}
-  ins_encode %{
-    __ xchgl($newval$$Register, $mem$$Address);
-  %}
-  ins_pipe( pipe_cmpxchg );
-%}
-
-//----------Subtraction Instructions-------------------------------------------
-
-// Integer Subtraction Instructions
-instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
-  match(Set dst (SubI dst src));
-  effect(KILL cr);
-
-  size(2);
-  format %{ "SUB    $dst,$src" %}
-  opcode(0x2B);
-  ins_encode( OpcP, RegReg( dst, src) );
-  ins_pipe( ialu_reg_reg );
-%}
-
-instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
-  match(Set dst (SubI dst src));
-  effect(KILL cr);
-
-  format %{ "SUB    $dst,$src" %}
-  opcode(0x81,0x05);  /* Opcode 81 /5 */
-  // ins_encode( RegImm( dst, src) );
-  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
-  ins_pipe( ialu_reg );
-%}
-
-instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
-  match(Set dst (SubI dst (LoadI src)));
-  effect(KILL cr);
-
-  ins_cost(150);
-  format %{ "SUB    $dst,$src" %}
-  opcode(0x2B);
-  ins_encode( SetInstMark, OpcP, RegMem( dst, src), ClearInstMark );
-  ins_pipe( ialu_reg_mem );
-%}
-
-instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
-  match(Set dst (StoreI dst (SubI (LoadI dst) src)));
-  effect(KILL cr);
-
-  ins_cost(150);
-  format %{ "SUB    $dst,$src" %}
-  opcode(0x29);  /* Opcode 29 /r */
-  ins_encode( SetInstMark, OpcP, RegMem( src, dst ), ClearInstMark );
-  ins_pipe( ialu_mem_reg );
-%}
-
-// Subtract from a pointer
-instruct subP_eReg(eRegP dst, rRegI src, immI_0 zero, eFlagsReg cr) %{
-  match(Set dst (AddP dst (SubI zero src)));
-  effect(KILL cr);
-
-  size(2);
-  format %{ "SUB    $dst,$src" %}
-  opcode(0x2B);
-  ins_encode( OpcP, RegReg( dst, src) );
-  ins_pipe( ialu_reg_reg );
-%}
-
-instruct negI_eReg(rRegI dst, immI_0 zero, eFlagsReg cr) %{
-  match(Set dst (SubI zero dst));
-  effect(KILL cr);
-
-  size(2);
-  format %{ "NEG    $dst" %}
-  opcode(0xF7,0x03);  // Opcode F7 /3
-  ins_encode( OpcP, RegOpc( dst ) );
-  ins_pipe( ialu_reg );
-%}
-
-//----------Multiplication/Division Instructions-------------------------------
-// Integer Multiplication Instructions
-// Multiply Register
-instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
-  match(Set dst (MulI dst src));
-  effect(KILL cr);
-
-  size(3);
-  ins_cost(300);
-  format %{ "IMUL   $dst,$src" %}
-  opcode(0xAF, 0x0F);
-  ins_encode( OpcS, OpcP, RegReg( dst, src) );
-  ins_pipe( ialu_reg_reg_alu0 );
-%}
-
-// Multiply 32-bit Immediate
-instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
-  match(Set dst (MulI src imm));
-  effect(KILL cr);
-
-  ins_cost(300);
-  format %{ "IMUL   $dst,$src,$imm" %}
-  opcode(0x69);  /* 69 /r id */
-  ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
-  ins_pipe( ialu_reg_reg_alu0 );
-%}
-
-instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
-  match(Set dst src);
-  effect(KILL cr);
-
-  // Note that this is artificially increased to make it more expensive than loadConL
-  ins_cost(250);
-  format %{ "MOV    EAX,$src\t// low word only" %}
-  opcode(0xB8);
-  ins_encode( LdImmL_Lo(dst, src) );
-  ins_pipe( ialu_reg_fat );
-%}
-
-// Multiply by 32-bit Immediate, taking the shifted high order results
-//  (special case for shift by 32)
-instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
-  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
-  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
-             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
-             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
-  effect(USE src1, KILL cr);
-
-  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
-  ins_cost(0*100 + 1*400 - 150);
-  format %{ "IMUL   EDX:EAX,$src1" %}
-  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
-  ins_pipe( pipe_slow );
-%}
-
-// Multiply by 32-bit Immediate, taking the shifted high order results
-instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
-  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
-  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
-             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
-             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
-  effect(USE src1, KILL cr);
-
-  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
-  ins_cost(1*100 + 1*400 - 150);
-  format %{ "IMUL   EDX:EAX,$src1\n\t"
-            "SAR    EDX,$cnt-32" %}
-  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
-  ins_pipe( pipe_slow );
-%}
-
-// Multiply Memory 32-bit Immediate
-instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
-  match(Set dst (MulI (LoadI src) imm));
-  effect(KILL cr);
-
-  ins_cost(300);
-  format %{ "IMUL   $dst,$src,$imm" %}
-  opcode(0x69);  /* 69 /r id */
-  ins_encode( SetInstMark, OpcSE(imm), RegMem( dst, src ), Con8or32( imm ), ClearInstMark );
-  ins_pipe( ialu_reg_mem_alu0 );
-%}
-
-// Multiply Memory
-instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
-  match(Set dst (MulI dst (LoadI src)));
-  effect(KILL cr);
-
-  ins_cost(350);
-  format %{ "IMUL   $dst,$src" %}
-  opcode(0xAF, 0x0F);
-  ins_encode( SetInstMark, OpcS, OpcP, RegMem( dst, src), ClearInstMark );
-  ins_pipe( ialu_reg_mem_alu0 );
-%}
-
-instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, eFlagsReg cr)
-%{
-  match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
-  effect(KILL cr, KILL src2);
-
-  expand %{ mulI_eReg(dst, src1, cr);
-           mulI_eReg(src2, src3, cr);
-           addI_eReg(dst, src2, cr); %}
-%}
-
-// Multiply Register Int to Long
-instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
-  // Basic Idea: long = (long)int * (long)int
-  match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
-  effect(DEF dst, USE src, USE src1, KILL flags);
-
-  ins_cost(300);
-  format %{ "IMUL   $dst,$src1" %}
-
-  ins_encode( long_int_multiply( dst, src1 ) );
-  ins_pipe( ialu_reg_reg_alu0 );
-%}
-
-instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
-  // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
-  match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
-  effect(KILL flags);
-
-  ins_cost(300);
-  format %{ "MUL    $dst,$src1" %}
-
-  ins_encode( long_uint_multiply(dst, src1) );
-  ins_pipe( ialu_reg_reg_alu0 );
-%}
-
-// Multiply Register Long
-instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
-  match(Set dst (MulL dst src));
-  effect(KILL cr, TEMP tmp);
-  ins_cost(4*100+3*400);
-// Basic idea: lo(result) = lo(x_lo * y_lo)
-//             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
-  format %{ "MOV    $tmp,$src.lo\n\t"
-            "IMUL   $tmp,EDX\n\t"
-            "MOV    EDX,$src.hi\n\t"
-            "IMUL   EDX,EAX\n\t"
-            "ADD    $tmp,EDX\n\t"
-            "MUL    EDX:EAX,$src.lo\n\t"
-            "ADD    EDX,$tmp" %}
-  ins_encode( long_multiply( dst, src, tmp ) );
-  ins_pipe( pipe_slow );
-%}
-
-// Multiply Register Long where the left operand's high 32 bits are zero
-instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
-  predicate(is_operand_hi32_zero(n->in(1)));
-  match(Set dst (MulL dst src));
-  effect(KILL cr, TEMP tmp);
-  ins_cost(2*100+2*400);
-// Basic idea: lo(result) = lo(x_lo * y_lo)
-//             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
-  format %{ "MOV    $tmp,$src.hi\n\t"
-            "IMUL   $tmp,EAX\n\t"
-            "MUL    EDX:EAX,$src.lo\n\t"
-            "ADD    EDX,$tmp" %}
-  ins_encode %{
-    __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
-    __ imull($tmp$$Register, rax);
-    __ mull($src$$Register);
-    __ addl(rdx, $tmp$$Register);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-// Multiply Register Long where the right operand's high 32 bits are zero
-instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
-  predicate(is_operand_hi32_zero(n->in(2)));
-  match(Set dst (MulL dst src));
-  effect(KILL cr, TEMP tmp);
-  ins_cost(2*100+2*400);
-// Basic idea: lo(result) = lo(x_lo * y_lo)
-//             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
-  format %{ "MOV    $tmp,$src.lo\n\t"
-            "IMUL   $tmp,EDX\n\t"
-            "MUL    EDX:EAX,$src.lo\n\t"
-            "ADD    EDX,$tmp" %}
-  ins_encode %{
-    __ movl($tmp$$Register, $src$$Register);
-    __ imull($tmp$$Register, rdx);
-    __ mull($src$$Register);
-    __ addl(rdx, $tmp$$Register);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-// Multiply Register Long where the left and the right operands' high 32 bits are zero
-instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
-  predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
-  match(Set dst (MulL dst src));
-  effect(KILL cr);
-  ins_cost(1*400);
-// Basic idea: lo(result) = lo(x_lo * y_lo)
-//             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
-  format %{ "MUL    EDX:EAX,$src.lo\n\t" %}
-  ins_encode %{
-    __ mull($src$$Register);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-// Multiply Register Long by small constant
-instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
-  match(Set dst (MulL dst src));
-  effect(KILL cr, TEMP tmp);
-  ins_cost(2*100+2*400);
-  size(12);
-// Basic idea: lo(result) = lo(src * EAX)
-//             hi(result) = hi(src * EAX) + lo(src * EDX)
-  format %{ "IMUL   $tmp,EDX,$src\n\t"
-            "MOV    EDX,$src\n\t"
-            "MUL    EDX\t# EDX*EAX -> EDX:EAX\n\t"
-            "ADD    EDX,$tmp" %}
-  ins_encode( long_multiply_con( dst, src, tmp ) );
-  ins_pipe( pipe_slow );
-%}
-
-// Integer DIV with Register
-instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
-  match(Set rax (DivI rax div));
-  effect(KILL rdx, KILL cr);
-  size(26);
-  ins_cost(30*100+10*100);
-  format %{ "CMP    EAX,0x80000000\n\t"
-            "JNE,s  normal\n\t"
-            "XOR    EDX,EDX\n\t"
-            "CMP    ECX,-1\n\t"
-            "JE,s   done\n"
-    "normal: CDQ\n\t"
-            "IDIV   $div\n\t"
-    "done:"        %}
-  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
-  ins_encode( cdq_enc, OpcP, RegOpc(div) );
-  ins_pipe( ialu_reg_reg_alu0 );
-%}
-
-// Divide Register Long
-instruct divL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{
-  match(Set dst (DivL src1 src2));
-  effect(CALL);
-  ins_cost(10000);
-  format %{ "PUSH   $src1.hi\n\t"
-            "PUSH   $src1.lo\n\t"
-            "PUSH   $src2.hi\n\t"
-            "PUSH   $src2.lo\n\t"
-            "CALL   SharedRuntime::ldiv\n\t"
-            "ADD    ESP,16" %}
-  ins_encode( long_div(src1,src2) );
-  ins_pipe( pipe_slow );
-%}
-
-// Integer DIVMOD with Register, both quotient and mod results
-instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
-  match(DivModI rax div);
-  effect(KILL cr);
-  size(26);
-  ins_cost(30*100+10*100);
-  format %{ "CMP    EAX,0x80000000\n\t"
-            "JNE,s  normal\n\t"
-            "XOR    EDX,EDX\n\t"
-            "CMP    ECX,-1\n\t"
-            "JE,s   done\n"
-    "normal: CDQ\n\t"
-            "IDIV   $div\n\t"
-    "done:"        %}
-  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
-  ins_encode( cdq_enc, OpcP, RegOpc(div) );
-  ins_pipe( pipe_slow );
-%}
-
-// Integer MOD with Register
-instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
-  match(Set rdx (ModI rax div));
-  effect(KILL rax, KILL cr);
-
-  size(26);
-  ins_cost(300);
-  format %{ "CDQ\n\t"
-            "IDIV   $div" %}
-  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
-  ins_encode( cdq_enc, OpcP, RegOpc(div) );
-  ins_pipe( ialu_reg_reg_alu0 );
-%}
-
-// Remainder Register Long
-instruct modL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{
-  match(Set dst (ModL src1 src2));
-  effect(CALL);
-  ins_cost(10000);
-  format %{ "PUSH   $src1.hi\n\t"
-            "PUSH   $src1.lo\n\t"
-            "PUSH   $src2.hi\n\t"
-            "PUSH   $src2.lo\n\t"
-            "CALL   SharedRuntime::lrem\n\t"
-            "ADD    ESP,16" %}
-  ins_encode( long_mod(src1,src2) );
-  ins_pipe( pipe_slow );
-%}
-
-// Divide Register Long (no special case since divisor != -1)
-instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
-  match(Set dst (DivL dst imm));
-  effect( TEMP tmp, TEMP tmp2, KILL cr );
-  ins_cost(1000);
-  format %{ "MOV    $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
-            "XOR    $tmp2,$tmp2\n\t"
-            "CMP    $tmp,EDX\n\t"
-            "JA,s   fast\n\t"
-            "MOV    $tmp2,EAX\n\t"
-            "MOV    EAX,EDX\n\t"
-            "MOV    EDX,0\n\t"
-            "JLE,s  pos\n\t"
-            "LNEG   EAX : $tmp2\n\t"
-            "DIV    $tmp # unsigned division\n\t"
-            "XCHG   EAX,$tmp2\n\t"
-            "DIV    $tmp\n\t"
-            "LNEG   $tmp2 : EAX\n\t"
-            "JMP,s  done\n"
-    "pos:\n\t"
-            "DIV    $tmp\n\t"
-            "XCHG   EAX,$tmp2\n"
-    "fast:\n\t"
-            "DIV    $tmp\n"
-    "done:\n\t"
-            "MOV    EDX,$tmp2\n\t"
-            "NEG    EDX:EAX # if $imm < 0" %}
-  ins_encode %{
-    int con = (int)$imm$$constant;
-    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
-    int pcon = (con > 0) ? con : -con;
-    Label Lfast, Lpos, Ldone;
-
-    __ movl($tmp$$Register, pcon);
-    __ xorl($tmp2$$Register,$tmp2$$Register);
-    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
-    __ jccb(Assembler::above, Lfast); // result fits into 32 bit
-
-    __ movl($tmp2$$Register, $dst$$Register); // save
-    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
-    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
-    __ jccb(Assembler::lessEqual, Lpos); // result is positive
-
-    // Negative dividend.
-    // convert value to positive to use unsigned division
-    __ lneg($dst$$Register, $tmp2$$Register);
-    __ divl($tmp$$Register);
-    __ xchgl($dst$$Register, $tmp2$$Register);
-    __ divl($tmp$$Register);
-    // revert result back to negative
-    __ lneg($tmp2$$Register, $dst$$Register);
-    __ jmpb(Ldone);
-
-    __ bind(Lpos);
-    __ divl($tmp$$Register); // Use unsigned division
-    __ xchgl($dst$$Register, $tmp2$$Register);
-    // Fallthrow for final divide, tmp2 has 32 bit hi result
-
-    __ bind(Lfast);
-    // fast path: src is positive
-    __ divl($tmp$$Register); // Use unsigned division
-
-    __ bind(Ldone);
-    __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
-    if (con < 0) {
-      __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
-    }
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-// Remainder Register Long (remainder fit into 32 bits)
-instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
-  match(Set dst (ModL dst imm));
-  effect( TEMP tmp, TEMP tmp2, KILL cr );
-  ins_cost(1000);
-  format %{ "MOV    $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
-            "CMP    $tmp,EDX\n\t"
-            "JA,s   fast\n\t"
-            "MOV    $tmp2,EAX\n\t"
-            "MOV    EAX,EDX\n\t"
-            "MOV    EDX,0\n\t"
-            "JLE,s  pos\n\t"
-            "LNEG   EAX : $tmp2\n\t"
-            "DIV    $tmp # unsigned division\n\t"
-            "MOV    EAX,$tmp2\n\t"
-            "DIV    $tmp\n\t"
-            "NEG    EDX\n\t"
-            "JMP,s  done\n"
-    "pos:\n\t"
-            "DIV    $tmp\n\t"
-            "MOV    EAX,$tmp2\n"
-    "fast:\n\t"
-            "DIV    $tmp\n"
-    "done:\n\t"
-            "MOV    EAX,EDX\n\t"
-            "SAR    EDX,31\n\t" %}
-  ins_encode %{
-    int con = (int)$imm$$constant;
-    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
-    int pcon = (con > 0) ? con : -con;
-    Label  Lfast, Lpos, Ldone;
-
-    __ movl($tmp$$Register, pcon);
-    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
-    __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit
-
-    __ movl($tmp2$$Register, $dst$$Register); // save
-    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
-    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
-    __ jccb(Assembler::lessEqual, Lpos); // result is positive
-
-    // Negative dividend.
-    // convert value to positive to use unsigned division
-    __ lneg($dst$$Register, $tmp2$$Register);
-    __ divl($tmp$$Register);
-    __ movl($dst$$Register, $tmp2$$Register);
-    __ divl($tmp$$Register);
-    // revert remainder back to negative
-    __ negl(HIGH_FROM_LOW($dst$$Register));
-    __ jmpb(Ldone);
-
-    __ bind(Lpos);
-    __ divl($tmp$$Register);
-    __ movl($dst$$Register, $tmp2$$Register);
-
-    __ bind(Lfast);
-    // fast path: src is positive
-    __ divl($tmp$$Register);
-
-    __ bind(Ldone);
-    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
-    __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign
-
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-// Integer Shift Instructions
-// Shift Left by one
-instruct shlI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
-  match(Set dst (LShiftI dst shift));
-  effect(KILL cr);
-
-  size(2);
-  format %{ "SHL    $dst,$shift" %}
-  opcode(0xD1, 0x4);  /* D1 /4 */
-  ins_encode( OpcP, RegOpc( dst ) );
-  ins_pipe( ialu_reg );
-%}
-
-// Shift Left by 8-bit immediate
-instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
-  match(Set dst (LShiftI dst shift));
-  effect(KILL cr);
-
-  size(3);
-  format %{ "SHL    $dst,$shift" %}
-  opcode(0xC1, 0x4);  /* C1 /4 ib */
-  ins_encode( RegOpcImm( dst, shift) );
-  ins_pipe( ialu_reg );
-%}
-
-// Shift Left by variable
-instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
-  match(Set dst (LShiftI dst shift));
-  effect(KILL cr);
-
-  size(2);
-  format %{ "SHL    $dst,$shift" %}
-  opcode(0xD3, 0x4);  /* D3 /4 */
-  ins_encode( OpcP, RegOpc( dst ) );
-  ins_pipe( ialu_reg_reg );
-%}
-
-// Arithmetic shift right by one
-instruct sarI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
-  match(Set dst (RShiftI dst shift));
-  effect(KILL cr);
-
-  size(2);
-  format %{ "SAR    $dst,$shift" %}
-  opcode(0xD1, 0x7);  /* D1 /7 */
-  ins_encode( OpcP, RegOpc( dst ) );
-  ins_pipe( ialu_reg );
-%}
-
-// Arithmetic shift right by one
-instruct sarI_mem_1(memory dst, immI_1 shift, eFlagsReg cr) %{
-  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
-  effect(KILL cr);
-  format %{ "SAR    $dst,$shift" %}
-  opcode(0xD1, 0x7);  /* D1 /7 */
-  ins_encode( SetInstMark, OpcP, RMopc_Mem(secondary,dst), ClearInstMark );
-  ins_pipe( ialu_mem_imm );
-%}
-
-// Arithmetic Shift Right by 8-bit immediate
-instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
-  match(Set dst (RShiftI dst shift));
-  effect(KILL cr);
-
-  size(3);
-  format %{ "SAR    $dst,$shift" %}
-  opcode(0xC1, 0x7);  /* C1 /7 ib */
-  ins_encode( RegOpcImm( dst, shift ) );
-  ins_pipe( ialu_mem_imm );
-%}
-
-// Arithmetic Shift Right by 8-bit immediate
-instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
-  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
-  effect(KILL cr);
-
-  format %{ "SAR    $dst,$shift" %}
-  opcode(0xC1, 0x7);  /* C1 /7 ib */
-  ins_encode( SetInstMark, OpcP, RMopc_Mem(secondary, dst ), Con8or32(shift), ClearInstMark );
-  ins_pipe( ialu_mem_imm );
-%}
-
-// Arithmetic Shift Right by variable
-instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
-  match(Set dst (RShiftI dst shift));
-  effect(KILL cr);
-
-  size(2);
-  format %{ "SAR    $dst,$shift" %}
-  opcode(0xD3, 0x7);  /* D3 /7 */
-  ins_encode( OpcP, RegOpc( dst ) );
-  ins_pipe( ialu_reg_reg );
-%}
-
-// Logical shift right by one
-instruct shrI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
-  match(Set dst (URShiftI dst shift));
-  effect(KILL cr);
-
-  size(2);
-  format %{ "SHR    $dst,$shift" %}
-  opcode(0xD1, 0x5);  /* D1 /5 */
-  ins_encode( OpcP, RegOpc( dst ) );
-  ins_pipe( ialu_reg );
-%}
-
-// Logical Shift Right by 8-bit immediate
-instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
-  match(Set dst (URShiftI dst shift));
-  effect(KILL cr);
-
-  size(3);
-  format %{ "SHR    $dst,$shift" %}
-  opcode(0xC1, 0x5);  /* C1 /5 ib */
-  ins_encode( RegOpcImm( dst, shift) );
-  ins_pipe( ialu_reg );
-%}
-
-
-// Logical Shift Right by 24, followed by Arithmetic Shift Left by 24.
-// This idiom is used by the compiler for the i2b bytecode.
-instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
-  match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
-
-  size(3);
-  format %{ "MOVSX  $dst,$src :8" %}
-  ins_encode %{
-    __ movsbl($dst$$Register, $src$$Register);
-  %}
-  ins_pipe(ialu_reg_reg);
-%}
-
-// Logical Shift Right by 16, followed by Arithmetic Shift Left by 16.
-// This idiom is used by the compiler the i2s bytecode.
-instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
-  match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
-
-  size(3);
-  format %{ "MOVSX  $dst,$src :16" %}
-  ins_encode %{
-    __ movswl($dst$$Register, $src$$Register);
-  %}
-  ins_pipe(ialu_reg_reg);
-%}
-
-
-// Logical Shift Right by variable
-instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
-  match(Set dst (URShiftI dst shift));
-  effect(KILL cr);
-
-  size(2);
-  format %{ "SHR    $dst,$shift" %}
-  opcode(0xD3, 0x5);  /* D3 /5 */
-  ins_encode( OpcP, RegOpc( dst ) );
-  ins_pipe( ialu_reg_reg );
-%}
-
-
-//----------Logical Instructions-----------------------------------------------
-//----------Integer Logical Instructions---------------------------------------
-// And Instructions
-// And Register with Register
-instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
-  match(Set dst (AndI dst src));
-  effect(KILL cr);
-
-  size(2);
-  format %{ "AND    $dst,$src" %}
-  opcode(0x23);
-  ins_encode( OpcP, RegReg( dst, src) );
-  ins_pipe( ialu_reg_reg );
-%}
-
-// And Register with Immediate
-instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
-  match(Set dst (AndI dst src));
-  effect(KILL cr);
-
-  format %{ "AND    $dst,$src" %}
-  opcode(0x81,0x04);  /* Opcode 81 /4 */
-  // ins_encode( RegImm( dst, src) );
-  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
-  ins_pipe( ialu_reg );
-%}
-
-// And Register with Memory
-instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
-  match(Set dst (AndI dst (LoadI src)));
-  effect(KILL cr);
-
-  ins_cost(150);
-  format %{ "AND    $dst,$src" %}
-  opcode(0x23);
-  ins_encode( SetInstMark, OpcP, RegMem( dst, src), ClearInstMark );
-  ins_pipe( ialu_reg_mem );
-%}
-
-// And Memory with Register
-instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
-  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
-  effect(KILL cr);
-
-  ins_cost(150);
-  format %{ "AND    $dst,$src" %}
-  opcode(0x21);  /* Opcode 21 /r */
-  ins_encode( SetInstMark, OpcP, RegMem( src, dst ), ClearInstMark );
-  ins_pipe( ialu_mem_reg );
-%}
-
-// And Memory with Immediate
-instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
-  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
-  effect(KILL cr);
-
-  ins_cost(125);
-  format %{ "AND    $dst,$src" %}
-  opcode(0x81, 0x4);  /* Opcode 81 /4 id */
-  // ins_encode( MemImm( dst, src) );
-  ins_encode( SetInstMark, OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32(src), ClearInstMark );
-  ins_pipe( ialu_mem_imm );
-%}
-
-// BMI1 instructions
-instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
-  match(Set dst (AndI (XorI src1 minus_1) src2));
-  predicate(UseBMI1Instructions);
-  effect(KILL cr);
-
-  format %{ "ANDNL  $dst, $src1, $src2" %}
-
-  ins_encode %{
-    __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
-  %}
-  ins_pipe(ialu_reg);
-%}
-
-instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
-  match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
-  predicate(UseBMI1Instructions);
-  effect(KILL cr);
-
-  ins_cost(125);
-  format %{ "ANDNL  $dst, $src1, $src2" %}
-
-  ins_encode %{
-    __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
-  %}
-  ins_pipe(ialu_reg_mem);
-%}
-
-instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, eFlagsReg cr) %{
-  match(Set dst (AndI (SubI imm_zero src) src));
-  predicate(UseBMI1Instructions);
-  effect(KILL cr);
-
-  format %{ "BLSIL  $dst, $src" %}
-
-  ins_encode %{
-    __ blsil($dst$$Register, $src$$Register);
-  %}
-  ins_pipe(ialu_reg);
-%}
-
-instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, eFlagsReg cr) %{
-  match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
-  predicate(UseBMI1Instructions);
-  effect(KILL cr);
-
-  ins_cost(125);
-  format %{ "BLSIL  $dst, $src" %}
-
-  ins_encode %{
-    __ blsil($dst$$Register, $src$$Address);
-  %}
-  ins_pipe(ialu_reg_mem);
-%}
-
-instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
-%{
-  match(Set dst (XorI (AddI src minus_1) src));
-  predicate(UseBMI1Instructions);
-  effect(KILL cr);
-
-  format %{ "BLSMSKL $dst, $src" %}
-
-  ins_encode %{
-    __ blsmskl($dst$$Register, $src$$Register);
-  %}
-
-  ins_pipe(ialu_reg);
-%}
-
-instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
-%{
-  match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
-  predicate(UseBMI1Instructions);
-  effect(KILL cr);
-
-  ins_cost(125);
-  format %{ "BLSMSKL $dst, $src" %}
-
-  ins_encode %{
-    __ blsmskl($dst$$Register, $src$$Address);
-  %}
-
-  ins_pipe(ialu_reg_mem);
-%}
-
-instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
-%{
-  match(Set dst (AndI (AddI src minus_1) src) );
-  predicate(UseBMI1Instructions);
-  effect(KILL cr);
-
-  format %{ "BLSRL  $dst, $src" %}
-
-  ins_encode %{
-    __ blsrl($dst$$Register, $src$$Register);
-  %}
-
-  ins_pipe(ialu_reg);
-%}
-
-instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
-%{
-  match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
-  predicate(UseBMI1Instructions);
-  effect(KILL cr);
-
-  ins_cost(125);
-  format %{ "BLSRL  $dst, $src" %}
-
-  ins_encode %{
-    __ blsrl($dst$$Register, $src$$Address);
-  %}
-
-  ins_pipe(ialu_reg_mem);
-%}
-
-// Or Instructions
-// Or Register with Register
-instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
-  match(Set dst (OrI dst src));
-  effect(KILL cr);
-
-  size(2);
-  format %{ "OR     $dst,$src" %}
-  opcode(0x0B);
-  ins_encode( OpcP, RegReg( dst, src) );
-  ins_pipe( ialu_reg_reg );
-%}
-
-instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
-  match(Set dst (OrI dst (CastP2X src)));
-  effect(KILL cr);
-
-  size(2);
-  format %{ "OR     $dst,$src" %}
-  opcode(0x0B);
-  ins_encode( OpcP, RegReg( dst, src) );
-  ins_pipe( ialu_reg_reg );
-%}
-
-
-// Or Register with Immediate
-instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
-  match(Set dst (OrI dst src));
-  effect(KILL cr);
-
-  format %{ "OR     $dst,$src" %}
-  opcode(0x81,0x01);  /* Opcode 81 /1 id */
-  // ins_encode( RegImm( dst, src) );
-  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
-  ins_pipe( ialu_reg );
-%}
-
-// Or Register with Memory
-instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
-  match(Set dst (OrI dst (LoadI src)));
-  effect(KILL cr);
-
-  ins_cost(150);
-  format %{ "OR     $dst,$src" %}
-  opcode(0x0B);
-  ins_encode( SetInstMark, OpcP, RegMem( dst, src), ClearInstMark );
-  ins_pipe( ialu_reg_mem );
-%}
-
-// Or Memory with Register
-instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
-  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
-  effect(KILL cr);
-
-  ins_cost(150);
-  format %{ "OR     $dst,$src" %}
-  opcode(0x09);  /* Opcode 09 /r */
-  ins_encode( SetInstMark, OpcP, RegMem( src, dst ), ClearInstMark );
-  ins_pipe( ialu_mem_reg );
-%}
-
-// Or Memory with Immediate
-instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
-  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
-  effect(KILL cr);
-
-  ins_cost(125);
-  format %{ "OR     $dst,$src" %}
-  opcode(0x81,0x1);  /* Opcode 81 /1 id */
-  // ins_encode( MemImm( dst, src) );
-  ins_encode( SetInstMark, OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32(src), ClearInstMark );
-  ins_pipe( ialu_mem_imm );
-%}
-
-// ROL/ROR
-// ROL expand
-instruct rolI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
-  effect(USE_DEF dst, USE shift, KILL cr);
-
-  format %{ "ROL    $dst, $shift" %}
-  opcode(0xD1, 0x0); /* Opcode D1 /0 */
-  ins_encode( OpcP, RegOpc( dst ));
-  ins_pipe( ialu_reg );
-%}
-
-instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
-  effect(USE_DEF dst, USE shift, KILL cr);
-
-  format %{ "ROL    $dst, $shift" %}
-  opcode(0xC1, 0x0); /*Opcode /C1  /0  */
-  ins_encode( RegOpcImm(dst, shift) );
-  ins_pipe(ialu_reg);
-%}
-
-instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
-  effect(USE_DEF dst, USE shift, KILL cr);
-
-  format %{ "ROL    $dst, $shift" %}
-  opcode(0xD3, 0x0);    /* Opcode D3 /0 */
-  ins_encode(OpcP, RegOpc(dst));
-  ins_pipe( ialu_reg_reg );
-%}
-// end of ROL expand
-
-// ROL 32bit by one once
-instruct rolI_eReg_i1(rRegI dst, immI_1 lshift, immI_M1 rshift, eFlagsReg cr) %{
-  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
-
-  expand %{
-    rolI_eReg_imm1(dst, lshift, cr);
-  %}
-%}
-
-// ROL 32bit var by imm8 once
-instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
-  predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
-  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
-
-  expand %{
-    rolI_eReg_imm8(dst, lshift, cr);
-  %}
-%}
-
-// ROL 32bit var by var once
-instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
-  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
-
-  expand %{
-    rolI_eReg_CL(dst, shift, cr);
-  %}
-%}
-
-// ROL 32bit var by var once
-instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
-  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
-
-  expand %{
-    rolI_eReg_CL(dst, shift, cr);
-  %}
-%}
-
-// ROR expand
-instruct rorI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
-  effect(USE_DEF dst, USE shift, KILL cr);
-
-  format %{ "ROR    $dst, $shift" %}
-  opcode(0xD1,0x1);  /* Opcode D1 /1 */
-  ins_encode( OpcP, RegOpc( dst ) );
-  ins_pipe( ialu_reg );
-%}
-
-instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
-  effect (USE_DEF dst, USE shift, KILL cr);
-
-  format %{ "ROR    $dst, $shift" %}
-  opcode(0xC1, 0x1); /* Opcode /C1 /1 ib */
-  ins_encode( RegOpcImm(dst, shift) );
-  ins_pipe( ialu_reg );
-%}
-
-instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
-  effect(USE_DEF dst, USE shift, KILL cr);
-
-  format %{ "ROR    $dst, $shift" %}
-  opcode(0xD3, 0x1);    /* Opcode D3 /1 */
-  ins_encode(OpcP, RegOpc(dst));
-  ins_pipe( ialu_reg_reg );
-%}
-// end of ROR expand
-
-// ROR right once
-instruct rorI_eReg_i1(rRegI dst, immI_1 rshift, immI_M1 lshift, eFlagsReg cr) %{
-  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
-
-  expand %{
-    rorI_eReg_imm1(dst, rshift, cr);
-  %}
-%}
-
-// ROR 32bit by immI8 once
-instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
-  predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
-  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
-
-  expand %{
-    rorI_eReg_imm8(dst, rshift, cr);
-  %}
-%}
-
-// ROR 32bit var by var once
-instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
-  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
-
-  expand %{
-    rorI_eReg_CL(dst, shift, cr);
-  %}
-%}
-
-// ROR 32bit var by var once
-instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
-  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
-
-  expand %{
-    rorI_eReg_CL(dst, shift, cr);
-  %}
-%}
-
-// Xor Instructions
-// Xor Register with Register
-instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
-  match(Set dst (XorI dst src));
-  effect(KILL cr);
-
-  size(2);
-  format %{ "XOR    $dst,$src" %}
-  opcode(0x33);
-  ins_encode( OpcP, RegReg( dst, src) );
-  ins_pipe( ialu_reg_reg );
-%}
-
-// Xor Register with Immediate -1
-instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
-  match(Set dst (XorI dst imm));
-
-  size(2);
-  format %{ "NOT    $dst" %}
-  ins_encode %{
-     __ notl($dst$$Register);
-  %}
-  ins_pipe( ialu_reg );
-%}
-
-// Xor Register with Immediate
-instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
-  match(Set dst (XorI dst src));
-  effect(KILL cr);
-
-  format %{ "XOR    $dst,$src" %}
-  opcode(0x81,0x06);  /* Opcode 81 /6 id */
-  // ins_encode( RegImm( dst, src) );
-  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
-  ins_pipe( ialu_reg );
-%}
-
-// Xor Register with Memory
-instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
-  match(Set dst (XorI dst (LoadI src)));
-  effect(KILL cr);
-
-  ins_cost(150);
-  format %{ "XOR    $dst,$src" %}
-  opcode(0x33);
-  ins_encode( SetInstMark, OpcP, RegMem(dst, src), ClearInstMark );
-  ins_pipe( ialu_reg_mem );
-%}
-
-// Xor Memory with Register
-instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
-  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
-  effect(KILL cr);
-
-  ins_cost(150);
-  format %{ "XOR    $dst,$src" %}
-  opcode(0x31);  /* Opcode 31 /r */
-  ins_encode( SetInstMark, OpcP, RegMem( src, dst ), ClearInstMark );
-  ins_pipe( ialu_mem_reg );
-%}
-
-// Xor Memory with Immediate
-instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
-  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
-  effect(KILL cr);
-
-  ins_cost(125);
-  format %{ "XOR    $dst,$src" %}
-  opcode(0x81,0x6);  /* Opcode 81 /6 id */
-  ins_encode( SetInstMark, OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32(src), ClearInstMark );
-  ins_pipe( ialu_mem_imm );
-%}
-
-//----------Convert Int to Boolean---------------------------------------------
-
-instruct movI_nocopy(rRegI dst, rRegI src) %{
-  effect( DEF dst, USE src );
-  format %{ "MOV    $dst,$src" %}
-  ins_encode( enc_Copy( dst, src) );
-  ins_pipe( ialu_reg_reg );
-%}
-
-instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
-  effect( USE_DEF dst, USE src, KILL cr );
-
-  size(4);
-  format %{ "NEG    $dst\n\t"
-            "ADC    $dst,$src" %}
-  ins_encode( neg_reg(dst),
-              OpcRegReg(0x13,dst,src) );
-  ins_pipe( ialu_reg_reg_long );
-%}
-
-instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
-  match(Set dst (Conv2B src));
-
-  expand %{
-    movI_nocopy(dst,src);
-    ci2b(dst,src,cr);
-  %}
-%}
-
-instruct movP_nocopy(rRegI dst, eRegP src) %{
-  effect( DEF dst, USE src );
-  format %{ "MOV    $dst,$src" %}
-  ins_encode( enc_Copy( dst, src) );
-  ins_pipe( ialu_reg_reg );
-%}
-
-instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
-  effect( USE_DEF dst, USE src, KILL cr );
-  format %{ "NEG    $dst\n\t"
-            "ADC    $dst,$src" %}
-  ins_encode( neg_reg(dst),
-              OpcRegReg(0x13,dst,src) );
-  ins_pipe( ialu_reg_reg_long );
-%}
-
-instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
-  match(Set dst (Conv2B src));
-
-  expand %{
-    movP_nocopy(dst,src);
-    cp2b(dst,src,cr);
-  %}
-%}
-
-instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
-  match(Set dst (CmpLTMask p q));
-  effect(KILL cr);
-  ins_cost(400);
-
-  // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination
-  format %{ "XOR    $dst,$dst\n\t"
-            "CMP    $p,$q\n\t"
-            "SETlt  $dst\n\t"
-            "NEG    $dst" %}
-  ins_encode %{
-    Register Rp = $p$$Register;
-    Register Rq = $q$$Register;
-    Register Rd = $dst$$Register;
-    Label done;
-    __ xorl(Rd, Rd);
-    __ cmpl(Rp, Rq);
-    __ setb(Assembler::less, Rd);
-    __ negl(Rd);
-  %}
-
-  ins_pipe(pipe_slow);
-%}
-
-instruct cmpLTMask0(rRegI dst, immI_0 zero, eFlagsReg cr) %{
-  match(Set dst (CmpLTMask dst zero));
-  effect(DEF dst, KILL cr);
-  ins_cost(100);
-
-  format %{ "SAR    $dst,31\t# cmpLTMask0" %}
-  ins_encode %{
-  __ sarl($dst$$Register, 31);
-  %}
-  ins_pipe(ialu_reg);
-%}
-
-/* better to save a register than avoid a branch */
-instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
-  match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
-  effect(KILL cr);
-  ins_cost(400);
-  format %{ "SUB    $p,$q\t# cadd_cmpLTMask\n\t"
-            "JGE    done\n\t"
-            "ADD    $p,$y\n"
-            "done:  " %}
-  ins_encode %{
-    Register Rp = $p$$Register;
-    Register Rq = $q$$Register;
-    Register Ry = $y$$Register;
-    Label done;
-    __ subl(Rp, Rq);
-    __ jccb(Assembler::greaterEqual, done);
-    __ addl(Rp, Ry);
-    __ bind(done);
-  %}
-
-  ins_pipe(pipe_cmplt);
-%}
-
-/* better to save a register than avoid a branch */
-instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
-  match(Set y (AndI (CmpLTMask p q) y));
-  effect(KILL cr);
-
-  ins_cost(300);
-
-  format %{ "CMPL     $p, $q\t# and_cmpLTMask\n\t"
-            "JLT      done\n\t"
-            "XORL     $y, $y\n"
-            "done:  " %}
-  ins_encode %{
-    Register Rp = $p$$Register;
-    Register Rq = $q$$Register;
-    Register Ry = $y$$Register;
-    Label done;
-    __ cmpl(Rp, Rq);
-    __ jccb(Assembler::less, done);
-    __ xorl(Ry, Ry);
-    __ bind(done);
-  %}
-
-  ins_pipe(pipe_cmplt);
-%}
-
-/* If I enable this, I encourage spilling in the inner loop of compress.
-instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
-  match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
-*/
-//----------Overflow Math Instructions-----------------------------------------
-
-instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
-%{
-  match(Set cr (OverflowAddI op1 op2));
-  effect(DEF cr, USE_KILL op1, USE op2);
-
-  format %{ "ADD    $op1, $op2\t# overflow check int" %}
-
-  ins_encode %{
-    __ addl($op1$$Register, $op2$$Register);
-  %}
-  ins_pipe(ialu_reg_reg);
-%}
-
-instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
-%{
-  match(Set cr (OverflowAddI op1 op2));
-  effect(DEF cr, USE_KILL op1, USE op2);
-
-  format %{ "ADD    $op1, $op2\t# overflow check int" %}
-
-  ins_encode %{
-    __ addl($op1$$Register, $op2$$constant);
-  %}
-  ins_pipe(ialu_reg_reg);
-%}
-
-instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
-%{
-  match(Set cr (OverflowSubI op1 op2));
-
-  format %{ "CMP    $op1, $op2\t# overflow check int" %}
-  ins_encode %{
-    __ cmpl($op1$$Register, $op2$$Register);
-  %}
-  ins_pipe(ialu_reg_reg);
-%}
-
-instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
-%{
-  match(Set cr (OverflowSubI op1 op2));
-
-  format %{ "CMP    $op1, $op2\t# overflow check int" %}
-  ins_encode %{
-    __ cmpl($op1$$Register, $op2$$constant);
-  %}
-  ins_pipe(ialu_reg_reg);
-%}
-
-instruct overflowNegI_rReg(eFlagsReg cr, immI_0 zero, eAXRegI op2)
-%{
-  match(Set cr (OverflowSubI zero op2));
-  effect(DEF cr, USE_KILL op2);
-
-  format %{ "NEG    $op2\t# overflow check int" %}
-  ins_encode %{
-    __ negl($op2$$Register);
-  %}
-  ins_pipe(ialu_reg_reg);
-%}
-
-instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
-%{
-  match(Set cr (OverflowMulI op1 op2));
-  effect(DEF cr, USE_KILL op1, USE op2);
-
-  format %{ "IMUL    $op1, $op2\t# overflow check int" %}
-  ins_encode %{
-    __ imull($op1$$Register, $op2$$Register);
-  %}
-  ins_pipe(ialu_reg_reg_alu0);
-%}
-
-instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
-%{
-  match(Set cr (OverflowMulI op1 op2));
-  effect(DEF cr, TEMP tmp, USE op1, USE op2);
-
-  format %{ "IMUL    $tmp, $op1, $op2\t# overflow check int" %}
-  ins_encode %{
-    __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
-  %}
-  ins_pipe(ialu_reg_reg_alu0);
-%}
-
-// Integer Absolute Instructions
-instruct absI_rReg(rRegI dst, rRegI src, rRegI tmp, eFlagsReg cr)
-%{
-  match(Set dst (AbsI src));
-  effect(TEMP dst, TEMP tmp, KILL cr);
-  format %{ "movl $tmp, $src\n\t"
-            "sarl $tmp, 31\n\t"
-            "movl $dst, $src\n\t"
-            "xorl $dst, $tmp\n\t"
-            "subl $dst, $tmp\n"
-          %}
-  ins_encode %{
-    __ movl($tmp$$Register, $src$$Register);
-    __ sarl($tmp$$Register, 31);
-    __ movl($dst$$Register, $src$$Register);
-    __ xorl($dst$$Register, $tmp$$Register);
-    __ subl($dst$$Register, $tmp$$Register);
-  %}
-
-  ins_pipe(ialu_reg_reg);
-%}
-
-//----------Long Instructions------------------------------------------------
-// Add Long Register with Register
-instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
-  match(Set dst (AddL dst src));
-  effect(KILL cr);
-  ins_cost(200);
-  format %{ "ADD    $dst.lo,$src.lo\n\t"
-            "ADC    $dst.hi,$src.hi" %}
-  opcode(0x03, 0x13);
-  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
-  ins_pipe( ialu_reg_reg_long );
-%}
-
-// Add Long Register with Immediate
-instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
-  match(Set dst (AddL dst src));
-  effect(KILL cr);
-  format %{ "ADD    $dst.lo,$src.lo\n\t"
-            "ADC    $dst.hi,$src.hi" %}
-  opcode(0x81,0x00,0x02);  /* Opcode 81 /0, 81 /2 */
-  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
-  ins_pipe( ialu_reg_long );
-%}
-
-// Add Long Register with Memory
-instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
-  match(Set dst (AddL dst (LoadL mem)));
-  effect(KILL cr);
-  ins_cost(125);
-  format %{ "ADD    $dst.lo,$mem\n\t"
-            "ADC    $dst.hi,$mem+4" %}
-  opcode(0x03, 0x13);
-  ins_encode( SetInstMark, OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem), ClearInstMark );
-  ins_pipe( ialu_reg_long_mem );
-%}
-
-// Subtract Long Register with Register.
-instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
-  match(Set dst (SubL dst src));
-  effect(KILL cr);
-  ins_cost(200);
-  format %{ "SUB    $dst.lo,$src.lo\n\t"
-            "SBB    $dst.hi,$src.hi" %}
-  opcode(0x2B, 0x1B);
-  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
-  ins_pipe( ialu_reg_reg_long );
-%}
-
-// Subtract Long Register with Immediate
-instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
-  match(Set dst (SubL dst src));
-  effect(KILL cr);
-  format %{ "SUB    $dst.lo,$src.lo\n\t"
-            "SBB    $dst.hi,$src.hi" %}
-  opcode(0x81,0x05,0x03);  /* Opcode 81 /5, 81 /3 */
-  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
-  ins_pipe( ialu_reg_long );
-%}
-
-// Subtract Long Register with Memory
-instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
-  match(Set dst (SubL dst (LoadL mem)));
-  effect(KILL cr);
-  ins_cost(125);
-  format %{ "SUB    $dst.lo,$mem\n\t"
-            "SBB    $dst.hi,$mem+4" %}
-  opcode(0x2B, 0x1B);
-  ins_encode( SetInstMark, OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem), ClearInstMark );
-  ins_pipe( ialu_reg_long_mem );
-%}
-
-instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
-  match(Set dst (SubL zero dst));
-  effect(KILL cr);
-  ins_cost(300);
-  format %{ "NEG    $dst.hi\n\tNEG    $dst.lo\n\tSBB    $dst.hi,0" %}
-  ins_encode( neg_long(dst) );
-  ins_pipe( ialu_reg_reg_long );
-%}
-
-// And Long Register with Register
-instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
-  match(Set dst (AndL dst src));
-  effect(KILL cr);
-  format %{ "AND    $dst.lo,$src.lo\n\t"
-            "AND    $dst.hi,$src.hi" %}
-  opcode(0x23,0x23);
-  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
-  ins_pipe( ialu_reg_reg_long );
-%}
-
-// And Long Register with Immediate
-instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
-  match(Set dst (AndL dst src));
-  effect(KILL cr);
-  format %{ "AND    $dst.lo,$src.lo\n\t"
-            "AND    $dst.hi,$src.hi" %}
-  opcode(0x81,0x04,0x04);  /* Opcode 81 /4, 81 /4 */
-  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
-  ins_pipe( ialu_reg_long );
-%}
-
-// And Long Register with Memory
-instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
-  match(Set dst (AndL dst (LoadL mem)));
-  effect(KILL cr);
-  ins_cost(125);
-  format %{ "AND    $dst.lo,$mem\n\t"
-            "AND    $dst.hi,$mem+4" %}
-  opcode(0x23, 0x23);
-  ins_encode( SetInstMark, OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem), ClearInstMark );
-  ins_pipe( ialu_reg_long_mem );
-%}
-
-// BMI1 instructions
-instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
-  match(Set dst (AndL (XorL src1 minus_1) src2));
-  predicate(UseBMI1Instructions);
-  effect(KILL cr, TEMP dst);
-
-  format %{ "ANDNL  $dst.lo, $src1.lo, $src2.lo\n\t"
-            "ANDNL  $dst.hi, $src1.hi, $src2.hi"
-         %}
-
-  ins_encode %{
-    Register Rdst = $dst$$Register;
-    Register Rsrc1 = $src1$$Register;
-    Register Rsrc2 = $src2$$Register;
-    __ andnl(Rdst, Rsrc1, Rsrc2);
-    __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
-  %}
-  ins_pipe(ialu_reg_reg_long);
-%}
-
-instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
-  match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
-  predicate(UseBMI1Instructions);
-  effect(KILL cr, TEMP dst);
-
-  ins_cost(125);
-  format %{ "ANDNL  $dst.lo, $src1.lo, $src2\n\t"
-            "ANDNL  $dst.hi, $src1.hi, $src2+4"
-         %}
-
-  ins_encode %{
-    Register Rdst = $dst$$Register;
-    Register Rsrc1 = $src1$$Register;
-    Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);
-
-    __ andnl(Rdst, Rsrc1, $src2$$Address);
-    __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
-  %}
-  ins_pipe(ialu_reg_mem);
-%}
-
-instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
-  match(Set dst (AndL (SubL imm_zero src) src));
-  predicate(UseBMI1Instructions);
-  effect(KILL cr, TEMP dst);
-
-  format %{ "MOVL   $dst.hi, 0\n\t"
-            "BLSIL  $dst.lo, $src.lo\n\t"
-            "JNZ    done\n\t"
-            "BLSIL  $dst.hi, $src.hi\n"
-            "done:"
-         %}
-
-  ins_encode %{
-    Label done;
-    Register Rdst = $dst$$Register;
-    Register Rsrc = $src$$Register;
-    __ movl(HIGH_FROM_LOW(Rdst), 0);
-    __ blsil(Rdst, Rsrc);
-    __ jccb(Assembler::notZero, done);
-    __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
-    __ bind(done);
-  %}
-  ins_pipe(ialu_reg);
-%}
-
-instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
-  match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
-  predicate(UseBMI1Instructions);
-  effect(KILL cr, TEMP dst);
-
-  ins_cost(125);
-  format %{ "MOVL   $dst.hi, 0\n\t"
-            "BLSIL  $dst.lo, $src\n\t"
-            "JNZ    done\n\t"
-            "BLSIL  $dst.hi, $src+4\n"
-            "done:"
-         %}
-
-  ins_encode %{
-    Label done;
-    Register Rdst = $dst$$Register;
-    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
-
-    __ movl(HIGH_FROM_LOW(Rdst), 0);
-    __ blsil(Rdst, $src$$Address);
-    __ jccb(Assembler::notZero, done);
-    __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
-    __ bind(done);
-  %}
-  ins_pipe(ialu_reg_mem);
-%}
-
-instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
-%{
-  match(Set dst (XorL (AddL src minus_1) src));
-  predicate(UseBMI1Instructions);
-  effect(KILL cr, TEMP dst);
-
-  format %{ "MOVL    $dst.hi, 0\n\t"
-            "BLSMSKL $dst.lo, $src.lo\n\t"
-            "JNC     done\n\t"
-            "BLSMSKL $dst.hi, $src.hi\n"
-            "done:"
-         %}
-
-  ins_encode %{
-    Label done;
-    Register Rdst = $dst$$Register;
-    Register Rsrc = $src$$Register;
-    __ movl(HIGH_FROM_LOW(Rdst), 0);
-    __ blsmskl(Rdst, Rsrc);
-    __ jccb(Assembler::carryClear, done);
-    __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
-    __ bind(done);
-  %}
-
-  ins_pipe(ialu_reg);
-%}
-
-instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
-%{
-  match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
-  predicate(UseBMI1Instructions);
-  effect(KILL cr, TEMP dst);
-
-  ins_cost(125);
-  format %{ "MOVL    $dst.hi, 0\n\t"
-            "BLSMSKL $dst.lo, $src\n\t"
-            "JNC     done\n\t"
-            "BLSMSKL $dst.hi, $src+4\n"
-            "done:"
-         %}
-
-  ins_encode %{
-    Label done;
-    Register Rdst = $dst$$Register;
-    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
-
-    __ movl(HIGH_FROM_LOW(Rdst), 0);
-    __ blsmskl(Rdst, $src$$Address);
-    __ jccb(Assembler::carryClear, done);
-    __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
-    __ bind(done);
-  %}
-
-  ins_pipe(ialu_reg_mem);
-%}
-
-instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
-%{
-  match(Set dst (AndL (AddL src minus_1) src) );
-  predicate(UseBMI1Instructions);
-  effect(KILL cr, TEMP dst);
-
-  format %{ "MOVL   $dst.hi, $src.hi\n\t"
-            "BLSRL  $dst.lo, $src.lo\n\t"
-            "JNC    done\n\t"
-            "BLSRL  $dst.hi, $src.hi\n"
-            "done:"
-  %}
-
-  ins_encode %{
-    Label done;
-    Register Rdst = $dst$$Register;
-    Register Rsrc = $src$$Register;
-    __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
-    __ blsrl(Rdst, Rsrc);
-    __ jccb(Assembler::carryClear, done);
-    __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
-    __ bind(done);
-  %}
-
-  ins_pipe(ialu_reg);
-%}
-
-instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
-%{
-  match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
-  predicate(UseBMI1Instructions);
-  effect(KILL cr, TEMP dst);
-
-  ins_cost(125);
-  format %{ "MOVL   $dst.hi, $src+4\n\t"
-            "BLSRL  $dst.lo, $src\n\t"
-            "JNC    done\n\t"
-            "BLSRL  $dst.hi, $src+4\n"
-            "done:"
-  %}
-
-  ins_encode %{
-    Label done;
-    Register Rdst = $dst$$Register;
-    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
-    __ movl(HIGH_FROM_LOW(Rdst), src_hi);
-    __ blsrl(Rdst, $src$$Address);
-    __ jccb(Assembler::carryClear, done);
-    __ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
-    __ bind(done);
-  %}
-
-  ins_pipe(ialu_reg_mem);
-%}
-
-// Or Long Register with Register
-instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
-  match(Set dst (OrL dst src));
-  effect(KILL cr);
-  format %{ "OR     $dst.lo,$src.lo\n\t"
-            "OR     $dst.hi,$src.hi" %}
-  opcode(0x0B,0x0B);
-  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
-  ins_pipe( ialu_reg_reg_long );
-%}
-
-// Or Long Register with Immediate
-instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
-  match(Set dst (OrL dst src));
-  effect(KILL cr);
-  format %{ "OR     $dst.lo,$src.lo\n\t"
-            "OR     $dst.hi,$src.hi" %}
-  opcode(0x81,0x01,0x01);  /* Opcode 81 /1, 81 /1 */
-  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
-  ins_pipe( ialu_reg_long );
-%}
-
-// Or Long Register with Memory
-instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
-  match(Set dst (OrL dst (LoadL mem)));
-  effect(KILL cr);
-  ins_cost(125);
-  format %{ "OR     $dst.lo,$mem\n\t"
-            "OR     $dst.hi,$mem+4" %}
-  opcode(0x0B,0x0B);
-  ins_encode( SetInstMark, OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem), ClearInstMark );
-  ins_pipe( ialu_reg_long_mem );
-%}
-
-// Xor Long Register with Register
-instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
-  match(Set dst (XorL dst src));
-  effect(KILL cr);
-  format %{ "XOR    $dst.lo,$src.lo\n\t"
-            "XOR    $dst.hi,$src.hi" %}
-  opcode(0x33,0x33);
-  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
-  ins_pipe( ialu_reg_reg_long );
-%}
-
-// Xor Long Register with Immediate -1
-instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
-  match(Set dst (XorL dst imm));
-  format %{ "NOT    $dst.lo\n\t"
-            "NOT    $dst.hi" %}
-  ins_encode %{
-     __ notl($dst$$Register);
-     __ notl(HIGH_FROM_LOW($dst$$Register));
-  %}
-  ins_pipe( ialu_reg_long );
-%}
-
-// Xor Long Register with Immediate
-instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
-  match(Set dst (XorL dst src));
-  effect(KILL cr);
-  format %{ "XOR    $dst.lo,$src.lo\n\t"
-            "XOR    $dst.hi,$src.hi" %}
-  opcode(0x81,0x06,0x06);  /* Opcode 81 /6, 81 /6 */
-  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
-  ins_pipe( ialu_reg_long );
-%}
-
-// Xor Long Register with Memory
-instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
-  match(Set dst (XorL dst (LoadL mem)));
-  effect(KILL cr);
-  ins_cost(125);
-  format %{ "XOR    $dst.lo,$mem\n\t"
-            "XOR    $dst.hi,$mem+4" %}
-  opcode(0x33,0x33);
-  ins_encode( SetInstMark, OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem), ClearInstMark );
-  ins_pipe( ialu_reg_long_mem );
-%}
-
-// Shift Left Long by 1
-instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
-  predicate(UseNewLongLShift);
-  match(Set dst (LShiftL dst cnt));
-  effect(KILL cr);
-  ins_cost(100);
-  format %{ "ADD    $dst.lo,$dst.lo\n\t"
-            "ADC    $dst.hi,$dst.hi" %}
-  ins_encode %{
-    __ addl($dst$$Register,$dst$$Register);
-    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
-  %}
-  ins_pipe( ialu_reg_long );
-%}
-
-// Shift Left Long by 2
-instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
-  predicate(UseNewLongLShift);
-  match(Set dst (LShiftL dst cnt));
-  effect(KILL cr);
-  ins_cost(100);
-  format %{ "ADD    $dst.lo,$dst.lo\n\t"
-            "ADC    $dst.hi,$dst.hi\n\t"
-            "ADD    $dst.lo,$dst.lo\n\t"
-            "ADC    $dst.hi,$dst.hi" %}
-  ins_encode %{
-    __ addl($dst$$Register,$dst$$Register);
-    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
-    __ addl($dst$$Register,$dst$$Register);
-    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
-  %}
-  ins_pipe( ialu_reg_long );
-%}
-
-// Shift Left Long by 3
-instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
-  predicate(UseNewLongLShift);
-  match(Set dst (LShiftL dst cnt));
-  effect(KILL cr);
-  ins_cost(100);
-  format %{ "ADD    $dst.lo,$dst.lo\n\t"
-            "ADC    $dst.hi,$dst.hi\n\t"
-            "ADD    $dst.lo,$dst.lo\n\t"
-            "ADC    $dst.hi,$dst.hi\n\t"
-            "ADD    $dst.lo,$dst.lo\n\t"
-            "ADC    $dst.hi,$dst.hi" %}
-  ins_encode %{
-    __ addl($dst$$Register,$dst$$Register);
-    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
-    __ addl($dst$$Register,$dst$$Register);
-    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
-    __ addl($dst$$Register,$dst$$Register);
-    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
-  %}
-  ins_pipe( ialu_reg_long );
-%}
-
-// Shift Left Long by 1-31
-instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
-  match(Set dst (LShiftL dst cnt));
-  effect(KILL cr);
-  ins_cost(200);
-  format %{ "SHLD   $dst.hi,$dst.lo,$cnt\n\t"
-            "SHL    $dst.lo,$cnt" %}
-  opcode(0xC1, 0x4, 0xA4);  /* 0F/A4, then C1 /4 ib */
-  ins_encode( move_long_small_shift(dst,cnt) );
-  ins_pipe( ialu_reg_long );
-%}
-
-// Shift Left Long by 32-63
-instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
-  match(Set dst (LShiftL dst cnt));
-  effect(KILL cr);
-  ins_cost(300);
-  format %{ "MOV    $dst.hi,$dst.lo\n"
-          "\tSHL    $dst.hi,$cnt-32\n"
-          "\tXOR    $dst.lo,$dst.lo" %}
-  opcode(0xC1, 0x4);  /* C1 /4 ib */
-  ins_encode( move_long_big_shift_clr(dst,cnt) );
-  ins_pipe( ialu_reg_long );
-%}
-
-// Shift Left Long by variable
-instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
-  match(Set dst (LShiftL dst shift));
-  effect(KILL cr);
-  ins_cost(500+200);
-  size(17);
-  format %{ "TEST   $shift,32\n\t"
-            "JEQ,s  small\n\t"
-            "MOV    $dst.hi,$dst.lo\n\t"
-            "XOR    $dst.lo,$dst.lo\n"
-    "small:\tSHLD   $dst.hi,$dst.lo,$shift\n\t"
-            "SHL    $dst.lo,$shift" %}
-  ins_encode( shift_left_long( dst, shift ) );
-  ins_pipe( pipe_slow );
-%}
-
-// Shift Right Long by 1-31
-instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
-  match(Set dst (URShiftL dst cnt));
-  effect(KILL cr);
-  ins_cost(200);
-  format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
-            "SHR    $dst.hi,$cnt" %}
-  opcode(0xC1, 0x5, 0xAC);  /* 0F/AC, then C1 /5 ib */
-  ins_encode( move_long_small_shift(dst,cnt) );
-  ins_pipe( ialu_reg_long );
-%}
-
-// Shift Right Long by 32-63
-instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
-  match(Set dst (URShiftL dst cnt));
-  effect(KILL cr);
-  ins_cost(300);
-  format %{ "MOV    $dst.lo,$dst.hi\n"
-          "\tSHR    $dst.lo,$cnt-32\n"
-          "\tXOR    $dst.hi,$dst.hi" %}
-  opcode(0xC1, 0x5);  /* C1 /5 ib */
-  ins_encode( move_long_big_shift_clr(dst,cnt) );
-  ins_pipe( ialu_reg_long );
-%}
-
-// Shift Right Long by variable
-instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
-  match(Set dst (URShiftL dst shift));
-  effect(KILL cr);
-  ins_cost(600);
-  size(17);
-  format %{ "TEST   $shift,32\n\t"
-            "JEQ,s  small\n\t"
-            "MOV    $dst.lo,$dst.hi\n\t"
-            "XOR    $dst.hi,$dst.hi\n"
-    "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
-            "SHR    $dst.hi,$shift" %}
-  ins_encode( shift_right_long( dst, shift ) );
-  ins_pipe( pipe_slow );
-%}
-
-// Shift Right Long by 1-31
-instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
-  match(Set dst (RShiftL dst cnt));
-  effect(KILL cr);
-  ins_cost(200);
-  format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
-            "SAR    $dst.hi,$cnt" %}
-  opcode(0xC1, 0x7, 0xAC);  /* 0F/AC, then C1 /7 ib */
-  ins_encode( move_long_small_shift(dst,cnt) );
-  ins_pipe( ialu_reg_long );
-%}
-
-// Shift Right Long by 32-63
-instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
-  match(Set dst (RShiftL dst cnt));
-  effect(KILL cr);
-  ins_cost(300);
-  format %{ "MOV    $dst.lo,$dst.hi\n"
-          "\tSAR    $dst.lo,$cnt-32\n"
-          "\tSAR    $dst.hi,31" %}
-  opcode(0xC1, 0x7);  /* C1 /7 ib */
-  ins_encode( move_long_big_shift_sign(dst,cnt) );
-  ins_pipe( ialu_reg_long );
-%}
-
-// Shift Right arithmetic Long by variable
-instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
-  match(Set dst (RShiftL dst shift));
-  effect(KILL cr);
-  ins_cost(600);
-  size(18);
-  format %{ "TEST   $shift,32\n\t"
-            "JEQ,s  small\n\t"
-            "MOV    $dst.lo,$dst.hi\n\t"
-            "SAR    $dst.hi,31\n"
-    "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
-            "SAR    $dst.hi,$shift" %}
-  ins_encode( shift_right_arith_long( dst, shift ) );
-  ins_pipe( pipe_slow );
-%}
-
-
-//----------Double Instructions------------------------------------------------
-// Double Math
-
-// Compare & branch
-
-// P6 version of float compare, sets condition codes in EFLAGS
-instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
-  predicate(VM_Version::supports_cmov() && UseSSE <=1);
-  match(Set cr (CmpD src1 src2));
-  effect(KILL rax);
-  ins_cost(150);
-  format %{ "FLD    $src1\n\t"
-            "FUCOMIP ST,$src2  // P6 instruction\n\t"
-            "JNP    exit\n\t"
-            "MOV    ah,1       // saw a NaN, set CF\n\t"
-            "SAHF\n"
-     "exit:\tNOP               // avoid branch to branch" %}
-  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
-  ins_encode( Push_Reg_DPR(src1),
-              OpcP, RegOpc(src2),
-              cmpF_P6_fixup );
-  ins_pipe( pipe_slow );
-%}
-
-instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
-  predicate(VM_Version::supports_cmov() && UseSSE <=1);
-  match(Set cr (CmpD src1 src2));
-  ins_cost(150);
-  format %{ "FLD    $src1\n\t"
-            "FUCOMIP ST,$src2  // P6 instruction" %}
-  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
-  ins_encode( Push_Reg_DPR(src1),
-              OpcP, RegOpc(src2));
-  ins_pipe( pipe_slow );
-%}
-
-// Compare & branch
-instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
-  predicate(UseSSE<=1);
-  match(Set cr (CmpD src1 src2));
-  effect(KILL rax);
-  ins_cost(200);
-  format %{ "FLD    $src1\n\t"
-            "FCOMp  $src2\n\t"
-            "FNSTSW AX\n\t"
-            "TEST   AX,0x400\n\t"
-            "JZ,s   flags\n\t"
-            "MOV    AH,1\t# unordered treat as LT\n"
-    "flags:\tSAHF" %}
-  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
-  ins_encode( Push_Reg_DPR(src1),
-              OpcP, RegOpc(src2),
-              fpu_flags);
-  ins_pipe( pipe_slow );
-%}
-
-// Compare vs zero into -1,0,1
-instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
-  predicate(UseSSE<=1);
-  match(Set dst (CmpD3 src1 zero));
-  effect(KILL cr, KILL rax);
-  ins_cost(280);
-  format %{ "FTSTD  $dst,$src1" %}
-  opcode(0xE4, 0xD9);
-  ins_encode( Push_Reg_DPR(src1),
-              OpcS, OpcP, PopFPU,
-              CmpF_Result(dst));
-  ins_pipe( pipe_slow );
-%}
-
-// Compare into -1,0,1
-instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
-  predicate(UseSSE<=1);
-  match(Set dst (CmpD3 src1 src2));
-  effect(KILL cr, KILL rax);
-  ins_cost(300);
-  format %{ "FCMPD  $dst,$src1,$src2" %}
-  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
-  ins_encode( Push_Reg_DPR(src1),
-              OpcP, RegOpc(src2),
-              CmpF_Result(dst));
-  ins_pipe( pipe_slow );
-%}
-
-// float compare and set condition codes in EFLAGS by XMM regs
-instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
-  predicate(UseSSE>=2);
-  match(Set cr (CmpD src1 src2));
-  ins_cost(145);
-  format %{ "UCOMISD $src1,$src2\n\t"
-            "JNP,s   exit\n\t"
-            "PUSHF\t# saw NaN, set CF\n\t"
-            "AND     [rsp], #0xffffff2b\n\t"
-            "POPF\n"
-    "exit:" %}
-  ins_encode %{
-    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
-    emit_cmpfp_fixup(masm);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
-  predicate(UseSSE>=2);
-  match(Set cr (CmpD src1 src2));
-  ins_cost(100);
-  format %{ "UCOMISD $src1,$src2" %}
-  ins_encode %{
-    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-// float compare and set condition codes in EFLAGS by XMM regs
-instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
-  predicate(UseSSE>=2);
-  match(Set cr (CmpD src1 (LoadD src2)));
-  ins_cost(145);
-  format %{ "UCOMISD $src1,$src2\n\t"
-            "JNP,s   exit\n\t"
-            "PUSHF\t# saw NaN, set CF\n\t"
-            "AND     [rsp], #0xffffff2b\n\t"
-            "POPF\n"
-    "exit:" %}
-  ins_encode %{
-    __ ucomisd($src1$$XMMRegister, $src2$$Address);
-    emit_cmpfp_fixup(masm);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
-  predicate(UseSSE>=2);
-  match(Set cr (CmpD src1 (LoadD src2)));
-  ins_cost(100);
-  format %{ "UCOMISD $src1,$src2" %}
-  ins_encode %{
-    __ ucomisd($src1$$XMMRegister, $src2$$Address);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-// Compare into -1,0,1 in XMM
-instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
-  predicate(UseSSE>=2);
-  match(Set dst (CmpD3 src1 src2));
-  effect(KILL cr);
-  ins_cost(255);
-  format %{ "UCOMISD $src1, $src2\n\t"
-            "MOV     $dst, #-1\n\t"
-            "JP,s    done\n\t"
-            "JB,s    done\n\t"
-            "SETNE   $dst\n\t"
-            "MOVZB   $dst, $dst\n"
-    "done:" %}
-  ins_encode %{
-    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
-    emit_cmpfp3(masm, $dst$$Register);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-// Compare into -1,0,1 in XMM and memory
-instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
-  predicate(UseSSE>=2);
-  match(Set dst (CmpD3 src1 (LoadD src2)));
-  effect(KILL cr);
-  ins_cost(275);
-  format %{ "UCOMISD $src1, $src2\n\t"
-            "MOV     $dst, #-1\n\t"
-            "JP,s    done\n\t"
-            "JB,s    done\n\t"
-            "SETNE   $dst\n\t"
-            "MOVZB   $dst, $dst\n"
-    "done:" %}
-  ins_encode %{
-    __ ucomisd($src1$$XMMRegister, $src2$$Address);
-    emit_cmpfp3(masm, $dst$$Register);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-
-instruct subDPR_reg(regDPR dst, regDPR src) %{
-  predicate (UseSSE <=1);
-  match(Set dst (SubD dst src));
-
-  format %{ "FLD    $src\n\t"
-            "DSUBp  $dst,ST" %}
-  opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
-  ins_cost(150);
-  ins_encode( Push_Reg_DPR(src),
-              OpcP, RegOpc(dst) );
-  ins_pipe( fpu_reg_reg );
-%}
-
-instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
-  predicate (UseSSE <=1);
-  match(Set dst (RoundDouble (SubD src1 src2)));
-  ins_cost(250);
-
-  format %{ "FLD    $src2\n\t"
-            "DSUB   ST,$src1\n\t"
-            "FSTP_D $dst\t# D-round" %}
-  opcode(0xD8, 0x5);
-  ins_encode( Push_Reg_DPR(src2),
-              OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
-  ins_pipe( fpu_mem_reg_reg );
-%}
-
-
-instruct subDPR_reg_mem(regDPR dst, memory src) %{
-  predicate (UseSSE <=1);
-  match(Set dst (SubD dst (LoadD src)));
-  ins_cost(150);
-
-  format %{ "FLD    $src\n\t"
-            "DSUBp  $dst,ST" %}
-  opcode(0xDE, 0x5, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
-  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src),
-              OpcP, RegOpc(dst), ClearInstMark );
-  ins_pipe( fpu_reg_mem );
-%}
-
-instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
-  predicate (UseSSE<=1);
-  match(Set dst (AbsD src));
-  ins_cost(100);
-  format %{ "FABS" %}
-  opcode(0xE1, 0xD9);
-  ins_encode( OpcS, OpcP );
-  ins_pipe( fpu_reg_reg );
-%}
-
-instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
-  predicate(UseSSE<=1);
-  match(Set dst (NegD src));
-  ins_cost(100);
-  format %{ "FCHS" %}
-  opcode(0xE0, 0xD9);
-  ins_encode( OpcS, OpcP );
-  ins_pipe( fpu_reg_reg );
-%}
-
-instruct addDPR_reg(regDPR dst, regDPR src) %{
-  predicate(UseSSE<=1);
-  match(Set dst (AddD dst src));
-  format %{ "FLD    $src\n\t"
-            "DADD   $dst,ST" %}
-  size(4);
-  ins_cost(150);
-  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
-  ins_encode( Push_Reg_DPR(src),
-              OpcP, RegOpc(dst) );
-  ins_pipe( fpu_reg_reg );
-%}
-
-
-instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
-  predicate(UseSSE<=1);
-  match(Set dst (RoundDouble (AddD src1 src2)));
-  ins_cost(250);
-
-  format %{ "FLD    $src2\n\t"
-            "DADD   ST,$src1\n\t"
-            "FSTP_D $dst\t# D-round" %}
-  opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
-  ins_encode( Push_Reg_DPR(src2),
-              OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
-  ins_pipe( fpu_mem_reg_reg );
-%}
-
-
-instruct addDPR_reg_mem(regDPR dst, memory src) %{
-  predicate(UseSSE<=1);
-  match(Set dst (AddD dst (LoadD src)));
-  ins_cost(150);
-
-  format %{ "FLD    $src\n\t"
-            "DADDp  $dst,ST" %}
-  opcode(0xDE, 0x0, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
-  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src),
-              OpcP, RegOpc(dst), ClearInstMark );
-  ins_pipe( fpu_reg_mem );
-%}
-
-// add-to-memory
-instruct addDPR_mem_reg(memory dst, regDPR src) %{
-  predicate(UseSSE<=1);
-  match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
-  ins_cost(150);
-
-  format %{ "FLD_D  $dst\n\t"
-            "DADD   ST,$src\n\t"
-            "FST_D  $dst" %}
-  opcode(0xDD, 0x0);
-  ins_encode( SetInstMark, Opcode(0xDD), RMopc_Mem(0x00,dst),
-              Opcode(0xD8), RegOpc(src), ClearInstMark,
-              SetInstMark,
-              Opcode(0xDD), RMopc_Mem(0x03,dst),
-              ClearInstMark);
-  ins_pipe( fpu_reg_mem );
-%}
-
-instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
-  predicate(UseSSE<=1);
-  match(Set dst (AddD dst con));
-  ins_cost(125);
-  format %{ "FLD1\n\t"
-            "DADDp  $dst,ST" %}
-  ins_encode %{
-    __ fld1();
-    __ faddp($dst$$reg);
-  %}
-  ins_pipe(fpu_reg);
-%}
-
-instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
-  predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
-  match(Set dst (AddD dst con));
-  ins_cost(200);
-  format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
-            "DADDp  $dst,ST" %}
-  ins_encode %{
-    __ fld_d($constantaddress($con));
-    __ faddp($dst$$reg);
-  %}
-  ins_pipe(fpu_reg_mem);
-%}
-
-instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
-  predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
-  match(Set dst (RoundDouble (AddD src con)));
-  ins_cost(200);
-  format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
-            "DADD   ST,$src\n\t"
-            "FSTP_D $dst\t# D-round" %}
-  ins_encode %{
-    __ fld_d($constantaddress($con));
-    __ fadd($src$$reg);
-    __ fstp_d(Address(rsp, $dst$$disp));
-  %}
-  ins_pipe(fpu_mem_reg_con);
-%}
-
-instruct mulDPR_reg(regDPR dst, regDPR src) %{
-  predicate(UseSSE<=1);
-  match(Set dst (MulD dst src));
-  format %{ "FLD    $src\n\t"
-            "DMULp  $dst,ST" %}
-  opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
-  ins_cost(150);
-  ins_encode( Push_Reg_DPR(src),
-              OpcP, RegOpc(dst) );
-  ins_pipe( fpu_reg_reg );
-%}
-
-// Strict FP instruction biases argument before multiply then
-// biases result to avoid double rounding of subnormals.
-//
-// scale arg1 by multiplying arg1 by 2^(-15360)
-// load arg2
-// multiply scaled arg1 by arg2
-// rescale product by 2^(15360)
-//
-instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
-  predicate( UseSSE<=1 && Compile::current()->has_method() );
-  match(Set dst (MulD dst src));
-  ins_cost(1);   // Select this instruction for all FP double multiplies
-
-  format %{ "FLD    StubRoutines::x86::_fpu_subnormal_bias1\n\t"
-            "DMULp  $dst,ST\n\t"
-            "FLD    $src\n\t"
-            "DMULp  $dst,ST\n\t"
-            "FLD    StubRoutines::x86::_fpu_subnormal_bias2\n\t"
-            "DMULp  $dst,ST\n\t" %}
-  opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
-  ins_encode( strictfp_bias1(dst),
-              Push_Reg_DPR(src),
-              OpcP, RegOpc(dst),
-              strictfp_bias2(dst) );
-  ins_pipe( fpu_reg_reg );
-%}
-
-instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
-  predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
-  match(Set dst (MulD dst con));
-  ins_cost(200);
-  format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
-            "DMULp  $dst,ST" %}
-  ins_encode %{
-    __ fld_d($constantaddress($con));
-    __ fmulp($dst$$reg);
-  %}
-  ins_pipe(fpu_reg_mem);
-%}
-
-
-instruct mulDPR_reg_mem(regDPR dst, memory src) %{
-  predicate( UseSSE<=1 );
-  match(Set dst (MulD dst (LoadD src)));
-  ins_cost(200);
-  format %{ "FLD_D  $src\n\t"
-            "DMULp  $dst,ST" %}
-  opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/  /* LoadD  DD /0 */
-  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src),
-              OpcP, RegOpc(dst), ClearInstMark );
-  ins_pipe( fpu_reg_mem );
-%}
-
-//
-// Cisc-alternate to reg-reg multiply
-instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
-  predicate( UseSSE<=1 );
-  match(Set dst (MulD src (LoadD mem)));
-  ins_cost(250);
-  format %{ "FLD_D  $mem\n\t"
-            "DMUL   ST,$src\n\t"
-            "FSTP_D $dst" %}
-  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadD D9 /0 */
-  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,mem),
-              OpcReg_FPR(src),
-              Pop_Reg_DPR(dst), ClearInstMark );
-  ins_pipe( fpu_reg_reg_mem );
-%}
-
-
-// MACRO3 -- addDPR a mulDPR
-// This instruction is a '2-address' instruction in that the result goes
-// back to src2.  This eliminates a move from the macro; possibly the
-// register allocator will have to add it back (and maybe not).
-instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
-  predicate( UseSSE<=1 );
-  match(Set src2 (AddD (MulD src0 src1) src2));
-  format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
-            "DMUL   ST,$src1\n\t"
-            "DADDp  $src2,ST" %}
-  ins_cost(250);
-  opcode(0xDD); /* LoadD DD /0 */
-  ins_encode( Push_Reg_FPR(src0),
-              FMul_ST_reg(src1),
-              FAddP_reg_ST(src2) );
-  ins_pipe( fpu_reg_reg_reg );
-%}
-
-
-// MACRO3 -- subDPR a mulDPR
-instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
-  predicate( UseSSE<=1 );
-  match(Set src2 (SubD (MulD src0 src1) src2));
-  format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
-            "DMUL   ST,$src1\n\t"
-            "DSUBRp $src2,ST" %}
-  ins_cost(250);
-  ins_encode( Push_Reg_FPR(src0),
-              FMul_ST_reg(src1),
-              Opcode(0xDE), Opc_plus(0xE0,src2));
-  ins_pipe( fpu_reg_reg_reg );
-%}
-
-
-instruct divDPR_reg(regDPR dst, regDPR src) %{
-  predicate( UseSSE<=1 );
-  match(Set dst (DivD dst src));
-
-  format %{ "FLD    $src\n\t"
-            "FDIVp  $dst,ST" %}
-  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
-  ins_cost(150);
-  ins_encode( Push_Reg_DPR(src),
-              OpcP, RegOpc(dst) );
-  ins_pipe( fpu_reg_reg );
-%}
-
-// Strict FP instruction biases argument before division then
-// biases result, to avoid double rounding of subnormals.
-//
-// scale dividend by multiplying dividend by 2^(-15360)
-// load divisor
-// divide scaled dividend by divisor
-// rescale quotient by 2^(15360)
-//
-instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
-  predicate (UseSSE<=1);
-  match(Set dst (DivD dst src));
-  predicate( UseSSE<=1 && Compile::current()->has_method() );
-  ins_cost(01);
-
-  format %{ "FLD    StubRoutines::x86::_fpu_subnormal_bias1\n\t"
-            "DMULp  $dst,ST\n\t"
-            "FLD    $src\n\t"
-            "FDIVp  $dst,ST\n\t"
-            "FLD    StubRoutines::x86::_fpu_subnormal_bias2\n\t"
-            "DMULp  $dst,ST\n\t" %}
-  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
-  ins_encode( strictfp_bias1(dst),
-              Push_Reg_DPR(src),
-              OpcP, RegOpc(dst),
-              strictfp_bias2(dst) );
-  ins_pipe( fpu_reg_reg );
-%}
-
-instruct atanDPR_reg(regDPR dst, regDPR src) %{
-  predicate (UseSSE<=1);
-  match(Set dst(AtanD dst src));
-  format %{ "DATA   $dst,$src" %}
-  opcode(0xD9, 0xF3);
-  ins_encode( Push_Reg_DPR(src),
-              OpcP, OpcS, RegOpc(dst) );
-  ins_pipe( pipe_slow );
-%}
-
-instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
-  predicate (UseSSE>=2);
-  match(Set dst(AtanD dst src));
-  effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
-  format %{ "DATA   $dst,$src" %}
-  opcode(0xD9, 0xF3);
-  ins_encode( Push_SrcD(src),
-              OpcP, OpcS, Push_ResultD(dst) );
-  ins_pipe( pipe_slow );
-%}
-
-instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
-  predicate (UseSSE<=1);
-  match(Set dst (SqrtD src));
-  format %{ "DSQRT  $dst,$src" %}
-  opcode(0xFA, 0xD9);
-  ins_encode( Push_Reg_DPR(src),
-              OpcS, OpcP, Pop_Reg_DPR(dst) );
-  ins_pipe( pipe_slow );
-%}
-
-//-------------Float Instructions-------------------------------
-// Float Math
-
-// Code for float compare:
-//     fcompp();
-//     fwait(); fnstsw_ax();
-//     sahf();
-//     movl(dst, unordered_result);
-//     jcc(Assembler::parity, exit);
-//     movl(dst, less_result);
-//     jcc(Assembler::below, exit);
-//     movl(dst, equal_result);
-//     jcc(Assembler::equal, exit);
-//     movl(dst, greater_result);
-//   exit:
-
-// P6 version of float compare, sets condition codes in EFLAGS
-instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
-  predicate(VM_Version::supports_cmov() && UseSSE == 0);
-  match(Set cr (CmpF src1 src2));
-  effect(KILL rax);
-  ins_cost(150);
-  format %{ "FLD    $src1\n\t"
-            "FUCOMIP ST,$src2  // P6 instruction\n\t"
-            "JNP    exit\n\t"
-            "MOV    ah,1       // saw a NaN, set CF (treat as LT)\n\t"
-            "SAHF\n"
-     "exit:\tNOP               // avoid branch to branch" %}
-  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
-  ins_encode( Push_Reg_DPR(src1),
-              OpcP, RegOpc(src2),
-              cmpF_P6_fixup );
-  ins_pipe( pipe_slow );
-%}
-
-instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
-  predicate(VM_Version::supports_cmov() && UseSSE == 0);
-  match(Set cr (CmpF src1 src2));
-  ins_cost(100);
-  format %{ "FLD    $src1\n\t"
-            "FUCOMIP ST,$src2  // P6 instruction" %}
-  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
-  ins_encode( Push_Reg_DPR(src1),
-              OpcP, RegOpc(src2));
-  ins_pipe( pipe_slow );
-%}
-
-
-// Compare & branch
-instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
-  predicate(UseSSE == 0);
-  match(Set cr (CmpF src1 src2));
-  effect(KILL rax);
-  ins_cost(200);
-  format %{ "FLD    $src1\n\t"
-            "FCOMp  $src2\n\t"
-            "FNSTSW AX\n\t"
-            "TEST   AX,0x400\n\t"
-            "JZ,s   flags\n\t"
-            "MOV    AH,1\t# unordered treat as LT\n"
-    "flags:\tSAHF" %}
-  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
-  ins_encode( Push_Reg_DPR(src1),
-              OpcP, RegOpc(src2),
-              fpu_flags);
-  ins_pipe( pipe_slow );
-%}
-
-// Compare vs zero into -1,0,1
-instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
-  predicate(UseSSE == 0);
-  match(Set dst (CmpF3 src1 zero));
-  effect(KILL cr, KILL rax);
-  ins_cost(280);
-  format %{ "FTSTF  $dst,$src1" %}
-  opcode(0xE4, 0xD9);
-  ins_encode( Push_Reg_DPR(src1),
-              OpcS, OpcP, PopFPU,
-              CmpF_Result(dst));
-  ins_pipe( pipe_slow );
-%}
-
-// Compare into -1,0,1
-instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
-  predicate(UseSSE == 0);
-  match(Set dst (CmpF3 src1 src2));
-  effect(KILL cr, KILL rax);
-  ins_cost(300);
-  format %{ "FCMPF  $dst,$src1,$src2" %}
-  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
-  ins_encode( Push_Reg_DPR(src1),
-              OpcP, RegOpc(src2),
-              CmpF_Result(dst));
-  ins_pipe( pipe_slow );
-%}
-
-// float compare and set condition codes in EFLAGS by XMM regs
-instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
-  predicate(UseSSE>=1);
-  match(Set cr (CmpF src1 src2));
-  ins_cost(145);
-  format %{ "UCOMISS $src1,$src2\n\t"
-            "JNP,s   exit\n\t"
-            "PUSHF\t# saw NaN, set CF\n\t"
-            "AND     [rsp], #0xffffff2b\n\t"
-            "POPF\n"
-    "exit:" %}
-  ins_encode %{
-    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
-    emit_cmpfp_fixup(masm);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
-  predicate(UseSSE>=1);
-  match(Set cr (CmpF src1 src2));
-  ins_cost(100);
-  format %{ "UCOMISS $src1,$src2" %}
-  ins_encode %{
-    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-// float compare and set condition codes in EFLAGS by XMM regs
-instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
-  predicate(UseSSE>=1);
-  match(Set cr (CmpF src1 (LoadF src2)));
-  ins_cost(165);
-  format %{ "UCOMISS $src1,$src2\n\t"
-            "JNP,s   exit\n\t"
-            "PUSHF\t# saw NaN, set CF\n\t"
-            "AND     [rsp], #0xffffff2b\n\t"
-            "POPF\n"
-    "exit:" %}
-  ins_encode %{
-    __ ucomiss($src1$$XMMRegister, $src2$$Address);
-    emit_cmpfp_fixup(masm);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
-  predicate(UseSSE>=1);
-  match(Set cr (CmpF src1 (LoadF src2)));
-  ins_cost(100);
-  format %{ "UCOMISS $src1,$src2" %}
-  ins_encode %{
-    __ ucomiss($src1$$XMMRegister, $src2$$Address);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-// Compare into -1,0,1 in XMM
-instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
-  predicate(UseSSE>=1);
-  match(Set dst (CmpF3 src1 src2));
-  effect(KILL cr);
-  ins_cost(255);
-  format %{ "UCOMISS $src1, $src2\n\t"
-            "MOV     $dst, #-1\n\t"
-            "JP,s    done\n\t"
-            "JB,s    done\n\t"
-            "SETNE   $dst\n\t"
-            "MOVZB   $dst, $dst\n"
-    "done:" %}
-  ins_encode %{
-    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
-    emit_cmpfp3(masm, $dst$$Register);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-// Compare into -1,0,1 in XMM and memory
-instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
-  predicate(UseSSE>=1);
-  match(Set dst (CmpF3 src1 (LoadF src2)));
-  effect(KILL cr);
-  ins_cost(275);
-  format %{ "UCOMISS $src1, $src2\n\t"
-            "MOV     $dst, #-1\n\t"
-            "JP,s    done\n\t"
-            "JB,s    done\n\t"
-            "SETNE   $dst\n\t"
-            "MOVZB   $dst, $dst\n"
-    "done:" %}
-  ins_encode %{
-    __ ucomiss($src1$$XMMRegister, $src2$$Address);
-    emit_cmpfp3(masm, $dst$$Register);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-// Spill to obtain 24-bit precision
-instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
-  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
-  match(Set dst (SubF src1 src2));
-
-  format %{ "FSUB   $dst,$src1 - $src2" %}
-  opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
-  ins_encode( Push_Reg_FPR(src1),
-              OpcReg_FPR(src2),
-              Pop_Mem_FPR(dst) );
-  ins_pipe( fpu_mem_reg_reg );
-%}
-//
-// This instruction does not round to 24-bits
-instruct subFPR_reg(regFPR dst, regFPR src) %{
-  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
-  match(Set dst (SubF dst src));
-
-  format %{ "FSUB   $dst,$src" %}
-  opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
-  ins_encode( Push_Reg_FPR(src),
-              OpcP, RegOpc(dst) );
-  ins_pipe( fpu_reg_reg );
-%}
-
-// Spill to obtain 24-bit precision
-instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
-  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
-  match(Set dst (AddF src1 src2));
-
-  format %{ "FADD   $dst,$src1,$src2" %}
-  opcode(0xD8, 0x0); /* D8 C0+i */
-  ins_encode( Push_Reg_FPR(src2),
-              OpcReg_FPR(src1),
-              Pop_Mem_FPR(dst) );
-  ins_pipe( fpu_mem_reg_reg );
-%}
-//
-// This instruction does not round to 24-bits
-instruct addFPR_reg(regFPR dst, regFPR src) %{
-  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
-  match(Set dst (AddF dst src));
-
-  format %{ "FLD    $src\n\t"
-            "FADDp  $dst,ST" %}
-  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
-  ins_encode( Push_Reg_FPR(src),
-              OpcP, RegOpc(dst) );
-  ins_pipe( fpu_reg_reg );
-%}
-
-instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
-  predicate(UseSSE==0);
-  match(Set dst (AbsF src));
-  ins_cost(100);
-  format %{ "FABS" %}
-  opcode(0xE1, 0xD9);
-  ins_encode( OpcS, OpcP );
-  ins_pipe( fpu_reg_reg );
-%}
-
-instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
-  predicate(UseSSE==0);
-  match(Set dst (NegF src));
-  ins_cost(100);
-  format %{ "FCHS" %}
-  opcode(0xE0, 0xD9);
-  ins_encode( OpcS, OpcP );
-  ins_pipe( fpu_reg_reg );
-%}
-
-// Cisc-alternate to addFPR_reg
-// Spill to obtain 24-bit precision
-instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
-  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
-  match(Set dst (AddF src1 (LoadF src2)));
-
-  format %{ "FLD    $src2\n\t"
-            "FADD   ST,$src1\n\t"
-            "FSTP_S $dst" %}
-  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
-  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src2),
-              OpcReg_FPR(src1),
-              Pop_Mem_FPR(dst), ClearInstMark );
-  ins_pipe( fpu_mem_reg_mem );
-%}
-//
-// Cisc-alternate to addFPR_reg
-// This instruction does not round to 24-bits
-instruct addFPR_reg_mem(regFPR dst, memory src) %{
-  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
-  match(Set dst (AddF dst (LoadF src)));
-
-  format %{ "FADD   $dst,$src" %}
-  opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/  /* LoadF  D9 /0 */
-  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src),
-              OpcP, RegOpc(dst), ClearInstMark );
-  ins_pipe( fpu_reg_mem );
-%}
-
-// // Following two instructions for _222_mpegaudio
-// Spill to obtain 24-bit precision
-instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
-  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
-  match(Set dst (AddF src1 src2));
-
-  format %{ "FADD   $dst,$src1,$src2" %}
-  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
-  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src1),
-              OpcReg_FPR(src2),
-              Pop_Mem_FPR(dst), ClearInstMark );
-  ins_pipe( fpu_mem_reg_mem );
-%}
-
-// Cisc-spill variant
-// Spill to obtain 24-bit precision
-instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
-  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
-  match(Set dst (AddF src1 (LoadF src2)));
-
-  format %{ "FADD   $dst,$src1,$src2 cisc" %}
-  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
-  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src2),
-              OpcP, RMopc_Mem(secondary,src1),
-              Pop_Mem_FPR(dst),
-              ClearInstMark);
-  ins_pipe( fpu_mem_mem_mem );
-%}
-
-// Spill to obtain 24-bit precision
-instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
-  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
-  match(Set dst (AddF src1 src2));
-
-  format %{ "FADD   $dst,$src1,$src2" %}
-  opcode(0xD8, 0x0, 0xD9); /* D8 /0 */  /* LoadF  D9 /0 */
-  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src2),
-              OpcP, RMopc_Mem(secondary,src1),
-              Pop_Mem_FPR(dst),
-              ClearInstMark);
-  ins_pipe( fpu_mem_mem_mem );
-%}
-
-
-// Spill to obtain 24-bit precision
-instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
-  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
-  match(Set dst (AddF src con));
-  format %{ "FLD    $src\n\t"
-            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
-            "FSTP_S $dst"  %}
-  ins_encode %{
-    __ fld_s($src$$reg - 1);  // FLD ST(i-1)
-    __ fadd_s($constantaddress($con));
-    __ fstp_s(Address(rsp, $dst$$disp));
-  %}
-  ins_pipe(fpu_mem_reg_con);
-%}
-//
-// This instruction does not round to 24-bits
-instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
-  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
-  match(Set dst (AddF src con));
-  format %{ "FLD    $src\n\t"
-            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
-            "FSTP   $dst"  %}
-  ins_encode %{
-    __ fld_s($src$$reg - 1);  // FLD ST(i-1)
-    __ fadd_s($constantaddress($con));
-    __ fstp_d($dst$$reg);
-  %}
-  ins_pipe(fpu_reg_reg_con);
-%}
-
-// Spill to obtain 24-bit precision
-instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
-  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
-  match(Set dst (MulF src1 src2));
-
-  format %{ "FLD    $src1\n\t"
-            "FMUL   $src2\n\t"
-            "FSTP_S $dst"  %}
-  opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
-  ins_encode( Push_Reg_FPR(src1),
-              OpcReg_FPR(src2),
-              Pop_Mem_FPR(dst) );
-  ins_pipe( fpu_mem_reg_reg );
-%}
-//
-// This instruction does not round to 24-bits
-instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
-  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
-  match(Set dst (MulF src1 src2));
-
-  format %{ "FLD    $src1\n\t"
-            "FMUL   $src2\n\t"
-            "FSTP_S $dst"  %}
-  opcode(0xD8, 0x1); /* D8 C8+i */
-  ins_encode( Push_Reg_FPR(src2),
-              OpcReg_FPR(src1),
-              Pop_Reg_FPR(dst) );
-  ins_pipe( fpu_reg_reg_reg );
-%}
-
-
-// Spill to obtain 24-bit precision
-// Cisc-alternate to reg-reg multiply
-instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
-  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
-  match(Set dst (MulF src1 (LoadF src2)));
-
-  format %{ "FLD_S  $src2\n\t"
-            "FMUL   $src1\n\t"
-            "FSTP_S $dst"  %}
-  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/  /* LoadF D9 /0 */
-  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src2),
-              OpcReg_FPR(src1),
-              Pop_Mem_FPR(dst), ClearInstMark );
-  ins_pipe( fpu_mem_reg_mem );
-%}
-//
-// This instruction does not round to 24-bits
-// Cisc-alternate to reg-reg multiply
-instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
-  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
-  match(Set dst (MulF src1 (LoadF src2)));
-
-  format %{ "FMUL   $dst,$src1,$src2" %}
-  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadF D9 /0 */
-  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src2),
-              OpcReg_FPR(src1),
-              Pop_Reg_FPR(dst), ClearInstMark );
-  ins_pipe( fpu_reg_reg_mem );
-%}
-
-// Spill to obtain 24-bit precision
-instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
-  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
-  match(Set dst (MulF src1 src2));
-
-  format %{ "FMUL   $dst,$src1,$src2" %}
-  opcode(0xD8, 0x1, 0xD9); /* D8 /1 */  /* LoadF D9 /0 */
-  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src2),
-              OpcP, RMopc_Mem(secondary,src1),
-              Pop_Mem_FPR(dst),
-              ClearInstMark );
-  ins_pipe( fpu_mem_mem_mem );
-%}
-
-// Spill to obtain 24-bit precision
-instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
-  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
-  match(Set dst (MulF src con));
-
-  format %{ "FLD    $src\n\t"
-            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
-            "FSTP_S $dst"  %}
-  ins_encode %{
-    __ fld_s($src$$reg - 1);  // FLD ST(i-1)
-    __ fmul_s($constantaddress($con));
-    __ fstp_s(Address(rsp, $dst$$disp));
-  %}
-  ins_pipe(fpu_mem_reg_con);
-%}
-//
-// This instruction does not round to 24-bits
-instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
-  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
-  match(Set dst (MulF src con));
-
-  format %{ "FLD    $src\n\t"
-            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
-            "FSTP   $dst"  %}
-  ins_encode %{
-    __ fld_s($src$$reg - 1);  // FLD ST(i-1)
-    __ fmul_s($constantaddress($con));
-    __ fstp_d($dst$$reg);
-  %}
-  ins_pipe(fpu_reg_reg_con);
-%}
-
-
-//
-// MACRO1 -- subsume unshared load into mulFPR
-// This instruction does not round to 24-bits
-instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
-  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
-  match(Set dst (MulF (LoadF mem1) src));
-
-  format %{ "FLD    $mem1    ===MACRO1===\n\t"
-            "FMUL   ST,$src\n\t"
-            "FSTP   $dst" %}
-  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
-  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,mem1),
-              OpcReg_FPR(src),
-              Pop_Reg_FPR(dst), ClearInstMark );
-  ins_pipe( fpu_reg_reg_mem );
-%}
-//
-// MACRO2 -- addFPR a mulFPR which subsumed an unshared load
-// This instruction does not round to 24-bits
-instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
-  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
-  match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
-  ins_cost(95);
-
-  format %{ "FLD    $mem1     ===MACRO2===\n\t"
-            "FMUL   ST,$src1  subsume mulFPR left load\n\t"
-            "FADD   ST,$src2\n\t"
-            "FSTP   $dst" %}
-  opcode(0xD9); /* LoadF D9 /0 */
-  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem1),
-              FMul_ST_reg(src1),
-              FAdd_ST_reg(src2),
-              Pop_Reg_FPR(dst), ClearInstMark );
-  ins_pipe( fpu_reg_mem_reg_reg );
-%}
-
-// MACRO3 -- addFPR a mulFPR
-// This instruction does not round to 24-bits.  It is a '2-address'
-// instruction in that the result goes back to src2.  This eliminates
-// a move from the macro; possibly the register allocator will have
-// to add it back (and maybe not).
-instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
-  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
-  match(Set src2 (AddF (MulF src0 src1) src2));
-
-  format %{ "FLD    $src0     ===MACRO3===\n\t"
-            "FMUL   ST,$src1\n\t"
-            "FADDP  $src2,ST" %}
-  opcode(0xD9); /* LoadF D9 /0 */
-  ins_encode( Push_Reg_FPR(src0),
-              FMul_ST_reg(src1),
-              FAddP_reg_ST(src2) );
-  ins_pipe( fpu_reg_reg_reg );
-%}
-
-// MACRO4 -- divFPR subFPR
-// This instruction does not round to 24-bits
-instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
-  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
-  match(Set dst (DivF (SubF src2 src1) src3));
-
-  format %{ "FLD    $src2   ===MACRO4===\n\t"
-            "FSUB   ST,$src1\n\t"
-            "FDIV   ST,$src3\n\t"
-            "FSTP  $dst" %}
-  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
-  ins_encode( Push_Reg_FPR(src2),
-              subFPR_divFPR_encode(src1,src3),
-              Pop_Reg_FPR(dst) );
-  ins_pipe( fpu_reg_reg_reg_reg );
-%}
-
-// Spill to obtain 24-bit precision
-instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
-  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
-  match(Set dst (DivF src1 src2));
-
-  format %{ "FDIV   $dst,$src1,$src2" %}
-  opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/
-  ins_encode( Push_Reg_FPR(src1),
-              OpcReg_FPR(src2),
-              Pop_Mem_FPR(dst) );
-  ins_pipe( fpu_mem_reg_reg );
-%}
-//
-// This instruction does not round to 24-bits
-instruct divFPR_reg(regFPR dst, regFPR src) %{
-  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
-  match(Set dst (DivF dst src));
-
-  format %{ "FDIV   $dst,$src" %}
-  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
-  ins_encode( Push_Reg_FPR(src),
-              OpcP, RegOpc(dst) );
-  ins_pipe( fpu_reg_reg );
-%}
-
-
-//----------Arithmetic Conversion Instructions---------------------------------
-// The conversions operations are all Alpha sorted.  Please keep it that way!
-
-instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
-  predicate(UseSSE==0);
-  match(Set dst (RoundFloat src));
-  ins_cost(125);
-  format %{ "FST_S  $dst,$src\t# F-round" %}
-  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
-  ins_pipe( fpu_mem_reg );
-%}
-
-instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
-  predicate(UseSSE<=1);
-  match(Set dst (RoundDouble src));
-  ins_cost(125);
-  format %{ "FST_D  $dst,$src\t# D-round" %}
-  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
-  ins_pipe( fpu_mem_reg );
-%}
-
-// Force rounding to 24-bit precision and 6-bit exponent
-instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
-  predicate(UseSSE==0);
-  match(Set dst (ConvD2F src));
-  format %{ "FST_S  $dst,$src\t# F-round" %}
-  expand %{
-    roundFloat_mem_reg(dst,src);
-  %}
-%}
-
-// Force rounding to 24-bit precision and 6-bit exponent
-instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
-  predicate(UseSSE==1);
-  match(Set dst (ConvD2F src));
-  effect( KILL cr );
-  format %{ "SUB    ESP,4\n\t"
-            "FST_S  [ESP],$src\t# F-round\n\t"
-            "MOVSS  $dst,[ESP]\n\t"
-            "ADD ESP,4" %}
-  ins_encode %{
-    __ subptr(rsp, 4);
-    if ($src$$reg != FPR1L_enc) {
-      __ fld_s($src$$reg-1);
-      __ fstp_s(Address(rsp, 0));
-    } else {
-      __ fst_s(Address(rsp, 0));
-    }
-    __ movflt($dst$$XMMRegister, Address(rsp, 0));
-    __ addptr(rsp, 4);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-// Force rounding double precision to single precision
-instruct convD2F_reg(regF dst, regD src) %{
-  predicate(UseSSE>=2);
-  match(Set dst (ConvD2F src));
-  format %{ "CVTSD2SS $dst,$src\t# F-round" %}
-  ins_encode %{
-    __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
-  predicate(UseSSE==0);
-  match(Set dst (ConvF2D src));
-  format %{ "FST_S  $dst,$src\t# D-round" %}
-  ins_encode( Pop_Reg_Reg_DPR(dst, src));
-  ins_pipe( fpu_reg_reg );
-%}
-
-instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
-  predicate(UseSSE==1);
-  match(Set dst (ConvF2D src));
-  format %{ "FST_D  $dst,$src\t# D-round" %}
-  expand %{
-    roundDouble_mem_reg(dst,src);
-  %}
-%}
-
-instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
-  predicate(UseSSE==1);
-  match(Set dst (ConvF2D src));
-  effect( KILL cr );
-  format %{ "SUB    ESP,4\n\t"
-            "MOVSS  [ESP] $src\n\t"
-            "FLD_S  [ESP]\n\t"
-            "ADD    ESP,4\n\t"
-            "FSTP   $dst\t# D-round" %}
-  ins_encode %{
-    __ subptr(rsp, 4);
-    __ movflt(Address(rsp, 0), $src$$XMMRegister);
-    __ fld_s(Address(rsp, 0));
-    __ addptr(rsp, 4);
-    __ fstp_d($dst$$reg);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct convF2D_reg(regD dst, regF src) %{
-  predicate(UseSSE>=2);
-  match(Set dst (ConvF2D src));
-  format %{ "CVTSS2SD $dst,$src\t# D-round" %}
-  ins_encode %{
-    __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-// Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
-instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
-  predicate(UseSSE<=1);
-  match(Set dst (ConvD2I src));
-  effect( KILL tmp, KILL cr );
-  format %{ "FLD    $src\t# Convert double to int \n\t"
-            "FLDCW  trunc mode\n\t"
-            "SUB    ESP,4\n\t"
-            "FISTp  [ESP + #0]\n\t"
-            "FLDCW  std/24-bit mode\n\t"
-            "POP    EAX\n\t"
-            "CMP    EAX,0x80000000\n\t"
-            "JNE,s  fast\n\t"
-            "FLD_D  $src\n\t"
-            "CALL   d2i_wrapper\n"
-      "fast:" %}
-  ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
-  ins_pipe( pipe_slow );
-%}
-
-// Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
-instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
-  predicate(UseSSE>=2);
-  match(Set dst (ConvD2I src));
-  effect( KILL tmp, KILL cr );
-  format %{ "CVTTSD2SI $dst, $src\n\t"
-            "CMP    $dst,0x80000000\n\t"
-            "JNE,s  fast\n\t"
-            "SUB    ESP, 8\n\t"
-            "MOVSD  [ESP], $src\n\t"
-            "FLD_D  [ESP]\n\t"
-            "ADD    ESP, 8\n\t"
-            "CALL   d2i_wrapper\n"
-      "fast:" %}
-  ins_encode %{
-    Label fast;
-    __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
-    __ cmpl($dst$$Register, 0x80000000);
-    __ jccb(Assembler::notEqual, fast);
-    __ subptr(rsp, 8);
-    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
-    __ fld_d(Address(rsp, 0));
-    __ addptr(rsp, 8);
-    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
-    __ post_call_nop();
-    __ bind(fast);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
-  predicate(UseSSE<=1);
-  match(Set dst (ConvD2L src));
-  effect( KILL cr );
-  format %{ "FLD    $src\t# Convert double to long\n\t"
-            "FLDCW  trunc mode\n\t"
-            "SUB    ESP,8\n\t"
-            "FISTp  [ESP + #0]\n\t"
-            "FLDCW  std/24-bit mode\n\t"
-            "POP    EAX\n\t"
-            "POP    EDX\n\t"
-            "CMP    EDX,0x80000000\n\t"
-            "JNE,s  fast\n\t"
-            "TEST   EAX,EAX\n\t"
-            "JNE,s  fast\n\t"
-            "FLD    $src\n\t"
-            "CALL   d2l_wrapper\n"
-      "fast:" %}
-  ins_encode( Push_Reg_DPR(src),  DPR2L_encoding(src) );
-  ins_pipe( pipe_slow );
-%}
-
-// XMM lacks a float/double->long conversion, so use the old FPU stack.
-instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
-  predicate (UseSSE>=2);
-  match(Set dst (ConvD2L src));
-  effect( KILL cr );
-  format %{ "SUB    ESP,8\t# Convert double to long\n\t"
-            "MOVSD  [ESP],$src\n\t"
-            "FLD_D  [ESP]\n\t"
-            "FLDCW  trunc mode\n\t"
-            "FISTp  [ESP + #0]\n\t"
-            "FLDCW  std/24-bit mode\n\t"
-            "POP    EAX\n\t"
-            "POP    EDX\n\t"
-            "CMP    EDX,0x80000000\n\t"
-            "JNE,s  fast\n\t"
-            "TEST   EAX,EAX\n\t"
-            "JNE,s  fast\n\t"
-            "SUB    ESP,8\n\t"
-            "MOVSD  [ESP],$src\n\t"
-            "FLD_D  [ESP]\n\t"
-            "ADD    ESP,8\n\t"
-            "CALL   d2l_wrapper\n"
-      "fast:" %}
-  ins_encode %{
-    Label fast;
-    __ subptr(rsp, 8);
-    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
-    __ fld_d(Address(rsp, 0));
-    __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
-    __ fistp_d(Address(rsp, 0));
-    // Restore the rounding mode, mask the exception
-    if (Compile::current()->in_24_bit_fp_mode()) {
-      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
-    } else {
-      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
-    }
-    // Load the converted long, adjust CPU stack
-    __ pop(rax);
-    __ pop(rdx);
-    __ cmpl(rdx, 0x80000000);
-    __ jccb(Assembler::notEqual, fast);
-    __ testl(rax, rax);
-    __ jccb(Assembler::notEqual, fast);
-    __ subptr(rsp, 8);
-    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
-    __ fld_d(Address(rsp, 0));
-    __ addptr(rsp, 8);
-    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
-    __ post_call_nop();
-    __ bind(fast);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-// Convert a double to an int.  Java semantics require we do complex
-// manglations in the corner cases.  So we set the rounding mode to
-// 'zero', store the darned double down as an int, and reset the
-// rounding mode to 'nearest'.  The hardware stores a flag value down
-// if we would overflow or converted a NAN; we check for this and
-// and go the slow path if needed.
-instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
-  predicate(UseSSE==0);
-  match(Set dst (ConvF2I src));
-  effect( KILL tmp, KILL cr );
-  format %{ "FLD    $src\t# Convert float to int \n\t"
-            "FLDCW  trunc mode\n\t"
-            "SUB    ESP,4\n\t"
-            "FISTp  [ESP + #0]\n\t"
-            "FLDCW  std/24-bit mode\n\t"
-            "POP    EAX\n\t"
-            "CMP    EAX,0x80000000\n\t"
-            "JNE,s  fast\n\t"
-            "FLD    $src\n\t"
-            "CALL   d2i_wrapper\n"
-      "fast:" %}
-  // DPR2I_encoding works for FPR2I
-  ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
-  ins_pipe( pipe_slow );
-%}
-
-// Convert a float in xmm to an int reg.
-instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
-  predicate(UseSSE>=1);
-  match(Set dst (ConvF2I src));
-  effect( KILL tmp, KILL cr );
-  format %{ "CVTTSS2SI $dst, $src\n\t"
-            "CMP    $dst,0x80000000\n\t"
-            "JNE,s  fast\n\t"
-            "SUB    ESP, 4\n\t"
-            "MOVSS  [ESP], $src\n\t"
-            "FLD    [ESP]\n\t"
-            "ADD    ESP, 4\n\t"
-            "CALL   d2i_wrapper\n"
-      "fast:" %}
-  ins_encode %{
-    Label fast;
-    __ cvttss2sil($dst$$Register, $src$$XMMRegister);
-    __ cmpl($dst$$Register, 0x80000000);
-    __ jccb(Assembler::notEqual, fast);
-    __ subptr(rsp, 4);
-    __ movflt(Address(rsp, 0), $src$$XMMRegister);
-    __ fld_s(Address(rsp, 0));
-    __ addptr(rsp, 4);
-    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
-    __ post_call_nop();
-    __ bind(fast);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
-  predicate(UseSSE==0);
-  match(Set dst (ConvF2L src));
-  effect( KILL cr );
-  format %{ "FLD    $src\t# Convert float to long\n\t"
-            "FLDCW  trunc mode\n\t"
-            "SUB    ESP,8\n\t"
-            "FISTp  [ESP + #0]\n\t"
-            "FLDCW  std/24-bit mode\n\t"
-            "POP    EAX\n\t"
-            "POP    EDX\n\t"
-            "CMP    EDX,0x80000000\n\t"
-            "JNE,s  fast\n\t"
-            "TEST   EAX,EAX\n\t"
-            "JNE,s  fast\n\t"
-            "FLD    $src\n\t"
-            "CALL   d2l_wrapper\n"
-      "fast:" %}
-  // DPR2L_encoding works for FPR2L
-  ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
-  ins_pipe( pipe_slow );
-%}
-
-// XMM lacks a float/double->long conversion, so use the old FPU stack.
-instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
-  predicate (UseSSE>=1);
-  match(Set dst (ConvF2L src));
-  effect( KILL cr );
-  format %{ "SUB    ESP,8\t# Convert float to long\n\t"
-            "MOVSS  [ESP],$src\n\t"
-            "FLD_S  [ESP]\n\t"
-            "FLDCW  trunc mode\n\t"
-            "FISTp  [ESP + #0]\n\t"
-            "FLDCW  std/24-bit mode\n\t"
-            "POP    EAX\n\t"
-            "POP    EDX\n\t"
-            "CMP    EDX,0x80000000\n\t"
-            "JNE,s  fast\n\t"
-            "TEST   EAX,EAX\n\t"
-            "JNE,s  fast\n\t"
-            "SUB    ESP,4\t# Convert float to long\n\t"
-            "MOVSS  [ESP],$src\n\t"
-            "FLD_S  [ESP]\n\t"
-            "ADD    ESP,4\n\t"
-            "CALL   d2l_wrapper\n"
-      "fast:" %}
-  ins_encode %{
-    Label fast;
-    __ subptr(rsp, 8);
-    __ movflt(Address(rsp, 0), $src$$XMMRegister);
-    __ fld_s(Address(rsp, 0));
-    __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
-    __ fistp_d(Address(rsp, 0));
-    // Restore the rounding mode, mask the exception
-    if (Compile::current()->in_24_bit_fp_mode()) {
-      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
-    } else {
-      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
-    }
-    // Load the converted long, adjust CPU stack
-    __ pop(rax);
-    __ pop(rdx);
-    __ cmpl(rdx, 0x80000000);
-    __ jccb(Assembler::notEqual, fast);
-    __ testl(rax, rax);
-    __ jccb(Assembler::notEqual, fast);
-    __ subptr(rsp, 4);
-    __ movflt(Address(rsp, 0), $src$$XMMRegister);
-    __ fld_s(Address(rsp, 0));
-    __ addptr(rsp, 4);
-    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
-    __ post_call_nop();
-    __ bind(fast);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
-  predicate( UseSSE<=1 );
-  match(Set dst (ConvI2D src));
-  format %{ "FILD   $src\n\t"
-            "FSTP   $dst" %}
-  opcode(0xDB, 0x0);  /* DB /0 */
-  ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
-  ins_pipe( fpu_reg_mem );
-%}
-
-instruct convI2D_reg(regD dst, rRegI src) %{
-  predicate( UseSSE>=2 && !UseXmmI2D );
-  match(Set dst (ConvI2D src));
-  format %{ "CVTSI2SD $dst,$src" %}
-  ins_encode %{
-    __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct convI2D_mem(regD dst, memory mem) %{
-  predicate( UseSSE>=2 );
-  match(Set dst (ConvI2D (LoadI mem)));
-  format %{ "CVTSI2SD $dst,$mem" %}
-  ins_encode %{
-    __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct convXI2D_reg(regD dst, rRegI src)
-%{
-  predicate( UseSSE>=2 && UseXmmI2D );
-  match(Set dst (ConvI2D src));
-
-  format %{ "MOVD  $dst,$src\n\t"
-            "CVTDQ2PD $dst,$dst\t# i2d" %}
-  ins_encode %{
-    __ movdl($dst$$XMMRegister, $src$$Register);
-    __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
-  %}
-  ins_pipe(pipe_slow); // XXX
-%}
-
-instruct convI2DPR_mem(regDPR dst, memory mem) %{
-  predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
-  match(Set dst (ConvI2D (LoadI mem)));
-  format %{ "FILD   $mem\n\t"
-            "FSTP   $dst" %}
-  opcode(0xDB);      /* DB /0 */
-  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem),
-              Pop_Reg_DPR(dst), ClearInstMark);
-  ins_pipe( fpu_reg_mem );
-%}
-
-// Convert a byte to a float; no rounding step needed.
-instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
-  predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
-  match(Set dst (ConvI2F src));
-  format %{ "FILD   $src\n\t"
-            "FSTP   $dst" %}
-
-  opcode(0xDB, 0x0);  /* DB /0 */
-  ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
-  ins_pipe( fpu_reg_mem );
-%}
-
-// In 24-bit mode, force exponent rounding by storing back out
-instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
-  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
-  match(Set dst (ConvI2F src));
-  ins_cost(200);
-  format %{ "FILD   $src\n\t"
-            "FSTP_S $dst" %}
-  opcode(0xDB, 0x0);  /* DB /0 */
-  ins_encode( Push_Mem_I(src),
-              Pop_Mem_FPR(dst));
-  ins_pipe( fpu_mem_mem );
-%}
-
-// In 24-bit mode, force exponent rounding by storing back out
-instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
-  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
-  match(Set dst (ConvI2F (LoadI mem)));
-  ins_cost(200);
-  format %{ "FILD   $mem\n\t"
-            "FSTP_S $dst" %}
-  opcode(0xDB);  /* DB /0 */
-  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem),
-              Pop_Mem_FPR(dst), ClearInstMark);
-  ins_pipe( fpu_mem_mem );
-%}
-
-// This instruction does not round to 24-bits
-instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
-  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
-  match(Set dst (ConvI2F src));
-  format %{ "FILD   $src\n\t"
-            "FSTP   $dst" %}
-  opcode(0xDB, 0x0);  /* DB /0 */
-  ins_encode( Push_Mem_I(src),
-              Pop_Reg_FPR(dst));
-  ins_pipe( fpu_reg_mem );
-%}
-
-// This instruction does not round to 24-bits
-instruct convI2FPR_mem(regFPR dst, memory mem) %{
-  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
-  match(Set dst (ConvI2F (LoadI mem)));
-  format %{ "FILD   $mem\n\t"
-            "FSTP   $dst" %}
-  opcode(0xDB);      /* DB /0 */
-  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem),
-              Pop_Reg_FPR(dst), ClearInstMark);
-  ins_pipe( fpu_reg_mem );
-%}
-
-// Convert an int to a float in xmm; no rounding step needed.
-instruct convI2F_reg(regF dst, rRegI src) %{
-  predicate( UseSSE==1 || ( UseSSE>=2 && !UseXmmI2F ));
-  match(Set dst (ConvI2F src));
-  format %{ "CVTSI2SS $dst, $src" %}
-  ins_encode %{
-    __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
- instruct convXI2F_reg(regF dst, rRegI src)
-%{
-  predicate( UseSSE>=2 && UseXmmI2F );
-  match(Set dst (ConvI2F src));
-
-  format %{ "MOVD  $dst,$src\n\t"
-            "CVTDQ2PS $dst,$dst\t# i2f" %}
-  ins_encode %{
-    __ movdl($dst$$XMMRegister, $src$$Register);
-    __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
-  %}
-  ins_pipe(pipe_slow); // XXX
-%}
-
-instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
-  match(Set dst (ConvI2L src));
-  effect(KILL cr);
-  ins_cost(375);
-  format %{ "MOV    $dst.lo,$src\n\t"
-            "MOV    $dst.hi,$src\n\t"
-            "SAR    $dst.hi,31" %}
-  ins_encode(convert_int_long(dst,src));
-  ins_pipe( ialu_reg_reg_long );
-%}
-
-// Zero-extend convert int to long
-instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
-  match(Set dst (AndL (ConvI2L src) mask) );
-  effect( KILL flags );
-  ins_cost(250);
-  format %{ "MOV    $dst.lo,$src\n\t"
-            "XOR    $dst.hi,$dst.hi" %}
-  opcode(0x33); // XOR
-  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
-  ins_pipe( ialu_reg_reg_long );
-%}
-
-// Zero-extend long
-instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
-  match(Set dst (AndL src mask) );
-  effect( KILL flags );
-  ins_cost(250);
-  format %{ "MOV    $dst.lo,$src.lo\n\t"
-            "XOR    $dst.hi,$dst.hi\n\t" %}
-  opcode(0x33); // XOR
-  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
-  ins_pipe( ialu_reg_reg_long );
-%}
-
-instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
-  predicate (UseSSE<=1);
-  match(Set dst (ConvL2D src));
-  effect( KILL cr );
-  format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
-            "PUSH   $src.lo\n\t"
-            "FILD   ST,[ESP + #0]\n\t"
-            "ADD    ESP,8\n\t"
-            "FSTP_D $dst\t# D-round" %}
-  opcode(0xDF, 0x5);  /* DF /5 */
-  ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
-  ins_pipe( pipe_slow );
-%}
-
-instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
-  predicate (UseSSE>=2);
-  match(Set dst (ConvL2D src));
-  effect( KILL cr );
-  format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
-            "PUSH   $src.lo\n\t"
-            "FILD_D [ESP]\n\t"
-            "FSTP_D [ESP]\n\t"
-            "MOVSD  $dst,[ESP]\n\t"
-            "ADD    ESP,8" %}
-  opcode(0xDF, 0x5);  /* DF /5 */
-  ins_encode(convert_long_double2(src), Push_ResultD(dst));
-  ins_pipe( pipe_slow );
-%}
-
-instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
-  predicate (UseSSE>=1);
-  match(Set dst (ConvL2F src));
-  effect( KILL cr );
-  format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
-            "PUSH   $src.lo\n\t"
-            "FILD_D [ESP]\n\t"
-            "FSTP_S [ESP]\n\t"
-            "MOVSS  $dst,[ESP]\n\t"
-            "ADD    ESP,8" %}
-  opcode(0xDF, 0x5);  /* DF /5 */
-  ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
-  ins_pipe( pipe_slow );
-%}
-
-instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
-  match(Set dst (ConvL2F src));
-  effect( KILL cr );
-  format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
-            "PUSH   $src.lo\n\t"
-            "FILD   ST,[ESP + #0]\n\t"
-            "ADD    ESP,8\n\t"
-            "FSTP_S $dst\t# F-round" %}
-  opcode(0xDF, 0x5);  /* DF /5 */
-  ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
-  ins_pipe( pipe_slow );
-%}
-
-instruct convL2I_reg( rRegI dst, eRegL src ) %{
-  match(Set dst (ConvL2I src));
-  effect( DEF dst, USE src );
-  format %{ "MOV    $dst,$src.lo" %}
-  ins_encode(enc_CopyL_Lo(dst,src));
-  ins_pipe( ialu_reg_reg );
-%}
-
-instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
-  match(Set dst (MoveF2I src));
-  effect( DEF dst, USE src );
-  ins_cost(100);
-  format %{ "MOV    $dst,$src\t# MoveF2I_stack_reg" %}
-  ins_encode %{
-    __ movl($dst$$Register, Address(rsp, $src$$disp));
-  %}
-  ins_pipe( ialu_reg_mem );
-%}
-
-instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
-  predicate(UseSSE==0);
-  match(Set dst (MoveF2I src));
-  effect( DEF dst, USE src );
-
-  ins_cost(125);
-  format %{ "FST_S  $dst,$src\t# MoveF2I_reg_stack" %}
-  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
-  ins_pipe( fpu_mem_reg );
-%}
-
-instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
-  predicate(UseSSE>=1);
-  match(Set dst (MoveF2I src));
-  effect( DEF dst, USE src );
-
-  ins_cost(95);
-  format %{ "MOVSS  $dst,$src\t# MoveF2I_reg_stack_sse" %}
-  ins_encode %{
-    __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
-  predicate(UseSSE>=2);
-  match(Set dst (MoveF2I src));
-  effect( DEF dst, USE src );
-  ins_cost(85);
-  format %{ "MOVD   $dst,$src\t# MoveF2I_reg_reg_sse" %}
-  ins_encode %{
-    __ movdl($dst$$Register, $src$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
-  match(Set dst (MoveI2F src));
-  effect( DEF dst, USE src );
-
-  ins_cost(100);
-  format %{ "MOV    $dst,$src\t# MoveI2F_reg_stack" %}
-  ins_encode %{
-    __ movl(Address(rsp, $dst$$disp), $src$$Register);
-  %}
-  ins_pipe( ialu_mem_reg );
-%}
-
-
-instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
-  predicate(UseSSE==0);
-  match(Set dst (MoveI2F src));
-  effect(DEF dst, USE src);
-
-  ins_cost(125);
-  format %{ "FLD_S  $src\n\t"
-            "FSTP   $dst\t# MoveI2F_stack_reg" %}
-  opcode(0xD9);               /* D9 /0, FLD m32real */
-  ins_encode( SetInstMark, OpcP, RMopc_Mem_no_oop(0x00,src),
-              Pop_Reg_FPR(dst), ClearInstMark );
-  ins_pipe( fpu_reg_mem );
-%}
-
-instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
-  predicate(UseSSE>=1);
-  match(Set dst (MoveI2F src));
-  effect( DEF dst, USE src );
-
-  ins_cost(95);
-  format %{ "MOVSS  $dst,$src\t# MoveI2F_stack_reg_sse" %}
-  ins_encode %{
-    __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
-  predicate(UseSSE>=2);
-  match(Set dst (MoveI2F src));
-  effect( DEF dst, USE src );
-
-  ins_cost(85);
-  format %{ "MOVD   $dst,$src\t# MoveI2F_reg_reg_sse" %}
-  ins_encode %{
-    __ movdl($dst$$XMMRegister, $src$$Register);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
-  match(Set dst (MoveD2L src));
-  effect(DEF dst, USE src);
-
-  ins_cost(250);
-  format %{ "MOV    $dst.lo,$src\n\t"
-            "MOV    $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
-  opcode(0x8B, 0x8B);
-  ins_encode( SetInstMark, OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src), ClearInstMark);
-  ins_pipe( ialu_mem_long_reg );
-%}
-
-instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
-  predicate(UseSSE<=1);
-  match(Set dst (MoveD2L src));
-  effect(DEF dst, USE src);
-
-  ins_cost(125);
-  format %{ "FST_D  $dst,$src\t# MoveD2L_reg_stack" %}
-  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
-  ins_pipe( fpu_mem_reg );
-%}
-
-instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
-  predicate(UseSSE>=2);
-  match(Set dst (MoveD2L src));
-  effect(DEF dst, USE src);
-  ins_cost(95);
-  format %{ "MOVSD  $dst,$src\t# MoveD2L_reg_stack_sse" %}
-  ins_encode %{
-    __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
-  predicate(UseSSE>=2);
-  match(Set dst (MoveD2L src));
-  effect(DEF dst, USE src, TEMP tmp);
-  ins_cost(85);
-  format %{ "MOVD   $dst.lo,$src\n\t"
-            "PSHUFLW $tmp,$src,0x4E\n\t"
-            "MOVD   $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
-  ins_encode %{
-    __ movdl($dst$$Register, $src$$XMMRegister);
-    __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
-    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
-  match(Set dst (MoveL2D src));
-  effect(DEF dst, USE src);
-
-  ins_cost(200);
-  format %{ "MOV    $dst,$src.lo\n\t"
-            "MOV    $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
-  opcode(0x89, 0x89);
-  ins_encode( SetInstMark, OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ), ClearInstMark );
-  ins_pipe( ialu_mem_long_reg );
-%}
-
-
-instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
-  predicate(UseSSE<=1);
-  match(Set dst (MoveL2D src));
-  effect(DEF dst, USE src);
-  ins_cost(125);
-
-  format %{ "FLD_D  $src\n\t"
-            "FSTP   $dst\t# MoveL2D_stack_reg" %}
-  opcode(0xDD);               /* DD /0, FLD m64real */
-  ins_encode( SetInstMark, OpcP, RMopc_Mem_no_oop(0x00,src),
-              Pop_Reg_DPR(dst), ClearInstMark );
-  ins_pipe( fpu_reg_mem );
-%}
-
-
-instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
-  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
-  match(Set dst (MoveL2D src));
-  effect(DEF dst, USE src);
-
-  ins_cost(95);
-  format %{ "MOVSD  $dst,$src\t# MoveL2D_stack_reg_sse" %}
-  ins_encode %{
-    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
-  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
-  match(Set dst (MoveL2D src));
-  effect(DEF dst, USE src);
-
-  ins_cost(95);
-  format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
-  ins_encode %{
-    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
-  predicate(UseSSE>=2);
-  match(Set dst (MoveL2D src));
-  effect(TEMP dst, USE src, TEMP tmp);
-  ins_cost(85);
-  format %{ "MOVD   $dst,$src.lo\n\t"
-            "MOVD   $tmp,$src.hi\n\t"
-            "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
-  ins_encode %{
-    __ movdl($dst$$XMMRegister, $src$$Register);
-    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
-    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-//----------------------------- CompressBits/ExpandBits ------------------------
-
-instruct compressBitsL_reg(eADXRegL dst, eBCXRegL src, eBDPRegL mask, eSIRegI rtmp, regF xtmp, eFlagsReg cr) %{
-  predicate(n->bottom_type()->isa_long());
-  match(Set dst (CompressBits src mask));
-  effect(TEMP rtmp, TEMP xtmp, KILL cr);
-  format %{ "compress_bits $dst, $src, $mask\t! using $rtmp and $xtmp as TEMP" %}
-  ins_encode %{
-    Label exit, partail_result;
-    // Parallely extract both upper and lower 32 bits of source into destination register pair.
-    // Merge the results of upper and lower destination registers such that upper destination
-    // results are contiguously laid out after the lower destination result.
-    __ pextl($dst$$Register, $src$$Register, $mask$$Register);
-    __ pextl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($src$$Register), HIGH_FROM_LOW($mask$$Register));
-    __ popcntl($rtmp$$Register, $mask$$Register);
-    // Skip merging if bit count of lower mask register is equal to 32 (register size).
-    __ cmpl($rtmp$$Register, 32);
-    __ jccb(Assembler::equal, exit);
-    // Due to constraint on number of GPRs on 32 bit target, using XMM register as potential spill slot.
-    __ movdl($xtmp$$XMMRegister, $rtmp$$Register);
-    // Shift left the contents of upper destination register by true bit count of lower mask register
-    // and merge with lower destination register.
-    __ shlxl($rtmp$$Register, HIGH_FROM_LOW($dst$$Register), $rtmp$$Register);
-    __ orl($dst$$Register, $rtmp$$Register);
-    __ movdl($rtmp$$Register, $xtmp$$XMMRegister);
-    // Zero out upper destination register if true bit count of lower 32 bit mask is zero
-    // since contents of upper destination have already been copied to lower destination
-    // register.
-    __ cmpl($rtmp$$Register, 0);
-    __ jccb(Assembler::greater, partail_result);
-    __ movl(HIGH_FROM_LOW($dst$$Register), 0);
-    __ jmp(exit);
-    __ bind(partail_result);
-    // Perform right shift over upper destination register to move out bits already copied
-    // to lower destination register.
-    __ subl($rtmp$$Register, 32);
-    __ negl($rtmp$$Register);
-    __ shrxl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register), $rtmp$$Register);
-    __ bind(exit);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct expandBitsL_reg(eADXRegL dst, eBCXRegL src, eBDPRegL mask, eSIRegI rtmp, regF xtmp, eFlagsReg cr) %{
-  predicate(n->bottom_type()->isa_long());
-  match(Set dst (ExpandBits src mask));
-  effect(TEMP rtmp, TEMP xtmp, KILL cr);
-  format %{ "expand_bits $dst, $src, $mask\t! using $rtmp and $xtmp as TEMP" %}
-  ins_encode %{
-    // Extraction operation sequentially reads the bits from source register starting from LSB
-    // and lays them out into destination register at bit locations corresponding to true bits
-    // in mask register. Thus number of source bits read are equal to combined true bit count
-    // of mask register pair.
-    Label exit, mask_clipping;
-    __ pdepl($dst$$Register, $src$$Register, $mask$$Register);
-    __ pdepl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($src$$Register), HIGH_FROM_LOW($mask$$Register));
-    __ popcntl($rtmp$$Register, $mask$$Register);
-    // If true bit count of lower mask register is 32 then none of bit of lower source register
-    // will feed to upper destination register.
-    __ cmpl($rtmp$$Register, 32);
-    __ jccb(Assembler::equal, exit);
-    // Due to constraint on number of GPRs on 32 bit target, using XMM register as potential spill slot.
-    __ movdl($xtmp$$XMMRegister, $rtmp$$Register);
-    // Shift right the contents of lower source register to remove already consumed bits.
-    __ shrxl($rtmp$$Register, $src$$Register, $rtmp$$Register);
-    // Extract the bits from lower source register starting from LSB under the influence
-    // of upper mask register.
-    __ pdepl(HIGH_FROM_LOW($dst$$Register), $rtmp$$Register, HIGH_FROM_LOW($mask$$Register));
-    __ movdl($rtmp$$Register, $xtmp$$XMMRegister);
-    __ subl($rtmp$$Register, 32);
-    __ negl($rtmp$$Register);
-    __ movdl($xtmp$$XMMRegister, $mask$$Register);
-    __ movl($mask$$Register, HIGH_FROM_LOW($mask$$Register));
-    // Clear the set bits in upper mask register which have been used to extract the contents
-    // from lower source register.
-    __ bind(mask_clipping);
-    __ blsrl($mask$$Register, $mask$$Register);
-    __ decrementl($rtmp$$Register, 1);
-    __ jccb(Assembler::greater, mask_clipping);
-    // Starting from LSB extract the bits from upper source register under the influence of
-    // remaining set bits in upper mask register.
-    __ pdepl($rtmp$$Register, HIGH_FROM_LOW($src$$Register), $mask$$Register);
-    // Merge the partial results extracted from lower and upper source register bits.
-    __ orl(HIGH_FROM_LOW($dst$$Register), $rtmp$$Register);
-    __ movdl($mask$$Register, $xtmp$$XMMRegister);
-    __ bind(exit);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-// =======================================================================
-// Fast clearing of an array
-// Small non-constant length ClearArray for non-AVX512 targets.
-instruct rep_stos(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
-  predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
-  match(Set dummy (ClearArray cnt base));
-  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
-
-  format %{ $$template
-    $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
-    $$emit$$"CMP    InitArrayShortSize,rcx\n\t"
-    $$emit$$"JG     LARGE\n\t"
-    $$emit$$"SHL    ECX, 1\n\t"
-    $$emit$$"DEC    ECX\n\t"
-    $$emit$$"JS     DONE\t# Zero length\n\t"
-    $$emit$$"MOV    EAX,(EDI,ECX,4)\t# LOOP\n\t"
-    $$emit$$"DEC    ECX\n\t"
-    $$emit$$"JGE    LOOP\n\t"
-    $$emit$$"JMP    DONE\n\t"
-    $$emit$$"# LARGE:\n\t"
-    if (UseFastStosb) {
-       $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
-       $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
-    } else if (UseXMMForObjInit) {
-       $$emit$$"MOV     RDI,RAX\n\t"
-       $$emit$$"VPXOR    YMM0,YMM0,YMM0\n\t"
-       $$emit$$"JMPQ    L_zero_64_bytes\n\t"
-       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
-       $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
-       $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
-       $$emit$$"ADD     0x40,RAX\n\t"
-       $$emit$$"# L_zero_64_bytes:\n\t"
-       $$emit$$"SUB     0x8,RCX\n\t"
-       $$emit$$"JGE     L_loop\n\t"
-       $$emit$$"ADD     0x4,RCX\n\t"
-       $$emit$$"JL      L_tail\n\t"
-       $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
-       $$emit$$"ADD     0x20,RAX\n\t"
-       $$emit$$"SUB     0x4,RCX\n\t"
-       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
-       $$emit$$"ADD     0x4,RCX\n\t"
-       $$emit$$"JLE     L_end\n\t"
-       $$emit$$"DEC     RCX\n\t"
-       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
-       $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
-       $$emit$$"ADD     0x8,RAX\n\t"
-       $$emit$$"DEC     RCX\n\t"
-       $$emit$$"JGE     L_sloop\n\t"
-       $$emit$$"# L_end:\n\t"
-    } else {
-       $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
-       $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
-    }
-    $$emit$$"# DONE"
-  %}
-  ins_encode %{
-    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
-                 $tmp$$XMMRegister, false, knoreg);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-// Small non-constant length ClearArray for AVX512 targets.
-instruct rep_stos_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
-  predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
-  match(Set dummy (ClearArray cnt base));
-  ins_cost(125);
-  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
-
-  format %{ $$template
-    $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
-    $$emit$$"CMP    InitArrayShortSize,rcx\n\t"
-    $$emit$$"JG     LARGE\n\t"
-    $$emit$$"SHL    ECX, 1\n\t"
-    $$emit$$"DEC    ECX\n\t"
-    $$emit$$"JS     DONE\t# Zero length\n\t"
-    $$emit$$"MOV    EAX,(EDI,ECX,4)\t# LOOP\n\t"
-    $$emit$$"DEC    ECX\n\t"
-    $$emit$$"JGE    LOOP\n\t"
-    $$emit$$"JMP    DONE\n\t"
-    $$emit$$"# LARGE:\n\t"
-    if (UseFastStosb) {
-       $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
-       $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
-    } else if (UseXMMForObjInit) {
-       $$emit$$"MOV     RDI,RAX\n\t"
-       $$emit$$"VPXOR    YMM0,YMM0,YMM0\n\t"
-       $$emit$$"JMPQ    L_zero_64_bytes\n\t"
-       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
-       $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
-       $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
-       $$emit$$"ADD     0x40,RAX\n\t"
-       $$emit$$"# L_zero_64_bytes:\n\t"
-       $$emit$$"SUB     0x8,RCX\n\t"
-       $$emit$$"JGE     L_loop\n\t"
-       $$emit$$"ADD     0x4,RCX\n\t"
-       $$emit$$"JL      L_tail\n\t"
-       $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
-       $$emit$$"ADD     0x20,RAX\n\t"
-       $$emit$$"SUB     0x4,RCX\n\t"
-       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
-       $$emit$$"ADD     0x4,RCX\n\t"
-       $$emit$$"JLE     L_end\n\t"
-       $$emit$$"DEC     RCX\n\t"
-       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
-       $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
-       $$emit$$"ADD     0x8,RAX\n\t"
-       $$emit$$"DEC     RCX\n\t"
-       $$emit$$"JGE     L_sloop\n\t"
-       $$emit$$"# L_end:\n\t"
-    } else {
-       $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
-       $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
-    }
-    $$emit$$"# DONE"
-  %}
-  ins_encode %{
-    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
-                 $tmp$$XMMRegister, false, $ktmp$$KRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-// Large non-constant length ClearArray for non-AVX512 targets.
-instruct rep_stos_large(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
-  predicate((UseAVX <= 2) && ((ClearArrayNode*)n)->is_large());
-  match(Set dummy (ClearArray cnt base));
-  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
-  format %{ $$template
-    if (UseFastStosb) {
-       $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
-       $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
-       $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
-    } else if (UseXMMForObjInit) {
-       $$emit$$"MOV     RDI,RAX\t# ClearArray:\n\t"
-       $$emit$$"VPXOR   YMM0,YMM0,YMM0\n\t"
-       $$emit$$"JMPQ    L_zero_64_bytes\n\t"
-       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
-       $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
-       $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
-       $$emit$$"ADD     0x40,RAX\n\t"
-       $$emit$$"# L_zero_64_bytes:\n\t"
-       $$emit$$"SUB     0x8,RCX\n\t"
-       $$emit$$"JGE     L_loop\n\t"
-       $$emit$$"ADD     0x4,RCX\n\t"
-       $$emit$$"JL      L_tail\n\t"
-       $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
-       $$emit$$"ADD     0x20,RAX\n\t"
-       $$emit$$"SUB     0x4,RCX\n\t"
-       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
-       $$emit$$"ADD     0x4,RCX\n\t"
-       $$emit$$"JLE     L_end\n\t"
-       $$emit$$"DEC     RCX\n\t"
-       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
-       $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
-       $$emit$$"ADD     0x8,RAX\n\t"
-       $$emit$$"DEC     RCX\n\t"
-       $$emit$$"JGE     L_sloop\n\t"
-       $$emit$$"# L_end:\n\t"
-    } else {
-       $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
-       $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
-       $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
-    }
-    $$emit$$"# DONE"
-  %}
-  ins_encode %{
-    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
-                 $tmp$$XMMRegister, true, knoreg);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-// Large non-constant length ClearArray for AVX512 targets.
-instruct rep_stos_large_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
-  predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
-  match(Set dummy (ClearArray cnt base));
-  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
-  format %{ $$template
-    if (UseFastStosb) {
-       $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
-       $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
-       $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
-    } else if (UseXMMForObjInit) {
-       $$emit$$"MOV     RDI,RAX\t# ClearArray:\n\t"
-       $$emit$$"VPXOR   YMM0,YMM0,YMM0\n\t"
-       $$emit$$"JMPQ    L_zero_64_bytes\n\t"
-       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
-       $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
-       $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
-       $$emit$$"ADD     0x40,RAX\n\t"
-       $$emit$$"# L_zero_64_bytes:\n\t"
-       $$emit$$"SUB     0x8,RCX\n\t"
-       $$emit$$"JGE     L_loop\n\t"
-       $$emit$$"ADD     0x4,RCX\n\t"
-       $$emit$$"JL      L_tail\n\t"
-       $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
-       $$emit$$"ADD     0x20,RAX\n\t"
-       $$emit$$"SUB     0x4,RCX\n\t"
-       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
-       $$emit$$"ADD     0x4,RCX\n\t"
-       $$emit$$"JLE     L_end\n\t"
-       $$emit$$"DEC     RCX\n\t"
-       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
-       $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
-       $$emit$$"ADD     0x8,RAX\n\t"
-       $$emit$$"DEC     RCX\n\t"
-       $$emit$$"JGE     L_sloop\n\t"
-       $$emit$$"# L_end:\n\t"
-    } else {
-       $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
-       $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
-       $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
-    }
-    $$emit$$"# DONE"
-  %}
-  ins_encode %{
-    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
-                 $tmp$$XMMRegister, true, $ktmp$$KRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-// Small constant length ClearArray for AVX512 targets.
-instruct rep_stos_im(immI cnt, kReg ktmp, eRegP base, regD tmp, rRegI zero, Universe dummy, eFlagsReg cr)
-%{
-  predicate(!((ClearArrayNode*)n)->is_large() && (MaxVectorSize >= 32) && VM_Version::supports_avx512vl());
-  match(Set dummy (ClearArray cnt base));
-  ins_cost(100);
-  effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
-  format %{ "clear_mem_imm $base , $cnt  \n\t" %}
-  ins_encode %{
-   __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
-  %}
-  ins_pipe(pipe_slow);
-%}
-
-instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
-                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
-  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
-  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
-  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
-
-  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
-  ins_encode %{
-    __ string_compare($str1$$Register, $str2$$Register,
-                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
-                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct string_compareL_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
-                              eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
-  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
-  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
-  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
-
-  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
-  ins_encode %{
-    __ string_compare($str1$$Register, $str2$$Register,
-                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
-                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
-                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
-  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
-  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
-  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
-
-  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
-  ins_encode %{
-    __ string_compare($str1$$Register, $str2$$Register,
-                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
-                      $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct string_compareU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
-                              eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
-  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
-  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
-  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
-
-  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
-  ins_encode %{
-    __ string_compare($str1$$Register, $str2$$Register,
-                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
-                      $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
-                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
-  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
-  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
-  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
-
-  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
-  ins_encode %{
-    __ string_compare($str1$$Register, $str2$$Register,
-                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
-                      $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct string_compareLU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
-                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
-  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
-  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
-  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
-
-  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
-  ins_encode %{
-    __ string_compare($str1$$Register, $str2$$Register,
-                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
-                      $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
-                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
-  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
-  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
-  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
-
-  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
-  ins_encode %{
-    __ string_compare($str2$$Register, $str1$$Register,
-                      $cnt2$$Register, $cnt1$$Register, $result$$Register,
-                      $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct string_compareUL_evex(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
-                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
-  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
-  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
-  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
-
-  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
-  ins_encode %{
-    __ string_compare($str2$$Register, $str1$$Register,
-                      $cnt2$$Register, $cnt1$$Register, $result$$Register,
-                      $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-// fast string equals
-instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
-                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
-  predicate(!VM_Version::supports_avx512vlbw());
-  match(Set result (StrEquals (Binary str1 str2) cnt));
-  effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
-
-  format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
-  ins_encode %{
-    __ arrays_equals(false, $str1$$Register, $str2$$Register,
-                     $cnt$$Register, $result$$Register, $tmp3$$Register,
-                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
-  %}
-
-  ins_pipe( pipe_slow );
-%}
-
-instruct string_equals_evex(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
-                            regD tmp1, regD tmp2, kReg ktmp, eBXRegI tmp3, eFlagsReg cr) %{
-  predicate(VM_Version::supports_avx512vlbw());
-  match(Set result (StrEquals (Binary str1 str2) cnt));
-  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
-
-  format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
-  ins_encode %{
-    __ arrays_equals(false, $str1$$Register, $str2$$Register,
-                     $cnt$$Register, $result$$Register, $tmp3$$Register,
-                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
-  %}
-
-  ins_pipe( pipe_slow );
-%}
-
-
-// fast search of substring with known size.
-instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
-                             eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
-  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
-  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
-  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
-
-  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
-  ins_encode %{
-    int icnt2 = (int)$int_cnt2$$constant;
-    if (icnt2 >= 16) {
-      // IndexOf for constant substrings with size >= 16 elements
-      // which don't need to be loaded through stack.
-      __ string_indexofC8($str1$$Register, $str2$$Register,
-                          $cnt1$$Register, $cnt2$$Register,
-                          icnt2, $result$$Register,
-                          $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
-    } else {
-      // Small strings are loaded through stack if they cross page boundary.
-      __ string_indexof($str1$$Register, $str2$$Register,
-                        $cnt1$$Register, $cnt2$$Register,
-                        icnt2, $result$$Register,
-                        $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
-    }
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-// fast search of substring with known size.
-instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
-                             eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
-  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
-  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
-  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
-
-  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
-  ins_encode %{
-    int icnt2 = (int)$int_cnt2$$constant;
-    if (icnt2 >= 8) {
-      // IndexOf for constant substrings with size >= 8 elements
-      // which don't need to be loaded through stack.
-      __ string_indexofC8($str1$$Register, $str2$$Register,
-                          $cnt1$$Register, $cnt2$$Register,
-                          icnt2, $result$$Register,
-                          $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
-    } else {
-      // Small strings are loaded through stack if they cross page boundary.
-      __ string_indexof($str1$$Register, $str2$$Register,
-                        $cnt1$$Register, $cnt2$$Register,
-                        icnt2, $result$$Register,
-                        $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
-    }
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-// fast search of substring with known size.
-instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
-                             eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
-  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
-  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
-  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
-
-  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
-  ins_encode %{
-    int icnt2 = (int)$int_cnt2$$constant;
-    if (icnt2 >= 8) {
-      // IndexOf for constant substrings with size >= 8 elements
-      // which don't need to be loaded through stack.
-      __ string_indexofC8($str1$$Register, $str2$$Register,
-                          $cnt1$$Register, $cnt2$$Register,
-                          icnt2, $result$$Register,
-                          $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
-    } else {
-      // Small strings are loaded through stack if they cross page boundary.
-      __ string_indexof($str1$$Register, $str2$$Register,
-                        $cnt1$$Register, $cnt2$$Register,
-                        icnt2, $result$$Register,
-                        $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
-    }
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
-                         eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
-  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
-  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
-  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
-
-  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
-  ins_encode %{
-    __ string_indexof($str1$$Register, $str2$$Register,
-                      $cnt1$$Register, $cnt2$$Register,
-                      (-1), $result$$Register,
-                      $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
-                         eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
-  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
-  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
-  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
-
-  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
-  ins_encode %{
-    __ string_indexof($str1$$Register, $str2$$Register,
-                      $cnt1$$Register, $cnt2$$Register,
-                      (-1), $result$$Register,
-                      $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
-                         eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
-  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
-  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
-  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
-
-  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
-  ins_encode %{
-    __ string_indexof($str1$$Register, $str2$$Register,
-                      $cnt1$$Register, $cnt2$$Register,
-                      (-1), $result$$Register,
-                      $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct string_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
-                              eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
-  predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
-  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
-  effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
-  format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
-  ins_encode %{
-    __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
-                           $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct stringL_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
-                              eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
-  predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
-  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
-  effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
-  format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
-  ins_encode %{
-    __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
-                           $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-
-// fast array equals
-instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
-                       regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
-%{
-  predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
-  match(Set result (AryEq ary1 ary2));
-  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
-  //ins_cost(300);
-
-  format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
-  ins_encode %{
-    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
-                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
-                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct array_equalsB_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
-                       regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
-%{
-  predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
-  match(Set result (AryEq ary1 ary2));
-  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
-  //ins_cost(300);
-
-  format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
-  ins_encode %{
-    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
-                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
-                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
-                       regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
-%{
-  predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
-  match(Set result (AryEq ary1 ary2));
-  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
-  //ins_cost(300);
-
-  format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
-  ins_encode %{
-    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
-                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
-                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct array_equalsC_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
-                            regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
-%{
-  predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
-  match(Set result (AryEq ary1 ary2));
-  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
-  //ins_cost(300);
-
-  format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
-  ins_encode %{
-    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
-                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
-                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct count_positives(eSIRegP ary1, eCXRegI len, eAXRegI result,
-                         regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
-%{
-  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
-  match(Set result (CountPositives ary1 len));
-  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
-
-  format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
-  ins_encode %{
-    __ count_positives($ary1$$Register, $len$$Register,
-                       $result$$Register, $tmp3$$Register,
-                       $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct count_positives_evex(eSIRegP ary1, eCXRegI len, eAXRegI result,
-                              regD tmp1, regD tmp2, kReg ktmp1, kReg ktmp2, eBXRegI tmp3, eFlagsReg cr)
-%{
-  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
-  match(Set result (CountPositives ary1 len));
-  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
-
-  format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
-  ins_encode %{
-    __ count_positives($ary1$$Register, $len$$Register,
-                       $result$$Register, $tmp3$$Register,
-                       $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-
-// fast char[] to byte[] compression
-instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
-                         regD tmp3, regD tmp4, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
-  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
-  match(Set result (StrCompressedCopy src (Binary dst len)));
-  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
-
-  format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
-  ins_encode %{
-    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
-                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
-                           $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
-                           knoreg, knoreg);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct string_compress_evex(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
-                              regD tmp3, regD tmp4, kReg ktmp1, kReg ktmp2, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
-  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
-  match(Set result (StrCompressedCopy src (Binary dst len)));
-  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
-
-  format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
-  ins_encode %{
-    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
-                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
-                           $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
-                           $ktmp1$$KRegister, $ktmp2$$KRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-// fast byte[] to char[] inflation
-instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
-                        regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
-  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
-  match(Set dummy (StrInflatedCopy src (Binary dst len)));
-  effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
-
-  format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
-  ins_encode %{
-    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
-                          $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct string_inflate_evex(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
-                             regD tmp1, kReg ktmp, eCXRegI tmp2, eFlagsReg cr) %{
-  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
-  match(Set dummy (StrInflatedCopy src (Binary dst len)));
-  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
-
-  format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
-  ins_encode %{
-    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
-                          $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-// encode char[] to byte[] in ISO_8859_1
-instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
-                          regD tmp1, regD tmp2, regD tmp3, regD tmp4,
-                          eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
-  predicate(!((EncodeISOArrayNode*)n)->is_ascii());
-  match(Set result (EncodeISOArray src (Binary dst len)));
-  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
-
-  format %{ "Encode iso array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
-  ins_encode %{
-    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
-                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
-                        $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-// encode char[] to byte[] in ASCII
-instruct encode_ascii_array(eSIRegP src, eDIRegP dst, eDXRegI len,
-                            regD tmp1, regD tmp2, regD tmp3, regD tmp4,
-                            eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
-  predicate(((EncodeISOArrayNode*)n)->is_ascii());
-  match(Set result (EncodeISOArray src (Binary dst len)));
-  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
-
-  format %{ "Encode ascii array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
-  ins_encode %{
-    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
-                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
-                        $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-//----------Control Flow Instructions------------------------------------------
-// Signed compare Instructions
-instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
-  match(Set cr (CmpI op1 op2));
-  effect( DEF cr, USE op1, USE op2 );
-  format %{ "CMP    $op1,$op2" %}
-  opcode(0x3B);  /* Opcode 3B /r */
-  ins_encode( OpcP, RegReg( op1, op2) );
-  ins_pipe( ialu_cr_reg_reg );
-%}
-
-instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
-  match(Set cr (CmpI op1 op2));
-  effect( DEF cr, USE op1 );
-  format %{ "CMP    $op1,$op2" %}
-  opcode(0x81,0x07);  /* Opcode 81 /7 */
-  // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
-  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
-  ins_pipe( ialu_cr_reg_imm );
-%}
-
-// Cisc-spilled version of cmpI_eReg
-instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
-  match(Set cr (CmpI op1 (LoadI op2)));
-
-  format %{ "CMP    $op1,$op2" %}
-  ins_cost(500);
-  opcode(0x3B);  /* Opcode 3B /r */
-  ins_encode( SetInstMark, OpcP, RegMem( op1, op2), ClearInstMark );
-  ins_pipe( ialu_cr_reg_mem );
-%}
-
-instruct testI_reg( eFlagsReg cr, rRegI src, immI_0 zero ) %{
-  match(Set cr (CmpI src zero));
-  effect( DEF cr, USE src );
-
-  format %{ "TEST   $src,$src" %}
-  opcode(0x85);
-  ins_encode( OpcP, RegReg( src, src ) );
-  ins_pipe( ialu_cr_reg_imm );
-%}
-
-instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI_0 zero ) %{
-  match(Set cr (CmpI (AndI src con) zero));
-
-  format %{ "TEST   $src,$con" %}
-  opcode(0xF7,0x00);
-  ins_encode( OpcP, RegOpc(src), Con32(con) );
-  ins_pipe( ialu_cr_reg_imm );
-%}
-
-instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI_0 zero ) %{
-  match(Set cr (CmpI (AndI src mem) zero));
-
-  format %{ "TEST   $src,$mem" %}
-  opcode(0x85);
-  ins_encode( SetInstMark, OpcP, RegMem( src, mem ), ClearInstMark );
-  ins_pipe( ialu_cr_reg_mem );
-%}
-
-// Unsigned compare Instructions; really, same as signed except they
-// produce an eFlagsRegU instead of eFlagsReg.
-instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
-  match(Set cr (CmpU op1 op2));
-
-  format %{ "CMPu   $op1,$op2" %}
-  opcode(0x3B);  /* Opcode 3B /r */
-  ins_encode( OpcP, RegReg( op1, op2) );
-  ins_pipe( ialu_cr_reg_reg );
-%}
-
-instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
-  match(Set cr (CmpU op1 op2));
-
-  format %{ "CMPu   $op1,$op2" %}
-  opcode(0x81,0x07);  /* Opcode 81 /7 */
-  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
-  ins_pipe( ialu_cr_reg_imm );
-%}
-
-// // Cisc-spilled version of cmpU_eReg
-instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
-  match(Set cr (CmpU op1 (LoadI op2)));
-
-  format %{ "CMPu   $op1,$op2" %}
-  ins_cost(500);
-  opcode(0x3B);  /* Opcode 3B /r */
-  ins_encode( SetInstMark, OpcP, RegMem( op1, op2), ClearInstMark );
-  ins_pipe( ialu_cr_reg_mem );
-%}
-
-// // Cisc-spilled version of cmpU_eReg
-//instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
-//  match(Set cr (CmpU (LoadI op1) op2));
-//
-//  format %{ "CMPu   $op1,$op2" %}
-//  ins_cost(500);
-//  opcode(0x39);  /* Opcode 39 /r */
-//  ins_encode( OpcP, RegMem( op1, op2) );
-//%}
-
-instruct testU_reg( eFlagsRegU cr, rRegI src, immI_0 zero ) %{
-  match(Set cr (CmpU src zero));
-
-  format %{ "TESTu  $src,$src" %}
-  opcode(0x85);
-  ins_encode( OpcP, RegReg( src, src ) );
-  ins_pipe( ialu_cr_reg_imm );
-%}
-
-// Unsigned pointer compare Instructions
-instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
-  match(Set cr (CmpP op1 op2));
-
-  format %{ "CMPu   $op1,$op2" %}
-  opcode(0x3B);  /* Opcode 3B /r */
-  ins_encode( OpcP, RegReg( op1, op2) );
-  ins_pipe( ialu_cr_reg_reg );
-%}
-
-instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
-  match(Set cr (CmpP op1 op2));
-
-  format %{ "CMPu   $op1,$op2" %}
-  opcode(0x81,0x07);  /* Opcode 81 /7 */
-  ins_encode( SetInstMark, OpcSErm( op1, op2 ), Con8or32( op2 ), ClearInstMark );
-  ins_pipe( ialu_cr_reg_imm );
-%}
-
-// // Cisc-spilled version of cmpP_eReg
-instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
-  match(Set cr (CmpP op1 (LoadP op2)));
-
-  format %{ "CMPu   $op1,$op2" %}
-  ins_cost(500);
-  opcode(0x3B);  /* Opcode 3B /r */
-  ins_encode( SetInstMark, OpcP, RegMem( op1, op2), ClearInstMark );
-  ins_pipe( ialu_cr_reg_mem );
-%}
-
-// // Cisc-spilled version of cmpP_eReg
-//instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
-//  match(Set cr (CmpP (LoadP op1) op2));
-//
-//  format %{ "CMPu   $op1,$op2" %}
-//  ins_cost(500);
-//  opcode(0x39);  /* Opcode 39 /r */
-//  ins_encode( OpcP, RegMem( op1, op2) );
-//%}
-
-// Compare raw pointer (used in out-of-heap check).
-// Only works because non-oop pointers must be raw pointers
-// and raw pointers have no anti-dependencies.
-instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
-  predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
-  match(Set cr (CmpP op1 (LoadP op2)));
-
-  format %{ "CMPu   $op1,$op2" %}
-  opcode(0x3B);  /* Opcode 3B /r */
-  ins_encode( SetInstMark, OpcP, RegMem( op1, op2), ClearInstMark );
-  ins_pipe( ialu_cr_reg_mem );
-%}
-
-//
-// This will generate a signed flags result. This should be ok
-// since any compare to a zero should be eq/neq.
-instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
-  match(Set cr (CmpP src zero));
-
-  format %{ "TEST   $src,$src" %}
-  opcode(0x85);
-  ins_encode( OpcP, RegReg( src, src ) );
-  ins_pipe( ialu_cr_reg_imm );
-%}
-
-// Cisc-spilled version of testP_reg
-// This will generate a signed flags result. This should be ok
-// since any compare to a zero should be eq/neq.
-instruct testP_Reg_mem( eFlagsReg cr, memory op, immI_0 zero ) %{
-  match(Set cr (CmpP (LoadP op) zero));
-
-  format %{ "TEST   $op,0xFFFFFFFF" %}
-  ins_cost(500);
-  opcode(0xF7);               /* Opcode F7 /0 */
-  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF), ClearInstMark );
-  ins_pipe( ialu_cr_reg_imm );
-%}
-
-// Yanked all unsigned pointer compare operations.
-// Pointer compares are done with CmpP which is already unsigned.
-
-//----------Max and Min--------------------------------------------------------
-// Min Instructions
-////
-//   *** Min and Max using the conditional move are slower than the
-//   *** branch version on a Pentium III.
-// // Conditional move for min
-//instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
-//  effect( USE_DEF op2, USE op1, USE cr );
-//  format %{ "CMOVlt $op2,$op1\t! min" %}
-//  opcode(0x4C,0x0F);
-//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
-//  ins_pipe( pipe_cmov_reg );
-//%}
-//
-//// Min Register with Register (P6 version)
-//instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
-//  predicate(VM_Version::supports_cmov() );
-//  match(Set op2 (MinI op1 op2));
-//  ins_cost(200);
-//  expand %{
-//    eFlagsReg cr;
-//    compI_eReg(cr,op1,op2);
-//    cmovI_reg_lt(op2,op1,cr);
-//  %}
-//%}
-
-// Min Register with Register (generic version)
-instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
-  match(Set dst (MinI dst src));
-  effect(KILL flags);
-  ins_cost(300);
-
-  format %{ "MIN    $dst,$src" %}
-  opcode(0xCC);
-  ins_encode( min_enc(dst,src) );
-  ins_pipe( pipe_slow );
-%}
-
-// Max Register with Register
-//   *** Min and Max using the conditional move are slower than the
-//   *** branch version on a Pentium III.
-// // Conditional move for max
-//instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
-//  effect( USE_DEF op2, USE op1, USE cr );
-//  format %{ "CMOVgt $op2,$op1\t! max" %}
-//  opcode(0x4F,0x0F);
-//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
-//  ins_pipe( pipe_cmov_reg );
-//%}
-//
-// // Max Register with Register (P6 version)
-//instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
-//  predicate(VM_Version::supports_cmov() );
-//  match(Set op2 (MaxI op1 op2));
-//  ins_cost(200);
-//  expand %{
-//    eFlagsReg cr;
-//    compI_eReg(cr,op1,op2);
-//    cmovI_reg_gt(op2,op1,cr);
-//  %}
-//%}
-
-// Max Register with Register (generic version)
-instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
-  match(Set dst (MaxI dst src));
-  effect(KILL flags);
-  ins_cost(300);
-
-  format %{ "MAX    $dst,$src" %}
-  opcode(0xCC);
-  ins_encode( max_enc(dst,src) );
-  ins_pipe( pipe_slow );
-%}
-
-// ============================================================================
-// Counted Loop limit node which represents exact final iterator value.
-// Note: the resulting value should fit into integer range since
-// counted loops have limit check on overflow.
-instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
-  match(Set limit (LoopLimit (Binary init limit) stride));
-  effect(TEMP limit_hi, TEMP tmp, KILL flags);
-  ins_cost(300);
-
-  format %{ "loopLimit $init,$limit,$stride  # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
-  ins_encode %{
-    int strd = (int)$stride$$constant;
-    assert(strd != 1 && strd != -1, "sanity");
-    int m1 = (strd > 0) ? 1 : -1;
-    // Convert limit to long (EAX:EDX)
-    __ cdql();
-    // Convert init to long (init:tmp)
-    __ movl($tmp$$Register, $init$$Register);
-    __ sarl($tmp$$Register, 31);
-    // $limit - $init
-    __ subl($limit$$Register, $init$$Register);
-    __ sbbl($limit_hi$$Register, $tmp$$Register);
-    // + ($stride - 1)
-    if (strd > 0) {
-      __ addl($limit$$Register, (strd - 1));
-      __ adcl($limit_hi$$Register, 0);
-      __ movl($tmp$$Register, strd);
-    } else {
-      __ addl($limit$$Register, (strd + 1));
-      __ adcl($limit_hi$$Register, -1);
-      __ lneg($limit_hi$$Register, $limit$$Register);
-      __ movl($tmp$$Register, -strd);
-    }
-    // signed division: (EAX:EDX) / pos_stride
-    __ idivl($tmp$$Register);
-    if (strd < 0) {
-      // restore sign
-      __ negl($tmp$$Register);
-    }
-    // (EAX) * stride
-    __ mull($tmp$$Register);
-    // + init (ignore upper bits)
-    __ addl($limit$$Register, $init$$Register);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-// ============================================================================
-// Branch Instructions
-// Jump Table
-instruct jumpXtnd(rRegI switch_val) %{
-  match(Jump switch_val);
-  ins_cost(350);
-  format %{  "JMP    [$constantaddress](,$switch_val,1)\n\t" %}
-  ins_encode %{
-    // Jump to Address(table_base + switch_reg)
-    Address index(noreg, $switch_val$$Register, Address::times_1);
-    __ jump(ArrayAddress($constantaddress, index), noreg);
-  %}
-  ins_pipe(pipe_jmp);
-%}
-
-// Jump Direct - Label defines a relative address from JMP+1
-instruct jmpDir(label labl) %{
-  match(Goto);
-  effect(USE labl);
-
-  ins_cost(300);
-  format %{ "JMP    $labl" %}
-  size(5);
-  ins_encode %{
-    Label* L = $labl$$label;
-    __ jmp(*L, false); // Always long jump
-  %}
-  ins_pipe( pipe_jmp );
-%}
-
-// Jump Direct Conditional - Label defines a relative address from Jcc+1
-instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
-  match(If cop cr);
-  effect(USE labl);
-
-  ins_cost(300);
-  format %{ "J$cop    $labl" %}
-  size(6);
-  ins_encode %{
-    Label* L = $labl$$label;
-    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
-  %}
-  ins_pipe( pipe_jcc );
-%}
-
-// Jump Direct Conditional - Label defines a relative address from Jcc+1
-instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
-  match(CountedLoopEnd cop cr);
-  effect(USE labl);
-
-  ins_cost(300);
-  format %{ "J$cop    $labl\t# Loop end" %}
-  size(6);
-  ins_encode %{
-    Label* L = $labl$$label;
-    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
-  %}
-  ins_pipe( pipe_jcc );
-%}
-
-// Jump Direct Conditional - using unsigned comparison
-instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
-  match(If cop cmp);
-  effect(USE labl);
-
-  ins_cost(300);
-  format %{ "J$cop,u  $labl" %}
-  size(6);
-  ins_encode %{
-    Label* L = $labl$$label;
-    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
-  %}
-  ins_pipe(pipe_jcc);
-%}
-
-instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
-  match(If cop cmp);
-  effect(USE labl);
-
-  ins_cost(200);
-  format %{ "J$cop,u  $labl" %}
-  size(6);
-  ins_encode %{
-    Label* L = $labl$$label;
-    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
-  %}
-  ins_pipe(pipe_jcc);
-%}
-
-instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
-  match(If cop cmp);
-  effect(USE labl);
-
-  ins_cost(200);
-  format %{ $$template
-    if ($cop$$cmpcode == Assembler::notEqual) {
-      $$emit$$"JP,u   $labl\n\t"
-      $$emit$$"J$cop,u   $labl"
-    } else {
-      $$emit$$"JP,u   done\n\t"
-      $$emit$$"J$cop,u   $labl\n\t"
-      $$emit$$"done:"
-    }
-  %}
-  ins_encode %{
-    Label* l = $labl$$label;
-    if ($cop$$cmpcode == Assembler::notEqual) {
-      __ jcc(Assembler::parity, *l, false);
-      __ jcc(Assembler::notEqual, *l, false);
-    } else if ($cop$$cmpcode == Assembler::equal) {
-      Label done;
-      __ jccb(Assembler::parity, done);
-      __ jcc(Assembler::equal, *l, false);
-      __ bind(done);
-    } else {
-       ShouldNotReachHere();
-    }
-  %}
-  ins_pipe(pipe_jcc);
-%}
-
-// ============================================================================
-// The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary superklass
-// array for an instance of the superklass.  Set a hidden internal cache on a
-// hit (cache is checked with exposed code in gen_subtype_check()).  Return
-// NZ for a miss or zero for a hit.  The encoding ALSO sets flags.
-instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
-  match(Set result (PartialSubtypeCheck sub super));
-  effect( KILL rcx, KILL cr );
-
-  ins_cost(1100);  // slightly larger than the next version
-  format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
-            "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
-            "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
-            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
-            "JNE,s  miss\t\t# Missed: EDI not-zero\n\t"
-            "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
-            "XOR    $result,$result\t\t Hit: EDI zero\n\t"
-     "miss:\t" %}
-
-  opcode(0x1); // Force a XOR of EDI
-  ins_encode( enc_PartialSubtypeCheck() );
-  ins_pipe( pipe_slow );
-%}
-
-instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
-  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
-  effect( KILL rcx, KILL result );
-
-  ins_cost(1000);
-  format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
-            "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
-            "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
-            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
-            "JNE,s  miss\t\t# Missed: flags NZ\n\t"
-            "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
-     "miss:\t" %}
-
-  opcode(0x0);  // No need to XOR EDI
-  ins_encode( enc_PartialSubtypeCheck() );
-  ins_pipe( pipe_slow );
-%}
-
-// ============================================================================
-// Branch Instructions -- short offset versions
-//
-// These instructions are used to replace jumps of a long offset (the default
-// match) with jumps of a shorter offset.  These instructions are all tagged
-// with the ins_short_branch attribute, which causes the ADLC to suppress the
-// match rules in general matching.  Instead, the ADLC generates a conversion
-// method in the MachNode which can be used to do in-place replacement of the
-// long variant with the shorter variant.  The compiler will determine if a
-// branch can be taken by the is_short_branch_offset() predicate in the machine
-// specific code section of the file.
-
-// Jump Direct - Label defines a relative address from JMP+1
-instruct jmpDir_short(label labl) %{
-  match(Goto);
-  effect(USE labl);
-
-  ins_cost(300);
-  format %{ "JMP,s  $labl" %}
-  size(2);
-  ins_encode %{
-    Label* L = $labl$$label;
-    __ jmpb(*L);
-  %}
-  ins_pipe( pipe_jmp );
-  ins_short_branch(1);
-%}
-
-// Jump Direct Conditional - Label defines a relative address from Jcc+1
-instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
-  match(If cop cr);
-  effect(USE labl);
-
-  ins_cost(300);
-  format %{ "J$cop,s  $labl" %}
-  size(2);
-  ins_encode %{
-    Label* L = $labl$$label;
-    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
-  %}
-  ins_pipe( pipe_jcc );
-  ins_short_branch(1);
-%}
-
-// Jump Direct Conditional - Label defines a relative address from Jcc+1
-instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
-  match(CountedLoopEnd cop cr);
-  effect(USE labl);
-
-  ins_cost(300);
-  format %{ "J$cop,s  $labl\t# Loop end" %}
-  size(2);
-  ins_encode %{
-    Label* L = $labl$$label;
-    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
-  %}
-  ins_pipe( pipe_jcc );
-  ins_short_branch(1);
-%}
-
-// Jump Direct Conditional - using unsigned comparison
-instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
-  match(If cop cmp);
-  effect(USE labl);
-
-  ins_cost(300);
-  format %{ "J$cop,us $labl" %}
-  size(2);
-  ins_encode %{
-    Label* L = $labl$$label;
-    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
-  %}
-  ins_pipe( pipe_jcc );
-  ins_short_branch(1);
-%}
-
-instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
-  match(If cop cmp);
-  effect(USE labl);
-
-  ins_cost(300);
-  format %{ "J$cop,us $labl" %}
-  size(2);
-  ins_encode %{
-    Label* L = $labl$$label;
-    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
-  %}
-  ins_pipe( pipe_jcc );
-  ins_short_branch(1);
-%}
-
-instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
-  match(If cop cmp);
-  effect(USE labl);
-
-  ins_cost(300);
-  format %{ $$template
-    if ($cop$$cmpcode == Assembler::notEqual) {
-      $$emit$$"JP,u,s   $labl\n\t"
-      $$emit$$"J$cop,u,s   $labl"
-    } else {
-      $$emit$$"JP,u,s   done\n\t"
-      $$emit$$"J$cop,u,s  $labl\n\t"
-      $$emit$$"done:"
-    }
-  %}
-  size(4);
-  ins_encode %{
-    Label* l = $labl$$label;
-    if ($cop$$cmpcode == Assembler::notEqual) {
-      __ jccb(Assembler::parity, *l);
-      __ jccb(Assembler::notEqual, *l);
-    } else if ($cop$$cmpcode == Assembler::equal) {
-      Label done;
-      __ jccb(Assembler::parity, done);
-      __ jccb(Assembler::equal, *l);
-      __ bind(done);
-    } else {
-       ShouldNotReachHere();
-    }
-  %}
-  ins_pipe(pipe_jcc);
-  ins_short_branch(1);
-%}
-
-// ============================================================================
-// Long Compare
-//
-// Currently we hold longs in 2 registers.  Comparing such values efficiently
-// is tricky.  The flavor of compare used depends on whether we are testing
-// for LT, LE, or EQ.  For a simple LT test we can check just the sign bit.
-// The GE test is the negated LT test.  The LE test can be had by commuting
-// the operands (yielding a GE test) and then negating; negate again for the
-// GT test.  The EQ test is done by ORcc'ing the high and low halves, and the
-// NE test is negated from that.
-
-// Due to a shortcoming in the ADLC, it mixes up expressions like:
-// (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)).  Note the
-// difference between 'Y' and '0L'.  The tree-matches for the CmpI sections
-// are collapsed internally in the ADLC's dfa-gen code.  The match for
-// (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
-// foo match ends up with the wrong leaf.  One fix is to not match both
-// reg-reg and reg-zero forms of long-compare.  This is unfortunate because
-// both forms beat the trinary form of long-compare and both are very useful
-// on Intel which has so few registers.
-
-// Manifest a CmpL result in an integer register.  Very painful.
-// This is the test to avoid.
-instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
-  match(Set dst (CmpL3 src1 src2));
-  effect( KILL flags );
-  ins_cost(1000);
-  format %{ "XOR    $dst,$dst\n\t"
-            "CMP    $src1.hi,$src2.hi\n\t"
-            "JLT,s  m_one\n\t"
-            "JGT,s  p_one\n\t"
-            "CMP    $src1.lo,$src2.lo\n\t"
-            "JB,s   m_one\n\t"
-            "JEQ,s  done\n"
-    "p_one:\tINC    $dst\n\t"
-            "JMP,s  done\n"
-    "m_one:\tDEC    $dst\n"
-     "done:" %}
-  ins_encode %{
-    Label p_one, m_one, done;
-    __ xorptr($dst$$Register, $dst$$Register);
-    __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
-    __ jccb(Assembler::less,    m_one);
-    __ jccb(Assembler::greater, p_one);
-    __ cmpl($src1$$Register, $src2$$Register);
-    __ jccb(Assembler::below,   m_one);
-    __ jccb(Assembler::equal,   done);
-    __ bind(p_one);
-    __ incrementl($dst$$Register);
-    __ jmpb(done);
-    __ bind(m_one);
-    __ decrementl($dst$$Register);
-    __ bind(done);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-//======
-// Manifest a CmpL result in the normal flags.  Only good for LT or GE
-// compares.  Can be used for LE or GT compares by reversing arguments.
-// NOT GOOD FOR EQ/NE tests.
-instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
-  match( Set flags (CmpL src zero ));
-  ins_cost(100);
-  format %{ "TEST   $src.hi,$src.hi" %}
-  opcode(0x85);
-  ins_encode( OpcP, RegReg_Hi2( src, src ) );
-  ins_pipe( ialu_cr_reg_reg );
-%}
-
-// Manifest a CmpL result in the normal flags.  Only good for LT or GE
-// compares.  Can be used for LE or GT compares by reversing arguments.
-// NOT GOOD FOR EQ/NE tests.
-instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
-  match( Set flags (CmpL src1 src2 ));
-  effect( TEMP tmp );
-  ins_cost(300);
-  format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
-            "MOV    $tmp,$src1.hi\n\t"
-            "SBB    $tmp,$src2.hi\t! Compute flags for long compare" %}
-  ins_encode( long_cmp_flags2( src1, src2, tmp ) );
-  ins_pipe( ialu_cr_reg_reg );
-%}
-
-// Long compares reg < zero/req OR reg >= zero/req.
-// Just a wrapper for a normal branch, plus the predicate test.
-instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
-  match(If cmp flags);
-  effect(USE labl);
-  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
-  expand %{
-    jmpCon(cmp,flags,labl);    // JLT or JGE...
-  %}
-%}
-
-//======
-// Manifest a CmpUL result in the normal flags.  Only good for LT or GE
-// compares.  Can be used for LE or GT compares by reversing arguments.
-// NOT GOOD FOR EQ/NE tests.
-instruct cmpUL_zero_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src, immL0 zero) %{
-  match(Set flags (CmpUL src zero));
-  ins_cost(100);
-  format %{ "TEST   $src.hi,$src.hi" %}
-  opcode(0x85);
-  ins_encode(OpcP, RegReg_Hi2(src, src));
-  ins_pipe(ialu_cr_reg_reg);
-%}
-
-// Manifest a CmpUL result in the normal flags.  Only good for LT or GE
-// compares.  Can be used for LE or GT compares by reversing arguments.
-// NOT GOOD FOR EQ/NE tests.
-instruct cmpUL_reg_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src1, eRegL src2, rRegI tmp) %{
-  match(Set flags (CmpUL src1 src2));
-  effect(TEMP tmp);
-  ins_cost(300);
-  format %{ "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
-            "MOV    $tmp,$src1.hi\n\t"
-            "SBB    $tmp,$src2.hi\t! Compute flags for unsigned long compare" %}
-  ins_encode(long_cmp_flags2(src1, src2, tmp));
-  ins_pipe(ialu_cr_reg_reg);
-%}
-
-// Unsigned long compares reg < zero/req OR reg >= zero/req.
-// Just a wrapper for a normal branch, plus the predicate test.
-instruct cmpUL_LTGE(cmpOpU cmp, flagsReg_ulong_LTGE flags, label labl) %{
-  match(If cmp flags);
-  effect(USE labl);
-  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
-  expand %{
-    jmpCon(cmp, flags, labl);    // JLT or JGE...
-  %}
-%}
-
-// Compare 2 longs and CMOVE longs.
-instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
-  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
-  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
-  ins_cost(400);
-  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
-            "CMOV$cmp $dst.hi,$src.hi" %}
-  opcode(0x0F,0x40);
-  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
-  ins_pipe( pipe_cmov_reg_long );
-%}
-
-instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
-  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
-  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
-  ins_cost(500);
-  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
-            "CMOV$cmp $dst.hi,$src.hi" %}
-  opcode(0x0F,0x40);
-  ins_encode( SetInstMark, enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src), ClearInstMark );
-  ins_pipe( pipe_cmov_reg_long );
-%}
-
-instruct cmovLL_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegL dst, eRegL src) %{
-  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
-  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
-  ins_cost(400);
-  expand %{
-    cmovLL_reg_LTGE(cmp, flags, dst, src);
-  %}
-%}
-
-instruct cmovLL_mem_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegL dst, load_long_memory src) %{
-  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
-  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
-  ins_cost(500);
-  expand %{
-    cmovLL_mem_LTGE(cmp, flags, dst, src);
-  %}
-%}
-
-// Compare 2 longs and CMOVE ints.
-instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
-  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
-  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
-  ins_cost(200);
-  format %{ "CMOV$cmp $dst,$src" %}
-  opcode(0x0F,0x40);
-  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
-  ins_pipe( pipe_cmov_reg );
-%}
-
-instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
-  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
-  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
-  ins_cost(250);
-  format %{ "CMOV$cmp $dst,$src" %}
-  opcode(0x0F,0x40);
-  ins_encode( SetInstMark, enc_cmov(cmp), RegMem( dst, src ), ClearInstMark );
-  ins_pipe( pipe_cmov_mem );
-%}
-
-instruct cmovII_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, rRegI dst, rRegI src) %{
-  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
-  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
-  ins_cost(200);
-  expand %{
-    cmovII_reg_LTGE(cmp, flags, dst, src);
-  %}
-%}
-
-instruct cmovII_mem_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, rRegI dst, memory src) %{
-  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
-  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
-  ins_cost(250);
-  expand %{
-    cmovII_mem_LTGE(cmp, flags, dst, src);
-  %}
-%}
-
-// Compare 2 longs and CMOVE ptrs.
-instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
-  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
-  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
-  ins_cost(200);
-  format %{ "CMOV$cmp $dst,$src" %}
-  opcode(0x0F,0x40);
-  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
-  ins_pipe( pipe_cmov_reg );
-%}
-
-// Compare 2 unsigned longs and CMOVE ptrs.
-instruct cmovPP_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegP dst, eRegP src) %{
-  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
-  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
-  ins_cost(200);
-  expand %{
-    cmovPP_reg_LTGE(cmp,flags,dst,src);
-  %}
-%}
-
-// Compare 2 longs and CMOVE doubles
-instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
-  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
-  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
-  ins_cost(200);
-  expand %{
-    fcmovDPR_regS(cmp,flags,dst,src);
-  %}
-%}
-
-// Compare 2 longs and CMOVE doubles
-instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
-  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
-  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
-  ins_cost(200);
-  expand %{
-    fcmovD_regS(cmp,flags,dst,src);
-  %}
-%}
-
-instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
-  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
-  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
-  ins_cost(200);
-  expand %{
-    fcmovFPR_regS(cmp,flags,dst,src);
-  %}
-%}
-
-instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
-  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
-  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
-  ins_cost(200);
-  expand %{
-    fcmovF_regS(cmp,flags,dst,src);
-  %}
-%}
-
-//======
-// Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
-instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
-  match( Set flags (CmpL src zero ));
-  effect(TEMP tmp);
-  ins_cost(200);
-  format %{ "MOV    $tmp,$src.lo\n\t"
-            "OR     $tmp,$src.hi\t! Long is EQ/NE 0?" %}
-  ins_encode( long_cmp_flags0( src, tmp ) );
-  ins_pipe( ialu_reg_reg_long );
-%}
-
-// Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
-instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
-  match( Set flags (CmpL src1 src2 ));
-  ins_cost(200+300);
-  format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
-            "JNE,s  skip\n\t"
-            "CMP    $src1.hi,$src2.hi\n\t"
-     "skip:\t" %}
-  ins_encode( long_cmp_flags1( src1, src2 ) );
-  ins_pipe( ialu_cr_reg_reg );
-%}
-
-// Long compare reg == zero/reg OR reg != zero/reg
-// Just a wrapper for a normal branch, plus the predicate test.
-instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
-  match(If cmp flags);
-  effect(USE labl);
-  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
-  expand %{
-    jmpCon(cmp,flags,labl);    // JEQ or JNE...
-  %}
-%}
-
-//======
-// Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
-instruct cmpUL_zero_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src, immL0 zero, rRegI tmp) %{
-  match(Set flags (CmpUL src zero));
-  effect(TEMP tmp);
-  ins_cost(200);
-  format %{ "MOV    $tmp,$src.lo\n\t"
-            "OR     $tmp,$src.hi\t! Unsigned long is EQ/NE 0?" %}
-  ins_encode(long_cmp_flags0(src, tmp));
-  ins_pipe(ialu_reg_reg_long);
-%}
-
-// Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
-instruct cmpUL_reg_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src1, eRegL src2) %{
-  match(Set flags (CmpUL src1 src2));
-  ins_cost(200+300);
-  format %{ "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
-            "JNE,s  skip\n\t"
-            "CMP    $src1.hi,$src2.hi\n\t"
-     "skip:\t" %}
-  ins_encode(long_cmp_flags1(src1, src2));
-  ins_pipe(ialu_cr_reg_reg);
-%}
-
-// Unsigned long compare reg == zero/reg OR reg != zero/reg
-// Just a wrapper for a normal branch, plus the predicate test.
-instruct cmpUL_EQNE(cmpOpU cmp, flagsReg_ulong_EQNE flags, label labl) %{
-  match(If cmp flags);
-  effect(USE labl);
-  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne);
-  expand %{
-    jmpCon(cmp, flags, labl);    // JEQ or JNE...
-  %}
-%}
-
-// Compare 2 longs and CMOVE longs.
-instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
-  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
-  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
-  ins_cost(400);
-  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
-            "CMOV$cmp $dst.hi,$src.hi" %}
-  opcode(0x0F,0x40);
-  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
-  ins_pipe( pipe_cmov_reg_long );
-%}
-
-instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
-  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
-  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
-  ins_cost(500);
-  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
-            "CMOV$cmp $dst.hi,$src.hi" %}
-  opcode(0x0F,0x40);
-  ins_encode( SetInstMark, enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src), ClearInstMark );
-  ins_pipe( pipe_cmov_reg_long );
-%}
-
-// Compare 2 longs and CMOVE ints.
-instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
-  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
-  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
-  ins_cost(200);
-  format %{ "CMOV$cmp $dst,$src" %}
-  opcode(0x0F,0x40);
-  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
-  ins_pipe( pipe_cmov_reg );
-%}
-
-instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
-  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
-  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
-  ins_cost(250);
-  format %{ "CMOV$cmp $dst,$src" %}
-  opcode(0x0F,0x40);
-  ins_encode( SetInstMark, enc_cmov(cmp), RegMem( dst, src ), ClearInstMark );
-  ins_pipe( pipe_cmov_mem );
-%}
-
-instruct cmovII_reg_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, rRegI dst, rRegI src) %{
-  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
-  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
-  ins_cost(200);
-  expand %{
-    cmovII_reg_EQNE(cmp, flags, dst, src);
-  %}
-%}
-
-instruct cmovII_mem_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, rRegI dst, memory src) %{
-  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
-  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
-  ins_cost(250);
-  expand %{
-    cmovII_mem_EQNE(cmp, flags, dst, src);
-  %}
-%}
-
-// Compare 2 longs and CMOVE ptrs.
-instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
-  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
-  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
-  ins_cost(200);
-  format %{ "CMOV$cmp $dst,$src" %}
-  opcode(0x0F,0x40);
-  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
-  ins_pipe( pipe_cmov_reg );
-%}
-
-// Compare 2 unsigned longs and CMOVE ptrs.
-instruct cmovPP_reg_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, eRegP dst, eRegP src) %{
-  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
-  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
-  ins_cost(200);
-  expand %{
-    cmovPP_reg_EQNE(cmp,flags,dst,src);
-  %}
-%}
-
-// Compare 2 longs and CMOVE doubles
-instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
-  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
-  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
-  ins_cost(200);
-  expand %{
-    fcmovDPR_regS(cmp,flags,dst,src);
-  %}
-%}
-
-// Compare 2 longs and CMOVE doubles
-instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
-  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
-  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
-  ins_cost(200);
-  expand %{
-    fcmovD_regS(cmp,flags,dst,src);
-  %}
-%}
-
-instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
-  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
-  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
-  ins_cost(200);
-  expand %{
-    fcmovFPR_regS(cmp,flags,dst,src);
-  %}
-%}
-
-instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
-  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
-  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
-  ins_cost(200);
-  expand %{
-    fcmovF_regS(cmp,flags,dst,src);
-  %}
-%}
-
-//======
-// Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
-// Same as cmpL_reg_flags_LEGT except must negate src
-instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
-  match( Set flags (CmpL src zero ));
-  effect( TEMP tmp );
-  ins_cost(300);
-  format %{ "XOR    $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
-            "CMP    $tmp,$src.lo\n\t"
-            "SBB    $tmp,$src.hi\n\t" %}
-  ins_encode( long_cmp_flags3(src, tmp) );
-  ins_pipe( ialu_reg_reg_long );
-%}
-
-// Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
-// Same as cmpL_reg_flags_LTGE except operands swapped.  Swapping operands
-// requires a commuted test to get the same result.
-instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
-  match( Set flags (CmpL src1 src2 ));
-  effect( TEMP tmp );
-  ins_cost(300);
-  format %{ "CMP    $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
-            "MOV    $tmp,$src2.hi\n\t"
-            "SBB    $tmp,$src1.hi\t! Compute flags for long compare" %}
-  ins_encode( long_cmp_flags2( src2, src1, tmp ) );
-  ins_pipe( ialu_cr_reg_reg );
-%}
-
-// Long compares reg < zero/req OR reg >= zero/req.
-// Just a wrapper for a normal branch, plus the predicate test
-instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
-  match(If cmp flags);
-  effect(USE labl);
-  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
-  ins_cost(300);
-  expand %{
-    jmpCon(cmp,flags,labl);    // JGT or JLE...
-  %}
-%}
-
-//======
-// Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
-// Same as cmpUL_reg_flags_LEGT except must negate src
-instruct cmpUL_zero_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src, immL0 zero, rRegI tmp) %{
-  match(Set flags (CmpUL src zero));
-  effect(TEMP tmp);
-  ins_cost(300);
-  format %{ "XOR    $tmp,$tmp\t# Unsigned long compare for -$src < 0, use commuted test\n\t"
-            "CMP    $tmp,$src.lo\n\t"
-            "SBB    $tmp,$src.hi\n\t" %}
-  ins_encode(long_cmp_flags3(src, tmp));
-  ins_pipe(ialu_reg_reg_long);
-%}
-
-// Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
-// Same as cmpUL_reg_flags_LTGE except operands swapped.  Swapping operands
-// requires a commuted test to get the same result.
-instruct cmpUL_reg_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src1, eRegL src2, rRegI tmp) %{
-  match(Set flags (CmpUL src1 src2));
-  effect(TEMP tmp);
-  ins_cost(300);
-  format %{ "CMP    $src2.lo,$src1.lo\t! Unsigned long compare, swapped operands, use with commuted test\n\t"
-            "MOV    $tmp,$src2.hi\n\t"
-            "SBB    $tmp,$src1.hi\t! Compute flags for unsigned long compare" %}
-  ins_encode(long_cmp_flags2( src2, src1, tmp));
-  ins_pipe(ialu_cr_reg_reg);
-%}
-
-// Unsigned long compares reg < zero/req OR reg >= zero/req.
-// Just a wrapper for a normal branch, plus the predicate test
-instruct cmpUL_LEGT(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, label labl) %{
-  match(If cmp flags);
-  effect(USE labl);
-  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le);
-  ins_cost(300);
-  expand %{
-    jmpCon(cmp, flags, labl);    // JGT or JLE...
-  %}
-%}
-
-// Compare 2 longs and CMOVE longs.
-instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
-  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
-  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
-  ins_cost(400);
-  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
-            "CMOV$cmp $dst.hi,$src.hi" %}
-  opcode(0x0F,0x40);
-  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
-  ins_pipe( pipe_cmov_reg_long );
-%}
-
-instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
-  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
-  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
-  ins_cost(500);
-  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
-            "CMOV$cmp $dst.hi,$src.hi+4" %}
-  opcode(0x0F,0x40);
-  ins_encode( SetInstMark, enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src), ClearInstMark );
-  ins_pipe( pipe_cmov_reg_long );
-%}
-
-instruct cmovLL_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, eRegL src) %{
-  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
-  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
-  ins_cost(400);
-  expand %{
-    cmovLL_reg_LEGT(cmp, flags, dst, src);
-  %}
-%}
-
-instruct cmovLL_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, load_long_memory src) %{
-  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
-  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
-  ins_cost(500);
-  expand %{
-    cmovLL_mem_LEGT(cmp, flags, dst, src);
-  %}
-%}
-
-// Compare 2 longs and CMOVE ints.
-instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
-  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
-  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
-  ins_cost(200);
-  format %{ "CMOV$cmp $dst,$src" %}
-  opcode(0x0F,0x40);
-  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
-  ins_pipe( pipe_cmov_reg );
-%}
-
-instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
-  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
-  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
-  ins_cost(250);
-  format %{ "CMOV$cmp $dst,$src" %}
-  opcode(0x0F,0x40);
-  ins_encode( SetInstMark, enc_cmov(cmp), RegMem( dst, src ), ClearInstMark );
-  ins_pipe( pipe_cmov_mem );
-%}
-
-instruct cmovII_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, rRegI dst, rRegI src) %{
-  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
-  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
-  ins_cost(200);
-  expand %{
-    cmovII_reg_LEGT(cmp, flags, dst, src);
-  %}
-%}
-
-instruct cmovII_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, rRegI dst, memory src) %{
-  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
-  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
-  ins_cost(250);
-  expand %{
-    cmovII_mem_LEGT(cmp, flags, dst, src);
-  %}
-%}
-
-// Compare 2 longs and CMOVE ptrs.
-instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
-  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
-  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
-  ins_cost(200);
-  format %{ "CMOV$cmp $dst,$src" %}
-  opcode(0x0F,0x40);
-  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
-  ins_pipe( pipe_cmov_reg );
-%}
-
-// Compare 2 unsigned longs and CMOVE ptrs.
-instruct cmovPP_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegP dst, eRegP src) %{
-  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
-  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
-  ins_cost(200);
-  expand %{
-    cmovPP_reg_LEGT(cmp,flags,dst,src);
-  %}
-%}
-
-// Compare 2 longs and CMOVE doubles
-instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
-  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
-  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
-  ins_cost(200);
-  expand %{
-    fcmovDPR_regS(cmp,flags,dst,src);
-  %}
-%}
-
-// Compare 2 longs and CMOVE doubles
-instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
-  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
-  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
-  ins_cost(200);
-  expand %{
-    fcmovD_regS(cmp,flags,dst,src);
-  %}
-%}
-
-instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
-  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
-  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
-  ins_cost(200);
-  expand %{
-    fcmovFPR_regS(cmp,flags,dst,src);
-  %}
-%}
-
-
-instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
-  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
-  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
-  ins_cost(200);
-  expand %{
-    fcmovF_regS(cmp,flags,dst,src);
-  %}
-%}
-
-
-// ============================================================================
-// Procedure Call/Return Instructions
-// Call Java Static Instruction
-// Note: If this code changes, the corresponding ret_addr_offset() and
-//       compute_padding() functions will have to be adjusted.
-instruct CallStaticJavaDirect(method meth) %{
-  match(CallStaticJava);
-  effect(USE meth);
-
-  ins_cost(300);
-  format %{ "CALL,static " %}
-  opcode(0xE8); /* E8 cd */
-  ins_encode( pre_call_resets,
-              Java_Static_Call( meth ),
-              call_epilog,
-              post_call_FPU );
-  ins_pipe( pipe_slow );
-  ins_alignment(4);
-%}
-
-// Call Java Dynamic Instruction
-// Note: If this code changes, the corresponding ret_addr_offset() and
-//       compute_padding() functions will have to be adjusted.
-instruct CallDynamicJavaDirect(method meth) %{
-  match(CallDynamicJava);
-  effect(USE meth);
-
-  ins_cost(300);
-  format %{ "MOV    EAX,(oop)-1\n\t"
-            "CALL,dynamic" %}
-  opcode(0xE8); /* E8 cd */
-  ins_encode( pre_call_resets,
-              Java_Dynamic_Call( meth ),
-              call_epilog,
-              post_call_FPU );
-  ins_pipe( pipe_slow );
-  ins_alignment(4);
-%}
-
-// Call Runtime Instruction
-instruct CallRuntimeDirect(method meth) %{
-  match(CallRuntime );
-  effect(USE meth);
-
-  ins_cost(300);
-  format %{ "CALL,runtime " %}
-  opcode(0xE8); /* E8 cd */
-  // Use FFREEs to clear entries in float stack
-  ins_encode( pre_call_resets,
-              FFree_Float_Stack_All,
-              Java_To_Runtime( meth ),
-              post_call_FPU );
-  ins_pipe( pipe_slow );
-%}
-
-// Call runtime without safepoint
-instruct CallLeafDirect(method meth) %{
-  match(CallLeaf);
-  effect(USE meth);
-
-  ins_cost(300);
-  format %{ "CALL_LEAF,runtime " %}
-  opcode(0xE8); /* E8 cd */
-  ins_encode( pre_call_resets,
-              FFree_Float_Stack_All,
-              Java_To_Runtime( meth ),
-              Verify_FPU_For_Leaf, post_call_FPU );
-  ins_pipe( pipe_slow );
-%}
-
-instruct CallLeafNoFPDirect(method meth) %{
-  match(CallLeafNoFP);
-  effect(USE meth);
-
-  ins_cost(300);
-  format %{ "CALL_LEAF_NOFP,runtime " %}
-  opcode(0xE8); /* E8 cd */
-  ins_encode(pre_call_resets, Java_To_Runtime(meth));
-  ins_pipe( pipe_slow );
-%}
-
-
-// Return Instruction
-// Remove the return address & jump to it.
-instruct Ret() %{
-  match(Return);
-  format %{ "RET" %}
-  opcode(0xC3);
-  ins_encode(OpcP);
-  ins_pipe( pipe_jmp );
-%}
-
-// Tail Call; Jump from runtime stub to Java code.
-// Also known as an 'interprocedural jump'.
-// Target of jump will eventually return to caller.
-// TailJump below removes the return address.
-// Don't use ebp for 'jump_target' because a MachEpilogNode has already been
-// emitted just above the TailCall which has reset ebp to the caller state.
-instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_ptr) %{
-  match(TailCall jump_target method_ptr);
-  ins_cost(300);
-  format %{ "JMP    $jump_target \t# EBX holds method" %}
-  opcode(0xFF, 0x4);  /* Opcode FF /4 */
-  ins_encode( OpcP, RegOpc(jump_target) );
-  ins_pipe( pipe_jmp );
-%}
-
-
-// Tail Jump; remove the return address; jump to target.
-// TailCall above leaves the return address around.
-instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
-  match( TailJump jump_target ex_oop );
-  ins_cost(300);
-  format %{ "POP    EDX\t# pop return address into dummy\n\t"
-            "JMP    $jump_target " %}
-  opcode(0xFF, 0x4);  /* Opcode FF /4 */
-  ins_encode( enc_pop_rdx,
-              OpcP, RegOpc(jump_target) );
-  ins_pipe( pipe_jmp );
-%}
-
-// Forward exception.
-instruct ForwardExceptionjmp()
-%{
-  match(ForwardException);
-
-  format %{ "JMP    forward_exception_stub" %}
-  ins_encode %{
-    __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()), noreg);
-  %}
-  ins_pipe(pipe_jmp);
-%}
-
-// Create exception oop: created by stack-crawling runtime code.
-// Created exception is now available to this handler, and is setup
-// just prior to jumping to this handler.  No code emitted.
-instruct CreateException( eAXRegP ex_oop )
-%{
-  match(Set ex_oop (CreateEx));
-
-  size(0);
-  // use the following format syntax
-  format %{ "# exception oop is in EAX; no code emitted" %}
-  ins_encode();
-  ins_pipe( empty );
-%}
-
-
-// Rethrow exception:
-// The exception oop will come in the first argument position.
-// Then JUMP (not call) to the rethrow stub code.
-instruct RethrowException()
-%{
-  match(Rethrow);
-
-  // use the following format syntax
-  format %{ "JMP    rethrow_stub" %}
-  ins_encode(enc_rethrow);
-  ins_pipe( pipe_jmp );
-%}
-
-// inlined locking and unlocking
-
-instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr, eRegP thread) %{
-  predicate(LockingMode != LM_LIGHTWEIGHT);
-  match(Set cr (FastLock object box));
-  effect(TEMP tmp, TEMP scr, USE_KILL box, TEMP thread);
-  ins_cost(300);
-  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
-  ins_encode %{
-    __ get_thread($thread$$Register);
-    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
-                 $scr$$Register, noreg, noreg, $thread$$Register, nullptr);
-  %}
-  ins_pipe(pipe_slow);
-%}
-
-instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
-  predicate(LockingMode != LM_LIGHTWEIGHT);
-  match(Set cr (FastUnlock object box));
-  effect(TEMP tmp, USE_KILL box);
-  ins_cost(300);
-  format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
-  ins_encode %{
-    __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register);
-  %}
-  ins_pipe(pipe_slow);
-%}
-
-instruct cmpFastLockLightweight(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI eax_reg, eRegP tmp, eRegP thread) %{
-  predicate(LockingMode == LM_LIGHTWEIGHT);
-  match(Set cr (FastLock object box));
-  effect(TEMP eax_reg, TEMP tmp, USE_KILL box, TEMP thread);
-  ins_cost(300);
-  format %{ "FASTLOCK $object,$box\t! kills $box,$eax_reg,$tmp" %}
-  ins_encode %{
-    __ get_thread($thread$$Register);
-    __ fast_lock_lightweight($object$$Register, $box$$Register, $eax_reg$$Register, $tmp$$Register, $thread$$Register);
-  %}
-  ins_pipe(pipe_slow);
-%}
-
-instruct cmpFastUnlockLightweight(eFlagsReg cr, eRegP object, eAXRegP eax_reg, eRegP tmp, eRegP thread) %{
-  predicate(LockingMode == LM_LIGHTWEIGHT);
-  match(Set cr (FastUnlock object eax_reg));
-  effect(TEMP tmp, USE_KILL eax_reg, TEMP thread);
-  ins_cost(300);
-  format %{ "FASTUNLOCK $object,$eax_reg\t! kills $eax_reg,$tmp" %}
-  ins_encode %{
-    __ get_thread($thread$$Register);
-    __ fast_unlock_lightweight($object$$Register, $eax_reg$$Register, $tmp$$Register, $thread$$Register);
-  %}
-  ins_pipe(pipe_slow);
-%}
-
-instruct mask_all_evexL_LT32(kReg dst, eRegL src) %{
-  predicate(Matcher::vector_length(n) <= 32);
-  match(Set dst (MaskAll src));
-  format %{ "mask_all_evexL_LE32 $dst, $src \t" %}
-  ins_encode %{
-    int mask_len = Matcher::vector_length(this);
-    __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct mask_all_evexL_GT32(kReg dst, eRegL src, kReg ktmp) %{
-  predicate(Matcher::vector_length(n) > 32);
-  match(Set dst (MaskAll src));
-  effect(TEMP ktmp);
-  format %{ "mask_all_evexL_GT32 $dst, $src \t! using $ktmp as TEMP " %}
-  ins_encode %{
-    int mask_len = Matcher::vector_length(this);
-    __ vector_maskall_operation32($dst$$KRegister, $src$$Register, $ktmp$$KRegister, mask_len);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct mask_all_evexI_GT32(kReg dst, rRegI src, kReg ktmp) %{
-  predicate(Matcher::vector_length(n) > 32);
-  match(Set dst (MaskAll src));
-  effect(TEMP ktmp);
-  format %{ "mask_all_evexI_GT32 $dst, $src \t! using $ktmp as TEMP" %}
-  ins_encode %{
-    int mask_len = Matcher::vector_length(this);
-    __ vector_maskall_operation32($dst$$KRegister, $src$$Register, $ktmp$$KRegister, mask_len);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-// ============================================================================
-// Safepoint Instruction
-instruct safePoint_poll_tls(eFlagsReg cr, eRegP_no_EBP poll) %{
-  match(SafePoint poll);
-  effect(KILL cr, USE poll);
-
-  format %{ "TSTL   #EAX,[$poll]\t! Safepoint: poll for GC" %}
-  ins_cost(125);
-  // EBP would need size(3)
-  size(2); /* setting an explicit size will cause debug builds to assert if size is incorrect */
-  ins_encode %{
-    __ set_inst_mark();
-    __ relocate(relocInfo::poll_type);
-    __ clear_inst_mark();
-    address pre_pc = __ pc();
-    __ testl(rax, Address($poll$$Register, 0));
-    address post_pc = __ pc();
-    guarantee(pre_pc[0] == 0x85, "must emit test-ax [reg]");
-  %}
-  ins_pipe(ialu_reg_mem);
-%}
-
-
-// ============================================================================
-// This name is KNOWN by the ADLC and cannot be changed.
-// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
-// for this guy.
-instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
-  match(Set dst (ThreadLocal));
-  effect(DEF dst, KILL cr);
-
-  format %{ "MOV    $dst, Thread::current()" %}
-  ins_encode %{
-    Register dstReg = as_Register($dst$$reg);
-    __ get_thread(dstReg);
-  %}
-  ins_pipe( ialu_reg_fat );
-%}
-
-
-
-//----------PEEPHOLE RULES-----------------------------------------------------
-// These must follow all instruction definitions as they use the names
-// defined in the instructions definitions.
-//
-// peepmatch ( root_instr_name [preceding_instruction]* );
-//
-// peepconstraint %{
-// (instruction_number.operand_name relational_op instruction_number.operand_name
-//  [, ...] );
-// // instruction numbers are zero-based using left to right order in peepmatch
-//
-// peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
-// // provide an instruction_number.operand_name for each operand that appears
-// // in the replacement instruction's match rule
-//
-// ---------VM FLAGS---------------------------------------------------------
-//
-// All peephole optimizations can be turned off using -XX:-OptoPeephole
-//
-// Each peephole rule is given an identifying number starting with zero and
-// increasing by one in the order seen by the parser.  An individual peephole
-// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
-// on the command-line.
-//
-// ---------CURRENT LIMITATIONS----------------------------------------------
-//
-// Only match adjacent instructions in same basic block
-// Only equality constraints
-// Only constraints between operands, not (0.dest_reg == EAX_enc)
-// Only one replacement instruction
-//
-// ---------EXAMPLE----------------------------------------------------------
-//
-// // pertinent parts of existing instructions in architecture description
-// instruct movI(rRegI dst, rRegI src) %{
-//   match(Set dst (CopyI src));
-// %}
-//
-// instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
-//   match(Set dst (AddI dst src));
-//   effect(KILL cr);
-// %}
-//
-// // Change (inc mov) to lea
-// peephole %{
-//   // increment preceded by register-register move
-//   peepmatch ( incI_eReg movI );
-//   // require that the destination register of the increment
-//   // match the destination register of the move
-//   peepconstraint ( 0.dst == 1.dst );
-//   // construct a replacement instruction that sets
-//   // the destination to ( move's source register + one )
-//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
-// %}
-//
-// Implementation no longer uses movX instructions since
-// machine-independent system no longer uses CopyX nodes.
-//
-// peephole %{
-//   peepmatch ( incI_eReg movI );
-//   peepconstraint ( 0.dst == 1.dst );
-//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
-// %}
-//
-// peephole %{
-//   peepmatch ( decI_eReg movI );
-//   peepconstraint ( 0.dst == 1.dst );
-//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
-// %}
-//
-// peephole %{
-//   peepmatch ( addI_eReg_imm movI );
-//   peepconstraint ( 0.dst == 1.dst );
-//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
-// %}
-//
-// peephole %{
-//   peepmatch ( addP_eReg_imm movP );
-//   peepconstraint ( 0.dst == 1.dst );
-//   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
-// %}
-
-// // Change load of spilled value to only a spill
-// instruct storeI(memory mem, rRegI src) %{
-//   match(Set mem (StoreI mem src));
-// %}
-//
-// instruct loadI(rRegI dst, memory mem) %{
-//   match(Set dst (LoadI mem));
-// %}
-//
-peephole %{
-  peepmatch ( loadI storeI );
-  peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
-  peepreplace ( storeI( 1.mem 1.mem 1.src ) );
-%}
-
-//----------SMARTSPILL RULES---------------------------------------------------
-// These must follow all instruction definitions as they use the names
-// defined in the instructions definitions.
diff --git a/src/hotspot/os_cpu/bsd_x86/bsd_x86_32.S b/src/hotspot/os_cpu/bsd_x86/bsd_x86_32.S
deleted file mode 100644
index 7d8892bcd87..00000000000
--- a/src/hotspot/os_cpu/bsd_x86/bsd_x86_32.S
+++ /dev/null
@@ -1,525 +0,0 @@
-#
-# Copyright (c) 2004, 2024, Oracle and/or its affiliates. All rights reserved.
-# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
-#
-# This code is free software; you can redistribute it and/or modify it
-# under the terms of the GNU General Public License version 2 only, as
-# published by the Free Software Foundation.
-#
-# This code is distributed in the hope that it will be useful, but WITHOUT
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-# version 2 for more details (a copy is included in the LICENSE file that
-# accompanied this code).
-#
-# You should have received a copy of the GNU General Public License version
-# 2 along with this work; if not, write to the Free Software Foundation,
-# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
-#
-# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
-# or visit www.oracle.com if you need additional information or have any
-# questions.
-#
-
-#include "defs.S.inc"
-
-        # NOTE WELL!  The _Copy functions are called directly
-        # from server-compiler-generated code via CallLeafNoFP,
-        # which means that they *must* either not use floating
-        # point or use it in the same manner as does the server
-        # compiler.
-
-        .text
-
-# Set fpu to 53 bit precision.  This happens too early to use a stub.
-        .p2align 4,,15
-DECLARE_FUNC(fixcw):
-        pushl    $0x27f
-        fldcw    0(%esp)
-        popl     %eax
-        ret
-
-        .p2align 4,,15
-DECLARE_FUNC(SpinPause):
-        rep
-        nop
-        movl    $1, %eax
-        ret
-
-        # Support for void Copy::arrayof_conjoint_bytes(void* from,
-        #                                               void* to,
-        #                                               size_t count)
-        #
-        .p2align 4,,15
-DECLARE_FUNC(_Copy_arrayof_conjoint_bytes):
-        pushl    %esi
-        movl     4+12(%esp),%ecx      # count
-        pushl    %edi
-        movl     8+ 4(%esp),%esi      # from
-        movl     8+ 8(%esp),%edi      # to
-        cmpl     %esi,%edi
-        leal     -1(%esi,%ecx),%eax   # from + count - 1
-        jbe      acb_CopyRight
-        cmpl     %eax,%edi
-        jbe      acb_CopyLeft
-        # copy from low to high
-acb_CopyRight:
-        cmpl     $3,%ecx
-        jbe      5f
-1:      movl     %ecx,%eax
-        shrl     $2,%ecx
-        jz       4f
-        cmpl     $32,%ecx
-        ja       3f
-        # copy aligned dwords
-        subl     %esi,%edi
-        .p2align 4,,15
-2:      movl     (%esi),%edx
-        movl     %edx,(%edi,%esi,1)
-        addl     $4,%esi
-        subl     $1,%ecx
-        jnz      2b
-        addl     %esi,%edi
-        jmp      4f
-        # copy aligned dwords
-3:      rep;     smovl
-4:      movl     %eax,%ecx
-5:      andl     $3,%ecx
-        jz       7f
-        # copy suffix
-        xorl     %eax,%eax
-6:      movb     (%esi,%eax,1),%dl
-        movb     %dl,(%edi,%eax,1)
-        addl     $1,%eax
-        subl     $1,%ecx
-        jnz      6b
-7:      popl     %edi
-        popl     %esi
-        ret
-acb_CopyLeft:
-        std
-        leal     -4(%edi,%ecx),%edi   # to + count - 4
-        movl     %eax,%esi            # from + count - 1
-        movl     %ecx,%eax
-        subl     $3,%esi              # from + count - 4
-        cmpl     $3,%ecx
-        jbe      5f
-1:      shrl     $2,%ecx
-        jz       4f
-        cmpl     $32,%ecx
-        jbe      2f                   # <= 32 dwords
-        rep;     smovl
-        jmp      4f
-        .space 8
-2:      subl     %esi,%edi
-        .p2align 4,,15
-3:      movl     (%esi),%edx
-        movl     %edx,(%edi,%esi,1)
-        subl     $4,%esi
-        subl     $1,%ecx
-        jnz      3b
-        addl     %esi,%edi
-4:      movl     %eax,%ecx
-5:      andl     $3,%ecx
-        jz       7f
-        subl     %esi,%edi
-        addl     $3,%esi
-6:      movb     (%esi),%dl
-        movb     %dl,(%edi,%esi,1)
-        subl     $1,%esi
-        subl     $1,%ecx
-        jnz      6b
-7:      cld
-        popl     %edi
-        popl     %esi
-        ret
-
-        # Support for void Copy::conjoint_jshorts_atomic(void* from,
-        #                                                void* to,
-        #                                                size_t count)
-        .p2align 4,,15
-DECLARE_FUNC(_Copy_conjoint_jshorts_atomic):
-        pushl    %esi
-        movl     4+12(%esp),%ecx      # count
-        pushl    %edi
-        movl     8+ 4(%esp),%esi      # from
-        movl     8+ 8(%esp),%edi      # to
-        cmpl     %esi,%edi
-        leal     -2(%esi,%ecx,2),%eax # from + count*2 - 2
-        jbe      cs_CopyRight
-        cmpl     %eax,%edi
-        jbe      cs_CopyLeft
-        # copy from low to high
-cs_CopyRight:
-        # align source address at dword address boundary
-        movl     %esi,%eax            # original from
-        andl     $3,%eax              # either 0 or 2
-        jz       1f                   # no prefix
-        # copy prefix
-        subl     $1,%ecx
-        jl       5f                   # zero count
-        movw     (%esi),%dx
-        movw     %dx,(%edi)
-        addl     %eax,%esi            # %eax == 2
-        addl     %eax,%edi
-1:      movl     %ecx,%eax            # word count less prefix
-        sarl     %ecx                 # dword count
-        jz       4f                   # no dwords to move
-        cmpl     $32,%ecx
-        jbe      2f                   # <= 32 dwords
-        # copy aligned dwords
-        rep;     smovl
-        jmp      4f
-        # copy aligned dwords
-2:      subl     %esi,%edi
-        .p2align 4,,15
-3:      movl     (%esi),%edx
-        movl     %edx,(%edi,%esi,1)
-        addl     $4,%esi
-        subl     $1,%ecx
-        jnz      3b
-        addl     %esi,%edi
-4:      andl     $1,%eax              # suffix count
-        jz       5f                   # no suffix
-        # copy suffix
-        movw     (%esi),%dx
-        movw     %dx,(%edi)
-5:      popl     %edi
-        popl     %esi
-        ret
-        # copy from high to low
-cs_CopyLeft:
-        std
-        leal     -4(%edi,%ecx,2),%edi # to + count*2 - 4
-        movl     %eax,%esi            # from + count*2 - 2
-        movl     %ecx,%eax
-        subl     $2,%esi              # from + count*2 - 4
-1:      sarl     %ecx                 # dword count
-        jz       4f                   # no dwords to move
-        cmpl     $32,%ecx
-        ja       3f                   # > 32 dwords
-        subl     %esi,%edi
-        .p2align 4,,15
-2:      movl     (%esi),%edx
-        movl     %edx,(%edi,%esi,1)
-        subl     $4,%esi
-        subl     $1,%ecx
-        jnz      2b
-        addl     %esi,%edi
-        jmp      4f
-3:      rep;     smovl
-4:      andl     $1,%eax              # suffix count
-        jz       5f                   # no suffix
-        # copy suffix
-        addl     $2,%esi
-        addl     $2,%edi
-        movw     (%esi),%dx
-        movw     %dx,(%edi)
-5:      cld
-        popl     %edi
-        popl     %esi
-        ret
-
-        # Support for void Copy::arrayof_conjoint_jshorts(void* from,
-        #                                                 void* to,
-        #                                                 size_t count)
-        .p2align 4,,15
-DECLARE_FUNC(_Copy_arrayof_conjoint_jshorts):
-        pushl    %esi
-        movl     4+12(%esp),%ecx      # count
-        pushl    %edi
-        movl     8+ 4(%esp),%esi      # from
-        movl     8+ 8(%esp),%edi      # to
-        cmpl     %esi,%edi
-        leal     -2(%esi,%ecx,2),%eax # from + count*2 - 2
-        jbe      acs_CopyRight
-        cmpl     %eax,%edi
-        jbe      acs_CopyLeft
-acs_CopyRight:
-        movl     %ecx,%eax            # word count
-        sarl     %ecx                 # dword count
-        jz       4f                   # no dwords to move
-        cmpl     $32,%ecx
-        jbe      2f                   # <= 32 dwords
-        # copy aligned dwords
-        rep;     smovl
-        jmp      4f
-        # copy aligned dwords
-        .space 5
-2:      subl     %esi,%edi
-        .p2align 4,,15
-3:      movl     (%esi),%edx
-        movl     %edx,(%edi,%esi,1)
-        addl     $4,%esi
-        subl     $1,%ecx
-        jnz      3b
-        addl     %esi,%edi
-4:      andl     $1,%eax              # suffix count
-        jz       5f                   # no suffix
-        # copy suffix
-        movw     (%esi),%dx
-        movw     %dx,(%edi)
-5:      popl     %edi
-        popl     %esi
-        ret
-acs_CopyLeft:
-        std
-        leal     -4(%edi,%ecx,2),%edi # to + count*2 - 4
-        movl     %eax,%esi            # from + count*2 - 2
-        movl     %ecx,%eax
-        subl     $2,%esi              # from + count*2 - 4
-        sarl     %ecx                 # dword count
-        jz       4f                   # no dwords to move
-        cmpl     $32,%ecx
-        ja       3f                   # > 32 dwords
-        subl     %esi,%edi
-        .p2align 4,,15
-2:      movl     (%esi),%edx
-        movl     %edx,(%edi,%esi,1)
-        subl     $4,%esi
-        subl     $1,%ecx
-        jnz      2b
-        addl     %esi,%edi
-        jmp      4f
-3:      rep;     smovl
-4:      andl     $1,%eax              # suffix count
-        jz       5f                   # no suffix
-        # copy suffix
-        addl     $2,%esi
-        addl     $2,%edi
-        movw     (%esi),%dx
-        movw     %dx,(%edi)
-5:      cld
-        popl     %edi
-        popl     %esi
-        ret
-
-        # Support for void Copy::conjoint_jints_atomic(void* from,
-        #                                              void* to,
-        #                                              size_t count)
-        # Equivalent to
-        #   arrayof_conjoint_jints
-        .p2align 4,,15
-DECLARE_FUNC(_Copy_conjoint_jints_atomic):
-DECLARE_FUNC(_Copy_arrayof_conjoint_jints):
-        pushl    %esi
-        movl     4+12(%esp),%ecx      # count
-        pushl    %edi
-        movl     8+ 4(%esp),%esi      # from
-        movl     8+ 8(%esp),%edi      # to
-        cmpl     %esi,%edi
-        leal     -4(%esi,%ecx,4),%eax # from + count*4 - 4
-        jbe      ci_CopyRight
-        cmpl     %eax,%edi
-        jbe      ci_CopyLeft
-ci_CopyRight:
-        cmpl     $32,%ecx
-        jbe      2f                   # <= 32 dwords
-        rep;     smovl
-        popl     %edi
-        popl     %esi
-        ret
-        .space 10
-2:      subl     %esi,%edi
-        jmp      4f
-        .p2align 4,,15
-3:      movl     (%esi),%edx
-        movl     %edx,(%edi,%esi,1)
-        addl     $4,%esi
-4:      subl     $1,%ecx
-        jge      3b
-        popl     %edi
-        popl     %esi
-        ret
-ci_CopyLeft:
-        std
-        leal     -4(%edi,%ecx,4),%edi # to + count*4 - 4
-        cmpl     $32,%ecx
-        ja       4f                   # > 32 dwords
-        subl     %eax,%edi            # eax == from + count*4 - 4
-        jmp      3f
-        .p2align 4,,15
-2:      movl     (%eax),%edx
-        movl     %edx,(%edi,%eax,1)
-        subl     $4,%eax
-3:      subl     $1,%ecx
-        jge      2b
-        cld
-        popl     %edi
-        popl     %esi
-        ret
-4:      movl     %eax,%esi            # from + count*4 - 4
-        rep;     smovl
-        cld
-        popl     %edi
-        popl     %esi
-        ret
-
-        # Support for void Copy::conjoint_jlongs_atomic(jlong* from,
-        #                                               jlong* to,
-        #                                               size_t count)
-        #
-        # 32-bit
-        #
-        # count treated as signed
-        #
-        # // if (from > to) {
-        #   while (--count >= 0) {
-        #     *to++ = *from++;
-        #   }
-        # } else {
-        #   while (--count >= 0) {
-        #     to[count] = from[count];
-        #   }
-        # }
-        .p2align 4,,15
-DECLARE_FUNC(_Copy_conjoint_jlongs_atomic):
-        movl     4+8(%esp),%ecx       # count
-        movl     4+0(%esp),%eax       # from
-        movl     4+4(%esp),%edx       # to
-        cmpl     %eax,%edx
-        jae      cla_CopyLeft
-cla_CopyRight:
-        subl     %eax,%edx
-        jmp      2f
-        .p2align 4,,15
-1:      fildll   (%eax)
-        fistpll  (%edx,%eax,1)
-        addl     $8,%eax
-2:      subl     $1,%ecx
-        jge      1b
-        ret
-        .p2align 4,,15
-3:      fildll   (%eax,%ecx,8)
-        fistpll  (%edx,%ecx,8)
-cla_CopyLeft:
-        subl     $1,%ecx
-        jge      3b
-        ret
-
-        # Support for void Copy::arrayof_conjoint_jshorts(void* from,
-        #                                                 void* to,
-        #                                                 size_t count)
-        .p2align 4,,15
-DECLARE_FUNC(_mmx_Copy_arrayof_conjoint_jshorts):
-        pushl    %esi
-        movl     4+12(%esp),%ecx
-        pushl    %edi
-        movl     8+ 4(%esp),%esi
-        movl     8+ 8(%esp),%edi
-        cmpl     %esi,%edi
-        leal     -2(%esi,%ecx,2),%eax
-        jbe      mmx_acs_CopyRight
-        cmpl     %eax,%edi
-        jbe      mmx_acs_CopyLeft
-mmx_acs_CopyRight:
-        movl     %ecx,%eax
-        sarl     %ecx
-        je       5f
-        cmpl     $33,%ecx
-        jae      3f
-1:      subl     %esi,%edi
-        .p2align 4,,15
-2:      movl     (%esi),%edx
-        movl     %edx,(%edi,%esi,1)
-        addl     $4,%esi
-        subl     $1,%ecx
-        jnz      2b
-        addl     %esi,%edi
-        jmp      5f
-3:      smovl # align to 8 bytes, we know we are 4 byte aligned to start
-        subl     $1,%ecx
-4:      .p2align 4,,15
-        movq     0(%esi),%mm0
-        addl     $64,%edi
-        movq     8(%esi),%mm1
-        subl     $16,%ecx
-        movq     16(%esi),%mm2
-        movq     %mm0,-64(%edi)
-        movq     24(%esi),%mm0
-        movq     %mm1,-56(%edi)
-        movq     32(%esi),%mm1
-        movq     %mm2,-48(%edi)
-        movq     40(%esi),%mm2
-        movq     %mm0,-40(%edi)
-        movq     48(%esi),%mm0
-        movq     %mm1,-32(%edi)
-        movq     56(%esi),%mm1
-        movq     %mm2,-24(%edi)
-        movq     %mm0,-16(%edi)
-        addl     $64,%esi
-        movq     %mm1,-8(%edi)
-        cmpl     $16,%ecx
-        jge      4b
-        emms
-        testl    %ecx,%ecx
-        ja       1b
-5:      andl     $1,%eax
-        je       7f
-6:      movw     (%esi),%dx
-        movw     %dx,(%edi)
-7:      popl     %edi
-        popl     %esi
-        ret
-mmx_acs_CopyLeft:
-        std
-        leal     -4(%edi,%ecx,2),%edi
-        movl     %eax,%esi
-        movl     %ecx,%eax
-        subl     $2,%esi
-        sarl     %ecx
-        je       4f
-        cmpl     $32,%ecx
-        ja       3f
-        subl     %esi,%edi
-        .p2align 4,,15
-2:      movl     (%esi),%edx
-        movl     %edx,(%edi,%esi,1)
-        subl     $4,%esi
-        subl     $1,%ecx
-        jnz      2b
-        addl     %esi,%edi
-        jmp      4f
-3:      rep;     smovl
-4:      andl     $1,%eax
-        je       6f
-        addl     $2,%esi
-        addl     $2,%edi
-5:      movw     (%esi),%dx
-        movw     %dx,(%edi)
-6:      cld
-        popl     %edi
-        popl     %esi
-        ret
-
-
-        # Support for int64_t Atomic::cmpxchg(int64_t compare_value,
-        #                                     volatile int64_t* dest,
-        #                                     int64_t exchange_value)
-        #
-        .p2align 4,,15
-DECLARE_FUNC(_Atomic_cmpxchg_long):
-                                   #  8(%esp) : return PC
-        pushl    %ebx              #  4(%esp) : old %ebx
-        pushl    %edi              #  0(%esp) : old %edi
-        movl     12(%esp), %ebx    # 12(%esp) : exchange_value (low)
-        movl     16(%esp), %ecx    # 16(%esp) : exchange_value (high)
-        movl     24(%esp), %eax    # 24(%esp) : compare_value (low)
-        movl     28(%esp), %edx    # 28(%esp) : compare_value (high)
-        movl     20(%esp), %edi    # 20(%esp) : dest
-        lock
-        cmpxchg8b (%edi)
-        popl     %edi
-        popl     %ebx
-        ret
-
-
-        # Support for int64_t Atomic::load and Atomic::store.
-        # void _Atomic_move_long(const volatile int64_t* src, volatile int64_t* dst)
-        .p2align 4,,15
-DECLARE_FUNC(_Atomic_move_long):
-        movl     4(%esp), %eax   # src
-        fildll    (%eax)
-        movl     8(%esp), %eax   # dest
-        fistpll   (%eax)
-        ret
diff --git a/src/hotspot/os_cpu/linux_x86/linux_x86_32.S b/src/hotspot/os_cpu/linux_x86/linux_x86_32.S
deleted file mode 100644
index 43a9a38e57f..00000000000
--- a/src/hotspot/os_cpu/linux_x86/linux_x86_32.S
+++ /dev/null
@@ -1,518 +0,0 @@
-#
-# Copyright (c) 2004, 2024, Oracle and/or its affiliates. All rights reserved.
-# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
-#
-# This code is free software; you can redistribute it and/or modify it
-# under the terms of the GNU General Public License version 2 only, as
-# published by the Free Software Foundation.
-#
-# This code is distributed in the hope that it will be useful, but WITHOUT
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-# version 2 for more details (a copy is included in the LICENSE file that
-# accompanied this code).
-#
-# You should have received a copy of the GNU General Public License version
-# 2 along with this work; if not, write to the Free Software Foundation,
-# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
-#
-# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
-# or visit www.oracle.com if you need additional information or have any
-# questions.
-#
-
-#include "defs.S.inc"
-
-        # NOTE WELL!  The _Copy functions are called directly
-        # from server-compiler-generated code via CallLeafNoFP,
-        # which means that they *must* either not use floating
-        # point or use it in the same manner as does the server
-        # compiler.
-
-        .text
-
-        .p2align 4,,15
-DECLARE_FUNC(SpinPause):
-        rep
-        nop
-        movl    $1, %eax
-        ret
-
-        # Support for void Copy::arrayof_conjoint_bytes(void* from,
-        #                                               void* to,
-        #                                               size_t count)
-        #
-        .p2align 4,,15
-DECLARE_FUNC(_Copy_arrayof_conjoint_bytes):
-        pushl    %esi
-        movl     4+12(%esp),%ecx      # count
-        pushl    %edi
-        movl     8+ 4(%esp),%esi      # from
-        movl     8+ 8(%esp),%edi      # to
-        cmpl     %esi,%edi
-        leal     -1(%esi,%ecx),%eax   # from + count - 1
-        jbe      acb_CopyRight
-        cmpl     %eax,%edi
-        jbe      acb_CopyLeft
-        # copy from low to high
-acb_CopyRight:
-        cmpl     $3,%ecx
-        jbe      5f
-1:      movl     %ecx,%eax
-        shrl     $2,%ecx
-        jz       4f
-        cmpl     $32,%ecx
-        ja       3f
-        # copy aligned dwords
-        subl     %esi,%edi
-        .p2align 4,,15
-2:      movl     (%esi),%edx
-        movl     %edx,(%edi,%esi,1)
-        addl     $4,%esi
-        subl     $1,%ecx
-        jnz      2b
-        addl     %esi,%edi
-        jmp      4f
-        # copy aligned dwords
-3:      rep;     smovl
-4:      movl     %eax,%ecx
-5:      andl     $3,%ecx
-        jz       7f
-        # copy suffix
-        xorl     %eax,%eax
-6:      movb     (%esi,%eax,1),%dl
-        movb     %dl,(%edi,%eax,1)
-        addl     $1,%eax
-        subl     $1,%ecx
-        jnz      6b
-7:      popl     %edi
-        popl     %esi
-        ret
-acb_CopyLeft:
-        std
-        leal     -4(%edi,%ecx),%edi   # to + count - 4
-        movl     %eax,%esi            # from + count - 1
-        movl     %ecx,%eax
-        subl     $3,%esi              # from + count - 4
-        cmpl     $3,%ecx
-        jbe      5f
-1:      shrl     $2,%ecx
-        jz       4f
-        cmpl     $32,%ecx
-        jbe      2f                   # <= 32 dwords
-        rep;     smovl
-        jmp      4f
-        .space 8
-2:      subl     %esi,%edi
-        .p2align 4,,15
-3:      movl     (%esi),%edx
-        movl     %edx,(%edi,%esi,1)
-        subl     $4,%esi
-        subl     $1,%ecx
-        jnz      3b
-        addl     %esi,%edi
-4:      movl     %eax,%ecx
-5:      andl     $3,%ecx
-        jz       7f
-        subl     %esi,%edi
-        addl     $3,%esi
-6:      movb     (%esi),%dl
-        movb     %dl,(%edi,%esi,1)
-        subl     $1,%esi
-        subl     $1,%ecx
-        jnz      6b
-7:      cld
-        popl     %edi
-        popl     %esi
-        ret
-
-        # Support for void Copy::conjoint_jshorts_atomic(void* from,
-        #                                                void* to,
-        #                                                size_t count)
-        .p2align 4,,15
-DECLARE_FUNC(_Copy_conjoint_jshorts_atomic):
-        pushl    %esi
-        movl     4+12(%esp),%ecx      # count
-        pushl    %edi
-        movl     8+ 4(%esp),%esi      # from
-        movl     8+ 8(%esp),%edi      # to
-        cmpl     %esi,%edi
-        leal     -2(%esi,%ecx,2),%eax # from + count*2 - 2
-        jbe      cs_CopyRight
-        cmpl     %eax,%edi
-        jbe      cs_CopyLeft
-        # copy from low to high
-cs_CopyRight:
-        # align source address at dword address boundary
-        movl     %esi,%eax            # original from
-        andl     $3,%eax              # either 0 or 2
-        jz       1f                   # no prefix
-        # copy prefix
-        subl     $1,%ecx
-        jl       5f                   # zero count
-        movw     (%esi),%dx
-        movw     %dx,(%edi)
-        addl     %eax,%esi            # %eax == 2
-        addl     %eax,%edi
-1:      movl     %ecx,%eax            # word count less prefix
-        sarl     %ecx                 # dword count
-        jz       4f                   # no dwords to move
-        cmpl     $32,%ecx
-        jbe      2f                   # <= 32 dwords
-        # copy aligned dwords
-        rep;     smovl
-        jmp      4f
-        # copy aligned dwords
-2:      subl     %esi,%edi
-        .p2align 4,,15
-3:      movl     (%esi),%edx
-        movl     %edx,(%edi,%esi,1)
-        addl     $4,%esi
-        subl     $1,%ecx
-        jnz      3b
-        addl     %esi,%edi
-4:      andl     $1,%eax              # suffix count
-        jz       5f                   # no suffix
-        # copy suffix
-        movw     (%esi),%dx
-        movw     %dx,(%edi)
-5:      popl     %edi
-        popl     %esi
-        ret
-        # copy from high to low
-cs_CopyLeft:
-        std
-        leal     -4(%edi,%ecx,2),%edi # to + count*2 - 4
-        movl     %eax,%esi            # from + count*2 - 2
-        movl     %ecx,%eax
-        subl     $2,%esi              # from + count*2 - 4
-1:      sarl     %ecx                 # dword count
-        jz       4f                   # no dwords to move
-        cmpl     $32,%ecx
-        ja       3f                   # > 32 dwords
-        subl     %esi,%edi
-        .p2align 4,,15
-2:      movl     (%esi),%edx
-        movl     %edx,(%edi,%esi,1)
-        subl     $4,%esi
-        subl     $1,%ecx
-        jnz      2b
-        addl     %esi,%edi
-        jmp      4f
-3:      rep;     smovl
-4:      andl     $1,%eax              # suffix count
-        jz       5f                   # no suffix
-        # copy suffix
-        addl     $2,%esi
-        addl     $2,%edi
-        movw     (%esi),%dx
-        movw     %dx,(%edi)
-5:      cld
-        popl     %edi
-        popl     %esi
-        ret
-
-        # Support for void Copy::arrayof_conjoint_jshorts(void* from,
-        #                                                 void* to,
-        #                                                 size_t count)
-        .p2align 4,,15
-DECLARE_FUNC(_Copy_arrayof_conjoint_jshorts):
-        pushl    %esi
-        movl     4+12(%esp),%ecx      # count
-        pushl    %edi
-        movl     8+ 4(%esp),%esi      # from
-        movl     8+ 8(%esp),%edi      # to
-        cmpl     %esi,%edi
-        leal     -2(%esi,%ecx,2),%eax # from + count*2 - 2
-        jbe      acs_CopyRight
-        cmpl     %eax,%edi
-        jbe      acs_CopyLeft
-acs_CopyRight:
-        movl     %ecx,%eax            # word count
-        sarl     %ecx                 # dword count
-        jz       4f                   # no dwords to move
-        cmpl     $32,%ecx
-        jbe      2f                   # <= 32 dwords
-        # copy aligned dwords
-        rep;     smovl
-        jmp      4f
-        # copy aligned dwords
-        .space 5
-2:      subl     %esi,%edi
-        .p2align 4,,15
-3:      movl     (%esi),%edx
-        movl     %edx,(%edi,%esi,1)
-        addl     $4,%esi
-        subl     $1,%ecx
-        jnz      3b
-        addl     %esi,%edi
-4:      andl     $1,%eax              # suffix count
-        jz       5f                   # no suffix
-        # copy suffix
-        movw     (%esi),%dx
-        movw     %dx,(%edi)
-5:      popl     %edi
-        popl     %esi
-        ret
-acs_CopyLeft:
-        std
-        leal     -4(%edi,%ecx,2),%edi # to + count*2 - 4
-        movl     %eax,%esi            # from + count*2 - 2
-        movl     %ecx,%eax
-        subl     $2,%esi              # from + count*2 - 4
-        sarl     %ecx                 # dword count
-        jz       4f                   # no dwords to move
-        cmpl     $32,%ecx
-        ja       3f                   # > 32 dwords
-        subl     %esi,%edi
-        .p2align 4,,15
-2:      movl     (%esi),%edx
-        movl     %edx,(%edi,%esi,1)
-        subl     $4,%esi
-        subl     $1,%ecx
-        jnz      2b
-        addl     %esi,%edi
-        jmp      4f
-3:      rep;     smovl
-4:      andl     $1,%eax              # suffix count
-        jz       5f                   # no suffix
-        # copy suffix
-        addl     $2,%esi
-        addl     $2,%edi
-        movw     (%esi),%dx
-        movw     %dx,(%edi)
-5:      cld
-        popl     %edi
-        popl     %esi
-        ret
-
-        # Support for void Copy::conjoint_jints_atomic(void* from,
-        #                                              void* to,
-        #                                              size_t count)
-        # Equivalent to
-        #   arrayof_conjoint_jints
-        .p2align 4,,15
-DECLARE_FUNC(_Copy_conjoint_jints_atomic):
-DECLARE_FUNC(_Copy_arrayof_conjoint_jints):
-        pushl    %esi
-        movl     4+12(%esp),%ecx      # count
-        pushl    %edi
-        movl     8+ 4(%esp),%esi      # from
-        movl     8+ 8(%esp),%edi      # to
-        cmpl     %esi,%edi
-        leal     -4(%esi,%ecx,4),%eax # from + count*4 - 4
-        jbe      ci_CopyRight
-        cmpl     %eax,%edi
-        jbe      ci_CopyLeft
-ci_CopyRight:
-        cmpl     $32,%ecx
-        jbe      2f                   # <= 32 dwords
-        rep;     smovl
-        popl     %edi
-        popl     %esi
-        ret
-        .space 10
-2:      subl     %esi,%edi
-        jmp      4f
-        .p2align 4,,15
-3:      movl     (%esi),%edx
-        movl     %edx,(%edi,%esi,1)
-        addl     $4,%esi
-4:      subl     $1,%ecx
-        jge      3b
-        popl     %edi
-        popl     %esi
-        ret
-ci_CopyLeft:
-        std
-        leal     -4(%edi,%ecx,4),%edi # to + count*4 - 4
-        cmpl     $32,%ecx
-        ja       4f                   # > 32 dwords
-        subl     %eax,%edi            # eax == from + count*4 - 4
-        jmp      3f
-        .p2align 4,,15
-2:      movl     (%eax),%edx
-        movl     %edx,(%edi,%eax,1)
-        subl     $4,%eax
-3:      subl     $1,%ecx
-        jge      2b
-        cld
-        popl     %edi
-        popl     %esi
-        ret
-4:      movl     %eax,%esi            # from + count*4 - 4
-        rep;     smovl
-        cld
-        popl     %edi
-        popl     %esi
-        ret
-
-        # Support for void Copy::conjoint_jlongs_atomic(jlong* from,
-        #                                               jlong* to,
-        #                                               size_t count)
-        #
-        # 32-bit
-        #
-        # count treated as signed
-        /*
-        #
-        # if (from > to) {
-        #   while (--count >= 0) {
-        #     *to++ = *from++;
-        #   }
-        # } else {
-        #   while (--count >= 0) {
-        #     to[count] = from[count];
-        #   }
-        # }
-        */
-        .p2align 4,,15
-DECLARE_FUNC(_Copy_conjoint_jlongs_atomic):
-        movl     4+8(%esp),%ecx       # count
-        movl     4+0(%esp),%eax       # from
-        movl     4+4(%esp),%edx       # to
-        cmpl     %eax,%edx
-        jae      cla_CopyLeft
-cla_CopyRight:
-        subl     %eax,%edx
-        jmp      2f
-        .p2align 4,,15
-1:      fildll   (%eax)
-        fistpll  (%edx,%eax,1)
-        addl     $8,%eax
-2:      subl     $1,%ecx
-        jge      1b
-        ret
-        .p2align 4,,15
-3:      fildll   (%eax,%ecx,8)
-        fistpll  (%edx,%ecx,8)
-cla_CopyLeft:
-        subl     $1,%ecx
-        jge      3b
-        ret
-
-        # Support for void Copy::arrayof_conjoint_jshorts(void* from,
-        #                                                 void* to,
-        #                                                 size_t count)
-        .p2align 4,,15
-DECLARE_FUNC(_mmx_Copy_arrayof_conjoint_jshorts):
-        pushl    %esi
-        movl     4+12(%esp),%ecx
-        pushl    %edi
-        movl     8+ 4(%esp),%esi
-        movl     8+ 8(%esp),%edi
-        cmpl     %esi,%edi
-        leal     -2(%esi,%ecx,2),%eax
-        jbe      mmx_acs_CopyRight
-        cmpl     %eax,%edi
-        jbe      mmx_acs_CopyLeft
-mmx_acs_CopyRight:
-        movl     %ecx,%eax
-        sarl     %ecx
-        je       5f
-        cmpl     $33,%ecx
-        jae      3f
-1:      subl     %esi,%edi
-        .p2align 4,,15
-2:      movl     (%esi),%edx
-        movl     %edx,(%edi,%esi,1)
-        addl     $4,%esi
-        subl     $1,%ecx
-        jnz      2b
-        addl     %esi,%edi
-        jmp      5f
-3:      smovl # align to 8 bytes, we know we are 4 byte aligned to start
-        subl     $1,%ecx
-4:      .p2align 4,,15
-        movq     0(%esi),%mm0
-        addl     $64,%edi
-        movq     8(%esi),%mm1
-        subl     $16,%ecx
-        movq     16(%esi),%mm2
-        movq     %mm0,-64(%edi)
-        movq     24(%esi),%mm0
-        movq     %mm1,-56(%edi)
-        movq     32(%esi),%mm1
-        movq     %mm2,-48(%edi)
-        movq     40(%esi),%mm2
-        movq     %mm0,-40(%edi)
-        movq     48(%esi),%mm0
-        movq     %mm1,-32(%edi)
-        movq     56(%esi),%mm1
-        movq     %mm2,-24(%edi)
-        movq     %mm0,-16(%edi)
-        addl     $64,%esi
-        movq     %mm1,-8(%edi)
-        cmpl     $16,%ecx
-        jge      4b
-        emms
-        testl    %ecx,%ecx
-        ja       1b
-5:      andl     $1,%eax
-        je       7f
-6:      movw     (%esi),%dx
-        movw     %dx,(%edi)
-7:	popl     %edi
-        popl     %esi
-        ret
-mmx_acs_CopyLeft:
-        std
-        leal     -4(%edi,%ecx,2),%edi
-        movl     %eax,%esi
-        movl     %ecx,%eax
-        subl     $2,%esi
-        sarl     %ecx
-        je       4f
-        cmpl     $32,%ecx
-        ja       3f
-        subl     %esi,%edi
-        .p2align 4,,15
-2:      movl     (%esi),%edx
-        movl     %edx,(%edi,%esi,1)
-        subl     $4,%esi
-        subl     $1,%ecx
-        jnz      2b
-        addl     %esi,%edi
-        jmp      4f
-3:      rep;     smovl
-4:      andl     $1,%eax
-        je       6f
-        addl     $2,%esi
-        addl     $2,%edi
-5:      movw     (%esi),%dx
-        movw     %dx,(%edi)
-6:      cld
-        popl     %edi
-        popl     %esi
-        ret
-
-
-        # Support for jlong Atomic::cmpxchg(volatile jlong* dest,
-        #                                   jlong compare_value,
-        #                                   jlong exchange_value)
-        #
-        .p2align 4,,15
-DECLARE_FUNC(_Atomic_cmpxchg_long):
-                                   #  8(%esp) : return PC
-        pushl    %ebx              #  4(%esp) : old %ebx
-        pushl    %edi              #  0(%esp) : old %edi
-        movl     12(%esp), %ebx    # 12(%esp) : exchange_value (low)
-        movl     16(%esp), %ecx    # 16(%esp) : exchange_value (high)
-        movl     24(%esp), %eax    # 24(%esp) : compare_value (low)
-        movl     28(%esp), %edx    # 28(%esp) : compare_value (high)
-        movl     20(%esp), %edi    # 20(%esp) : dest
-        lock cmpxchg8b (%edi)
-        popl     %edi
-        popl     %ebx
-        ret
-
-
-        # Support for jlong Atomic::load and Atomic::store.
-        # void _Atomic_move_long(const volatile jlong* src, volatile jlong* dst)
-        .p2align 4,,15
-DECLARE_FUNC(_Atomic_move_long):
-        movl     4(%esp), %eax   # src
-        fildll    (%eax)
-        movl     8(%esp), %eax   # dest
-        fistpll   (%eax)
-        ret
diff --git a/src/hotspot/os_cpu/linux_x86/safefetch_linux_x86_32.S b/src/hotspot/os_cpu/linux_x86/safefetch_linux_x86_32.S
deleted file mode 100644
index 73f6cdf38c9..00000000000
--- a/src/hotspot/os_cpu/linux_x86/safefetch_linux_x86_32.S
+++ /dev/null
@@ -1,41 +0,0 @@
-#
-# Copyright (c) 2022 SAP SE. All rights reserved.
-# Copyright (c) 2022, 2024, Oracle and/or its affiliates. All rights reserved.
-# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
-#
-# This code is free software; you can redistribute it and/or modify it
-# under the terms of the GNU General Public License version 2 only, as
-# published by the Free Software Foundation.
-#
-# This code is distributed in the hope that it will be useful, but WITHOUT
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-# version 2 for more details (a copy is included in the LICENSE file that
-# accompanied this code).
-#
-# You should have received a copy of the GNU General Public License version
-# 2 along with this work; if not, write to the Free Software Foundation,
-# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
-#
-# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
-# or visit www.oracle.com if you need additional information or have any
-# questions.
-#
-
-#include "defs.S.inc"
-
-    .text
-
-    # Support for int SafeFetch32(int* address, int defaultval);
-    #
-    #  8(%esp) : default value
-    #  4(%esp) : crash address
-    #  0(%esp) : return pc
-DECLARE_FUNC(SafeFetch32_impl):
-    movl 4(%esp),%ecx         # load address from stack
-DECLARE_FUNC(_SafeFetch32_fault):
-    movl (%ecx), %eax         # load target value, may fault
-    ret
-DECLARE_FUNC(_SafeFetch32_continuation):
-    movl 8(%esp),%eax         # load default value from stack
-    ret